Coverage for packages/ml/features.py: 0%

"""Feature engineering for ML model inputs.

Extracts structured feature vectors from raw racing data to feed
into ensemble or standalone ML models for win/place prediction.
"""

7from __future__ import annotations

9from datetime import date

10from typing import Any

12from sqlalchemy.orm import Session

class FeatureEngineering:
    """Feature extraction and dataset construction for racing ML models.

    Features are built from Elo ratings, form metrics, race conditions,
    and historical performance statistics.

    Only the constructor is functional at present: every extraction and
    dataset method is a stub that raises ``NotImplementedError``.
    """

    def __init__(self, session: Session) -> None:
        """Initialize with a database session.

        Args:
            session: Database session for loading data.
        """
        # Stored as-is; no query is issued at construction time.
        self._session = session

    def extract_horse_features(self, horse_id: int) -> dict[str, Any]:
        """Extract feature vector for a single horse.

        Features include:
        - Current Elo rating and RD
        - Recent form (avg placing, win rate, place rate)
        - Distance performance stats
        - Barrier/handicap statistics
        - Course/venue familiarity
        - Days since last start
        - Driver/trainer combination stats

        Args:
            horse_id: The horse entity ID.

        Returns:
            Dict of feature name -> value.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError

    def extract_race_features(self, race_id: int) -> dict[str, Any]:
        """Extract aggregate features for a race.

        Features include:
        - Race distance, start type, gait
        - Track condition, weather
        - Field size
        - Aggregate ratings of all starters
        - Race class / grade indicators

        Args:
            race_id: The race ID.

        Returns:
            Dict of feature name -> value.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError

    def build_training_dataset(
        self,
        date_from: date,
        date_to: date,
        include_target: bool = True,
    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
        """Build a complete training dataset for a date range.

        For each starter in completed races within the range, extract
        pre-race features and (optionally) the actual outcome as target.

        Args:
            date_from: Start date (inclusive).
            date_to: End date (inclusive).
            include_target: If True, include actual placing/probability
                targets in the returned data.

        Returns:
            Tuple of (features_list, targets_list_or_None).
            Each element in features_list is a dict of feature name -> value.
            Each element in targets_list is a dict with 'starter_id', 'placing',
            and optionally 'win_frac', 'place_frac'.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError

    def feature_names(self) -> list[str]:
        """Return ordered list of all feature names produced by this engine.

        Returns:
            List of feature name strings.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError

Coverage for packages/ml/features.py: 0%

11 statements