Coverage for packages / ml / features.py: 0%

11 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-08 08:14 +1200

1"""Feature engineering for ML model inputs. 

2 

3Extracts structured feature vectors from raw racing data to feed 

4into ensemble or standalone ML models for win/place prediction. 

5""" 

6 

7from __future__ import annotations 

8 

9from datetime import date 

10from typing import Any 

11 

12from sqlalchemy.orm import Session 

13 

14 

class FeatureEngineering:
    """Build feature vectors and training datasets for racing ML models.

    The intended feature sources are Elo ratings, form metrics, race
    conditions, and historical performance statistics.

    Note:
        Only the constructor does any work at present. Every other
        method is a stub that raises ``NotImplementedError``.
    """

    def __init__(self, session: Session) -> None:
        """Keep a reference to the database session.

        Args:
            session: Database session for loading data.
        """
        self._session = session

    def extract_horse_features(self, horse_id: int) -> dict[str, Any]:
        """Produce the feature vector for one horse.

        Planned features:
            - Current Elo rating and RD
            - Recent form (avg placing, win rate, place rate)
            - Distance performance stats
            - Barrier/handicap statistics
            - Course/venue familiarity
            - Days since last start
            - Driver/trainer combination stats

        Args:
            horse_id: The horse entity ID.

        Returns:
            Dict of feature name -> value.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError()

    def extract_race_features(self, race_id: int) -> dict[str, Any]:
        """Produce the aggregate, race-level features for one race.

        Planned features:
            - Race distance, start type, gait
            - Track condition, weather
            - Field size
            - Aggregate ratings of all starters
            - Race class / grade indicators

        Args:
            race_id: The race ID.

        Returns:
            Dict of feature name -> value.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError()

    def build_training_dataset(
        self,
        date_from: date,
        date_to: date,
        include_target: bool = True,
    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
        """Assemble a full training dataset covering a date range.

        The intent is that every starter in a completed race inside the
        range contributes its pre-race features and, optionally, the
        actual outcome as the target.

        Args:
            date_from: Start date (inclusive).
            date_to: End date (inclusive).
            include_target: If True, include actual placing/probability
                targets in the returned data.

        Returns:
            Tuple of (features_list, targets_list_or_None).
            Each element in features_list is a dict of feature name -> value.
            Each element in targets_list is a dict with 'starter_id',
            'placing', and optionally 'win_frac', 'place_frac'.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError()

    def feature_names(self) -> list[str]:
        """List, in order, every feature name this engine produces.

        Returns:
            List of feature name strings.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError()