Coverage for packages / ml / features.py: 0%
11 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-08 08:14 +1200
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-08 08:14 +1200
"""Feature engineering for ML model inputs.

Extracts structured feature vectors from raw racing data to feed
into ensemble or standalone ML models for win/place prediction.
"""
7from __future__ import annotations
9from datetime import date
10from typing import Any
12from sqlalchemy.orm import Session
class FeatureEngineering:
    """Feature extraction and dataset construction for racing ML models.

    Features are built from Elo ratings, form metrics, race conditions,
    and historical performance statistics.

    Note:
        Only the constructor is implemented. Every extraction method is
        currently a stub that raises ``NotImplementedError``; the method
        docstrings describe the intended contract, not existing behavior.
    """

    def __init__(self, session: Session) -> None:
        """Initialize with a database session.

        Args:
            session: Database session for loading data. It is stored
                unchanged on ``self._session``; nothing is queried here.
        """
        self._session = session

    def extract_horse_features(self, horse_id: int) -> dict[str, Any]:
        """Extract feature vector for a single horse.

        Intended features include:
            - Current Elo rating and RD
            - Recent form (avg placing, win rate, place rate)
            - Distance performance stats
            - Barrier/handicap statistics
            - Course/venue familiarity
            - Days since last start
            - Driver/trainer combination stats

        Args:
            horse_id: The horse entity ID.

        Returns:
            Dict of feature name -> value.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        # Name the method in the message so a failing call site is obvious
        # from the traceback summary alone.
        raise NotImplementedError(
            f"{type(self).__name__}.extract_horse_features is not implemented yet"
        )

    def extract_race_features(self, race_id: int) -> dict[str, Any]:
        """Extract aggregate features for a race.

        Intended features include:
            - Race distance, start type, gait
            - Track condition, weather
            - Field size
            - Aggregate ratings of all starters
            - Race class / grade indicators

        Args:
            race_id: The race ID.

        Returns:
            Dict of feature name -> value.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError(
            f"{type(self).__name__}.extract_race_features is not implemented yet"
        )

    def build_training_dataset(
        self,
        date_from: date,
        date_to: date,
        include_target: bool = True,
    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
        """Build a complete training dataset for a date range.

        For each starter in completed races within the range, the intent is
        to extract pre-race features and (optionally) the actual outcome as
        target.

        Args:
            date_from: Start date (inclusive).
            date_to: End date (inclusive).
            include_target: If True, include actual placing/probability
                targets in the returned data.

        Returns:
            Tuple of (features_list, targets_list_or_None).
            Each element in features_list is a dict of feature name -> value.
            Each element in targets_list is a dict with 'starter_id',
            'placing', and optionally 'win_frac', 'place_frac'.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError(
            f"{type(self).__name__}.build_training_dataset is not implemented yet"
        )

    def feature_names(self) -> list[str]:
        """Return ordered list of all feature names produced by this engine.

        Returns:
            List of feature name strings.

        Raises:
            NotImplementedError: Always; the method is not implemented yet.
        """
        raise NotImplementedError(
            f"{type(self).__name__}.feature_names is not implemented yet"
        )