
    1i                    L    S r SSKJr  SSKJr  SSKJr  SSKJr   " S S5      r	g)	zFeature engineering for ML model inputs.

Extracts structured feature vectors from raw racing data to feed
into ensemble or standalone ML models for win/place prediction.
    )annotations)date)Any)Sessionc                  ^    \ rS rSrSrS
S jrSS jrSS jr S       SS jjrSS jr	Sr
g	)FeatureEngineering   zFeature extraction and dataset construction for racing ML models.

Features are built from Elo ratings, form metrics, race conditions,
and historical performance statistics.
c                    Xl         g)z[Initialize with a database session.

Args:
    session: Database session for loading data.
N_session)selfsessions     9/root/tipsharks/tipsharks-elo-api/packages/ml/features.py__init__FeatureEngineering.__init__   s	          c                    [         e)at  Extract feature vector for a single horse.

Features include:
- Current Elo rating and RD
- Recent form (avg placing, win rate, place rate)
- Distance performance stats
- Barrier/handicap statistics
- Course/venue familiarity
- Days since last start
- Driver/trainer combination stats

Args:
    horse_id: The horse entity ID.

Returns:
    Dict of feature name -> value.
NotImplementedError)r   horse_ids     r   extract_horse_features)FeatureEngineering.extract_horse_features   s
    $ "!r   c                    [         e)a  Extract aggregate features for a race.

Features include:
- Race distance, start type, gait
- Track condition, weather
- Field size
- Aggregate ratings of all starters
- Race class / grade indicators

Args:
    race_id: The race ID.

Returns:
    Dict of feature name -> value.
r   )r   race_ids     r   extract_race_features(FeatureEngineering.extract_race_features2   s
      "!r   c                    [         e)ae  Build a complete training dataset for a date range.

For each starter in completed races within the range, extract
pre-race features and (optionally) the actual outcome as target.

Args:
    date_from: Start date (inclusive).
    date_to: End date (inclusive).
    include_target: If True, include actual placing/probability
        targets in the returned data.

Returns:
    Tuple of (features_list, targets_list_or_None).
    Each element in features_list is a dict of feature name -> value.
    Each element in targets_list is a dict with 'starter_id', 'placing',
    and optionally 'win_frac', 'place_frac'.
r   )r   	date_fromdate_toinclude_targets       r   build_training_dataset)FeatureEngineering.build_training_datasetD   s
    . "!r   c                    [         e)znReturn ordered list of all feature names produced by this engine.

Returns:
    List of feature name strings.
r   )r   s    r   feature_names FeatureEngineering.feature_names]   s
     "!r   r   N)r   r   returnNone)r   intr&   dict[str, Any])r   r(   r&   r)   )T)r   r   r   r   r    boolr&   z8tuple[list[dict[str, Any]], list[dict[str, Any]] | None])r&   z	list[str])__name__
__module____qualname____firstlineno____doc__r   r   r   r!   r$   __static_attributes__ r   r   r   r      sL     "(",  $	"" " 	"
 
B"2"r   r   N)
r/   
__future__r   datetimer   typingr   sqlalchemy.ormr   r   r1   r   r   <module>r6      s$    #   "T" T"r   