"""Feature engineering for ML model inputs.

Extracts structured feature vectors from raw racing data to feed
into ensemble or standalone ML models for win/place prediction.
"""

from __future__ import annotations

from datetime import date
from typing import Any

from sqlalchemy.orm import Session


class FeatureEngineering:
    """Interface for turning raw racing data into ML-ready features.

    The class holds a database session and declares the extraction
    entry points (per horse, per race, per date range). Apart from the
    constructor, every method is currently an unimplemented stub that
    raises ``NotImplementedError``; the docstrings describe the intended
    contract (Elo ratings, form metrics, race conditions and historical
    performance statistics).
    """

    def __init__(self, session: Session) -> None:
        """Store the database session used for later data loading.

        Args:
            session: Database session for loading data. It is only
                stored here; no query is issued by the constructor.
        """
        self._session = session

    def extract_horse_features(self, horse_id: int) -> dict[str, Any]:
        """Build the feature vector describing one horse.

        Not implemented yet. The planned feature set is:

        - Current Elo rating and RD
        - Recent form (avg placing, win rate, place rate)
        - Distance performance stats
        - Barrier/handicap statistics
        - Course/venue familiarity
        - Days since last start
        - Driver/trainer combination stats

        Args:
            horse_id: The horse entity ID.

        Returns:
            Mapping of feature name to feature value.

        Raises:
            NotImplementedError: Always, until the method is implemented.
        """
        raise NotImplementedError()

    def extract_race_features(self, race_id: int) -> dict[str, Any]:
        """Build the race-level (aggregate) feature vector for one race.

        Not implemented yet. The planned feature set is:

        - Race distance, start type, gait
        - Track condition, weather
        - Field size
        - Aggregate ratings of all starters
        - Race class / grade indicators

        Args:
            race_id: The race ID.

        Returns:
            Mapping of feature name to feature value.

        Raises:
            NotImplementedError: Always, until the method is implemented.
        """
        raise NotImplementedError()

    def build_training_dataset(
        self,
        date_from: date,
        date_to: date,
        include_target: bool = True,
    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
        """Assemble a training dataset covering a range of race dates.

        Not implemented yet. The intended behavior is to walk every
        starter of every completed race inside the range, extract that
        starter's pre-race features and, when requested, pair them with
        the actual outcome as the learning target.

        Args:
            date_from: Start date (inclusive).
            date_to: End date (inclusive).
            include_target: If True, include actual placing/probability
                targets in the returned data.

        Returns:
            A ``(features_list, targets_list_or_None)`` tuple.
            Each entry of ``features_list`` is a dict of feature name ->
            value. Each entry of the targets list is a dict with
            'starter_id', 'placing', and optionally 'win_frac',
            'place_frac'.

        Raises:
            NotImplementedError: Always, until the method is implemented.
        """
        raise NotImplementedError()

    def feature_names(self) -> list[str]:
        """List, in order, every feature name this engine produces.

        Not implemented yet.

        Returns:
            List of feature name strings.

        Raises:
            NotImplementedError: Always, until the method is implemented.
        """
        raise NotImplementedError()
