"""Ingestion service for TAB racing data.

Supports two data sources:
  - "tab":   Directly from the TAB Affiliates API (default, existing behaviour)
  - "ingest": From the tab-api-ingest TypeScript service REST API
"""

import asyncio
from datetime import date

from sqlalchemy.orm import Session

from packages.core.common.logging import get_logger
from packages.core.storage.repositories import (
    MeetingRepository,
    RaceRepository,
    StarterRepository,
)
from packages.ingest_client import IngestServiceClient
from packages.tab_client.client import get_client

logger = get_logger(__name__)


class IngestionService:
    """Service for ingesting race data into database.

    Supports two sources via the *source* parameter:
      - "tab" (default):  fetches data from the TAB Affiliates API directly
      - "ingest":         fetches data from the tab-api-ingest REST service
    """

    def __init__(self, session: Session, source: str = "tab"):
        """Initialize ingestion service.

        Args:
            session: Database session
            source: Data source — "tab" (TAB API direct) or "ingest" (tab-api-ingest service)

        Raises:
            ValueError: If source is not a recognised value
        """
        if source not in ("tab", "ingest"):
            raise ValueError(f"Unknown source '{source}'. Use 'tab' or 'ingest'.")

        self.session = session
        self.source = source
        self.stats = {
            "meetings": 0,
            "races": 0,
            "starters": 0,
            "errors": 0,
        }

    async def ingest_date_range(
        self,
        date_from: date,
        date_to: date,
        category: str | None = None,
        country: str | None = None,
    ) -> tuple[int, int, int]:
        """Ingest all meetings and races in date range.

        Args:
            date_from: Start date (inclusive)
            date_to: End date (inclusive)
            category: Racing category (T, H, G) — uses default from settings if None
            country: Country filter — uses default from settings if None

        Returns:
            Tuple of (meetings_count, races_count, starters_count)
        """
        if self.source == "ingest":
            return await self._ingest_from_service(
                date_from, date_to, category, country
            )

        logger.info(f"Starting TAB ingestion from {date_from} to {date_to}")

        async with get_client() as client:
            # Fetch meetings in range
            meetings_data = await client.get_meetings(
                date_from.strftime("%Y-%m-%d"),
                date_to.strftime("%Y-%m-%d"),
                category=category,
                country=country,
            )

            logger.info(f"Found {len(meetings_data)} meetings to process")

            for meeting_data in meetings_data:
                try:
                    await self._ingest_meeting(client, meeting_data)
                except Exception as e:
                    logger.error(
                        f"Error ingesting meeting {meeting_data.get('meeting')}: {e}",
                        exc_info=True,
                    )
                    self.stats["errors"] += 1

        logger.info(
            f"Ingestion complete: {self.stats['meetings']} meetings, "
            f"{self.stats['races']} races, {self.stats['starters']} starters, "
            f"{self.stats['errors']} errors"
        )

        return self.stats["meetings"], self.stats["races"], self.stats["starters"]

    async def ingest_single_date(
        self,
        target_date: date,
        category: str | None = None,
        country: str | None = None,
    ) -> tuple[int, int, int]:
        """Ingest meetings for a single date.

        Args:
            target_date: Date to ingest
            category: Racing category (T, H, G) — uses default from settings if None
            country: Country filter — uses default from settings if None

        Returns:
            Tuple of (meetings_count, races_count, starters_count)
        """
        return await self.ingest_date_range(
            target_date, target_date, category=category, country=country
        )

    async def _ingest_from_service(
        self,
        date_from: date,
        date_to: date,
        category: str | None = None,
        country: str | None = None,
    ) -> tuple[int, int, int]:
        """Ingest from the tab-api-ingest service.

        Uses IngestServiceClient instead of the TAB API directly.
        Iterates through meetings returned by the ingest service and
        processes each with its races and runners.

        Args:
            date_from: Start date (inclusive)
            date_to: End date (inclusive)
            category: Racing category (T, H, G)
            country: Country filter

        Returns:
            Tuple of (meetings_count, races_count, starters_count)
        """
        logger.info(f"Starting ingest-service ingestion from {date_from} to {date_to}")

        async with IngestServiceClient() as client:
            # Fetch meetings in range
            meetings_data = await client.get_meetings(
                date=str(date_from),
                country=country,
                category=category,
            )

            logger.info(f"Found {len(meetings_data)} meetings from ingest service")

            for meeting_data in meetings_data:
                try:
                    await self._ingest_meeting_from_service(client, meeting_data)
                except Exception as e:
                    logger.error(
                        f"Error ingesting meeting {meeting_data.get('meeting')}: {e}",
                        exc_info=True,
                    )
                    self.stats["errors"] += 1

        logger.info(
            f"Ingest-service ingestion complete: {self.stats['meetings']} meetings, "
            f"{self.stats['races']} races, {self.stats['starters']} starters, "
            f"{self.stats['errors']} errors"
        )

        return self.stats["meetings"], self.stats["races"], self.stats["starters"]

    async def _ingest_meeting(self, client, meeting_data: dict) -> None:
        """Ingest single meeting using a TAB-style client.

        Args:
            client: TAB API client (real or mock)
            meeting_data: Meeting data from API (includes races array with summaries)
        """
        # TAB API uses "meeting" field for meeting ID (string)
        meeting_id = meeting_data.get("meeting")
        if not meeting_id:
            logger.warning("Skipping meeting with no ID")
            return

        # Upsert meeting
        meeting = MeetingRepository.upsert(self.session, meeting_data)
        self.stats["meetings"] += 1
        self.session.commit()

        logger.info(
            f"Processing meeting {meeting_id} ({meeting.venue}, {meeting.meeting_date}, {meeting.category})"
        )

        # TAB API includes races array with summaries in the meeting data
        # Each race has "id" (event_id) and "race_number"
        races_list = meeting_data.get("races", [])

        if not races_list:
            logger.warning(f"No races found in meeting {meeting_id}")
            return

        logger.info(f"Found {len(races_list)} races in meeting {meeting_id}")

        # Process each race
        for race_summary in races_list:
            try:
                # TAB race summary has "id" which is the event_id
                event_id = race_summary.get("id")
                race_number = race_summary.get("race_number")

                if not event_id:
                    logger.warning(
                        f"Skipping race {race_number} with no event_id in meeting {meeting_id}"
                    )
                    continue

                # Fetch full event/race data with runners and results
                event_data = await client.get_event(event_id)
                await self._ingest_race(meeting_id, event_data)

                # Small delay to be polite to API
                await asyncio.sleep(0.1)

            except Exception as e:
                race_num = race_summary.get("race_number", "?")
                logger.error(
                    f"Error ingesting race {meeting_id}/{race_num}: {e}",
                    exc_info=True,
                )
                self.stats["errors"] += 1

    async def _ingest_meeting_from_service(
        self,
        client: IngestServiceClient,
        meeting_data: dict,
    ) -> None:
        """Ingest single meeting using the ingest service client.

        Differs from _ingest_meeting in that race data is fetched via
        the ingest service's own REST endpoints rather than the TAB API.

        Args:
            client: IngestServiceClient instance
            meeting_data: Meeting data from ingest service
        """
        # Ingest service uses "meeting" field for meeting ID (string)
        meeting_id = meeting_data.get("meeting")
        if not meeting_id:
            logger.warning("Skipping meeting with no ID")
            return

        # Upsert meeting
        meeting = MeetingRepository.upsert(self.session, meeting_data)
        self.stats["meetings"] += 1
        self.session.commit()

        logger.info(
            f"Processing meeting {meeting_id} ({meeting.venue}, {meeting.meeting_date}, {meeting.category})"
        )

        # Get races for this meeting from the ingest service
        races_data = await client.get_races(meeting_id=meeting_id)

        if not races_data:
            logger.warning(f"No races found for meeting {meeting_id}")
            return

        logger.info(f"Found {len(races_data)} races for meeting {meeting_id}")

        # Process each race
        for race_summary in races_data:
            try:
                race_id = race_summary.get("id") or str(race_summary.get("race_id", ""))
                race_number = race_summary.get("race_number")

                if not race_id:
                    logger.warning(
                        f"Skipping race {race_number} with no ID in meeting {meeting_id}"
                    )
                    continue

                # Fetch full race data and runners from the ingest service
                full_race_data = await client.get_race(race_id)
                runners = await client.get_runners(race_id)

                # Construct event_data compatible with _ingest_race
                event_data = {
                    "race": full_race_data,
                    "runners": runners,
                    "results": full_race_data.get("results", []),
                }

                # If the race data already contains runners/results at top level, use those
                if isinstance(full_race_data, dict):
                    if "runners" in full_race_data and not event_data["runners"]:
                        event_data["runners"] = full_race_data["runners"]
                    if "results" in full_race_data and not event_data["results"]:
                        event_data["results"] = full_race_data["results"]

                await self._ingest_race(meeting_id, event_data)

                # Small delay to be polite
                await asyncio.sleep(0.1)

            except Exception as e:
                race_num = race_summary.get("race_number", "?")
                logger.error(
                    f"Error ingesting race {meeting_id}/{race_num}: {e}",
                    exc_info=True,
                )
                self.stats["errors"] += 1

    async def _ingest_race(self, meeting_id: str, event_data: dict) -> None:
        """Ingest single race with all starters.

        Works with event data from either the TAB API or the ingest service.
        Expects event_data to have keys: "race", "runners", "results".

        Args:
            meeting_id: Parent meeting ID (string)
            event_data: Full event data (includes race, runners, results)
        """
        # TAB API returns: {race: {...}, runners: [...], results: [...], ...}
        # Ingest service: same shape constructed by _ingest_meeting_from_service
        race_data = event_data.get("race", {})

        race_number = race_data.get("race_number")
        if not race_number:
            logger.warning(f"Skipping race with no number in meeting {meeting_id}")
            return

        # Upsert race
        race = RaceRepository.upsert(self.session, meeting_id, race_data)
        self.stats["races"] += 1

        # Get runners/starters
        runners = event_data.get("runners", [])

        # Get results and build lookup by entrant_id
        results = event_data.get("results", [])
        result_map: dict[str, int] = {}
        for result in results:
            entrant_id = result.get("entrant_id")
            position = result.get("position")
            if entrant_id and position:
                result_map[entrant_id] = int(position)

        logger.debug(
            f"Processing {len(runners)} starters for race {meeting_id}/{race_number} "
            f"({len(result_map)} results)"
        )

        for runner_data in runners:
            try:
                # Match result to runner by entrant_id
                entrant_id = runner_data.get("entrant_id")
                placing = result_map.get(entrant_id) if entrant_id else None

                starter = StarterRepository.upsert(
                    self.session, race.id, runner_data, placing=placing
                )
                if starter:  # None if scratched
                    self.stats["starters"] += 1
            except Exception as e:
                logger.error(
                    f"Error ingesting starter in race {race.id}: {e}",
                    exc_info=True,
                )
                self.stats["errors"] += 1

        # Commit after each race
        self.session.commit()


async def ingest_meetings(
    session: Session,
    date_from: date,
    date_to: date,
    category: str | None = None,
    country: str | None = None,
    source: str = "tab",
) -> tuple[int, int, int]:
    """Ingest meetings for a date range using a one-off IngestionService.

    Builds an IngestionService bound to *session* and *source*, then
    delegates to its ``ingest_date_range`` method.

    Args:
        session: Database session
        date_from: Start date
        date_to: End date
        category: Racing category (T, H, G) — uses default from settings if None
        country: Country filter — uses default from settings if None
        source: Data source — "tab" (default) or "ingest"

    Returns:
        Tuple of (meetings_count, races_count, starters_count)
    """
    filters = {"category": category, "country": country}
    ingestion = IngestionService(session, source=source)
    counts = await ingestion.ingest_date_range(date_from, date_to, **filters)
    return counts
