"""Background job scheduler for automated tasks.

Wraps APScheduler's AsyncIOScheduler with TipSharks-specific job types
for data ingestion, rating recomputation, HRNZ scraping, and evaluation.
"""

from __future__ import annotations

import asyncio
import smtplib
from datetime import date, timedelta
from email.message import EmailMessage
from typing import Any

from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger

from packages.core.common.logging import get_logger
from packages.core.common.settings import get_settings
from packages.core.storage.database import get_session

logger = get_logger(__name__)


class TipSharksScheduler:
    """Scheduler for TipSharks background jobs.

    Wraps APScheduler's AsyncIOScheduler with convenience methods for
    adding ingest, recompute, and scrape jobs.

    Each job executes the corresponding business logic function directly
    (no subprocess calls), using ``get_session()`` for database access.
    """

    def __init__(self) -> None:
        self._scheduler = AsyncIOScheduler()
        self._running = False
        global _scheduler_instance
        _scheduler_instance = self

    # ── Lifecycle ─────────────────────────────────────────────────────

    def start(self) -> None:
        """Start the scheduler.

        Safe to call multiple times — only starts once.
        """
        if self._running:
            logger.warning("Scheduler already running")
            return
        self._scheduler.start()
        self._running = True
        logger.info("TipSharksScheduler started")

    def shutdown(self, wait: bool = True) -> None:
        """Shutdown the scheduler gracefully.

        Args:
            wait: If True, wait for running jobs to finish.
        """
        if not self._running:
            logger.warning("Scheduler not running")
            return
        self._scheduler.shutdown(wait=wait)
        self._running = False
        logger.info("TipSharksScheduler shut down")

    @property
    def running(self) -> bool:
        """Whether the scheduler is currently running."""
        return self._running

    # ── Job management ────────────────────────────────────────────────

    def add_ingest_job(
        self,
        date_from: str | None = None,
        date_to: str | None = None,
        category: str = "H",
        source: str = "tab",
        cron_expr: str = "0 6 * * *",
        job_id: str = "ingest_daily",
    ) -> str:
        """Schedule a recurring data ingestion job.

        Args:
            date_from: Start date (YYYY-MM-DD). Defaults to yesterday.
            date_to: End date (YYYY-MM-DD). Defaults to today.
            category: Racing category (T, H, G).
            source: Data source ("tab" or "ingest").
            cron_expr: Cron schedule expression.
            job_id: Unique identifier for the job.

        Returns:
            The job ID.
        """
        trigger = CronTrigger.from_crontab(cron_expr, timezone=self._get_timezone())

        self._scheduler.add_job(
            _run_ingest,
            trigger=trigger,
            args=[date_from, date_to, category, source],
            id=job_id,
            replace_existing=True,
            name=f"Ingest ({category}) from {source}",
        )
        logger.info(
            "Scheduled ingest job",
            extra={
                "job_id": job_id,
                "cron": cron_expr,
                "category": category,
                "source": source,
            },
        )
        return job_id

    def add_recompute_job(
        self,
        date_from: str | None = None,
        date_to: str | None = None,
        clear: bool = False,
        cron_expr: str = "0 2 * * 0",
        job_id: str = "recompute_weekly",
    ) -> str:
        """Schedule a recurring rating recomputation job.

        Args:
            date_from: Start date (YYYY-MM-DD). Defaults to 90 days ago.
            date_to: End date (YYYY-MM-DD). Defaults to today.
            clear: If True, clear existing ratings before recompute.
            cron_expr: Cron schedule expression.
            job_id: Unique identifier for the job.

        Returns:
            The job ID.
        """
        trigger = CronTrigger.from_crontab(cron_expr, timezone=self._get_timezone())

        self._scheduler.add_job(
            _run_recompute,
            trigger=trigger,
            args=[date_from, date_to, clear],
            id=job_id,
            replace_existing=True,
            name=f"Recompute ({date_from or 'auto'} to {date_to or 'auto'})",
        )
        logger.info(
            "Scheduled recompute job",
            extra={"job_id": job_id, "cron": cron_expr, "clear": clear},
        )
        return job_id

    def add_scrape_job(
        self,
        urls: list[str] | None = None,
        club_codes: list[str] | None = None,
        date_from: str | None = None,
        date_to: str | None = None,
        cron_expr: str = "0 4 * * 0",
        job_id: str = "scrape_hrnz_weekly",
    ) -> str:
        """Schedule a recurring HRNZ scraping job.

        Args:
            urls: Specific HRNZ result URLs to scrape.
            club_codes: HRNZ club codes to generate URLs for.
            date_from: Start date (YYYY-MM-DD).
            date_to: End date (YYYY-MM-DD).
            cron_expr: Cron schedule expression.
            job_id: Unique identifier for the job.

        Returns:
            The job ID.
        """
        trigger = CronTrigger.from_crontab(cron_expr, timezone=self._get_timezone())

        self._scheduler.add_job(
            _run_scrape,
            trigger=trigger,
            args=[urls, club_codes, date_from, date_to],
            id=job_id,
            replace_existing=True,
            name="HRNZ Scrape",
        )
        logger.info(
            "Scheduled scrape job",
            extra={"job_id": job_id, "cron": cron_expr},
        )
        return job_id

    def add_eval_job(
        self,
        date_from: str | None = None,
        date_to: str | None = None,
        cron_expr: str = "0 4 * * 0",
        job_id: str = "eval_weekly",
    ) -> str:
        """Schedule a recurring weekly evaluation job.

        Args:
            date_from: Start date (YYYY-MM-DD). Defaults to 7 days ago.
            date_to: End date (YYYY-MM-DD). Defaults to today.
            cron_expr: Cron schedule expression.
            job_id: Unique identifier for the job.

        Returns:
            The job ID.
        """
        trigger = CronTrigger.from_crontab(cron_expr, timezone=self._get_timezone())

        self._scheduler.add_job(
            _run_eval,
            trigger=trigger,
            args=[date_from, date_to],
            id=job_id,
            replace_existing=True,
            name="Weekly Evaluation",
        )
        logger.info(
            "Scheduled eval job",
            extra={"job_id": job_id, "cron": cron_expr},
        )
        return job_id

    def add_full_recompute_job(
        self,
        date_from: str | None = None,
        date_to: str | None = None,
        clear: bool = True,
        cron_expr: str = "0 3 1 * *",
        job_id: str = "recompute_monthly_full",
    ) -> str:
        """Schedule a recurring monthly full recompute job.

        Args:
            date_from: Start date (YYYY-MM-DD). Defaults to 365 days ago.
            date_to: End date (YYYY-MM-DD). Defaults to today.
            clear: If True, clear existing ratings before recompute.
            cron_expr: Cron schedule expression.
            job_id: Unique identifier for the job.

        Returns:
            The job ID.
        """
        trigger = CronTrigger.from_crontab(cron_expr, timezone=self._get_timezone())

        self._scheduler.add_job(
            _run_full_recompute,
            trigger=trigger,
            args=[date_from, date_to, clear],
            id=job_id,
            replace_existing=True,
            name="Monthly Full Recompute",
        )
        logger.info(
            "Scheduled full recompute job",
            extra={"job_id": job_id, "cron": cron_expr, "clear": clear},
        )
        return job_id

    def _send_failure_notification(self, job_id: str, error: str) -> None:
        """Send email notification for a scheduler job failure.

        Only sends if ``email_notifications`` is enabled in settings.

        Args:
            job_id: The failed job identifier.
            error: The error message from the job failure.
        """
        settings = get_settings().scheduler
        if not settings.email_notifications:
            return
        if not settings.email_smtp_host or not settings.email_to:
            logger.warning(
                "Email notifications enabled but SMTP host or recipients not configured",
                extra={"job_id": job_id},
            )
            return

        try:
            msg = EmailMessage()
            msg.set_content(
                f"Scheduled job '{job_id}' failed with error:\n\n{error}\n\n"
                f"Timestamp: {date.today().isoformat()}\n\n"
                "TipSharks Scheduler Failure Notification"
            )
            msg["Subject"] = f"[TipSharks] Scheduler Failure: {job_id}"
            msg["From"] = settings.email_from or "scheduler@tipsharks.com"
            msg["To"] = settings.email_to

            with smtplib.SMTP(
                settings.email_smtp_host, settings.email_smtp_port
            ) as server:
                if settings.email_username:
                    server.starttls()
                    server.login(settings.email_username, settings.email_password)
                server.send_message(msg)

            logger.info(
                "Sent failure notification email",
                extra={"job_id": job_id, "recipients": settings.email_to},
            )
        except Exception as exc:
            logger.error(
                "Failed to send failure notification",
                extra={"job_id": job_id, "error": str(exc)},
            )

    # ── Inspection and removal ───────────────────────────────────────

    def list_jobs(self) -> list[dict[str, Any]]:
        """List all scheduled jobs.

        Returns:
            List of dicts with keys: id, name, next_run_time, trigger.
        """
        jobs = []
        for job in self._scheduler.get_jobs():
            jobs.append(
                {
                    "id": job.id,
                    "name": job.name,
                    "next_run_time": (
                        str(job.next_run_time) if job.next_run_time else None
                    ),
                    "trigger": str(job.trigger),
                }
            )
        return jobs

    def remove_job(self, job_id: str) -> bool:
        """Remove a scheduled job by ID.

        Args:
            job_id: The job identifier.

        Returns:
            True if the job was removed, False if not found.
        """
        try:
            self._scheduler.remove_job(job_id)
            logger.info("Removed scheduled job", extra={"job_id": job_id})
            return True
        except Exception:
            logger.warning("Job not found for removal", extra={"job_id": job_id})
            return False

    def get_job(self, job_id: str) -> dict[str, Any] | None:
        """Get a single job by ID.

        Args:
            job_id: The job identifier.

        Returns:
            Job info dict or None if not found.
        """
        job = self._scheduler.get_job(job_id)
        if job is None:
            return None
        return {
            "id": job.id,
            "name": job.name,
            "next_run_time": str(job.next_run_time) if job.next_run_time else None,
            "trigger": str(job.trigger),
        }

    # ── Internal helpers ─────────────────────────────────────────────

    def _get_timezone(self) -> str:
        """Get the configured timezone string."""
        return get_settings().scheduler.timezone

    def load_default_jobs(self) -> list[str]:
        """Add default jobs from settings.

        Reads scheduler config from settings and adds the standard
        ingest, recompute, eval, and full recompute jobs.

        Returns:
            List of job IDs that were added.
        """
        settings = get_settings().scheduler
        job_ids: list[str] = []

        if settings.ingest_cron:
            job_ids.append(
                self.add_ingest_job(
                    cron_expr=settings.ingest_cron,
                    job_id="ingest_daily",
                )
            )

        if settings.recompute_cron:
            job_ids.append(
                self.add_recompute_job(
                    cron_expr=settings.recompute_cron,
                    job_id="recompute_weekly",
                )
            )

        if settings.scrape_cron:
            job_ids.append(
                self.add_scrape_job(
                    cron_expr=settings.scrape_cron,
                    job_id="scrape_hrnz_weekly",
                )
            )

        if settings.eval_cron:
            job_ids.append(
                self.add_eval_job(
                    cron_expr=settings.eval_cron,
                    job_id="eval_weekly",
                )
            )

        if settings.full_recompute_cron:
            job_ids.append(
                self.add_full_recompute_job(
                    cron_expr=settings.full_recompute_cron,
                    job_id="recompute_monthly_full",
                )
            )

        return job_ids


# ── Background job functions ─────────────────────────────────────────

# Most recently constructed TipSharksScheduler, or None if none has been
# created yet. Assigned in TipSharksScheduler.__init__ and read by
# _notify_failure to route job-failure notifications.
_scheduler_instance: TipSharksScheduler | None = None


def _notify_failure(job_id: str, error: str) -> None:
    """Forward a job failure to the registered scheduler, if there is one.

    Does nothing when no ``TipSharksScheduler`` has been constructed.

    Args:
        job_id: Identifier of the job that failed.
        error: Error message describing the failure.
    """
    scheduler = _scheduler_instance
    if scheduler is None:
        return
    scheduler._send_failure_notification(job_id, error)


def _run_ingest(
    date_from: str | None = None,
    date_to: str | None = None,
    category: str = "H",
    source: str = "tab",
) -> dict[str, int]:
    """Run one ingestion pass over a date window.

    Failures inside the ingestion itself are logged, reported through
    ``_notify_failure`` and converted into a zeroed result with
    ``errors`` set to 1.

    Args:
        date_from: Window start (YYYY-MM-DD). When omitted, the start is
            ``scheduler.ingest_days_back`` days before today.
        date_to: Window end (YYYY-MM-DD). When omitted, today.
        category: Racing category (T, H, G).
        source: Data source ("tab" or "ingest").

    Returns:
        Dict with ``meetings``, ``races``, ``starters`` and ``errors``.
    """
    settings = get_settings()

    window_start = (
        date.fromisoformat(date_from)
        if date_from
        else date.today() - timedelta(days=settings.scheduler.ingest_days_back)
    )
    window_end = date.fromisoformat(date_to) if date_to else date.today()

    start_fields = {
        "date_from": str(window_start),
        "date_to": str(window_end),
        "category": category,
        "source": source,
    }
    logger.info("Scheduled ingest starting", extra=start_fields)

    try:
        from packages.core.storage.ingestion import IngestionService

        with get_session() as db:
            service = IngestionService(db, source=source)
            ingest_coro = service.ingest_date_range(
                window_start, window_end, category=category
            )
            meeting_count, race_count, starter_count = asyncio.run(ingest_coro)

        summary = dict(
            meetings=meeting_count,
            races=race_count,
            starters=starter_count,
            errors=service.stats["errors"],
        )
        logger.info("Scheduled ingest complete", extra=summary)
        return summary
    except Exception as exc:
        failure_text = str(exc)
        logger.error(
            "Scheduled ingest failed", extra={"error": failure_text}, exc_info=True
        )
        _notify_failure("ingest_daily", failure_text)
        return {"meetings": 0, "races": 0, "starters": 0, "errors": 1}


def _run_recompute(
    date_from: str | None = None,
    date_to: str | None = None,
    clear: bool = False,
) -> dict[str, int]:
    """Recompute ratings over a date window.

    Failures inside the recompute are logged, reported through
    ``_notify_failure`` and converted into a zeroed result.

    Args:
        date_from: Window start (YYYY-MM-DD). When omitted, the start is
            ``scheduler.recompute_days_back`` days before today.
        date_to: Window end (YYYY-MM-DD). When omitted, today.
        clear: If True, existing ratings are cleared first.

    Returns:
        Dict with a single ``snapshots_created`` count.
    """
    settings = get_settings()

    range_start = (
        date.fromisoformat(date_from)
        if date_from
        else date.today() - timedelta(days=settings.scheduler.recompute_days_back)
    )
    range_end = date.fromisoformat(date_to) if date_to else date.today()

    start_fields = {
        "date_from": str(range_start),
        "date_to": str(range_end),
        "clear": clear,
    }
    logger.info("Scheduled recompute starting", extra=start_fields)

    try:
        from packages.core.ratings.recompute import recompute_ratings

        with get_session() as db:
            created = recompute_ratings(
                db, range_start, range_end, clear_existing=clear
            )

        result = {"snapshots_created": created}
        logger.info("Scheduled recompute complete", extra=result)
        return result
    except Exception as exc:
        failure_text = str(exc)
        logger.error(
            "Scheduled recompute failed", extra={"error": failure_text}, exc_info=True
        )
        _notify_failure("recompute_weekly", failure_text)
        return {"snapshots_created": 0}


def _run_scrape(
    urls: list[str] | None = None,
    club_codes: list[str] | None = None,
    date_from: str | None = None,
    date_to: str | None = None,
) -> dict[str, int]:
    """Execute an HRNZ scraping job.

    Scrapes each target URL, maps the result to a meeting, its races and
    starters, and the referenced horses, drivers and trainers, then
    upserts everything for that URL in one database session. A failure on
    one URL is logged and counted in ``errors`` without aborting the
    remaining URLs.

    Args:
        urls: Specific HRNZ result URLs to scrape. The list is never
            modified.
        club_codes: HRNZ club codes to generate additional URLs for.
        date_from: Start date (YYYY-MM-DD). Defaults to today.
        date_to: End date (YYYY-MM-DD). Defaults to today.

    Returns:
        Dict with ``meetings``, ``races``, ``starters``, ``horses``,
        ``drivers``, ``trainers`` and ``errors`` counts. Entity counts
        cover only URLs whose commit succeeded. If the job fails as a
        whole, every entity count is 0 and ``errors`` is 1.
    """
    logger.info(
        "Scheduled scrape starting",
        extra={
            "urls_count": len(urls) if urls else 0,
            "club_codes": club_codes,
            "date_from": date_from,
            "date_to": date_to,
        },
    )

    try:
        from packages.core.storage.repositories import (
            DriverRepository,
            HorseRepository,
            MeetingRepository,
            RaceRepository,
            StarterRepository,
            TrainerRepository,
        )
        from packages.hrnz_scraper import HRNZScraper
        from packages.hrnz_scraper.mapper import HRNZDataMapper

        start_date = date.fromisoformat(date_from) if date_from else date.today()
        end_date = date.fromisoformat(date_to) if date_to else date.today()

        stats: dict[str, int] = {
            "meetings": 0,
            "races": 0,
            "starters": 0,
            "horses": 0,
            "drivers": 0,
            "trainers": 0,
            "errors": 0,
        }

        mapper = HRNZDataMapper()

        async def _scrape_all() -> dict[str, int]:
            async with HRNZScraper() as scraper:
                # Work on a copy: ``urls`` is the same list object that was
                # registered as the job argument, so extending it in place
                # would leak generated URLs into every later run.
                target_urls: list[str] = list(urls or [])
                if club_codes:
                    from apps.backend.api.main import _generate_hrnz_urls

                    generated = _generate_hrnz_urls(start_date, end_date, club_codes)
                    target_urls.extend(generated)

                if not target_urls:
                    logger.warning("No URLs provided for scrape job")
                    return stats

                # De-duplicate while preserving order.
                target_urls = list(dict.fromkeys(target_urls))

                for url in target_urls:
                    try:
                        scraped = await scraper.get_meeting_results(url)
                        if not scraped.get("races"):
                            continue

                        scraped_date = scraped.get("date")
                        if scraped_date:
                            try:
                                meeting_date = date.fromisoformat(scraped_date)
                            except ValueError:
                                # Best effort: an unparseable date cannot be
                                # range-checked, so the meeting is kept.
                                logger.debug(
                                    "Unparseable meeting date; skipping date range check",
                                    extra={"url": url, "date": scraped_date},
                                )
                            else:
                                if not (start_date <= meeting_date <= end_date):
                                    continue

                        meeting = mapper.map_meeting(scraped)
                        entities = mapper.map_entities(scraped)

                        # Tally per URL and fold into ``stats`` only after
                        # the commit, so a failed URL does not inflate the
                        # reported totals.
                        persisted = {
                            "meetings": 0,
                            "races": 0,
                            "starters": 0,
                            "horses": 0,
                            "drivers": 0,
                            "trainers": 0,
                        }

                        with get_session() as session:
                            MeetingRepository.upsert(session, meeting)
                            persisted["meetings"] += 1

                            for horse in entities.get("horses", []):
                                HorseRepository.upsert(
                                    session,
                                    horse["id"],
                                    horse["name"],
                                    horse.get("raw_json"),
                                )
                                persisted["horses"] += 1

                            for driver in entities.get("drivers", []):
                                DriverRepository.upsert(
                                    session, driver["name"], driver_id=driver.get("id")
                                )
                                persisted["drivers"] += 1

                            for trainer in entities.get("trainers", []):
                                TrainerRepository.upsert(
                                    session,
                                    trainer["name"],
                                    trainer_id=trainer.get("id"),
                                )
                                persisted["trainers"] += 1

                            races = mapper.map_races(scraped, meeting["meeting"])
                            # race number -> persisted race id, needed to
                            # attach starters to their race rows.
                            race_id_map = {}
                            for race in races:
                                race_obj = RaceRepository.upsert(
                                    session, meeting["meeting"], race
                                )
                                race_id_map[race["race_number"]] = race_obj.id
                                persisted["races"] += 1

                            starters = mapper.map_starters(scraped, race_id_map)
                            for starter in starters:
                                StarterRepository.upsert(
                                    session,
                                    starter["race_id"],
                                    starter,
                                    starter.get("placing"),
                                )
                                persisted["starters"] += 1

                            session.commit()

                            for key, count in persisted.items():
                                stats[key] += count
                    except Exception as exc:
                        stats["errors"] += 1
                        logger.error(
                            "Failed to scrape URL in scheduled job",
                            extra={"url": url, "error": str(exc)},
                            exc_info=True,
                        )

            return stats

        stats = asyncio.run(_scrape_all())

        logger.info("Scheduled scrape complete", extra=stats)
        return stats
    except Exception as exc:
        logger.error(
            "Scheduled scrape failed", extra={"error": str(exc)}, exc_info=True
        )
        _notify_failure("scrape_hrnz_weekly", str(exc))
        return {
            "meetings": 0,
            "races": 0,
            "starters": 0,
            "horses": 0,
            "drivers": 0,
            "trainers": 0,
            "errors": 1,
        }


def _run_eval(
    date_from: str | None = None,
    date_to: str | None = None,
) -> dict[str, Any]:
    """Execute a weekly evaluation job.

    Compares predictions with actual results for every race in the date
    window that has at least one starter with a recorded placing.

    Args:
        date_from: Start date (YYYY-MM-DD). Defaults to 7 days ago.
        date_to: End date (YYYY-MM-DD), inclusive. Defaults to today.

    Returns:
        Dict with:
            races_evaluated: Races for which a comparison was produced.
            races_total: Races found in the window, including those that
                could not be evaluated.
            winner_accuracy: Share of evaluated races whose winner was
                predicted correctly, rounded to 4 places.
            avg_brier_score: Mean Brier score over evaluated races,
                rounded to 4 places.
            date_from / date_to: The resolved window.
        On failure of the whole job the counts and metrics are zero.
    """
    if date_from:
        start_date = date.fromisoformat(date_from)
    else:
        start_date = date.today() - timedelta(days=7)

    if date_to:
        end_date = date.fromisoformat(date_to)
    else:
        end_date = date.today()

    logger.info(
        "Scheduled evaluation starting",
        extra={"date_from": str(start_date), "date_to": str(end_date)},
    )

    try:
        from packages.core.ratings.predictions import PredictionEngine
        from packages.core.storage.models import Race, Starter

        # Exclusive bound on the day after end_date. Comparing with
        # "<= end_date" would stop at midnight and drop races run during
        # the end date (assumes race_datetime carries a time of day —
        # TODO confirm; for date-only values both forms select the same rows).
        end_exclusive = end_date + timedelta(days=1)

        with get_session() as session:
            engine = PredictionEngine(session)
            races = (
                session.query(Race)
                .join(Starter, Race.id == Starter.race_id)
                .filter(Starter.placing.isnot(None))
                .filter(Race.race_datetime >= start_date.isoformat())
                .filter(Race.race_datetime < end_exclusive.isoformat())
                .distinct()
                .all()
            )

            total_races = len(races)
            evaluated = 0
            winner_correct = 0
            total_brier = 0.0

            for race in races:
                race_id = None
                try:
                    race_id = race.id
                    comparison = engine.compare_prediction_to_actual(race_id)
                    if not comparison:
                        # Nothing to compare; not counted as evaluated.
                        continue
                    is_hit = bool(comparison.get("winner_correct"))
                    brier = float(comparison.get("brier_score", 0.0))
                except Exception as exc:
                    # One bad race must not abort the run, but it should
                    # be visible in the logs.
                    logger.warning(
                        "Failed to evaluate race",
                        extra={"race_id": race_id, "error": str(exc)},
                    )
                    continue

                # Update all tallies together so a failure above can never
                # leave them half-applied.
                evaluated += 1
                winner_correct += is_hit
                total_brier += brier

            # Average over races actually evaluated; dividing by every race
            # found would dilute both metrics with unevaluated races.
            avg_brier = total_brier / evaluated if evaluated > 0 else 0.0
            win_accuracy = winner_correct / evaluated if evaluated > 0 else 0.0

        stats = {
            "races_evaluated": evaluated,
            "races_total": total_races,
            "winner_accuracy": round(win_accuracy, 4),
            "avg_brier_score": round(avg_brier, 4),
            "date_from": str(start_date),
            "date_to": str(end_date),
        }
        logger.info("Scheduled evaluation complete", extra=stats)
        return stats
    except Exception as exc:
        logger.error(
            "Scheduled evaluation failed", extra={"error": str(exc)}, exc_info=True
        )
        _notify_failure("eval_weekly", str(exc))
        # Same key set as the success path.
        return {
            "races_evaluated": 0,
            "races_total": 0,
            "winner_accuracy": 0.0,
            "avg_brier_score": 0.0,
            "date_from": str(start_date),
            "date_to": str(end_date),
        }


def _run_full_recompute(
    date_from: str | None = None,
    date_to: str | None = None,
    clear: bool = True,
) -> dict[str, int]:
    """Run the monthly full ratings recompute over a date window.

    Failures inside the recompute are logged, reported through
    ``_notify_failure`` and converted into a zeroed result.

    Args:
        date_from: Window start (YYYY-MM-DD). When omitted, 365 days
            before today.
        date_to: Window end (YYYY-MM-DD). When omitted, today.
        clear: If True, existing ratings are cleared first.

    Returns:
        Dict with a single ``snapshots_created`` count.
    """
    range_start = (
        date.fromisoformat(date_from)
        if date_from
        else date.today() - timedelta(days=365)
    )
    range_end = date.fromisoformat(date_to) if date_to else date.today()

    start_fields = {
        "date_from": str(range_start),
        "date_to": str(range_end),
        "clear": clear,
    }
    logger.info("Scheduled full recompute starting", extra=start_fields)

    try:
        from packages.core.ratings.recompute import recompute_ratings

        with get_session() as db:
            created = recompute_ratings(
                db, range_start, range_end, clear_existing=clear
            )

        result = {"snapshots_created": created}
        logger.info("Scheduled full recompute complete", extra=result)
        return result
    except Exception as exc:
        failure_text = str(exc)
        logger.error(
            "Scheduled full recompute failed",
            extra={"error": failure_text},
            exc_info=True,
        )
        _notify_failure("recompute_monthly_full", failure_text)
        return {"snapshots_created": 0}
