"""FastAPI application for TipSharks ratings API."""

import asyncio
import csv
import hashlib
import json
import re
from datetime import UTC, date, datetime, timedelta
from io import BytesIO, StringIO
from pathlib import Path
from typing import Any

from fastapi import (
    APIRouter,
    Depends,
    FastAPI,
    HTTPException,
    Query,
    Request,
    Response,
    Security,
    WebSocket,
    WebSocketDisconnect,
)
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, PlainTextResponse, RedirectResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, ConfigDict, Field
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from sqlalchemy import func
from sqlalchemy.orm import Session, joinedload

from packages.core.common.logging import get_logger, setup_logging
from packages.core.common.rate_limit import get_user_rate_limit_key
from packages.core.common.scheduler import TipSharksScheduler
from packages.core.common.settings import HRNZ_ALL_CLUB_CODES, get_settings
from packages.core.common.utils import parse_date
from packages.core.ratings.recompute import recompute_ratings
from packages.core.storage.audit import AuditLogger
from packages.core.storage.database import get_session
from packages.core.storage.ingestion import IngestionService
from packages.core.storage.models import (
    AuditLog,
    Driver,
    EntityType,
    Horse,
    Meeting,
    Race,
    RatingSnapshot,
    Trainer,
)
from packages.core.storage.repositories import RatingSnapshotRepository

# Setup
setup_logging()
logger = get_logger(__name__)
settings = get_settings()

# Scheduler instance for background jobs
scheduler = TipSharksScheduler()

# Rate limiter configuration
# Uses per-user key when X-User-ID header is provided by the mobile client,
# falling back to IP-based key for anonymous (e.g. browser) requests.
limiter = Limiter(key_func=get_user_rate_limit_key)

app = FastAPI(
    title="TipSharks API",
    description="Advanced harness racing ratings and predictions powered by multi-runner Elo algorithms",
    version="0.2.0",
)

# Add rate limiter to app state
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# API router with /v1 prefix for versioned API endpoints
api_router = APIRouter(prefix="/v1")

# Legacy API prefixes that should redirect to /v1/
_LEGACY_API_PREFIXES = {
    "/ratings",
    "/races",
    "/predictions",
    "/analytics",
    "/admin",
    "/webhook",
}

# Add CORS middleware
# Parse CORS origins from settings (supports "*" or comma-separated list)
cors_origins = (
    ["*"]
    if settings.api.cors_allow_origins == "*"
    else [origin.strip() for origin in settings.api.cors_allow_origins.split(",")]
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.middleware("http")
async def legacy_api_redirects(request: Request, call_next):
    """Redirect legacy API paths to /v1/ equivalents.

    Handles GET, POST, and other methods transparently via 301 redirect.
    Preserves query string parameters.
    Keeps /health, /ui/*, /static, /docs/site, and root at their original paths.
    """
    path = request.url.path
    for prefix in _LEGACY_API_PREFIXES:
        if path.startswith(prefix):
            new_path = f"/v1{path}"
            # Preserve query string if present
            if request.url.query:
                new_path = f"{new_path}?{request.url.query}"
            return RedirectResponse(url=new_path, status_code=301)
    return await call_next(request)


@app.middleware("http")
async def add_no_cache_headers(request: Request, call_next):
    """Avoid stale API responses in browsers/proxies."""
    response = await call_next(request)
    if request.url.path.startswith(("/v1/ratings", "/v1/races", "/ratings", "/races")):
        response.headers.setdefault(
            "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"
        )
        response.headers.setdefault("Pragma", "no-cache")
        response.headers.setdefault("Expires", "0")
    return response


@app.middleware("http")
async def add_request_id(request: Request, call_next):
    """Add a unique request ID to every request and response.

    Reads ``x-request-id`` from the incoming request headers if present,
    otherwise generates a new UUID.  The ID is stored on ``request.state``
    and echoed back in the ``x-request-id`` response header.
    """
    import uuid

    request_id = request.headers.get("x-request-id", str(uuid.uuid4()))
    request.state.request_id = request_id
    response = await call_next(request)
    response.headers["x-request-id"] = request_id
    return response


@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log basic information about every API request.

    Records HTTP method, path, status code, duration, and the request ID
    (if one was assigned by the ``add_request_id`` middleware).
    """
    from time import time

    start = time()
    response = await call_next(request)
    duration = time() - start
    logger.info(
        "API request",
        extra={
            "method": request.method,
            "path": request.url.path,
            "status_code": response.status_code,
            "duration_ms": round(duration * 1000, 2),
            "request_id": getattr(request.state, "request_id", None),
        },
    )
    return response


@app.middleware("http")
async def rating_etag_middleware(request: Request, call_next):
    """Add ETag and Last-Modified headers to rating endpoints for conditional requests.

    Computes an ETag from the response body (MD5 hash) and sets Last-Modified
    from the most recent rating snapshot timestamp. Handles If-None-Match and
    If-Modified-Since request headers to return 304 Not Modified when the
    client's cached version is still valid.
    """
    path = request.url.path

    # Only apply to rating detail and list endpoints
    if not (
        path.startswith("/v1/ratings/horses")
        or path.startswith("/v1/ratings/drivers")
        or path.startswith("/v1/ratings/trainers")
    ):
        return await call_next(request)

    # Get the most recent rating snapshot timestamp for Last-Modified
    last_modified: datetime | None = None
    try:
        with get_session() as session:
            latest_ts = session.query(func.max(RatingSnapshot.created_at)).scalar()
            if latest_ts is not None:
                if latest_ts.tzinfo is None:
                    latest_ts = latest_ts.replace(tzinfo=UTC)
                last_modified = latest_ts
    except Exception:
        pass

    # Check If-Modified-Since before computing the full response
    if last_modified is not None and "if-modified-since" in request.headers:
        try:
            ims_str = request.headers["if-modified-since"]
            ims = datetime.strptime(ims_str, "%a, %d %b %Y %H:%M:%S GMT").replace(
                tzinfo=UTC
            )
            if last_modified <= ims:
                # Data has not changed since last fetch — but we still need
                # to check ETag below after computing the response body.
                pass
        except (ValueError, TypeError):
            pass

    response = await call_next(request)

    # Read the response body to compute ETag
    body = b""
    try:
        async for chunk in response.body_iterator:
            body += chunk
    except Exception:
        # If we can't read the body, just pass through without ETag
        return response

    # Compute ETag from the response body
    etag_hex = hashlib.md5(body).hexdigest()
    etag_value = f'"{etag_hex}"'

    # Set ETag header
    response.headers["ETag"] = etag_value

    # Set Last-Modified header
    if last_modified is not None:
        response.headers["Last-Modified"] = last_modified.strftime(
            "%a, %d %b %Y %H:%M:%S GMT"
        )

    # Check If-None-Match for conditional request
    if_none_match = request.headers.get("if-none-match")
    if if_none_match and if_none_match.strip('"') == etag_hex:
        return Response(
            status_code=304,
            headers={
                "ETag": etag_value,
                "Last-Modified": response.headers.get("Last-Modified", ""),
                "Cache-Control": "no-store, no-cache, must-revalidate, max-age=0",
                "Pragma": "no-cache",
                "Expires": "0",
            },
        )

    # Reconstruct the response with captured body (body_iterator is consumed)
    return Response(
        content=body,
        status_code=response.status_code,
        headers=dict(response.headers),
        media_type=response.media_type,
    )


# Mount static files for web UI
web_dir = Path(__file__).parent.parent / "web"
if web_dir.exists():
    app.mount("/static", StaticFiles(directory=str(web_dir / "static")), name="static")
    docs_site_dir = web_dir / "static" / "docs"
    docs_site_dir.mkdir(parents=True, exist_ok=True)
    app.mount(
        "/docs/site",
        StaticFiles(directory=str(docs_site_dir), html=True),
        name="docs-site",
    )


# ── Scheduler lifecycle hooks ─────────────────────────────────────


@app.on_event("startup")
async def start_scheduler():
    """Start the background scheduler on application startup.

    Reads scheduler configuration from settings and loads default jobs.
    Skipped if ``scheduler.enabled`` is False.
    """
    sched_settings = settings.scheduler
    if not sched_settings.enabled:
        logger.info("Scheduler is disabled via settings")
        return

    scheduler.start()
    job_ids = scheduler.load_default_jobs()
    if job_ids:
        logger.info("Default scheduler jobs loaded", extra={"job_ids": job_ids})
    else:
        logger.info("No default scheduler jobs configured")


@app.on_event("shutdown")
async def stop_scheduler():
    """Shutdown the background scheduler gracefully on application shutdown."""
    if scheduler.running:
        scheduler.shutdown(wait=True)
        logger.info("Scheduler shut down on app shutdown")


security = HTTPBearer()


# Dependency for database session
def get_db():
    """Dependency for database session."""
    with get_session() as session:
        yield session


# Dependency for admin auth
def verify_admin_token(credentials: HTTPAuthorizationCredentials = Security(security)):
    """Verify admin token for protected endpoints."""
    if credentials.credentials != settings.api.admin_token:
        raise HTTPException(status_code=401, detail="Invalid authentication token")
    return credentials.credentials


# Pydantic models for API responses
class HealthResponse(BaseModel):
    """Health check response."""

    status: str
    version: str
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {"status": "healthy", "version": "0.2.0"},
            ]
        }
    )


class RatingResponse(BaseModel):
    """Rating information for an entity."""

    entity_type: str
    entity_id: int
    entity_name: str | None = None
    rating: float
    rd: float | None = None
    race_count: int | None = None
    as_of_race_id: int
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "entity_type": "horse",
                    "entity_id": 42,
                    "entity_name": "Some Delight",
                    "rating": 1520.5,
                    "rd": 85.3,
                    "race_count": 24,
                    "as_of_race_id": 12345,
                },
            ]
        }
    )


class RatingHistoryItem(BaseModel):
    """Single rating history point."""

    race_id: int
    rating: float
    rd: float | None = None
    race_date: str | None = None


class HorseDetailResponse(BaseModel):
    """Detailed horse information with rating history."""

    horse_id: int
    name: str
    current_rating: float | None = None
    current_rd: float | None = None
    race_count: int
    rating_history: list[RatingHistoryItem] = []
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "horse_id": 42,
                    "name": "Some Delight",
                    "current_rating": 1520.5,
                    "current_rd": 85.3,
                    "race_count": 24,
                    "rating_history": [
                        {
                            "race_id": 12345,
                            "rating": 1520.5,
                            "rd": 85.3,
                            "race_date": "2026-05-01",
                        },
                        {
                            "race_id": 12340,
                            "rating": 1510.0,
                            "rd": 90.1,
                            "race_date": "2026-04-28",
                        },
                    ],
                },
            ]
        }
    )


class DriverDetailResponse(BaseModel):
    """Detailed driver information with rating history."""

    driver_id: int
    name: str
    current_rating: float | None = None
    current_rd: float | None = None
    race_count: int
    rating_history: list[RatingHistoryItem] = []


class TrainerDetailResponse(BaseModel):
    """Detailed trainer information with rating history."""

    trainer_id: int
    name: str
    current_rating: float | None = None
    current_rd: float | None = None
    race_count: int
    rating_history: list[RatingHistoryItem] = []


class PaginationMeta(BaseModel):
    """Pagination metadata with next/prev navigation links."""

    total: int
    limit: int
    offset: int
    next: str | None = None
    prev: str | None = None


class PaginatedRatingResponse(BaseModel):
    """Paginated response for rating list endpoints."""

    data: list[RatingResponse]
    meta: PaginationMeta
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "data": [
                        {
                            "entity_type": "horse",
                            "entity_id": 42,
                            "entity_name": "Some Delight",
                            "rating": 1520.5,
                            "rd": 85.3,
                            "race_count": 24,
                            "as_of_race_id": 12345,
                        },
                    ],
                    "meta": {
                        "total": 1,
                        "limit": 100,
                        "offset": 0,
                        "next": "/v1/ratings/horses?limit=100&offset=100",
                        "prev": None,
                    },
                },
            ]
        }
    )


class PaginatedRaceResponse(BaseModel):
    """Paginated response for race list endpoints."""

    data: list[dict]
    meta: PaginationMeta


class IngestionRequest(BaseModel):
    """Request to trigger ingestion."""

    date_from: str = Field(..., description="Start date (YYYY-MM-DD)")
    date_to: str = Field(..., description="End date (YYYY-MM-DD)")


class IngestionResponse(BaseModel):
    """Response from ingestion."""

    meetings: int
    races: int
    starters: int
    errors: int
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {"meetings": 3, "races": 24, "starters": 192, "errors": 0},
            ]
        }
    )


class RecomputeRequest(BaseModel):
    """Request to trigger recompute."""

    date_from: str = Field(..., description="Start date (YYYY-MM-DD)")
    date_to: str = Field(..., description="End date (YYYY-MM-DD)")
    clear_existing: bool = Field(
        default=False, description="Clear existing ratings first"
    )
    learn_adjustments: bool = Field(
        default=False, description="Learn barrier/handicap adjustments"
    )


class RecomputeResponse(BaseModel):
    """Response from recompute."""

    snapshots_created: int
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {"snapshots_created": 1520},
            ]
        }
    )


class ScrapeRequest(BaseModel):
    """Request to trigger a scrape/ingestion."""

    urls: list[str] | None = Field(
        default=None,
        description="Optional HRNZ results URLs or URL paths (e.g., 010741rs.htm)",
    )
    club_codes: list[str] | str | None = Field(
        default=None,
        description="HRNZ club codes to generate URLs for (two digits each)",
    )
    date_from: str = Field(..., description="Start date (YYYY-MM-DD)")
    date_to: str = Field(..., description="End date (YYYY-MM-DD)")
    recompute: bool = Field(default=True, description="Recompute ratings after ingest")
    clear_existing: bool = Field(
        default=False, description="Clear existing ratings before recompute"
    )
    learn_adjustments: bool = Field(
        default=False, description="Learn barrier/handicap adjustments"
    )


class ScrapeResponse(BaseModel):
    """Response from scrape/ingestion webhook."""

    meetings: int
    races: int
    starters: int
    horses: int
    drivers: int
    trainers: int
    errors: int
    recomputed: bool
    snapshots_created: int


# ── Scheduler job management models ────────────────────────────────


class SchedulerJobInfo(BaseModel):
    """Information about a scheduled job."""

    id: str
    name: str
    next_run_time: str | None = None
    trigger: str


class SchedulerJobListResponse(BaseModel):
    """Response with list of scheduled jobs."""

    jobs: list[SchedulerJobInfo]
    total: int


class AddSchedulerJobRequest(BaseModel):
    """Request to add a scheduled job."""

    job_type: str = Field(..., description="Job type: ingest, recompute, or scrape")
    cron_expr: str = Field(..., description="Cron expression for scheduling")
    job_id: str | None = Field(
        default=None, description="Optional custom job ID (auto-generated if omitted)"
    )
    # Ingest-specific
    category: str = Field(default="H", description="Racing category (T, H, G)")
    source: str = Field(default="tab", description="Data source (tab, ingest)")
    # Recompute-specific
    clear: bool = Field(
        default=False, description="Clear existing ratings before recompute"
    )
    # Scrape-specific
    urls: list[str] | None = Field(
        default=None, description="HRNZ result URLs to scrape"
    )
    club_codes: list[str] | None = Field(default=None, description="HRNZ club codes")
    # Date range
    date_from: str | None = Field(default=None, description="Start date (YYYY-MM-DD)")
    date_to: str | None = Field(default=None, description="End date (YYYY-MM-DD)")


class AddSchedulerJobResponse(BaseModel):
    """Response after adding a scheduled job."""

    job_id: str
    message: str


class RemoveSchedulerJobResponse(BaseModel):
    """Response after removing a scheduled job."""

    job_id: str
    removed: bool
    message: str


class PredictionResponse(BaseModel):
    """Prediction for a single starter."""

    horse_id: int
    horse_name: str | None = None
    driver_id: int | None = None
    driver_name: str | None = None
    trainer_id: int | None = None
    trainer_name: str | None = None
    barrier: int | None = None
    effective_rating: float
    win_probability: float
    place_probability: float
    place_score: float
    predicted_placing: int
    ci_lower: float | None = None
    ci_upper: float | None = None
    placing: int | None = None


class RacePredictionResponse(BaseModel):
    """Predictions for all starters in a race."""

    race_id: int
    race_number: int | None = None
    venue: str | None = None
    distance_m: int | None = None
    predictions: list[PredictionResponse] = []
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "race_id": 12345,
                    "race_number": 7,
                    "venue": "Addington",
                    "distance_m": 1980,
                    "predictions": [
                        {
                            "horse_id": 42,
                            "horse_name": "Some Delight",
                            "driver_id": 101,
                            "driver_name": "Ricky May",
                            "trainer_id": 201,
                            "trainer_name": "Mark Purdon",
                            "barrier": 3,
                            "effective_rating": 1520.5,
                            "win_probability": 0.35,
                            "place_probability": 0.62,
                            "place_score": 1.8,
                            "predicted_placing": 1,
                            "ci_lower": 1480.2,
                            "ci_upper": 1560.8,
                            "placing": None,
                        },
                    ],
                },
            ]
        }
    )


class ConfidenceBucket(BaseModel):
    """Accuracy metrics for a confidence bucket."""

    races: int
    win_accuracy: float
    avg_brier: float


class ConfidenceBuckets(BaseModel):
    """Accuracy metrics grouped by confidence."""

    high: ConfidenceBucket
    medium: ConfidenceBucket
    low: ConfidenceBucket


class DailyTrendItem(BaseModel):
    """Single day in the accuracy trend."""

    date: str
    avg_brier: float
    win_accuracy: float
    races: int


class RecentRaceAccuracy(BaseModel):
    """Accuracy metrics for a single evaluated race."""

    race_id: int
    race_number: int | None = None
    venue: str | None = None
    race_date: str | None = None
    field_size: int
    winner_correct: bool
    top3_overlap: int
    brier_score: float


class AccuracySummary(BaseModel):
    """Aggregated prediction accuracy summary."""

    summary: dict
    daily_trend: list[DailyTrendItem]
    confidence_buckets: ConfidenceBuckets
    recent_races: list[RecentRaceAccuracy]
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "summary": {
                        "overall_win_accuracy": 0.28,
                        "top3_accuracy": 0.52,
                        "avg_brier_score": 0.21,
                        "num_races_evaluated": 150,
                    },
                    "daily_trend": [
                        {
                            "date": "2026-05-01",
                            "avg_brier": 0.19,
                            "win_accuracy": 0.30,
                            "races": 10,
                        },
                    ],
                    "confidence_buckets": {
                        "high": {"races": 20, "win_accuracy": 0.55, "avg_brier": 0.12},
                        "medium": {
                            "races": 60,
                            "win_accuracy": 0.28,
                            "avg_brier": 0.20,
                        },
                        "low": {"races": 70, "win_accuracy": 0.14, "avg_brier": 0.28},
                    },
                    "recent_races": [
                        {
                            "race_id": 12345,
                            "race_number": 7,
                            "venue": "Addington",
                            "race_date": "2026-05-01",
                            "field_size": 10,
                            "winner_correct": True,
                            "top3_overlap": 2,
                            "brier_score": 0.15,
                        },
                    ],
                },
            ]
        }
    )


# ── Audit log models ────────────────────────────────────────────────────────


class AuditLogCreateRequest(BaseModel):
    """Request to create an audit log entry."""

    table_name: str = Field(..., description="Name of the table that was changed")
    record_id: str = Field(..., description="Primary key value of the changed record")
    action: str = Field(
        ..., description="Type of change: INSERT, UPDATE, DELETE, or CORRECT"
    )
    old_values: dict[str, Any] | None = Field(
        default=None, description="Snapshot of values before the change"
    )
    new_values: dict[str, Any] | None = Field(
        default=None, description="Snapshot of values after the change"
    )
    changed_by: str | None = Field(default=None, description="User/system identifier")
    change_reason: str | None = Field(
        default=None, description="Human-readable reason for the change"
    )


class AuditLogEntry(BaseModel):
    """Single audit log entry in API responses."""

    id: int
    table_name: str
    record_id: str
    action: str
    old_values: dict[str, Any] | None = None
    new_values: dict[str, Any] | None = None
    changed_by: str | None = None
    change_reason: str | None = None
    created_at: str | None = None
    model_config = ConfigDict(from_attributes=True)


class AuditLogListResponse(BaseModel):
    """Paginated response for audit log listing."""

    data: list[AuditLogEntry]
    total: int


# Endpoints


def _normalize_club_code(code: str) -> str:
    """Normalize club codes to two-digit strings."""
    code_str = str(code).strip()
    if code_str.isdigit():
        value = int(code_str)
        if value < 0 or value > 99:
            raise ValueError("Club code must be between 00 and 99")
        return f"{value:02d}"
    raise ValueError("Club code must be numeric")


def _generate_hrnz_urls(
    start_date: date, end_date: date, club_codes: list[str]
) -> list[str]:
    """Generate HRNZ result URLs for a date range and club codes."""
    normalized_codes = [_normalize_club_code(code) for code in club_codes]
    urls: list[str] = []

    current = start_date
    while current <= end_date:
        date_prefix = current.strftime("%m%d")
        for code in normalized_codes:
            urls.append(f"{date_prefix}{code}rs.htm")
        current += timedelta(days=1)

    return urls


def _resolve_meeting_date(scraped: dict, url: str, default_year: int) -> date | None:
    """Resolve meeting date from scraped data or URL."""
    date_raw = scraped.get("date_raw")
    if date_raw:
        date_raw = date_raw.replace("\xa0", " ").strip()
        if not re.search(r"\\b\\d{4}\\b", date_raw):
            for fmt in ["%A, %d %B", "%d %B"]:
                try:
                    parsed = datetime.strptime(date_raw, fmt)
                    return parsed.replace(year=default_year).date()
                except ValueError:
                    continue
        formats = [
            "%A, %d %B %Y",
            "%A, %d %B",
            "%d %B %Y",
            "%d %B",
        ]
        for fmt in formats:
            try:
                parsed = datetime.strptime(date_raw, fmt)
                if "%Y" not in fmt:
                    parsed = parsed.replace(year=default_year)
                return parsed.date()
            except ValueError:
                continue

    meeting_date_str = scraped.get("date")
    if meeting_date_str:
        try:
            return date.fromisoformat(meeting_date_str)
        except ValueError:
            pass

    match = re.search(r"(?P<mm>\\d{2})(?P<dd>\\d{2})\\d{2}rs\\.htm", url)
    if match:
        try:
            return date(default_year, int(match.group("mm")), int(match.group("dd")))
        except ValueError:
            return None

    return None


def _build_pagination_meta(
    total: int,
    limit: int,
    offset: int,
    base_url: str,
    query_params: dict,
) -> dict:
    """Build pagination metadata with next/prev navigation links.

    Args:
        total: Total number of results
        limit: Maximum results per page
        offset: Current offset
        base_url: Base URL without query string
        query_params: Current query parameters dict (limit/offset will be overridden)

    Returns:
        Dict with total, limit, offset, next, prev keys
    """
    meta: dict = {
        "total": total,
        "limit": limit,
        "offset": offset,
    }

    # Build next link
    if offset + limit < total:
        next_params = {**query_params, "limit": limit, "offset": offset + limit}
        query_string = "&".join(
            f"{k}={v}" for k, v in next_params.items() if v is not None
        )
        meta["next"] = f"{base_url}?{query_string}"
    else:
        meta["next"] = None

    # Build prev link
    if offset > 0:
        prev_offset = max(0, offset - limit)
        prev_params = {**query_params, "limit": limit, "offset": prev_offset}
        query_string = "&".join(
            f"{k}={v}" for k, v in prev_params.items() if v is not None
        )
        meta["prev"] = f"{base_url}?{query_string}"
    else:
        meta["prev"] = None

    return meta


# Web UI routes
@app.get("/", response_class=HTMLResponse)
async def root():
    """Redirect to web UI home page."""
    return RedirectResponse(url="/ui/")


@app.get("/docs/site")
@app.get("/docs/site/", response_class=HTMLResponse)
async def serve_docs_site():
    """Redirect to the static docs site."""
    return RedirectResponse(url="/static/docs/")


@app.get("/ui/", response_class=HTMLResponse)
@app.get("/ui/index.html", response_class=HTMLResponse)
async def serve_home():
    """Serve web UI home page."""
    html_path = web_dir / "templates" / "index.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/ui/horse/{horse_id}", response_class=HTMLResponse)
async def serve_horse_detail(horse_id: int):
    """Serve horse detail page."""
    html_path = web_dir / "templates" / "horse.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/ui/driver/{driver_id}", response_class=HTMLResponse)
async def serve_driver_detail(driver_id: int):
    """Serve driver detail page."""
    html_path = web_dir / "templates" / "driver.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/ui/trainer/{trainer_id}", response_class=HTMLResponse)
async def serve_trainer_detail(trainer_id: int):
    """Serve trainer detail page."""
    html_path = web_dir / "templates" / "trainer.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/ui/race/{race_id}", response_class=HTMLResponse)
async def serve_race_detail(race_id: int):
    """Serve race detail page."""
    html_path = web_dir / "templates" / "race.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/ui/race-card", response_class=HTMLResponse)
@app.get("/ui/race-card/", response_class=HTMLResponse)
async def serve_race_cards_index():
    """Serve race cards index page."""
    html_path = web_dir / "templates" / "race-card.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/ui/race-card/{race_id}", response_class=HTMLResponse)
async def serve_race_card(race_id: int):
    """Serve race card view for a specific race."""
    html_path = web_dir / "templates" / "race-card.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/ui/search", response_class=HTMLResponse)
async def serve_search():
    """Serve search page."""
    html_path = web_dir / "templates" / "search.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/ui/analytics", response_class=HTMLResponse)
async def serve_analytics():
    """Serve analytics page."""
    html_path = web_dir / "templates" / "analytics.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/ui/analytics-dashboard", response_class=HTMLResponse)
async def serve_analytics_dashboard():
    """Serve analytics dashboard page with combined accuracy, ratings, and confidence metrics."""
    html_path = web_dir / "templates" / "analytics-dashboard.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/ui/data-correction", response_class=HTMLResponse)
async def serve_data_correction():
    """Serve data correction workflow page."""
    html_path = web_dir / "templates" / "data-correction.html"
    if html_path.exists():
        return HTMLResponse(content=html_path.read_text())
    raise HTTPException(status_code=404, detail="Web UI not found")


@app.get("/health", response_model=HealthResponse)
@limiter.limit("100/minute")
def health_check(request: Request):
    """Health check endpoint.

    Returns:
        Health status and version
    """
    return HealthResponse(status="healthy", version="0.2.0")


@api_router.get("/ratings/horses", response_model=PaginatedRatingResponse)
@limiter.limit("100/minute")
def get_horse_ratings(
    request: Request,
    limit: int = Query(default=100, le=500),
    offset: int = Query(default=0, ge=0),
    as_of_date: str | None = Query(default=None, description="YYYY-MM-DD"),
    venue: str | None = Query(default=None, description="Filter by venue"),
    format: str = Query(default="json", description="Response format: json or csv"),
    db: Session = Depends(get_db),
):
    """Get top horse ratings.

    Args:
        limit: Maximum results to return
        offset: Number of results to skip
        as_of_date: Optional date filter for historical ratings
        venue: Optional venue filter
        format: Response format (json or csv)
        db: Database session

    Returns:
        Paginated list of horse ratings in JSON or raw CSV
    """
    # Parse as_of_date if provided
    date_filter = parse_date(as_of_date) if as_of_date else None

    # Fetch all snapshots (use a high limit to effectively get all; pagination
    # is handled in-memory after venue filtering)
    snapshots = RatingSnapshotRepository.get_top_ratings(
        db, EntityType.HORSE, limit=100_000, as_of_date=date_filter
    )

    results = []
    for snapshot in snapshots:
        # Load horse name
        horse = db.query(Horse).filter(Horse.id == snapshot.entity_id).first()

        # Apply venue filter if specified
        if venue:
            # Check if horse's most recent race was at the specified venue
            # This is a simplified filter - could be more sophisticated
            from packages.core.storage.models import Starter

            recent_starter = (
                db.query(Starter)
                .join(Race)
                .join(Meeting)
                .filter(
                    Starter.horse_id == snapshot.entity_id,
                    Meeting.venue == venue,
                )
                .first()
            )
            if not recent_starter:
                continue

        results.append(
            RatingResponse(
                entity_type="horse",
                entity_id=snapshot.entity_id,
                entity_name=horse.name if horse else None,
                rating=snapshot.rating,
                rd=snapshot.rd,
                race_count=snapshot.meta.get("race_count") if snapshot.meta else None,
                as_of_race_id=snapshot.as_of_race_id,
            )
        )

    # Count total after filtering
    total = len(results)

    # Apply offset and limit for the current page
    page = results[offset : offset + limit]

    # Return CSV if requested
    if format.lower() == "csv":
        output = StringIO()
        writer = csv.DictWriter(
            output,
            fieldnames=[
                "entity_id",
                "name",
                "rating",
                "rd",
                "race_count",
                "as_of_race_id",
            ],
        )
        writer.writeheader()
        for r in page:
            writer.writerow(
                {
                    "entity_id": r.entity_id,
                    "name": r.entity_name,
                    "rating": r.rating,
                    "rd": r.rd,
                    "race_count": r.race_count,
                    "as_of_race_id": r.as_of_race_id,
                }
            )
        return Response(content=output.getvalue(), media_type="text/csv")

    # Build pagination metadata
    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRatingResponse(data=page, meta=PaginationMeta(**meta))


@api_router.get("/ratings/horses/{horse_id}", response_model=HorseDetailResponse)
@limiter.limit("100/minute")
def get_horse_detail(
    request: Request,
    horse_id: int,
    db: Session = Depends(get_db),
):
    """Get detailed horse information with rating history.

    Args:
        horse_id: Horse ID
        db: Database session

    Returns:
        Horse details with rating history
    """
    # Get horse
    horse = db.query(Horse).filter(Horse.id == horse_id).first()
    if not horse:
        raise HTTPException(status_code=404, detail="Horse not found")

    # Get all rating snapshots for this horse
    from packages.core.storage.models import RatingSnapshot

    snapshots = (
        db.query(RatingSnapshot)
        .join(Race, RatingSnapshot.as_of_race_id == Race.id)
        .join(Meeting, Race.meeting_id == Meeting.id)
        .filter(
            RatingSnapshot.entity_type == EntityType.HORSE,
            RatingSnapshot.entity_id == horse_id,
        )
        .order_by(
            Meeting.meeting_date,
            Race.race_datetime,
            Race.race_number,
            Race.id,
        )
        .all()
    )

    # Build history
    history = []
    for snapshot in snapshots:
        # Optionally load race date (requires join with races/meetings)
        history.append(
            RatingHistoryItem(
                race_id=snapshot.as_of_race_id,
                rating=snapshot.rating,
                rd=snapshot.rd,
            )
        )

    # Get current rating (latest snapshot)
    current_rating = snapshots[-1].rating if snapshots else None
    current_rd = snapshots[-1].rd if snapshots else None

    return HorseDetailResponse(
        horse_id=horse_id,
        name=horse.name,
        current_rating=current_rating,
        current_rd=current_rd,
        race_count=len(snapshots),
        rating_history=history,
    )


@api_router.get("/ratings/drivers", response_model=PaginatedRatingResponse)
@limiter.limit("100/minute")
def get_driver_ratings(
    request: Request,
    limit: int = Query(default=100, le=500),
    offset: int = Query(default=0, ge=0),
    db: Session = Depends(get_db),
):
    """Get top driver ratings.

    Args:
        limit: Maximum results to return
        offset: Number of results to skip
        db: Database session

    Returns:
        Paginated list of driver ratings
    """
    snapshots = RatingSnapshotRepository.get_top_ratings(
        db, EntityType.DRIVER, limit=100_000
    )

    results = []
    for snapshot in snapshots:
        driver = db.query(Driver).filter(Driver.id == snapshot.entity_id).first()

        results.append(
            RatingResponse(
                entity_type="driver",
                entity_id=snapshot.entity_id,
                entity_name=driver.name if driver else None,
                rating=snapshot.rating,
                rd=snapshot.rd,
                race_count=snapshot.meta.get("race_count") if snapshot.meta else None,
                as_of_race_id=snapshot.as_of_race_id,
            )
        )

    total = len(results)
    page = results[offset : offset + limit]

    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRatingResponse(data=page, meta=PaginationMeta(**meta))


@api_router.get("/ratings/trainers", response_model=PaginatedRatingResponse)
@limiter.limit("100/minute")
def get_trainer_ratings(
    request: Request,
    limit: int = Query(default=100, le=500),
    offset: int = Query(default=0, ge=0),
    db: Session = Depends(get_db),
):
    """Get top trainer ratings.

    Args:
        limit: Maximum results to return
        offset: Number of results to skip
        db: Database session

    Returns:
        Paginated list of trainer ratings
    """
    snapshots = RatingSnapshotRepository.get_top_ratings(
        db, EntityType.TRAINER, limit=100_000
    )

    results = []
    for snapshot in snapshots:
        trainer = db.query(Trainer).filter(Trainer.id == snapshot.entity_id).first()

        results.append(
            RatingResponse(
                entity_type="trainer",
                entity_id=snapshot.entity_id,
                entity_name=trainer.name if trainer else None,
                rating=snapshot.rating,
                rd=snapshot.rd,
                race_count=snapshot.meta.get("race_count") if snapshot.meta else None,
                as_of_race_id=snapshot.as_of_race_id,
            )
        )

    total = len(results)
    page = results[offset : offset + limit]

    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRatingResponse(data=page, meta=PaginationMeta(**meta))


@api_router.get("/ratings/drivers/{driver_id}", response_model=DriverDetailResponse)
@limiter.limit("100/minute")
def get_driver_detail(
    request: Request,
    driver_id: int,
    db: Session = Depends(get_db),
):
    """Get detailed driver information with rating history.

    Args:
        driver_id: Driver ID
        db: Database session

    Returns:
        Driver details with rating history
    """
    # Get driver
    driver = db.query(Driver).filter(Driver.id == driver_id).first()
    if not driver:
        raise HTTPException(status_code=404, detail="Driver not found")

    # Get all rating snapshots for this driver
    from packages.core.storage.models import RatingSnapshot

    snapshots = (
        db.query(RatingSnapshot)
        .join(Race, RatingSnapshot.as_of_race_id == Race.id)
        .join(Meeting, Race.meeting_id == Meeting.id)
        .filter(
            RatingSnapshot.entity_type == EntityType.DRIVER,
            RatingSnapshot.entity_id == driver_id,
        )
        .order_by(
            Meeting.meeting_date,
            Race.race_datetime,
            Race.race_number,
            Race.id,
        )
        .all()
    )

    # Build history
    history = []
    for snapshot in snapshots:
        history.append(
            RatingHistoryItem(
                race_id=snapshot.as_of_race_id,
                rating=snapshot.rating,
                rd=snapshot.rd,
            )
        )

    # Get current rating (latest snapshot)
    current_rating = snapshots[-1].rating if snapshots else None
    current_rd = snapshots[-1].rd if snapshots else None

    return DriverDetailResponse(
        driver_id=driver_id,
        name=driver.name,
        current_rating=current_rating,
        current_rd=current_rd,
        race_count=len(snapshots),
        rating_history=history,
    )


@api_router.get("/ratings/trainers/{trainer_id}", response_model=TrainerDetailResponse)
@limiter.limit("100/minute")
def get_trainer_detail(
    request: Request,
    trainer_id: int,
    db: Session = Depends(get_db),
):
    """Get detailed trainer information with rating history.

    Args:
        trainer_id: Trainer ID
        db: Database session

    Returns:
        Trainer details with rating history
    """
    # Get trainer
    trainer = db.query(Trainer).filter(Trainer.id == trainer_id).first()
    if not trainer:
        raise HTTPException(status_code=404, detail="Trainer not found")

    # Get all rating snapshots for this trainer
    from packages.core.storage.models import RatingSnapshot

    snapshots = (
        db.query(RatingSnapshot)
        .join(Race, RatingSnapshot.as_of_race_id == Race.id)
        .join(Meeting, Race.meeting_id == Meeting.id)
        .filter(
            RatingSnapshot.entity_type == EntityType.TRAINER,
            RatingSnapshot.entity_id == trainer_id,
        )
        .order_by(
            Meeting.meeting_date,
            Race.race_datetime,
            Race.race_number,
            Race.id,
        )
        .all()
    )

    # Build history
    history = []
    for snapshot in snapshots:
        history.append(
            RatingHistoryItem(
                race_id=snapshot.as_of_race_id,
                rating=snapshot.rating,
                rd=snapshot.rd,
            )
        )

    # Get current rating (latest snapshot)
    current_rating = snapshots[-1].rating if snapshots else None
    current_rd = snapshots[-1].rd if snapshots else None

    return TrainerDetailResponse(
        trainer_id=trainer_id,
        name=trainer.name,
        current_rating=current_rating,
        current_rd=current_rd,
        race_count=len(snapshots),
        rating_history=history,
    )


@api_router.get("/races/{race_id}")
@limiter.limit("100/minute")
def get_race(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Get race details with starters.

    Args:
        race_id: Race ID
        db: Database session

    Returns:
        Race details
    """
    from packages.core.storage.models import Starter

    race = (
        db.query(Race)
        .options(joinedload(Race.meeting))
        .filter(Race.id == race_id)
        .first()
    )
    if not race:
        raise HTTPException(status_code=404, detail="Race not found")

    starters = db.query(Starter).filter(Starter.race_id == race_id).all()

    return {
        "id": race.id,
        "race_id": race.id,
        "meeting_id": race.meeting_id,
        "race_number": race.race_number,
        "venue": race.meeting.venue if race.meeting else None,
        "distance_m": race.distance_m,
        "start_type": race.start_type,
        "gait": race.gait,
        "race_datetime": race.race_datetime.isoformat() if race.race_datetime else None,
        "starters_count": len(starters),
        "starters": [
            {
                "horse_id": s.horse_id,
                "driver_id": s.driver_id,
                "trainer_id": s.trainer_id,
                "barrier": s.barrier,
                "handicap_m": s.handicap_m,
                "placing": s.placing,
                "did_not_finish": s.did_not_finish,
            }
            for s in starters
        ],
    }


@api_router.get("/races", response_model=PaginatedRaceResponse)
@limiter.limit("200/minute")
def list_races(
    request: Request,
    date_from: str | None = Query(default=None, description="YYYY-MM-DD"),
    date_to: str | None = Query(default=None, description="YYYY-MM-DD"),
    limit: int = Query(default=100, le=1000),
    offset: int = Query(default=0, ge=0),
    venue: str | None = Query(default=None, description="Filter by venue"),
    db: Session = Depends(get_db),
):
    """List races within a date range."""
    from packages.core.common.utils import parse_date

    if date_from:
        start_date = parse_date(date_from)
    else:
        start_date = date.today() - timedelta(days=30)

    if date_to:
        end_date = parse_date(date_to)
    else:
        end_date = date.today()

    query = (
        db.query(Race)
        .join(Race.meeting)
        .filter(Meeting.meeting_date >= start_date, Meeting.meeting_date <= end_date)
    )
    if venue:
        query = query.filter(Meeting.venue == venue)

    total = query.count()
    races = (
        query.options(joinedload(Race.meeting))
        .order_by(
            Meeting.meeting_date,
            Race.race_datetime,
            Race.race_number,
            Race.id,
        )
        .limit(limit)
        .offset(offset)
        .all()
    )

    race_list = []
    for race in races:
        race_list.append(
            {
                "race_id": race.id,
                "meeting_id": race.meeting_id,
                "meeting_date": (
                    race.meeting.meeting_date.isoformat()
                    if race.meeting and race.meeting.meeting_date
                    else None
                ),
                "venue": race.meeting.venue if race.meeting else None,
                "race_number": race.race_number,
                "distance_m": race.distance_m,
                "start_type": race.start_type,
                "race_datetime": (
                    race.race_datetime.isoformat() if race.race_datetime else None
                ),
            }
        )

    # Build pagination metadata with next/prev links
    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRaceResponse(data=race_list, meta=PaginationMeta(**meta))


@api_router.post("/admin/ingest", response_model=IngestionResponse)
async def trigger_ingestion(
    request: IngestionRequest,
    source: str = Query(
        default="tab",
        description='Data source: "tab" (TAB API directly) or "ingest" (tab-api-ingest service)',
    ),
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Trigger data ingestion (admin only).

    Supports two data sources:
      - "tab" (default):  fetches data directly from the TAB Affiliates API
      - "ingest":         fetches data from the tab-api-ingest TypeScript service

    Args:
        request: Ingestion request with date range
        source: Data source selector
        db: Database session
        token: Admin token

    Returns:
        Ingestion statistics
    """
    start_date = parse_date(request.date_from)
    end_date = parse_date(request.date_to)

    logger.info(
        f"Admin triggered ingestion: {start_date} to {end_date} (source={source})"
    )

    service = IngestionService(db, source=source)
    meetings, races, starters = await service.ingest_date_range(start_date, end_date)

    return IngestionResponse(
        meetings=meetings,
        races=races,
        starters=starters,
        errors=service.stats["errors"],
    )


@api_router.post("/webhook/scrape", response_model=ScrapeResponse)
async def webhook_scrape(
    request: ScrapeRequest,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Webhook to trigger HRNZ Playwright scrape with parameters."""

    from packages.core.storage.repositories import (
        DriverRepository,
        HorseRepository,
        MeetingRepository,
        RaceRepository,
        StarterRepository,
        TrainerRepository,
    )
    from packages.hrnz_scraper import HRNZScraper
    from packages.hrnz_scraper.mapper import HRNZDataMapper

    start_date = parse_date(request.date_from)
    end_date = parse_date(request.date_to)

    club_codes = request.club_codes
    if club_codes is None:
        club_codes = settings.hrnz.club_codes

    if isinstance(club_codes, str):
        if club_codes.lower() == "all":
            club_codes = HRNZ_ALL_CLUB_CODES
        else:
            club_codes = [
                code.strip() for code in club_codes.split(",") if code.strip()
            ]

    if isinstance(club_codes, list):
        if any(isinstance(code, str) and code.lower() == "all" for code in club_codes):
            club_codes = HRNZ_ALL_CLUB_CODES

    urls: list[str] = []
    if club_codes:
        try:
            urls.extend(_generate_hrnz_urls(start_date, end_date, club_codes))
        except ValueError as exc:
            raise HTTPException(status_code=400, detail=str(exc)) from exc

    if request.urls:
        urls.extend(request.urls)

    if not urls:
        raise HTTPException(
            status_code=400,
            detail="Provide club_codes (or HRNZ_CLUB_CODES) to generate URLs, or pass urls explicitly.",
        )

    urls = list(dict.fromkeys(urls))

    logger.info(
        "Webhook triggered scrape",
        extra={
            "start_date": str(start_date),
            "end_date": str(end_date),
            "recompute": request.recompute,
            "urls": len(urls),
        },
    )

    stats = {
        "meetings": 0,
        "races": 0,
        "starters": 0,
        "horses": 0,
        "drivers": 0,
        "trainers": 0,
        "errors": 0,
    }

    mapper = HRNZDataMapper()
    async with HRNZScraper() as scraper:
        for url in urls:
            try:
                scraped = await scraper.get_meeting_results(url)

                meeting_date = _resolve_meeting_date(scraped, url, start_date.year)
                if not meeting_date or not scraped.get("races"):
                    logger.info("Skipping non-meeting page for %s", url)
                    continue

                if not (start_date <= meeting_date <= end_date):
                    continue

                scraped["date"] = meeting_date.isoformat()
                meeting = mapper.map_meeting(scraped)
                entities = mapper.map_entities(scraped)

                MeetingRepository.upsert(db, meeting)
                stats["meetings"] += 1

                for horse in entities["horses"]:
                    HorseRepository.upsert(
                        db,
                        horse["id"],
                        horse["name"],
                        horse.get("raw_json"),
                    )
                    stats["horses"] += 1

                for driver in entities["drivers"]:
                    DriverRepository.upsert(
                        db, driver["name"], driver_id=driver.get("id")
                    )
                    stats["drivers"] += 1

                for trainer in entities["trainers"]:
                    TrainerRepository.upsert(
                        db, trainer["name"], trainer_id=trainer.get("id")
                    )
                    stats["trainers"] += 1

                races = mapper.map_races(scraped, meeting["meeting"])
                race_id_map = {}
                for race in races:
                    race_obj = RaceRepository.upsert(db, meeting["meeting"], race)
                    race_id_map[race["race_number"]] = race_obj.id
                    stats["races"] += 1

                starters = mapper.map_starters(scraped, race_id_map)
                for starter in starters:
                    StarterRepository.upsert(
                        db,
                        starter["race_id"],
                        starter,
                        starter.get("placing"),
                    )
                    stats["starters"] += 1

                db.commit()
            except Exception as exc:
                db.rollback()
                stats["errors"] += 1
                logger.error("Failed to scrape %s: %s", url, exc, exc_info=True)

    snapshots_created = 0
    if request.recompute:

        def _run_recompute():
            with get_session() as session:
                return recompute_ratings(
                    session,
                    start_date,
                    end_date,
                    clear_existing=request.clear_existing,
                    learn_adjustments=request.learn_adjustments,
                )

        snapshots_created = await asyncio.to_thread(_run_recompute)

    return ScrapeResponse(
        meetings=stats["meetings"],
        races=stats["races"],
        starters=stats["starters"],
        horses=stats["horses"],
        drivers=stats["drivers"],
        trainers=stats["trainers"],
        errors=stats["errors"],
        recomputed=request.recompute,
        snapshots_created=snapshots_created,
    )


@api_router.post("/admin/recompute", response_model=RecomputeResponse)
def trigger_recompute(
    request: RecomputeRequest,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Trigger rating recompute (admin only).

    Args:
        request: Recompute request with date range
        db: Database session
        token: Admin token

    Returns:
        Recompute statistics
    """
    start_date = parse_date(request.date_from)
    end_date = parse_date(request.date_to)

    logger.info(f"Admin triggered recompute: {start_date} to {end_date}")

    snapshot_count = recompute_ratings(
        db,
        start_date,
        end_date,
        clear_existing=request.clear_existing,
        learn_adjustments=request.learn_adjustments,
    )

    return RecomputeResponse(snapshots_created=snapshot_count)


# ── Scheduler admin endpoints ────────────────────────────────────


@api_router.get(
    "/admin/jobs",
    response_model=SchedulerJobListResponse,
)
@limiter.limit("20/minute")
def list_scheduled_jobs(
    request: Request,
    token: str = Depends(verify_admin_token),
):
    """List all scheduled background jobs (admin only).

    Returns a list of all cron jobs currently registered in the scheduler.

    Args:
        token: Admin authentication token

    Returns:
        List of scheduled jobs with their triggers and next run times
    """
    logger.info("Admin listing scheduled jobs")
    jobs = scheduler.list_jobs()
    return SchedulerJobListResponse(
        jobs=[SchedulerJobInfo(**job) for job in jobs],
        total=len(jobs),
    )


@api_router.post(
    "/admin/jobs",
    response_model=AddSchedulerJobResponse,
    status_code=201,
)
@limiter.limit("20/minute")
def add_scheduled_job(
    request: Request,
    job_request: AddSchedulerJobRequest,
    token: str = Depends(verify_admin_token),
):
    """Add a new scheduled background job (admin only).

    Supports three job types:
      - ``ingest``:     Scheduled data ingestion from TAB API
      - ``recompute``:  Scheduled rating recomputation
      - ``scrape``:     Scheduled HRNZ data scraping

    Args:
        job_request: Job configuration including cron expression and type-specific params
        token: Admin authentication token

    Returns:
        The job ID and confirmation message
    """
    job_id = (
        job_request.job_id
        or f"{job_request.job_type}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
    )

    logger.info(
        "Admin adding scheduled job",
        extra={
            "job_type": job_request.job_type,
            "cron_expr": job_request.cron_expr,
            "job_id": job_id,
        },
    )

    if job_request.job_type == "ingest":
        scheduler.add_ingest_job(
            date_from=job_request.date_from,
            date_to=job_request.date_to,
            category=job_request.category,
            source=job_request.source,
            cron_expr=job_request.cron_expr,
            job_id=job_id,
        )
    elif job_request.job_type == "recompute":
        scheduler.add_recompute_job(
            date_from=job_request.date_from,
            date_to=job_request.date_to,
            clear=job_request.clear,
            cron_expr=job_request.cron_expr,
            job_id=job_id,
        )
    elif job_request.job_type == "scrape":
        scheduler.add_scrape_job(
            urls=job_request.urls,
            club_codes=job_request.club_codes,
            date_from=job_request.date_from,
            date_to=job_request.date_to,
            cron_expr=job_request.cron_expr,
            job_id=job_id,
        )
    else:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown job_type '{job_request.job_type}'. Must be 'ingest', 'recompute', or 'scrape'.",
        )

    return AddSchedulerJobResponse(
        job_id=job_id,
        message=f"Scheduled {job_request.job_type} job with cron '{job_request.cron_expr}'",
    )


@api_router.delete(
    "/admin/jobs/{job_id}",
    response_model=RemoveSchedulerJobResponse,
)
@limiter.limit("20/minute")
def remove_scheduled_job(
    request: Request,
    job_id: str,
    token: str = Depends(verify_admin_token),
):
    """Remove a scheduled background job (admin only).

    Args:
        job_id: The job identifier to remove
        token: Admin authentication token

    Returns:
        Confirmation of removal
    """
    logger.info("Admin removing scheduled job", extra={"job_id": job_id})
    removed = scheduler.remove_job(job_id)

    if removed:
        return RemoveSchedulerJobResponse(
            job_id=job_id,
            removed=True,
            message=f"Job '{job_id}' removed successfully",
        )

    raise HTTPException(
        status_code=404,
        detail=f"Job '{job_id}' not found",
    )


# ── Admin audit log endpoints ────────────────────────────────────────────


@api_router.post("/admin/audit-log", response_model=AuditLogEntry, status_code=201)
def create_audit_log(
    request: AuditLogCreateRequest,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Create an audit log entry (admin only).

    Records a data change or correction event in the audit log.
    This endpoint allows programmatic creation of audit entries,
    e.g. from the data correction UI or automated scripts.

    Args:
        request: Audit log entry details
        db: Database session
        token: Admin token

    Returns:
        The created audit log entry
    """
    action_upper = request.action.upper().strip()
    if action_upper not in ("INSERT", "UPDATE", "DELETE", "CORRECT"):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid action '{request.action}'. Must be INSERT, UPDATE, DELETE, or CORRECT.",
        )

    entry = AuditLogger.log_change(
        session=db,
        table_name=request.table_name,
        record_id=request.record_id,
        action=action_upper,
        old_values=request.old_values,
        new_values=request.new_values,
        changed_by=request.changed_by,
        change_reason=request.change_reason,
    )
    if entry is None:
        raise HTTPException(status_code=500, detail="Failed to create audit log entry")

    # Convert ORM model to Pydantic response
    return AuditLogEntry(
        id=entry.id,
        table_name=entry.table_name,
        record_id=entry.record_id,
        action=entry.action,
        old_values=entry.old_values,
        new_values=entry.new_values,
        changed_by=entry.changed_by,
        change_reason=entry.change_reason,
        created_at=entry.created_at.isoformat() if entry.created_at else None,
    )


@api_router.get("/admin/audit-log", response_model=AuditLogListResponse)
def list_audit_logs(
    limit: int = Query(default=100, le=500, description="Maximum entries to return"),
    offset: int = Query(default=0, ge=0, description="Number of entries to skip"),
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """List recent audit log entries (admin only).

    Args:
        limit: Maximum entries to return
        offset: Number of entries to skip
        db: Database session
        token: Admin token

    Returns:
        Paginated list of recent audit log entries
    """
    entries = AuditLogger.get_recent_changes(db, limit=limit + offset)
    # Apply offset in Python (the logger returns newest-first)
    page = entries[offset : offset + limit]

    total = len(entries)
    if total > limit + offset:
        total = db.query(AuditLog).count()

    return AuditLogListResponse(
        data=[
            AuditLogEntry(
                id=e.id,
                table_name=e.table_name,
                record_id=e.record_id,
                action=e.action,
                old_values=e.old_values,
                new_values=e.new_values,
                changed_by=e.changed_by,
                change_reason=e.change_reason,
                created_at=e.created_at.isoformat() if e.created_at else None,
            )
            for e in page
        ],
        total=total,
    )


@api_router.get(
    "/admin/audit-log/{table_name}/{record_id}", response_model=AuditLogListResponse
)
def get_audit_log_for_record(
    table_name: str,
    record_id: str,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Get all audit log entries for a specific record (admin only).

    Args:
        table_name: Name of the table
        record_id: Primary key value of the record
        db: Database session
        token: Admin token

    Returns:
        List of audit log entries for the specified record
    """
    entries = AuditLogger.get_changes_for_record(db, table_name, record_id)

    return AuditLogListResponse(
        data=[
            AuditLogEntry(
                id=e.id,
                table_name=e.table_name,
                record_id=e.record_id,
                action=e.action,
                old_values=e.old_values,
                new_values=e.new_values,
                changed_by=e.changed_by,
                change_reason=e.change_reason,
                created_at=e.created_at.isoformat() if e.created_at else None,
            )
            for e in entries
        ],
        total=len(entries),
    )


@api_router.get("/races/{race_id}/predictions", response_model=RacePredictionResponse)
@limiter.limit("50/minute")
def get_race_predictions(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Get win probability predictions for a race.

    Args:
        race_id: Race ID
        db: Database session

    Returns:
        Predictions for all starters
    """
    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    # Get race
    race = db.query(Race).filter(Race.id == race_id).first()
    if not race:
        raise HTTPException(status_code=404, detail="Race not found")

    # Get starters
    starters = db.query(Starter).filter(Starter.race_id == race_id).all()
    if not starters:
        raise HTTPException(status_code=404, detail="No starters found for race")
    starter_by_id = {starter.id: starter for starter in starters}

    # Load race meeting
    if not race.meeting:
        from sqlalchemy.orm import joinedload

        race = (
            db.query(Race)
            .options(joinedload(Race.meeting))
            .filter(Race.id == race_id)
            .first()
        )

    engine = PredictionEngine(db)
    prediction = engine.predict_race(race, starters)

    predictions = [
        PredictionResponse(
            horse_id=pred.horse_id,
            horse_name=pred.horse_name,
            driver_id=pred.driver_id,
            driver_name=pred.driver_name,
            trainer_id=pred.trainer_id,
            trainer_name=pred.trainer_name,
            barrier=pred.barrier,
            effective_rating=pred.effective_rating,
            win_probability=pred.win_probability,
            place_probability=pred.place_probability,
            place_score=pred.place_score,
            predicted_placing=pred.predicted_placing,
            ci_lower=pred.confidence_interval_low,
            ci_upper=pred.confidence_interval_high,
            placing=(
                starter_by_id[pred.starter_id].placing
                if pred.starter_id in starter_by_id
                else None
            ),
        )
        for pred in prediction.predictions
    ]

    return RacePredictionResponse(
        race_id=race_id,
        race_number=race.race_number,
        venue=race.meeting.venue if race.meeting else None,
        distance_m=race.distance_m,
        predictions=predictions,
    )


@api_router.get("/races/upcoming")
@limiter.limit("100/minute")
def get_upcoming_races(
    request: Request,
    race_date: str | None = Query(
        default=None, description="YYYY-MM-DD, defaults to today"
    ),
    db: Session = Depends(get_db),
):
    """Get upcoming races for a specific date.

    Args:
        race_date: Date to get races for (defaults to today)
        db: Database session

    Returns:
        List of races for the specified date
    """
    from packages.core.common.utils import parse_date

    target_date = parse_date(race_date) if race_date else date.today()

    races = (
        db.query(Race)
        .options(joinedload(Race.meeting))
        .join(Race.meeting)
        .filter(Meeting.meeting_date == target_date)
        .order_by(Meeting.venue, Race.race_number)
        .all()
    )

    race_list = []
    for race in races:
        race_list.append(
            {
                "race_id": race.id,
                "race_number": race.race_number,
                "venue": race.meeting.venue if race.meeting else None,
                "distance_m": race.distance_m,
                "start_type": race.start_type,
                "starter_count": len(race.starters),
            }
        )

    return {
        "date": target_date.isoformat(),
        "race_count": len(race_list),
        "races": race_list,
    }


@api_router.get("/predictions/compare/{race_id}")
@limiter.limit("50/minute")
def compare_prediction_to_actual(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Compare prediction to actual result for a completed race.

    Args:
        race_id: Race ID
        db: Database session

    Returns:
        Prediction accuracy comparison
    """
    from packages.core.ratings.predictions import PredictionEngine

    engine = PredictionEngine(db)
    comparison = engine.compare_prediction_to_actual(race_id)

    if not comparison:
        raise HTTPException(status_code=404, detail="Race not found or not completed")

    return comparison


@api_router.get("/analytics/accuracy", response_model=AccuracySummary)
@limiter.limit("20/minute")
def get_accuracy_summary(
    request: Request,
    days: int = Query(default=30, ge=1, le=365),
    db: Session = Depends(get_db),
):
    """Get aggregated prediction accuracy summary.

    Computes accuracy metrics for completed races in the specified date range.
    Limited to the most recent 200 evaluated races for performance.

    Args:
        days: Number of days to look back
        db: Database session

    Returns:
        Accuracy summary with daily trend and confidence buckets
    """
    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    start_date = date.today() - timedelta(days=days)
    end_date = date.today()

    # Get completed races in date range with results
    races = (
        db.query(Race)
        .join(Starter, Race.id == Starter.race_id)
        .filter(Starter.placing.isnot(None))
        .join(Meeting)
        .filter(Meeting.meeting_date >= start_date, Meeting.meeting_date <= end_date)
        .order_by(Meeting.meeting_date.desc(), Race.race_datetime.desc().nulls_last())
        .distinct()
        .limit(200)
        .all()
    )

    engine = PredictionEngine(db)

    total_winner_correct = 0
    total_top3_overlap = 0.0
    total_brier = 0.0
    race_count = 0

    daily_data: dict[str, dict] = {}
    buckets = {
        "high": {"races": 0, "winner_correct": 0, "brier_sum": 0.0},
        "medium": {"races": 0, "winner_correct": 0, "brier_sum": 0.0},
        "low": {"races": 0, "winner_correct": 0, "brier_sum": 0.0},
    }
    recent_races: list[dict] = []

    for race in races:
        comparison = engine.compare_prediction_to_actual(race.id)
        if not comparison:
            continue

        winner_correct = comparison["winner_correct"]
        top3_overlap = comparison["top3_overlap"]
        brier = comparison["brier_score"]

        # Find predicted winner probability for bucketing
        predictions = comparison.get("predictions", [])
        predicted_winner_prob = 0.0
        if predictions:
            predicted_winner = max(predictions, key=lambda p: p["win_probability"])
            predicted_winner_prob = predicted_winner["win_probability"]

        if predicted_winner_prob >= 0.40:
            bucket_key = "high"
        elif predicted_winner_prob >= 0.20:
            bucket_key = "medium"
        else:
            bucket_key = "low"

        buckets[bucket_key]["races"] += 1
        if winner_correct:
            buckets[bucket_key]["winner_correct"] += 1
        buckets[bucket_key]["brier_sum"] += brier

        race_date_str = comparison.get("race_date")
        if not race_date_str and race.meeting and race.meeting.meeting_date:
            race_date_str = race.meeting.meeting_date.isoformat()
        if not race_date_str:
            race_date_str = "unknown"

        if race_date_str not in daily_data:
            daily_data[race_date_str] = {
                "brier_sum": 0.0,
                "winner_correct": 0,
                "races": 0,
            }
        daily_data[race_date_str]["brier_sum"] += brier
        if winner_correct:
            daily_data[race_date_str]["winner_correct"] += 1
        daily_data[race_date_str]["races"] += 1

        total_winner_correct += 1 if winner_correct else 0
        total_top3_overlap += top3_overlap
        total_brier += brier
        race_count += 1

        recent_races.append(
            {
                "race_id": comparison["race_id"],
                "race_number": comparison.get("race_number"),
                "venue": comparison.get("venue"),
                "race_date": comparison.get("race_date"),
                "field_size": comparison.get("field_size", 0),
                "winner_correct": winner_correct,
                "top3_overlap": top3_overlap,
                "brier_score": brier,
            }
        )

    if race_count == 0:
        return AccuracySummary(
            summary={
                "overall_win_accuracy": 0.0,
                "top3_accuracy": 0.0,
                "avg_brier_score": 0.0,
                "num_races_evaluated": 0,
            },
            daily_trend=[],
            confidence_buckets=ConfidenceBuckets(
                high=ConfidenceBucket(races=0, win_accuracy=0.0, avg_brier=0.0),
                medium=ConfidenceBucket(races=0, win_accuracy=0.0, avg_brier=0.0),
                low=ConfidenceBucket(races=0, win_accuracy=0.0, avg_brier=0.0),
            ),
            recent_races=[],
        )

    daily_trend = []
    for d in sorted(daily_data.keys()):
        data = daily_data[d]
        daily_trend.append(
            DailyTrendItem(
                date=d,
                avg_brier=data["brier_sum"] / data["races"],
                win_accuracy=data["winner_correct"] / data["races"],
                races=data["races"],
            )
        )

    def _bucket_stats(bucket: dict) -> ConfidenceBucket:
        if bucket["races"] == 0:
            return ConfidenceBucket(races=0, win_accuracy=0.0, avg_brier=0.0)
        return ConfidenceBucket(
            races=bucket["races"],
            win_accuracy=bucket["winner_correct"] / bucket["races"],
            avg_brier=bucket["brier_sum"] / bucket["races"],
        )

    return AccuracySummary(
        summary={
            "overall_win_accuracy": total_winner_correct / race_count,
            "top3_accuracy": total_top3_overlap / race_count / 3.0,
            "avg_brier_score": total_brier / race_count,
            "num_races_evaluated": race_count,
        },
        daily_trend=daily_trend,
        confidence_buckets=ConfidenceBuckets(
            high=_bucket_stats(buckets["high"]),
            medium=_bucket_stats(buckets["medium"]),
            low=_bucket_stats(buckets["low"]),
        ),
        recent_races=[RecentRaceAccuracy(**r) for r in recent_races[:50]],
    )


# ── Export endpoints (CSV/Parquet) ──────────────────────────────────────


@api_router.get("/export/ratings.csv")
@limiter.limit("10/minute")
def export_ratings_csv(
    request: Request,
    entity_type: str | None = Query(
        default=None,
        description="Filter by entity type: horse, driver, trainer (default: all)",
    ),
    limit: int = Query(default=5000, le=50000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export all ratings as CSV.

    Returns a CSV file with all rating snapshots across entity types.
    Supports filtering by entity_type and limit.
    """
    entity_types = (
        [
            EntityType(entity_type.lower()),
        ]
        if entity_type
        else [EntityType.HORSE, EntityType.DRIVER, EntityType.TRAINER]
    )

    output = StringIO()
    writer = csv.DictWriter(
        output,
        fieldnames=[
            "entity_type",
            "entity_id",
            "entity_name",
            "rating",
            "rd",
            "race_count",
            "as_of_race_id",
        ],
    )
    writer.writeheader()

    rows_written = 0
    for et in entity_types:
        snapshots = RatingSnapshotRepository.get_top_ratings(db, et, limit=limit)
        for snapshot in snapshots:
            if rows_written >= limit:
                break

            # Load entity name
            entity_name = None
            if et == EntityType.HORSE:
                entity = db.query(Horse).filter(Horse.id == snapshot.entity_id).first()
                entity_name = entity.name if entity else None
            elif et == EntityType.DRIVER:
                entity = (
                    db.query(Driver).filter(Driver.id == snapshot.entity_id).first()
                )
                entity_name = entity.name if entity else None
            elif et == EntityType.TRAINER:
                entity = (
                    db.query(Trainer).filter(Trainer.id == snapshot.entity_id).first()
                )
                entity_name = entity.name if entity else None

            writer.writerow(
                {
                    "entity_type": et.value,
                    "entity_id": snapshot.entity_id,
                    "entity_name": entity_name,
                    "rating": snapshot.rating,
                    "rd": snapshot.rd,
                    "race_count": (
                        snapshot.meta.get("race_count") if snapshot.meta else None
                    ),
                    "as_of_race_id": snapshot.as_of_race_id,
                }
            )
            rows_written += 1

        if rows_written >= limit:
            break

    return Response(
        content=output.getvalue(),
        media_type="text/csv",
        headers={"Content-Disposition": 'attachment; filename="ratings.csv"'},
    )


@api_router.get("/export/ratings.parquet")
@limiter.limit("10/minute")
def export_ratings_parquet(
    request: Request,
    entity_type: str | None = Query(
        default=None,
        description="Filter by entity type: horse, driver, trainer (default: all)",
    ),
    limit: int = Query(default=5000, le=50000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export all ratings as Parquet.

    Returns a Parquet file with all rating snapshots across entity types.
    Requires pyarrow or fastparquet to be installed.
    """
    try:
        import pandas as pd
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="Parquet export requires pandas. Install with: pip install pandas",
        ) from None

    try:
        import pyarrow  # noqa: F401
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail=(
                "Parquet export requires pyarrow. " "Install with: pip install pyarrow"
            ),
        ) from None

    entity_types = (
        [
            EntityType(entity_type.lower()),
        ]
        if entity_type
        else [EntityType.HORSE, EntityType.DRIVER, EntityType.TRAINER]
    )

    rows: list[dict] = []
    for et in entity_types:
        snapshots = RatingSnapshotRepository.get_top_ratings(db, et, limit=limit)
        for snapshot in snapshots:
            if len(rows) >= limit:
                break

            entity_name = None
            if et == EntityType.HORSE:
                entity = db.query(Horse).filter(Horse.id == snapshot.entity_id).first()
                entity_name = entity.name if entity else None
            elif et == EntityType.DRIVER:
                entity = (
                    db.query(Driver).filter(Driver.id == snapshot.entity_id).first()
                )
                entity_name = entity.name if entity else None
            elif et == EntityType.TRAINER:
                entity = (
                    db.query(Trainer).filter(Trainer.id == snapshot.entity_id).first()
                )
                entity_name = entity.name if entity else None

            rows.append(
                {
                    "entity_type": et.value,
                    "entity_id": snapshot.entity_id,
                    "entity_name": entity_name,
                    "rating": snapshot.rating,
                    "rd": snapshot.rd,
                    "race_count": (
                        snapshot.meta.get("race_count") if snapshot.meta else None
                    ),
                    "as_of_race_id": snapshot.as_of_race_id,
                }
            )

        if len(rows) >= limit:
            break

    if not rows:
        return Response(
            content=b"",
            media_type="application/octet-stream",
            headers={"Content-Disposition": 'attachment; filename="ratings.parquet"'},
        )

    df = pd.DataFrame(rows)
    buffer = BytesIO()
    df.to_parquet(buffer, index=False)
    buffer.seek(0)

    return Response(
        content=buffer.getvalue(),
        media_type="application/octet-stream",
        headers={"Content-Disposition": 'attachment; filename="ratings.parquet"'},
    )


@api_router.get("/export/predictions.csv")
@limiter.limit("10/minute")
def export_predictions_csv(
    request: Request,
    date_from: str | None = Query(default=None, description="YYYY-MM-DD"),
    date_to: str | None = Query(default=None, description="YYYY-MM-DD"),
    limit: int = Query(default=1000, le=10000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export recent predictions as CSV.

    Returns a CSV file with predictions for completed races.
    Supports filtering by date range and limit.
    """
    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    # Determine date range
    if date_from:
        start_date = parse_date(date_from)
    else:
        start_date = date.today() - timedelta(days=7)

    if date_to:
        end_date = parse_date(date_to)
    else:
        end_date = date.today()

    # Get races with results in the date range
    races_with_results = (
        db.query(Race)
        .join(Race.meeting)
        .filter(
            Meeting.meeting_date >= start_date,
            Meeting.meeting_date <= end_date,
        )
        .order_by(Meeting.meeting_date.desc(), Race.race_datetime.desc().nulls_last())
        .limit(limit)
        .all()
    )

    engine = PredictionEngine(db)
    output = StringIO()
    writer = csv.writer(output)
    writer.writerow(
        [
            "race_id",
            "race_number",
            "venue",
            "distance_m",
            "race_date",
            "starter_id",
            "horse_id",
            "horse_name",
            "driver_id",
            "driver_name",
            "trainer_id",
            "trainer_name",
            "barrier",
            "handicap_m",
            "effective_rating",
            "win_probability",
            "place_probability",
            "place_score",
            "predicted_placing",
            "ci_lower",
            "ci_upper",
        ]
    )

    rows_written = 0
    for race in races_with_results:
        if rows_written >= limit:
            break

        starters = db.query(Starter).filter(Starter.race_id == race.id).all()
        if not starters:
            continue

        try:
            prediction = engine.predict_race(race, starters)
        except Exception:
            continue

        for pred in prediction.predictions:
            if rows_written >= limit:
                break

            writer.writerow(
                [
                    race.id,
                    race.race_number or "",
                    race.meeting.venue if race.meeting else "",
                    race.distance_m or "",
                    (
                        race.meeting.meeting_date.isoformat()
                        if race.meeting and race.meeting.meeting_date
                        else ""
                    ),
                    pred.starter_id,
                    pred.horse_id,
                    pred.horse_name or "",
                    pred.driver_id or "",
                    pred.driver_name or "",
                    pred.trainer_id or "",
                    pred.trainer_name or "",
                    pred.barrier or "",
                    pred.handicap_m or "",
                    f"{pred.effective_rating:.1f}",
                    f"{pred.win_probability:.4f}",
                    f"{pred.place_probability:.4f}",
                    f"{pred.place_score:.1f}",
                    pred.predicted_placing,
                    f"{pred.confidence_interval_low:.1f}",
                    f"{pred.confidence_interval_high:.1f}",
                ]
            )
            rows_written += 1

    return Response(
        content=output.getvalue(),
        media_type="text/csv",
        headers={"Content-Disposition": 'attachment; filename="predictions.csv"'},
    )


@api_router.get("/export/predictions.parquet")
@limiter.limit("10/minute")
def export_predictions_parquet(
    request: Request,
    date_from: str | None = Query(default=None, description="YYYY-MM-DD"),
    date_to: str | None = Query(default=None, description="YYYY-MM-DD"),
    limit: int = Query(default=1000, le=10000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export recent predictions as Parquet.

    Returns a Parquet file with predictions for completed races.
    Requires pyarrow or fastparquet to be installed.
    """
    try:
        import pandas as pd
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="Parquet export requires pandas. Install with: pip install pandas",
        ) from None

    try:
        import pyarrow  # noqa: F401
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail=(
                "Parquet export requires pyarrow. " "Install with: pip install pyarrow"
            ),
        ) from None

    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    # Determine date range
    if date_from:
        start_date = parse_date(date_from)
    else:
        start_date = date.today() - timedelta(days=7)

    if date_to:
        end_date = parse_date(date_to)
    else:
        end_date = date.today()

    # Get races with results in the date range
    races_with_results = (
        db.query(Race)
        .join(Race.meeting)
        .filter(
            Meeting.meeting_date >= start_date,
            Meeting.meeting_date <= end_date,
        )
        .order_by(Meeting.meeting_date.desc(), Race.race_datetime.desc().nulls_last())
        .limit(limit)
        .all()
    )

    engine = PredictionEngine(db)
    rows: list[dict] = []

    for race in races_with_results:
        if len(rows) >= limit:
            break

        starters = db.query(Starter).filter(Starter.race_id == race.id).all()
        if not starters:
            continue

        try:
            prediction = engine.predict_race(race, starters)
        except Exception:
            continue

        for pred in prediction.predictions:
            if len(rows) >= limit:
                break

            rows.append(
                {
                    "race_id": race.id,
                    "race_number": race.race_number,
                    "venue": race.meeting.venue if race.meeting else None,
                    "distance_m": race.distance_m,
                    "race_date": (
                        race.meeting.meeting_date.isoformat()
                        if race.meeting and race.meeting.meeting_date
                        else None
                    ),
                    "starter_id": pred.starter_id,
                    "horse_id": pred.horse_id,
                    "horse_name": pred.horse_name,
                    "driver_id": pred.driver_id,
                    "driver_name": pred.driver_name,
                    "trainer_id": pred.trainer_id,
                    "trainer_name": pred.trainer_name,
                    "barrier": pred.barrier,
                    "handicap_m": pred.handicap_m,
                    "effective_rating": pred.effective_rating,
                    "win_probability": pred.win_probability,
                    "place_probability": pred.place_probability,
                    "place_score": pred.place_score,
                    "predicted_placing": pred.predicted_placing,
                    "ci_lower": pred.confidence_interval_low,
                    "ci_upper": pred.confidence_interval_high,
                }
            )

    if not rows:
        return Response(
            content=b"",
            media_type="application/octet-stream",
            headers={
                "Content-Disposition": 'attachment; filename="predictions.parquet"'
            },
        )

    df = pd.DataFrame(rows)
    buffer = BytesIO()
    df.to_parquet(buffer, index=False)
    buffer.seek(0)

    return Response(
        content=buffer.getvalue(),
        media_type="application/octet-stream",
        headers={"Content-Disposition": 'attachment; filename="predictions.parquet"'},
    )


@api_router.get("/export/race-predictions.pdf")
@limiter.limit("10/minute")
def export_race_predictions_pdf(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Export race predictions as a PDF document.

    Generates a PDF with a structured table showing each runner's rating,
    win probability, place probability, predicted placing, and confidence
    interval.  Requires the ``reportlab`` library.

    Args:
        race_id: Race ID to export.
        db: Database session.

    Returns:
        PDF binary response with appropriate Content-Type and disposition.
    """
    try:
        from reportlab.lib import colors
        from reportlab.lib.pagesizes import letter
        from reportlab.lib.styles import getSampleStyleSheet
        from reportlab.lib.units import inch
        from reportlab.platypus import (
            Paragraph,
            SimpleDocTemplate,
            Spacer,
            Table,
            TableStyle,
        )
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="PDF export requires reportlab. Install with: pip install reportlab",
        ) from None

    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    # ── Resolve race ─────────────────────────────────────────────────
    race = db.query(Race).filter(Race.id == race_id).first()
    if not race:
        raise HTTPException(status_code=404, detail="Race not found")

    # Eager-load the meeting for venue / date info
    race = (
        db.query(Race)
        .options(joinedload(Race.meeting))
        .filter(Race.id == race_id)
        .first()
    )

    starters = db.query(Starter).filter(Starter.race_id == race_id).all()
    if not starters:
        raise HTTPException(status_code=404, detail="No starters found for race")

    engine = PredictionEngine(db)
    prediction = engine.predict_race(race, starters)

    # ── Build PDF ────────────────────────────────────────────────────
    buffer = BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter)
    styles = getSampleStyleSheet()
    elements = []

    # Title
    venue = race.meeting.venue if race.meeting else "Unknown"
    race_date = (
        race.meeting.meeting_date.isoformat()
        if race.meeting and race.meeting.meeting_date
        else ""
    )
    title_text = (
        f"Race {race.race_number} — {venue} ({race_date})<br/>"
        f"Distance: {race.distance_m}m  |  "
        f"Starters: {len(prediction.predictions)}"
    )
    elements.append(Paragraph("<b>TipSharks — Race Predictions</b>", styles["Title"]))
    elements.append(Spacer(1, 0.15 * inch))
    elements.append(Paragraph(title_text, styles["Normal"]))
    elements.append(Spacer(1, 0.2 * inch))

    # Table header + rows
    table_data = [
        ["#", "Horse", "Driver", "Rating", "Win%", "Place%", "Pred.", "CI Range"],
    ]
    for idx, pred in enumerate(prediction.predictions, start=1):
        ci = (
            f"{pred.confidence_interval_low:.0f}–{pred.confidence_interval_high:.0f}"
            if pred.confidence_interval_low is not None
            else "—"
        )
        table_data.append(
            [
                str(idx),
                pred.horse_name or f"ID {pred.horse_id}",
                pred.driver_name or "—",
                f"{pred.effective_rating:.1f}",
                f"{pred.win_probability:.1%}",
                f"{pred.place_probability:.1%}",
                str(pred.predicted_placing),
                ci,
            ]
        )

    # Column widths
    col_widths = [
        0.3 * inch,
        1.6 * inch,
        1.4 * inch,
        0.8 * inch,
        0.7 * inch,
        0.7 * inch,
        0.5 * inch,
        1.2 * inch,
    ]
    table = Table(table_data, colWidths=col_widths, repeatRows=1)
    table.setStyle(
        TableStyle(
            [
                ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#1a5276")),
                ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
                ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                ("FONTSIZE", (0, 0), (-1, 0), 9),
                ("FONTSIZE", (0, 1), (-1, -1), 8),
                ("ALIGN", (0, 0), (-1, -1), "CENTER"),
                ("ALIGN", (1, 1), (2, -1), "LEFT"),
                ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
                ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
                (
                    "ROWBACKGROUNDS",
                    (0, 1),
                    (-1, -1),
                    [colors.white, colors.HexColor("#f2f3f4")],
                ),
                ("TOPPADDING", (0, 0), (-1, -1), 4),
                ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
            ]
        )
    )
    elements.append(table)

    # Footer metadata
    elements.append(Spacer(1, 0.2 * inch))
    from datetime import UTC, datetime

    elements.append(
        Paragraph(
            f"Generated: {datetime.now(UTC).isoformat()} | "
            f"Field size: {prediction.metadata.get('field_size', '—')} | "
            f"Avg rating: {prediction.metadata.get('avg_rating', '—'):.1f}",
            styles["Normal"],
        )
    )

    doc.build(elements)
    pdf_bytes = buffer.getvalue()

    filename = f"race_{race_id}_predictions.pdf"
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
            "Content-Length": str(len(pdf_bytes)),
        },
    )


# ── WebSocket endpoint for live race updates ────────────────────────


@app.websocket("/ws/races/{race_id}")
async def websocket_race_endpoint(websocket: WebSocket, race_id: int):
    """WebSocket endpoint for live race updates.

    On connect, validates the race exists in the database and sends
    the initial race state (race details + starters). Broadcasts live
    updates as they become available via the simulation task.

    Args:
        websocket: The WebSocket connection.
        race_id: Race identifier.
    """
    from apps.backend.api.websocket import _build_initial_state, manager
    from packages.core.storage.models import Starter

    # Validate race exists
    try:
        with get_session() as session:
            race = session.query(Race).filter(Race.id == race_id).first()
            if not race:
                await websocket.close(code=4004, reason="Race not found")
                return

            # Eager-load relationships for the initial state
            race = (
                session.query(Race)
                .options(
                    joinedload(Race.meeting),
                    joinedload(Race.starters).joinedload(Starter.horse),
                    joinedload(Race.starters).joinedload(Starter.driver),
                    joinedload(Race.starters).joinedload(Starter.trainer),
                )
                .filter(Race.id == race_id)
                .first()
            )
            starters = race.starters if race else []

            # Build initial state
            initial_state = _build_initial_state(race, starters)
    except Exception:
        logger.error("Failed to validate race for WebSocket", exc_info=True)
        await websocket.close(code=1011, reason="Internal server error")
        return

    # Accept connection and register
    await manager.connect(websocket, race_id)

    # Send initial state
    await manager.send_personal_message(initial_state, websocket)

    # Start simulation on first connection
    if not manager.is_simulation_running(race_id):
        manager.start_simulation(race_id)

    try:
        while True:
            data = await websocket.receive_text()
            try:
                msg = json.loads(data)
                msg_type = msg.get("type", "")
                msg_race_id = msg.get("race_id", race_id)

                if msg_type == "subscribe" and isinstance(msg_race_id, int):
                    logger.info(
                        "Client subscribed to race updates",
                        extra={"race_id": msg_race_id},
                    )
                    # Confirm subscription
                    await manager.send_personal_message(
                        json.dumps({"type": "subscribed", "race_id": msg_race_id}),
                        websocket,
                    )
                else:
                    logger.debug(
                        "Unknown WebSocket message type",
                        extra={"type": msg_type, "race_id": race_id},
                    )
            except json.JSONDecodeError:
                logger.warning(
                    "Invalid JSON received on WebSocket",
                    extra={"race_id": race_id},
                )
    except WebSocketDisconnect:
        logger.info("WebSocket client disconnected", extra={"race_id": race_id})
    except Exception:
        logger.error("WebSocket error", extra={"race_id": race_id}, exc_info=True)
    finally:
        await manager.disconnect(websocket, race_id)
        # Stop simulation when last client leaves
        if manager.get_connection_count(race_id) == 0 and manager.is_simulation_running(
            race_id
        ):
            manager.stop_simulation(race_id)


# Prometheus metrics endpoint stub
# In production, integrate with the prometheus-client library.
# See docs/monitoring.md for instrumentation guidance.
@app.get("/metrics", response_class=PlainTextResponse)
def metrics():
    """Prometheus metrics endpoint stub.

    Returns a placeholder response. To serve real metrics:
    1. pip install prometheus-client
    2. Create counters/histograms/gauges in a metrics module
    3. Call generate_latest(REGISTRY) here

    See docs/monitoring.md for the full instrumentation guide.
    """
    return PlainTextResponse(
        content="# TipSharks API metrics stub\n# Integrate prometheus-client for production use\n",
        status_code=200,
    )


# Include the versioned API router
app.include_router(api_router)

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
