Coverage for apps / backend / api / main.py: 48%
1081 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-08 08:37 +1200
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-08 08:37 +1200
1"""FastAPI application for TipSharks ratings API."""
3import asyncio
4import csv
5import hashlib
6import json
7import re
8from datetime import UTC, date, datetime, timedelta
9from io import BytesIO, StringIO
10from pathlib import Path
11from typing import Any
13from fastapi import (
14 APIRouter,
15 Depends,
16 FastAPI,
17 HTTPException,
18 Query,
19 Request,
20 Response,
21 Security,
22 WebSocket,
23 WebSocketDisconnect,
24)
25from fastapi.middleware.cors import CORSMiddleware
26from fastapi.responses import HTMLResponse, PlainTextResponse, RedirectResponse
27from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
28from fastapi.staticfiles import StaticFiles
29from pydantic import BaseModel, ConfigDict, Field
30from slowapi import Limiter, _rate_limit_exceeded_handler
31from slowapi.errors import RateLimitExceeded
32from sqlalchemy import func
33from sqlalchemy.orm import Session, joinedload
35from packages.core.common.logging import get_logger, setup_logging
36from packages.core.common.rate_limit import get_user_rate_limit_key
37from packages.core.common.scheduler import TipSharksScheduler
38from packages.core.common.settings import HRNZ_ALL_CLUB_CODES, get_settings
39from packages.core.common.utils import parse_date
40from packages.core.ratings.recompute import recompute_ratings
41from packages.core.storage.audit import AuditLogger
42from packages.core.storage.database import get_session
43from packages.core.storage.ingestion import IngestionService
44from packages.core.storage.models import (
45 AuditLog,
46 Driver,
47 EntityType,
48 Horse,
49 Meeting,
50 Race,
51 RatingSnapshot,
52 Trainer,
53)
54from packages.core.storage.repositories import RatingSnapshotRepository
# Process-wide setup: logging first, then the module logger and settings.
setup_logging()
logger = get_logger(__name__)
settings = get_settings()

# Single scheduler instance shared by the startup/shutdown hooks below.
scheduler = TipSharksScheduler()

# Rate limiting is keyed per user when the mobile client sends an X-User-ID
# header; anonymous (e.g. browser) requests fall back to an IP-based key.
limiter = Limiter(key_func=get_user_rate_limit_key)

app = FastAPI(
    title="TipSharks API",
    description=(
        "Advanced harness racing ratings and predictions "
        "powered by multi-runner Elo algorithms"
    ),
    version="0.2.0",
)

# slowapi looks the limiter up on app.state; register its 429 handler as well.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# All versioned API endpoints hang off this router under /v1.
api_router = APIRouter(prefix="/v1")

# Un-versioned path prefixes that are redirected to their /v1/ equivalents.
_LEGACY_API_PREFIXES = {
    "/ratings",
    "/races",
    "/predictions",
    "/analytics",
    "/admin",
    "/webhook",
}
# Add CORS middleware
# Parse CORS origins from settings (supports "*" or a comma-separated list).
# Blank entries (e.g. from a trailing comma or ",,") are dropped so that an
# empty string is never registered as an allowed origin.
cors_origins = (
    ["*"]
    if settings.api.cors_allow_origins == "*"
    else [
        origin.strip()
        for origin in settings.api.cors_allow_origins.split(",")
        if origin.strip()
    ]
)
# NOTE(review): allow_credentials=True combined with a wildcard origin is
# kept as-is for compatibility; confirm this is intended, since browsers do
# not accept "Access-Control-Allow-Origin: *" on credentialed requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.middleware("http")
async def legacy_api_redirects(request: Request, call_next):
    """Redirect legacy API paths to /v1/ equivalents.

    A path is treated as legacy only when it equals one of
    ``_LEGACY_API_PREFIXES`` or continues with ``/`` after it, so unrelated
    paths that merely share leading characters are not redirected.

    GET and HEAD requests receive a 301. Every other method receives a 308,
    because user agents are allowed to rewrite a POST into a GET when
    following a 301, which would silently drop the request body.
    The query string is preserved.
    Keeps /health, /ui/*, /static, /docs/site, and root at their original paths.
    """
    path = request.url.path
    is_legacy = any(
        path == prefix or path.startswith(f"{prefix}/")
        for prefix in _LEGACY_API_PREFIXES
    )
    if not is_legacy:
        return await call_next(request)

    target = f"/v1{path}"
    # Preserve query string if present
    if request.url.query:
        target = f"{target}?{request.url.query}"

    # 308 is the method-preserving counterpart of 301.
    status_code = 301 if request.method in ("GET", "HEAD") else 308
    return RedirectResponse(url=target, status_code=status_code)
@app.middleware("http")
async def add_no_cache_headers(request: Request, call_next):
    """Mark ratings/races responses as non-cacheable for browsers and proxies.

    Headers are applied with ``setdefault`` so values already set on the
    response are left untouched.
    """
    response = await call_next(request)

    uncached_prefixes = ("/v1/ratings", "/v1/races", "/ratings", "/races")
    if not request.url.path.startswith(uncached_prefixes):
        return response

    for header_name, header_value in (
        ("Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"),
        ("Pragma", "no-cache"),
        ("Expires", "0"),
    ):
        response.headers.setdefault(header_name, header_value)
    return response
@app.middleware("http")
async def add_request_id(request: Request, call_next):
    """Add a unique request ID to every request and response.

    Reads ``x-request-id`` from the incoming request headers if present and
    non-empty, otherwise generates a new UUID. The ID is stored on
    ``request.state`` and echoed back in the ``x-request-id`` response header.
    """
    import uuid

    # ``or`` (rather than a .get() default) also covers a header that is
    # present but empty, and only generates a UUID when one is needed.
    request_id = request.headers.get("x-request-id") or str(uuid.uuid4())
    request.state.request_id = request_id

    response = await call_next(request)
    response.headers["x-request-id"] = request_id
    return response
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log basic information about every API request.

    Records HTTP method, path, status code, duration, and the request ID
    (if one was assigned by the ``add_request_id`` middleware).
    """
    from time import perf_counter

    # perf_counter() is monotonic, so the measured duration cannot go
    # negative or jump when the system clock is adjusted.
    started = perf_counter()
    response = await call_next(request)
    elapsed_seconds = perf_counter() - started

    logger.info(
        "API request",
        extra={
            "method": request.method,
            "path": request.url.path,
            "status_code": response.status_code,
            "duration_ms": round(elapsed_seconds * 1000, 2),
            "request_id": getattr(request.state, "request_id", None),
        },
    )
    return response
def _if_none_match_matches(header_value: str, etag_hex: str) -> bool:
    """Return True when an If-None-Match header value matches ``etag_hex``.

    Accepts a comma-separated list of entity tags, the ``*`` wildcard, and
    weak validators (``W/"..."``); comparison ignores the weak prefix.
    """
    for candidate in header_value.split(","):
        candidate = candidate.strip()
        if candidate == "*":
            return True
        if candidate.startswith("W/"):
            candidate = candidate[2:]
        if candidate.strip('"') == etag_hex:
            return True
    return False


@app.middleware("http")
async def rating_etag_middleware(request: Request, call_next):
    """Add ETag and Last-Modified headers to rating endpoints.

    The ETag is an MD5 hash of the response body. Last-Modified is the most
    recent rating snapshot timestamp. A 304 Not Modified is returned when a
    GET/HEAD request that produced a 200 carries an If-None-Match header
    matching the computed ETag. If-Modified-Since is not evaluated on its
    own: only the ETag comparison decides whether a 304 is sent.
    """
    path = request.url.path

    # Only apply to rating detail and list endpoints
    rating_prefixes = (
        "/v1/ratings/horses",
        "/v1/ratings/drivers",
        "/v1/ratings/trainers",
    )
    if not path.startswith(rating_prefixes):
        return await call_next(request)

    # Get the most recent rating snapshot timestamp for Last-Modified.
    # This is best-effort: a failure is logged and the header is omitted.
    last_modified: datetime | None = None
    try:
        with get_session() as session:
            latest_ts = session.query(func.max(RatingSnapshot.created_at)).scalar()
            if latest_ts is not None:
                if latest_ts.tzinfo is None:
                    latest_ts = latest_ts.replace(tzinfo=UTC)
                # Normalise to UTC so the "GMT" label below is accurate.
                last_modified = latest_ts.astimezone(UTC)
    except Exception:
        logger.warning(
            "Could not determine Last-Modified for rating response",
            exc_info=True,
        )

    response = await call_next(request)

    # Read the response body to compute ETag
    try:
        chunks = []
        async for chunk in response.body_iterator:
            chunks.append(chunk)
        body = b"".join(chunks)
    except Exception:
        # If we can't read the body, just pass through without ETag
        return response

    # MD5 is used purely as a content fingerprint, not for security.
    etag_hex = hashlib.md5(body, usedforsecurity=False).hexdigest()
    etag_value = f'"{etag_hex}"'
    response.headers["ETag"] = etag_value

    if last_modified is not None:
        response.headers["Last-Modified"] = last_modified.strftime(
            "%a, %d %b %Y %H:%M:%S GMT"
        )

    # Conditional requests only make sense for successful safe requests.
    if_none_match = request.headers.get("if-none-match")
    is_conditional_candidate = (
        request.method in ("GET", "HEAD") and response.status_code == 200
    )
    if (
        is_conditional_candidate
        and if_none_match
        and _if_none_match_matches(if_none_match, etag_hex)
    ):
        not_modified_headers = {
            "ETag": etag_value,
            "Cache-Control": "no-store, no-cache, must-revalidate, max-age=0",
            "Pragma": "no-cache",
            "Expires": "0",
        }
        # Only echo Last-Modified when a value is actually known.
        if "Last-Modified" in response.headers:
            not_modified_headers["Last-Modified"] = response.headers["Last-Modified"]
        return Response(status_code=304, headers=not_modified_headers)

    # Reconstruct the response with captured body (body_iterator is consumed)
    return Response(
        content=body,
        status_code=response.status_code,
        headers=dict(response.headers),
        media_type=response.media_type,
    )
# Mount static files for web UI (only when the web directory is present).
web_dir = Path(__file__).parents[1] / "web"
if web_dir.exists():
    static_dir = web_dir / "static"
    app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")

    # The docs site directory is created on demand so the mount never fails
    # because the folder is missing.
    docs_site_dir = static_dir / "docs"
    docs_site_dir.mkdir(parents=True, exist_ok=True)
    docs_site_app = StaticFiles(directory=str(docs_site_dir), html=True)
    app.mount("/docs/site", docs_site_app, name="docs-site")
287# ── Scheduler lifecycle hooks ─────────────────────────────────────
@app.on_event("startup")
async def start_scheduler():
    """Start the background scheduler when the application starts.

    Does nothing (beyond logging) when ``settings.scheduler.enabled`` is
    falsy. Otherwise starts the scheduler, loads its default jobs and logs
    which job IDs were loaded.
    """
    if not settings.scheduler.enabled:
        logger.info("Scheduler is disabled via settings")
        return

    scheduler.start()
    loaded_job_ids = scheduler.load_default_jobs()
    if not loaded_job_ids:
        logger.info("No default scheduler jobs configured")
        return
    logger.info("Default scheduler jobs loaded", extra={"job_ids": loaded_job_ids})
@app.on_event("shutdown")
async def stop_scheduler():
    """Stop the background scheduler when the application shuts down.

    Only acts when the scheduler reports that it is running; the shutdown
    call waits for completion (``wait=True``) before the log line is written.
    """
    if not scheduler.running:
        return
    scheduler.shutdown(wait=True)
    logger.info("Scheduler shut down on app shutdown")
318security = HTTPBearer()
# FastAPI dependency: database session
def get_db():
    """Yield a database session for the duration of one request.

    The session comes from ``get_session()`` and is used as a context
    manager, so its exit logic runs once the request handler is finished.
    """
    session_scope = get_session()
    with session_scope as db_session:
        yield db_session
# Dependency for admin auth
def verify_admin_token(credentials: HTTPAuthorizationCredentials = Security(security)):
    """Verify admin token for protected endpoints.

    Args:
        credentials: Bearer credentials extracted by the ``security`` scheme.

    Returns:
        The supplied token when it matches ``settings.api.admin_token``.

    Raises:
        HTTPException: 401 when the token does not match, or when no admin
            token is configured (an unset token never authenticates anyone).
    """
    import secrets

    expected_token = settings.api.admin_token
    supplied_token = credentials.credentials

    # compare_digest runs in constant time, so response timing does not leak
    # how many leading characters of a guess were correct. Both sides are
    # encoded to bytes because the str form only accepts ASCII.
    token_ok = (
        isinstance(expected_token, str)
        and bool(expected_token)
        and secrets.compare_digest(
            supplied_token.encode("utf-8"), expected_token.encode("utf-8")
        )
    )
    if not token_ok:
        raise HTTPException(status_code=401, detail="Invalid authentication token")
    return supplied_token
336# Pydantic models for API responses
class HealthResponse(BaseModel):
    """Health check response."""

    # Service status label (the documented example is "healthy").
    status: str
    # API version string (the documented example is "0.2.0").
    version: str

    model_config = ConfigDict(
        json_schema_extra={"examples": [dict(status="healthy", version="0.2.0")]}
    )
class RatingResponse(BaseModel):
    """Rating information for an entity."""

    # Kind of entity being rated (the documented example is "horse").
    entity_type: str
    # Identifier and optional display name of the rated entity.
    entity_id: int
    entity_name: str | None = None
    # Rating value plus two optional companions that default to None.
    rating: float
    rd: float | None = None
    race_count: int | None = None
    # ID of the race this rating is current as of.
    as_of_race_id: int

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                dict(
                    entity_type="horse",
                    entity_id=42,
                    entity_name="Some Delight",
                    rating=1520.5,
                    rd=85.3,
                    race_count=24,
                    as_of_race_id=12345,
                ),
            ]
        }
    )
class RatingHistoryItem(BaseModel):
    """Single rating history point."""

    # Race the rating belongs to, and the rating value at that point.
    race_id: int
    rating: float
    # Optional extras; both default to None when not supplied.
    rd: float | None = None
    race_date: str | None = None
class HorseDetailResponse(BaseModel):
    """Detailed horse information with rating history."""

    # Identity of the horse.
    horse_id: int
    name: str
    # Current rating figures; None when not supplied.
    current_rating: float | None = None
    current_rd: float | None = None
    race_count: int
    # History points; empty list when none are supplied.
    rating_history: list[RatingHistoryItem] = []

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "horse_id": 42,
                    "name": "Some Delight",
                    "current_rating": 1520.5,
                    "current_rd": 85.3,
                    "race_count": 24,
                    "rating_history": [
                        dict(
                            race_id=12345,
                            rating=1520.5,
                            rd=85.3,
                            race_date="2026-05-01",
                        ),
                        dict(
                            race_id=12340,
                            rating=1510.0,
                            rd=90.1,
                            race_date="2026-04-28",
                        ),
                    ],
                },
            ]
        }
    )
class DriverDetailResponse(BaseModel):
    """Detailed driver information with rating history."""

    # Identity of the driver.
    driver_id: int
    name: str
    # Current rating figures; None when not supplied.
    current_rating: float | None = None
    current_rd: float | None = None
    race_count: int
    # History points; empty list when none are supplied.
    rating_history: list[RatingHistoryItem] = []
class TrainerDetailResponse(BaseModel):
    """Detailed trainer information with rating history."""

    # Identity of the trainer.
    trainer_id: int
    name: str
    # Current rating figures; None when not supplied.
    current_rating: float | None = None
    current_rd: float | None = None
    race_count: int
    # History points; empty list when none are supplied.
    rating_history: list[RatingHistoryItem] = []
class PaginationMeta(BaseModel):
    """Pagination metadata with next/prev navigation links."""

    # Size of the full result set and the window that was requested.
    total: int
    limit: int
    offset: int
    # Navigation links; None when there is no such page.
    next: str | None = None
    prev: str | None = None
class PaginatedRatingResponse(BaseModel):
    """Paginated response for rating list endpoints."""

    # One page of ratings plus the metadata describing that page.
    data: list[RatingResponse]
    meta: PaginationMeta

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "data": [
                        dict(
                            entity_type="horse",
                            entity_id=42,
                            entity_name="Some Delight",
                            rating=1520.5,
                            rd=85.3,
                            race_count=24,
                            as_of_race_id=12345,
                        ),
                    ],
                    "meta": {
                        "total": 1,
                        "limit": 100,
                        "offset": 0,
                        "next": "/v1/ratings/horses?limit=100&offset=100",
                        "prev": None,
                    },
                },
            ]
        }
    )
class PaginatedRaceResponse(BaseModel):
    """Paginated response for race list endpoints."""

    # Races are passed through as plain dicts (no dedicated model here).
    data: list[dict]
    # Metadata describing the returned page.
    meta: PaginationMeta
class IngestionRequest(BaseModel):
    """Request to trigger ingestion."""

    # Both bounds are required strings.
    date_from: str = Field(description="Start date (YYYY-MM-DD)")
    date_to: str = Field(description="End date (YYYY-MM-DD)")
class IngestionResponse(BaseModel):
    """Response from ingestion."""

    # Integer counters reported for an ingestion run.
    meetings: int
    races: int
    starters: int
    errors: int

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [dict(meetings=3, races=24, starters=192, errors=0)]
        }
    )
class RecomputeRequest(BaseModel):
    """Request to trigger recompute."""

    # Required date range.
    date_from: str = Field(description="Start date (YYYY-MM-DD)")
    date_to: str = Field(description="End date (YYYY-MM-DD)")
    # Optional switches, both off by default.
    clear_existing: bool = Field(False, description="Clear existing ratings first")
    learn_adjustments: bool = Field(
        False, description="Learn barrier/handicap adjustments"
    )
class RecomputeResponse(BaseModel):
    """Response from recompute."""

    # Count of snapshots created by the recompute.
    snapshots_created: int

    model_config = ConfigDict(
        json_schema_extra={"examples": [dict(snapshots_created=1520)]}
    )
class ScrapeRequest(BaseModel):
    """Request to trigger a scrape/ingestion."""

    # Optional explicit sources: URLs and/or club codes.
    urls: list[str] | None = Field(
        None,
        description="Optional HRNZ results URLs or URL paths (e.g., 010741rs.htm)",
    )
    club_codes: list[str] | str | None = Field(
        None,
        description="HRNZ club codes to generate URLs for (two digits each)",
    )
    # Required date range.
    date_from: str = Field(description="Start date (YYYY-MM-DD)")
    date_to: str = Field(description="End date (YYYY-MM-DD)")
    # Post-ingest behaviour; recompute is on by default, the rest are off.
    recompute: bool = Field(True, description="Recompute ratings after ingest")
    clear_existing: bool = Field(
        False, description="Clear existing ratings before recompute"
    )
    learn_adjustments: bool = Field(
        False, description="Learn barrier/handicap adjustments"
    )
class ScrapeResponse(BaseModel):
    """Response from scrape/ingestion webhook."""

    # Integer counters reported for the scrape run.
    meetings: int
    races: int
    starters: int
    horses: int
    drivers: int
    trainers: int
    errors: int
    # Whether a recompute ran, and how many snapshots it created.
    recomputed: bool
    snapshots_created: int
582# ── Scheduler job management models ────────────────────────────────
class SchedulerJobInfo(BaseModel):
    """Information about a scheduled job."""

    # Job identifier and display name.
    id: str
    name: str
    # Next run time as a string; None when not supplied.
    next_run_time: str | None = None
    # String form of the job's trigger.
    trigger: str
class SchedulerJobListResponse(BaseModel):
    """Response with list of scheduled jobs."""

    # The jobs themselves and a count reported alongside them.
    jobs: list[SchedulerJobInfo]
    total: int
class AddSchedulerJobRequest(BaseModel):
    """Request to add a scheduled job."""

    # Required: what to run and when.
    job_type: str = Field(description="Job type: ingest, recompute, or scrape")
    cron_expr: str = Field(description="Cron expression for scheduling")
    job_id: str | None = Field(
        None, description="Optional custom job ID (auto-generated if omitted)"
    )

    # Options for ingest jobs.
    category: str = Field("H", description="Racing category (T, H, G)")
    source: str = Field("tab", description="Data source (tab, ingest)")

    # Options for recompute jobs.
    clear: bool = Field(False, description="Clear existing ratings before recompute")

    # Options for scrape jobs.
    urls: list[str] | None = Field(None, description="HRNZ result URLs to scrape")
    club_codes: list[str] | None = Field(None, description="HRNZ club codes")

    # Optional date range.
    date_from: str | None = Field(None, description="Start date (YYYY-MM-DD)")
    date_to: str | None = Field(None, description="End date (YYYY-MM-DD)")
class AddSchedulerJobResponse(BaseModel):
    """Response after adding a scheduled job."""

    # ID of the job and a human-readable message.
    job_id: str
    message: str
class RemoveSchedulerJobResponse(BaseModel):
    """Response after removing a scheduled job."""

    # ID of the job the removal request referred to.
    job_id: str
    # Removal outcome flag and an accompanying message.
    removed: bool
    message: str
class PredictionResponse(BaseModel):
    """Prediction for a single starter."""

    # Participants: the horse is required, driver/trainer are optional.
    horse_id: int
    horse_name: str | None = None
    driver_id: int | None = None
    driver_name: str | None = None
    trainer_id: int | None = None
    trainer_name: str | None = None
    barrier: int | None = None

    # Required model outputs.
    effective_rating: float
    win_probability: float
    place_probability: float
    place_score: float
    predicted_placing: int

    # Optional bounds; None when not supplied.
    ci_lower: float | None = None
    ci_upper: float | None = None
    # NOTE(review): presumably the actual finishing position once known
    # (the documented example uses None) - confirm against the producer.
    placing: int | None = None
class RacePredictionResponse(BaseModel):
    """Predictions for all starters in a race."""

    # Race identity and optional descriptive fields.
    race_id: int
    race_number: int | None = None
    venue: str | None = None
    distance_m: int | None = None
    # One entry per starter; empty list when none are supplied.
    predictions: list[PredictionResponse] = []

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "race_id": 12345,
                    "race_number": 7,
                    "venue": "Addington",
                    "distance_m": 1980,
                    "predictions": [
                        dict(
                            horse_id=42,
                            horse_name="Some Delight",
                            driver_id=101,
                            driver_name="Ricky May",
                            trainer_id=201,
                            trainer_name="Mark Purdon",
                            barrier=3,
                            effective_rating=1520.5,
                            win_probability=0.35,
                            place_probability=0.62,
                            place_score=1.8,
                            predicted_placing=1,
                            ci_lower=1480.2,
                            ci_upper=1560.8,
                            placing=None,
                        ),
                    ],
                },
            ]
        }
    )
class ConfidenceBucket(BaseModel):
    """Accuracy metrics for a confidence bucket."""

    # Number of races in the bucket and its two accuracy figures.
    races: int
    win_accuracy: float
    avg_brier: float
class ConfidenceBuckets(BaseModel):
    """Accuracy metrics grouped by confidence."""

    # One bucket per confidence level; all three are required.
    high: ConfidenceBucket
    medium: ConfidenceBucket
    low: ConfidenceBucket
class DailyTrendItem(BaseModel):
    """Single day in the accuracy trend."""

    # Day as a string (the documented example is "2026-05-01").
    date: str
    # Figures for that day.
    avg_brier: float
    win_accuracy: float
    races: int
class RecentRaceAccuracy(BaseModel):
    """Accuracy metrics for a single evaluated race."""

    # Race identity and optional descriptive fields.
    race_id: int
    race_number: int | None = None
    venue: str | None = None
    race_date: str | None = None
    # Required evaluation results for the race.
    field_size: int
    winner_correct: bool
    top3_overlap: int
    brier_score: float
class AccuracySummary(BaseModel):
    """Aggregated prediction accuracy summary."""

    # Free-form dict of headline figures (no dedicated model).
    summary: dict
    # Per-day trend, per-confidence buckets and per-race detail.
    daily_trend: list[DailyTrendItem]
    confidence_buckets: ConfidenceBuckets
    recent_races: list[RecentRaceAccuracy]

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "summary": {
                        "overall_win_accuracy": 0.28,
                        "top3_accuracy": 0.52,
                        "avg_brier_score": 0.21,
                        "num_races_evaluated": 150,
                    },
                    "daily_trend": [
                        dict(
                            date="2026-05-01",
                            avg_brier=0.19,
                            win_accuracy=0.30,
                            races=10,
                        ),
                    ],
                    "confidence_buckets": {
                        "high": dict(races=20, win_accuracy=0.55, avg_brier=0.12),
                        "medium": dict(races=60, win_accuracy=0.28, avg_brier=0.20),
                        "low": dict(races=70, win_accuracy=0.14, avg_brier=0.28),
                    },
                    "recent_races": [
                        dict(
                            race_id=12345,
                            race_number=7,
                            venue="Addington",
                            race_date="2026-05-01",
                            field_size=10,
                            winner_correct=True,
                            top3_overlap=2,
                            brier_score=0.15,
                        ),
                    ],
                },
            ]
        }
    )
792# ── Audit log models ────────────────────────────────────────────────────────
class AuditLogCreateRequest(BaseModel):
    """Request to create an audit log entry."""

    # Required: which record changed and how.
    table_name: str = Field(description="Name of the table that was changed")
    record_id: str = Field(description="Primary key value of the changed record")
    action: str = Field(
        description="Type of change: INSERT, UPDATE, DELETE, or CORRECT"
    )

    # Optional before/after snapshots.
    old_values: dict[str, Any] | None = Field(
        None, description="Snapshot of values before the change"
    )
    new_values: dict[str, Any] | None = Field(
        None, description="Snapshot of values after the change"
    )

    # Optional attribution.
    changed_by: str | None = Field(None, description="User/system identifier")
    change_reason: str | None = Field(
        None, description="Human-readable reason for the change"
    )
class AuditLogEntry(BaseModel):
    """Single audit log entry in API responses."""

    # from_attributes lets this model be validated from attribute-bearing
    # objects as well as from dicts.
    model_config = ConfigDict(from_attributes=True)

    # Identity of the entry and of the record it describes.
    id: int
    table_name: str
    record_id: str
    action: str
    # Optional before/after snapshots and attribution.
    old_values: dict[str, Any] | None = None
    new_values: dict[str, Any] | None = None
    changed_by: str | None = None
    change_reason: str | None = None
    # NOTE(review): typed as str - presumably callers convert timestamps to
    # strings before validation; confirm against the code that builds entries.
    created_at: str | None = None
class AuditLogListResponse(BaseModel):
    """Paginated response for audit log listing."""

    # Returned entries and a count reported alongside them.
    data: list[AuditLogEntry]
    total: int
837# Endpoints
840def _normalize_club_code(code: str) -> str:
841 """Normalize club codes to two-digit strings."""
842 code_str = str(code).strip()
843 if code_str.isdigit():
844 value = int(code_str)
845 if value < 0 or value > 99:
846 raise ValueError("Club code must be between 00 and 99")
847 return f"{value:02d}"
848 raise ValueError("Club code must be numeric")
def _generate_hrnz_urls(
    start_date: date, end_date: date, club_codes: list[str]
) -> list[str]:
    """Build HRNZ result file names for every day and club code in a range.

    Each name has the form ``<MMDD><club code>rs.htm``. Days are walked from
    ``start_date`` to ``end_date`` inclusive; within a day the codes keep the
    order they were given in.

    Args:
        start_date: First day of the range.
        end_date: Last day of the range (inclusive).
        club_codes: Club codes; each is normalized to two digits first.

    Returns:
        The generated names; empty when ``end_date`` is before ``start_date``.

    Raises:
        ValueError: If any club code cannot be normalized.
    """
    # Normalize up front so invalid codes are rejected even for empty ranges.
    codes = [_normalize_club_code(raw_code) for raw_code in club_codes]
    span_days = (end_date - start_date).days
    return [
        f"{(start_date + timedelta(days=day_offset)).strftime('%m%d')}{code}rs.htm"
        for day_offset in range(span_days + 1)
        for code in codes
    ]
868def _resolve_meeting_date(scraped: dict, url: str, default_year: int) -> date | None:
869 """Resolve meeting date from scraped data or URL."""
870 date_raw = scraped.get("date_raw")
871 if date_raw:
872 date_raw = date_raw.replace("\xa0", " ").strip()
873 if not re.search(r"\\b\\d{4}\\b", date_raw):
874 for fmt in ["%A, %d %B", "%d %B"]:
875 try:
876 parsed = datetime.strptime(date_raw, fmt)
877 return parsed.replace(year=default_year).date()
878 except ValueError:
879 continue
880 formats = [
881 "%A, %d %B %Y",
882 "%A, %d %B",
883 "%d %B %Y",
884 "%d %B",
885 ]
886 for fmt in formats:
887 try:
888 parsed = datetime.strptime(date_raw, fmt)
889 if "%Y" not in fmt:
890 parsed = parsed.replace(year=default_year)
891 return parsed.date()
892 except ValueError:
893 continue
895 meeting_date_str = scraped.get("date")
896 if meeting_date_str:
897 try:
898 return date.fromisoformat(meeting_date_str)
899 except ValueError:
900 pass
902 match = re.search(r"(?P<mm>\\d{2})(?P<dd>\\d{2})\\d{2}rs\\.htm", url)
903 if match:
904 try:
905 return date(default_year, int(match.group("mm")), int(match.group("dd")))
906 except ValueError:
907 return None
909 return None
912def _build_pagination_meta(
913 total: int,
914 limit: int,
915 offset: int,
916 base_url: str,
917 query_params: dict,
918) -> dict:
919 """Build pagination metadata with next/prev navigation links.
921 Args:
922 total: Total number of results
923 limit: Maximum results per page
924 offset: Current offset
925 base_url: Base URL without query string
926 query_params: Current query parameters dict (limit/offset will be overridden)
928 Returns:
929 Dict with total, limit, offset, next, prev keys
930 """
931 meta: dict = {
932 "total": total,
933 "limit": limit,
934 "offset": offset,
935 }
937 # Build next link
938 if offset + limit < total:
939 next_params = {**query_params, "limit": limit, "offset": offset + limit}
940 query_string = "&".join(
941 f"{k}={v}" for k, v in next_params.items() if v is not None
942 )
943 meta["next"] = f"{base_url}?{query_string}"
944 else:
945 meta["next"] = None
947 # Build prev link
948 if offset > 0:
949 prev_offset = max(0, offset - limit)
950 prev_params = {**query_params, "limit": limit, "offset": prev_offset}
951 query_string = "&".join(
952 f"{k}={v}" for k, v in prev_params.items() if v is not None
953 )
954 meta["prev"] = f"{base_url}?{query_string}"
955 else:
956 meta["prev"] = None
958 return meta
961# Web UI routes
@app.get("/", response_class=HTMLResponse)
async def root():
    """Redirect to web UI home page."""
    # The site root has no page of its own; visitors are sent to the UI.
    ui_home = "/ui/"
    return RedirectResponse(url=ui_home)
@app.get("/docs/site")
@app.get("/docs/site/", response_class=HTMLResponse)
async def serve_docs_site():
    """Redirect to the static docs site."""
    # Both spellings (with and without trailing slash) land on the same
    # location under the /static mount.
    docs_location = "/static/docs/"
    return RedirectResponse(url=docs_location)
@app.get("/ui/", response_class=HTMLResponse)
@app.get("/ui/index.html", response_class=HTMLResponse)
async def serve_home():
    """Serve web UI home page."""
    html_path = web_dir / "templates" / "index.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/ui/horse/{horse_id}", response_class=HTMLResponse)
async def serve_horse_detail(horse_id: int):
    """Serve horse detail page."""
    # horse_id only validates the URL; the same template is served for all IDs.
    html_path = web_dir / "templates" / "horse.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/ui/driver/{driver_id}", response_class=HTMLResponse)
async def serve_driver_detail(driver_id: int):
    """Serve driver detail page."""
    # driver_id only validates the URL; the same template is served for all IDs.
    html_path = web_dir / "templates" / "driver.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/ui/trainer/{trainer_id}", response_class=HTMLResponse)
async def serve_trainer_detail(trainer_id: int):
    """Serve trainer detail page."""
    # trainer_id only validates the URL; the same template is served for all IDs.
    html_path = web_dir / "templates" / "trainer.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/ui/race/{race_id}", response_class=HTMLResponse)
async def serve_race_detail(race_id: int):
    """Serve race detail page."""
    # race_id only validates the URL; the same template is served for all IDs.
    html_path = web_dir / "templates" / "race.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/ui/race-card", response_class=HTMLResponse)
@app.get("/ui/race-card/", response_class=HTMLResponse)
async def serve_race_cards_index():
    """Serve race cards index page."""
    html_path = web_dir / "templates" / "race-card.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/ui/race-card/{race_id}", response_class=HTMLResponse)
async def serve_race_card(race_id: int):
    """Serve race card view for a specific race."""
    # race_id only validates the URL; the same template is served for all IDs.
    html_path = web_dir / "templates" / "race-card.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/ui/search", response_class=HTMLResponse)
async def serve_search():
    """Serve search page."""
    html_path = web_dir / "templates" / "search.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/ui/analytics", response_class=HTMLResponse)
async def serve_analytics():
    """Serve analytics page."""
    html_path = web_dir / "templates" / "analytics.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/ui/analytics-dashboard", response_class=HTMLResponse)
async def serve_analytics_dashboard():
    """Serve analytics dashboard page with combined accuracy, ratings, and confidence metrics."""
    html_path = web_dir / "templates" / "analytics-dashboard.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/ui/data-correction", response_class=HTMLResponse)
async def serve_data_correction():
    """Serve data correction workflow page."""
    html_path = web_dir / "templates" / "data-correction.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8
    # everywhere and would garble non-ASCII characters in the template.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))
@app.get("/health", response_model=HealthResponse)
@limiter.limit("100/minute")
def health_check(request: Request):
    """Health check endpoint.

    Returns:
        Health status and version
    """
    # Report the version declared on the FastAPI app so the two values
    # cannot drift apart when the version is bumped.
    return HealthResponse(status="healthy", version=app.version)
@api_router.get("/ratings/horses", response_model=PaginatedRatingResponse)
@limiter.limit("100/minute")
def get_horse_ratings(
    request: Request,
    limit: int = Query(default=100, le=500),
    offset: int = Query(default=0, ge=0),
    as_of_date: str | None = Query(default=None, description="YYYY-MM-DD"),
    venue: str | None = Query(default=None, description="Filter by venue"),
    format: str = Query(default="json", description="Response format: json or csv"),
    db: Session = Depends(get_db),
):
    """Get top horse ratings.

    Args:
        limit: Maximum results to return
        offset: Number of results to skip
        as_of_date: Optional date filter for historical ratings
        venue: Optional venue filter
        format: Response format (json or csv)
        db: Database session

    Returns:
        Paginated list of horse ratings in JSON or raw CSV
    """
    # Parse as_of_date if provided
    date_filter = parse_date(as_of_date) if as_of_date else None

    # Fetch all snapshots (use a high limit to effectively get all; pagination
    # is handled in-memory after venue filtering)
    snapshots = list(
        RatingSnapshotRepository.get_top_ratings(
            db, EntityType.HORSE, limit=100_000, as_of_date=date_filter
        )
    )

    if venue:
        from packages.core.storage.models import Starter

        # One query for every horse that has started at the venue, instead of
        # one Starter lookup per snapshot.
        horse_ids_at_venue = {
            horse_id
            for (horse_id,) in db.query(Starter.horse_id)
            .join(Race)
            .join(Meeting)
            .filter(Meeting.venue == venue)
            .distinct()
        }
        snapshots = [
            snapshot
            for snapshot in snapshots
            if snapshot.entity_id in horse_ids_at_venue
        ]

    # Count total after filtering
    total = len(snapshots)

    # Apply offset and limit for the current page
    page_snapshots = snapshots[offset : offset + limit]

    # Names are only needed for the rows actually returned. Look them up in
    # bulk, chunked to keep each IN (...) list small.
    page_ids = list({snapshot.entity_id for snapshot in page_snapshots})
    horse_names: dict = {}
    chunk_size = 500
    for start in range(0, len(page_ids), chunk_size):
        id_chunk = page_ids[start : start + chunk_size]
        rows = db.query(Horse.id, Horse.name).filter(Horse.id.in_(id_chunk))
        horse_names.update({horse_id: name for horse_id, name in rows})

    page = [
        RatingResponse(
            entity_type="horse",
            entity_id=snapshot.entity_id,
            # None when the horse row is missing.
            entity_name=horse_names.get(snapshot.entity_id),
            rating=snapshot.rating,
            rd=snapshot.rd,
            race_count=snapshot.meta.get("race_count") if snapshot.meta else None,
            as_of_race_id=snapshot.as_of_race_id,
        )
        for snapshot in page_snapshots
    ]

    # Return CSV if requested
    if format.lower() == "csv":
        output = StringIO()
        writer = csv.DictWriter(
            output,
            fieldnames=[
                "entity_id",
                "name",
                "rating",
                "rd",
                "race_count",
                "as_of_race_id",
            ],
        )
        writer.writeheader()
        writer.writerows(
            {
                "entity_id": row.entity_id,
                "name": row.entity_name,
                "rating": row.rating,
                "rd": row.rd,
                "race_count": row.race_count,
                "as_of_race_id": row.as_of_race_id,
            }
            for row in page
        )
        return Response(content=output.getvalue(), media_type="text/csv")

    # Build pagination metadata
    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRatingResponse(data=page, meta=PaginationMeta(**meta))
@api_router.get("/ratings/horses/{horse_id}", response_model=HorseDetailResponse)
@limiter.limit("100/minute")
def get_horse_detail(
    request: Request,
    horse_id: int,
    db: Session = Depends(get_db),
):
    """Return one horse together with its rating history.

    Args:
        request: Incoming request
        horse_id: Horse ID
        db: Database session

    Returns:
        Horse details; the history is ordered by meeting date, race
        datetime, race number and race id, and the current rating is
        taken from the last snapshot in that order

    Raises:
        HTTPException: 404 when no horse has this ID
    """
    horse = db.query(Horse).filter(Horse.id == horse_id).first()
    if not horse:
        raise HTTPException(status_code=404, detail="Horse not found")

    from packages.core.storage.models import RatingSnapshot

    chronological = (
        Meeting.meeting_date,
        Race.race_datetime,
        Race.race_number,
        Race.id,
    )
    timeline = (
        db.query(RatingSnapshot)
        .join(Race, RatingSnapshot.as_of_race_id == Race.id)
        .join(Meeting, Race.meeting_id == Meeting.id)
        .filter(
            RatingSnapshot.entity_type == EntityType.HORSE,
            RatingSnapshot.entity_id == horse_id,
        )
        .order_by(*chronological)
        .all()
    )

    rating_history = [
        RatingHistoryItem(
            race_id=point.as_of_race_id,
            rating=point.rating,
            rd=point.rd,
        )
        for point in timeline
    ]

    # The newest snapshot (if any) supplies the current figures.
    if timeline:
        current_rating = timeline[-1].rating
        current_rd = timeline[-1].rd
    else:
        current_rating = None
        current_rd = None

    return HorseDetailResponse(
        horse_id=horse_id,
        name=horse.name,
        current_rating=current_rating,
        current_rd=current_rd,
        race_count=len(timeline),
        rating_history=rating_history,
    )
def _driver_names_by_id(db: Session, driver_ids: list, chunk_size: int = 500) -> dict:
    """Return ``{driver_id: name}`` for the given ids using batched IN queries."""
    unique_ids = list(dict.fromkeys(driver_ids))
    names = {}
    # Chunk the IN list so the number of bound parameters stays small.
    for start in range(0, len(unique_ids), chunk_size):
        chunk = unique_ids[start : start + chunk_size]
        rows = db.query(Driver.id, Driver.name).filter(Driver.id.in_(chunk)).all()
        for row_id, row_name in rows:
            names[row_id] = row_name
    return names


@api_router.get("/ratings/drivers", response_model=PaginatedRatingResponse)
@limiter.limit("100/minute")
def get_driver_ratings(
    request: Request,
    limit: int = Query(default=100, le=500),
    offset: int = Query(default=0, ge=0),
    db: Session = Depends(get_db),
):
    """Get top driver ratings.

    Args:
        request: Incoming request (used to build pagination links)
        limit: Maximum results to return
        offset: Number of results to skip
        db: Database session

    Returns:
        Paginated list of driver ratings
    """
    snapshots = RatingSnapshotRepository.get_top_ratings(
        db, EntityType.DRIVER, limit=100_000
    )

    # Load names in batches rather than issuing one query per snapshot.
    names = _driver_names_by_id(db, [snapshot.entity_id for snapshot in snapshots])

    results = [
        RatingResponse(
            entity_type="driver",
            entity_id=snapshot.entity_id,
            entity_name=names.get(snapshot.entity_id),
            rating=snapshot.rating,
            rd=snapshot.rd,
            race_count=snapshot.meta.get("race_count") if snapshot.meta else None,
            as_of_race_id=snapshot.as_of_race_id,
        )
        for snapshot in snapshots
    ]

    total = len(results)
    page = results[offset : offset + limit]

    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRatingResponse(data=page, meta=PaginationMeta(**meta))
def _trainer_names_by_id(
    db: Session, trainer_ids: list, chunk_size: int = 500
) -> dict:
    """Return ``{trainer_id: name}`` for the given ids using batched IN queries."""
    unique_ids = list(dict.fromkeys(trainer_ids))
    names = {}
    # Chunk the IN list so the number of bound parameters stays small.
    for start in range(0, len(unique_ids), chunk_size):
        chunk = unique_ids[start : start + chunk_size]
        rows = db.query(Trainer.id, Trainer.name).filter(Trainer.id.in_(chunk)).all()
        for row_id, row_name in rows:
            names[row_id] = row_name
    return names


@api_router.get("/ratings/trainers", response_model=PaginatedRatingResponse)
@limiter.limit("100/minute")
def get_trainer_ratings(
    request: Request,
    limit: int = Query(default=100, le=500),
    offset: int = Query(default=0, ge=0),
    db: Session = Depends(get_db),
):
    """Get top trainer ratings.

    Args:
        request: Incoming request (used to build pagination links)
        limit: Maximum results to return
        offset: Number of results to skip
        db: Database session

    Returns:
        Paginated list of trainer ratings
    """
    snapshots = RatingSnapshotRepository.get_top_ratings(
        db, EntityType.TRAINER, limit=100_000
    )

    # Load names in batches rather than issuing one query per snapshot.
    names = _trainer_names_by_id(db, [snapshot.entity_id for snapshot in snapshots])

    results = [
        RatingResponse(
            entity_type="trainer",
            entity_id=snapshot.entity_id,
            entity_name=names.get(snapshot.entity_id),
            rating=snapshot.rating,
            rd=snapshot.rd,
            race_count=snapshot.meta.get("race_count") if snapshot.meta else None,
            as_of_race_id=snapshot.as_of_race_id,
        )
        for snapshot in snapshots
    ]

    total = len(results)
    page = results[offset : offset + limit]

    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRatingResponse(data=page, meta=PaginationMeta(**meta))
@api_router.get("/ratings/drivers/{driver_id}", response_model=DriverDetailResponse)
@limiter.limit("100/minute")
def get_driver_detail(
    request: Request,
    driver_id: int,
    db: Session = Depends(get_db),
):
    """Return one driver together with its rating history.

    Args:
        request: Incoming request
        driver_id: Driver ID
        db: Database session

    Returns:
        Driver details; the history is ordered by meeting date, race
        datetime, race number and race id, and the current rating is
        taken from the last snapshot in that order

    Raises:
        HTTPException: 404 when no driver has this ID
    """
    driver = db.query(Driver).filter(Driver.id == driver_id).first()
    if not driver:
        raise HTTPException(status_code=404, detail="Driver not found")

    from packages.core.storage.models import RatingSnapshot

    chronological = (
        Meeting.meeting_date,
        Race.race_datetime,
        Race.race_number,
        Race.id,
    )
    timeline = (
        db.query(RatingSnapshot)
        .join(Race, RatingSnapshot.as_of_race_id == Race.id)
        .join(Meeting, Race.meeting_id == Meeting.id)
        .filter(
            RatingSnapshot.entity_type == EntityType.DRIVER,
            RatingSnapshot.entity_id == driver_id,
        )
        .order_by(*chronological)
        .all()
    )

    rating_history = [
        RatingHistoryItem(
            race_id=point.as_of_race_id,
            rating=point.rating,
            rd=point.rd,
        )
        for point in timeline
    ]

    # The newest snapshot (if any) supplies the current figures.
    if timeline:
        current_rating = timeline[-1].rating
        current_rd = timeline[-1].rd
    else:
        current_rating = None
        current_rd = None

    return DriverDetailResponse(
        driver_id=driver_id,
        name=driver.name,
        current_rating=current_rating,
        current_rd=current_rd,
        race_count=len(timeline),
        rating_history=rating_history,
    )
@api_router.get("/ratings/trainers/{trainer_id}", response_model=TrainerDetailResponse)
@limiter.limit("100/minute")
def get_trainer_detail(
    request: Request,
    trainer_id: int,
    db: Session = Depends(get_db),
):
    """Return one trainer together with its rating history.

    Args:
        request: Incoming request
        trainer_id: Trainer ID
        db: Database session

    Returns:
        Trainer details; the history is ordered by meeting date, race
        datetime, race number and race id, and the current rating is
        taken from the last snapshot in that order

    Raises:
        HTTPException: 404 when no trainer has this ID
    """
    trainer = db.query(Trainer).filter(Trainer.id == trainer_id).first()
    if not trainer:
        raise HTTPException(status_code=404, detail="Trainer not found")

    from packages.core.storage.models import RatingSnapshot

    chronological = (
        Meeting.meeting_date,
        Race.race_datetime,
        Race.race_number,
        Race.id,
    )
    timeline = (
        db.query(RatingSnapshot)
        .join(Race, RatingSnapshot.as_of_race_id == Race.id)
        .join(Meeting, Race.meeting_id == Meeting.id)
        .filter(
            RatingSnapshot.entity_type == EntityType.TRAINER,
            RatingSnapshot.entity_id == trainer_id,
        )
        .order_by(*chronological)
        .all()
    )

    rating_history = [
        RatingHistoryItem(
            race_id=point.as_of_race_id,
            rating=point.rating,
            rd=point.rd,
        )
        for point in timeline
    ]

    # The newest snapshot (if any) supplies the current figures.
    if timeline:
        current_rating = timeline[-1].rating
        current_rd = timeline[-1].rd
    else:
        current_rating = None
        current_rd = None

    return TrainerDetailResponse(
        trainer_id=trainer_id,
        name=trainer.name,
        current_rating=current_rating,
        current_rd=current_rd,
        race_count=len(timeline),
        rating_history=rating_history,
    )
# The ":int" convertor restricts this route to numeric ids. Without it the
# pattern also captures literal sibling paths such as "/races/upcoming",
# which is registered later on the same router and would never be reached.
@api_router.get("/races/{race_id:int}")
@limiter.limit("100/minute")
def get_race(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Get race details with starters.

    Args:
        request: Incoming request
        race_id: Race ID
        db: Database session

    Returns:
        Dict with the race's fields, its meeting venue, the number of
        starters and one entry per starter

    Raises:
        HTTPException: 404 when no race has this ID
    """
    from packages.core.storage.models import Starter

    race = (
        db.query(Race)
        .options(joinedload(Race.meeting))
        .filter(Race.id == race_id)
        .first()
    )
    if not race:
        raise HTTPException(status_code=404, detail="Race not found")

    starters = db.query(Starter).filter(Starter.race_id == race_id).all()

    return {
        "id": race.id,
        "race_id": race.id,
        "meeting_id": race.meeting_id,
        "race_number": race.race_number,
        "venue": race.meeting.venue if race.meeting else None,
        "distance_m": race.distance_m,
        "start_type": race.start_type,
        "gait": race.gait,
        "race_datetime": race.race_datetime.isoformat() if race.race_datetime else None,
        "starters_count": len(starters),
        "starters": [
            {
                "horse_id": s.horse_id,
                "driver_id": s.driver_id,
                "trainer_id": s.trainer_id,
                "barrier": s.barrier,
                "handicap_m": s.handicap_m,
                "placing": s.placing,
                "did_not_finish": s.did_not_finish,
            }
            for s in starters
        ],
    }
@api_router.get("/races", response_model=PaginatedRaceResponse)
@limiter.limit("200/minute")
def list_races(
    request: Request,
    date_from: str | None = Query(default=None, description="YYYY-MM-DD"),
    date_to: str | None = Query(default=None, description="YYYY-MM-DD"),
    limit: int = Query(default=100, le=1000),
    offset: int = Query(default=0, ge=0),
    venue: str | None = Query(default=None, description="Filter by venue"),
    db: Session = Depends(get_db),
):
    """List races whose meeting date falls inside a date range.

    Args:
        request: Incoming request (used to build pagination links)
        date_from: Range start; defaults to 30 days before today
        date_to: Range end (inclusive); defaults to today
        limit: Maximum results to return
        offset: Number of results to skip
        venue: Optional exact-match venue filter
        db: Database session

    Returns:
        Paginated list of races ordered by meeting date, race datetime,
        race number and race id
    """
    from packages.core.common.utils import parse_date

    start_date = parse_date(date_from) if date_from else date.today() - timedelta(days=30)
    end_date = parse_date(date_to) if date_to else date.today()

    query = (
        db.query(Race)
        .join(Race.meeting)
        .filter(Meeting.meeting_date >= start_date, Meeting.meeting_date <= end_date)
    )
    if venue:
        query = query.filter(Meeting.venue == venue)

    # Count before paging so the total reflects the whole filtered set.
    total = query.count()

    chronological = (
        Meeting.meeting_date,
        Race.race_datetime,
        Race.race_number,
        Race.id,
    )
    races = (
        query.options(joinedload(Race.meeting))
        .order_by(*chronological)
        .limit(limit)
        .offset(offset)
        .all()
    )

    race_list = [
        {
            "race_id": race.id,
            "meeting_id": race.meeting_id,
            "meeting_date": (
                race.meeting.meeting_date.isoformat()
                if race.meeting and race.meeting.meeting_date
                else None
            ),
            "venue": race.meeting.venue if race.meeting else None,
            "race_number": race.race_number,
            "distance_m": race.distance_m,
            "start_type": race.start_type,
            "race_datetime": (
                race.race_datetime.isoformat() if race.race_datetime else None
            ),
        }
        for race in races
    ]

    # Pagination metadata with next/prev links
    base_url = str(request.base_url).rstrip("/") + request.url.path
    meta = _build_pagination_meta(
        total, limit, offset, base_url, dict(request.query_params)
    )

    return PaginatedRaceResponse(data=race_list, meta=PaginationMeta(**meta))
@api_router.post("/admin/ingest", response_model=IngestionResponse)
async def trigger_ingestion(
    request: IngestionRequest,
    source: str = Query(
        default="tab",
        description='Data source: "tab" (TAB API directly) or "ingest" (tab-api-ingest service)',
    ),
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Run data ingestion for a date range (admin only).

    Two data sources are supported:
    - "tab" (default): data comes directly from the TAB Affiliates API
    - "ingest": data comes from the tab-api-ingest TypeScript service

    Args:
        request: Ingestion request with date range
        source: Data source selector, passed through to IngestionService
        db: Database session
        token: Admin token

    Returns:
        Counts of ingested meetings, races and starters plus the
        service's error statistic
    """
    start_date = parse_date(request.date_from)
    end_date = parse_date(request.date_to)

    logger.info(
        f"Admin triggered ingestion: {start_date} to {end_date} (source={source})"
    )

    service = IngestionService(db, source=source)
    ingested = await service.ingest_date_range(start_date, end_date)
    meetings, races, starters = ingested

    return IngestionResponse(
        meetings=meetings,
        races=races,
        starters=starters,
        errors=service.stats["errors"],
    )
@api_router.post("/webhook/scrape", response_model=ScrapeResponse)
async def webhook_scrape(
    request: ScrapeRequest,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Webhook to trigger HRNZ Playwright scrape with parameters.

    URLs are generated from the club codes (request value, falling back to
    settings) and merged with any explicit request URLs. Each URL is scraped
    and persisted in its own transaction; a failing URL is rolled back,
    counted as an error and does not stop the remaining URLs.

    Args:
        request: Scrape request (date range, club codes, urls, recompute flags)
        db: Database session
        token: Admin token

    Returns:
        Counts of persisted rows (only from URLs whose transaction was
        committed), the error count, and recompute results

    Raises:
        HTTPException: 400 when URL generation fails or no URL is available
    """
    from packages.core.storage.repositories import (
        DriverRepository,
        HorseRepository,
        MeetingRepository,
        RaceRepository,
        StarterRepository,
        TrainerRepository,
    )
    from packages.hrnz_scraper import HRNZScraper
    from packages.hrnz_scraper.mapper import HRNZDataMapper

    start_date = parse_date(request.date_from)
    end_date = parse_date(request.date_to)

    club_codes = request.club_codes
    if club_codes is None:
        club_codes = settings.hrnz.club_codes

    # A string is either the keyword "all" or a comma-separated list.
    if isinstance(club_codes, str):
        if club_codes.lower() == "all":
            club_codes = HRNZ_ALL_CLUB_CODES
        else:
            club_codes = [
                code.strip() for code in club_codes.split(",") if code.strip()
            ]

    # A list containing "all" anywhere also expands to every club code.
    if isinstance(club_codes, list):
        if any(isinstance(code, str) and code.lower() == "all" for code in club_codes):
            club_codes = HRNZ_ALL_CLUB_CODES

    urls: list[str] = []
    if club_codes:
        try:
            urls.extend(_generate_hrnz_urls(start_date, end_date, club_codes))
        except ValueError as exc:
            raise HTTPException(status_code=400, detail=str(exc)) from exc

    if request.urls:
        urls.extend(request.urls)

    if not urls:
        raise HTTPException(
            status_code=400,
            detail="Provide club_codes (or HRNZ_CLUB_CODES) to generate URLs, or pass urls explicitly.",
        )

    # De-duplicate while keeping first-seen order.
    urls = list(dict.fromkeys(urls))

    logger.info(
        "Webhook triggered scrape",
        extra={
            "start_date": str(start_date),
            "end_date": str(end_date),
            "recompute": request.recompute,
            "urls": len(urls),
        },
    )

    row_kinds = ("meetings", "races", "starters", "horses", "drivers", "trainers")
    stats = dict.fromkeys((*row_kinds, "errors"), 0)

    mapper = HRNZDataMapper()
    async with HRNZScraper() as scraper:
        for url in urls:
            # Tally this URL separately so that rows discarded by a rollback
            # are never added to the reported totals.
            url_counts = dict.fromkeys(row_kinds, 0)
            try:
                scraped = await scraper.get_meeting_results(url)

                meeting_date = _resolve_meeting_date(scraped, url, start_date.year)
                if not meeting_date or not scraped.get("races"):
                    logger.info("Skipping non-meeting page for %s", url)
                    continue

                if not (start_date <= meeting_date <= end_date):
                    continue

                scraped["date"] = meeting_date.isoformat()
                meeting = mapper.map_meeting(scraped)
                entities = mapper.map_entities(scraped)

                MeetingRepository.upsert(db, meeting)
                url_counts["meetings"] += 1

                for horse in entities["horses"]:
                    HorseRepository.upsert(
                        db,
                        horse["id"],
                        horse["name"],
                        horse.get("raw_json"),
                    )
                    url_counts["horses"] += 1

                for driver in entities["drivers"]:
                    DriverRepository.upsert(
                        db, driver["name"], driver_id=driver.get("id")
                    )
                    url_counts["drivers"] += 1

                for trainer in entities["trainers"]:
                    TrainerRepository.upsert(
                        db, trainer["name"], trainer_id=trainer.get("id")
                    )
                    url_counts["trainers"] += 1

                races = mapper.map_races(scraped, meeting["meeting"])
                race_id_map = {}
                for race in races:
                    race_obj = RaceRepository.upsert(db, meeting["meeting"], race)
                    race_id_map[race["race_number"]] = race_obj.id
                    url_counts["races"] += 1

                starters = mapper.map_starters(scraped, race_id_map)
                for starter in starters:
                    StarterRepository.upsert(
                        db,
                        starter["race_id"],
                        starter,
                        starter.get("placing"),
                    )
                    url_counts["starters"] += 1

                db.commit()

                # Only committed work is reported.
                for kind, count in url_counts.items():
                    stats[kind] += count
            except Exception as exc:
                # Deliberate best-effort: one bad page must not abort the run.
                db.rollback()
                stats["errors"] += 1
                logger.error("Failed to scrape %s: %s", url, exc, exc_info=True)

    snapshots_created = 0
    if request.recompute:

        def _run_recompute():
            # Uses its own session because it runs in a worker thread.
            with get_session() as session:
                return recompute_ratings(
                    session,
                    start_date,
                    end_date,
                    clear_existing=request.clear_existing,
                    learn_adjustments=request.learn_adjustments,
                )

        snapshots_created = await asyncio.to_thread(_run_recompute)

    return ScrapeResponse(
        meetings=stats["meetings"],
        races=stats["races"],
        starters=stats["starters"],
        horses=stats["horses"],
        drivers=stats["drivers"],
        trainers=stats["trainers"],
        errors=stats["errors"],
        recomputed=request.recompute,
        snapshots_created=snapshots_created,
    )
@api_router.post("/admin/recompute", response_model=RecomputeResponse)
def trigger_recompute(
    request: RecomputeRequest,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Recompute ratings for a date range (admin only).

    Args:
        request: Recompute request with date range and the
            clear_existing / learn_adjustments flags
        db: Database session
        token: Admin token

    Returns:
        RecomputeResponse carrying the value returned by recompute_ratings
    """
    start_date = parse_date(request.date_from)
    end_date = parse_date(request.date_to)

    logger.info(f"Admin triggered recompute: {start_date} to {end_date}")

    recompute_options = {
        "clear_existing": request.clear_existing,
        "learn_adjustments": request.learn_adjustments,
    }
    snapshot_count = recompute_ratings(db, start_date, end_date, **recompute_options)

    return RecomputeResponse(snapshots_created=snapshot_count)
1867# ── Scheduler admin endpoints ────────────────────────────────────
@api_router.get(
    "/admin/jobs",
    response_model=SchedulerJobListResponse,
)
@limiter.limit("20/minute")
def list_scheduled_jobs(
    request: Request,
    token: str = Depends(verify_admin_token),
):
    """List the jobs registered in the scheduler (admin only).

    Args:
        request: Incoming request
        token: Admin authentication token

    Returns:
        Every job reported by ``scheduler.list_jobs()`` plus the job count
    """
    logger.info("Admin listing scheduled jobs")
    registered = scheduler.list_jobs()
    job_infos = [SchedulerJobInfo(**entry) for entry in registered]
    return SchedulerJobListResponse(jobs=job_infos, total=len(registered))
@api_router.post(
    "/admin/jobs",
    response_model=AddSchedulerJobResponse,
    status_code=201,
)
@limiter.limit("20/minute")
def add_scheduled_job(
    request: Request,
    job_request: AddSchedulerJobRequest,
    token: str = Depends(verify_admin_token),
):
    """Register a new scheduled background job (admin only).

    Three job types are handled:
    - ``ingest``: forwarded to ``scheduler.add_ingest_job``
    - ``recompute``: forwarded to ``scheduler.add_recompute_job``
    - ``scrape``: forwarded to ``scheduler.add_scrape_job``

    Args:
        request: Incoming request
        job_request: Job configuration including cron expression and type-specific params
        token: Admin authentication token

    Returns:
        The job ID and a confirmation message

    Raises:
        HTTPException: 400 when the job type is not one of the three above
    """
    job_type = job_request.job_type

    # Fall back to a "<type>_<UTC timestamp>" id when the caller gave none.
    job_id = (
        job_request.job_id
        or f"{job_request.job_type}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
    )

    logger.info(
        "Admin adding scheduled job",
        extra={
            "job_type": job_request.job_type,
            "cron_expr": job_request.cron_expr,
            "job_id": job_id,
        },
    )

    if job_type not in ("ingest", "recompute", "scrape"):
        raise HTTPException(
            status_code=400,
            detail=f"Unknown job_type '{job_request.job_type}'. Must be 'ingest', 'recompute', or 'scrape'.",
        )

    if job_type == "ingest":
        scheduler.add_ingest_job(
            date_from=job_request.date_from,
            date_to=job_request.date_to,
            category=job_request.category,
            source=job_request.source,
            cron_expr=job_request.cron_expr,
            job_id=job_id,
        )
    elif job_type == "recompute":
        scheduler.add_recompute_job(
            date_from=job_request.date_from,
            date_to=job_request.date_to,
            clear=job_request.clear,
            cron_expr=job_request.cron_expr,
            job_id=job_id,
        )
    else:
        scheduler.add_scrape_job(
            urls=job_request.urls,
            club_codes=job_request.club_codes,
            date_from=job_request.date_from,
            date_to=job_request.date_to,
            cron_expr=job_request.cron_expr,
            job_id=job_id,
        )

    return AddSchedulerJobResponse(
        job_id=job_id,
        message=f"Scheduled {job_request.job_type} job with cron '{job_request.cron_expr}'",
    )
@api_router.delete(
    "/admin/jobs/{job_id}",
    response_model=RemoveSchedulerJobResponse,
)
@limiter.limit("20/minute")
def remove_scheduled_job(
    request: Request,
    job_id: str,
    token: str = Depends(verify_admin_token),
):
    """Delete a scheduled background job (admin only).

    Args:
        request: Incoming request
        job_id: The job identifier to remove
        token: Admin authentication token

    Returns:
        Confirmation of removal

    Raises:
        HTTPException: 404 when ``scheduler.remove_job`` reports nothing removed
    """
    logger.info("Admin removing scheduled job", extra={"job_id": job_id})

    if not scheduler.remove_job(job_id):
        raise HTTPException(
            status_code=404,
            detail=f"Job '{job_id}' not found",
        )

    return RemoveSchedulerJobResponse(
        job_id=job_id,
        removed=True,
        message=f"Job '{job_id}' removed successfully",
    )
2009# ── Admin audit log endpoints ────────────────────────────────────────────
@api_router.post("/admin/audit-log", response_model=AuditLogEntry, status_code=201)
def create_audit_log(
    request: AuditLogCreateRequest,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Record an audit log entry (admin only).

    The action is upper-cased and stripped before validation, then the
    entry is written through ``AuditLogger.log_change``.

    Args:
        request: Audit log entry details
        db: Database session
        token: Admin token

    Returns:
        The created audit log entry

    Raises:
        HTTPException: 400 for an action outside INSERT/UPDATE/DELETE/CORRECT,
            500 when the logger returns no entry
    """
    allowed_actions = ("INSERT", "UPDATE", "DELETE", "CORRECT")
    normalized_action = request.action.upper().strip()
    if normalized_action not in allowed_actions:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid action '{request.action}'. Must be INSERT, UPDATE, DELETE, or CORRECT.",
        )

    created = AuditLogger.log_change(
        session=db,
        table_name=request.table_name,
        record_id=request.record_id,
        action=normalized_action,
        old_values=request.old_values,
        new_values=request.new_values,
        changed_by=request.changed_by,
        change_reason=request.change_reason,
    )
    if created is None:
        raise HTTPException(status_code=500, detail="Failed to create audit log entry")

    # Map the ORM row onto the response schema.
    timestamp = created.created_at.isoformat() if created.created_at else None
    return AuditLogEntry(
        id=created.id,
        table_name=created.table_name,
        record_id=created.record_id,
        action=created.action,
        old_values=created.old_values,
        new_values=created.new_values,
        changed_by=created.changed_by,
        change_reason=created.change_reason,
        created_at=timestamp,
    )
@api_router.get("/admin/audit-log", response_model=AuditLogListResponse)
def list_audit_logs(
    limit: int = Query(default=100, le=500, description="Maximum entries to return"),
    offset: int = Query(default=0, ge=0, description="Number of entries to skip"),
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """List recent audit log entries (admin only).

    Args:
        limit: Maximum entries to return
        offset: Number of entries to skip
        db: Database session
        token: Admin token

    Returns:
        Paginated list of recent audit log entries with the total entry count
    """
    window = limit + offset
    entries = AuditLogger.get_recent_changes(db, limit=window)
    # Apply offset in Python (the logger returns newest-first)
    page = entries[offset : offset + limit]

    total = len(entries)
    # The fetch above is capped at ``window`` rows, so a full window means
    # more rows may exist and only a real COUNT gives the true total. The
    # comparison must be ">=": a strict ">" can never hold for a capped fetch.
    if total >= window:
        total = db.query(AuditLog).count()

    return AuditLogListResponse(
        data=[
            AuditLogEntry(
                id=e.id,
                table_name=e.table_name,
                record_id=e.record_id,
                action=e.action,
                old_values=e.old_values,
                new_values=e.new_values,
                changed_by=e.changed_by,
                change_reason=e.change_reason,
                created_at=e.created_at.isoformat() if e.created_at else None,
            )
            for e in page
        ],
        total=total,
    )
@api_router.get(
    "/admin/audit-log/{table_name}/{record_id}", response_model=AuditLogListResponse
)
def get_audit_log_for_record(
    table_name: str,
    record_id: str,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Return every audit log entry recorded for one record (admin only).

    Args:
        table_name: Name of the table
        record_id: Primary key value of the record
        db: Database session
        token: Admin token

    Returns:
        The entries from ``AuditLogger.get_changes_for_record`` and their count
    """
    history = AuditLogger.get_changes_for_record(db, table_name, record_id)

    serialized = []
    for row in history:
        timestamp = row.created_at.isoformat() if row.created_at else None
        serialized.append(
            AuditLogEntry(
                id=row.id,
                table_name=row.table_name,
                record_id=row.record_id,
                action=row.action,
                old_values=row.old_values,
                new_values=row.new_values,
                changed_by=row.changed_by,
                change_reason=row.change_reason,
                created_at=timestamp,
            )
        )

    return AuditLogListResponse(data=serialized, total=len(history))
@api_router.get("/races/{race_id}/predictions", response_model=RacePredictionResponse)
@limiter.limit("50/minute")
def get_race_predictions(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Return predictions for every starter in a race.

    Args:
        request: Incoming request
        race_id: Race ID
        db: Database session

    Returns:
        Race summary plus one prediction per entry produced by
        ``PredictionEngine.predict_race``; each carries the starter's
        recorded placing when the starter id is known

    Raises:
        HTTPException: 404 when the race does not exist or has no starters
    """
    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    race = db.query(Race).filter(Race.id == race_id).first()
    if not race:
        raise HTTPException(status_code=404, detail="Race not found")

    starters = db.query(Starter).filter(Starter.race_id == race_id).all()
    if not starters:
        raise HTTPException(status_code=404, detail="No starters found for race")
    starter_by_id = {entry.id: entry for entry in starters}

    # Re-fetch the race with its meeting eagerly loaded when none is attached.
    if not race.meeting:
        from sqlalchemy.orm import joinedload

        race = (
            db.query(Race)
            .options(joinedload(Race.meeting))
            .filter(Race.id == race_id)
            .first()
        )

    engine = PredictionEngine(db)
    prediction = engine.predict_race(race, starters)

    predictions = []
    for pred in prediction.predictions:
        if pred.starter_id in starter_by_id:
            actual_placing = starter_by_id[pred.starter_id].placing
        else:
            actual_placing = None
        predictions.append(
            PredictionResponse(
                horse_id=pred.horse_id,
                horse_name=pred.horse_name,
                driver_id=pred.driver_id,
                driver_name=pred.driver_name,
                trainer_id=pred.trainer_id,
                trainer_name=pred.trainer_name,
                barrier=pred.barrier,
                effective_rating=pred.effective_rating,
                win_probability=pred.win_probability,
                place_probability=pred.place_probability,
                place_score=pred.place_score,
                predicted_placing=pred.predicted_placing,
                ci_lower=pred.confidence_interval_low,
                ci_upper=pred.confidence_interval_high,
                placing=actual_placing,
            )
        )

    return RacePredictionResponse(
        race_id=race_id,
        race_number=race.race_number,
        venue=race.meeting.venue if race.meeting else None,
        distance_m=race.distance_m,
        predictions=predictions,
    )
@api_router.get("/races/upcoming")
@limiter.limit("100/minute")
def get_upcoming_races(
    request: Request,
    race_date: str | None = Query(
        default=None, description="YYYY-MM-DD, defaults to today"
    ),
    db: Session = Depends(get_db),
):
    """Return the races whose meeting falls on a given date.

    Args:
        request: Incoming request
        race_date: Date to get races for (defaults to today)
        db: Database session

    Returns:
        Dict with the ISO date, the number of races and the races
        themselves, ordered by venue and race number
    """
    from packages.core.common.utils import parse_date

    if race_date:
        target_date = parse_date(race_date)
    else:
        target_date = date.today()

    races = (
        db.query(Race)
        .options(joinedload(Race.meeting))
        .join(Race.meeting)
        .filter(Meeting.meeting_date == target_date)
        .order_by(Meeting.venue, Race.race_number)
        .all()
    )

    race_list = [
        {
            "race_id": race.id,
            "race_number": race.race_number,
            "venue": race.meeting.venue if race.meeting else None,
            "distance_m": race.distance_m,
            "start_type": race.start_type,
            "starter_count": len(race.starters),
        }
        for race in races
    ]

    return {
        "date": target_date.isoformat(),
        "race_count": len(race_list),
        "races": race_list,
    }
@api_router.get("/predictions/compare/{race_id}")
@limiter.limit("50/minute")
def compare_prediction_to_actual(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Return the engine's prediction-versus-result comparison for a race.

    Args:
        request: Incoming request
        race_id: Race ID
        db: Database session

    Returns:
        Whatever ``PredictionEngine.compare_prediction_to_actual`` produces

    Raises:
        HTTPException: 404 when the engine returns a falsy comparison
    """
    from packages.core.ratings.predictions import PredictionEngine

    comparison = PredictionEngine(db).compare_prediction_to_actual(race_id)
    if comparison:
        return comparison

    raise HTTPException(status_code=404, detail="Race not found or not completed")
@api_router.get("/analytics/accuracy", response_model=AccuracySummary)
@limiter.limit("20/minute")
def get_accuracy_summary(
    request: Request,
    days: int = Query(default=30, ge=1, le=365),
    db: Session = Depends(get_db),
):
    """Summarise prediction accuracy over a trailing window of days.

    Considers races from the last ``days`` days that have at least one
    starter with a recorded placing, newest first, capped at 200 races.
    Each race is scored with ``PredictionEngine.compare_prediction_to_actual``;
    races for which that returns nothing are left out of every figure.

    Args:
        request: Incoming request (not read by this function)
        days: Number of days to look back
        db: Database session

    Returns:
        Accuracy summary with overall figures, a per-day trend, buckets
        keyed by the favourite's win probability, and up to 50 recent races
    """
    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    window_start = date.today() - timedelta(days=days)
    window_end = date.today()

    # Races in the window that have at least one placed starter
    candidate_races = (
        db.query(Race)
        .join(Starter, Race.id == Starter.race_id)
        .filter(Starter.placing.isnot(None))
        .join(Meeting)
        .filter(
            Meeting.meeting_date >= window_start, Meeting.meeting_date <= window_end
        )
        .order_by(Meeting.meeting_date.desc(), Race.race_datetime.desc().nulls_last())
        .distinct()
        .limit(200)
        .all()
    )

    def _empty_tally() -> dict:
        return {"races": 0, "winner_correct": 0, "brier_sum": 0.0}

    def _record(tally: dict, hit: int, brier_score) -> None:
        tally["races"] += 1
        tally["winner_correct"] += hit
        tally["brier_sum"] += brier_score

    def _as_bucket(tally: dict) -> ConfidenceBucket:
        count = tally["races"]
        if count == 0:
            return ConfidenceBucket(races=0, win_accuracy=0.0, avg_brier=0.0)
        return ConfidenceBucket(
            races=count,
            win_accuracy=tally["winner_correct"] / count,
            avg_brier=tally["brier_sum"] / count,
        )

    scorer = PredictionEngine(db)

    overall = _empty_tally()
    overlap_total = 0.0
    per_day: dict[str, dict] = {}
    tiers = {
        "high": _empty_tally(),
        "medium": _empty_tally(),
        "low": _empty_tally(),
    }
    scored_races: list[dict] = []

    for race in candidate_races:
        outcome = scorer.compare_prediction_to_actual(race.id)
        if not outcome:
            continue

        winner_correct = outcome["winner_correct"]
        top3_overlap = outcome["top3_overlap"]
        brier = outcome["brier_score"]
        hit = 1 if winner_correct else 0

        # The confidence tier is driven by the highest predicted win probability
        runner_preds = outcome.get("predictions", [])
        if runner_preds:
            favourite = max(runner_preds, key=lambda p: p["win_probability"])
            favourite_prob = favourite["win_probability"]
        else:
            favourite_prob = 0.0

        if favourite_prob >= 0.40:
            tier = "high"
        elif favourite_prob >= 0.20:
            tier = "medium"
        else:
            tier = "low"
        _record(tiers[tier], hit, brier)

        # Prefer the date reported by the engine, then the meeting date
        day_key = outcome.get("race_date")
        if not day_key and race.meeting and race.meeting.meeting_date:
            day_key = race.meeting.meeting_date.isoformat()
        if not day_key:
            day_key = "unknown"

        if day_key not in per_day:
            per_day[day_key] = _empty_tally()
        _record(per_day[day_key], hit, brier)

        _record(overall, hit, brier)
        overlap_total += top3_overlap

        scored_races.append(
            {
                "race_id": outcome["race_id"],
                "race_number": outcome.get("race_number"),
                "venue": outcome.get("venue"),
                "race_date": outcome.get("race_date"),
                "field_size": outcome.get("field_size", 0),
                "winner_correct": winner_correct,
                "top3_overlap": top3_overlap,
                "brier_score": brier,
            }
        )

    evaluated = overall["races"]
    if evaluated == 0:
        headline = {
            "overall_win_accuracy": 0.0,
            "top3_accuracy": 0.0,
            "avg_brier_score": 0.0,
            "num_races_evaluated": 0,
        }
    else:
        headline = {
            "overall_win_accuracy": overall["winner_correct"] / evaluated,
            "top3_accuracy": overlap_total / evaluated / 3.0,
            "avg_brier_score": overall["brier_sum"] / evaluated,
            "num_races_evaluated": evaluated,
        }

    trend = [
        DailyTrendItem(
            date=day,
            avg_brier=tally["brier_sum"] / tally["races"],
            win_accuracy=tally["winner_correct"] / tally["races"],
            races=tally["races"],
        )
        for day, tally in sorted(per_day.items())
    ]

    return AccuracySummary(
        summary=headline,
        daily_trend=trend,
        confidence_buckets=ConfidenceBuckets(
            high=_as_bucket(tiers["high"]),
            medium=_as_bucket(tiers["medium"]),
            low=_as_bucket(tiers["low"]),
        ),
        recent_races=[RecentRaceAccuracy(**row) for row in scored_races[:50]],
    )
2479# ── Export endpoints (CSV/Parquet) ──────────────────────────────────────
# Column order shared by the CSV and Parquet rating exports.
_RATING_EXPORT_COLUMNS = [
    "entity_type",
    "entity_id",
    "entity_name",
    "rating",
    "rd",
    "race_count",
    "as_of_race_id",
]

# Maximum number of ids per IN (...) lookup when resolving entity names.
_RATING_EXPORT_NAME_CHUNK = 500


def _resolve_export_entity_types(entity_type: str | None) -> list[EntityType]:
    """Translate the ``entity_type`` query parameter into entity types.

    Args:
        entity_type: Raw query value, or None/empty for all types

    Returns:
        The single requested type, or horse, driver and trainer when no
        filter was given

    Raises:
        HTTPException: 400 when the value is not a valid ``EntityType``
    """
    if not entity_type:
        return [EntityType.HORSE, EntityType.DRIVER, EntityType.TRAINER]
    try:
        return [EntityType(entity_type.lower())]
    except ValueError:
        # Reject bad client input explicitly instead of surfacing a 500.
        raise HTTPException(
            status_code=400,
            detail="Invalid entity_type. Must be one of: horse, driver, trainer",
        ) from None


def _collect_rating_export_rows(
    db: Session, entity_types: list[EntityType], limit: int
) -> list[dict]:
    """Build at most ``limit`` export rows across the given entity types.

    Entity names are resolved with chunked ``IN`` lookups per entity type
    rather than one query per snapshot.

    Args:
        db: Database session
        entity_types: Entity types to export, in output order
        limit: Maximum total number of rows

    Returns:
        List of row dicts keyed by ``_RATING_EXPORT_COLUMNS``
    """
    name_models = {
        EntityType.HORSE: Horse,
        EntityType.DRIVER: Driver,
        EntityType.TRAINER: Trainer,
    }

    rows: list[dict] = []
    for et in entity_types:
        remaining = limit - len(rows)
        if remaining <= 0:
            break

        # Only ask for as many snapshots as are still needed.
        snapshots = list(
            RatingSnapshotRepository.get_top_ratings(db, et, limit=remaining)
        )[:remaining]

        names: dict = {}
        model = name_models.get(et)
        if model is not None:
            wanted_ids = list({snapshot.entity_id for snapshot in snapshots})
            for offset in range(0, len(wanted_ids), _RATING_EXPORT_NAME_CHUNK):
                chunk = wanted_ids[offset : offset + _RATING_EXPORT_NAME_CHUNK]
                for entity in db.query(model).filter(model.id.in_(chunk)).all():
                    names[entity.id] = entity.name

        for snapshot in snapshots:
            rows.append(
                {
                    "entity_type": et.value,
                    "entity_id": snapshot.entity_id,
                    "entity_name": names.get(snapshot.entity_id),
                    "rating": snapshot.rating,
                    "rd": snapshot.rd,
                    "race_count": (
                        snapshot.meta.get("race_count") if snapshot.meta else None
                    ),
                    "as_of_race_id": snapshot.as_of_race_id,
                }
            )

    return rows


@api_router.get("/export/ratings.csv")
@limiter.limit("10/minute")
def export_ratings_csv(
    request: Request,
    entity_type: str | None = Query(
        default=None,
        description="Filter by entity type: horse, driver, trainer (default: all)",
    ),
    limit: int = Query(default=5000, le=50000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export ratings as CSV.

    Returns a CSV attachment with a header row followed by rating
    snapshots from ``RatingSnapshotRepository.get_top_ratings`` for each
    selected entity type.

    Args:
        request: Incoming request (not read by this function)
        entity_type: Optional entity type filter
        limit: Maximum number of data rows across all entity types
        db: Database session

    Returns:
        ``text/csv`` response named ``ratings.csv``

    Raises:
        HTTPException: 400 when ``entity_type`` is not a valid entity type
    """
    entity_types = _resolve_export_entity_types(entity_type)

    output = StringIO()
    writer = csv.DictWriter(output, fieldnames=_RATING_EXPORT_COLUMNS)
    writer.writeheader()
    writer.writerows(_collect_rating_export_rows(db, entity_types, limit))

    return Response(
        content=output.getvalue(),
        media_type="text/csv",
        headers={"Content-Disposition": 'attachment; filename="ratings.csv"'},
    )


@api_router.get("/export/ratings.parquet")
@limiter.limit("10/minute")
def export_ratings_parquet(
    request: Request,
    entity_type: str | None = Query(
        default=None,
        description="Filter by entity type: horse, driver, trainer (default: all)",
    ),
    limit: int = Query(default=5000, le=50000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export ratings as Parquet.

    Returns a Parquet attachment with the same rows and columns as the
    CSV ratings export. Requires pandas and pyarrow to be installed.

    Args:
        request: Incoming request (not read by this function)
        entity_type: Optional entity type filter
        limit: Maximum number of rows across all entity types
        db: Database session

    Returns:
        ``application/octet-stream`` response named ``ratings.parquet``;
        the body is empty when there are no rows

    Raises:
        HTTPException: 501 when pandas or pyarrow is missing, 400 when
            ``entity_type`` is not a valid entity type
    """
    try:
        import pandas as pd
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="Parquet export requires pandas. Install with: pip install pandas",
        ) from None

    try:
        import pyarrow  # noqa: F401
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="Parquet export requires pyarrow. Install with: pip install pyarrow",
        ) from None

    entity_types = _resolve_export_entity_types(entity_type)
    rows = _collect_rating_export_rows(db, entity_types, limit)

    attachment = {"Content-Disposition": 'attachment; filename="ratings.parquet"'}

    if not rows:
        # NOTE(review): a zero-byte body is not a readable Parquet file;
        # kept as-is because existing clients may depend on it.
        return Response(
            content=b"",
            media_type="application/octet-stream",
            headers=attachment,
        )

    buffer = BytesIO()
    pd.DataFrame(rows).to_parquet(buffer, index=False)

    return Response(
        content=buffer.getvalue(),
        media_type="application/octet-stream",
        headers=attachment,
    )
# Column order shared by the CSV and Parquet prediction exports.
_PREDICTION_EXPORT_COLUMNS = [
    "race_id",
    "race_number",
    "venue",
    "distance_m",
    "race_date",
    "starter_id",
    "horse_id",
    "horse_name",
    "driver_id",
    "driver_name",
    "trainer_id",
    "trainer_name",
    "barrier",
    "handicap_m",
    "effective_rating",
    "win_probability",
    "place_probability",
    "place_score",
    "predicted_placing",
    "ci_lower",
    "ci_upper",
]

# Fixed-point formats applied to numeric columns in the CSV export only.
_PREDICTION_CSV_FORMATS = {
    "effective_rating": ".1f",
    "win_probability": ".4f",
    "place_probability": ".4f",
    "place_score": ".1f",
    "ci_lower": ".1f",
    "ci_upper": ".1f",
}


def _collect_prediction_export_records(
    db: Session, date_from: str | None, date_to: str | None, limit: int
) -> list[dict]:
    """Build at most ``limit`` per-runner prediction records.

    Selects races whose meeting date lies in the requested range (newest
    first), runs the prediction engine on each race that has starters,
    and flattens the runner predictions into dicts. No filter on race
    completion is applied here.

    Args:
        db: Database session
        date_from: Range start as YYYY-MM-DD; defaults to seven days ago
        date_to: Range end as YYYY-MM-DD; defaults to today
        limit: Maximum number of records (also caps the races queried)

    Returns:
        List of dicts keyed by ``_PREDICTION_EXPORT_COLUMNS`` with raw,
        unformatted values
    """
    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    start_date = (
        parse_date(date_from) if date_from else date.today() - timedelta(days=7)
    )
    end_date = parse_date(date_to) if date_to else date.today()

    races = (
        db.query(Race)
        # Meeting fields are read for every record; load them up front.
        .options(joinedload(Race.meeting))
        .join(Race.meeting)
        .filter(
            Meeting.meeting_date >= start_date,
            Meeting.meeting_date <= end_date,
        )
        .order_by(Meeting.meeting_date.desc(), Race.race_datetime.desc().nulls_last())
        .limit(limit)
        .all()
    )

    engine = PredictionEngine(db)
    records: list[dict] = []

    for race in races:
        if len(records) >= limit:
            break

        starters = db.query(Starter).filter(Starter.race_id == race.id).all()
        if not starters:
            continue

        try:
            prediction = engine.predict_race(race, starters)
        except Exception:
            # Best effort: one failing race must not abort the export,
            # but the failure should be visible in the logs.
            logger.warning(
                "Skipping race in predictions export: prediction failed",
                extra={"race_id": race.id},
                exc_info=True,
            )
            continue

        meeting = race.meeting
        venue = meeting.venue if meeting else None
        race_day = (
            meeting.meeting_date.isoformat()
            if meeting and meeting.meeting_date
            else None
        )

        for pred in prediction.predictions:
            if len(records) >= limit:
                break
            records.append(
                {
                    "race_id": race.id,
                    "race_number": race.race_number,
                    "venue": venue,
                    "distance_m": race.distance_m,
                    "race_date": race_day,
                    "starter_id": pred.starter_id,
                    "horse_id": pred.horse_id,
                    "horse_name": pred.horse_name,
                    "driver_id": pred.driver_id,
                    "driver_name": pred.driver_name,
                    "trainer_id": pred.trainer_id,
                    "trainer_name": pred.trainer_name,
                    "barrier": pred.barrier,
                    "handicap_m": pred.handicap_m,
                    "effective_rating": pred.effective_rating,
                    "win_probability": pred.win_probability,
                    "place_probability": pred.place_probability,
                    "place_score": pred.place_score,
                    "predicted_placing": pred.predicted_placing,
                    "ci_lower": pred.confidence_interval_low,
                    "ci_upper": pred.confidence_interval_high,
                }
            )

    return records


def _prediction_csv_cell(column: str, value):
    """Render one CSV cell: blank for None, fixed-point for numeric columns."""
    if value is None:
        return ""
    spec = _PREDICTION_CSV_FORMATS.get(column)
    return format(value, spec) if spec else value


@api_router.get("/export/predictions.csv")
@limiter.limit("10/minute")
def export_predictions_csv(
    request: Request,
    date_from: str | None = Query(default=None, description="YYYY-MM-DD"),
    date_to: str | None = Query(default=None, description="YYYY-MM-DD"),
    limit: int = Query(default=1000, le=10000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export recent predictions as CSV.

    Returns a CSV attachment with one row per predicted runner for races
    whose meeting date falls in the requested range. Missing values are
    written as empty cells; zero values (e.g. a 0 m handicap) are kept.

    Args:
        request: Incoming request (not read by this function)
        date_from: Range start as YYYY-MM-DD; defaults to seven days ago
        date_to: Range end as YYYY-MM-DD; defaults to today
        limit: Maximum number of data rows
        db: Database session

    Returns:
        ``text/csv`` response named ``predictions.csv``
    """
    records = _collect_prediction_export_records(db, date_from, date_to, limit)

    output = StringIO()
    writer = csv.writer(output)
    writer.writerow(_PREDICTION_EXPORT_COLUMNS)
    for record in records:
        writer.writerow(
            [
                _prediction_csv_cell(column, record[column])
                for column in _PREDICTION_EXPORT_COLUMNS
            ]
        )

    return Response(
        content=output.getvalue(),
        media_type="text/csv",
        headers={"Content-Disposition": 'attachment; filename="predictions.csv"'},
    )


@api_router.get("/export/predictions.parquet")
@limiter.limit("10/minute")
def export_predictions_parquet(
    request: Request,
    date_from: str | None = Query(default=None, description="YYYY-MM-DD"),
    date_to: str | None = Query(default=None, description="YYYY-MM-DD"),
    limit: int = Query(default=1000, le=10000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export recent predictions as Parquet.

    Returns a Parquet attachment with one row per predicted runner for
    races whose meeting date falls in the requested range, using raw
    (unformatted) values. Requires pandas and pyarrow to be installed.

    Args:
        request: Incoming request (not read by this function)
        date_from: Range start as YYYY-MM-DD; defaults to seven days ago
        date_to: Range end as YYYY-MM-DD; defaults to today
        limit: Maximum number of rows
        db: Database session

    Returns:
        ``application/octet-stream`` response named ``predictions.parquet``;
        the body is empty when there are no rows

    Raises:
        HTTPException: 501 when pandas or pyarrow is missing
    """
    try:
        import pandas as pd
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="Parquet export requires pandas. Install with: pip install pandas",
        ) from None

    try:
        import pyarrow  # noqa: F401
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="Parquet export requires pyarrow. Install with: pip install pyarrow",
        ) from None

    rows = _collect_prediction_export_records(db, date_from, date_to, limit)

    attachment = {"Content-Disposition": 'attachment; filename="predictions.parquet"'}

    if not rows:
        # NOTE(review): a zero-byte body is not a readable Parquet file;
        # kept as-is because existing clients may depend on it.
        return Response(
            content=b"",
            media_type="application/octet-stream",
            headers=attachment,
        )

    buffer = BytesIO()
    pd.DataFrame(rows).to_parquet(buffer, index=False)

    return Response(
        content=buffer.getvalue(),
        media_type="application/octet-stream",
        headers=attachment,
    )
@api_router.get("/export/race-predictions.pdf")
@limiter.limit("10/minute")
def export_race_predictions_pdf(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Export race predictions as a PDF document.

    Generates a PDF with a structured table showing each runner's rating,
    win probability, place probability, predicted placing, and confidence
    interval. Requires the ``reportlab`` library.

    Args:
        request: Incoming request (not read by this function).
        race_id: Race ID to export.
        db: Database session.

    Returns:
        PDF binary response with appropriate Content-Type and disposition.

    Raises:
        HTTPException: 501 when reportlab is missing; 404 when the race
            does not exist or has no starters.
    """
    try:
        from reportlab.lib import colors
        from reportlab.lib.pagesizes import letter
        from reportlab.lib.styles import getSampleStyleSheet
        from reportlab.lib.units import inch
        from reportlab.platypus import (
            Paragraph,
            SimpleDocTemplate,
            Spacer,
            Table,
            TableStyle,
        )
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="PDF export requires reportlab. Install with: pip install reportlab",
        ) from None

    from xml.sax.saxutils import escape

    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    # ── Resolve race ─────────────────────────────────────────────────
    # One query both checks existence and loads the meeting (venue / date).
    race = (
        db.query(Race)
        .options(joinedload(Race.meeting))
        .filter(Race.id == race_id)
        .first()
    )
    if not race:
        raise HTTPException(status_code=404, detail="Race not found")

    starters = db.query(Starter).filter(Starter.race_id == race_id).all()
    if not starters:
        raise HTTPException(status_code=404, detail="No starters found for race")

    engine = PredictionEngine(db)
    prediction = engine.predict_race(race, starters)

    # ── Build PDF ────────────────────────────────────────────────────
    buffer = BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter)
    styles = getSampleStyleSheet()
    elements = []

    # Title
    venue = race.meeting.venue if race.meeting else "Unknown"
    race_date = (
        race.meeting.meeting_date.isoformat()
        if race.meeting and race.meeting.meeting_date
        else ""
    )
    # Paragraph text is parsed as markup, so database-sourced text is
    # escaped to keep characters such as "&" or "<" from breaking it.
    title_text = (
        f"Race {race.race_number} — {escape(str(venue))} ({race_date})<br/>"
        f"Distance: {race.distance_m}m | "
        f"Starters: {len(prediction.predictions)}"
    )
    elements.append(Paragraph("<b>TipSharks — Race Predictions</b>", styles["Title"]))
    elements.append(Spacer(1, 0.15 * inch))
    elements.append(Paragraph(title_text, styles["Normal"]))
    elements.append(Spacer(1, 0.2 * inch))

    # Table header + rows
    table_data = [
        ["#", "Horse", "Driver", "Rating", "Win%", "Place%", "Pred.", "CI Range"],
    ]
    for idx, pred in enumerate(prediction.predictions, start=1):
        ci_low = pred.confidence_interval_low
        ci_high = pred.confidence_interval_high
        # Both bounds are formatted, so both must be present.
        ci = (
            f"{ci_low:.0f}–{ci_high:.0f}"
            if ci_low is not None and ci_high is not None
            else "—"
        )
        table_data.append(
            [
                str(idx),
                pred.horse_name or f"ID {pred.horse_id}",
                pred.driver_name or "—",
                f"{pred.effective_rating:.1f}",
                f"{pred.win_probability:.1%}",
                f"{pred.place_probability:.1%}",
                str(pred.predicted_placing),
                ci,
            ]
        )

    # Column widths
    col_widths = [
        0.3 * inch,
        1.6 * inch,
        1.4 * inch,
        0.8 * inch,
        0.7 * inch,
        0.7 * inch,
        0.5 * inch,
        1.2 * inch,
    ]
    table = Table(table_data, colWidths=col_widths, repeatRows=1)
    table.setStyle(
        TableStyle(
            [
                ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#1a5276")),
                ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
                ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                ("FONTSIZE", (0, 0), (-1, 0), 9),
                ("FONTSIZE", (0, 1), (-1, -1), 8),
                ("ALIGN", (0, 0), (-1, -1), "CENTER"),
                ("ALIGN", (1, 1), (2, -1), "LEFT"),
                ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
                ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
                (
                    "ROWBACKGROUNDS",
                    (0, 1),
                    (-1, -1),
                    [colors.white, colors.HexColor("#f2f3f4")],
                ),
                ("TOPPADDING", (0, 0), (-1, -1), 4),
                ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
            ]
        )
    )
    elements.append(table)

    # Footer metadata
    elements.append(Spacer(1, 0.2 * inch))
    field_size = prediction.metadata.get("field_size", "—")
    avg_rating = prediction.metadata.get("avg_rating")
    # Apply the float format only to a real value; formatting the "—"
    # placeholder with ".1f" would raise ValueError.
    avg_rating_text = f"{avg_rating:.1f}" if avg_rating is not None else "—"
    elements.append(
        Paragraph(
            f"Generated: {datetime.now(UTC).isoformat()} | "
            f"Field size: {field_size} | "
            f"Avg rating: {avg_rating_text}",
            styles["Normal"],
        )
    )

    doc.build(elements)
    pdf_bytes = buffer.getvalue()

    filename = f"race_{race_id}_predictions.pdf"
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
            "Content-Length": str(len(pdf_bytes)),
        },
    )
3096# ── WebSocket endpoint for live race updates ────────────────────────
@app.websocket("/ws/races/{race_id}")
async def websocket_race_endpoint(websocket: WebSocket, race_id: int):
    """WebSocket endpoint for live race updates.

    On connect, validates the race exists in the database and sends
    the initial race state (race details + starters). Starts the
    simulation for the race if it is not already running, then answers
    ``subscribe`` messages until the client disconnects. The simulation
    is stopped when the last client for the race leaves.

    Close codes used before the connection is registered: 4004 when the
    race does not exist, 1011 when loading it fails.

    Args:
        websocket: The WebSocket connection.
        race_id: Race identifier.
    """
    from apps.backend.api.websocket import _build_initial_state, manager
    from packages.core.storage.models import Starter

    # Validate the race and build the initial state in a single query
    race_found = False
    initial_state = None
    try:
        with get_session() as session:
            race = (
                session.query(Race)
                .options(
                    joinedload(Race.meeting),
                    joinedload(Race.starters).joinedload(Starter.horse),
                    joinedload(Race.starters).joinedload(Starter.driver),
                    joinedload(Race.starters).joinedload(Starter.trainer),
                )
                .filter(Race.id == race_id)
                .first()
            )
            if race is not None:
                race_found = True
                # Built while the session is still open
                initial_state = _build_initial_state(race, race.starters)
    except Exception:
        logger.error("Failed to validate race for WebSocket", exc_info=True)
        await websocket.close(code=1011, reason="Internal server error")
        return

    if not race_found:
        await websocket.close(code=4004, reason="Race not found")
        return

    # Accept connection and register
    await manager.connect(websocket, race_id)

    # Everything after registration runs under the finally block so the
    # connection is always unregistered, even if the first send fails.
    try:
        # Send initial state
        await manager.send_personal_message(initial_state, websocket)

        # Start simulation on first connection
        if not manager.is_simulation_running(race_id):
            manager.start_simulation(race_id)

        while True:
            data = await websocket.receive_text()
            try:
                msg = json.loads(data)
            except json.JSONDecodeError:
                logger.warning(
                    "Invalid JSON received on WebSocket",
                    extra={"race_id": race_id},
                )
                continue

            # Valid JSON that is not an object (list, number, string...)
            # has no fields to read; ignore it instead of dropping the client.
            if not isinstance(msg, dict):
                logger.warning(
                    "Non-object JSON received on WebSocket",
                    extra={"race_id": race_id},
                )
                continue

            msg_type = msg.get("type", "")
            msg_race_id = msg.get("race_id", race_id)

            if msg_type == "subscribe" and isinstance(msg_race_id, int):
                logger.info(
                    "Client subscribed to race updates",
                    extra={"race_id": msg_race_id},
                )
                # Confirm subscription
                await manager.send_personal_message(
                    json.dumps({"type": "subscribed", "race_id": msg_race_id}),
                    websocket,
                )
            else:
                logger.debug(
                    "Unknown WebSocket message type",
                    extra={"type": msg_type, "race_id": race_id},
                )
    except WebSocketDisconnect:
        logger.info("WebSocket client disconnected", extra={"race_id": race_id})
    except Exception:
        logger.error("WebSocket error", extra={"race_id": race_id}, exc_info=True)
    finally:
        await manager.disconnect(websocket, race_id)
        # Stop simulation when last client leaves
        if manager.get_connection_count(race_id) == 0 and manager.is_simulation_running(
            race_id
        ):
            manager.stop_simulation(race_id)
# Prometheus metrics endpoint stub
# In production, integrate with the prometheus-client library.
# See docs/monitoring.md for instrumentation guidance.
@app.get("/metrics", response_class=PlainTextResponse)
def metrics():
    """Serve a placeholder body on the Prometheus scrape path.

    No real metrics are produced. To serve real metrics:
    1. pip install prometheus-client
    2. Create counters/histograms/gauges in a metrics module
    3. Call generate_latest(REGISTRY) here

    See docs/monitoring.md for the full instrumentation guide.

    Returns:
        Plain-text response with status 200 containing two comment lines
    """
    placeholder_body = (
        "# TipSharks API metrics stub\n"
        "# Integrate prometheus-client for production use\n"
    )
    return PlainTextResponse(content=placeholder_body, status_code=200)
# Include the versioned API router
# (done here, after the @api_router routes above have been declared)
app.include_router(api_router)

# When executed directly as a script, serve the app with uvicorn on all
# IPv4 interfaces (0.0.0.0), port 8000.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when running as a script
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)