Coverage for apps / backend / api / main.py: 48%

1081 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-08 08:37 +1200

1"""FastAPI application for TipSharks ratings API.""" 

2 

3import asyncio 

4import csv 

5import hashlib 

6import json 

7import re 

8from datetime import UTC, date, datetime, timedelta 

9from io import BytesIO, StringIO 

10from pathlib import Path 

11from typing import Any 

12 

13from fastapi import ( 

14 APIRouter, 

15 Depends, 

16 FastAPI, 

17 HTTPException, 

18 Query, 

19 Request, 

20 Response, 

21 Security, 

22 WebSocket, 

23 WebSocketDisconnect, 

24) 

25from fastapi.middleware.cors import CORSMiddleware 

26from fastapi.responses import HTMLResponse, PlainTextResponse, RedirectResponse 

27from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer 

28from fastapi.staticfiles import StaticFiles 

29from pydantic import BaseModel, ConfigDict, Field 

30from slowapi import Limiter, _rate_limit_exceeded_handler 

31from slowapi.errors import RateLimitExceeded 

32from sqlalchemy import func 

33from sqlalchemy.orm import Session, joinedload 

34 

35from packages.core.common.logging import get_logger, setup_logging 

36from packages.core.common.rate_limit import get_user_rate_limit_key 

37from packages.core.common.scheduler import TipSharksScheduler 

38from packages.core.common.settings import HRNZ_ALL_CLUB_CODES, get_settings 

39from packages.core.common.utils import parse_date 

40from packages.core.ratings.recompute import recompute_ratings 

41from packages.core.storage.audit import AuditLogger 

42from packages.core.storage.database import get_session 

43from packages.core.storage.ingestion import IngestionService 

44from packages.core.storage.models import ( 

45 AuditLog, 

46 Driver, 

47 EntityType, 

48 Horse, 

49 Meeting, 

50 Race, 

51 RatingSnapshot, 

52 Trainer, 

53) 

54from packages.core.storage.repositories import RatingSnapshotRepository 

55 

# Bootstrap: logging, settings, scheduler, rate limiter, app and router.
setup_logging()
logger = get_logger(__name__)
settings = get_settings()

# One shared scheduler instance; the startup/shutdown hooks below drive it.
scheduler = TipSharksScheduler()

# Rate limiting is keyed per user when the mobile client supplies an
# X-User-ID header and per IP for anonymous (e.g. browser) requests.
limiter = Limiter(key_func=get_user_rate_limit_key)

app = FastAPI(
    title="TipSharks API",
    description="Advanced harness racing ratings and predictions powered by multi-runner Elo algorithms",
    version="0.2.0",
)

# Expose the limiter on app state and register the rate-limit error handler.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Versioned API endpoints are registered on this router under /v1.
api_router = APIRouter(prefix="/v1")

# Un-versioned path prefixes that are redirected to their /v1/ equivalents.
_LEGACY_API_PREFIXES = {
    "/ratings",
    "/races",
    "/predictions",
    "/analytics",
    "/admin",
    "/webhook",
}

91 

# CORS middleware.
# ``settings.api.cors_allow_origins`` is either "*" or a comma-separated list
# of origins. Blank entries (e.g. from a trailing comma) are dropped.
_cors_setting = settings.api.cors_allow_origins.strip()
_cors_allow_all = _cors_setting == "*"
cors_origins = (
    ["*"]
    if _cors_allow_all
    else [origin.strip() for origin in _cors_setting.split(",") if origin.strip()]
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    # A wildcard origin must not be combined with credentialed requests:
    # it would let any site make cookie-bearing calls. Credentials are
    # therefore only enabled when an explicit origin list is configured.
    allow_credentials=not _cors_allow_all,
    allow_methods=["*"],
    allow_headers=["*"],
)

106 

107 

@app.middleware("http")
async def legacy_api_redirects(request: Request, call_next):
    """Redirect legacy un-versioned API paths to their /v1/ equivalents.

    Any request whose path starts with one of ``_LEGACY_API_PREFIXES`` is
    answered with a permanent redirect to ``/v1<path>``, preserving the query
    string. All other requests are passed to the next handler unchanged.

    GET and HEAD keep the historical 301 status. Every other method uses 308,
    because clients are allowed to rewrite a redirected POST into a GET after
    a 301, which would silently drop the request body.
    """
    path = request.url.path
    if not path.startswith(tuple(_LEGACY_API_PREFIXES)):
        return await call_next(request)

    target = f"/v1{path}"
    # Preserve query string if present
    if request.url.query:
        target = f"{target}?{request.url.query}"

    status_code = 301 if request.method in ("GET", "HEAD") else 308
    return RedirectResponse(url=target, status_code=status_code)

125 

126 

@app.middleware("http")
async def add_no_cache_headers(request: Request, call_next):
    """Mark ratings/races responses as non-cacheable for browsers and proxies.

    Headers already present on the response are left untouched; only missing
    ones are filled in.
    """
    response = await call_next(request)

    uncached_prefixes = ("/v1/ratings", "/v1/races", "/ratings", "/races")
    if not request.url.path.startswith(uncached_prefixes):
        return response

    no_cache_defaults = (
        ("Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"),
        ("Pragma", "no-cache"),
        ("Expires", "0"),
    )
    for header_name, header_value in no_cache_defaults:
        response.headers.setdefault(header_name, header_value)
    return response

138 

139 

@app.middleware("http")
async def add_request_id(request: Request, call_next):
    """Attach a request ID to every request and response.

    Uses the incoming ``x-request-id`` header when it carries a non-blank
    value, otherwise generates a new UUID4. The ID is stored on
    ``request.state.request_id`` and echoed back in the ``x-request-id``
    response header.
    """
    import uuid

    # A present-but-empty header must not produce an empty request ID, and the
    # UUID is only generated when it is actually needed.
    supplied_id = request.headers.get("x-request-id", "").strip()
    request_id = supplied_id or str(uuid.uuid4())

    request.state.request_id = request_id
    response = await call_next(request)
    response.headers["x-request-id"] = request_id
    return response

155 

156 

@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log basic information about every API request.

    Records HTTP method, path, status code, duration in milliseconds, and the
    request ID if ``request.state.request_id`` has been set.
    """
    from time import perf_counter

    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump when the system clock is adjusted mid-request.
    started = perf_counter()
    response = await call_next(request)
    elapsed = perf_counter() - started

    logger.info(
        "API request",
        extra={
            "method": request.method,
            "path": request.url.path,
            "status_code": response.status_code,
            "duration_ms": round(elapsed * 1000, 2),
            "request_id": getattr(request.state, "request_id", None),
        },
    )
    return response

180 

181 

def _if_none_match_satisfied(header_value: str, etag_hex: str) -> bool:
    """Return True when an ``If-None-Match`` header value matches *etag_hex*.

    Accepts a comma-separated list of entity tags, the ``*`` wildcard, and
    weak tags (``W/"..."``); If-None-Match uses weak comparison, so the weak
    prefix is ignored.
    """
    for candidate in header_value.split(","):
        candidate = candidate.strip()
        if candidate == "*":
            return True
        if candidate.startswith("W/"):
            candidate = candidate[2:]
        if candidate.strip('"') == etag_hex:
            return True
    return False


@app.middleware("http")
async def rating_etag_middleware(request: Request, call_next):
    """Add ETag and Last-Modified headers to rating endpoints.

    Applies only to paths under ``/v1/ratings/horses``, ``/v1/ratings/drivers``
    and ``/v1/ratings/trainers``.

    * ``ETag`` is the MD5 hex digest of the response body.
    * ``Last-Modified`` is the newest ``RatingSnapshot.created_at`` value
      (treated as UTC when naive). It is omitted if the lookup fails.
    * ``If-None-Match`` is honoured for successful GET/HEAD requests: a
      matching tag yields ``304 Not Modified`` with no body.
    * ``If-Modified-Since`` is NOT used to short-circuit the request; the
      body hash is the only validator that produces a 304.
    """
    from email.utils import format_datetime

    path = request.url.path

    # Only apply to rating detail and list endpoints
    rating_prefixes = (
        "/v1/ratings/horses",
        "/v1/ratings/drivers",
        "/v1/ratings/trainers",
    )
    if not path.startswith(rating_prefixes):
        return await call_next(request)

    # Most recent rating snapshot timestamp, used for Last-Modified.
    last_modified: datetime | None = None
    try:
        with get_session() as session:
            latest_ts = session.query(func.max(RatingSnapshot.created_at)).scalar()
        if latest_ts is not None:
            if latest_ts.tzinfo is None:
                latest_ts = latest_ts.replace(tzinfo=UTC)
            # Normalise to UTC so the header really is GMT.
            last_modified = latest_ts.astimezone(UTC)
    except Exception:
        # Best effort: the response is still served, just without Last-Modified.
        logger.warning(
            "Could not determine Last-Modified for rating response", exc_info=True
        )

    response = await call_next(request)

    # Read the response body to compute the ETag.
    try:
        body = b"".join([chunk async for chunk in response.body_iterator])
    except Exception:
        # If we can't read the body, pass the response through without an ETag.
        logger.warning(
            "Could not read rating response body for ETag", exc_info=True
        )
        return response

    # MD5 is used as a content fingerprint here, not for security.
    etag_hex = hashlib.md5(body, usedforsecurity=False).hexdigest()
    etag_value = f'"{etag_hex}"'
    response.headers["ETag"] = etag_value

    if last_modified is not None:
        # format_datetime emits the locale-independent HTTP date format.
        response.headers["Last-Modified"] = format_datetime(
            last_modified, usegmt=True
        )

    # Conditional request: only successful GET/HEAD responses may become 304.
    if_none_match = request.headers.get("if-none-match")
    revalidatable = (
        request.method in ("GET", "HEAD") and 200 <= response.status_code < 300
    )
    if (
        revalidatable
        and if_none_match
        and _if_none_match_satisfied(if_none_match, etag_hex)
    ):
        not_modified_headers = {
            "ETag": etag_value,
            "Cache-Control": "no-store, no-cache, must-revalidate, max-age=0",
            "Pragma": "no-cache",
            "Expires": "0",
        }
        # Only send Last-Modified when a timestamp is actually known.
        if "Last-Modified" in response.headers:
            not_modified_headers["Last-Modified"] = response.headers["Last-Modified"]
        return Response(status_code=304, headers=not_modified_headers)

    # Reconstruct the response with captured body (body_iterator is consumed)
    return Response(
        content=body,
        status_code=response.status_code,
        headers=dict(response.headers),
        media_type=response.media_type,
    )

272 

273 

# Static assets for the web UI live in the sibling "web" directory.
web_dir = Path(__file__).parents[1] / "web"
if web_dir.exists():
    static_dir = web_dir / "static"
    app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")

    # The generated docs site is served from static/docs; the directory is
    # created if missing before it is mounted.
    docs_site_dir = static_dir / "docs"
    docs_site_dir.mkdir(parents=True, exist_ok=True)
    app.mount(
        "/docs/site",
        StaticFiles(directory=str(docs_site_dir), html=True),
        name="docs-site",
    )

285 

286 

287# ── Scheduler lifecycle hooks ───────────────────────────────────── 

288 

289 

@app.on_event("startup")
async def start_scheduler():
    """Start the background scheduler when the application starts.

    Does nothing except log a message when ``settings.scheduler.enabled`` is
    false. Otherwise starts the scheduler, loads its default jobs and logs
    which job IDs (if any) were loaded.
    """
    if not settings.scheduler.enabled:
        logger.info("Scheduler is disabled via settings")
        return

    scheduler.start()
    loaded_job_ids = scheduler.load_default_jobs()
    if not loaded_job_ids:
        logger.info("No default scheduler jobs configured")
        return
    logger.info("Default scheduler jobs loaded", extra={"job_ids": loaded_job_ids})

308 

309 

@app.on_event("shutdown")
async def stop_scheduler():
    """Stop the background scheduler on application shutdown, if it is running."""
    if not scheduler.running:
        return
    # wait=True is passed so shutdown is requested in blocking mode.
    scheduler.shutdown(wait=True)
    logger.info("Scheduler shut down on app shutdown")

316 

317 

318security = HTTPBearer() 

319 

320 

321# Dependency for database session 

def get_db():
    """Yield a database session for use as a FastAPI dependency.

    The session comes from ``get_session()`` and its context manager is
    exited once the dependent request handler has finished.
    """
    with get_session() as db_session:
        yield db_session

326 

327 

328# Dependency for admin auth 

def verify_admin_token(credentials: HTTPAuthorizationCredentials = Security(security)):
    """Verify the bearer token for admin-protected endpoints.

    Args:
        credentials: Bearer credentials extracted by the ``security`` scheme.

    Returns:
        The supplied token string when it matches ``settings.api.admin_token``.

    Raises:
        HTTPException: 401 when the token does not match, or when no
            non-empty admin token is configured.
    """
    import secrets

    supplied_token = credentials.credentials
    expected_token = settings.api.admin_token

    # compare_digest takes time independent of where the strings differ, so
    # the token cannot be recovered by timing repeated guesses. Both sides
    # are encoded because compare_digest rejects non-ASCII str arguments.
    token_matches = (
        isinstance(expected_token, str)
        and bool(expected_token)
        and secrets.compare_digest(
            supplied_token.encode("utf-8"), expected_token.encode("utf-8")
        )
    )
    if not token_matches:
        raise HTTPException(status_code=401, detail="Invalid authentication token")
    return supplied_token

334 

335 

336# Pydantic models for API responses 

class HealthResponse(BaseModel):
    """Payload returned by the health check endpoint."""

    # Service state label, e.g. "healthy".
    status: str
    # API version string.
    version: str

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {"status": "healthy", "version": "0.2.0"},
            ],
        },
    )

349 

350 

class RatingResponse(BaseModel):
    """A single entity's rating as returned by the rating list endpoints."""

    # Kind of entity, e.g. "horse".
    entity_type: str
    entity_id: int
    entity_name: str | None = None
    rating: float
    # NOTE(review): presumably the rating deviation — confirm with the ratings package.
    rd: float | None = None
    race_count: int | None = None
    # ID of the race this rating is taken as of.
    as_of_race_id: int

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "entity_type": "horse",
                    "entity_id": 42,
                    "entity_name": "Some Delight",
                    "rating": 1520.5,
                    "rd": 85.3,
                    "race_count": 24,
                    "as_of_race_id": 12345,
                },
            ],
        },
    )

376 

377 

class RatingHistoryItem(BaseModel):
    """One point in an entity's rating history."""

    race_id: int
    rating: float
    rd: float | None = None
    # Date as a string; the schema examples elsewhere use YYYY-MM-DD.
    race_date: str | None = None

385 

386 

class HorseDetailResponse(BaseModel):
    """Horse detail payload: identity, current rating and rating history."""

    horse_id: int
    name: str
    # Current rating values; both may be absent.
    current_rating: float | None = None
    current_rd: float | None = None
    race_count: int
    # Defaults to an empty history.
    rating_history: list[RatingHistoryItem] = []

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "horse_id": 42,
                    "name": "Some Delight",
                    "current_rating": 1520.5,
                    "current_rd": 85.3,
                    "race_count": 24,
                    "rating_history": [
                        {
                            "race_id": 12345,
                            "rating": 1520.5,
                            "rd": 85.3,
                            "race_date": "2026-05-01",
                        },
                        {
                            "race_id": 12340,
                            "rating": 1510.0,
                            "rd": 90.1,
                            "race_date": "2026-04-28",
                        },
                    ],
                },
            ],
        },
    )

423 

424 

class DriverDetailResponse(BaseModel):
    """Driver detail payload: identity, current rating and rating history."""

    driver_id: int
    name: str
    # Current rating values; both may be absent.
    current_rating: float | None = None
    current_rd: float | None = None
    race_count: int
    # Defaults to an empty history.
    rating_history: list[RatingHistoryItem] = []

434 

435 

class TrainerDetailResponse(BaseModel):
    """Trainer detail payload: identity, current rating and rating history."""

    trainer_id: int
    name: str
    # Current rating values; both may be absent.
    current_rating: float | None = None
    current_rd: float | None = None
    race_count: int
    # Defaults to an empty history.
    rating_history: list[RatingHistoryItem] = []

445 

446 

class PaginationMeta(BaseModel):
    """Paging information for list responses, including navigation links."""

    # Total number of results across all pages.
    total: int
    # Page size and starting position of the current page.
    limit: int
    offset: int
    # Links to the adjacent pages; None when there is no such page.
    next: str | None = None
    prev: str | None = None

455 

456 

class PaginatedRatingResponse(BaseModel):
    """Paginated response for rating list endpoints."""

    # The ratings on the current page.
    data: list[RatingResponse]
    # Paging totals and next/prev links.
    meta: PaginationMeta

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "data": [
                        {
                            "entity_type": "horse",
                            "entity_id": 42,
                            "entity_name": "Some Delight",
                            "rating": 1520.5,
                            "rd": 85.3,
                            "race_count": 24,
                            "as_of_race_id": 12345,
                        },
                    ],
                    "meta": {
                        # total exceeds limit so that the "next" link shown
                        # below is consistent with the paging values.
                        "total": 250,
                        "limit": 100,
                        "offset": 0,
                        "next": "/v1/ratings/horses?limit=100&offset=100",
                        "prev": None,
                    },
                },
            ],
        },
    )

488 

489 

class PaginatedRaceResponse(BaseModel):
    """Paginated response for race list endpoints."""

    # Races on the current page, as untyped dictionaries.
    data: list[dict]
    # Paging totals and next/prev links.
    meta: PaginationMeta

495 

496 

class IngestionRequest(BaseModel):
    """Body of a request that triggers ingestion for a date range."""

    # Both bounds are required strings in YYYY-MM-DD form.
    date_from: str = Field(..., description="Start date (YYYY-MM-DD)")
    date_to: str = Field(..., description="End date (YYYY-MM-DD)")

502 

503 

class IngestionResponse(BaseModel):
    """Counts reported back after an ingestion run."""

    meetings: int
    races: int
    starters: int
    errors: int

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {"meetings": 3, "races": 24, "starters": 192, "errors": 0},
            ],
        },
    )

518 

519 

class RecomputeRequest(BaseModel):
    """Body of a request that triggers a ratings recompute for a date range."""

    # Required date bounds in YYYY-MM-DD form.
    date_from: str = Field(..., description="Start date (YYYY-MM-DD)")
    date_to: str = Field(..., description="End date (YYYY-MM-DD)")
    # Optional switches; both are off unless requested.
    clear_existing: bool = Field(
        default=False,
        description="Clear existing ratings first",
    )
    learn_adjustments: bool = Field(
        default=False,
        description="Learn barrier/handicap adjustments",
    )

531 

532 

class RecomputeResponse(BaseModel):
    """Result of a recompute run."""

    # Number of rating snapshots created by the run.
    snapshots_created: int

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {"snapshots_created": 1520},
            ],
        },
    )

544 

545 

class ScrapeRequest(BaseModel):
    """Body of a request that triggers a scrape/ingestion."""

    # What to scrape: explicit URLs and/or club codes (a list or one string).
    urls: list[str] | None = Field(
        default=None,
        description="Optional HRNZ results URLs or URL paths (e.g., 010741rs.htm)",
    )
    club_codes: list[str] | str | None = Field(
        default=None,
        description="HRNZ club codes to generate URLs for (two digits each)",
    )
    # Required date bounds in YYYY-MM-DD form.
    date_from: str = Field(..., description="Start date (YYYY-MM-DD)")
    date_to: str = Field(..., description="End date (YYYY-MM-DD)")
    # Post-ingest options; recompute is on by default, the others are off.
    recompute: bool = Field(default=True, description="Recompute ratings after ingest")
    clear_existing: bool = Field(
        default=False,
        description="Clear existing ratings before recompute",
    )
    learn_adjustments: bool = Field(
        default=False,
        description="Learn barrier/handicap adjustments",
    )

566 

567 

class ScrapeResponse(BaseModel):
    """Counts and recompute outcome returned by the scrape/ingestion webhook."""

    # Per-entity counts for the run.
    meetings: int
    races: int
    starters: int
    horses: int
    drivers: int
    trainers: int
    errors: int
    # Whether ratings were recomputed, and how many snapshots were created.
    recomputed: bool
    snapshots_created: int

580 

581 

582# ── Scheduler job management models ──────────────────────────────── 

583 

584 

class SchedulerJobInfo(BaseModel):
    """Description of one scheduled job."""

    id: str
    name: str
    # Next run time as a string; None when not available.
    next_run_time: str | None = None
    # String description of the job's trigger.
    trigger: str

592 

593 

class SchedulerJobListResponse(BaseModel):
    """List of scheduled jobs together with a count."""

    jobs: list[SchedulerJobInfo]
    total: int

599 

600 

class AddSchedulerJobRequest(BaseModel):
    """Body of a request that adds a scheduled job.

    ``job_type`` and ``cron_expr`` are required; the remaining fields are
    options grouped by the job type they apply to.
    """

    job_type: str = Field(..., description="Job type: ingest, recompute, or scrape")
    cron_expr: str = Field(..., description="Cron expression for scheduling")
    job_id: str | None = Field(
        default=None,
        description="Optional custom job ID (auto-generated if omitted)",
    )

    # Options for ingest jobs.
    category: str = Field(default="H", description="Racing category (T, H, G)")
    source: str = Field(default="tab", description="Data source (tab, ingest)")

    # Options for recompute jobs.
    clear: bool = Field(
        default=False,
        description="Clear existing ratings before recompute",
    )

    # Options for scrape jobs.
    urls: list[str] | None = Field(
        default=None,
        description="HRNZ result URLs to scrape",
    )
    club_codes: list[str] | None = Field(default=None, description="HRNZ club codes")

    # Optional date range.
    date_from: str | None = Field(default=None, description="Start date (YYYY-MM-DD)")
    date_to: str | None = Field(default=None, description="End date (YYYY-MM-DD)")

624 

625 

class AddSchedulerJobResponse(BaseModel):
    """Confirmation returned after a scheduled job has been added."""

    # ID of the job that was added.
    job_id: str
    message: str

631 

632 

class RemoveSchedulerJobResponse(BaseModel):
    """Outcome of a request to remove a scheduled job."""

    job_id: str
    # True when the job was removed.
    removed: bool
    message: str

639 

640 

class PredictionResponse(BaseModel):
    """Prediction for one starter in a race."""

    # Who is running: horse is required, driver/trainer details are optional.
    horse_id: int
    horse_name: str | None = None
    driver_id: int | None = None
    driver_name: str | None = None
    trainer_id: int | None = None
    trainer_name: str | None = None
    barrier: int | None = None

    # Model outputs.
    effective_rating: float
    win_probability: float
    place_probability: float
    place_score: float
    predicted_placing: int
    # NOTE(review): presumably confidence-interval bounds on the rating — confirm.
    ci_lower: float | None = None
    ci_upper: float | None = None

    # NOTE(review): presumably the actual finishing position when known — confirm.
    placing: int | None = None

659 

660 

class RacePredictionResponse(BaseModel):
    """Predictions for every starter in one race, plus basic race details."""

    race_id: int
    race_number: int | None = None
    venue: str | None = None
    # Race distance; the field name indicates metres.
    distance_m: int | None = None
    # Defaults to an empty list of predictions.
    predictions: list[PredictionResponse] = []

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "race_id": 12345,
                    "race_number": 7,
                    "venue": "Addington",
                    "distance_m": 1980,
                    "predictions": [
                        {
                            "horse_id": 42,
                            "horse_name": "Some Delight",
                            "driver_id": 101,
                            "driver_name": "Ricky May",
                            "trainer_id": 201,
                            "trainer_name": "Mark Purdon",
                            "barrier": 3,
                            "effective_rating": 1520.5,
                            "win_probability": 0.35,
                            "place_probability": 0.62,
                            "place_score": 1.8,
                            "predicted_placing": 1,
                            "ci_lower": 1480.2,
                            "ci_upper": 1560.8,
                            "placing": None,
                        },
                    ],
                },
            ],
        },
    )

700 

701 

class ConfidenceBucket(BaseModel):
    """Accuracy figures for one confidence bucket."""

    # Number of races in the bucket.
    races: int
    win_accuracy: float
    avg_brier: float

708 

709 

class ConfidenceBuckets(BaseModel):
    """Accuracy figures split into high, medium and low confidence buckets."""

    high: ConfidenceBucket
    medium: ConfidenceBucket
    low: ConfidenceBucket

716 

717 

class DailyTrendItem(BaseModel):
    """Accuracy figures for one day of the trend series."""

    # Day as a string; the schema example uses YYYY-MM-DD.
    date: str
    avg_brier: float
    win_accuracy: float
    # Number of races on that day.
    races: int

725 

726 

class RecentRaceAccuracy(BaseModel):
    """Accuracy figures for one evaluated race."""

    # Race identification.
    race_id: int
    race_number: int | None = None
    venue: str | None = None
    race_date: str | None = None

    # Evaluation results for the race.
    field_size: int
    winner_correct: bool
    top3_overlap: int
    brier_score: float

738 

739 

class AccuracySummary(BaseModel):
    """Aggregated prediction accuracy: summary, trend, buckets, recent races."""

    # Free-form summary values; see the schema example for typical keys.
    summary: dict
    daily_trend: list[DailyTrendItem]
    confidence_buckets: ConfidenceBuckets
    recent_races: list[RecentRaceAccuracy]

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "summary": {
                        "overall_win_accuracy": 0.28,
                        "top3_accuracy": 0.52,
                        "avg_brier_score": 0.21,
                        "num_races_evaluated": 150,
                    },
                    "daily_trend": [
                        {
                            "date": "2026-05-01",
                            "avg_brier": 0.19,
                            "win_accuracy": 0.30,
                            "races": 10,
                        },
                    ],
                    "confidence_buckets": {
                        "high": {"races": 20, "win_accuracy": 0.55, "avg_brier": 0.12},
                        "medium": {
                            "races": 60,
                            "win_accuracy": 0.28,
                            "avg_brier": 0.20,
                        },
                        "low": {"races": 70, "win_accuracy": 0.14, "avg_brier": 0.28},
                    },
                    "recent_races": [
                        {
                            "race_id": 12345,
                            "race_number": 7,
                            "venue": "Addington",
                            "race_date": "2026-05-01",
                            "field_size": 10,
                            "winner_correct": True,
                            "top3_overlap": 2,
                            "brier_score": 0.15,
                        },
                    ],
                },
            ],
        },
    )

790 

791 

792# ── Audit log models ──────────────────────────────────────────────────────── 

793 

794 

class AuditLogCreateRequest(BaseModel):
    """Body of a request that records one audit log entry."""

    # What changed: table, record and kind of change (all required).
    table_name: str = Field(..., description="Name of the table that was changed")
    record_id: str = Field(..., description="Primary key value of the changed record")
    action: str = Field(
        ...,
        description="Type of change: INSERT, UPDATE, DELETE, or CORRECT",
    )

    # Optional before/after snapshots.
    old_values: dict[str, Any] | None = Field(
        default=None,
        description="Snapshot of values before the change",
    )
    new_values: dict[str, Any] | None = Field(
        default=None,
        description="Snapshot of values after the change",
    )

    # Optional attribution.
    changed_by: str | None = Field(default=None, description="User/system identifier")
    change_reason: str | None = Field(
        default=None,
        description="Human-readable reason for the change",
    )

813 

814 

class AuditLogEntry(BaseModel):
    """One audit log entry as exposed in API responses.

    ``from_attributes`` is enabled, so instances can be built from objects
    that expose these fields as attributes.
    """

    id: int
    table_name: str
    record_id: str
    action: str
    old_values: dict[str, Any] | None = None
    new_values: dict[str, Any] | None = None
    changed_by: str | None = None
    change_reason: str | None = None
    # Creation timestamp carried as a string.
    created_at: str | None = None

    model_config = ConfigDict(from_attributes=True)

828 

829 

class AuditLogListResponse(BaseModel):
    """Audit log listing: the returned entries plus a total count."""

    data: list[AuditLogEntry]
    total: int

835 

836 

837# Endpoints 

838 

839 

840def _normalize_club_code(code: str) -> str: 

841 """Normalize club codes to two-digit strings.""" 

842 code_str = str(code).strip() 

843 if code_str.isdigit(): 

844 value = int(code_str) 

845 if value < 0 or value > 99: 

846 raise ValueError("Club code must be between 00 and 99") 

847 return f"{value:02d}" 

848 raise ValueError("Club code must be numeric") 

849 

850 

def _generate_hrnz_urls(
    start_date: date, end_date: date, club_codes: list[str]
) -> list[str]:
    """Build HRNZ result URL paths for every day and club code in a range.

    Each entry has the form ``<MMDD><club code>rs.htm``. Days run from
    ``start_date`` to ``end_date`` inclusive, and within a day the codes keep
    the order given. An empty list is returned when ``start_date`` is after
    ``end_date``. Club codes are normalized (and validated) up front.
    """
    codes = [_normalize_club_code(raw_code) for raw_code in club_codes]
    day_count = (end_date - start_date).days + 1
    return [
        f"{(start_date + timedelta(days=day_index)).strftime('%m%d')}{club}rs.htm"
        for day_index in range(day_count)
        for club in codes
    ]

866 

867 

868def _resolve_meeting_date(scraped: dict, url: str, default_year: int) -> date | None: 

869 """Resolve meeting date from scraped data or URL.""" 

870 date_raw = scraped.get("date_raw") 

871 if date_raw: 

872 date_raw = date_raw.replace("\xa0", " ").strip() 

873 if not re.search(r"\\b\\d{4}\\b", date_raw): 

874 for fmt in ["%A, %d %B", "%d %B"]: 

875 try: 

876 parsed = datetime.strptime(date_raw, fmt) 

877 return parsed.replace(year=default_year).date() 

878 except ValueError: 

879 continue 

880 formats = [ 

881 "%A, %d %B %Y", 

882 "%A, %d %B", 

883 "%d %B %Y", 

884 "%d %B", 

885 ] 

886 for fmt in formats: 

887 try: 

888 parsed = datetime.strptime(date_raw, fmt) 

889 if "%Y" not in fmt: 

890 parsed = parsed.replace(year=default_year) 

891 return parsed.date() 

892 except ValueError: 

893 continue 

894 

895 meeting_date_str = scraped.get("date") 

896 if meeting_date_str: 

897 try: 

898 return date.fromisoformat(meeting_date_str) 

899 except ValueError: 

900 pass 

901 

902 match = re.search(r"(?P<mm>\\d{2})(?P<dd>\\d{2})\\d{2}rs\\.htm", url) 

903 if match: 

904 try: 

905 return date(default_year, int(match.group("mm")), int(match.group("dd"))) 

906 except ValueError: 

907 return None 

908 

909 return None 

910 

911 

912def _build_pagination_meta( 

913 total: int, 

914 limit: int, 

915 offset: int, 

916 base_url: str, 

917 query_params: dict, 

918) -> dict: 

919 """Build pagination metadata with next/prev navigation links. 

920 

921 Args: 

922 total: Total number of results 

923 limit: Maximum results per page 

924 offset: Current offset 

925 base_url: Base URL without query string 

926 query_params: Current query parameters dict (limit/offset will be overridden) 

927 

928 Returns: 

929 Dict with total, limit, offset, next, prev keys 

930 """ 

931 meta: dict = { 

932 "total": total, 

933 "limit": limit, 

934 "offset": offset, 

935 } 

936 

937 # Build next link 

938 if offset + limit < total: 

939 next_params = {**query_params, "limit": limit, "offset": offset + limit} 

940 query_string = "&".join( 

941 f"{k}={v}" for k, v in next_params.items() if v is not None 

942 ) 

943 meta["next"] = f"{base_url}?{query_string}" 

944 else: 

945 meta["next"] = None 

946 

947 # Build prev link 

948 if offset > 0: 

949 prev_offset = max(0, offset - limit) 

950 prev_params = {**query_params, "limit": limit, "offset": prev_offset} 

951 query_string = "&".join( 

952 f"{k}={v}" for k, v in prev_params.items() if v is not None 

953 ) 

954 meta["prev"] = f"{base_url}?{query_string}" 

955 else: 

956 meta["prev"] = None 

957 

958 return meta 

959 

960 

961# Web UI routes 

@app.get("/", response_class=HTMLResponse)
async def root():
    """Send visitors of the site root to the web UI home page at /ui/."""
    ui_home = "/ui/"
    return RedirectResponse(url=ui_home)

966 

967 

@app.get("/docs/site")
@app.get("/docs/site/", response_class=HTMLResponse)
async def serve_docs_site():
    """Redirect both /docs/site and /docs/site/ to the static docs at /static/docs/."""
    static_docs_url = "/static/docs/"
    return RedirectResponse(url=static_docs_url)

973 

974 

975@app.get("/ui/", response_class=HTMLResponse) 

976@app.get("/ui/index.html", response_class=HTMLResponse) 

977async def serve_home(): 

978 """Serve web UI home page.""" 

979 html_path = web_dir / "templates" / "index.html" 

980 if html_path.exists(): 

981 return HTMLResponse(content=html_path.read_text()) 

982 raise HTTPException(status_code=404, detail="Web UI not found") 

983 

984 

985@app.get("/ui/horse/{horse_id}", response_class=HTMLResponse) 

986async def serve_horse_detail(horse_id: int): 

987 """Serve horse detail page.""" 

988 html_path = web_dir / "templates" / "horse.html" 

989 if html_path.exists(): 

990 return HTMLResponse(content=html_path.read_text()) 

991 raise HTTPException(status_code=404, detail="Web UI not found") 

992 

993 

994@app.get("/ui/driver/{driver_id}", response_class=HTMLResponse) 

995async def serve_driver_detail(driver_id: int): 

996 """Serve driver detail page.""" 

997 html_path = web_dir / "templates" / "driver.html" 

998 if html_path.exists(): 

999 return HTMLResponse(content=html_path.read_text()) 

1000 raise HTTPException(status_code=404, detail="Web UI not found") 

1001 

1002 

1003@app.get("/ui/trainer/{trainer_id}", response_class=HTMLResponse) 

1004async def serve_trainer_detail(trainer_id: int): 

1005 """Serve trainer detail page.""" 

1006 html_path = web_dir / "templates" / "trainer.html" 

1007 if html_path.exists(): 

1008 return HTMLResponse(content=html_path.read_text()) 

1009 raise HTTPException(status_code=404, detail="Web UI not found") 

1010 

1011 

1012@app.get("/ui/race/{race_id}", response_class=HTMLResponse) 

1013async def serve_race_detail(race_id: int): 

1014 """Serve race detail page.""" 

1015 html_path = web_dir / "templates" / "race.html" 

1016 if html_path.exists(): 

1017 return HTMLResponse(content=html_path.read_text()) 

1018 raise HTTPException(status_code=404, detail="Web UI not found") 

1019 

1020 

1021@app.get("/ui/race-card", response_class=HTMLResponse) 

1022@app.get("/ui/race-card/", response_class=HTMLResponse) 

1023async def serve_race_cards_index(): 

1024 """Serve race cards index page.""" 

1025 html_path = web_dir / "templates" / "race-card.html" 

1026 if html_path.exists(): 

1027 return HTMLResponse(content=html_path.read_text()) 

1028 raise HTTPException(status_code=404, detail="Web UI not found") 

1029 

1030 

1031@app.get("/ui/race-card/{race_id}", response_class=HTMLResponse) 

1032async def serve_race_card(race_id: int): 

1033 """Serve race card view for a specific race.""" 

1034 html_path = web_dir / "templates" / "race-card.html" 

1035 if html_path.exists(): 

1036 return HTMLResponse(content=html_path.read_text()) 

1037 raise HTTPException(status_code=404, detail="Web UI not found") 

1038 

1039 

1040@app.get("/ui/search", response_class=HTMLResponse) 

1041async def serve_search(): 

1042 """Serve search page.""" 

1043 html_path = web_dir / "templates" / "search.html" 

1044 if html_path.exists(): 

1045 return HTMLResponse(content=html_path.read_text()) 

1046 raise HTTPException(status_code=404, detail="Web UI not found") 

1047 

1048 

@app.get("/ui/analytics", response_class=HTMLResponse)
async def serve_analytics():
    """Serve the analytics page.

    Returns:
        The contents of ``templates/analytics.html`` as an HTML response.

    Raises:
        HTTPException: 404 if the template file does not exist.
    """
    html_path = web_dir / "templates" / "analytics.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Decode explicitly as UTF-8 so the page does not depend on the host locale.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))

1056 

1057 

@app.get("/ui/analytics-dashboard", response_class=HTMLResponse)
async def serve_analytics_dashboard():
    """Serve the analytics dashboard page.

    The dashboard combines accuracy, ratings, and confidence metrics.

    Returns:
        The contents of ``templates/analytics-dashboard.html`` as an HTML
        response.

    Raises:
        HTTPException: 404 if the template file does not exist.
    """
    html_path = web_dir / "templates" / "analytics-dashboard.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Decode explicitly as UTF-8 so the page does not depend on the host locale.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))

1065 

1066 

@app.get("/ui/data-correction", response_class=HTMLResponse)
async def serve_data_correction():
    """Serve the data correction workflow page.

    Returns:
        The contents of ``templates/data-correction.html`` as an HTML
        response.

    Raises:
        HTTPException: 404 if the template file does not exist.
    """
    html_path = web_dir / "templates" / "data-correction.html"
    if not html_path.exists():
        raise HTTPException(status_code=404, detail="Web UI not found")
    # Decode explicitly as UTF-8 so the page does not depend on the host locale.
    return HTMLResponse(content=html_path.read_text(encoding="utf-8"))

1074 

1075 

@app.get("/health", response_model=HealthResponse)
@limiter.limit("100/minute")
def health_check(request: Request):
    """Report that the service is up.

    Args:
        request: Incoming request; not read by the handler body.

    Returns:
        A ``HealthResponse`` carrying a fixed status and version string.
    """
    payload = HealthResponse(status="healthy", version="0.2.0")
    return payload

1085 

1086 

@api_router.get("/ratings/horses", response_model=PaginatedRatingResponse)
@limiter.limit("100/minute")
def get_horse_ratings(
    request: Request,
    limit: int = Query(default=100, le=500),
    offset: int = Query(default=0, ge=0),
    as_of_date: str | None = Query(default=None, description="YYYY-MM-DD"),
    venue: str | None = Query(default=None, description="Filter by venue"),
    format: str = Query(default="json", description="Response format: json or csv"),
    db: Session = Depends(get_db),
):
    """Get top horse ratings.

    Snapshots are fetched once (up to 100,000), optionally narrowed to horses
    that have at least one starter row at ``venue``, and then paginated in
    memory. Horse names are looked up only for the rows on the requested page,
    so the number of database queries does not grow with the snapshot count.

    Args:
        request: Incoming request, used to build pagination links
        limit: Maximum results to return
        offset: Number of results to skip
        as_of_date: Optional date filter for historical ratings
        venue: Optional venue filter
        format: Response format (json or csv, case-insensitive)
        db: Database session

    Returns:
        Paginated list of horse ratings as JSON, or the current page as raw
        CSV when ``format`` is ``csv``
    """
    from packages.core.storage.models import Starter

    # Parse as_of_date if provided
    date_filter = parse_date(as_of_date) if as_of_date else None

    # Fetch all snapshots (a high limit effectively gets all); pagination is
    # handled in memory because the venue filter is applied afterwards.
    snapshots = list(
        RatingSnapshotRepository.get_top_ratings(
            db, EntityType.HORSE, limit=100_000, as_of_date=date_filter
        )
    )

    if venue:
        # Simplified filter: a horse qualifies if it has any starter row at
        # the venue. One query collects the qualifying IDs instead of one
        # join per snapshot.
        # NOTE(review): assumes Starter.horse_id and RatingSnapshot.entity_id
        # come back as the same Python type - confirm against the models.
        venue_horse_ids = {
            horse_id
            for (horse_id,) in db.query(Starter.horse_id)
            .select_from(Starter)
            .join(Race)
            .join(Meeting)
            .filter(Meeting.venue == venue)
            .distinct()
        }
        snapshots = [s for s in snapshots if s.entity_id in venue_horse_ids]

    # Count total after filtering, then cut out the requested page
    total = len(snapshots)
    page_snapshots = snapshots[offset : offset + limit]

    # Resolve names for the page only, in a single query
    page_ids = list({s.entity_id for s in page_snapshots})
    names = {}
    if page_ids:
        names = {
            horse_id: name
            for horse_id, name in db.query(Horse.id, Horse.name).filter(
                Horse.id.in_(page_ids)
            )
        }

    page = [
        RatingResponse(
            entity_type="horse",
            entity_id=s.entity_id,
            entity_name=names.get(s.entity_id),
            rating=s.rating,
            rd=s.rd,
            race_count=s.meta.get("race_count") if s.meta else None,
            as_of_race_id=s.as_of_race_id,
        )
        for s in page_snapshots
    ]

    # Return CSV if requested
    if format.lower() == "csv":
        output = StringIO()
        writer = csv.DictWriter(
            output,
            fieldnames=[
                "entity_id",
                "name",
                "rating",
                "rd",
                "race_count",
                "as_of_race_id",
            ],
        )
        writer.writeheader()
        for r in page:
            writer.writerow(
                {
                    "entity_id": r.entity_id,
                    "name": r.entity_name,
                    "rating": r.rating,
                    "rd": r.rd,
                    "race_count": r.race_count,
                    "as_of_race_id": r.as_of_race_id,
                }
            )
        return Response(content=output.getvalue(), media_type="text/csv")

    # Build pagination metadata
    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRatingResponse(data=page, meta=PaginationMeta(**meta))

1196 

1197 

@api_router.get("/ratings/horses/{horse_id}", response_model=HorseDetailResponse)
@limiter.limit("100/minute")
def get_horse_detail(
    request: Request,
    horse_id: int,
    db: Session = Depends(get_db),
):
    """Return one horse together with its rating history.

    Args:
        request: Incoming request; not read by the handler body
        horse_id: Horse ID
        db: Database session

    Returns:
        Horse details; the history is ordered by meeting date, race datetime,
        race number, and race ID

    Raises:
        HTTPException: 404 if no horse has the given ID
    """
    from packages.core.storage.models import RatingSnapshot

    horse = db.query(Horse).filter(Horse.id == horse_id).first()
    if not horse:
        raise HTTPException(status_code=404, detail="Horse not found")

    # Oldest race first, so the last row is the current rating.
    chronological = (
        Meeting.meeting_date,
        Race.race_datetime,
        Race.race_number,
        Race.id,
    )
    snapshot_query = (
        db.query(RatingSnapshot)
        .join(Race, RatingSnapshot.as_of_race_id == Race.id)
        .join(Meeting, Race.meeting_id == Meeting.id)
        .filter(
            RatingSnapshot.entity_type == EntityType.HORSE,
            RatingSnapshot.entity_id == horse_id,
        )
    )
    timeline = snapshot_query.order_by(*chronological).all()

    history = [
        RatingHistoryItem(
            race_id=point.as_of_race_id,
            rating=point.rating,
            rd=point.rd,
        )
        for point in timeline
    ]

    if timeline:
        latest = timeline[-1]
        current_rating, current_rd = latest.rating, latest.rd
    else:
        current_rating = current_rd = None

    return HorseDetailResponse(
        horse_id=horse_id,
        name=horse.name,
        current_rating=current_rating,
        current_rd=current_rd,
        race_count=len(timeline),
        rating_history=history,
    )

1263 

1264 

@api_router.get("/ratings/drivers", response_model=PaginatedRatingResponse)
@limiter.limit("100/minute")
def get_driver_ratings(
    request: Request,
    limit: int = Query(default=100, le=500),
    offset: int = Query(default=0, ge=0),
    db: Session = Depends(get_db),
):
    """Get top driver ratings.

    Snapshots are fetched once (up to 100,000) and paginated in memory.
    Driver names are looked up only for the rows on the requested page, in a
    single query, rather than once per snapshot.

    Args:
        request: Incoming request, used to build pagination links
        limit: Maximum results to return
        offset: Number of results to skip
        db: Database session

    Returns:
        Paginated list of driver ratings
    """
    snapshots = list(
        RatingSnapshotRepository.get_top_ratings(
            db, EntityType.DRIVER, limit=100_000
        )
    )

    total = len(snapshots)
    page_snapshots = snapshots[offset : offset + limit]

    # Resolve names for the page only.
    # NOTE(review): assumes Driver.id and RatingSnapshot.entity_id come back
    # as the same Python type - confirm against the models.
    page_ids = list({s.entity_id for s in page_snapshots})
    names = {}
    if page_ids:
        names = {
            driver_id: name
            for driver_id, name in db.query(Driver.id, Driver.name).filter(
                Driver.id.in_(page_ids)
            )
        }

    page = [
        RatingResponse(
            entity_type="driver",
            entity_id=s.entity_id,
            entity_name=names.get(s.entity_id),
            rating=s.rating,
            rd=s.rd,
            race_count=s.meta.get("race_count") if s.meta else None,
            as_of_race_id=s.as_of_race_id,
        )
        for s in page_snapshots
    ]

    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRatingResponse(data=page, meta=PaginationMeta(**meta))

1311 

1312 

@api_router.get("/ratings/trainers", response_model=PaginatedRatingResponse)
@limiter.limit("100/minute")
def get_trainer_ratings(
    request: Request,
    limit: int = Query(default=100, le=500),
    offset: int = Query(default=0, ge=0),
    db: Session = Depends(get_db),
):
    """Get top trainer ratings.

    Snapshots are fetched once (up to 100,000) and paginated in memory.
    Trainer names are looked up only for the rows on the requested page, in a
    single query, rather than once per snapshot.

    Args:
        request: Incoming request, used to build pagination links
        limit: Maximum results to return
        offset: Number of results to skip
        db: Database session

    Returns:
        Paginated list of trainer ratings
    """
    snapshots = list(
        RatingSnapshotRepository.get_top_ratings(
            db, EntityType.TRAINER, limit=100_000
        )
    )

    total = len(snapshots)
    page_snapshots = snapshots[offset : offset + limit]

    # Resolve names for the page only.
    # NOTE(review): assumes Trainer.id and RatingSnapshot.entity_id come back
    # as the same Python type - confirm against the models.
    page_ids = list({s.entity_id for s in page_snapshots})
    names = {}
    if page_ids:
        names = {
            trainer_id: name
            for trainer_id, name in db.query(Trainer.id, Trainer.name).filter(
                Trainer.id.in_(page_ids)
            )
        }

    page = [
        RatingResponse(
            entity_type="trainer",
            entity_id=s.entity_id,
            entity_name=names.get(s.entity_id),
            rating=s.rating,
            rd=s.rd,
            race_count=s.meta.get("race_count") if s.meta else None,
            as_of_race_id=s.as_of_race_id,
        )
        for s in page_snapshots
    ]

    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRatingResponse(data=page, meta=PaginationMeta(**meta))

1359 

1360 

@api_router.get("/ratings/drivers/{driver_id}", response_model=DriverDetailResponse)
@limiter.limit("100/minute")
def get_driver_detail(
    request: Request,
    driver_id: int,
    db: Session = Depends(get_db),
):
    """Return one driver together with their rating history.

    Args:
        request: Incoming request; not read by the handler body
        driver_id: Driver ID
        db: Database session

    Returns:
        Driver details; the history is ordered by meeting date, race
        datetime, race number, and race ID

    Raises:
        HTTPException: 404 if no driver has the given ID
    """
    from packages.core.storage.models import RatingSnapshot

    driver = db.query(Driver).filter(Driver.id == driver_id).first()
    if not driver:
        raise HTTPException(status_code=404, detail="Driver not found")

    # Oldest race first, so the last row is the current rating.
    chronological = (
        Meeting.meeting_date,
        Race.race_datetime,
        Race.race_number,
        Race.id,
    )
    snapshot_query = (
        db.query(RatingSnapshot)
        .join(Race, RatingSnapshot.as_of_race_id == Race.id)
        .join(Meeting, Race.meeting_id == Meeting.id)
        .filter(
            RatingSnapshot.entity_type == EntityType.DRIVER,
            RatingSnapshot.entity_id == driver_id,
        )
    )
    timeline = snapshot_query.order_by(*chronological).all()

    history = [
        RatingHistoryItem(
            race_id=point.as_of_race_id,
            rating=point.rating,
            rd=point.rd,
        )
        for point in timeline
    ]

    if timeline:
        latest = timeline[-1]
        current_rating, current_rd = latest.rating, latest.rd
    else:
        current_rating = current_rd = None

    return DriverDetailResponse(
        driver_id=driver_id,
        name=driver.name,
        current_rating=current_rating,
        current_rd=current_rd,
        race_count=len(timeline),
        rating_history=history,
    )

1425 

1426 

@api_router.get("/ratings/trainers/{trainer_id}", response_model=TrainerDetailResponse)
@limiter.limit("100/minute")
def get_trainer_detail(
    request: Request,
    trainer_id: int,
    db: Session = Depends(get_db),
):
    """Return one trainer together with their rating history.

    Args:
        request: Incoming request; not read by the handler body
        trainer_id: Trainer ID
        db: Database session

    Returns:
        Trainer details; the history is ordered by meeting date, race
        datetime, race number, and race ID

    Raises:
        HTTPException: 404 if no trainer has the given ID
    """
    from packages.core.storage.models import RatingSnapshot

    trainer = db.query(Trainer).filter(Trainer.id == trainer_id).first()
    if not trainer:
        raise HTTPException(status_code=404, detail="Trainer not found")

    # Oldest race first, so the last row is the current rating.
    chronological = (
        Meeting.meeting_date,
        Race.race_datetime,
        Race.race_number,
        Race.id,
    )
    snapshot_query = (
        db.query(RatingSnapshot)
        .join(Race, RatingSnapshot.as_of_race_id == Race.id)
        .join(Meeting, Race.meeting_id == Meeting.id)
        .filter(
            RatingSnapshot.entity_type == EntityType.TRAINER,
            RatingSnapshot.entity_id == trainer_id,
        )
    )
    timeline = snapshot_query.order_by(*chronological).all()

    history = [
        RatingHistoryItem(
            race_id=point.as_of_race_id,
            rating=point.rating,
            rd=point.rd,
        )
        for point in timeline
    ]

    if timeline:
        latest = timeline[-1]
        current_rating, current_rd = latest.rating, latest.rd
    else:
        current_rating = current_rd = None

    return TrainerDetailResponse(
        trainer_id=trainer_id,
        name=trainer.name,
        current_rating=current_rating,
        current_rd=current_rd,
        race_count=len(timeline),
        rating_history=history,
    )

1491 

1492 

@api_router.get("/races/{race_id}")
@limiter.limit("100/minute")
def get_race(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Return a single race and its starters.

    Args:
        request: Incoming request; not read by the handler body
        race_id: Race ID
        db: Database session

    Returns:
        Dict of race fields plus a ``starters`` list and ``starters_count``

    Raises:
        HTTPException: 404 if no race has the given ID
    """
    from packages.core.storage.models import Starter

    # Load the meeting in the same query so the venue lookup is free.
    race_query = db.query(Race).options(joinedload(Race.meeting))
    race = race_query.filter(Race.id == race_id).first()
    if not race:
        raise HTTPException(status_code=404, detail="Race not found")

    starters = db.query(Starter).filter(Starter.race_id == race_id).all()

    starter_rows = []
    for entry in starters:
        starter_rows.append(
            {
                "horse_id": entry.horse_id,
                "driver_id": entry.driver_id,
                "trainer_id": entry.trainer_id,
                "barrier": entry.barrier,
                "handicap_m": entry.handicap_m,
                "placing": entry.placing,
                "did_not_finish": entry.did_not_finish,
            }
        )

    venue = race.meeting.venue if race.meeting else None
    started_at = race.race_datetime.isoformat() if race.race_datetime else None

    return {
        "id": race.id,
        "race_id": race.id,
        "meeting_id": race.meeting_id,
        "race_number": race.race_number,
        "venue": venue,
        "distance_m": race.distance_m,
        "start_type": race.start_type,
        "gait": race.gait,
        "race_datetime": started_at,
        "starters_count": len(starters),
        "starters": starter_rows,
    }

1546 

1547 

@api_router.get("/races", response_model=PaginatedRaceResponse)
@limiter.limit("200/minute")
def list_races(
    request: Request,
    date_from: str | None = Query(default=None, description="YYYY-MM-DD"),
    date_to: str | None = Query(default=None, description="YYYY-MM-DD"),
    limit: int = Query(default=100, le=1000),
    offset: int = Query(default=0, ge=0),
    venue: str | None = Query(default=None, description="Filter by venue"),
    db: Session = Depends(get_db),
):
    """List races whose meeting date falls inside a date range.

    Args:
        request: Incoming request, used to build pagination links
        date_from: Inclusive start date; defaults to 30 days before today
        date_to: Inclusive end date; defaults to today
        limit: Maximum results to return
        offset: Number of results to skip
        venue: Optional exact-match venue filter
        db: Database session

    Returns:
        Paginated list of race summaries ordered by meeting date, race
        datetime, race number, and race ID
    """
    from packages.core.common.utils import parse_date

    start_date = (
        parse_date(date_from) if date_from else date.today() - timedelta(days=30)
    )
    end_date = parse_date(date_to) if date_to else date.today()

    in_window = (
        Meeting.meeting_date >= start_date,
        Meeting.meeting_date <= end_date,
    )
    query = db.query(Race).join(Race.meeting).filter(*in_window)
    if venue:
        query = query.filter(Meeting.venue == venue)

    # Count before limit/offset so the total reflects the whole result set.
    total = query.count()

    ordering = (
        Meeting.meeting_date,
        Race.race_datetime,
        Race.race_number,
        Race.id,
    )
    races = (
        query.options(joinedload(Race.meeting))
        .order_by(*ordering)
        .limit(limit)
        .offset(offset)
        .all()
    )

    def _summarize(race):
        """Flatten one race (and its meeting, if any) into a plain dict."""
        meeting = race.meeting
        meeting_date = meeting.meeting_date if meeting else None
        return {
            "race_id": race.id,
            "meeting_id": race.meeting_id,
            "meeting_date": meeting_date.isoformat() if meeting_date else None,
            "venue": meeting.venue if meeting else None,
            "race_number": race.race_number,
            "distance_m": race.distance_m,
            "start_type": race.start_type,
            "race_datetime": (
                race.race_datetime.isoformat() if race.race_datetime else None
            ),
        }

    race_list = [_summarize(race) for race in races]

    # Pagination metadata carries the next/prev links
    base_url = str(request.base_url).rstrip("/") + request.url.path
    query_params = dict(request.query_params)
    meta = _build_pagination_meta(total, limit, offset, base_url, query_params)

    return PaginatedRaceResponse(data=race_list, meta=PaginationMeta(**meta))

1621 

1622 

@api_router.post("/admin/ingest", response_model=IngestionResponse)
async def trigger_ingestion(
    request: IngestionRequest,
    source: str = Query(
        default="tab",
        description='Data source: "tab" (TAB API directly) or "ingest" (tab-api-ingest service)',
    ),
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Run data ingestion for a date range (admin only).

    Two data sources are supported:
    - "tab" (default): data comes directly from the TAB Affiliates API
    - "ingest": data comes from the tab-api-ingest TypeScript service

    Args:
        request: Ingestion request with date range
        source: Data source selector
        db: Database session
        token: Admin token

    Returns:
        Counts of ingested meetings, races, and starters, plus the error
        count reported by the ingestion service
    """
    range_start = parse_date(request.date_from)
    range_end = parse_date(request.date_to)

    logger.info(
        f"Admin triggered ingestion: {range_start} to {range_end} (source={source})"
    )

    service = IngestionService(db, source=source)
    meeting_count, race_count, starter_count = await service.ingest_date_range(
        range_start, range_end
    )

    error_count = service.stats["errors"]
    return IngestionResponse(
        meetings=meeting_count,
        races=race_count,
        starters=starter_count,
        errors=error_count,
    )

1664 

1665 

@api_router.post("/webhook/scrape", response_model=ScrapeResponse)
async def webhook_scrape(
    request: ScrapeRequest,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Webhook that runs an HRNZ Playwright scrape and stores the results.

    URLs are generated from club codes (request value, falling back to the
    configured codes) and merged with any explicit URLs, de-duplicated in
    order. Each URL is scraped and persisted in its own transaction; a
    failure rolls back that URL only and is counted in ``errors``. Ratings
    are optionally recomputed afterwards in a worker thread.

    Args:
        request: Scrape parameters (date range, club codes, URLs, recompute
            options)
        db: Database session
        token: Admin token

    Returns:
        Counts of stored records and errors, plus recompute results

    Raises:
        HTTPException: 400 if URL generation rejects the club codes, or if no
            URLs result from the request
    """

    from packages.core.storage.repositories import (
        DriverRepository,
        HorseRepository,
        MeetingRepository,
        RaceRepository,
        StarterRepository,
        TrainerRepository,
    )
    from packages.hrnz_scraper import HRNZScraper
    from packages.hrnz_scraper.mapper import HRNZDataMapper

    start_date = parse_date(request.date_from)
    end_date = parse_date(request.date_to)

    # Request value wins; otherwise fall back to configured club codes.
    club_codes = request.club_codes
    if club_codes is None:
        club_codes = settings.hrnz.club_codes

    # A string is either the keyword "all" or a comma-separated list.
    if isinstance(club_codes, str):
        club_codes = (
            HRNZ_ALL_CLUB_CODES
            if club_codes.lower() == "all"
            else [part.strip() for part in club_codes.split(",") if part.strip()]
        )

    # "all" anywhere inside a list expands to every club code.
    if isinstance(club_codes, list) and any(
        isinstance(code, str) and code.lower() == "all" for code in club_codes
    ):
        club_codes = HRNZ_ALL_CLUB_CODES

    urls: list[str] = []
    if club_codes:
        try:
            urls.extend(_generate_hrnz_urls(start_date, end_date, club_codes))
        except ValueError as exc:
            raise HTTPException(status_code=400, detail=str(exc)) from exc

    urls.extend(request.urls or [])

    if not urls:
        raise HTTPException(
            status_code=400,
            detail="Provide club_codes (or HRNZ_CLUB_CODES) to generate URLs, or pass urls explicitly.",
        )

    # Drop duplicates while keeping first-seen order.
    urls = list(dict.fromkeys(urls))

    logger.info(
        "Webhook triggered scrape",
        extra={
            "start_date": str(start_date),
            "end_date": str(end_date),
            "recompute": request.recompute,
            "urls": len(urls),
        },
    )

    stats = dict.fromkeys(
        (
            "meetings",
            "races",
            "starters",
            "horses",
            "drivers",
            "trainers",
            "errors",
        ),
        0,
    )

    mapper = HRNZDataMapper()
    async with HRNZScraper() as scraper:
        for url in urls:
            try:
                scraped = await scraper.get_meeting_results(url)

                meeting_date = _resolve_meeting_date(scraped, url, start_date.year)
                if not meeting_date or not scraped.get("races"):
                    logger.info("Skipping non-meeting page for %s", url)
                    continue

                if meeting_date < start_date or meeting_date > end_date:
                    continue

                scraped["date"] = meeting_date.isoformat()
                meeting = mapper.map_meeting(scraped)
                entities = mapper.map_entities(scraped)

                MeetingRepository.upsert(db, meeting)
                stats["meetings"] += 1

                for horse in entities["horses"]:
                    HorseRepository.upsert(
                        db,
                        horse["id"],
                        horse["name"],
                        horse.get("raw_json"),
                    )
                    stats["horses"] += 1

                for driver in entities["drivers"]:
                    DriverRepository.upsert(
                        db, driver["name"], driver_id=driver.get("id")
                    )
                    stats["drivers"] += 1

                for trainer in entities["trainers"]:
                    TrainerRepository.upsert(
                        db, trainer["name"], trainer_id=trainer.get("id")
                    )
                    stats["trainers"] += 1

                # Starters are mapped via race number -> stored race ID.
                race_id_map = {}
                for race in mapper.map_races(scraped, meeting["meeting"]):
                    stored_race = RaceRepository.upsert(db, meeting["meeting"], race)
                    race_id_map[race["race_number"]] = stored_race.id
                    stats["races"] += 1

                for starter in mapper.map_starters(scraped, race_id_map):
                    StarterRepository.upsert(
                        db,
                        starter["race_id"],
                        starter,
                        starter.get("placing"),
                    )
                    stats["starters"] += 1

                db.commit()
            except Exception as exc:
                # Best effort per URL: undo this URL's writes and keep going.
                db.rollback()
                stats["errors"] += 1
                logger.error("Failed to scrape %s: %s", url, exc, exc_info=True)

    if request.recompute:

        def _recompute_in_worker():
            """Recompute ratings using a session owned by the worker thread."""
            with get_session() as session:
                return recompute_ratings(
                    session,
                    start_date,
                    end_date,
                    clear_existing=request.clear_existing,
                    learn_adjustments=request.learn_adjustments,
                )

        snapshots_created = await asyncio.to_thread(_recompute_in_worker)
    else:
        snapshots_created = 0

    return ScrapeResponse(
        **stats,
        recomputed=request.recompute,
        snapshots_created=snapshots_created,
    )

1833 

1834 

@api_router.post("/admin/recompute", response_model=RecomputeResponse)
def trigger_recompute(
    request: RecomputeRequest,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Recompute ratings for a date range (admin only).

    Args:
        request: Recompute request with date range and recompute options
        db: Database session
        token: Admin token

    Returns:
        The number of snapshots reported by the recompute
    """
    range_start = parse_date(request.date_from)
    range_end = parse_date(request.date_to)

    logger.info(f"Admin triggered recompute: {range_start} to {range_end}")

    options = {
        "clear_existing": request.clear_existing,
        "learn_adjustments": request.learn_adjustments,
    }
    created = recompute_ratings(db, range_start, range_end, **options)

    return RecomputeResponse(snapshots_created=created)

1865 

1866 

1867# ── Scheduler admin endpoints ──────────────────────────────────── 

1868 

1869 

@api_router.get(
    "/admin/jobs",
    response_model=SchedulerJobListResponse,
)
@limiter.limit("20/minute")
def list_scheduled_jobs(
    request: Request,
    token: str = Depends(verify_admin_token),
):
    """List all scheduled background jobs (admin only).

    Reports every job the scheduler currently returns from ``list_jobs()``.

    Args:
        request: Incoming request; not read by the handler body
        token: Admin authentication token

    Returns:
        The scheduled jobs and their count
    """
    logger.info("Admin listing scheduled jobs")
    registered = scheduler.list_jobs()

    job_infos = []
    for spec in registered:
        job_infos.append(SchedulerJobInfo(**spec))

    return SchedulerJobListResponse(jobs=job_infos, total=len(registered))

1895 

1896 

1897@api_router.post( 

1898 "/admin/jobs", 

1899 response_model=AddSchedulerJobResponse, 

1900 status_code=201, 

1901) 

1902@limiter.limit("20/minute") 

1903def add_scheduled_job( 

1904 request: Request, 

1905 job_request: AddSchedulerJobRequest, 

1906 token: str = Depends(verify_admin_token), 

1907): 

1908 """Add a new scheduled background job (admin only). 

1909 

1910 Supports three job types: 

1911 - ``ingest``: Scheduled data ingestion from TAB API 

1912 - ``recompute``: Scheduled rating recomputation 

1913 - ``scrape``: Scheduled HRNZ data scraping 

1914 

1915 Args: 

1916 job_request: Job configuration including cron expression and type-specific params 

1917 token: Admin authentication token 

1918 

1919 Returns: 

1920 The job ID and confirmation message 

1921 """ 

1922 job_id = ( 

1923 job_request.job_id 

1924 or f"{job_request.job_type}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}" 

1925 ) 

1926 

1927 logger.info( 

1928 "Admin adding scheduled job", 

1929 extra={ 

1930 "job_type": job_request.job_type, 

1931 "cron_expr": job_request.cron_expr, 

1932 "job_id": job_id, 

1933 }, 

1934 ) 

1935 

1936 if job_request.job_type == "ingest": 

1937 scheduler.add_ingest_job( 

1938 date_from=job_request.date_from, 

1939 date_to=job_request.date_to, 

1940 category=job_request.category, 

1941 source=job_request.source, 

1942 cron_expr=job_request.cron_expr, 

1943 job_id=job_id, 

1944 ) 

1945 elif job_request.job_type == "recompute": 

1946 scheduler.add_recompute_job( 

1947 date_from=job_request.date_from, 

1948 date_to=job_request.date_to, 

1949 clear=job_request.clear, 

1950 cron_expr=job_request.cron_expr, 

1951 job_id=job_id, 

1952 ) 

1953 elif job_request.job_type == "scrape": 

1954 scheduler.add_scrape_job( 

1955 urls=job_request.urls, 

1956 club_codes=job_request.club_codes, 

1957 date_from=job_request.date_from, 

1958 date_to=job_request.date_to, 

1959 cron_expr=job_request.cron_expr, 

1960 job_id=job_id, 

1961 ) 

1962 else: 

1963 raise HTTPException( 

1964 status_code=400, 

1965 detail=f"Unknown job_type '{job_request.job_type}'. Must be 'ingest', 'recompute', or 'scrape'.", 

1966 ) 

1967 

1968 return AddSchedulerJobResponse( 

1969 job_id=job_id, 

1970 message=f"Scheduled {job_request.job_type} job with cron '{job_request.cron_expr}'", 

1971 ) 

1972 

1973 

@api_router.delete(
    "/admin/jobs/{job_id}",
    response_model=RemoveSchedulerJobResponse,
)
@limiter.limit("20/minute")
def remove_scheduled_job(
    request: Request,
    job_id: str,
    token: str = Depends(verify_admin_token),
):
    """Remove a scheduled background job (admin only).

    Args:
        request: Incoming request; not read by the handler body
        job_id: The job identifier to remove
        token: Admin authentication token

    Returns:
        Confirmation of removal

    Raises:
        HTTPException: 404 if the scheduler reports nothing was removed
    """
    logger.info("Admin removing scheduled job", extra={"job_id": job_id})

    if not scheduler.remove_job(job_id):
        raise HTTPException(
            status_code=404,
            detail=f"Job '{job_id}' not found",
        )

    return RemoveSchedulerJobResponse(
        job_id=job_id,
        removed=True,
        message=f"Job '{job_id}' removed successfully",
    )

2007 

2008 

2009# ── Admin audit log endpoints ──────────────────────────────────────────── 

2010 

2011 

@api_router.post("/admin/audit-log", response_model=AuditLogEntry, status_code=201)
def create_audit_log(
    request: AuditLogCreateRequest,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Create an audit log entry (admin only).

    Records a data change or correction event so that audit entries can be
    created programmatically, e.g. from the data correction UI or automated
    scripts. The action is upper-cased and stripped before validation.

    Args:
        request: Audit log entry details
        db: Database session
        token: Admin token

    Returns:
        The created audit log entry

    Raises:
        HTTPException: 400 if the action is not INSERT, UPDATE, DELETE, or
            CORRECT; 500 if the audit logger returns no entry
    """
    allowed_actions = ("INSERT", "UPDATE", "DELETE", "CORRECT")
    normalized_action = request.action.upper().strip()
    if normalized_action not in allowed_actions:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid action '{request.action}'. Must be INSERT, UPDATE, DELETE, or CORRECT.",
        )

    entry = AuditLogger.log_change(
        session=db,
        table_name=request.table_name,
        record_id=request.record_id,
        action=normalized_action,
        old_values=request.old_values,
        new_values=request.new_values,
        changed_by=request.changed_by,
        change_reason=request.change_reason,
    )
    if entry is None:
        raise HTTPException(status_code=500, detail="Failed to create audit log entry")

    # Copy the ORM columns onto the Pydantic response model
    copied_fields = (
        "id",
        "table_name",
        "record_id",
        "action",
        "old_values",
        "new_values",
        "changed_by",
        "change_reason",
    )
    payload = {name: getattr(entry, name) for name in copied_fields}
    payload["created_at"] = entry.created_at.isoformat() if entry.created_at else None
    return AuditLogEntry(**payload)

2064 

2065 

@api_router.get("/admin/audit-log", response_model=AuditLogListResponse)
def list_audit_logs(
    limit: int = Query(default=100, le=500, description="Maximum entries to return"),
    offset: int = Query(default=0, ge=0, description="Number of entries to skip"),
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """List recent audit log entries (admin only).

    Args:
        limit: Maximum entries to return
        offset: Number of entries to skip
        db: Database session
        token: Admin token

    Returns:
        Paginated list of recent audit log entries, with ``total`` set to the
        number of rows in the audit log
    """
    window = limit + offset
    entries = AuditLogger.get_recent_changes(db, limit=window)
    # Apply offset in Python (the logger returns newest-first)
    page = entries[offset : offset + limit]

    # The fetch is capped at ``window`` rows. A short fetch has seen every
    # row, so its length is the total; a full fetch may have been truncated,
    # so ask the database for the real count.
    total = len(entries)
    if total >= window:
        total = db.query(AuditLog).count()

    return AuditLogListResponse(
        data=[
            AuditLogEntry(
                id=e.id,
                table_name=e.table_name,
                record_id=e.record_id,
                action=e.action,
                old_values=e.old_values,
                new_values=e.new_values,
                changed_by=e.changed_by,
                change_reason=e.change_reason,
                created_at=e.created_at.isoformat() if e.created_at else None,
            )
            for e in page
        ],
        total=total,
    )

2109 

2110 

@api_router.get(
    "/admin/audit-log/{table_name}/{record_id}", response_model=AuditLogListResponse
)
def get_audit_log_for_record(
    table_name: str,
    record_id: str,
    db: Session = Depends(get_db),
    token: str = Depends(verify_admin_token),
):
    """Return the audit trail of a single record (admin only).

    Args:
        table_name: Table the record belongs to
        record_id: Primary key value of the record
        db: Database session
        token: Admin token produced by the ``verify_admin_token`` dependency

    Returns:
        Every audit log entry the logger reports for the record, plus a count
    """
    history = AuditLogger.get_changes_for_record(db, table_name, record_id)

    serialized = []
    for record in history:
        # Timestamps are emitted as ISO-8601 strings; a missing one stays None.
        stamp = record.created_at.isoformat() if record.created_at else None
        serialized.append(
            AuditLogEntry(
                id=record.id,
                table_name=record.table_name,
                record_id=record.record_id,
                action=record.action,
                old_values=record.old_values,
                new_values=record.new_values,
                changed_by=record.changed_by,
                change_reason=record.change_reason,
                created_at=stamp,
            )
        )

    return AuditLogListResponse(data=serialized, total=len(history))

2150 

2151 

@api_router.get("/races/{race_id}/predictions", response_model=RacePredictionResponse)
@limiter.limit("50/minute")
def get_race_predictions(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Get win probability predictions for a race.

    Args:
        request: Incoming request (required by the rate limiter decorator)
        race_id: Race ID
        db: Database session

    Returns:
        Predictions for all starters, each annotated with the starter's
        recorded placing when one is available

    Raises:
        HTTPException: 404 if the race does not exist or has no starters
    """
    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    # Load the race together with its meeting in one query. The previous
    # "re-query with joinedload if race.meeting is falsy" step could never
    # produce a meeting that lazy loading had not already found.
    race = (
        db.query(Race)
        .options(joinedload(Race.meeting))
        .filter(Race.id == race_id)
        .first()
    )
    if not race:
        raise HTTPException(status_code=404, detail="Race not found")

    starters = db.query(Starter).filter(Starter.race_id == race_id).all()
    if not starters:
        raise HTTPException(status_code=404, detail="No starters found for race")
    # Used to attach the actual placing to each prediction.
    placing_by_starter_id = {starter.id: starter.placing for starter in starters}

    engine = PredictionEngine(db)
    prediction = engine.predict_race(race, starters)

    predictions = [
        PredictionResponse(
            horse_id=pred.horse_id,
            horse_name=pred.horse_name,
            driver_id=pred.driver_id,
            driver_name=pred.driver_name,
            trainer_id=pred.trainer_id,
            trainer_name=pred.trainer_name,
            barrier=pred.barrier,
            effective_rating=pred.effective_rating,
            win_probability=pred.win_probability,
            place_probability=pred.place_probability,
            place_score=pred.place_score,
            predicted_placing=pred.predicted_placing,
            ci_lower=pred.confidence_interval_low,
            ci_upper=pred.confidence_interval_high,
            # None when the prediction refers to a starter we did not load.
            placing=placing_by_starter_id.get(pred.starter_id),
        )
        for pred in prediction.predictions
    ]

    return RacePredictionResponse(
        race_id=race_id,
        race_number=race.race_number,
        venue=race.meeting.venue if race.meeting else None,
        distance_m=race.distance_m,
        predictions=predictions,
    )

2228 

2229 

@api_router.get("/races/upcoming")
@limiter.limit("100/minute")
def get_upcoming_races(
    request: Request,
    race_date: str | None = Query(
        default=None, description="YYYY-MM-DD, defaults to today"
    ),
    db: Session = Depends(get_db),
):
    """Get upcoming races for a specific date.

    Args:
        request: Incoming request (required by the rate limiter decorator)
        race_date: Date to get races for (defaults to today)
        db: Database session

    Returns:
        Dict with the ISO date, the number of races, and one summary dict per
        race ordered by venue and race number
    """
    target_date = parse_date(race_date) if race_date else date.today()

    races = (
        db.query(Race)
        # Eager-load starters as well: the summary needs len(race.starters),
        # which otherwise triggers one lazy-load query per race.
        .options(joinedload(Race.meeting), joinedload(Race.starters))
        .join(Race.meeting)
        .filter(Meeting.meeting_date == target_date)
        .order_by(Meeting.venue, Race.race_number)
        .all()
    )

    race_list = [
        {
            "race_id": race.id,
            "race_number": race.race_number,
            "venue": race.meeting.venue if race.meeting else None,
            "distance_m": race.distance_m,
            "start_type": race.start_type,
            "starter_count": len(race.starters),
        }
        for race in races
    ]

    return {
        "date": target_date.isoformat(),
        "race_count": len(race_list),
        "races": race_list,
    }

2279 

2280 

@api_router.get("/predictions/compare/{race_id}")
@limiter.limit("50/minute")
def compare_prediction_to_actual(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Report how the model's prediction for a race compares to its result.

    Args:
        request: Incoming request (required by the rate limiter decorator)
        race_id: Race ID
        db: Database session

    Returns:
        Whatever comparison payload the prediction engine produces

    Raises:
        HTTPException: 404 when the engine returns no comparison
    """
    from packages.core.ratings.predictions import PredictionEngine

    result = PredictionEngine(db).compare_prediction_to_actual(race_id)
    if result:
        return result

    # A falsy result is reported as "not found or not completed".
    raise HTTPException(status_code=404, detail="Race not found or not completed")

2306 

2307 

@api_router.get("/analytics/accuracy", response_model=AccuracySummary)
@limiter.limit("20/minute")
def get_accuracy_summary(
    request: Request,
    days: int = Query(default=30, ge=1, le=365),
    db: Session = Depends(get_db),
):
    """Aggregate prediction accuracy over a trailing window of days.

    Looks at races in the window that have at least one starter with a
    recorded placing, newest first, capped at 200 races. Each race is
    compared via the prediction engine; races without a comparison are
    skipped.

    Args:
        request: Incoming request (required by the rate limiter decorator)
        days: Number of days to look back
        db: Database session

    Returns:
        Overall summary, per-day trend, per-confidence-bucket stats and up
        to 50 of the evaluated races
    """
    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    window_start = date.today() - timedelta(days=days)
    window_end = date.today()

    candidate_races = (
        db.query(Race)
        .join(Starter, Race.id == Starter.race_id)
        .filter(Starter.placing.isnot(None))
        .join(Meeting)
        .filter(
            Meeting.meeting_date >= window_start, Meeting.meeting_date <= window_end
        )
        .order_by(Meeting.meeting_date.desc(), Race.race_datetime.desc().nulls_last())
        .distinct()
        .limit(200)
        .all()
    )

    engine = PredictionEngine(db)

    # Running totals across every evaluated race.
    hits = 0
    overlap_sum = 0.0
    brier_total = 0.0

    # Tallies keyed by confidence tier and by calendar day.
    tiers = {
        tier: {"races": 0, "winner_correct": 0, "brier_sum": 0.0}
        for tier in ("high", "medium", "low")
    }
    per_day: dict[str, dict] = {}
    evaluated: list[dict] = []

    for race in candidate_races:
        outcome = engine.compare_prediction_to_actual(race.id)
        if not outcome:
            continue

        was_correct = outcome["winner_correct"]
        overlap = outcome["top3_overlap"]
        brier = outcome["brier_score"]

        # The tier is decided by the favourite's win probability.
        runner_preds = outcome.get("predictions", [])
        top_prob = 0.0
        if runner_preds:
            top_prob = max(p["win_probability"] for p in runner_preds)

        tier_name = (
            "high" if top_prob >= 0.40 else "medium" if top_prob >= 0.20 else "low"
        )
        tier = tiers[tier_name]
        tier["races"] += 1
        if was_correct:
            tier["winner_correct"] += 1
        tier["brier_sum"] += brier

        # Prefer the engine's date, then the meeting's date, then "unknown".
        day_key = outcome.get("race_date")
        if not day_key:
            meeting = race.meeting
            meeting_day = meeting.meeting_date if meeting else None
            day_key = meeting_day.isoformat() if meeting_day else "unknown"

        day = per_day.setdefault(
            day_key, {"brier_sum": 0.0, "winner_correct": 0, "races": 0}
        )
        day["brier_sum"] += brier
        if was_correct:
            day["winner_correct"] += 1
        day["races"] += 1

        if was_correct:
            hits += 1
        overlap_sum += overlap
        brier_total += brier

        evaluated.append(
            {
                "race_id": outcome["race_id"],
                "race_number": outcome.get("race_number"),
                "venue": outcome.get("venue"),
                "race_date": outcome.get("race_date"),
                "field_size": outcome.get("field_size", 0),
                "winner_correct": was_correct,
                "top3_overlap": overlap,
                "brier_score": brier,
            }
        )

    race_count = len(evaluated)

    def _to_bucket(tally: dict) -> ConfidenceBucket:
        """Turn a raw tier tally into averaged stats (zeros when empty)."""
        n = tally["races"]
        if n == 0:
            return ConfidenceBucket(races=0, win_accuracy=0.0, avg_brier=0.0)
        return ConfidenceBucket(
            races=n,
            win_accuracy=tally["winner_correct"] / n,
            avg_brier=tally["brier_sum"] / n,
        )

    if race_count == 0:
        # Nothing evaluated: report zeros rather than dividing by zero.
        summary = {
            "overall_win_accuracy": 0.0,
            "top3_accuracy": 0.0,
            "avg_brier_score": 0.0,
            "num_races_evaluated": 0,
        }
    else:
        summary = {
            "overall_win_accuracy": hits / race_count,
            # top3_overlap is divided by 3.0 to express it as a fraction.
            "top3_accuracy": overlap_sum / race_count / 3.0,
            "avg_brier_score": brier_total / race_count,
            "num_races_evaluated": race_count,
        }

    daily_trend = [
        DailyTrendItem(
            date=day_key,
            avg_brier=per_day[day_key]["brier_sum"] / per_day[day_key]["races"],
            win_accuracy=per_day[day_key]["winner_correct"] / per_day[day_key]["races"],
            races=per_day[day_key]["races"],
        )
        for day_key in sorted(per_day)
    ]

    return AccuracySummary(
        summary=summary,
        daily_trend=daily_trend,
        confidence_buckets=ConfidenceBuckets(
            high=_to_bucket(tiers["high"]),
            medium=_to_bucket(tiers["medium"]),
            low=_to_bucket(tiers["low"]),
        ),
        recent_races=[RecentRaceAccuracy(**entry) for entry in evaluated[:50]],
    )

2477 

2478 

2479# ── Export endpoints (CSV/Parquet) ────────────────────────────────────── 

2480 

2481 

@api_router.get("/export/ratings.csv")
@limiter.limit("10/minute")
def export_ratings_csv(
    request: Request,
    entity_type: str | None = Query(
        default=None,
        description="Filter by entity type: horse, driver, trainer (default: all)",
    ),
    limit: int = Query(default=5000, le=50000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export ratings as CSV.

    Writes one row per rating snapshot returned by
    ``RatingSnapshotRepository.get_top_ratings`` for each requested entity
    type, stopping once ``limit`` rows have been written in total.

    Args:
        request: Incoming request (required by the rate limiter decorator)
        entity_type: Optional single entity type to export (case-insensitive)
        limit: Maximum number of data rows across all entity types
        db: Database session

    Returns:
        ``text/csv`` attachment named ``ratings.csv``

    Raises:
        HTTPException: 400 if ``entity_type`` is not a valid entity type
    """
    if entity_type:
        try:
            entity_types = [EntityType(entity_type.lower())]
        except ValueError:
            # An unknown value is a client error, not a server failure.
            valid = ", ".join(str(member.value) for member in EntityType)
            raise HTTPException(
                status_code=400,
                detail=f"Invalid entity_type '{entity_type}'. Must be one of: {valid}.",
            ) from None
    else:
        entity_types = [EntityType.HORSE, EntityType.DRIVER, EntityType.TRAINER]

    # Model that holds the display name for each entity type.
    name_models = {
        EntityType.HORSE: Horse,
        EntityType.DRIVER: Driver,
        EntityType.TRAINER: Trainer,
    }
    id_batch_size = 500  # keeps each IN (...) list well under backend limits

    output = StringIO()
    writer = csv.DictWriter(
        output,
        fieldnames=[
            "entity_type",
            "entity_id",
            "entity_name",
            "rating",
            "rd",
            "race_count",
            "as_of_race_id",
        ],
    )
    writer.writeheader()

    rows_written = 0
    for et in entity_types:
        remaining = limit - rows_written
        if remaining <= 0:
            break

        snapshots = list(RatingSnapshotRepository.get_top_ratings(db, et, limit=limit))
        snapshots = snapshots[:remaining]

        # Resolve all names for this entity type in a few batched queries
        # instead of issuing one query per exported row.
        names: dict = {}
        model = name_models.get(et)
        if model is not None:
            entity_ids = list({snapshot.entity_id for snapshot in snapshots})
            for start in range(0, len(entity_ids), id_batch_size):
                batch = entity_ids[start : start + id_batch_size]
                lookup = db.query(model.id, model.name).filter(model.id.in_(batch))
                for entity_id, entity_name in lookup:
                    names[entity_id] = entity_name

        for snapshot in snapshots:
            writer.writerow(
                {
                    "entity_type": et.value,
                    "entity_id": snapshot.entity_id,
                    # None (written as an empty cell) when no entity matches.
                    "entity_name": names.get(snapshot.entity_id),
                    "rating": snapshot.rating,
                    "rd": snapshot.rd,
                    "race_count": (
                        snapshot.meta.get("race_count") if snapshot.meta else None
                    ),
                    "as_of_race_id": snapshot.as_of_race_id,
                }
            )
            rows_written += 1

    return Response(
        content=output.getvalue(),
        media_type="text/csv",
        headers={"Content-Disposition": 'attachment; filename="ratings.csv"'},
    )

2567 

2568 

@api_router.get("/export/ratings.parquet")
@limiter.limit("10/minute")
def export_ratings_parquet(
    request: Request,
    entity_type: str | None = Query(
        default=None,
        description="Filter by entity type: horse, driver, trainer (default: all)",
    ),
    limit: int = Query(default=5000, le=50000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export ratings as Parquet.

    Collects one row per rating snapshot returned by
    ``RatingSnapshotRepository.get_top_ratings`` for each requested entity
    type (at most ``limit`` rows in total) and serialises them with pandas.
    Requires both pandas and pyarrow to be installed.

    Args:
        request: Incoming request (required by the rate limiter decorator)
        entity_type: Optional single entity type to export (case-insensitive)
        limit: Maximum number of rows across all entity types
        db: Database session

    Returns:
        ``application/octet-stream`` attachment named ``ratings.parquet``.
        When there are no rows the body is empty (zero bytes), not a Parquet
        file with a schema.

    Raises:
        HTTPException: 501 if pandas or pyarrow is missing; 400 if
            ``entity_type`` is not a valid entity type
    """
    try:
        import pandas as pd
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="Parquet export requires pandas. Install with: pip install pandas",
        ) from None

    try:
        import pyarrow  # noqa: F401
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail=(
                "Parquet export requires pyarrow. " "Install with: pip install pyarrow"
            ),
        ) from None

    if entity_type:
        try:
            entity_types = [EntityType(entity_type.lower())]
        except ValueError:
            # An unknown value is a client error, not a server failure.
            valid = ", ".join(str(member.value) for member in EntityType)
            raise HTTPException(
                status_code=400,
                detail=f"Invalid entity_type '{entity_type}'. Must be one of: {valid}.",
            ) from None
    else:
        entity_types = [EntityType.HORSE, EntityType.DRIVER, EntityType.TRAINER]

    # Model that holds the display name for each entity type.
    name_models = {
        EntityType.HORSE: Horse,
        EntityType.DRIVER: Driver,
        EntityType.TRAINER: Trainer,
    }
    id_batch_size = 500  # keeps each IN (...) list well under backend limits

    rows: list[dict] = []
    for et in entity_types:
        remaining = limit - len(rows)
        if remaining <= 0:
            break

        snapshots = list(RatingSnapshotRepository.get_top_ratings(db, et, limit=limit))
        snapshots = snapshots[:remaining]

        # Resolve all names for this entity type in a few batched queries
        # instead of issuing one query per exported row.
        names: dict = {}
        model = name_models.get(et)
        if model is not None:
            entity_ids = list({snapshot.entity_id for snapshot in snapshots})
            for start in range(0, len(entity_ids), id_batch_size):
                batch = entity_ids[start : start + id_batch_size]
                lookup = db.query(model.id, model.name).filter(model.id.in_(batch))
                for entity_id, entity_name in lookup:
                    names[entity_id] = entity_name

        rows.extend(
            {
                "entity_type": et.value,
                "entity_id": snapshot.entity_id,
                "entity_name": names.get(snapshot.entity_id),
                "rating": snapshot.rating,
                "rd": snapshot.rd,
                "race_count": (
                    snapshot.meta.get("race_count") if snapshot.meta else None
                ),
                "as_of_race_id": snapshot.as_of_race_id,
            }
            for snapshot in snapshots
        )

    if not rows:
        return Response(
            content=b"",
            media_type="application/octet-stream",
            headers={"Content-Disposition": 'attachment; filename="ratings.parquet"'},
        )

    df = pd.DataFrame(rows)
    buffer = BytesIO()
    df.to_parquet(buffer, index=False)

    return Response(
        content=buffer.getvalue(),
        media_type="application/octet-stream",
        headers={"Content-Disposition": 'attachment; filename="ratings.parquet"'},
    )

2667 

2668 

@api_router.get("/export/predictions.csv")
@limiter.limit("10/minute")
def export_predictions_csv(
    request: Request,
    date_from: str | None = Query(default=None, description="YYYY-MM-DD"),
    date_to: str | None = Query(default=None, description="YYYY-MM-DD"),
    limit: int = Query(default=1000, le=10000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export predictions for races in a date range as CSV.

    Races are selected purely by meeting date (newest first); the query does
    not filter on whether results exist. Races without starters, or whose
    prediction fails, are skipped.

    Args:
        request: Incoming request (required by the rate limiter decorator)
        date_from: First meeting date to include (default: 7 days ago)
        date_to: Last meeting date to include (default: today)
        limit: Maximum number of prediction rows; also caps the number of
            races fetched
        db: Database session

    Returns:
        ``text/csv`` attachment named ``predictions.csv``
    """
    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    start_date = parse_date(date_from) if date_from else date.today() - timedelta(days=7)
    end_date = parse_date(date_to) if date_to else date.today()

    races = (
        db.query(Race)
        # Venue and meeting date are written on every row; load them up front
        # rather than lazily once per race.
        .options(joinedload(Race.meeting))
        .join(Race.meeting)
        .filter(
            Meeting.meeting_date >= start_date,
            Meeting.meeting_date <= end_date,
        )
        .order_by(Meeting.meeting_date.desc(), Race.race_datetime.desc().nulls_last())
        .limit(limit)
        .all()
    )

    engine = PredictionEngine(db)
    output = StringIO()
    writer = csv.writer(output)
    writer.writerow(
        [
            "race_id",
            "race_number",
            "venue",
            "distance_m",
            "race_date",
            "starter_id",
            "horse_id",
            "horse_name",
            "driver_id",
            "driver_name",
            "trainer_id",
            "trainer_name",
            "barrier",
            "handicap_m",
            "effective_rating",
            "win_probability",
            "place_probability",
            "place_score",
            "predicted_placing",
            "ci_lower",
            "ci_upper",
        ]
    )

    rows_written = 0
    for race in races:
        if rows_written >= limit:
            break

        starters = db.query(Starter).filter(Starter.race_id == race.id).all()
        if not starters:
            continue

        try:
            prediction = engine.predict_race(race, starters)
        except Exception:
            # Best effort: one bad race must not abort the export, but the
            # failure should be visible in the logs.
            logger.warning(
                "Skipping race in predictions CSV export: prediction failed",
                exc_info=True,
                extra={"race_id": race.id},
            )
            continue

        meeting = race.meeting
        venue = meeting.venue if meeting else ""
        race_day = (
            meeting.meeting_date.isoformat()
            if meeting and meeting.meeting_date
            else ""
        )

        for pred in prediction.predictions:
            if rows_written >= limit:
                break

            # csv.writer emits None as an empty cell, so optional values are
            # passed through untouched. (`value or ""` would also blank out
            # legitimate zeros such as a 0 m handicap.)
            writer.writerow(
                [
                    race.id,
                    race.race_number,
                    venue,
                    race.distance_m,
                    race_day,
                    pred.starter_id,
                    pred.horse_id,
                    pred.horse_name,
                    pred.driver_id,
                    pred.driver_name,
                    pred.trainer_id,
                    pred.trainer_name,
                    pred.barrier,
                    pred.handicap_m,
                    f"{pred.effective_rating:.1f}",
                    f"{pred.win_probability:.4f}",
                    f"{pred.place_probability:.4f}",
                    f"{pred.place_score:.1f}",
                    pred.predicted_placing,
                    f"{pred.confidence_interval_low:.1f}",
                    f"{pred.confidence_interval_high:.1f}",
                ]
            )
            rows_written += 1

    return Response(
        content=output.getvalue(),
        media_type="text/csv",
        headers={"Content-Disposition": 'attachment; filename="predictions.csv"'},
    )

2793 

2794 

@api_router.get("/export/predictions.parquet")
@limiter.limit("10/minute")
def export_predictions_parquet(
    request: Request,
    date_from: str | None = Query(default=None, description="YYYY-MM-DD"),
    date_to: str | None = Query(default=None, description="YYYY-MM-DD"),
    limit: int = Query(default=1000, le=10000, description="Maximum rows"),
    db: Session = Depends(get_db),
):
    """Export predictions for races in a date range as Parquet.

    Races are selected purely by meeting date (newest first); the query does
    not filter on whether results exist. Races without starters, or whose
    prediction fails, are skipped. Requires both pandas and pyarrow.

    Args:
        request: Incoming request (required by the rate limiter decorator)
        date_from: First meeting date to include (default: 7 days ago)
        date_to: Last meeting date to include (default: today)
        limit: Maximum number of prediction rows; also caps the number of
            races fetched
        db: Database session

    Returns:
        ``application/octet-stream`` attachment named
        ``predictions.parquet``. When there are no rows the body is empty
        (zero bytes), not a Parquet file with a schema.

    Raises:
        HTTPException: 501 if pandas or pyarrow is missing
    """
    try:
        import pandas as pd
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="Parquet export requires pandas. Install with: pip install pandas",
        ) from None

    try:
        import pyarrow  # noqa: F401
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail=(
                "Parquet export requires pyarrow. " "Install with: pip install pyarrow"
            ),
        ) from None

    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    start_date = parse_date(date_from) if date_from else date.today() - timedelta(days=7)
    end_date = parse_date(date_to) if date_to else date.today()

    races = (
        db.query(Race)
        # Venue and meeting date go on every row; load them up front rather
        # than lazily once per race.
        .options(joinedload(Race.meeting))
        .join(Race.meeting)
        .filter(
            Meeting.meeting_date >= start_date,
            Meeting.meeting_date <= end_date,
        )
        .order_by(Meeting.meeting_date.desc(), Race.race_datetime.desc().nulls_last())
        .limit(limit)
        .all()
    )

    engine = PredictionEngine(db)
    rows: list[dict] = []

    for race in races:
        if len(rows) >= limit:
            break

        starters = db.query(Starter).filter(Starter.race_id == race.id).all()
        if not starters:
            continue

        try:
            prediction = engine.predict_race(race, starters)
        except Exception:
            # Best effort: one bad race must not abort the export, but the
            # failure should be visible in the logs.
            logger.warning(
                "Skipping race in predictions Parquet export: prediction failed",
                exc_info=True,
                extra={"race_id": race.id},
            )
            continue

        meeting = race.meeting
        venue = meeting.venue if meeting else None
        race_day = (
            meeting.meeting_date.isoformat()
            if meeting and meeting.meeting_date
            else None
        )

        for pred in prediction.predictions:
            if len(rows) >= limit:
                break

            rows.append(
                {
                    "race_id": race.id,
                    "race_number": race.race_number,
                    "venue": venue,
                    "distance_m": race.distance_m,
                    "race_date": race_day,
                    "starter_id": pred.starter_id,
                    "horse_id": pred.horse_id,
                    "horse_name": pred.horse_name,
                    "driver_id": pred.driver_id,
                    "driver_name": pred.driver_name,
                    "trainer_id": pred.trainer_id,
                    "trainer_name": pred.trainer_name,
                    "barrier": pred.barrier,
                    "handicap_m": pred.handicap_m,
                    "effective_rating": pred.effective_rating,
                    "win_probability": pred.win_probability,
                    "place_probability": pred.place_probability,
                    "place_score": pred.place_score,
                    "predicted_placing": pred.predicted_placing,
                    "ci_lower": pred.confidence_interval_low,
                    "ci_upper": pred.confidence_interval_high,
                }
            )

    if not rows:
        return Response(
            content=b"",
            media_type="application/octet-stream",
            headers={
                "Content-Disposition": 'attachment; filename="predictions.parquet"'
            },
        )

    df = pd.DataFrame(rows)
    buffer = BytesIO()
    df.to_parquet(buffer, index=False)

    return Response(
        content=buffer.getvalue(),
        media_type="application/octet-stream",
        headers={"Content-Disposition": 'attachment; filename="predictions.parquet"'},
    )

2923 

2924 

@api_router.get("/export/race-predictions.pdf")
@limiter.limit("10/minute")
def export_race_predictions_pdf(
    request: Request,
    race_id: int,
    db: Session = Depends(get_db),
):
    """Export race predictions as a PDF document.

    Generates a PDF with a structured table showing each runner's rating,
    win probability, place probability, predicted placing, and confidence
    interval. Requires the ``reportlab`` library.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        race_id: Race ID to export.
        db: Database session.

    Returns:
        PDF binary response with appropriate Content-Type and disposition.

    Raises:
        HTTPException: 501 if reportlab is missing; 404 if the race does not
            exist or has no starters.
    """
    try:
        from reportlab.lib import colors
        from reportlab.lib.pagesizes import letter
        from reportlab.lib.styles import getSampleStyleSheet
        from reportlab.lib.units import inch
        from reportlab.platypus import (
            Paragraph,
            SimpleDocTemplate,
            Spacer,
            Table,
            TableStyle,
        )
    except ImportError:
        raise HTTPException(
            status_code=501,
            detail="PDF export requires reportlab. Install with: pip install reportlab",
        ) from None

    from packages.core.ratings.predictions import PredictionEngine
    from packages.core.storage.models import Starter

    # ── Resolve race ─────────────────────────────────────────────────
    # One query, with the meeting eager-loaded for venue / date info.
    race = (
        db.query(Race)
        .options(joinedload(Race.meeting))
        .filter(Race.id == race_id)
        .first()
    )
    if not race:
        raise HTTPException(status_code=404, detail="Race not found")

    starters = db.query(Starter).filter(Starter.race_id == race_id).all()
    if not starters:
        raise HTTPException(status_code=404, detail="No starters found for race")

    engine = PredictionEngine(db)
    prediction = engine.predict_race(race, starters)

    # ── Build PDF ────────────────────────────────────────────────────
    buffer = BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter)
    styles = getSampleStyleSheet()
    elements = []

    # Title
    venue = race.meeting.venue if race.meeting else "Unknown"
    race_date = (
        race.meeting.meeting_date.isoformat()
        if race.meeting and race.meeting.meeting_date
        else ""
    )
    title_text = (
        # Separator between race number and venue so they do not run together.
        f"Race {race.race_number} — {venue} ({race_date})<br/>"
        f"Distance: {race.distance_m}m | "
        f"Starters: {len(prediction.predictions)}"
    )
    elements.append(Paragraph("<b>TipSharks — Race Predictions</b>", styles["Title"]))
    elements.append(Spacer(1, 0.15 * inch))
    elements.append(Paragraph(title_text, styles["Normal"]))
    elements.append(Spacer(1, 0.2 * inch))

    # Table header + rows
    table_data = [
        ["#", "Horse", "Driver", "Rating", "Win%", "Place%", "Pred.", "CI Range"],
    ]
    for idx, pred in enumerate(prediction.predictions, start=1):
        ci_low = pred.confidence_interval_low
        ci_high = pred.confidence_interval_high
        # Both bounds are needed to render a range; otherwise show a dash.
        if ci_low is not None and ci_high is not None:
            ci = f"{ci_low:.0f} - {ci_high:.0f}"
        else:
            ci = "—"
        table_data.append(
            [
                str(idx),
                pred.horse_name or f"ID {pred.horse_id}",
                pred.driver_name or "—",
                f"{pred.effective_rating:.1f}",
                f"{pred.win_probability:.1%}",
                f"{pred.place_probability:.1%}",
                str(pred.predicted_placing),
                ci,
            ]
        )

    # Column widths
    col_widths = [
        0.3 * inch,
        1.6 * inch,
        1.4 * inch,
        0.8 * inch,
        0.7 * inch,
        0.7 * inch,
        0.5 * inch,
        1.2 * inch,
    ]
    table = Table(table_data, colWidths=col_widths, repeatRows=1)
    table.setStyle(
        TableStyle(
            [
                ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#1a5276")),
                ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
                ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                ("FONTSIZE", (0, 0), (-1, 0), 9),
                ("FONTSIZE", (0, 1), (-1, -1), 8),
                ("ALIGN", (0, 0), (-1, -1), "CENTER"),
                ("ALIGN", (1, 1), (2, -1), "LEFT"),
                ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
                ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
                (
                    "ROWBACKGROUNDS",
                    (0, 1),
                    (-1, -1),
                    [colors.white, colors.HexColor("#f2f3f4")],
                ),
                ("TOPPADDING", (0, 0), (-1, -1), 4),
                ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
            ]
        )
    )
    elements.append(table)

    # Footer metadata
    elements.append(Spacer(1, 0.2 * inch))

    # Format the average only when it is numeric: applying ":.1f" to the
    # "—" placeholder (or to None) would raise and fail the whole export.
    avg_rating = prediction.metadata.get("avg_rating")
    if isinstance(avg_rating, (int, float)):
        avg_rating_text = f"{avg_rating:.1f}"
    else:
        avg_rating_text = "—"

    elements.append(
        Paragraph(
            f"Generated: {datetime.now(UTC).isoformat()} | "
            f"Field size: {prediction.metadata.get('field_size', '—')} | "
            f"Avg rating: {avg_rating_text}",
            styles["Normal"],
        )
    )

    doc.build(elements)
    pdf_bytes = buffer.getvalue()

    filename = f"race_{race_id}_predictions.pdf"
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
            "Content-Length": str(len(pdf_bytes)),
        },
    )

3094 

3095 

3096# ── WebSocket endpoint for live race updates ──────────────────────── 

3097 

3098 

@app.websocket("/ws/races/{race_id}")
async def websocket_race_endpoint(websocket: WebSocket, race_id: int):
    """WebSocket endpoint for live race updates.

    On connect, validates the race exists in the database and sends
    the initial race state (race details + starters). It then reads client
    messages until the socket closes: a ``subscribe`` message carrying an
    integer ``race_id`` is acknowledged with a ``subscribed`` reply, and any
    other message is logged and ignored.

    Malformed payloads (invalid JSON, or valid JSON that is not an object)
    are logged and skipped; they do not terminate the connection.

    The per-race simulation is started if none is running when a client
    connects, and stopped once the connection count for the race drops to
    zero.

    Args:
        websocket: The WebSocket connection.
        race_id: Race identifier.
    """
    from apps.backend.api.websocket import _build_initial_state, manager
    from packages.core.storage.models import Starter

    # Validate race exists
    try:
        with get_session() as session:
            race = session.query(Race).filter(Race.id == race_id).first()
            if not race:
                # NOTE(review): this close happens before the connection is
                # accepted in this function. Per the ASGI spec a close during
                # the handshake is reported to the client as an HTTP 403, so
                # the custom 4004 code may never reach it — confirm intended.
                await websocket.close(code=4004, reason="Race not found")
                return

            # Eager-load relationships for the initial state
            race = (
                session.query(Race)
                .options(
                    joinedload(Race.meeting),
                    joinedload(Race.starters).joinedload(Starter.horse),
                    joinedload(Race.starters).joinedload(Starter.driver),
                    joinedload(Race.starters).joinedload(Starter.trainer),
                )
                .filter(Race.id == race_id)
                .first()
            )
            starters = race.starters if race else []

            # Build initial state
            initial_state = _build_initial_state(race, starters)
    except Exception:
        logger.error("Failed to validate race for WebSocket", exc_info=True)
        await websocket.close(code=1011, reason="Internal server error")
        return

    # Accept connection and register
    await manager.connect(websocket, race_id)

    # Send initial state
    await manager.send_personal_message(initial_state, websocket)

    # Start simulation on first connection
    if not manager.is_simulation_running(race_id):
        manager.start_simulation(race_id)

    try:
        while True:
            data = await websocket.receive_text()

            # Only the decode step can raise JSONDecodeError, so keep the
            # try body limited to it.
            try:
                msg = json.loads(data)
            except json.JSONDecodeError:
                logger.warning(
                    "Invalid JSON received on WebSocket",
                    extra={"race_id": race_id},
                )
                continue

            # json.loads also returns lists, numbers, strings and None for
            # valid JSON. Those have no .get(); previously the resulting
            # AttributeError fell through to the generic handler below and
            # dropped the client. Treat them as malformed input instead.
            if not isinstance(msg, dict):
                logger.warning(
                    "Non-object JSON received on WebSocket",
                    extra={"race_id": race_id},
                )
                continue

            msg_type = msg.get("type", "")
            msg_race_id = msg.get("race_id", race_id)

            if msg_type == "subscribe" and isinstance(msg_race_id, int):
                logger.info(
                    "Client subscribed to race updates",
                    extra={"race_id": msg_race_id},
                )
                # Confirm subscription
                await manager.send_personal_message(
                    json.dumps({"type": "subscribed", "race_id": msg_race_id}),
                    websocket,
                )
            else:
                logger.debug(
                    "Unknown WebSocket message type",
                    extra={"type": msg_type, "race_id": race_id},
                )
    except WebSocketDisconnect:
        logger.info("WebSocket client disconnected", extra={"race_id": race_id})
    except Exception:
        logger.error("WebSocket error", extra={"race_id": race_id}, exc_info=True)
    finally:
        await manager.disconnect(websocket, race_id)
        # Stop simulation when last client leaves
        if manager.get_connection_count(race_id) == 0 and manager.is_simulation_running(
            race_id
        ):
            manager.stop_simulation(race_id)

3192 

3193 

3194# Prometheus metrics endpoint stub 

3195# In production, integrate with the prometheus-client library. 

3196# See docs/monitoring.md for instrumentation guidance. 

@app.get("/metrics", response_class=PlainTextResponse)
def metrics():
    """Serve a placeholder body at the Prometheus scrape path.

    No real metrics are collected yet; the response is a fixed two-line
    comment block with HTTP status 200. To serve real metrics:
        1. pip install prometheus-client
        2. Create counters/histograms/gauges in a metrics module
        3. Call generate_latest(REGISTRY) here

    See docs/monitoring.md for the full instrumentation guide.
    """
    # Adjacent literals concatenate to the exact stub text served to scrapers.
    stub_body = (
        "# TipSharks API metrics stub\n"
        "# Integrate prometheus-client for production use\n"
    )
    return PlainTextResponse(content=stub_body, status_code=200)

3212 

3213 

# Attach the versioned API router to the application.
app.include_router(api_router)

if __name__ == "__main__":
    # Direct-execution entry point: uvicorn is imported only on this path.
    import uvicorn

    # Listen on all interfaces, port 8000.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)