Coverage for apps / backend / worker / cli.py: 29%

508 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-08 08:37 +1200

1"""Worker CLI for TipSharks ingestion and computation tasks.""" 

2 

3import asyncio 

4import json 

5import os 

6import subprocess 

7import sys 

8from calendar import monthrange 

9from datetime import date, timedelta 

10 

11import click 

12from rich.console import Console 

13from rich.table import Table 

14 

15from packages.core.common.logging import get_logger, setup_logging 

16from packages.core.common.settings import get_settings 

17from packages.core.common.utils import parse_date 

18from packages.core.storage.database import get_session 

19from packages.core.storage.ingestion import IngestionService 

20from packages.core.storage.models import Meeting, Race 

21from packages.core.storage.repositories import StarterRepository, normalize_race_data 

22 

# Setup logging
# Logging is configured at import time, before the module logger is obtained.
setup_logging()
# Module-wide logger and Rich console shared by every command in this file.
logger = get_logger(__name__)
console = Console()

27 

28 

29def _subtract_months(d: date, months: int) -> date: 

30 """Subtract *months* from *d*, clamping day to month length. 

31 

32 Args: 

33 d: Starting date. 

34 months: Number of months to subtract (must be >= 0). 

35 

36 Returns: 

37 Date *months* months before *d*. 

38 """ 

39 target_month = d.month - 1 - months # 0-indexed 

40 target_year = d.year + target_month // 12 

41 target_month = target_month % 12 + 1 

42 max_day = monthrange(target_year, target_month)[1] 

43 target_day = min(d.day, max_day) 

44 return date(target_year, target_month, target_day) 

45 

46 

@click.group()
def cli() -> None:
    """TipSharks worker CLI.

    Commands for ingesting TAB racing data and computing ratings.
    """
    # Intentionally empty: the group only acts as the attachment point for
    # the subcommands registered through ``@cli.command``.

54 

55 

@cli.command("normalize-races")
@click.option(
    "--from",
    "date_from",
    required=False,
    help="Start date (YYYY-MM-DD)",
)
@click.option(
    "--to",
    "date_to",
    required=False,
    help="End date (YYYY-MM-DD)",
)
@click.option(
    "--commit-every",
    default=200,
    # Reject 0 and negative values up front: the loop below evaluates
    # ``idx % commit_every``, which raises ZeroDivisionError for 0.
    type=click.IntRange(min=1),
    show_default=True,
    help="Commit interval for database updates",
)
def normalize_races(date_from: str, date_to: str, commit_every: int) -> None:
    """Normalize race/starter JSON structure and backfill starters from raw JSON."""
    # Both bounds are optional; an omitted bound leaves that side unfiltered.
    start_date = parse_date(date_from) if date_from else None
    end_date = parse_date(date_to) if date_to else None

    with get_session() as session:
        query = session.query(Race).join(Meeting)
        if start_date:
            query = query.filter(Meeting.meeting_date >= start_date)
        if end_date:
            query = query.filter(Meeting.meeting_date <= end_date)

        # Only ids are fetched here; each Race row is loaded individually in
        # the loop below, which commits in batches of ``commit_every``.
        race_ids = [
            race_id
            for (race_id,) in query.with_entities(Race.id)
            .order_by(Meeting.meeting_date, Race.race_number)
            .all()
        ]
        total = len(race_ids)
        console.print(f"[bold]Normalizing {total} races[/bold]")

        updated_races = 0
        upserted_starters = 0

        for idx, race_id in enumerate(race_ids, 1):
            race = session.query(Race).filter(Race.id == race_id).one()
            normalized = normalize_race_data(race.raw_json or {})
            # Write back only when normalization actually changed the payload.
            if normalized != race.raw_json:
                race.raw_json = normalized
                updated_races += 1

            starters = (normalized.get("raw_json") or {}).get("starters")
            if isinstance(starters, list):
                for starter in starters:
                    # Skip malformed entries rather than failing the whole race.
                    if not isinstance(starter, dict):
                        continue
                    placing = starter.get("placing")
                    StarterRepository.upsert(session, race.id, starter, placing=placing)
                    upserted_starters += 1

            if idx % commit_every == 0:
                session.commit()
                console.print(f"[dim]Processed {idx}/{total} races[/dim]")

        # Flush the final partial batch.
        session.commit()

    console.print(
        f"[green]✓ Updated races: {updated_races}, upserted starters: {upserted_starters}[/green]"
    )

124 

125 

@cli.command()
@click.option(
    "--from",
    "date_from",
    required=False,
    help="Start date (YYYY-MM-DD)",
)
@click.option(
    "--to",
    "date_to",
    required=False,
    help="End date (YYYY-MM-DD)",
)
@click.option(
    "--date",
    "single_date",
    required=False,
    help="Single date to ingest (YYYY-MM-DD)",
)
@click.option(
    "--category",
    "category",
    required=False,
    type=click.Choice(["T", "H", "G"], case_sensitive=False),
    help="Racing category: T (Thoroughbred), H (Harness), G (Greyhound)",
)
def ingest(date_from: str, date_to: str, single_date: str, category: str):
    """Ingest meetings and races from TAB API.

    Examples:
        ingest --from 2024-01-01 --to 2024-01-31
        ingest --date 2024-01-15
        ingest --date 2024-01-15 --category H
    """
    # Guard clauses: --date excludes --from/--to, and a range needs both ends.
    if single_date and (date_from or date_to):
        console.print("[red]Error: Cannot use --date with --from/--to[/red]")
        sys.exit(1)
    if not single_date and not (date_from and date_to):
        console.print("[red]Error: Must specify either --date or --from/--to[/red]")
        sys.exit(1)

    if single_date:
        # A single day is treated as a one-day range.
        start_date = end_date = parse_date(single_date)
    else:
        start_date = parse_date(date_from)
        end_date = parse_date(date_to)

    settings = get_settings()
    if category:
        effective_category = category.upper()
    else:
        effective_category = settings.tab.default_category

    console.print(
        f"\n[bold]Ingesting {effective_category} meetings from {start_date} to {end_date}[/bold]\n"
    )

    try:
        with get_session() as session:
            service = IngestionService(session)
            meetings, races, starters = asyncio.run(
                service.ingest_date_range(
                    start_date, end_date, category=effective_category
                )
            )

        error_count = service.stats["errors"]

        # Render the per-entity counts, with the error row coloured by outcome.
        table = Table(title="Ingestion Results")
        table.add_column("Entity", style="cyan")
        table.add_column("Count", style="green", justify="right")
        for label, count in (
            ("Meetings", meetings),
            ("Races", races),
            ("Starters", starters),
        ):
            table.add_row(label, str(count))
        table.add_row(
            "Errors",
            str(error_count),
            style="red" if error_count > 0 else "green",
        )
        console.print(table)

        if error_count > 0:
            console.print(
                f"\n[yellow]⚠ {error_count} errors occurred during ingestion[/yellow]"
            )
        else:
            console.print("\n[green]✓ Ingestion completed successfully[/green]")

    except Exception as e:
        # Top-level boundary: log with traceback, report, and exit non-zero.
        logger.error(f"Ingestion failed: {e}", exc_info=True)
        console.print(f"\n[red]✗ Ingestion failed: {e}[/red]")
        sys.exit(1)

217 

218 

@cli.command()
@click.option(
    "--from",
    "date_from",
    required=True,
    help="Start date (YYYY-MM-DD)",
)
@click.option(
    "--to",
    "date_to",
    required=True,
    help="End date (YYYY-MM-DD)",
)
@click.option(
    "--clear",
    is_flag=True,
    help="Clear existing ratings before recompute",
)
def recompute(date_from: str, date_to: str, clear: bool):
    """Recompute ratings from stored race results.

    This will deterministically recompute all ratings in the date range.

    Examples:
        recompute --from 2024-01-01 --to 2024-12-31
        recompute --from 2024-01-01 --to 2024-12-31 --clear
    """
    start_date, end_date = parse_date(date_from), parse_date(date_to)

    banner = f"\n[bold]Recomputing ratings from {start_date} to {end_date}[/bold]\n"
    console.print(banner)

    if clear:
        # Announce only; the clearing itself is requested via clear_existing.
        console.print("[yellow]Clearing existing ratings...[/yellow]")

    try:
        # Deferred import: importing at module level creates a circular
        # dependency.
        from packages.core.ratings.recompute import recompute_ratings

        with get_session() as session:
            created = recompute_ratings(
                session, start_date, end_date, clear_existing=clear
            )

        console.print(
            f"\n[green]✓ Recompute complete: {created} rating snapshots created[/green]"
        )

    except Exception as e:
        # Top-level boundary: log with traceback, report, and exit non-zero.
        logger.error(f"Recompute failed: {e}", exc_info=True)
        console.print(f"\n[red]✗ Recompute failed: {e}[/red]")
        sys.exit(1)

273 

274 

@cli.command()
@click.option(
    "--urls",
    "urls_file",
    required=True,
    help="File containing HRNZ result URLs (one per line)",
)
@click.option(
    "--overwrite/--no-overwrite",
    default=False,
    show_default=True,
    help="Overwrite existing meetings/races/starters for matching HRNZ meeting IDs",
)
@click.option(
    "--from",
    "date_from",
    required=False,
    help="Filter: Start date (YYYY-MM-DD)",
)
@click.option(
    "--to",
    "date_to",
    required=False,
    help="Filter: End date (YYYY-MM-DD)",
)
def scrape_hrnz(urls_file: str, overwrite: bool, date_from: str, date_to: str):
    """Import historical data from HRNZ results archive.

    Scrapes HRNZ InfoHorse results pages and imports into database.

    WARNING: Web scraping should only be used if official API access is unavailable.
    Always check HRNZ's Terms of Service before scraping.

    Examples:
        scrape-hrnz --urls hrnz_urls.txt
        scrape-hrnz --urls hrnz_urls.txt --from 2024-01-01 --to 2024-12-31

    URL File Format:
        102402rs.htm
        102502rs.htm
        102602rs.htm
    """
    from datetime import date as date_type

    from packages.core.storage.repositories import (
        DriverRepository,
        HorseRepository,
        MeetingRepository,
        RaceRepository,
        StarterRepository,
        TrainerRepository,
    )
    from packages.hrnz_scraper import HRNZScraper
    from packages.hrnz_scraper.mapper import HRNZDataMapper

    # An omitted bound means "no limit" on that side. Fixed sentinel dates
    # (previously 2000-01-01 / 2030-12-31) would silently skip any meeting
    # outside that window even though no filter was requested.
    start_date = parse_date(date_from) if date_from else None
    end_date = parse_date(date_to) if date_to else None

    # Read the URL list, ignoring blank lines.
    try:
        with open(urls_file, encoding="utf-8") as f:
            urls = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        console.print(f"[red]Error: File not found: {urls_file}[/red]")
        sys.exit(1)
    except OSError as e:
        # Covers unreadable paths such as directories or permission errors.
        console.print(f"[red]Error: Cannot read {urls_file}: {e}[/red]")
        sys.exit(1)

    console.print(f"\n[bold]Scraping {len(urls)} HRNZ meetings[/bold]")
    console.print(f"Date filter: {start_date or 'any'} to {end_date or 'any'}\n")
    console.print(
        "[yellow]⚠ Using web scraper - please ensure compliance with HRNZ ToS[/yellow]\n"
    )

    stats = {
        "meetings": 0,
        "races": 0,
        "starters": 0,
        "horses": 0,
        "drivers": 0,
        "trainers": 0,
        "errors": 0,
    }

    try:

        async def scrape_and_import():
            async with HRNZScraper() as scraper:
                for idx, url in enumerate(urls, 1):
                    try:
                        console.print(f"[{idx}/{len(urls)}] Scraping {url}...")

                        scraped = await scraper.get_meeting_results(url)

                        # Apply the optional date filter; meetings without a
                        # date are never filtered out.
                        meeting_date_str = scraped.get("date")
                        if meeting_date_str:
                            meeting_date = date_type.fromisoformat(meeting_date_str)
                            too_early = (
                                start_date is not None and meeting_date < start_date
                            )
                            too_late = end_date is not None and meeting_date > end_date
                            if too_early or too_late:
                                console.print(
                                    " [dim]Skipped (outside date range)[/dim]"
                                )
                                continue

                        # Map the scraped payload to TipSharks structures.
                        mapper = HRNZDataMapper()
                        meeting = mapper.map_meeting(scraped)
                        entities = mapper.map_entities(scraped)

                        # Tally this meeting locally and merge into ``stats``
                        # only after the commit, so a meeting that fails part
                        # way through does not inflate the summary.
                        imported = dict.fromkeys(
                            (
                                "meetings",
                                "races",
                                "starters",
                                "horses",
                                "drivers",
                                "trainers",
                            ),
                            0,
                        )

                        with get_session() as session:
                            if overwrite:
                                # NOTE(review): this delete is committed on its
                                # own, so a failure in the upserts below leaves
                                # the meeting removed - confirm this is intended.
                                session.query(Meeting).filter(
                                    Meeting.id == meeting["meeting"]
                                ).delete(synchronize_session=False)
                                session.commit()

                            MeetingRepository.upsert(session, meeting)
                            imported["meetings"] += 1

                            for horse in entities["horses"]:
                                HorseRepository.upsert(
                                    session,
                                    horse["id"],
                                    horse["name"],
                                    horse.get("raw_json"),
                                )
                                imported["horses"] += 1

                            for driver in entities["drivers"]:
                                DriverRepository.upsert(
                                    session, driver["name"], driver_id=driver.get("id")
                                )
                                imported["drivers"] += 1

                            for trainer in entities["trainers"]:
                                TrainerRepository.upsert(
                                    session,
                                    trainer["name"],
                                    trainer_id=trainer.get("id"),
                                )
                                imported["trainers"] += 1

                            # Upsert races, remembering race_number -> id so
                            # starters can be attached to the stored rows.
                            races = mapper.map_races(scraped, meeting["meeting"])
                            race_id_map = {}
                            for race in races:
                                race_obj = RaceRepository.upsert(
                                    session, meeting["meeting"], race
                                )
                                race_id_map[race["race_number"]] = race_obj.id
                                imported["races"] += 1

                            starters = mapper.map_starters(scraped, race_id_map)
                            for starter in starters:
                                StarterRepository.upsert(
                                    session,
                                    starter["race_id"],
                                    starter,
                                    starter.get("placing"),
                                )
                                imported["starters"] += 1

                            session.commit()

                        for key, count in imported.items():
                            stats[key] += count

                        console.print(
                            f" [green]✓ Imported: {len(races)} races, "
                            f"{len(starters)} starters[/green]"
                        )

                    except Exception as e:
                        # Best effort per URL: record the failure (with
                        # traceback) and carry on with the next meeting.
                        stats["errors"] += 1
                        logger.error(f"Failed to scrape {url}: {e}", exc_info=True)
                        console.print(f" [red]✗ Error: {e}[/red]")
                        continue

        asyncio.run(scrape_and_import())

        # Display results
        table = Table(title="HRNZ Scraping Results")
        table.add_column("Entity", style="cyan")
        table.add_column("Count", style="green", justify="right")

        for label in ("Meetings", "Races", "Starters", "Horses", "Drivers", "Trainers"):
            table.add_row(label, str(stats[label.lower()]))
        table.add_row(
            "Errors",
            str(stats["errors"]),
            style="red" if stats["errors"] > 0 else "green",
        )

        console.print("\n")
        console.print(table)

        if stats["errors"] > 0:
            console.print(
                f"\n[yellow]⚠ {stats['errors']} errors occurred during scraping[/yellow]"
            )
            console.print(
                "[yellow]Tip: Run 'recompute' to compute ratings for imported data[/yellow]"
            )
        else:
            console.print("\n[green]✓ Scraping completed successfully[/green]")
            console.print(
                "[green]Tip: Run 'recompute' to compute ratings for imported data[/green]"
            )

    except Exception as e:
        # Top-level boundary: log with traceback, report, and exit non-zero.
        logger.error(f"Scraping failed: {e}", exc_info=True)
        console.print(f"\n[red]✗ Scraping failed: {e}[/red]")
        sys.exit(1)

493 

494 

@cli.command()
@click.option(
    "--from",
    "date_from",
    required=True,
    help="Start date (YYYY-MM-DD)",
)
@click.option(
    "--to",
    "date_to",
    required=True,
    help="End date (YYYY-MM-DD)",
)
@click.option(
    "--race-type",
    "race_day_type",
    type=click.Choice(["OfficialRaces", "TrialRaces", "WorkoutRaces"]),
    default="OfficialRaces",
    show_default=True,
    help="Race day type filter from the results enquiry",
)
@click.option(
    "--club",
    "club_no",
    default="",
    show_default=True,
    help="Optional club number filter (leave blank for all clubs)",
)
@click.option(
    "--overwrite/--no-overwrite",
    default=False,
    show_default=True,
    help="Overwrite existing meetings/races/starters for matching HRNZ meeting IDs",
)
def scrape_hrnz_enquiry(
    date_from: str, date_to: str, race_day_type: str, club_no: str, overwrite: bool
):
    """Scrape HRNZ historical results via the Results Enquiry page."""
    from datetime import date as date_type

    from packages.core.storage.repositories import (
        DriverRepository,
        HorseRepository,
        MeetingRepository,
        RaceRepository,
        StarterRepository,
        TrainerRepository,
    )
    from packages.hrnz_scraper import HRNZHistoricalResultsScraper
    from packages.hrnz_scraper.mapper import HRNZDataMapper

    start_date = parse_date(date_from)
    end_date = parse_date(date_to)

    console.print(
        f"\n[bold]Scraping HRNZ results from {start_date} to {end_date}[/bold]\n"
    )
    console.print(
        "[yellow]⚠ Using web scraper - please ensure compliance with HRNZ ToS[/yellow]\n"
    )

    stats = {
        "meetings": 0,
        "races": 0,
        "starters": 0,
        "horses": 0,
        "drivers": 0,
        "trainers": 0,
        "errors": 0,
    }

    try:

        async def scrape_and_import():
            async with HRNZHistoricalResultsScraper() as scraper:
                # Materialise the meeting list first so progress can be shown
                # as "[idx/total]".
                meetings = [
                    meeting
                    async for meeting in scraper.iter_meetings(
                        start_date,
                        end_date,
                        race_day_type=race_day_type,
                        club_no=club_no,
                    )
                ]
                for idx, meeting_meta in enumerate(meetings, 1):
                    try:
                        url = meeting_meta["results_url"]
                        console.print(f"[{idx}/{len(meetings)}] Scraping {url}...")

                        scraped = await scraper.get_meeting_results(url, meeting_meta)

                        # Fall back to the enquiry listing's date when the
                        # results page did not yield one.
                        if not scraped.get("date") and meeting_meta.get("meeting_date"):
                            scraped["date"] = meeting_meta["meeting_date"].isoformat()

                        if not scraped.get("races"):
                            console.print(" [dim]Skipped (no races found)[/dim]")
                            continue

                        meeting_date_str = scraped.get("date")
                        if meeting_date_str:
                            meeting_date = date_type.fromisoformat(meeting_date_str)
                            if not (start_date <= meeting_date <= end_date):
                                console.print(
                                    " [dim]Skipped (outside date range)[/dim]"
                                )
                                continue

                        mapper = HRNZDataMapper()
                        meeting = mapper.map_meeting(scraped)
                        entities = mapper.map_entities(scraped)

                        # Tally this meeting locally and merge into ``stats``
                        # only after the commit, so a meeting that fails part
                        # way through does not inflate the summary.
                        imported = dict.fromkeys(
                            (
                                "meetings",
                                "races",
                                "starters",
                                "horses",
                                "drivers",
                                "trainers",
                            ),
                            0,
                        )

                        with get_session() as session:
                            if overwrite:
                                # NOTE(review): this delete is committed on its
                                # own, so a failure in the upserts below leaves
                                # the meeting removed - confirm this is intended.
                                session.query(Meeting).filter(
                                    Meeting.id == meeting["meeting"]
                                ).delete(synchronize_session=False)
                                session.commit()

                            MeetingRepository.upsert(session, meeting)
                            imported["meetings"] += 1

                            for horse in entities["horses"]:
                                HorseRepository.upsert(
                                    session,
                                    horse["id"],
                                    horse["name"],
                                    horse.get("raw_json"),
                                )
                                imported["horses"] += 1

                            for driver in entities["drivers"]:
                                DriverRepository.upsert(
                                    session, driver["name"], driver_id=driver.get("id")
                                )
                                imported["drivers"] += 1

                            for trainer in entities["trainers"]:
                                TrainerRepository.upsert(
                                    session,
                                    trainer["name"],
                                    trainer_id=trainer.get("id"),
                                )
                                imported["trainers"] += 1

                            # Upsert races, remembering race_number -> id so
                            # starters can be attached to the stored rows.
                            races = mapper.map_races(scraped, meeting["meeting"])
                            race_id_map = {}
                            for race in races:
                                race_obj = RaceRepository.upsert(
                                    session, meeting["meeting"], race
                                )
                                race_id_map[race["race_number"]] = race_obj.id
                                imported["races"] += 1

                            starters = mapper.map_starters(scraped, race_id_map)
                            for starter in starters:
                                StarterRepository.upsert(
                                    session,
                                    starter["race_id"],
                                    starter,
                                    starter.get("placing"),
                                )
                                imported["starters"] += 1

                            session.commit()

                        for key, count in imported.items():
                            stats[key] += count

                        console.print(
                            f" [green]✓ Imported: {len(races)} races, "
                            f"{len(starters)} starters[/green]"
                        )
                    except Exception as e:
                        # Best effort per meeting: record the failure (with
                        # traceback) and carry on with the next one.
                        stats["errors"] += 1
                        logger.error(
                            f"Failed to scrape {meeting_meta.get('results_url')}: {e}",
                            exc_info=True,
                        )
                        console.print(f" [red]✗ Error: {e}[/red]")
                        continue

        asyncio.run(scrape_and_import())

        table = Table(title="HRNZ Results Enquiry Scraping Results")
        table.add_column("Entity", style="cyan")
        table.add_column("Count", style="green", justify="right")

        for label in ("Meetings", "Races", "Starters", "Horses", "Drivers", "Trainers"):
            table.add_row(label, str(stats[label.lower()]))
        table.add_row(
            "Errors",
            str(stats["errors"]),
            style="red" if stats["errors"] > 0 else "green",
        )

        console.print("\n")
        console.print(table)

        if stats["errors"] > 0:
            console.print(
                f"\n[yellow]⚠ {stats['errors']} errors occurred during scraping[/yellow]"
            )
            console.print(
                "[yellow]Tip: Run 'recompute' to compute ratings for imported data[/yellow]"
            )
        else:
            console.print("\n[green]✓ Scraping completed successfully[/green]")
            console.print(
                "[green]Tip: Run 'recompute' to compute ratings for imported data[/green]"
            )

    except Exception as e:
        # Top-level boundary: log with traceback, report, and exit non-zero.
        logger.error(f"Scraping failed: {e}", exc_info=True)
        console.print(f"\n[red]✗ Scraping failed: {e}[/red]")
        sys.exit(1)

710 

711 

@cli.command()
def refresh_club_codes():
    """Refresh HRNZ club codes by scraping the HRNZ results pages.

    Fetches the latest list of club codes from HRNZ, compares them with
    the hardcoded ``HRNZ_ALL_CLUB_CODES`` list, and reports any
    differences (new, missing, or unmatched codes).
    """
    # Aliased on import so the helper is not confused with this command,
    # which has the same name.
    from packages.hrnz_scraper.club_refresh import (
        generate_diff_report,
    )
    from packages.hrnz_scraper.club_refresh import (
        refresh_club_codes as fetch_club_codes,
    )

    console.print("\n[bold cyan]Refreshing HRNZ club codes...[/bold cyan]\n")

    try:
        result = fetch_club_codes()
        report = generate_diff_report(result)

        console.print(report)

        new_codes = result.get("new", [])
        missing_codes = result.get("missing", [])
        error = result.get("error")

        if error:
            console.print(f"\n[yellow]⚠ {error}[/yellow]")

        if new_codes:
            console.print(
                "\n[yellow]⚠ New club codes detected. "
                "Update HRNZ_ALL_CLUB_CODES in packages/core/common/settings.py "
                "to include these codes.[/yellow]"
            )
            console.print(f"\nAdd to settings.py:\n {new_codes!r}")

        if missing_codes:
            # Previously this case printed "All hardcoded club codes were
            # found online", contradicting the non-empty ``missing`` list.
            # NOTE(review): assumes ``missing`` holds hardcoded codes that
            # were not seen online - confirm against club_refresh.
            console.print(
                "\n[yellow]⚠ Some hardcoded club codes were not found online: "
                f"{missing_codes!r}[/yellow]"
            )

        if not new_codes and not missing_codes and not error:
            # Claim success only when the refresh reported no error, so a
            # failed fetch is not shown as "up to date".
            console.print(
                "\n[green]✓ Hardcoded club codes are up to date "
                f"(all {len(result.get('fetched', []))} codes confirmed).[/green]"
            )

    except Exception as e:
        # Top-level boundary: log with traceback, report, and exit non-zero.
        logger.error(f"Club code refresh failed: {e}", exc_info=True)
        console.print(f"\n[red]✗ Club code refresh failed: {e}[/red]")
        sys.exit(1)

761 

762 

@cli.command()
def info():
    """Display configuration information."""
    settings = get_settings()

    def mark(enabled) -> str:
        # Render a boolean setting as a tick or a cross.
        return "✓" if enabled else "✗"

    # Show only the part of the database URL after the last "@", which keeps
    # anything before it (such as credentials) out of the output.
    db_url = settings.database.url
    if "@" in db_url:
        shown_db_url = db_url.split("@")[-1]
    else:
        shown_db_url = db_url

    rating = settings.rating
    rows = [
        # TAB API settings
        ("TAB Base URL", settings.tab.base_url),
        ("TAB Default Category", settings.tab.default_category),
        ("TAB Default Country", settings.tab.default_country),
        ("TAB Mock Mode", mark(settings.tab.mock_mode)),
        # Database and logging
        ("Database URL", shown_db_url),
        ("Log Level", settings.logging.level),
        # Rating settings
        ("Elo Scale (C)", str(rating.elo_scale_c)),
        ("Elo K", str(rating.elo_k_base)),
        ("Driver Weight (α)", str(rating.driver_weight_alpha)),
        ("Trainer Weight (β)", str(rating.trainer_weight_beta)),
        ("Enable Driver", mark(rating.enable_driver)),
        ("Enable Trainer", mark(rating.enable_trainer)),
        ("Enable Adjustments", mark(rating.enable_adjustments)),
    ]

    table = Table(title="TipSharks Configuration")
    table.add_column("Setting", style="cyan")
    table.add_column("Value", style="yellow")
    for name, value in rows:
        table.add_row(name, value)

    console.print(table)

801 

802 

@cli.command()
@click.option(
    "--from",
    "date_from",
    required=True,
    help="Start date (YYYY-MM-DD)",
)
@click.option(
    "--to",
    "date_to",
    required=True,
    help="End date (YYYY-MM-DD)",
)
@click.option(
    "--out",
    "output_file",
    required=False,
    help="Output JSON file path (optional)",
)
def data_quality(date_from, date_to, output_file):
    """Generate data quality report for date range.

    Validates data completeness, accuracy, and identifies issues.
    """
    import json

    from packages.core.common.data_quality import (
        DataQualityValidator,
        check_data_freshness,
    )

    console.print("[bold cyan]Generating Data Quality Report...[/bold cyan]")

    start_date = parse_date(date_from)
    end_date = parse_date(date_to)

    console.print(f"Date range: {start_date} to {end_date}")

    # Each severity table lists at most this many issues; the remainder is
    # summarised in a single "... and N more" line.
    max_shown = 20

    with get_session() as session:
        report = DataQualityValidator(session).generate_report(start_date, end_date)

        # A freshness problem, if any, is reported alongside the other issues.
        stale = check_data_freshness(session)
        if stale:
            report.issues.append(stale)

        # Headline counts.
        info_total = sum(1 for item in report.issues if item.severity == "info")
        overview = Table(title="Data Quality Summary")
        overview.add_column("Metric", style="cyan")
        overview.add_column("Value", style="yellow")
        for label, value in (
            ("Total Meetings", str(report.total_meetings)),
            ("Total Races", str(report.total_races)),
            ("Total Starters", str(report.total_starters)),
            ("Errors", f"[red]{report.error_count}[/red]"),
            ("Warnings", f"[yellow]{report.warning_count}[/yellow]"),
            ("Info", str(info_total)),
        ):
            overview.add_row(label, value)
        console.print(overview)

        # Free-form metrics, with snake_case keys shown as Title Case.
        if report.metrics:
            metrics_view = Table(title="Data Metrics")
            metrics_view.add_column("Metric", style="cyan")
            metrics_view.add_column("Value", style="yellow")
            for name, metric in report.metrics.items():
                metrics_view.add_row(name.replace("_", " ").title(), str(metric))
            console.print(metrics_view)

        # One table per severity, most severe first.
        if report.issues:
            console.print("\n[bold]Issues by Category:[/bold]")

            for severity in ("error", "warning", "info"):
                matching = [item for item in report.issues if item.severity == severity]
                if not matching:
                    continue

                listing = Table(title=f"{severity.upper()} Issues")
                listing.add_column("Category", style="cyan")
                listing.add_column("Message", style="yellow")
                listing.add_column("Race ID", style="magenta")
                for item in matching[:max_shown]:
                    race_ref = str(item.race_id) if item.race_id else "-"
                    listing.add_row(item.category, item.message, race_ref)
                console.print(listing)

                hidden = len(matching) - max_shown
                if hidden > 0:
                    console.print(
                        f"[dim]... and {hidden} more {severity} issues[/dim]"
                    )

        # Optionally persist the whole report, including every issue, as JSON.
        if output_file:
            payload = {
                "start_date": report.start_date.isoformat(),
                "end_date": report.end_date.isoformat(),
                "generated_at": report.generated_at.isoformat(),
                "summary": {
                    "total_meetings": report.total_meetings,
                    "total_races": report.total_races,
                    "total_starters": report.total_starters,
                    "error_count": report.error_count,
                    "warning_count": report.warning_count,
                },
                "metrics": report.metrics,
                "issues": [
                    {
                        "severity": item.severity,
                        "category": item.category,
                        "message": item.message,
                        "race_id": item.race_id,
                        "meeting_id": item.meeting_id,
                        "starter_id": item.starter_id,
                        "details": item.details,
                    }
                    for item in report.issues
                ],
            }

            with open(output_file, "w") as handle:
                json.dump(payload, handle, indent=2)

            console.print(f"\n[green]Report saved to {output_file}[/green]")

        failed = report.has_errors

    # The exit status mirrors the outcome so the command can gate a pipeline.
    if failed:
        console.print("\n[bold red]⚠ Data quality check FAILED[/bold red]")
        sys.exit(1)

    console.print("\n[bold green]✓ Data quality check PASSED[/bold green]")
    sys.exit(0)

947 

948 

949@cli.command() 

950@click.option( 

951 "--months", 

952 default=12, 

953 type=click.IntRange(1, 24), 

954 show_default=True, 

955 help="Number of months to backfill (max 24)", 

956) 

957@click.option( 

958 "--category", 

959 type=click.Choice(["T", "H", "G"], case_sensitive=False), 

960 help="Racing category: T (Thoroughbred), H (Harness), G (Greyhound)", 

961) 

962@click.option( 

963 "--source", 

964 type=click.Choice(["tab", "ingest"], case_sensitive=False), 

965 default="tab", 

966 show_default=True, 

967 help="Data source: tab (TAB API) or ingest (tab-api-ingest service)", 

968) 

969@click.option( 

970 "--clear", 

971 is_flag=True, 

972 help="Clear existing ratings before recompute", 

973) 

974@click.option( 

975 "--learn-adjustments", 

976 is_flag=True, 

977 help="Learn barrier/handicap adjustments during recompute", 

978) 

979@click.option( 

980 "--skip-eval", 

981 is_flag=True, 

982 help="Skip accuracy evaluation after recompute", 

983) 

def backfill(months, category, source, clear, learn_adjustments, skip_eval):
    """Backfill historical data and recompute ratings.

    Ingests data week by week from (today - months) to today,
    then recomputes all ratings and evaluates prediction accuracy.

    Examples:

        backfill --months 12 --category H --source tab

        backfill --months 6 --category T --clear --learn-adjustments

        backfill --months 3 --source ingest --skip-eval
    """
    # NOTE(review): the docstring above is presumably rendered by click as the
    # command's --help text, so it is kept verbatim; parameter notes live here.
    #
    # months:            how many months before today the backfill starts.
    # category:          category code; upper-cased, and replaced by
    #                    settings.tab.default_category when falsy.
    # source:            forwarded to IngestionService(source=...).
    # clear:             forwarded to recompute_ratings(clear_existing=...).
    # learn_adjustments: forwarded to recompute_ratings(learn_adjustments=...).
    # skip_eval:         when true, the accuracy evaluation step is not run.
    #
    # Failure handling: a week whose ingestion raises is recorded and skipped so
    # the remaining weeks still run; a recompute failure exits with status 1;
    # an evaluation failure is only reported.

    # Imported locally (like recompute_ratings below). Exception text is not
    # markup: without escaping, bracketed fragments in an error message are
    # parsed as Rich style tags and either vanish from the output or raise
    # MarkupError from inside the error handler.
    from rich.markup import escape

    # Calculate date range
    today = date.today()
    start_date = _subtract_months(today, months)

    settings = get_settings()
    effective_category = category.upper() if category else settings.tab.default_category

    console.print(
        f"\n[bold]Backfilling {effective_category} data "
        f"from {start_date} to {today}[/bold]"
    )
    console.print(
        f"Source: {source}, Months: {months}, "
        f"Clear: {clear}, Learn adjustments: {learn_adjustments}\n"
    )

    # Track totals
    totals: dict[str, int] = {
        "meetings": 0,
        "races": 0,
        "starters": 0,
        "errors": 0,
    }
    failed_weeks: list[tuple[date, date, str]] = []

    # ── Week-by-week ingestion ───────────────────────────────────
    current = start_date
    week_count = 0
    while current <= today:
        # Seven-day window, clamped so the final window never runs past today.
        week_end = min(current + timedelta(days=6), today)
        week_count += 1
        console.print(
            f"[bold cyan]Week {week_count}: {current} to {week_end}[/bold cyan]"
        )

        try:
            # A fresh session per week keeps one failed week from affecting
            # the session used for the next one.
            with get_session() as session:
                service = IngestionService(session, source=source)
                meetings, races, starters = asyncio.run(
                    service.ingest_date_range(
                        current, week_end, category=effective_category
                    )
                )
                totals["meetings"] += meetings
                totals["races"] += races
                totals["starters"] += starters
                totals["errors"] += service.stats["errors"]

            console.print(
                f" [green]✓ {meetings} meetings, "
                f"{races} races, {starters} starters[/green]"
            )
        except Exception as e:
            # Best effort: record the failure and carry on with the next week.
            logger.error(f"Week {current} to {week_end} failed: {e}", exc_info=True)
            console.print(f" [red]✗ Week failed: {escape(str(e))}[/red]")
            failed_weeks.append((current, week_end, str(e)))
            totals["errors"] += 1

        current = week_end + timedelta(days=1)

    # ── Ingestion summary ────────────────────────────────────────
    console.print("\n[bold]Ingestion complete:[/bold]")
    console.print(f" Weeks processed: {week_count}")
    console.print(f" Total meetings: {totals['meetings']}")
    console.print(f" Total races: {totals['races']}")
    console.print(f" Total starters: {totals['starters']}")

    if failed_weeks:
        console.print(f"\n[yellow]⚠ {len(failed_weeks)} week(s) had errors:[/yellow]")
        for fw_start, fw_end, fw_err in failed_weeks:
            console.print(
                f" [yellow]{fw_start} to {fw_end}: {escape(fw_err)}[/yellow]"
            )

    # ── Recompute ratings ────────────────────────────────────────
    snapshot_count = 0
    if totals["meetings"] == 0:
        console.print("\n[yellow]No data ingested — skipping recompute.[/yellow]")
    else:
        console.print(
            f"\n[bold]Recomputing ratings from {start_date} to {today}...[/bold]"
        )
        try:
            from packages.core.ratings.recompute import recompute_ratings

            with get_session() as session:
                snapshot_count = recompute_ratings(
                    session,
                    start_date,
                    today,
                    clear_existing=clear,
                    learn_adjustments=learn_adjustments,
                )
            console.print(f"[green]✓ {snapshot_count} rating snapshots created[/green]")
        except Exception as e:
            # Unlike a failed week, a failed recompute aborts the command.
            logger.error(f"Recompute failed: {e}", exc_info=True)
            console.print(f"\n[red]✗ Recompute failed: {escape(str(e))}[/red]")
            sys.exit(1)

    # ── Evaluation ───────────────────────────────────────────────
    brier: float | None = None
    winner_acc: float | None = None
    if not skip_eval and totals["meetings"] > 0:
        console.print("\n[bold]Evaluating prediction accuracy...[/bold]")
        try:
            # scripts/evaluate_accuracy.py, three directories above this file.
            script_path = os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                "..",
                "..",
                "..",
                "scripts",
                "evaluate_accuracy.py",
            )
            eval_result = subprocess.run(
                [
                    sys.executable,
                    script_path,
                    "--from",
                    start_date.isoformat(),
                    "--to",
                    today.isoformat(),
                    "--json",
                ],
                capture_output=True,
                text=True,
                check=True,
                timeout=600,
            )
            data = json.loads(eval_result.stdout)
            if "all" in data and data["all"]:
                brier = data["all"]["brier"]
                winner_acc = data["all"]["winner_acc"]
                console.print(f" [green]Winner accuracy: {winner_acc:.4f}[/green]")
                console.print(f" [green]Brier score: {brier:.4f}[/green]")
            else:
                console.print(" [yellow]Evaluation returned no results[/yellow]")
        except Exception as e:
            logger.error(f"Evaluation failed: {e}", exc_info=True)
            # CalledProcessError / TimeoutExpired carry the captured stderr,
            # which str(e) does not include; log it so the failure is diagnosable.
            eval_stderr = getattr(e, "stderr", None)
            if eval_stderr:
                logger.error(f"Evaluation stderr: {eval_stderr}")
            console.print(
                f" [yellow]⚠ Evaluation skipped due to error: {escape(str(e))}[/yellow]"
            )

    # ── Final summary table ──────────────────────────────────────
    table = Table(title="Backfill Summary")
    table.add_column("Metric", style="cyan")
    table.add_column("Value", style="green", justify="right")

    table.add_row("Weeks processed", str(week_count))
    table.add_row("Meetings ingested", str(totals["meetings"]))
    table.add_row("Races ingested", str(totals["races"]))
    table.add_row("Starters ingested", str(totals["starters"]))
    table.add_row("Rating snapshots", str(snapshot_count))
    if brier is not None:
        table.add_row("Brier score", f"{brier:.4f}")
    if winner_acc is not None:
        table.add_row("Winner accuracy", f"{winner_acc:.4f}")
    table.add_row(
        "Errors",
        str(totals["errors"]),
        style="red" if totals["errors"] > 0 else "green",
    )
    if failed_weeks:
        table.add_row("Failed weeks", str(len(failed_weeks)), style="red")

    console.print("\n")
    console.print(table)

    if totals["errors"] > 0:
        console.print(
            f"\n[yellow]⚠ Completed with {totals['errors']} error(s)[/yellow]"
        )
    else:
        console.print("\n[green]✓ Backfill completed successfully[/green]")

1167 

1168 

# Script entry point: run the click command group when this file is executed
# directly rather than imported.
if __name__ == "__main__":
    cli()