Coverage for packages/core/common/settings.py: 99%

1"""Configuration settings for TipSharks system.

Loads configuration from environment variables using Pydantic settings.
Every setting except the database URL has a default, and values can be
overridden via a .env file.

5"""

7from typing import Literal

9from pydantic import Field, field_validator

10from pydantic_settings import BaseSettings, SettingsConfigDict

class TABSettings(BaseSettings):
    """Settings for the TAB Affiliates API client.

    The TAB API is public, so no authentication values are configured here.
    Each field is read from an environment variable named ``TAB_<FIELD>``.
    """

    model_config = SettingsConfigDict(env_prefix="TAB_")

    # --- Connection -------------------------------------------------------
    base_url: str = Field(
        default="https://api.tab.co.nz/affiliates/v1",
        description="TAB Affiliates API base URL",
    )
    timeout: float = Field(
        default=30.0,
        description="Request timeout in seconds",
    )
    max_retries: int = Field(
        default=3,
        description="Maximum retry attempts",
    )

    # --- Behaviour --------------------------------------------------------
    mock_mode: bool = Field(
        default=False,
        description="Enable mock mode to use sample data instead of real API (for testing)",
    )

    # --- Default query filters --------------------------------------------
    default_category: str = Field(
        default="H",
        description="Default racing category: H (Harness), T (Thoroughbred), G (Greyhound)",
    )
    default_country: str = Field(
        default="NZ",
        description="Default country filter for API queries",
    )

class DatabaseSettings(BaseSettings):
    """Database connection and pooling settings.

    Each field is read from an environment variable named
    ``DATABASE_<FIELD>``. ``url`` has no default and must be supplied.
    """

    model_config = SettingsConfigDict(env_prefix="DATABASE_")

    # Required: there is deliberately no fallback connection string.
    url: str = Field(
        ...,
        description="Database connection URL",
    )

    # --- Connection pool --------------------------------------------------
    pool_size: int = Field(
        default=5,
        description="Connection pool size",
    )
    max_overflow: int = Field(
        default=10,
        description="Max overflow connections",
    )
    pool_recycle: int = Field(
        default=3600,
        description="Connection recycle time in seconds",
    )
    pool_pre_ping: bool = Field(
        default=True,
        description="Enable pool pre-ping",
    )

    # --- Diagnostics ------------------------------------------------------
    echo: bool = Field(
        default=False,
        description="Echo SQL statements (debug)",
    )
    slow_query_threshold_ms: int = Field(
        default=100,
        description="Queries exceeding this duration (ms) are logged as slow queries",
    )

class LoggingSettings(BaseSettings):
    """Logging settings, read from ``LOG_<FIELD>`` environment variables."""

    model_config = SettingsConfigDict(env_prefix="LOG_")

    # Both values are plain strings; no validation of the allowed names is
    # performed in this class.
    level: str = Field(
        default="INFO",
        description="Log level",
    )
    format: str = Field(
        default="json",
        description="Log format (json or text)",
    )

class APISettings(BaseSettings):
    """HTTP API server settings, read from ``API_<FIELD>`` environment variables."""

    model_config = SettingsConfigDict(env_prefix="API_")

    # --- Bind address -----------------------------------------------------
    host: str = Field(
        default="0.0.0.0",
        description="API host",
    )
    port: int = Field(
        default=8000,
        description="API port",
    )

    # --- Access control ---------------------------------------------------
    # NOTE(review): the default token is a placeholder and nothing in this
    # class rejects it -- confirm that deployments always override it.
    admin_token: str = Field(
        default="change_me_in_production",
        description="Admin endpoint authentication token",
    )
    # NOTE(review): the default permits every origin; see the description.
    cors_allow_origins: str = Field(
        default="*",
        description="CORS allowed origins (comma-separated or * for all). Use specific origins in production.",
    )

# Every known HRNZ club code as a two-digit string, in ascending order.
# Presumably this is what the "all" value of HRNZSettings.club_codes expands
# to -- the expansion itself is not in this module, so confirm with the caller.
HRNZ_ALL_CLUB_CODES = [
    "02", "04", "07", "09", "15", "16", "17", "18", "19", "20",
    "21", "23", "24", "25", "26", "27", "28", "30", "31", "32",
    "35", "36", "37", "38", "39", "40", "41", "42", "43", "44",
    "45", "46", "47", "48", "49", "50", "51", "52", "53", "54",
    "55", "56", "57", "63", "64", "66", "68", "69", "70", "73",
    "79", "80", "81", "82", "83", "84", "85", "86", "87", "88",
    "89", "90", "91", "92", "94", "95", "96", "97",
]

156

157

class HRNZSettings(BaseSettings):
    """Settings for the HRNZ scraper and its optional Decodo proxy.

    Each field is read from an environment variable named ``HRNZ_<FIELD>``.
    """

    model_config = SettingsConfigDict(env_prefix="HRNZ_")

    # --- Scrape scope -----------------------------------------------------
    club_codes: str = Field(
        default="all",
        description="Comma-separated HRNZ club codes (two digits each) or 'all'",
    )

    # --- Proxy endpoint ---------------------------------------------------
    # Either a full server URL, or host/port/scheme parts that are used when
    # the URL is not set (per the field descriptions).
    decodo_proxy_server: str | None = Field(
        default=None,
        description="Decodo proxy server URL, e.g. http://host:port",
    )
    decodo_proxy_host: str | None = Field(
        default=None,
        description="Decodo proxy host (used when proxy server URL is not set)",
    )
    decodo_proxy_port: int | None = Field(
        default=None,
        description="Decodo proxy port (used when proxy server URL is not set)",
    )
    decodo_proxy_scheme: str = Field(
        default="http",
        description="Decodo proxy scheme (http or https)",
    )

    # --- Proxy credentials ------------------------------------------------
    decodo_proxy_username: str | None = Field(
        default=None,
        description="Decodo proxy username (base credentials before session rotation)",
    )
    decodo_proxy_password: str | None = Field(
        default=None,
        description="Decodo proxy password",
    )

    # --- Session rotation -------------------------------------------------
    decodo_rotate_each_request: bool = Field(
        default=True,
        description="Rotate Decodo session per request",
    )
    decodo_session_param: str = Field(
        default="session",
        description="Decodo session parameter name appended to username",
    )
    decodo_session_id: str | None = Field(
        default=None,
        description="Fixed Decodo session ID when rotation is disabled",
    )
    decodo_username_template: str | None = Field(
        default=None,
        description="Username template using {username} and {session} placeholders",
    )

207

208

class RatingSettings(BaseSettings):
    """Rating engine configuration.

    Fields are read from un-prefixed environment variables (``env_prefix`` is
    empty), e.g. ``ELO_K_BASE`` or ``DISTANCE_BUCKET_MODE``.

    Cross-field rules enforced here:

    * ``rating_max`` must be >= ``rating_min`` when both are set.
    * ``distance_bucket_size`` must be a positive integer whenever
      ``distance_bucket_mode`` is ``"fixed"`` -- including when the size is
      simply left unset.
    """

    # Core Elo parameters
    elo_scale_c: float = Field(
        default=400.0,
        description="Logistic scale factor for Elo calculations",
    )
    elo_k_base: float = Field(
        default=24.0,
        description="Base K-factor for rating updates",
    )
    elo_k_min: float | None = Field(
        default=None,
        description="Optional minimum clamp for effective K-factor",
    )
    elo_k_max: float | None = Field(
        default=None,
        description="Optional maximum clamp for effective K-factor",
    )
    pairwise_normalizer: Literal["n_minus_1", "n", "comparisons"] = Field(
        default="n_minus_1",
        description="Normalization mode for pairwise sum (n-1, n, or actual comparisons)",
    )
    initial_rating: float = Field(
        default=1500.0,
        description="Starting rating for new entities",
    )
    # rating_min must stay declared before rating_max: the rating_max
    # validator reads the already-validated rating_min from info.data.
    rating_min: float | None = Field(
        default=None,
        description="Optional minimum clamp for ratings",
    )
    rating_max: float | None = Field(
        default=None,
        description="Optional maximum clamp for ratings",
    )
    initial_rd: float = Field(
        default=350.0,
        description="Initial rating deviation (uncertainty)",
    )

    # Rating deviation parameters
    rd_min: float = Field(
        default=50.0,
        description="Minimum rating deviation (fully established)",
    )
    rd_max: float = Field(
        default=350.0,
        description="Maximum rating deviation (completely uncertain)",
    )
    rd_decay_per_race: float = Field(
        default=15.0,
        description="RD decrease per race (converges toward certainty)",
    )
    rd_decay_floor: float = Field(
        default=0.0,
        description="Minimum RD decay applied per race when RD is enabled",
    )
    rd_inflation_per_day: float = Field(
        default=0.5,
        description="RD increase per day of inactivity",
    )
    rd_inflation_cap_days: int | None = Field(
        default=None,
        description="Optional cap on inactivity days used for RD inflation",
    )
    rd_scaling_mode: Literal["linear", "sqrt", "none"] = Field(
        default="linear",
        description="Scaling mode for K-factor when RD is enabled",
    )

    # Multi-entity weights
    driver_weight_alpha: float = Field(
        default=0.35,
        description="Weight for driver contribution to effective rating",
    )
    trainer_weight_beta: float = Field(
        default=0.15,
        description="Weight for trainer contribution to effective rating",
    )
    horse_k_scale: float = Field(
        default=1.0,
        description="Multiplier applied to horse rating updates",
    )
    driver_k_scale: float = Field(
        default=1.0,
        description="Multiplier applied to driver rating updates",
    )
    trainer_k_scale: float = Field(
        default=1.0,
        description="Multiplier applied to trainer rating updates",
    )

    # Condition adjustment learning
    adj_learning_rate: float = Field(
        default=0.5,
        description="Learning rate for barrier/handicap adjustments",
    )
    adj_update_scale: float = Field(
        default=1.0,
        description="Scale factor applied to adjustment deltas before learning",
    )
    adj_min_samples: int = Field(
        default=0,
        description="Minimum samples required before applying learned adjustments",
    )
    adj_clamp_min: float | None = Field(
        default=None,
        description="Optional minimum clamp for learned adjustments",
    )
    adj_clamp_max: float | None = Field(
        default=None,
        description="Optional maximum clamp for learned adjustments",
    )
    adj_global_only: bool = Field(
        default=False,
        description="If true, only update/use global adjustments (no venue-specific)",
    )
    adj_barrier_enabled: bool = Field(
        default=True,
        description="Enable barrier adjustment learning/application",
    )
    adj_handicap_enabled: bool = Field(
        default=True,
        description="Enable handicap adjustment learning/application",
    )

    # Place probability signals
    place_history_limit: int = Field(
        default=8,
        description="Number of recent finishes used for place consistency",
    )
    place_prior_rate: float = Field(
        default=0.33,
        description="Prior top-3 rate for place probability smoothing",
    )
    place_prior_weight: float = Field(
        default=3.0,
        description="Weight of the top-3 prior in place probability smoothing",
    )
    place_top3_weight: float = Field(
        default=0.75,
        description="Weight applied to the smoothed top-3 rate signal",
    )
    place_consistency_weight: float = Field(
        default=0.5,
        description="Weight applied to finish consistency signal",
    )

    # Feature flags
    enable_driver: bool = Field(
        default=True,
        description="Include driver ratings in calculations",
    )
    enable_trainer: bool = Field(
        default=True,
        description="Include trainer ratings in calculations",
    )
    enable_adjustments: bool = Field(
        default=True,
        description="Learn and apply barrier/handicap adjustments",
    )
    enable_rd: bool = Field(
        default=False,
        description="Track and use rating deviation (RD)",
    )

    # Race processing rules
    min_finishers: int = Field(
        default=2,
        description="Minimum finishers required to process a race",
    )
    dnf_treated_as_last: bool = Field(
        default=False,
        description="Treat DNF starters as last-place finishers",
    )
    tie_handling: Literal["ordered", "half", "skip"] = Field(
        default="ordered",
        description="Handling for tied placings (ordered, half, skip)",
    )

    # Distance buckets for condition adjustments (in meters)
    distance_buckets: list[int] = Field(
        default=[1700, 2000, 2400],
        description="Distance thresholds for bucketing (in meters)",
    )
    # distance_bucket_mode must stay declared before distance_bucket_size:
    # the size validator reads the already-validated mode from info.data.
    distance_bucket_mode: Literal["thresholds", "fixed"] = Field(
        default="thresholds",
        description="Bucket mode: thresholds list or fixed-size buckets",
    )
    distance_bucket_size: int | None = Field(
        default=None,
        # Pydantic skips field validators for default values unless asked to
        # validate them. Without this flag, mode="fixed" with no size given
        # would bypass validate_bucket_size entirely.
        validate_default=True,
        description="Fixed bucket size in meters (required if mode=fixed)",
    )

    @field_validator("distance_buckets", mode="before")
    @classmethod
    def parse_distance_buckets(cls, v):
        """Parse distance buckets from a comma-separated string or a list.

        Args:
            v: Raw input. A ``str`` is split on commas; any other value is
                passed through untouched for pydantic's normal validation.

        Returns:
            A list of ints for string input, otherwise ``v`` unchanged.

        Raises:
            ValueError: If a token is not an integer, or the string holds no
                values at all.
        """
        # NOTE(review): pydantic-settings JSON-decodes complex-typed values
        # coming from the environment before validators run, so a plain
        # "1700,2000,2400" env value may be rejected before reaching this
        # method -- confirm against the installed pydantic-settings version.
        if not isinstance(v, str):
            return v
        # Ignore blank tokens so stray or trailing commas ("1700,2000,") do
        # not fail with an opaque int('') error.
        tokens = [piece.strip() for piece in v.split(",")]
        buckets = [int(token) for token in tokens if token]
        if not buckets:
            raise ValueError("DISTANCE_BUCKETS must contain at least one integer")
        return buckets

    @field_validator("distance_bucket_size")
    @classmethod
    def validate_bucket_size(cls, v, info):
        """Ensure fixed-size bucketing has a valid bucket size.

        Args:
            v: The validated bucket size (``int`` or ``None``).
            info: Pydantic validation info; ``info.data`` holds the fields
                validated so far, including ``distance_bucket_mode``.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If the mode is ``"fixed"`` and ``v`` is missing or
                not positive.
        """
        # .get() because the mode key is absent if the mode itself failed
        # validation; that failure is reported separately by pydantic.
        fixed_mode = info.data.get("distance_bucket_mode") == "fixed"
        if fixed_mode and (v is None or v <= 0):
            raise ValueError(
                "DISTANCE_BUCKET_SIZE must be positive when mode is fixed"
            )
        return v

    @field_validator("rating_max")
    @classmethod
    def validate_rating_bounds(cls, v, info):
        """Ensure rating bounds are consistent.

        Args:
            v: The validated ``rating_max`` (``float`` or ``None``).
            info: Pydantic validation info; ``info.data`` holds the
                previously validated ``rating_min``.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If both bounds are set and ``v`` is below
                ``rating_min``.
        """
        rating_min = info.data.get("rating_min")
        if v is not None and rating_min is not None and v < rating_min:
            raise ValueError("RATING_MAX must be >= RATING_MIN")
        return v

    model_config = SettingsConfigDict(
        env_prefix="",
        json_schema_extra={"examples": [{"distance_buckets": "1700,2000,2400"}]},
    )

432

433

class RateLimitSettings(BaseSettings):
    """Per-endpoint rate limits for the API.

    Every limit is a string in the form slowapi accepts (for example
    ``"100/minute"``). Each field is read from an environment variable named
    ``RATE_LIMIT_<FIELD>``.
    """

    model_config = SettingsConfigDict(env_prefix="RATE_LIMIT_")

    default: str = Field(
        default="100/minute",
        description="Default per-user rate limit",
    )
    export: str = Field(
        default="10/minute",
        description="Export endpoint per-user rate limit",
    )
    predictions: str = Field(
        default="50/minute",
        description="Predictions endpoint per-user rate limit",
    )
    races_list: str = Field(
        default="200/minute",
        description="Race list endpoint per-user rate limit",
    )
    race_detail: str = Field(
        default="100/minute",
        description="Race detail endpoint per-user rate limit",
    )
    admin: str = Field(
        default="20/minute",
        description="Admin endpoint per-user rate limit",
    )

460

461

class RedisSettings(BaseSettings):
    """Redis cache settings, read from ``REDIS_<FIELD>`` environment variables."""

    model_config = SettingsConfigDict(env_prefix="REDIS_")

    url: str = Field(
        default="redis://localhost:6379/0",
        description="Redis connection URL",
    )
    ttl_seconds: int = Field(
        default=300,
        description="Default cache TTL in seconds",
    )
    enabled: bool = Field(
        default=True,
        description="Enable Redis caching",
    )

472

473

class IngestServiceSettings(BaseSettings):
    """Location of the tab-api-ingest service.

    tab-api-ingest is a TypeScript/Express service that fetches racing data
    from the TAB and HRNZ APIs and re-exposes it through its own REST API.
    Each field is read from an environment variable named
    ``INGEST_SERVICE_<FIELD>``.
    """

    model_config = SettingsConfigDict(env_prefix="INGEST_SERVICE_")

    url: str = Field(
        default="http://localhost:9090",
        description="tab-api-ingest service base URL",
    )

487

488

class SchedulerSettings(BaseSettings):
    """Settings for the background job scheduler.

    Each field is read from an environment variable named
    ``SCHEDULER_<FIELD>``.
    """

    model_config = SettingsConfigDict(env_prefix="SCHEDULER_")

    # --- Master switch ----------------------------------------------------
    enabled: bool = Field(
        default=True,
        description="Enable the background scheduler on startup",
    )

    # --- Job schedules (cron expressions) ---------------------------------
    ingest_cron: str = Field(
        default="0 2 * * *",
        description="Cron expression for daily ingestion (default: daily at 2am)",
    )
    recompute_cron: str = Field(
        default="0 3 * * *",
        description="Cron expression for daily recompute (default: daily at 3am)",
    )
    # The only schedule that may be None; described as disabled by default.
    scrape_cron: str | None = Field(
        default=None,
        description="Cron expression for HRNZ scraping (default: disabled)",
    )
    eval_cron: str = Field(
        default="0 4 * * 0",
        description="Cron for weekly evaluation report (Sunday 4am)",
    )
    full_recompute_cron: str = Field(
        default="0 3 1 * *",
        description="Cron for monthly full recompute (1st of month 3am)",
    )

    # --- Failure notifications by email -----------------------------------
    email_notifications: bool = Field(
        default=False,
        description="Enable email notifications for scheduler failures",
    )
    email_smtp_host: str = Field(
        default="",
        description="SMTP host for notifications",
    )
    email_smtp_port: int = Field(
        default=587,
        description="SMTP port",
    )
    email_from: str = Field(
        default="",
        description="From address for notifications",
    )
    email_to: str = Field(
        default="",
        description="Comma-separated notification recipients",
    )
    email_username: str = Field(
        default="",
        description="SMTP username",
    )
    email_password: str = Field(
        default="",
        description="SMTP password",
    )

    # --- Job parameters ---------------------------------------------------
    timezone: str = Field(
        default="Pacific/Auckland",
        description="Timezone for scheduled jobs",
    )
    ingest_days_back: int = Field(
        default=1,
        description="Number of days back to ingest on each scheduled run",
    )
    recompute_days_back: int = Field(
        default=90,
        description="Number of days back to recompute on each scheduled run",
    )

558

559

class Settings(BaseSettings):
    """Top-level application settings.

    Aggregates one instance of every settings group. Each group is created by
    calling its class with no arguments, so each loads its own values using
    its own ``model_config``.
    """

    # NOTE(review): env_file is configured only on this aggregate class. The
    # nested groups are built by their own default factories, whose
    # model_config does not name an env file -- confirm that values placed in
    # .env actually reach them (e.g. via the process environment).
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # --- External data sources --------------------------------------------
    tab: TABSettings = Field(default_factory=TABSettings)
    # DatabaseSettings.url has no default, so building this group fails when
    # no database URL is provided.
    database: DatabaseSettings = Field(default_factory=DatabaseSettings)

    # --- Service runtime --------------------------------------------------
    logging: LoggingSettings = Field(default_factory=LoggingSettings)
    api: APISettings = Field(default_factory=APISettings)

    # --- Domain components ------------------------------------------------
    hrnz: HRNZSettings = Field(default_factory=HRNZSettings)
    rating: RatingSettings = Field(default_factory=RatingSettings)
    ingest_service: IngestServiceSettings = Field(
        default_factory=IngestServiceSettings
    )

    # --- Infrastructure ---------------------------------------------------
    rate_limit: RateLimitSettings = Field(default_factory=RateLimitSettings)
    redis: RedisSettings = Field(default_factory=RedisSettings)
    scheduler: SchedulerSettings = Field(default_factory=SchedulerSettings)

580

581

# Module-level cache for the shared Settings object; stays None until the
# first get_settings() or reload_settings() call populates it.
_settings: Settings | None = None


def get_settings() -> Settings:
    """Return the shared settings object, creating it on first use.

    Returns:
        Settings: The cached application settings; built from the environment
        the first time this function is called.
    """
    global _settings
    # Fast path: an instance has already been built.
    if _settings is not None:
        return _settings
    _settings = Settings()
    return _settings

596

597

def reload_settings() -> Settings:
    """Rebuild the shared settings object from the current environment.

    Intended for tests and for picking up configuration changes at runtime.
    The cached instance is replaced only after the new one has been built
    successfully.

    Returns:
        Settings: The newly created settings, now also the cached instance.
    """
    global _settings
    fresh = Settings()
    _settings = fresh
    return fresh

Coverage for packages / core / common / settings.py: 99%

164 statements