
    iUP                       S r SSKJr  SSKrSSKrSSKJrJrJr  SSK	J
r
J	r	  SSKJr  SSKrSSKJr  SSKJr  SS	KJr  SS
KJr  \" 5         \" 5       r\ " S S5      5       r\ " S S5      5       r\ " S S5      5       r\ " S S5      5       rS.S jrS.S jrS.S jrS/S jrS0S jrS1S jr           S2S jr!\RD                  " 5       \RF                  " SSSSS9\RF                  " S SSS!S9\RF                  " S"S#SS$S%9\RF                  " S&SS'S(S)9S3S* j5       5       5       5       5       r$S4S+ jr%S5S, jr&\'S-:X  a  \$" 5         gg)6a  Validate backfilled data for correctness and completeness.

Produces a JSON report with:
  - Meeting count per month
  - Race count per month
  - Rating convergence (snapshot count, rating distribution)
  - Duplicate race detection
  - Overall health summary
    )annotationsN)asdict	dataclassfield)datedatetime)Any)Console)Table)setup_logging)get_sessionc                  N    \ rS rSr% SrS\S'   SrS\S'   SrS\S'   SrS\S	'   S
r	g)MonthlyCounts"   zCounts for a single month.strmonthr   intmeetingsracesstarters N)
__name__
__module____qualname____firstlineno____doc____annotations__r   r   r   __static_attributes__r       >/root/tipsharks/tipsharks-elo-api/scripts/validate_backfill.pyr   r   "   s'    $JHcE3NHcr   r   c                      \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S'   Sr
S\S	'   SrS\S
'   SrS\S'   SrS\S'   SrS\S'   SrS\S'   Srg)RatingDistribution,   zRating distribution statistics.        floatminmaxmeanmedianstdp5p25p75p95r   r   total_snapshotsr   N)r   r   r   r   r   r&   r   r'   r(   r)   r*   r+   r,   r-   r.   r/   r   r   r   r    r"   r"   ,   sh    )CCD%FECBOCCCOSr   r"   c                  Z    \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   \	" \
S9rS	\S
'   Srg)DuplicateInfo<   z(Information about duplicate races found.r   r   total_racesduplicate_race_numbersduplicate_idsdefault_factorylist[dict[str, Any]]
duplicatesr   N)r   r   r   r   r   r3   r   r4   r5   r   listr9   r   r   r   r    r1   r1   <   s4    2K"#C#M3',T'BJ$Br   r1   c                  
   \ rS rSr% SrS\S'   S\S'   S\S'   \" \S9rS\S	'   \" \S9r	S\S
'   \" \S9r
S\S'   SrS\S'   SrS\S'   SrS\S'   \" \S9rS\S'   \" \S9rS\S'   \" \S9rS\S'   \" \S9rS\S'   SrS\S'   Srg)ValidationReportF   zComplete validation report.r   	date_fromdate_togenerated_atr6   r8   monthly_meetingsmonthly_racesmonthly_startersr   r   total_meetingsr3   total_startersdict[str, int]total_entitiesdict[str, RatingDistribution]rating_distributionr1   r9   list[dict[str, str]]issuespassstatusr   N)r   r   r   r   r   r   r   r:   rA   rB   rC   rD   r3   rE   dictrG   rI   r1   r9   rK   rM   r   r   r   r    r<   r<   F   s    %NL-24-H*H*/*EM'E-24-H*HNCKNC%*4%@NN@9>t9T6T %m DJD#(#>F >FCr   r<   r>   r?   c                   SSK Jn  SSKJn  U R	                  UR                  UR                  S5      R                  S5      UR                  UR                  5      R                  S5      5      R                  UR                  U:  UR                  U:*  5      R                  S5      R                  S5      R                  5       nU Vs/ s H  ofR                  UR                  S.PM     sn$ s  snf )zCount meetings per month.r   func)MeetingYYYY-MMr   countr   rT   )
sqlalchemyrQ   packages.core.storage.modelsrR   queryto_charmeeting_datelabelrT   idfiltergroup_byorder_byallr   )sessionr>   r?   rQ   rR   rowsrs          r    _get_monthly_meetingsrd   ]   s    4 	LL--y9??HJJwzz"((1	
 
$$	173G3G73R	S	'		'		 	 ;??$Qgg0$???s   
"C/c                8   SSK Jn  SSKJnJn  U R                  UR                  UR                  S5      R                  S5      UR                  UR                  5      R                  S5      5      R                  XUR                  UR                  :H  5      R                  UR                  U:  UR                  U:*  5      R                  S5      R                  S5      R!                  5       nU Vs/ s H  owR"                  UR                  S.PM     sn$ s  snf )zCount races per month.r   rP   rR   RacerS   r   rT   rU   )rV   rQ   rW   rR   rg   rX   rY   rZ   r[   rT   r\   join
meeting_idr]   r^   r_   r`   r   )ra   r>   r?   rQ   rR   rg   rb   rc   s           r    _get_monthly_racesrj   p   s    : 	LL--y9??HJJtww%%g.	
 
dOOwzz1	2	$$	173G3G73R	S	'		'		 	 ;??$Qgg0$???s   2"Dc                   SSK Jn  SSKJnJnJn  U R                  UR                  UR                  S5      R                  S5      UR                  UR                  5      R                  S5      5      R                  XUR                  UR                  :H  5      R                  XfR                  UR                  :H  5      R                  UR                  U:  UR                  U:*  5      R!                  S5      R#                  S5      R%                  5       nU Vs/ s H  oR&                  UR                  S.PM     sn$ s  snf )zCount starters per month.r   rP   )rR   rg   StarterrS   r   rT   rU   )rV   rQ   rW   rR   rg   rl   rX   rY   rZ   r[   rT   r\   rh   ri   race_idr]   r^   r_   r`   r   )	ra   r>   r?   rQ   rR   rg   rl   rb   rc   s	            r    _get_monthly_startersrn      s    CC 	LL--y9??HJJwzz"((1	
 
dOOwzz1	2	g$''1	2	$$	173G3G73R	S	'		'		 	 ;??$Qgg0$???s   "D?c                   SSK JnJnJnJnJnJn  U R                  U5      R                  5       U R                  U5      R                  5       U R                  U5      R                  5       U R                  U5      R                  5       U R                  U5      R                  5       U R                  U5      R                  5       S.$ )z&Count unique entities in the database.r   )DriverHorserR   rg   rl   Trainer)horsesdriverstrainersr   r   r   )	rW   rp   rq   rR   rg   rl   rr   rX   rT   )ra   rp   rq   rR   rg   rl   rr   s          r    _get_entity_countsrv      s    [[ --&,,.==(..0MM'*002MM'*002t$**,MM'*002 r   c                   SSK nSSKJn  / SQn0 nU GHg  nU R                  UR                  5      R                  UR                  U:H  5      R                  5       nU Vs/ s H  n[        UR                  5      PM     nnU(       d  [        5       XE'   M  [        U5      n	[        U	5      n
[        [        U5      [        U5      UR                  U5      UR                  U5      [        U5      S:  a  UR!                  U5      OSU	[        S[#        U
S-  5      5         U	[        S[#        U
S-  5      5         U	[        U
S-
  [#        U
S	-  5      5         U	[        U
S-
  [#        U
S
-  5      5         U
S9
XE'   GMj     U$ s  snf )z(Get rating distribution per entity type.r   N)RatingSnapshot)horsedrivertrainer   r$   g?g      ?g      ?gffffff?)
r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   )
statisticsrW   rx   rX   ratingr]   entity_typer`   r%   r"   sortedlenr&   r'   r(   r)   stdevr   )ra   r}   rx   entity_typesdistributionset	snapshotssratingssorted_ratingsns              r    _get_rating_distributionr      sU   ;1L35MMM.//0VN.."45SU 	
 -66Iq5?I6 2 4M.GG)$$W--0\A-=
  )3c!ST]34s1c!d(m45s1q5#a$h-89s1q5#a$h-89
 6 + 7s    Fc           
        SSK Jn  SSKJnJn  U R                  UR                  UR                  UR                  UR                  5      R                  S5      5      R                  U5      R                  UR                  U:  UR                  U:*  5      R                  UR                  UR                  5      R                  UR                  UR                  5      S:  5      R!                  5       n/ nU H  u  pn
U R                  U5      R                  UR                  U:H  UR                  U	:H  5      R!                  5       nUR#                  [%        U5      U	U
U Vs/ s H  oR                  PM     snS.5        M     U R                  UR&                  UR                  UR                  5      R                  S5      5      R                  UR&                  R)                  S5      5      R                  UR&                  5      R                  UR                  UR                  5      S:  5      R!                  5       nU Hx  u  pU R                  U5      R                  UR&                  U:H  5      R!                  5       nUR#                  [%        U5      U
U Vs/ s H  oR                  PM     snS.5        Mz     U R                  U5      R                  U5      R                  UR                  U:  UR                  U:*  5      R                  5       n[+        U[-        U5      [-        U5      US	9$ s  snf s  snf )
z'Find duplicate races in the date range.r   rP   rf   cntr|   )ri   race_numberrT   race_idsN)tab_event_idrT   r   )r3   r4   r5   r9   )rV   rQ   rW   rR   rg   rX   ri   r   rT   r\   r[   rh   r]   rZ   r^   havingr`   appendr   r   isnotr1   r   )ra   r>   r?   rQ   rR   rg   dup_race_numbersdup_detailsri   r   r   r   rc   dup_event_idsr   r3   s                   r    _find_duplicate_racesr      s   : 	OOJJtww%%e,	

 
g	$$	173G3G73R	S	$//4#3#3	4	

477#a'	(	  )+K(8$
MM$VDOOz143C3C{3RSSU 	
 	!*o*+015aTT51		
 )9" 	JJtww%%e,	
 
!!''-	.	$##	$	

477#a'	(	  +MM$VD%%56SU 	
 	 #L 1+015aTT51	
 + 	d	g	$$	173G3G73R	S		  "#34-(	 K 24 2s   "L6L;c                   / nU  H)  nUS   S:X  d  M  UR                  SSSUS    3S.5        M+     U H)  nUS   S:X  d  M  UR                  SSSUS    3S.5        M+     UR                  UR                  -   nUS:  a;  UR                  US	:  a  S
OSSSU SUR                   SUR                   S3S.5        UR                  5        H  u  pxUR                  S:X  a  UR                  SSSU S3S.5        M0  UR
                  S:  d  UR                  S:  d  MR  UR                  SSU SUR
                  S SUR                  S S3S.5        M     U$ )zCheck for data quality issues.rT   r   warningmissing_datazNo meetings found for r   )severitycategorymessagezNo races found for 
   errorr9   zFound z' duplicate race group(s) (race_number: z, tab_event_id: )
no_ratingszNo rating snapshots for z	 entitiesi  i  rating_rangez ratings have extreme range: [.1fz, z] (expected ~500-3000))r   r4   r5   itemsr/   r&   r'   )	rA   rB   r9   rI   rK   row
total_dupsr   dists	            r    _check_issuesr     s    $&F  w<1MM ) .!7G~F   w<1MM ) .!4S\NC  22Z5M5MMJA~'1BGI(ZL )%%/%F%F$G H%%/%=%=$>aA	
	
 16681$MM ) ,!9+iP xx#~D$-$2*m ,  $~R~ >34	
 90 Mr   z--fromTzStart date (YYYY-MM-DD))requiredhelpz--tozEnd date (YYYY-MM-DD)z--outoutput_filez=Output JSON file path (optional, prints to stdout if omitted))defaultr   z	--verboseFz!Show detailed per-month breakdown)is_flagr   r   c                    [         R                  " U 5      n[         R                  " U5      nWW:  a+  [        R	                  S5        [
        R                  " S5        [        R	                  SU SU S35        [        U U[        R                  " 5       R                  5       S	9n[        5        n[        XU5      Ul        [        XU5      Ul        [!        XU5      Ul        [%        S
 UR                   5       5      Ul        [%        S UR                   5       5      Ul        [%        S UR"                   5       5      Ul        [-        U5      Ul        [1        U5      Ul        [5        XU5      Ul        [9        UR                  UR                  UR6                  UR2                  5      Ul        SSS5        UR:                   V	s/ s H  oS   S:X  d  M  U	PM     n
n	UR:                   V	s/ s H  oS   S:X  d  M  U	PM     nn	U
(       a  SUl        OU(       a  SUl        OSUl        [?        SS9nURA                  SSS9  URA                  SSSS9  URC                  SUR<                  RE                  5       [G        UR<                  5      S9  URC                  S[I        UR&                  5      5        URC                  S[I        UR(                  5      5        URC                  S[I        UR*                  5      5        URC                  S [I        UR.                  RK                  S!S"5      5      5        URC                  S#[I        UR.                  RK                  S$S"5      5      5        URC                  S%[I        UR.                  RK                  S&S"5      5      5        URC                  S'[I        UR6                  RL                  UR6                  RN                  -   5      5        URC                  S([I        [Q        UR:                  5      5      5        URC                  S)[I        [Q        U
5      5      U
(       a  S*OSS9  URC                  S+[I        [Q        U5      5      U(       a  S,OSS9  [        R	                  U5        UR2                  (       Ga>  [?        S-S9nURA                  S.SS9  URA                  S/SS09  URA                  S1SS09  URA                  S2SS09  URA                  S3SS09  URA                  S4SS09  URA                  S5SS09  UR2                  RS                  5        H}  u  pURT                  S":  d  M  URC                  U[I        URT                  5      URV                  S6 URX                  S6 URZ                  S6 UR\                  S6 UR^                  S6 5        M     [        R	                  S75        [        R	                  U5        U(       Ga  UR                  (       Ga  [?        S8S9nURA                  S9SS9  URA                  S:SS09  URA                  S;SS09  URA                  S<SS09  0 nUR                   H  nUS=   URa                  US>   0 5      S?'   M      UR                   H  nUS=   URa                  US>   0 5      S@'   M      UR"                   H  nUS=   URa                  US>   0 5      SA'   M      [c        URe                  5       5       Hg  nUU   nURC                  U[I        URK                  S?S"5      5      [I        URK                  S@S"5      5      [I        URK                  SAS"5      5      5        Mi     [        R	                  S75        [        R	                  U5        UR:                  (       a  [?        S(S9nURA                  SBSCS9  URA                  SD5        URA                  SE5        UR:                   HP  nS*S,SFSG.RK                  US   SH5      nURC                  SIU SJUS   RE                  5        SKU SJ3USL   USM   5        MR     [        R	                  S75        [        R	                  U5        UR<                  S:X  a  [        R	                  SN5        O;UR<                  S:X  a  [        R	                  SO5        O[        R	                  SP5        [g        U5      nU(       aJ  [i        USQ5       n[j        Rl                  " UUSR[H        SS9  SSS5        [        R	                  STU SU35        OC[        R	                  SV5        [n        Rp                  " [j        Rr                  " USR[H        SS95        UR<                  S:X  a  [
        R                  " S5        [
        R                  " S"5        g! [         a:  n[        R	                  SU S35        [
        R                  " S5         SnAG	NSnAff = f! , (       d  f       GN= fs  sn	f s  sn	f ! , (       d  f       GN= f)WzValidate backfilled data in a date range.

Checks meeting/race/starter counts per month, rating convergence,
and duplicate races. Outputs a JSON report.
z[red]Invalid date format: z[/red]r|   Nz4[red]Error: start date must be before end date[/red]z'
[bold]Validating backfilled data from z to z[/bold]
)r>   r?   r@   c              3  *   #    U  H	  oS    v   M     g7frT   Nr   .0rc   s     r    	<genexpr>$validate_backfill.<locals>.<genexpr>       #P8O1gJ8O   c              3  *   #    U  H	  oS    v   M     g7fr   r   r   s     r    r   r     s      J5I75Ir   c              3  *   #    U  H	  oS    v   M     g7fr   r   r   s     r    r   r     r   r   r   r   r   failwarnrL   zValidation Summary)titleMetriccyan)styleValuegreenright)r   justifyStatuszTotal meetingszTotal raceszTotal startersHorsesrs   r   Driversrt   Trainersru   zDuplicate groupsIssuesErrorsredWarningsyellowz"Rating Distribution by Entity TypezEntity Type	Snapshots)r   MeanMedianStdMinMaxr   
zMonthly BreakdownMonthMeetingsRacesStartersrT   r   r   r   r   SeverityboldCategoryMessageblue)r   r   infowhite[]z[/r   r   u+   
[bold red]✗ Validation FAILED[/bold red]u?   
[bold yellow]⚠ Validation passed with warnings[/bold yellow]u/   
[bold green]✓ Validation PASSED[/bold green]w   )indentr   z
[green]Report saved to z[/green]z!
[dim]JSON report (stdout):[/dim]):r   fromisoformat
ValueErrorconsoleprintsysexitr<   r   now	isoformatr   rd   rA   rj   rB   rn   rC   sumrD   r3   rE   rv   rG   r   rI   r   r9   r   rK   rM   r   
add_columnadd_rowupper_style_for_statusr   getr4   r5   r   r   r/   r(   r)   r*   r&   r'   
setdefaultr   keys_serialise_reportopenjsondumpclickechodumps)r>   r?   r   verbose
start_dateend_dateereportra   ierrorswarningssummary_tablerd_tabler   r   monthly_tablemonthly_mapr   r   dataissues_tableissue	sev_stylereport_dictfs                             r    validate_backfillr  g  sU   >''	2
%%g.
 HLMMM<ZLXJV_`a\\^--/F 
'"7X"V1'xP"7X"V !$#P8O8O#P P  JV5I5I JJ ##P8O8O#P P 27 ; &>g%F" 2'xP &##  &&	
' 
6  CAJ-7*BaFC!==G=ajMY,F=HG	  45MXV4WGWE(FMM$7$7$9ARSYS`S`Aab*C0E0E,FG-V-?-?)@A*C0E0E,FG(C(=(=(A(A(A(N$OP)S)>)>)B)B9a)P%QR*c&*?*?*C*CJPQ*R&ST,c&2C2C2Z2Z]c]n]n]|]|2|.}~(CFMM(:$;<(CF$4VEQXY*c#h-&8HZabMM-  !!!CDM8K9FG4Hg6E73E73E732288:HB##a'  ,,-yyo{{3'xxnxxnxxn ; 	dh 6***$78   7  W =  ' :  W =')**CCFw<K""3w<4Z@ +''C@CGK""3w<4W= (**CCFw<K""3w<4Z@ + K,,./Eu%D!!DHHZ+,DHHWa()DHHZ+,	 0 	dm$ }}8,
&9
+	*]]E# c%
#W-	 
   I;aj 1 7 7 9:"YKqIj!i  # 	dl# }}DE	&	 XYHI $F+K+s#qIIk1Q< $1+hGH:;

4::k!SAB }}HHQKM  21#V<=  
6 DG` $#sH   ,f 1Dg	
gg0g  g g%
g/gg	
g%
g4c                     U S:X  a  gU S:X  a  gg)NrL   r   r   r   r   r   )rM   s    r    r   r   5  s    	6	r   c                   U R                   U R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  R                  U R                  R                  -   S.U R                  U R                  U R                  U R                  R                  5        VVs0 s H  u  pU[!        U5      _M     snn[!        U R                  5      U R"                  S.$ s  snnf )z+Convert report to a JSON-serialisable dict.)rD   r3   rE   rG   duplicate_groups)r>   r?   r@   rM   summaryrA   rB   rC   rI   r9   rK   )r>   r?   r@   rM   rD   r3   rE   rG   r9   r4   r5   rA   rB   rC   rI   r   r   rK   )r   r   r   s      r    r   r   =  s     %%>>++--$33!--$33$33 & 1 1 H H6K\K\KjKj j
 #33--"33-3-G-G-M-M-O 
-OBt-O 
 V../--'  
s   D__main__)r>   r   r?   r   returnr8   )r  rF   )r  rH   )r>   r   r?   r   r  r1   )
rA   list[dict[str, int]]rB   r  r9   r1   rI   rH   r  rJ   )r>   r   r?   r   r   z
str | Noner   bool)rM   r   r  r   )r   r<   r  zdict[str, Any])(r   
__future__r   r   r   dataclassesr   r   r   r   r   typingr	   r   rich.consoler
   
rich.tabler   packages.core.common.loggingr   packages.core.storage.databaser   r   r   r"   r1   r<   rd   rj   rn   rv   r   r   r   commandoptionr  r   r   r   r   r   r    <module>r     s   #  
 0 0 #      6 6 
)       C C C   ,@&@(@*$NJZF*F'F F 7	F
 FX 	"	 
	 	 	H	 	,	o 2oj2 z r   