Coverage for packages / hrnz_scraper / historical_scraper.py: 9%
514 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-08 08:37 +1200
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-08 08:37 +1200
1"""HRNZ scraper for historical results via the Results Enquiry page."""
3from __future__ import annotations
5import asyncio
6import os
7import re
8from datetime import date
9from typing import Any
10from urllib.parse import parse_qs, urlencode, urljoin, urlparse
12from bs4 import BeautifulSoup
14try:
15 from playwright.async_api import Browser, Page, async_playwright
16except ImportError: # pragma: no cover - optional dependency for scraping runtime
17 async_playwright = None
18 Browser = Page = Any
20from packages.core.common.logging import get_logger
21from packages.hrnz_scraper.proxy import build_decodo_proxy
23logger = get_logger(__name__)
class HRNZHistoricalResultsScraper:
    """Playwright-backed scraper for the HRNZ historical Results Enquiry pages."""

    # Scheme and host that relative links found in scraped pages are joined to.
    BASE_URL = "https://harness.hrnz.co.nz"
    # Endpoint path used when building raceday search URLs.
    RESULTS_PATH = "/gws/ws/r/infohorsews/wsd06x"
    # Results-search entry URL (wsd08x endpoint on the same host).
    SEARCH_URL = (
        f"{BASE_URL}/gws/ws/r/infohorsews/wsd08x"
        "?Arg=hrnzg-Ptype&Arg=ResultsSearch&Arg=hrnzg-rSite&Arg=TRUE"
    )

    # Minimum spacing, in seconds, between two page fetches.
    RATE_LIMIT_DELAY = 2.0
38 def __init__(self, timeout: float = 30000):
39 """Initialize HRNZ historical scraper.
41 Args:
42 timeout: Request timeout in milliseconds (default: 30000ms = 30s)
43 """
44 env_timeout = os.getenv("HRNZ_PLAYWRIGHT_TIMEOUT_MS", "").strip()
45 if env_timeout:
46 try:
47 timeout = float(env_timeout)
48 except ValueError:
49 logger.warning(
50 "Invalid HRNZ_PLAYWRIGHT_TIMEOUT_MS=%s; using default.", env_timeout
51 )
52 self.timeout = timeout
53 self._playwright = None
54 self._browser: Browser | None = None
55 self._last_request_time = 0.0
57 async def __aenter__(self):
58 await self._ensure_browser()
59 return self
61 async def __aexit__(self, exc_type, exc_val, exc_tb):
62 await self.close()
64 async def _ensure_browser(self):
65 if async_playwright is None:
66 raise ImportError(
67 "playwright is required for HRNZ scraping; install it or use the API ingest path"
68 )
69 if self._browser is None:
70 self._playwright = await async_playwright().start()
71 self._browser = await self._playwright.chromium.launch(headless=True)
72 logger.info("Playwright browser initialized")
74 async def close(self):
75 if self._browser is not None:
76 await self._browser.close()
77 self._browser = None
78 if self._playwright is not None:
79 await self._playwright.stop()
80 self._playwright = None
81 logger.info("Playwright browser closed")
83 async def _rate_limited_fetch(self, url: str) -> str:
84 await self._ensure_browser()
86 import time
88 elapsed = time.time() - self._last_request_time
89 if elapsed < self.RATE_LIMIT_DELAY:
90 await asyncio.sleep(self.RATE_LIMIT_DELAY - elapsed)
92 logger.debug(f"Fetching: {url}")
94 try:
95 proxy = build_decodo_proxy()
96 context = (
97 await self._browser.new_context(proxy=proxy)
98 if proxy
99 else await self._browser.new_context()
100 )
101 page: Page = await context.new_page()
102 try:
103 await page.goto(
104 url, timeout=self.timeout, wait_until="domcontentloaded"
105 )
106 await page.wait_for_timeout(1000)
107 content = await page.content()
108 self._last_request_time = time.time()
109 return content
110 finally:
111 await page.close()
112 await context.close()
113 except Exception as e:
114 raise RuntimeError(f"Failed to fetch {url}: {e}") from e
116 async def iter_meetings(
117 self,
118 start_date: date,
119 end_date: date,
120 race_day_type: str = "OfficialRaces",
121 club_no: str = "",
122 ):
123 """Yield meeting metadata within a date range."""
124 seen_racedays: set[str] = set()
125 current = date(start_date.year, start_date.month, 1)
127 while current <= end_date:
128 meetings = await self.list_meetings_for_month(
129 current.year,
130 current.month,
131 race_day_type=race_day_type,
132 club_no=club_no,
133 )
134 for meeting in meetings:
135 meeting_date = meeting.get("meeting_date")
136 if meeting_date and not (start_date <= meeting_date <= end_date):
137 continue
138 raceday_id = meeting.get("raceday_id")
139 if raceday_id and raceday_id in seen_racedays:
140 continue
141 if raceday_id:
142 seen_racedays.add(raceday_id)
143 yield meeting
145 if current.month == 12:
146 current = date(current.year + 1, 1, 1)
147 else:
148 current = date(current.year, current.month + 1, 1)
150 async def list_meetings_for_month(
151 self,
152 year: int,
153 month: int,
154 race_day_type: str = "OfficialRaces",
155 club_no: str = "",
156 ) -> list[dict[str, Any]]:
157 """Fetch and parse the raceday list for a month."""
158 url = self._build_raceday_search_url(year, month, race_day_type, club_no)
159 html = await self._rate_limited_fetch(url)
160 return self._parse_raceday_list(html, year)
162 async def get_meeting_results(
163 self, results_url: str, meeting_meta: dict[str, Any] | None = None
164 ):
165 """Scrape results from a meeting results page."""
166 html = await self._rate_limited_fetch(results_url)
167 soup = BeautifulSoup(html, "html.parser")
169 meeting = self._parse_meeting_header(soup)
170 if meeting_meta:
171 meeting.setdefault("raceday_id", meeting_meta.get("raceday_id"))
172 meeting.setdefault("meeting_time", meeting_meta.get("meeting_time"))
173 meeting.setdefault("venue", meeting_meta.get("meeting_name"))
174 meeting.setdefault("name", meeting_meta.get("meeting_name"))
175 if "date" not in meeting and meeting_meta.get("meeting_date"):
176 meeting["date"] = meeting_meta["meeting_date"].isoformat()
177 meeting["source_url"] = results_url
179 races = self._parse_races(soup)
180 if not races:
181 race_links = self._parse_race_links(soup)
182 if not race_links:
183 logger.warning("No race links found for meeting page: %s", results_url)
184 races = []
185 for race_link in race_links:
186 race_html = await self._rate_limited_fetch(race_link["results_url"])
187 race_soup = BeautifulSoup(race_html, "html.parser")
188 race = self._parse_race_page(race_soup, race_link)
189 if race and race.get("starters"):
190 races.append(race)
191 if race_links and not races:
192 logger.warning(
193 "Race links found but no starters parsed for meeting page: %s",
194 results_url,
195 )
196 meeting["races"] = races
198 logger.info(
199 "Scraped meeting: %s on %s (%s races)",
200 meeting.get("venue"),
201 meeting.get("date"),
202 len(races),
203 )
205 return meeting
207 def _build_raceday_search_url(
208 self, year: int, month: int, race_day_type: str, club_no: str
209 ) -> str:
210 params = [
211 ("Arg", "hrnzg-Ptype"),
212 ("Arg", "RaceResults"),
213 ("Arg", "hrnzg-rSite"),
214 ("Arg", "TRUE"),
215 ("Arg", "hrnzg-ResultsType"),
216 ("Arg", "RacedaySearch"),
217 ("Arg", "hrnzg-ResultsYear"),
218 ("Arg", str(year)),
219 ("Arg", "hrnzg-ResultsMonth"),
220 ("Arg", str(month)),
221 ("Arg", "hrnzg-ResultsDay"),
222 ("Arg", "1"),
223 ("Arg", "hrnzg-ResultsRacedayType"),
224 ("Arg", race_day_type),
225 ("Arg", "hrnzg-ResultsClubNo"),
226 ("Arg", club_no),
227 ]
228 return f"{self.BASE_URL}{self.RESULTS_PATH}?{urlencode(params)}"
230 def _parse_raceday_list(self, html: str, year: int) -> list[dict[str, Any]]:
231 soup = BeautifulSoup(html, "html.parser")
232 table = soup.find("table") # First table holds raceday list
233 if not table:
234 logger.warning("No raceday list table found")
235 return []
237 meetings = []
239 for row in table.find_all("tr"):
240 if row.find("th"):
241 continue
243 for link in row.find_all("a", href=True):
244 href = link.get("href")
245 if not href or "RacesDisplay" not in href:
246 continue
247 meeting_name = link.get_text(strip=True)
248 if not meeting_name:
249 continue
250 results_url = urljoin(self.BASE_URL, href)
251 raceday_id = self._extract_raceday_id(results_url)
252 meetings.append(
253 {
254 "raceday_id": raceday_id,
255 "meeting_name": meeting_name,
256 "meeting_date": None,
257 "meeting_time": None,
258 "results_url": results_url,
259 }
260 )
262 return meetings
264 def _parse_raceday_header(
265 self, header_text: str, year: int
266 ) -> tuple[date | None, str | None]:
267 header_text = header_text.replace("\xa0", " ").strip()
268 date_match = re.search(r"(\d{1,2})\s+([A-Za-z]{3})", header_text)
269 time_match = re.search(r"(\d{1,2}:\d{2})", header_text)
270 if not date_match:
271 return None, None
273 day = int(date_match.group(1))
274 month_str = date_match.group(2).lower()
275 month_map = {
276 "jan": 1,
277 "feb": 2,
278 "mar": 3,
279 "apr": 4,
280 "may": 5,
281 "jun": 6,
282 "jul": 7,
283 "aug": 8,
284 "sep": 9,
285 "oct": 10,
286 "nov": 11,
287 "dec": 12,
288 }
289 month = month_map.get(month_str)
290 if not month:
291 return None, None
292 try:
293 meeting_date = date(year, month, day)
294 except ValueError:
295 return None, None
297 meeting_time = time_match.group(1) if time_match else None
298 return meeting_date, meeting_time
300 def _parse_meeting_header(self, soup: BeautifulSoup) -> dict[str, Any]:
301 meeting: dict[str, Any] = {}
303 h1 = soup.find("h1")
304 if h1:
305 venue = h1.get_text(strip=True)
306 venue = venue.replace(" Inc", "").replace(" Inc.", "").strip()
307 meeting["venue"] = venue
309 date_div = soup.find("div", class_="hrnz-content__date")
310 if date_div:
311 date_text = date_div.get_text(strip=True)
312 meeting["date_raw"] = date_text
313 parsed_date = self._parse_date(date_text)
314 if parsed_date:
315 meeting["date"] = parsed_date
317 meeting_div = soup.find("div", class_="hrnz-field__meeting")
318 if meeting_div:
319 h5 = meeting_div.find("h5")
320 if h5:
321 meeting_name = h5.get_text(strip=True)
322 if " at " in meeting_name:
323 meeting_name = meeting_name.split(" at ")[0].strip()
324 meeting["name"] = meeting_name
326 return meeting
328 def _parse_date(self, date_str: str) -> str | None:
329 import datetime as dt
330 from datetime import datetime
332 date_str = date_str.strip().replace("\xa0", " ")
334 formats = [
335 "%A, %d %B %Y",
336 "%A, %d %B",
337 "%d %B %Y",
338 "%d %B",
339 "%d/%m/%Y",
340 "%d-%m-%Y",
341 "%d/%m/%y",
342 "%d-%m-%y",
343 ]
345 for fmt in formats:
346 try:
347 parsed = datetime.strptime(date_str.strip(), fmt)
348 if "%Y" not in fmt and "%y" not in fmt:
349 current_year = dt.datetime.now().year
350 parsed = parsed.replace(year=current_year)
351 if parsed.year < 100:
352 parsed = parsed.replace(
353 year=parsed.year + (2000 if parsed.year <= 50 else 1900)
354 )
355 return parsed.date().isoformat()
356 except ValueError:
357 continue
359 logger.warning("Could not parse date: %s", date_str)
360 return None
362 def _parse_races(self, soup: BeautifulSoup) -> list[dict[str, Any]]:
363 races = []
364 race_sections = soup.find_all("div", class_="hrnz-race")
366 for section in race_sections:
367 race = self._parse_race_section(section)
368 if race and race.get("starters"):
369 races.append(race)
371 return races
373 def _parse_race_links(self, soup: BeautifulSoup) -> list[dict[str, Any]]:
374 race_links = []
375 seen_urls: set[str] = set()
376 tables = soup.find_all("table")
377 for table in tables:
378 headers = [th.get_text(" ", strip=True) for th in table.find_all("th")]
379 if "Race" not in " ".join(headers):
380 continue
381 for row in table.find_all("tr"):
382 cells = row.find_all("td")
383 if len(cells) < 2:
384 continue
385 race_number_text = cells[0].get_text(" ", strip=True)
386 name_text = cells[1].get_text(" ", strip=True)
387 link = row.find("a", href=True)
388 if not link:
389 continue
390 href = link.get("href", "")
391 if "RaceDisplay" not in href:
392 continue
393 results_url = urljoin(self.BASE_URL, href)
394 if results_url in seen_urls:
395 continue
396 race_number = None
397 match = re.search(r"R(\d+)", race_number_text, re.IGNORECASE)
398 if match:
399 race_number = int(match.group(1))
400 race_links.append(
401 {
402 "race_number": race_number,
403 "name": name_text,
404 "results_url": results_url,
405 }
406 )
407 seen_urls.add(results_url)
409 if race_links:
410 return race_links
412 for link in soup.find_all("a", href=True):
413 href = link.get("href", "")
414 if "RaceDisplay" not in href:
415 continue
416 results_url = urljoin(self.BASE_URL, href)
417 if results_url in seen_urls:
418 continue
419 text = link.get_text(" ", strip=True)
420 race_number = None
421 match = re.search(r"Race\s*(\d+)|R(\d+)", text, re.IGNORECASE)
422 if match:
423 race_number = int(match.group(1) or match.group(2))
424 race_links.append(
425 {
426 "race_number": race_number,
427 "name": text or None,
428 "results_url": results_url,
429 }
430 )
431 seen_urls.add(results_url)
432 return race_links
434 def _parse_race_page(
435 self, soup: BeautifulSoup, race_meta: dict[str, Any] | None = None
436 ) -> dict[str, Any] | None:
437 race: dict[str, Any] = {"starters": []}
438 if race_meta:
439 if race_meta.get("race_number") is not None:
440 race["race_number"] = race_meta["race_number"]
441 if race_meta.get("name"):
442 race["name"] = race_meta["name"]
444 title_tag = None
445 for h5 in soup.find_all("h5"):
446 if re.search(
447 r"Race\s+\d+\s*-", h5.get_text(" ", strip=True), re.IGNORECASE
448 ):
449 title_tag = h5
450 break
452 if title_tag:
453 title_text = title_tag.get_text(" ", strip=True)
454 match = re.search(r"Race\s+(\d+)\s*-\s*(.*)", title_text, re.IGNORECASE)
455 if match:
456 race["race_number"] = int(match.group(1))
457 name_part = match.group(2)
458 if "," in name_part:
459 name_part = name_part.split(",", 1)[0].strip()
460 race["name"] = name_part.strip()
461 distance_match = re.search(r"(\d{3,4})m", title_text, re.IGNORECASE)
462 if distance_match:
463 race["distance_m"] = int(distance_match.group(1))
465 page_text = soup.get_text(" ", strip=True)
466 weather = self._extract_label_value(page_text, "Weather")
467 track_condition = self._extract_label_value(page_text, "Track")
468 if weather:
469 race["weather"] = weather
470 if track_condition:
471 race["track_condition"] = track_condition
473 name_upper = race.get("name", "").upper()
474 if "MOBILE" in name_upper:
475 race["start_type"] = "Mobile"
476 elif "STANDING" in name_upper or "STAND" in name_upper:
477 race["start_type"] = "Standing"
479 if "PACE" in name_upper:
480 race["gait"] = "Pace"
481 elif "TROT" in name_upper:
482 race["gait"] = "Trot"
484 table = soup.find("table", class_="hrnz-table--participants")
485 if table:
486 race["starters"] = self._parse_race_table(table)
488 return race
490 def _parse_race_section(self, section: BeautifulSoup) -> dict[str, Any] | None:
491 header = section.find("div", class_="hrnz-race__header")
492 if not header:
493 return None
495 race: dict[str, Any] = {"starters": []}
497 race_number = None
498 number_dd = header.find("dd")
499 if number_dd:
500 match = re.search(r"(\\d+)", number_dd.get_text(strip=True))
501 if match:
502 race_number = int(match.group(1))
504 if not race_number:
505 race_id = section.get("id", "")
506 match = re.search(r"race-(\\d+)", race_id)
507 if match:
508 race_number = int(match.group(1))
510 if race_number:
511 race["race_number"] = race_number
513 name = ""
514 name_tag = header.find("h3")
515 if name_tag:
516 name = name_tag.get_text(strip=True)
517 race["name"] = name
519 details_tag = header.find("h4")
520 details_text = ""
521 if details_tag:
522 details_text = details_tag.get_text(" ", strip=True)
523 race["details"] = details_text
525 meta_text = header.get_text(" ", strip=True)
526 weather = self._extract_label_value(meta_text, "Weather")
527 track_condition = self._extract_label_value(meta_text, "Track")
528 if weather:
529 race["weather"] = weather
530 if track_condition:
531 race["track_condition"] = track_condition
533 distance_match = re.search(r"(\\d{3,4})m", details_text, re.IGNORECASE)
534 if distance_match:
535 race["distance_m"] = int(distance_match.group(1))
537 name_upper = name.upper()
538 if "MOBILE" in name_upper:
539 race["start_type"] = "Mobile"
540 elif "STANDING" in name_upper or "STAND" in name_upper:
541 race["start_type"] = "Standing"
543 if "PACE" in name_upper:
544 race["gait"] = "Pace"
545 elif "TROT" in name_upper:
546 race["gait"] = "Trot"
548 table = section.find("table", class_="hrnz-table--participants")
549 if table:
550 race["starters"] = self._parse_race_table(table)
552 return race
554 def _parse_race_table(self, table: BeautifulSoup) -> list[dict[str, Any]]:
555 starters = []
556 rows = table.find_all("tr")
557 header_map = self._build_header_map(rows)
558 placing_headers = {"placing", "place", "pos", "position", "finish", "fin"}
559 has_placing_column = any(key in header_map for key in placing_headers)
560 row_index = 0
562 for row in rows:
563 if row.find("th"):
564 continue
565 cells = row.find_all(["td", "th"])
566 if len(cells) < 4:
567 continue
568 row_index += 1
569 fallback_placing = None if has_placing_column else row_index
570 starter = self._parse_starter_row(
571 cells, header_map, fallback_placing=fallback_placing
572 )
573 if starter:
574 starters.append(starter)
576 return starters
578 @staticmethod
579 def _build_header_map(rows: list) -> dict[str, int]:
580 for row in rows:
581 headers = row.find_all("th")
582 if not headers:
583 continue
584 header_map = {}
585 for idx, header in enumerate(headers):
586 text = header.get_text(strip=True)
587 if text:
588 header_map[text.strip().lower()] = idx
589 if header_map:
590 return header_map
591 return {}
593 def _parse_starter_row(
594 self,
595 cells: list,
596 header_map: dict[str, int],
597 fallback_placing: int | None = None,
598 ) -> dict[str, Any] | None:
599 try:
600 starter: dict[str, Any] = {}
602 def _cell_by_label(label: str) -> Any | None:
603 target = label.lower()
604 for cell in cells:
605 data_label = cell.get("data-label")
606 if data_label and data_label.strip().lower() == target:
607 return cell
608 idx = header_map.get(target)
609 if idx is not None and idx < len(cells):
610 return cells[idx]
611 return None
613 placing_cell = (
614 _cell_by_label("Placing")
615 or _cell_by_label("Place")
616 or _cell_by_label("Pos")
617 or _cell_by_label("Position")
618 or _cell_by_label("Finish")
619 or _cell_by_label("Fin")
620 or cells[0]
621 )
622 pos_text = placing_cell.get_text(strip=True)
623 if pos_text:
624 pos_upper = pos_text.upper()
625 if pos_upper in ("SCR", "SCRATCH", "S"):
626 return None
627 pos_match = re.match(r"(\\d+)", pos_text)
628 if pos_match:
629 starter["placing"] = int(pos_match.group(1))
630 elif pos_upper in ("DNS", "DNF", "DSQ", "LR", "NP"):
631 starter["did_not_finish"] = True
632 starter["placing"] = None
633 else:
634 starter["placing"] = None
635 elif fallback_placing and not starter.get("did_not_finish"):
636 starter["placing"] = fallback_placing
638 book_cell = _cell_by_label("Book") or _cell_by_label("Bk")
639 if book_cell:
640 book_text = book_cell.get_text(strip=True)
641 if book_text and book_text.isdigit():
642 starter["runner_number"] = int(book_text)
644 horse_cell = _cell_by_label("Horse") or cells[2]
645 horse_link = horse_cell.find("a")
646 if horse_link:
647 starter["horse_name"] = horse_link.get_text(strip=True)
648 horse_href = horse_link.get("href", "")
649 horse_uuid = self._extract_uuid(horse_href)
650 if horse_uuid:
651 starter["horse_id"] = horse_uuid
652 else:
653 horse_name = horse_cell.get_text(strip=True)
654 if horse_name:
655 starter["horse_name"] = horse_name
657 barrier_cell = _cell_by_label("Barrier") or _cell_by_label("Draw")
658 if barrier_cell:
659 barrier_text = barrier_cell.get_text(strip=True)
660 if barrier_text:
661 barrier_match = re.match(r"(\\d+)", barrier_text)
662 if barrier_match:
663 starter["barrier"] = int(barrier_match.group(1))
664 elif re.search(r"[A-Za-z]", barrier_text):
665 starter["barrier_position"] = barrier_text
667 hcap_cell = _cell_by_label("Hcap") or _cell_by_label("HCP")
668 if hcap_cell:
669 hcap_text = hcap_cell.get_text(strip=True)
670 if hcap_text:
671 if hcap_text.lower().startswith("fr"):
672 starter["handicap_m"] = 0
673 else:
674 hcap_match = re.match(r"(\\d+)", hcap_text)
675 if hcap_match:
676 starter["handicap_m"] = int(hcap_match.group(1))
678 time_cell = _cell_by_label("Time") or _cell_by_label("Time/ Margin")
679 if time_cell:
680 time_text = time_cell.get_text(strip=True)
681 if time_text:
682 starter["race_time"] = time_text
684 margin_cell = _cell_by_label("Margin")
685 if margin_cell:
686 margin_text = margin_cell.get_text(strip=True)
687 if margin_text:
688 starter["margin"] = margin_text
690 driver_cell = _cell_by_label("Driver")
691 if driver_cell:
692 driver_link = driver_cell.find("a")
693 if driver_link:
694 driver_name = driver_link.get_text(strip=True)
695 if driver_name:
696 starter["driver_name"] = driver_name
697 driver_href = driver_link.get("href", "")
698 driver_uuid = self._extract_uuid(driver_href)
699 if driver_uuid:
700 starter["driver_id"] = driver_uuid
701 else:
702 driver_name = driver_cell.get_text(strip=True)
703 if driver_name:
704 starter["driver_name"] = driver_name
706 trainer_cell = _cell_by_label("Trainer")
707 if trainer_cell:
708 trainer_link = trainer_cell.find("a")
709 if trainer_link:
710 trainer_name = trainer_link.get_text(strip=True)
711 if trainer_name:
712 starter["trainer_name"] = trainer_name
713 trainer_href = trainer_link.get("href", "")
714 trainer_uuid = self._extract_uuid(trainer_href)
715 if trainer_uuid:
716 starter["trainer_id"] = trainer_uuid
717 else:
718 trainer_name = trainer_cell.get_text(strip=True)
719 if trainer_name:
720 starter["trainer_name"] = trainer_name
722 if starter.get("horse_name"):
723 return starter
725 except Exception as e:
726 logger.debug("Error parsing starter row: %s", e)
728 return None
730 @staticmethod
731 def _extract_uuid(href: str) -> str | None:
732 uuid_match = re.search(r"([0-9A-F-]{36})", href, re.IGNORECASE)
733 if uuid_match:
734 return uuid_match.group(1)
735 return None
737 @staticmethod
738 def _extract_raceday_id(url: str) -> str | None:
739 parsed = urlparse(url)
740 args = parse_qs(parsed.query).get("Arg", [])
741 for idx, value in enumerate(args):
742 if value == "hrnzg-RacedayID" and idx + 1 < len(args):
743 return args[idx + 1]
744 return None
746 @staticmethod
747 def _extract_label_value(text: str, label: str) -> str | None:
748 pattern = rf"{re.escape(label)}:\\s*([^\\n\\r]+)"
749 match = re.search(pattern, text, re.IGNORECASE)
750 if not match:
751 return None
752 value = match.group(1)
753 value = re.split(r"(Weather:|Track:)", value, maxsplit=1)[0]
754 return value.strip().strip(";")