Coverage for packages/hrnz_scraper/historical_scraper.py

1"""HRNZ scraper for historical results via the Results Enquiry page."""

3from __future__ import annotations

5import asyncio

6import os

7import re

8from datetime import date

9from typing import Any

10from urllib.parse import parse_qs, urlencode, urljoin, urlparse

12from bs4 import BeautifulSoup

14try:

15 from playwright.async_api import Browser, Page, async_playwright

16except ImportError: # pragma: no cover - optional dependency for scraping runtime

17 async_playwright = None

18 Browser = Page = Any

20from packages.core.common.logging import get_logger

21from packages.hrnz_scraper.proxy import build_decodo_proxy

23logger = get_logger(__name__)

class HRNZHistoricalResultsScraper:
    """Scraper for HRNZ historical results enquiry.

    Pages are rendered with a headless Playwright Chromium browser and parsed
    with BeautifulSoup. Consecutive fetches are spaced at least
    ``RATE_LIMIT_DELAY`` seconds apart. The instance can be used as an async
    context manager, which starts the browser on entry and closes it on exit.
    """

    BASE_URL = "https://harness.hrnz.co.nz"
    RESULTS_PATH = "/gws/ws/r/infohorsews/wsd06x"
    SEARCH_URL = (
        "https://harness.hrnz.co.nz/gws/ws/r/infohorsews/wsd08x"
        "?Arg=hrnzg-Ptype&Arg=ResultsSearch&Arg=hrnzg-rSite&Arg=TRUE"
    )

    RATE_LIMIT_DELAY = 2.0

    # Three-letter month abbreviations accepted by _parse_raceday_header.
    _MONTHS = {
        "jan": 1,
        "feb": 2,
        "mar": 3,
        "apr": 4,
        "may": 5,
        "jun": 6,
        "jul": 7,
        "aug": 8,
        "sep": 9,
        "oct": 10,
        "nov": 11,
        "dec": 12,
    }

    # Formats tried, in order, by _parse_date.
    _DATE_FORMATS = (
        "%A, %d %B %Y",
        "%A, %d %B",
        "%d %B %Y",
        "%d %B",
        "%d/%m/%Y",
        "%d-%m-%Y",
        "%d/%m/%y",
        "%d-%m-%y",
    )

    # Column labels (lower-case) that identify the finishing-position column,
    # in lookup priority order.
    _PLACING_LABELS = ("placing", "place", "pos", "position", "finish", "fin")
    # Placing codes for runners that never started: the row is dropped.
    _SCRATCHED_CODES = frozenset({"SCR", "SCRATCH", "S"})
    # Placing codes recorded as "did not finish".
    _UNPLACED_CODES = frozenset({"DNS", "DNF", "DSQ", "LR", "NP"})

    def __init__(self, timeout: float = 30000):
        """Initialize HRNZ historical scraper.

        Args:
            timeout: Request timeout in milliseconds (default: 30000ms = 30s).
                A valid ``HRNZ_PLAYWRIGHT_TIMEOUT_MS`` environment variable
                overrides this value.
        """
        env_timeout = os.getenv("HRNZ_PLAYWRIGHT_TIMEOUT_MS", "").strip()
        if env_timeout:
            try:
                candidate = float(env_timeout)
                # float() also accepts "nan", "inf" and negative numbers, none
                # of which is a usable navigation timeout.
                if not (0 <= candidate < float("inf")):
                    raise ValueError(env_timeout)
                timeout = candidate
            except ValueError:
                logger.warning(
                    "Invalid HRNZ_PLAYWRIGHT_TIMEOUT_MS=%s; using default.", env_timeout
                )
        self.timeout = timeout
        self._playwright = None
        self._browser: Browser | None = None
        self._last_request_time = 0.0

    async def __aenter__(self):
        """Start the browser and return the scraper."""
        await self._ensure_browser()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the browser; exceptions from the ``with`` body propagate."""
        await self.close()

    async def _ensure_browser(self):
        """Start Playwright and launch headless Chromium if not already running.

        Raises:
            ImportError: If the optional ``playwright`` dependency is missing.
        """
        if async_playwright is None:
            raise ImportError(
                "playwright is required for HRNZ scraping; install it or use the API ingest path"
            )
        if self._browser is not None:
            return
        if self._playwright is None:
            self._playwright = await async_playwright().start()
        try:
            self._browser = await self._playwright.chromium.launch(headless=True)
        except Exception:
            # Do not leave a started driver behind: the next call would start
            # a second one and overwrite the only reference to this one.
            await self._playwright.stop()
            self._playwright = None
            raise
        logger.info("Playwright browser initialized")

    async def close(self):
        """Close the browser and stop Playwright; safe to call repeatedly."""
        try:
            if self._browser is not None:
                browser, self._browser = self._browser, None
                await browser.close()
        finally:
            # Stop the driver even when closing the browser failed.
            if self._playwright is not None:
                playwright, self._playwright = self._playwright, None
                await playwright.stop()
        logger.info("Playwright browser closed")

    async def _rate_limited_fetch(self, url: str) -> str:
        """Return the rendered HTML of ``url``, honouring the rate limit.

        A fresh browser context (through the Decodo proxy when
        ``build_decodo_proxy()`` returns one) is created for every request and
        always closed afterwards.

        Raises:
            RuntimeError: If any step of the fetch fails; the original
                exception is chained as the cause.
        """
        import time

        await self._ensure_browser()

        elapsed = time.time() - self._last_request_time
        if elapsed < self.RATE_LIMIT_DELAY:
            # Clamp so a backwards wall-clock jump cannot produce a sleep
            # longer than one rate-limit interval.
            await asyncio.sleep(
                min(self.RATE_LIMIT_DELAY, self.RATE_LIMIT_DELAY - elapsed)
            )

        logger.debug("Fetching: %s", url)

        try:
            proxy = build_decodo_proxy()
            if proxy:
                context = await self._browser.new_context(proxy=proxy)
            else:
                context = await self._browser.new_context()
            try:
                page: Page = await context.new_page()
                try:
                    await page.goto(
                        url, timeout=self.timeout, wait_until="domcontentloaded"
                    )
                    # Short grace period after DOMContentLoaded before the
                    # DOM is snapshotted.
                    await page.wait_for_timeout(1000)
                    return await page.content()
                finally:
                    await page.close()
            finally:
                # Runs even if new_page() or page.close() raised.
                await context.close()
        except Exception as e:
            raise RuntimeError(f"Failed to fetch {url}: {e}") from e
        finally:
            # Failed attempts count towards the rate limit as well.
            self._last_request_time = time.time()

    async def iter_meetings(
        self,
        start_date: date,
        end_date: date,
        race_day_type: str = "OfficialRaces",
        club_no: str = "",
    ):
        """Yield meeting metadata within a date range.

        The raceday list is fetched once per calendar month from the month of
        ``start_date`` through the month of ``end_date``. Meetings that carry
        a ``meeting_date`` outside the range are skipped, and a ``raceday_id``
        is yielded at most once.
        """
        seen_racedays: set[str] = set()
        current = date(start_date.year, start_date.month, 1)

        while current <= end_date:
            meetings = await self.list_meetings_for_month(
                current.year,
                current.month,
                race_day_type=race_day_type,
                club_no=club_no,
            )
            for meeting in meetings:
                meeting_date = meeting.get("meeting_date")
                if meeting_date and not (start_date <= meeting_date <= end_date):
                    continue
                raceday_id = meeting.get("raceday_id")
                if raceday_id:
                    if raceday_id in seen_racedays:
                        continue
                    seen_racedays.add(raceday_id)
                yield meeting

            # Advance to the first day of the next month.
            if current.month == 12:
                current = date(current.year + 1, 1, 1)
            else:
                current = date(current.year, current.month + 1, 1)

    async def list_meetings_for_month(
        self,
        year: int,
        month: int,
        race_day_type: str = "OfficialRaces",
        club_no: str = "",
    ) -> list[dict[str, Any]]:
        """Fetch and parse the raceday list for a month."""
        url = self._build_raceday_search_url(year, month, race_day_type, club_no)
        html = await self._rate_limited_fetch(url)
        return self._parse_raceday_list(html, year)

    async def get_meeting_results(
        self, results_url: str, meeting_meta: dict[str, Any] | None = None
    ):
        """Scrape results from a meeting results page.

        Races embedded in the meeting page are used when present. Otherwise
        each linked race page is fetched and parsed, and only races with at
        least one starter are kept.

        Args:
            results_url: URL of the meeting results page.
            meeting_meta: Optional entry from the raceday list; it fills in
                keys the page header did not provide.

        Returns:
            The meeting dict, including ``source_url`` and ``races``.
        """
        html = await self._rate_limited_fetch(results_url)
        soup = BeautifulSoup(html, "html.parser")

        meeting = self._parse_meeting_header(soup)
        if meeting_meta:
            meeting.setdefault("raceday_id", meeting_meta.get("raceday_id"))
            meeting.setdefault("meeting_time", meeting_meta.get("meeting_time"))
            meeting.setdefault("venue", meeting_meta.get("meeting_name"))
            meeting.setdefault("name", meeting_meta.get("meeting_name"))
            if "date" not in meeting and meeting_meta.get("meeting_date"):
                meeting["date"] = meeting_meta["meeting_date"].isoformat()
        meeting["source_url"] = results_url

        races = self._parse_races(soup)
        if not races:
            race_links = self._parse_race_links(soup)
            if not race_links:
                logger.warning("No race links found for meeting page: %s", results_url)
            for race_link in race_links:
                race_html = await self._rate_limited_fetch(race_link["results_url"])
                race_soup = BeautifulSoup(race_html, "html.parser")
                race = self._parse_race_page(race_soup, race_link)
                if race and race.get("starters"):
                    races.append(race)
            if race_links and not races:
                logger.warning(
                    "Race links found but no starters parsed for meeting page: %s",
                    results_url,
                )
        meeting["races"] = races

        logger.info(
            "Scraped meeting: %s on %s (%s races)",
            meeting.get("venue"),
            meeting.get("date"),
            len(races),
        )

        return meeting

    def _build_raceday_search_url(
        self, year: int, month: int, race_day_type: str, club_no: str
    ) -> str:
        """Build the raceday-search URL for one month.

        The endpoint takes a flat sequence of repeated ``Arg`` parameters in
        which each name is followed by its value, so order matters.
        """
        params = [
            ("Arg", "hrnzg-Ptype"),
            ("Arg", "RaceResults"),
            ("Arg", "hrnzg-rSite"),
            ("Arg", "TRUE"),
            ("Arg", "hrnzg-ResultsType"),
            ("Arg", "RacedaySearch"),
            ("Arg", "hrnzg-ResultsYear"),
            ("Arg", str(year)),
            ("Arg", "hrnzg-ResultsMonth"),
            ("Arg", str(month)),
            ("Arg", "hrnzg-ResultsDay"),
            ("Arg", "1"),
            ("Arg", "hrnzg-ResultsRacedayType"),
            ("Arg", race_day_type),
            ("Arg", "hrnzg-ResultsClubNo"),
            ("Arg", club_no),
        ]
        return f"{self.BASE_URL}{self.RESULTS_PATH}?{urlencode(params)}"

    def _parse_raceday_list(self, html: str, year: int) -> list[dict[str, Any]]:
        """Extract meeting links from the first table of a raceday-list page.

        NOTE(review): ``year`` is currently unused and ``meeting_date`` /
        ``meeting_time`` are always None, so the date filter in
        ``iter_meetings`` never applies. ``_parse_raceday_header`` looks like
        it was meant to supply them - confirm against the live markup.
        """
        soup = BeautifulSoup(html, "html.parser")
        table = soup.find("table")  # First table holds raceday list
        if table is None:
            logger.warning("No raceday list table found")
            return []

        meetings = []
        for row in table.find_all("tr"):
            if row.find("th"):
                continue
            for link in row.find_all("a", href=True):
                href = link.get("href")
                if not href or "RacesDisplay" not in href:
                    continue
                meeting_name = link.get_text(strip=True)
                if not meeting_name:
                    continue
                results_url = urljoin(self.BASE_URL, href)
                meetings.append(
                    {
                        "raceday_id": self._extract_raceday_id(results_url),
                        "meeting_name": meeting_name,
                        "meeting_date": None,
                        "meeting_time": None,
                        "results_url": results_url,
                    }
                )

        return meetings

    def _parse_raceday_header(
        self, header_text: str, year: int
    ) -> tuple[date | None, str | None]:
        """Parse a "<day> <Mon> [HH:MM]" header into ``(date, time)``.

        Returns ``(None, None)`` when no day/month pair is found, the month
        abbreviation is unknown, or the day is invalid for that month. The
        time is None when no ``HH:MM`` token is present.
        """
        header_text = header_text.replace("\xa0", " ").strip()
        date_match = re.search(r"(\d{1,2})\s+([A-Za-z]{3})", header_text)
        if not date_match:
            return None, None

        month = self._MONTHS.get(date_match.group(2).lower())
        if not month:
            return None, None
        try:
            meeting_date = date(year, month, int(date_match.group(1)))
        except ValueError:
            return None, None

        time_match = re.search(r"(\d{1,2}:\d{2})", header_text)
        return meeting_date, time_match.group(1) if time_match else None

    def _parse_meeting_header(self, soup: BeautifulSoup) -> dict[str, Any]:
        """Read venue, date and meeting name from a meeting page header.

        Only keys whose source element exists are set: ``venue`` (from the
        first ``<h1>``), ``date_raw`` / ``date`` and ``name``.
        """
        meeting: dict[str, Any] = {}

        h1 = soup.find("h1")
        if h1:
            venue = h1.get_text(strip=True)
            # Drop the " Inc" / " Inc." suffix as a whole word; two chained
            # str.replace calls left a stray "." behind for "Inc." and also
            # mangled words such as "Incorporated".
            meeting["venue"] = re.sub(r" Inc\b\.?", "", venue).strip()

        date_div = soup.find("div", class_="hrnz-content__date")
        if date_div:
            date_text = date_div.get_text(strip=True)
            meeting["date_raw"] = date_text
            parsed_date = self._parse_date(date_text)
            if parsed_date:
                meeting["date"] = parsed_date

        meeting_div = soup.find("div", class_="hrnz-field__meeting")
        if meeting_div:
            h5 = meeting_div.find("h5")
            if h5:
                meeting_name = h5.get_text(strip=True)
                if " at " in meeting_name:
                    meeting_name = meeting_name.split(" at ")[0].strip()
                meeting["name"] = meeting_name

        return meeting

    def _parse_date(self, date_str: str) -> str | None:
        """Parse a date string into ISO ``YYYY-MM-DD``, or None on failure.

        Formats without a year are resolved against the current year.
        Two-digit years follow ``strptime``'s own pivot for ``%y``.
        """
        from datetime import datetime

        text = date_str.replace("\xa0", " ").strip()
        current_year = datetime.now().year

        for fmt in self._DATE_FORMATS:
            try:
                if "%Y" in fmt or "%y" in fmt:
                    parsed = datetime.strptime(text, fmt)
                else:
                    # Parse with the year attached rather than patching it in
                    # afterwards, so "29 February" validates against the real
                    # year instead of strptime's default of 1900.
                    parsed = datetime.strptime(f"{text} {current_year}", f"{fmt} %Y")
            except ValueError:
                continue
            return parsed.date().isoformat()

        logger.warning("Could not parse date: %s", date_str)
        return None

    def _parse_races(self, soup: BeautifulSoup) -> list[dict[str, Any]]:
        """Parse every ``div.hrnz-race`` section that yields starters."""
        races = []
        for section in soup.find_all("div", class_="hrnz-race"):
            race = self._parse_race_section(section)
            if race and race.get("starters"):
                races.append(race)
        return races

    def _parse_race_links(self, soup: BeautifulSoup) -> list[dict[str, Any]]:
        """Collect links to individual race pages from a meeting page.

        Tables with a "Race" header are searched first. If they yield
        nothing, every ``RaceDisplay`` anchor on the page is used. Each URL
        appears once.
        """
        race_links = []
        seen_urls: set[str] = set()

        for table in soup.find_all("table"):
            headers = [th.get_text(" ", strip=True) for th in table.find_all("th")]
            if "Race" not in " ".join(headers):
                continue
            for row in table.find_all("tr"):
                cells = row.find_all("td")
                if len(cells) < 2:
                    continue
                link = row.find("a", href=True)
                if not link:
                    continue
                href = link.get("href", "")
                if "RaceDisplay" not in href:
                    continue
                results_url = urljoin(self.BASE_URL, href)
                if results_url in seen_urls:
                    continue
                seen_urls.add(results_url)
                match = re.search(
                    r"R(\d+)", cells[0].get_text(" ", strip=True), re.IGNORECASE
                )
                race_links.append(
                    {
                        "race_number": int(match.group(1)) if match else None,
                        "name": cells[1].get_text(" ", strip=True),
                        "results_url": results_url,
                    }
                )

        if race_links:
            return race_links

        for link in soup.find_all("a", href=True):
            href = link.get("href", "")
            if "RaceDisplay" not in href:
                continue
            results_url = urljoin(self.BASE_URL, href)
            if results_url in seen_urls:
                continue
            seen_urls.add(results_url)
            text = link.get_text(" ", strip=True)
            match = re.search(r"Race\s*(\d+)|R(\d+)", text, re.IGNORECASE)
            race_links.append(
                {
                    "race_number": (
                        int(match.group(1) or match.group(2)) if match else None
                    ),
                    "name": text or None,
                    "results_url": results_url,
                }
            )
        return race_links

    @staticmethod
    def _apply_name_traits(race: dict[str, Any], name: str) -> None:
        """Set ``start_type`` and ``gait`` from keywords in the race name."""
        name_upper = name.upper()
        if "MOBILE" in name_upper:
            race["start_type"] = "Mobile"
        elif "STAND" in name_upper:  # also covers "STANDING"
            race["start_type"] = "Standing"

        if "PACE" in name_upper:
            race["gait"] = "Pace"
        elif "TROT" in name_upper:
            race["gait"] = "Trot"

    def _apply_conditions(self, race: dict[str, Any], text: str) -> None:
        """Set ``weather`` and ``track_condition`` from labelled values in text."""
        weather = self._extract_label_value(text, "Weather")
        track_condition = self._extract_label_value(text, "Track")
        if weather:
            race["weather"] = weather
        if track_condition:
            race["track_condition"] = track_condition

    def _parse_race_page(
        self, soup: BeautifulSoup, race_meta: dict[str, Any] | None = None
    ) -> dict[str, Any] | None:
        """Parse a stand-alone race page.

        ``race_meta`` (from ``_parse_race_links``) seeds the race number and
        name. A "Race N - <name>" ``<h5>`` title overrides both when present.
        """
        race: dict[str, Any] = {"starters": []}
        if race_meta:
            if race_meta.get("race_number") is not None:
                race["race_number"] = race_meta["race_number"]
            if race_meta.get("name"):
                race["name"] = race_meta["name"]

        title_tag = None
        for h5 in soup.find_all("h5"):
            if re.search(
                r"Race\s+\d+\s*-", h5.get_text(" ", strip=True), re.IGNORECASE
            ):
                title_tag = h5
                break

        if title_tag:
            title_text = title_tag.get_text(" ", strip=True)
            match = re.search(r"Race\s+(\d+)\s*-\s*(.*)", title_text, re.IGNORECASE)
            if match:
                race["race_number"] = int(match.group(1))
                # The name is whatever precedes the first comma of the title.
                race["name"] = match.group(2).split(",", 1)[0].strip()
            distance_match = re.search(r"(\d{3,4})m", title_text, re.IGNORECASE)
            if distance_match:
                race["distance_m"] = int(distance_match.group(1))

        self._apply_conditions(race, soup.get_text(" ", strip=True))
        self._apply_name_traits(race, race.get("name", ""))

        table = soup.find("table", class_="hrnz-table--participants")
        if table:
            race["starters"] = self._parse_race_table(table)

        return race

    def _parse_race_section(self, section: BeautifulSoup) -> dict[str, Any] | None:
        """Parse one ``div.hrnz-race`` section of a meeting page.

        Returns None when the section has no ``div.hrnz-race__header``.
        """
        header = section.find("div", class_="hrnz-race__header")
        if not header:
            return None

        race: dict[str, Any] = {"starters": []}

        # Race number: first <dd> of the header, else the section's id.
        race_number = None
        number_dd = header.find("dd")
        if number_dd:
            match = re.search(r"(\d+)", number_dd.get_text(strip=True))
            if match:
                race_number = int(match.group(1))
        if not race_number:
            match = re.search(r"race-(\d+)", section.get("id", ""))
            if match:
                race_number = int(match.group(1))
        if race_number:
            race["race_number"] = race_number

        name = ""
        name_tag = header.find("h3")
        if name_tag:
            name = name_tag.get_text(strip=True)
            race["name"] = name

        details_text = ""
        details_tag = header.find("h4")
        if details_tag:
            details_text = details_tag.get_text(" ", strip=True)
            race["details"] = details_text

        self._apply_conditions(race, header.get_text(" ", strip=True))

        distance_match = re.search(r"(\d{3,4})m", details_text, re.IGNORECASE)
        if distance_match:
            race["distance_m"] = int(distance_match.group(1))

        self._apply_name_traits(race, name)

        table = section.find("table", class_="hrnz-table--participants")
        if table:
            race["starters"] = self._parse_race_table(table)

        return race

    def _parse_race_table(self, table: BeautifulSoup) -> list[dict[str, Any]]:
        """Parse the starters of a participants table.

        Rows containing a ``<th>`` or fewer than four cells are skipped. When
        the table has no recognisable placing column, the 1-based index of
        the data row is passed on as a fallback placing.
        """
        rows = table.find_all("tr")
        header_map = self._build_header_map(rows)
        has_placing_column = any(
            label in header_map for label in self._PLACING_LABELS
        )

        starters = []
        row_index = 0
        for row in rows:
            if row.find("th"):
                continue
            cells = row.find_all(["td", "th"])
            if len(cells) < 4:
                continue
            row_index += 1
            starter = self._parse_starter_row(
                cells,
                header_map,
                fallback_placing=None if has_placing_column else row_index,
            )
            if starter:
                starters.append(starter)

        return starters

    @staticmethod
    def _build_header_map(rows: list) -> dict[str, int]:
        """Map lower-cased header text to column index.

        Uses the first row that has at least one non-empty ``<th>``; returns
        an empty dict when there is none.
        """
        for row in rows:
            header_map = {}
            for idx, header in enumerate(row.find_all("th")):
                text = header.get_text(strip=True)
                if text:
                    header_map[text.strip().lower()] = idx
            if header_map:
                return header_map
        return {}

    @staticmethod
    def _find_cell(cells: list, header_map: dict[str, int], *labels: str) -> Any | None:
        """Return the cell for the first of ``labels`` that can be located.

        For each label, a cell whose ``data-label`` attribute matches
        (case-insensitively) wins; otherwise the header-map column index is
        used. Returns None when no label resolves.
        """
        for label in labels:
            target = label.lower()
            for cell in cells:
                data_label = cell.get("data-label")
                if data_label and data_label.strip().lower() == target:
                    return cell
            idx = header_map.get(target)
            if idx is not None and idx < len(cells):
                return cells[idx]
        return None

    def _read_linked_name(
        self, cell: Any, starter: dict[str, Any], prefix: str
    ) -> None:
        """Store ``<prefix>_name`` and, when linked, ``<prefix>_id``.

        When the cell contains an anchor, the name is the anchor text and the
        id is the UUID found in its href. Otherwise the cell text is the name.
        """
        link = cell.find("a")
        if link is None:
            name = cell.get_text(strip=True)
            if name:
                starter[f"{prefix}_name"] = name
            return
        name = link.get_text(strip=True)
        if name:
            starter[f"{prefix}_name"] = name
        entity_id = self._extract_uuid(link.get("href", ""))
        if entity_id:
            starter[f"{prefix}_id"] = entity_id

    def _parse_starter_row(
        self,
        cells: list,
        header_map: dict[str, int],
        fallback_placing: int | None = None,
    ) -> dict[str, Any] | None:
        """Parse one participants-table row into a starter dict.

        Args:
            cells: Cells of the row.
            header_map: Lower-cased header text to column index.
            fallback_placing: Placing to use when the placing cell is empty.

        Returns:
            The starter dict, or None when the runner was scratched, no horse
            name was found, or the row could not be parsed.
        """
        try:
            starter: dict[str, Any] = {}

            placing_cell = self._find_cell(cells, header_map, *self._PLACING_LABELS)
            if placing_cell is None:
                placing_cell = cells[0]
            pos_text = placing_cell.get_text(strip=True)
            if pos_text:
                pos_upper = pos_text.upper()
                if pos_upper in self._SCRATCHED_CODES:
                    return None
                pos_match = re.match(r"(\d+)", pos_text)
                if pos_match:
                    starter["placing"] = int(pos_match.group(1))
                else:
                    starter["placing"] = None
                    if pos_upper in self._UNPLACED_CODES:
                        starter["did_not_finish"] = True
            elif fallback_placing:
                starter["placing"] = fallback_placing

            book_cell = self._find_cell(cells, header_map, "Book", "Bk")
            if book_cell is not None:
                book_text = book_cell.get_text(strip=True)
                # isdecimal(), unlike isdigit(), only admits what int() parses.
                if book_text and book_text.isdecimal():
                    starter["runner_number"] = int(book_text)

            horse_cell = self._find_cell(cells, header_map, "Horse")
            if horse_cell is None:
                horse_cell = cells[2]
            self._read_linked_name(horse_cell, starter, "horse")

            barrier_cell = self._find_cell(cells, header_map, "Barrier", "Draw")
            if barrier_cell is not None:
                barrier_text = barrier_cell.get_text(strip=True)
                if barrier_text:
                    barrier_match = re.match(r"(\d+)", barrier_text)
                    if barrier_match:
                        starter["barrier"] = int(barrier_match.group(1))
                    elif re.search(r"[A-Za-z]", barrier_text):
                        starter["barrier_position"] = barrier_text

            hcap_cell = self._find_cell(cells, header_map, "Hcap", "HCP")
            if hcap_cell is not None:
                hcap_text = hcap_cell.get_text(strip=True)
                if hcap_text:
                    if hcap_text.lower().startswith("fr"):
                        # Text starting with "fr" is stored as a zero handicap.
                        starter["handicap_m"] = 0
                    else:
                        hcap_match = re.match(r"(\d+)", hcap_text)
                        if hcap_match:
                            starter["handicap_m"] = int(hcap_match.group(1))

            time_cell = self._find_cell(cells, header_map, "Time", "Time/ Margin")
            if time_cell is not None:
                time_text = time_cell.get_text(strip=True)
                if time_text:
                    starter["race_time"] = time_text

            margin_cell = self._find_cell(cells, header_map, "Margin")
            if margin_cell is not None:
                margin_text = margin_cell.get_text(strip=True)
                if margin_text:
                    starter["margin"] = margin_text

            driver_cell = self._find_cell(cells, header_map, "Driver")
            if driver_cell is not None:
                self._read_linked_name(driver_cell, starter, "driver")

            trainer_cell = self._find_cell(cells, header_map, "Trainer")
            if trainer_cell is not None:
                self._read_linked_name(trainer_cell, starter, "trainer")

            if starter.get("horse_name"):
                return starter

        except Exception as e:
            # Best effort: one malformed row must not abort the whole table.
            logger.debug("Error parsing starter row: %s", e)

        return None

    @staticmethod
    def _extract_uuid(href: str) -> str | None:
        """Return the first 36-character run of hex digits and dashes in ``href``."""
        uuid_match = re.search(r"([0-9A-F-]{36})", href, re.IGNORECASE)
        return uuid_match.group(1) if uuid_match else None

    @staticmethod
    def _extract_raceday_id(url: str) -> str | None:
        """Return the ``Arg`` value that follows ``hrnzg-RacedayID`` in the query."""
        args = parse_qs(urlparse(url).query).get("Arg", [])
        for idx, value in enumerate(args):
            if value == "hrnzg-RacedayID" and idx + 1 < len(args):
                return args[idx + 1]
        return None

    @staticmethod
    def _extract_label_value(text: str, label: str) -> str | None:
        """Return the value following ``"<label>:"`` in ``text``.

        The value runs to the end of the line or to the next ``Weather:`` /
        ``Track:`` label, whichever comes first, with surrounding whitespace
        and semicolons removed. Returns None when the label is absent or the
        value is empty.
        """
        # Single backslashes: in a raw string "\\s" is a literal backslash
        # followed by "s", which never matches real page text.
        pattern = rf"{re.escape(label)}:\s*([^\n\r]+)"
        match = re.search(pattern, text, re.IGNORECASE)
        if not match:
            return None
        value = re.split(
            r"(?:Weather:|Track:)", match.group(1), maxsplit=1, flags=re.IGNORECASE
        )[0]
        return value.strip().strip(";").strip() or None

Coverage for packages / hrnz_scraper / historical_scraper.py: 9%

514 statements