diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py
index 751ef34..271f9c6 100644
--- a/M3U8/scrapers/roxie.py
+++ b/M3U8/scrapers/roxie.py
@@ -47,7 +47,6 @@ async def process_event(
 async def refresh_html_cache(
     client: httpx.AsyncClient,
     url: str,
-    sport: str,
     now_ts: float,
 ) -> dict[str, str | float]:
 
@@ -61,6 +60,15 @@ async def refresh_html_cache(
 
     soup = HTMLParser(r.text)
 
+    pattern = re.compile(r'Upcoming ([^"]*) Events', re.IGNORECASE)
+
+    h2_title = soup.css_first("h2").text(strip=True)
+
+    if sport_name := pattern.search(h2_title):
+        sport = sport_name[1]
+    else:
+        sport = "Event"
+
     events = {}
 
     for row in soup.css("table#eventsTable tbody tr"):
@@ -96,7 +104,7 @@ async def refresh_html_cache(
 
 async def get_events(
     client: httpx.AsyncClient,
-    sport_urls: dict[str, str],
+    sport_urls: list[str],
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
 
@@ -107,10 +115,9 @@ async def get_events(
         refresh_html_cache(
             client,
             url,
-            sport,
             now.timestamp(),
         )
-        for sport, url in sport_urls.items()
+        for url in sport_urls
     ]
 
     results = await asyncio.gather(*tasks)
@@ -145,10 +152,10 @@ async def scrape(client: httpx.AsyncClient) -> None:
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    sport_urls = {
-        sport: urljoin(BASE_URL, sport.lower())
-        for sport in ["Soccer", "MLB", "NBA", "NFL", "Fighting", "Motorsports"]
-    }
+    sport_urls = [
+        urljoin(BASE_URL, sport)
+        for sport in ["fighting", "mlb", "motorsports", "nba", "nfl", "soccer"]
+    ]
 
     events = await get_events(
         client,