From 00000d9cbfd11c9bd9950d3e1b52314aee892b3a Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Tue, 18 Nov 2025 00:38:40 -0500
Subject: [PATCH] roxie: map sport slugs to display names

Replace the regex that pulled the sport name out of each page's
"Upcoming ... Events" <h2> heading with a static valid_sports map from
URL slug to display name. The slug is threaded through
refresh_html_cache(), and sport_urls becomes a slug -> URL dict, so
every cached key carries a stable label of the form
f"[{event_sport}] {event} (ROXIE)". Also fix comment spacing on the
disabled volo task in fetch.py.
---
 M3U8/fetch.py          |  2 +-
 M3U8/scrapers/roxie.py | 31 ++++++++++++++++++-------------
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index a67a26f..530a986 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -54,7 +54,7 @@ async def main() -> None:
         asyncio.create_task(streamfree.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
         asyncio.create_task(tvpass.scrape(network.client)),
-        #asyncio.create_task(volo.scrape(network.client)),
+        # asyncio.create_task(volo.scrape(network.client)),
         asyncio.create_task(watchfooty.scrape(network.client)),
     ]
 
diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py
index 517b145..71793b5 100644
--- a/M3U8/scrapers/roxie.py
+++ b/M3U8/scrapers/roxie.py
@@ -18,6 +18,15 @@ HTML_CACHE = Cache("roxie-html.json", exp=19_800)
 
 BASE_URL = "https://roxiestreams.live"
 
+valid_sports = {
+    "fighting": "Fighting",
+    "mlb": "MLB",
+    "motorsports": "Racing",
+    "nba": "NBA",
+    "nfl": "American Football",
+    "soccer": "Soccer",
+}
+
 
 async def process_event(
     client: httpx.AsyncClient,
@@ -47,6 +56,7 @@ async def process_event(
 async def refresh_html_cache(
     client: httpx.AsyncClient,
     url: str,
+    sport: str,
     now_ts: float,
 ) -> dict[str, str | float]:
 
@@ -60,11 +70,6 @@ async def refresh_html_cache(
 
     soup = HTMLParser(r.text)
 
-    pattern = re.compile(r'Upcoming ([^"]*) Events', re.IGNORECASE)
-
-    h2_title = soup.css_first("h2").text(strip=True)
-
-    sport = sport_name[1] if (sport_name := pattern.search(h2_title)) else "Event"
     events = {}
 
     for row in soup.css("table#eventsTable tbody tr"):
@@ -85,10 +90,12 @@ async def refresh_html_cache(
 
         event_dt = Time.from_str(data_start, timezone="PST")
 
-        key = f"[{sport}] {event} (ROXIE)"
+        event_sport = valid_sports[sport]
+
+        key = f"[{event_sport}] {event} (ROXIE)"
 
         events[key] = {
-            "sport": sport,
+            "sport": event_sport,
             "event": event,
             "link": href,
             "event_ts": event_dt.timestamp(),
@@ -100,7 +107,7 @@ async def refresh_html_cache(
 
 async def get_events(
     client: httpx.AsyncClient,
-    sport_urls: list[str],
+    sport_urls: dict[str, str],
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
 
@@ -111,9 +118,10 @@ async def get_events(
         refresh_html_cache(
             client,
             url,
+            sport,
             now.timestamp(),
         )
-        for url in sport_urls
+        for sport, url in sport_urls.items()
     ]
 
     results = await asyncio.gather(*tasks)
 
@@ -148,10 +156,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    sport_urls = [
-        urljoin(BASE_URL, sport)
-        for sport in ["fighting", "mlb", "motorsports", "nba", "nfl", "soccer"]
-    ]
+    sport_urls = {sport: urljoin(BASE_URL, sport) for sport in valid_sports}
 
     events = await get_events(
         client,