From 00000d991eb4c6c2218702fa8dd48a819e2f82cc Mon Sep 17 00:00:00 2001 From: doms9 <96013514+doms9@users.noreply.github.com> Date: Fri, 15 May 2026 21:56:53 -0400 Subject: [PATCH] e - edit scraping for mainportal.py --- M3U8/scrapers/mainportal.py | 39 +++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/M3U8/scrapers/mainportal.py b/M3U8/scrapers/mainportal.py index 735b2a49..fc159b2c 100644 --- a/M3U8/scrapers/mainportal.py +++ b/M3U8/scrapers/mainportal.py @@ -1,4 +1,6 @@ import asyncio +import json +import re from functools import partial from urllib.parse import urljoin @@ -53,35 +55,30 @@ async def process_event( return m3u8_url -async def get_api_data() -> dict[str, dict[str, list[dict]]]: - tasks = [ - ( - sport, - network.request( - urljoin(url, "api/v2/stateshot"), - log=log, - ), - ) - for sport, url in API_URLS.items() - ] - - results = await asyncio.gather(*(task for _, task in tasks)) - - return {sport: r.json() for (sport, _), r in zip(tasks, results) if r} - - async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: - now = Time.clean(Time.now()) + tasks = [network.request(url, log=log) for url in BASE_URLS.values()] - api_data = await get_api_data() + results = await asyncio.gather(*tasks) events = [] + if not (html_data := [(html.text, html.url) for html in results if html]): + return events + + now = Time.clean(Time.now()) + + stateshot_ptrn = re.compile(r"var\s+stateshot\s+=\s+(.*);", re.I) + start_dt = now.delta(hours=-1) end_dt = now.delta(minutes=1) - for sport in api_data: - data = api_data[sport] + for content, url in html_data: + sport = next((k for k, v in BASE_URLS.items() if v == url), "Live Event") + + if not (match := stateshot_ptrn.search(content)): + continue + + data: dict = json.loads(f"{match[1]}") teams = data.get("teams", {})