diff --git a/M3U8/scrapers/fawa.py b/M3U8/scrapers/fawa.py
index 34507fb..40f418c 100644
--- a/M3U8/scrapers/fawa.py
+++ b/M3U8/scrapers/fawa.py
@@ -36,11 +36,12 @@ async def process_event(
         re.IGNORECASE,
     )

-    if match := valid_m3u8.search(r.text):
-        log.info(f"URL {url_num}) Captured M3U8")
-        return match[2]
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return

-    log.info(f"URL {url_num}) No M3U8 found")
+    log.info(f"URL {url_num}) Captured M3U8")
+    return match[2]


 async def get_events(
diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py
index db23726..bf179e5 100644
--- a/M3U8/scrapers/roxie.py
+++ b/M3U8/scrapers/roxie.py
@@ -48,11 +48,12 @@ async def process_event(
         re.IGNORECASE,
     )

-    if match := valid_m3u8.search(r.text):
-        log.info(f"URL {url_num}) Captured M3U8")
-        return match[1]
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return

-    log.info(f"URL {url_num}) No M3U8 found")
+    log.info(f"URL {url_num}) Captured M3U8")
+    return match[1]


 async def refresh_html_cache(
@@ -62,8 +63,6 @@ async def refresh_html_cache(
     now_ts: float,
 ) -> dict[str, dict[str, str | float]]:

-    log.info("Refreshing HTML cache")
-
     try:
         r = await client.get(url)
         r.raise_for_status()
@@ -116,6 +115,8 @@ async def get_events(
     now = Time.clean(Time.now())

     if not (events := HTML_CACHE.load()):
+        log.info("Refreshing HTML cache")
+
         tasks = [
             refresh_html_cache(
                 client,
diff --git a/M3U8/scrapers/shark.py b/M3U8/scrapers/shark.py
index 73154c7..fe83daa 100644
--- a/M3U8/scrapers/shark.py
+++ b/M3U8/scrapers/shark.py
@@ -60,6 +60,8 @@ async def refresh_html_cache(

         return {}

+    pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)
+
     soup = HTMLParser(r.content)
     events = {}

@@ -81,8 +83,6 @@ async def refresh_html_cache(
         if not embed_btn or not (onclick := embed_btn.attributes.get("onclick")):
             continue

-        pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)
-
         if not (match := pattern.search(onclick)):
             continue

diff --git a/M3U8/scrapers/streambtw.py b/M3U8/scrapers/streambtw.py
index 778c91f..4a222a5 100644
--- a/M3U8/scrapers/streambtw.py
+++ b/M3U8/scrapers/streambtw.py
@@ -40,13 +40,14 @@ async def process_event(

     valid_m3u8 = re.compile(r'var\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)

-    if match := valid_m3u8.search(r.text):
-        encoded = match[2][::-1]
-        decoded = base64.b64decode(encoded[::-1]).decode("utf-8")
-        log.info(f"URL {url_num}) Captured M3U8")
-        return decoded
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return

-    log.info(f"URL {url_num}) No M3U8 found")
+    encoded = match[2][::-1]
+    decoded = base64.b64decode(encoded[::-1]).decode("utf-8")
+    log.info(f"URL {url_num}) Captured M3U8")
+    return decoded


 async def get_events(client: httpx.AsyncClient) -> list[dict[str, str]]:
diff --git a/M3U8/scrapers/streamcenter.py b/M3U8/scrapers/streamcenter.py
index f7dd00d..3e4fe39 100644
--- a/M3U8/scrapers/streamcenter.py
+++ b/M3U8/scrapers/streamcenter.py
@@ -118,7 +118,7 @@ async def scrape(client: httpx.AsyncClient) -> None:

         log.info(f"Loaded {cached_count} event(s) from cache")

-    log.info(f'Scraping from "{BASE_URL}"')
+    log.info(f'Scraping from "https://streamcenter.xyz"')

     events = await get_events(client, set(cached_urls.keys()))

diff --git a/M3U8/scrapers/streamsgate.py b/M3U8/scrapers/streamsgate.py
index 426e991..e11ebeb 100644
--- a/M3U8/scrapers/streamsgate.py
+++ b/M3U8/scrapers/streamsgate.py
@@ -37,7 +37,7 @@ TAG = "STRMSG8"
 def get_event(t1: str, t2: str) -> str:
     match t1:
         case "RED ZONE":
-            return "Red Zone"
+            return "NFL RedZone"

         case "TBD":
             return "TBD"
@@ -58,7 +58,9 @@ async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, Any]]:
     return r.json()


-async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
+async def refresh_api_cache(
+    client: httpx.AsyncClient, ts: float
+) -> list[dict[str, Any]]:
     log.info("Refreshing API cache")

     tasks = [
@@ -73,7 +75,7 @@ async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:

     for ev in data:
         ev["ts"] = ev.pop("timestamp")

-    data[-1]["timestamp"] = Time.now().timestamp()
+    data[-1]["timestamp"] = ts

     return data

@@ -81,14 +83,15 @@ async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
 async def get_events(
     client: httpx.AsyncClient, cached_keys: set[str]
 ) -> list[dict[str, str]]:

+    now = Time.clean(Time.now())
+
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
-        api_data = await refresh_api_cache(client)
+        api_data = await refresh_api_cache(client, now.timestamp())

         API_FILE.write(api_data)

     events = []

-    now = Time.clean(Time.now())
     start_dt = now.delta(minutes=-30)
     end_dt = now.delta(minutes=30)
diff --git a/M3U8/scrapers/strmd.py b/M3U8/scrapers/strmd.py
index e9ef4e6..14cb247 100644
--- a/M3U8/scrapers/strmd.py
+++ b/M3U8/scrapers/strmd.py
@@ -39,7 +39,9 @@ def fix_sport(s: str) -> str:
 async def refresh_api_cache(
     client: httpx.AsyncClient,
     url: str,
+    ts: float,
 ) -> list[dict[str, Any]]:
+
     log.info("Refreshing API cache")

     try:
@@ -52,7 +54,7 @@ async def refresh_api_cache(

     data = r.json()

-    data[-1]["timestamp"] = Time.now().timestamp()
+    data[-1]["timestamp"] = ts

     return data

@@ -126,16 +128,19 @@ async def get_events(
     cached_keys: set[str],
 ) -> list[dict[str, str]]:

+    now = Time.clean(Time.now())
+
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
         api_data = await refresh_api_cache(
-            client, urljoin(url, "api/matches/all-today")
+            client,
+            urljoin(url, "api/matches/all-today"),
+            now.timestamp(),
         )

         API_FILE.write(api_data)

     events = []

-    now = Time.clean(Time.now())
     start_dt = now.delta(minutes=-30)
     end_dt = now.delta(minutes=30)
     pattern = re.compile(r"[\n\r]+|\s{2,}")
diff --git a/M3U8/scrapers/webcast.py b/M3U8/scrapers/webcast.py
index 039b0a5..6620a15 100644
--- a/M3U8/scrapers/webcast.py
+++ b/M3U8/scrapers/webcast.py
@@ -27,8 +27,6 @@ def fix_event(s: str) -> str:
 async def refresh_html_cache(
     client: httpx.AsyncClient, url: str
 ) -> dict[str, dict[str, str | float]]:
-    log.info("Refreshing HTML cache")
-
     try:
         r = await client.get(url)
         r.raise_for_status()
@@ -95,6 +93,8 @@ async def get_events(
     now = Time.clean(Time.now())

     if not (events := HTML_CACHE.load()):
+        log.info("Refreshing HTML cache")
+
         tasks = [refresh_html_cache(client, url) for url in BASE_URLS.values()]

         results = await asyncio.gather(*tasks)