From 00000d980a28bb6a318d2e18f4c3f95b8e83dc46 Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Fri, 5 Dec 2025 20:12:12 -0500
Subject: [PATCH] e

---
 M3U8/fetch.py                |   3 +
 M3U8/scrapers/streamsgate.py | 197 +++++++++++++++++++++++++++++++++++
 2 files changed, 200 insertions(+)
 create mode 100644 M3U8/scrapers/streamsgate.py

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 2f610a4..a623aba 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -14,6 +14,7 @@ from scrapers import (
     streambtw,
     streamcenter,
     streamfree,
+    streamsgate,
     strmd,
     tvpass,
     watchfooty,
@@ -56,6 +57,7 @@ async def main() -> None:
         asyncio.create_task(streambtw.scrape(network.client)),
         asyncio.create_task(streamcenter.scrape(network.client)),
         asyncio.create_task(streamfree.scrape(network.client)),
+        asyncio.create_task(streamsgate.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
         asyncio.create_task(tvpass.scrape(network.client)),
         asyncio.create_task(watchfooty.scrape(network.client)),
@@ -76,6 +78,7 @@ async def main() -> None:
         | streamcenter.urls
         | strmd.urls
         | streamfree.urls
+        | streamsgate.urls
         | tvpass.urls
         | watchfooty.urls
         | webcast.urls
diff --git a/M3U8/scrapers/streamsgate.py b/M3U8/scrapers/streamsgate.py
new file mode 100644
index 0000000..e4ec17c
--- /dev/null
+++ b/M3U8/scrapers/streamsgate.py
@@ -0,0 +1,197 @@
+import asyncio
+from functools import partial
+from itertools import chain
+from typing import Any
+from urllib.parse import urljoin
+
+import httpx
+from playwright.async_api import async_playwright
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+CACHE_FILE = Cache("streamsgate.json", exp=10_800)
+
+API_FILE = Cache("streamsgate-api.json", exp=28_800)
+
+BASE_URL = "https://streamingon.org"
+
+SPORT_ENDPOINTS = [
+    "soccer",
+    "nfl",
+    "nba",
+    "cfb",
+    "mlb",
+    "nhl",
+    "ufc",
+    "boxing",
+    "f1",
+]
+
+TAG = "STRMSG8"
+
+
+def get_event(t1: str, t2: str) -> str:
+    match t1:
+        case "RED ZONE":
+            return "Red Zone"
+
+        case "TBD":
+            return "TBD"
+
+        case _:
+            return f"{t1.strip()} vs {t2.strip()}"
+
+
+async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, Any]]:
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}": {e}')
+
+        return []
+
+    return r.json()
+
+
+async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
+    log.info("Refreshing API cache")
+
+    tasks = [
+        get_api_data(client, urljoin(BASE_URL, f"data/{sport}.json"))
+        for sport in SPORT_ENDPOINTS
+    ]
+
+    results = await asyncio.gather(*tasks)
+
+    data = list(chain(*results))
+
+    for ev in data:
+        ev["ts"] = ev.pop("timestamp")
+
+    data[-1]["timestamp"] = Time.now().timestamp()
+
+    return data
+
+
+async def get_events(
+    client: httpx.AsyncClient, cached_keys: set[str]
+) -> list[dict[str, str]]:
+    if not (api_data := API_FILE.load(per_entry=False, index=-1)):
+        api_data = await refresh_api_cache(client)
+
+        API_FILE.write(api_data)
+
+    events = []
+
+    now = Time.clean(Time.now())
+    start_dt = now.delta(minutes=-30)
+    end_dt = now.delta(minutes=30)
+
+    for stream_group in api_data:
+        event_ts = stream_group.get("ts")
+
+        sport = stream_group.get("league")
+
+        t1, t2 = stream_group.get("away"), stream_group.get("home")
+
+        if not (event_ts and sport):
+            continue
+
+        event_dt = Time.from_ts(event_ts)
+
+        if not start_dt <= event_dt <= end_dt:
+            continue
+
+        event = get_event(t1, t2)
+
+        if not (streams := stream_group.get("streams")):
+            continue
+
+        if not (url := streams[0].get("url")):
+            continue
+
+        key = f"[{sport}] {event} ({TAG})"
+
+        if cached_keys & {key}:
+            continue
+
+        events.append(
+            {
+                "sport": sport,
+                "event": event,
+                "link": url,
+                "timestamp": event_dt.timestamp(),
+            }
+        )
+
+    return events
+
+
+async def scrape(client: httpx.AsyncClient) -> None:
+    cached_urls = CACHE_FILE.load()
+    cached_count = len(cached_urls)
+    urls.update(cached_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info(f'Scraping from "{BASE_URL}"')
+
+    events = await get_events(client, set(cached_urls.keys()))
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    if events:
+        async with async_playwright() as p:
+            browser, context = await network.browser(p)
+
+            for i, ev in enumerate(events, start=1):
+                handler = partial(
+                    network.process_event,
+                    url=ev["link"],
+                    url_num=i,
+                    context=context,
+                    log=log,
+                )
+
+                url = await network.safe_process(
+                    handler,
+                    url_num=i,
+                    log=log,
+                )
+
+                sport, event, ts = (
+                    ev["sport"],
+                    ev["event"],
+                    ev["timestamp"],
+                )
+
+                key = f"[{sport}] {event} ({TAG})"
+
+                tvg_id, logo = leagues.get_tvg_info(sport, event)
+
+                entry = {
+                    "url": url,
+                    "logo": logo,
+                    "base": BASE_URL,
+                    "timestamp": ts,
+                    "id": tvg_id or "Live.Event.us",
+                    "link": ev["link"],
+                }
+
+                cached_urls[key] = entry
+
+                urls[key] = cached_urls[key] = entry
+
+            await browser.close()
+
+    if new_count := len(cached_urls) - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)