diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index b354425a..028540dd 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -20,6 +20,7 @@ from scrapers import (
     streamcenter,
     streamhub,
     streamsgate,
+    timstreams,
     totalsportek,
     tvapp,
     volokit,
@@ -70,7 +71,8 @@ async def main() -> None:
         asyncio.create_task(sport9.scrape(xtrnl_brwsr)),
         asyncio.create_task(streamcenter.scrape(hdl_brwsr)),
         # asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
-        asyncio.create_task(streamsgate.scrape(hdl_brwsr)),
+        asyncio.create_task(streamsgate.scrape(xtrnl_brwsr)),
+        asyncio.create_task(timstreams.scrape(xtrnl_brwsr)),
     ]
 
     httpx_tasks = [
@@ -115,6 +117,7 @@ async def main() -> None:
         | streamcenter.urls
         | streamhub.urls
         | streamsgate.urls
+        | timstreams.urls
         | totalsportek.urls
         | tvapp.urls
         | volokit.urls
diff --git a/M3U8/scrapers/streamcenter.py b/M3U8/scrapers/streamcenter.py
index 97cf9f2e..f790a6e1 100644
--- a/M3U8/scrapers/streamcenter.py
+++ b/M3U8/scrapers/streamcenter.py
@@ -105,7 +105,7 @@ async def scrape(browser: Browser) -> None:
     if events:
         log.info(f"Processing {len(events)} new URL(s)")
 
-        async with network.event_context(browser, stealth=False) as context:
+        async with network.event_context(browser) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
diff --git a/M3U8/scrapers/timstreams.py b/M3U8/scrapers/timstreams.py
new file mode 100644
index 00000000..3d39e371
--- /dev/null
+++ b/M3U8/scrapers/timstreams.py
@@ -0,0 +1,215 @@
+"""Scraper for live sports events listed by the timstreams API."""
+
+from collections.abc import Collection
+from functools import partial
+from typing import Any
+from urllib.parse import urljoin
+
+from playwright.async_api import Browser
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+# Entries with a resolved stream URL, keyed "[sport] event (TAG)".
+urls: dict[str, dict[str, str | float]] = {}
+
+TAG = "TIMSTRM"
+
+# NOTE(review): ``exp`` is presumably a lifetime in seconds - confirm.
+CACHE_FILE = Cache(TAG, exp=10_800)
+
+API_FILE = Cache(f"{TAG}-api", exp=19_800)
+
+API_URL = "https://stra.viaplus.site/main"
+
+BASE_URL = "https://timstreams.fit"
+
+# API genre id -> sport label; events with an unlisted genre id are skipped.
+SPORT_GENRES = {
+    1: "Soccer",
+    2: "Motorsport",
+    3: "MMA",
+    4: "Fight",
+    5: "Boxing",
+    6: "Wrestling",
+    7: "Basketball",
+    # 8: "American Football",
+    9: "Baseball",
+    10: "Tennis",
+    11: "Hockey",
+    # 12: "Darts",
+    # 13: "Cricket",
+    # 14: "Cycling",
+    # 15: "Rugby",
+    # 16: "Live Shows",
+    # 17: "Other",
+}
+
+
+async def get_events(cached_keys: Collection[str]) -> list[dict[str, Any]]:
+    """Return events in the current time window that are not cached yet.
+
+    The API payload is read from ``API_FILE``; when that yields nothing it is
+    requested from ``API_URL`` and written back to ``API_FILE``.
+
+    Args:
+        cached_keys: Keys ("[sport] name (TAG)") of events to leave out.
+
+    Returns:
+        One dict per new event with the keys ``sport``, ``event``, ``link``,
+        ``ref``, ``logo`` and ``timestamp``.
+    """
+    now = Time.clean(Time.now())
+
+    if not (api_data := API_FILE.load(per_entry=False, index=-1)):
+        log.info("Refreshing API cache")
+
+        # Placeholder that is written to the cache when the request fails.
+        api_data = [{"timestamp": now.timestamp()}]
+
+        if r := await network.request(API_URL, log=log):
+            # An empty or non-list payload keeps the placeholder; indexing
+            # ``[-1]`` on it would raise.
+            if isinstance(data := r.json(), list) and data:
+                api_data = data
+
+                api_data[-1]["timestamp"] = now.timestamp()
+
+        API_FILE.write(api_data)
+
+    events = []
+
+    # Accept start times from 2 hours before now up to 30 minutes after.
+    start_dt = now.delta(hours=-2)
+    end_dt = now.delta(minutes=30)
+
+    for info in api_data:
+        if info.get("category") != "Events":
+            continue
+
+        # A category entry without an event list contributes nothing.
+        stream_events: list[dict[str, Any]] = info.get("events") or []
+
+        for ev in stream_events:
+            if (genre := ev.get("genre")) not in SPORT_GENRES:
+                continue
+
+            event_dt = Time.from_str(ev["time"], timezone="EST")
+
+            if not start_dt <= event_dt <= end_dt:
+                continue
+
+            name: str = ev["name"]
+
+            url_id: str = ev["URL"]
+
+            logo: str | None = ev.get("logo")
+
+            sport = SPORT_GENRES[genre]
+
+            if f"[{sport}] {name} ({TAG})" in cached_keys:
+                continue
+
+            # Only the first listed stream is used as the reference URL.
+            streams = ev.get("streams")
+
+            if not streams or not (url := streams[0].get("url")):
+                continue
+
+            events.append(
+                {
+                    "sport": sport,
+                    "event": name,
+                    "link": urljoin(BASE_URL, f"watch?id={url_id}"),
+                    "ref": url,
+                    "logo": logo,
+                    "timestamp": event_dt.timestamp(),
+                }
+            )
+
+    return events
+
+
+async def scrape(browser: Browser) -> None:
+    """Resolve stream URLs for new events; update ``urls`` and the cache.
+
+    Cached entries with a truthy ``url`` are loaded into ``urls`` first. Each
+    event returned by ``get_events`` is then processed in its own page. Every
+    processed event is stored in the cache, with or without a URL, and only
+    those with a URL are added to ``urls``.
+
+    Args:
+        browser: Playwright browser used to open the event pages.
+    """
+    cached_urls = CACHE_FILE.load()
+
+    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+
+    valid_count = cached_count = len(valid_urls)
+
+    urls.update(valid_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info(f'Scraping from "{BASE_URL}"')
+
+    events = await get_events(cached_urls.keys())
+
+    if events:
+        log.info(f"Processing {len(events)} new URL(s)")
+
+        async with network.event_context(browser, stealth=False) as context:
+            for i, ev in enumerate(events, start=1):
+                async with network.event_page(context) as page:
+                    handler = partial(
+                        network.process_event,
+                        url=(link := ev["link"]),
+                        url_num=i,
+                        page=page,
+                        log=log,
+                    )
+
+                    url = await network.safe_process(
+                        handler,
+                        url_num=i,
+                        semaphore=network.PW_S,
+                        log=log,
+                    )
+
+                    sport, event, logo, ref, ts = (
+                        ev["sport"],
+                        ev["event"],
+                        ev["logo"],
+                        ev["ref"],
+                        ev["timestamp"],
+                    )
+
+                    key = f"[{sport}] {event} ({TAG})"
+
+                    tvg_id, pic = leagues.get_tvg_info(sport, event)
+
+                    entry = {
+                        "url": url,
+                        "logo": logo or pic,
+                        "base": ref,
+                        "timestamp": ts,
+                        "id": tvg_id or "Live.Event.us",
+                        "link": link,
+                    }
+
+                    # Cached even without a URL; only resolved entries count.
+                    cached_urls[key] = entry
+
+                    if url:
+                        valid_count += 1
+
+                        urls[key] = entry
+
+    if new_count := valid_count - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)