From 00000d9b2c204c1935d59ae88685fc9699dffadf Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Tue, 19 May 2026 16:07:17 -0400
Subject: [PATCH] e - add footfast.py - misc edits.

---
Review notes (text between "---" and the first "diff --git" is not applied):
- footfast.py: a catalog entry without "embedId" is now skipped instead of
  raising KeyError and aborting get_events().
- footfast.py: removed the trailing space from the "base" URL.
- footfast.py: get_events() annotates cached_keys as Container[str] (the
  caller passes dict.keys()), computes the window end timestamp once, and
  both coroutines gained docstrings.
- The footfast.py hunk size and the diffstat were recounted by hand; the blob
  id on the footfast.py index line still refers to the pre-review content.

 M3U8/fetch.py              |   3 +
 M3U8/scrapers/cdnlivetv.py |   2 +-
 M3U8/scrapers/footfast.py  | 177 +++++++++++++++++++++++++++++++++++++
 M3U8/scrapers/ovogoal.py   |   2 +-
 M3U8/scrapers/streamhub.py |   2 +-
 5 files changed, 183 insertions(+), 3 deletions(-)
 create mode 100644 M3U8/scrapers/footfast.py

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index d332d533..01666c76 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -8,6 +8,7 @@ from scrapers import (
     cdnlivetv,
     embedhd,
     fawa,
+    footfast,
     fsports,
     istreameast,
     mainportal,
@@ -59,6 +60,7 @@ async def main() -> None:
 
     pw_tasks = [
         asyncio.create_task(embedhd.scrape(hdl_brwsr)),
+        asyncio.create_task(footfast.scrape(xtrnl_brwsr)),
         asyncio.create_task(fsports.scrape(xtrnl_brwsr)),
         asyncio.create_task(roxie.scrape(hdl_brwsr)),
         # asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
@@ -95,6 +97,7 @@ async def main() -> None:
         cdnlivetv.urls
         | embedhd.urls
         | fawa.urls
+        | footfast.urls
         | fsports.urls
         | istreameast.urls
         | mainportal.urls
diff --git a/M3U8/scrapers/cdnlivetv.py b/M3U8/scrapers/cdnlivetv.py
index 41a8d419..3819bde1 100644
--- a/M3U8/scrapers/cdnlivetv.py
+++ b/M3U8/scrapers/cdnlivetv.py
@@ -99,7 +99,7 @@ async def scrape(browser: Browser) -> None:
     if events := await get_events(cached_urls.keys()):
         log.info(f"Processing {len(events)} new URL(s)")
 
-        async with network.event_context(browser) as context:
+        async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
diff --git a/M3U8/scrapers/footfast.py b/M3U8/scrapers/footfast.py
new file mode 100644
index 00000000..f86964f7
--- /dev/null
+++ b/M3U8/scrapers/footfast.py
@@ -0,0 +1,177 @@
+from collections.abc import Container
+from functools import partial
+from urllib.parse import urljoin
+
+from playwright.async_api import Browser
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+TAG = "FOOTFAST"
+
+CACHE_FILE = Cache(TAG, exp=5_400)
+
+API_FILE = Cache(f"{TAG}-api", exp=28_800)
+
+BASE_URL = "https://footfast.cc"
+
+CATEGORIES = {
+    1: "Soccer",
+    3: "NBA",
+    6: "UFC/MMA",
+    8: "NHL",
+    13: "Live Event",
+    17: "MLB",
+    10: "Racing",
+    21: "Basketball",
+    #: "American Football",
+    #: "Boxing",
+    #: "Rugby",
+    #: "Tennis",
+    #: "Golf",
+}
+
+
+async def get_events(cached_keys: Container[str]) -> list[dict[str, str]]:
+    """Collect uncached catalog events whose start is within the last 3 hours.
+
+    The catalog comes from ``API_FILE``; when that cache yields nothing it is
+    re-fetched from ``api/public/catalog`` and written back. Entries missing a
+    name, category id, start timestamp or embed id are skipped, as are
+    categories absent from ``CATEGORIES`` and keys found in ``cached_keys``.
+    """
+    now = Time.clean(Time.now())
+
+    if not (api_data := API_FILE.load(per_entry=False)):
+        log.info("Refreshing API cache")
+
+        api_data = {"timestamp": now.timestamp()}
+
+        if r := await network.request(urljoin(BASE_URL, "api/public/catalog"), log=log):
+            api_data: dict[str, list[dict]] = r.json()
+
+            api_data["timestamp"] = now.timestamp()
+
+        API_FILE.write(api_data)
+
+    events = []
+
+    start_ts = now.delta(hours=-3).timestamp()
+    end_ts = now.timestamp()
+
+    for event_info in api_data.get("events", []):
+        event_name: str = event_info.get("name")
+        category_id: int = event_info.get("category_id")
+
+        event_ts: int = event_info.get("start")
+
+        if not (event_name and category_id and event_ts):
+            continue
+
+        if not start_ts <= event_ts <= end_ts:
+            continue
+
+        # if not (sources := event_info.get("source")):
+        #     continue
+
+        # elif not (source_id := sources[0].get("id")):
+        #     continue
+
+        if not (sport := CATEGORIES.get(category_id)):
+            continue
+
+        if f"[{sport}] {event_name} ({TAG})" in cached_keys:
+            continue
+
+        # A malformed entry must not abort the whole catalog pass.
+        if not (embed_id := event_info.get("embedId")):
+            continue
+
+        events.append(
+            {
+                "sport": sport,
+                "event": event_name,
+                # "link": f"https://aerastora.com/event/{embed_id}?source={source_id}",
+                "link": urljoin(BASE_URL, f"event/{embed_id}"),
+                "timestamp": event_ts,
+            }
+        )
+
+    return events
+
+
+async def scrape(browser: Browser) -> None:
+    """Refresh ``urls`` and the cache file with streams for new events.
+
+    Cached entries that have a URL are published to ``urls`` first. Every new
+    event is then processed in its own page and cached whether or not a
+    stream URL was found; only entries with a URL are added to ``urls``.
+    """
+    cached_urls = CACHE_FILE.load()
+
+    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+
+    valid_count = cached_count = len(valid_urls)
+
+    urls.update(valid_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info(f'Scraping from "{BASE_URL}"')
+
+    if events := await get_events(cached_urls.keys()):
+        log.info(f"Processing {len(events)} new URL(s)")
+
+        async with network.event_context(browser, stealth=False) as context:
+            for i, ev in enumerate(events, start=1):
+                async with network.event_page(context) as page:
+                    handler = partial(
+                        network.process_event,
+                        url=(link := ev["link"]),
+                        url_num=i,
+                        page=page,
+                        log=log,
+                    )
+
+                    url = await network.safe_process(
+                        handler,
+                        url_num=i,
+                        semaphore=network.PW_S,
+                        log=log,
+                    )
+
+                    sport, event, ts = (
+                        ev["sport"],
+                        ev["event"],
+                        ev["timestamp"],
+                    )
+
+                    tvg_id, logo = leagues.get_tvg_info(sport, event)
+
+                    key = f"[{sport}] {event} ({TAG})"
+
+                    entry = {
+                        "url": url,
+                        "logo": logo,
+                        "base": "https://aerastora.com/",
+                        "timestamp": ts,
+                        "id": tvg_id or "Live.Event.us",
+                        "link": link,
+                    }
+
+                    cached_urls[key] = entry
+
+                    if url:
+                        valid_count += 1
+
+                        urls[key] = entry
+
+        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")
+
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/ovogoal.py b/M3U8/scrapers/ovogoal.py
index 9ae83c49..97fec349 100644
--- a/M3U8/scrapers/ovogoal.py
+++ b/M3U8/scrapers/ovogoal.py
@@ -14,7 +14,7 @@ TAG = "OVO"
 
 CACHE_FILE = Cache(TAG, exp=28_800)
 
-BASE_URL = "https://ovogoaal.com"
+BASE_URL = "https://ovogoalz.top"
 
 
 async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]:
diff --git a/M3U8/scrapers/streamhub.py b/M3U8/scrapers/streamhub.py
index 3a975d6e..77484618 100644
--- a/M3U8/scrapers/streamhub.py
+++ b/M3U8/scrapers/streamhub.py
@@ -184,7 +184,7 @@ async def scrape(browser: Browser) -> None:
     if events := await get_events():
         log.info(f"Processing {len(events)} new URL(s)")
 
-        async with network.event_context(browser) as context:
+        async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page: