From 00000d919926cac6ce6a042466f0fc0a808be8a8 Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Sat, 11 Oct 2025 18:43:57 -0400
Subject: [PATCH] Add WatchFooty scraper and move get_tvg_info into Leagues

Add a scraper for the WatchFooty mirrors and wire it into fetch.py, move
get_tvg_info from strmd.py onto the shared Leagues helper, and teach Time
to parse ISO-8601 timestamps.
---
 M3U8/fetch.py                 |   4 +-
 M3U8/scrapers/old/ppv.py      |   3 +
 M3U8/scrapers/strmd.py        |  31 +---
 M3U8/scrapers/utils/config.py |  28 ++++
 M3U8/scrapers/watchfooty.py   | 292 ++++++++++++++++++++++++++++++++++
 5 files changed, 328 insertions(+), 30 deletions(-)
 create mode 100644 M3U8/scrapers/watchfooty.py

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 0e49db8..f73df1a 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -2,7 +2,7 @@ import asyncio
 
 from pathlib import Path
 
-from scrapers import fstv, livetvsx, streambtw, streamed, strmd, tvpass
+from scrapers import fstv, livetvsx, streambtw, streamed, strmd, tvpass, watchfooty
 from scrapers.utils import get_logger, network
 
 log = get_logger(__name__)
@@ -32,6 +32,7 @@ async def main() -> None:
         asyncio.create_task(streamed.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
         asyncio.create_task(tvpass.scrape(network.client)),
+        asyncio.create_task(watchfooty.scrape(network.client)),
     ]
 
     await asyncio.gather(*tasks)
@@ -43,6 +44,7 @@ async def main() -> None:
         | streamed.urls
         | strmd.urls
         | tvpass.urls
+        | watchfooty.urls
     )
 
     live_events = []
diff --git a/M3U8/scrapers/old/ppv.py b/M3U8/scrapers/old/ppv.py
index d626ae0..0ec3d23 100644
--- a/M3U8/scrapers/old/ppv.py
+++ b/M3U8/scrapers/old/ppv.py
@@ -247,3 +247,6 @@ async def scrape(client: httpx.AsyncClient) -> None:
         log.info("No new events found")
 
     CACHE_FILE.write(cached_urls)
+
+
+# same provider as strmd.py
diff --git a/M3U8/scrapers/strmd.py b/M3U8/scrapers/strmd.py
index e2f2017..331cd90 100644
--- a/M3U8/scrapers/strmd.py
+++ b/M3U8/scrapers/strmd.py
@@ -31,34 +31,6 @@ def validate_category(s: str) -> str:
     return s.capitalize()
 
 
-def get_tvg_info(sport: str, event: str) -> tuple[str | None, str]:
-    match sport:
-        case "American Football":
-            if leagues.is_valid(event, "NFL"):
-                return leagues.info("NFL")
-
-            else:
-                return leagues.info("NCAA")
-
-        case "Basketball":
-            if leagues.is_valid(event, "NBA"):
-                return leagues.info("NBA")
-
-            elif leagues.is_valid(event, "WNBA"):
-                return leagues.info("WNBA")
-
-            # NCAA
-
-            else:
-                return leagues.info("Basketball")
-
-        case "Hockey":
-            return leagues.info("NHL")
-
-        case _:
-            return leagues.info(sport)
-
-
 async def refresh_api_cache(
     client: httpx.AsyncClient, url: str
 ) -> list[dict[str, Any]]:
@@ -139,6 +111,7 @@ async def get_events(
     base_url: str,
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
+
     if not (api_data := API_FILE.load(per_entry=False, index=True)):
         api_data = await refresh_api_cache(
             client,
@@ -257,7 +230,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
 
             key = f"[{sport}] {event} (STRMD)"
 
-            tvg_id, pic = get_tvg_info(sport, event)
+            tvg_id, pic = leagues.get_tvg_info(sport, event)
 
             entry = {
                 "url": url,
diff --git a/M3U8/scrapers/utils/config.py b/M3U8/scrapers/utils/config.py
index 4b50824..74eb434 100644
--- a/M3U8/scrapers/utils/config.py
+++ b/M3U8/scrapers/utils/config.py
@@ -50,6 +50,7 @@ class Time(datetime):
     formats = [
         "%Y-%m-%d %H:%M",
         "%Y-%m-%d %H:%M:%S",
+        "%Y-%m-%dT%H:%M:%S.%fZ",
         "%a, %d %b %Y %H:%M:%S %z",
     ]
 
@@ -108,6 +109,33 @@ class Leagues:
 
         return event.lower() == "nfl redzone" if league == "NFL" else False
 
+    def get_tvg_info(self, sport: str, event: str) -> tuple[str | None, str]:
+        match sport:
+            case "American Football":
+                return (
+                    self.info("NFL")
+                    if self.is_valid(event, "NFL")
+                    else self.info("NCAA")
+                )
+
+            case "Basketball":
+                if self.is_valid(event, "NBA"):
+                    return self.info("NBA")
+
+                elif self.is_valid(event, "WNBA"):
+                    return self.info("WNBA")
+
+                # NCAA
+
+                else:
+                    return self.info("Basketball")
+
+            case "Hockey":
+                return self.info("NHL")
+
+            case _:
+                return self.info(sport)
+
     @property
     def league_names(self) -> list[str]:
         return self.data["teams"].keys()
diff --git a/M3U8/scrapers/watchfooty.py b/M3U8/scrapers/watchfooty.py
new file mode 100644
index 0000000..6238de3
--- /dev/null
+++ b/M3U8/scrapers/watchfooty.py
@@ -0,0 +1,292 @@
+import asyncio
+import re
+from functools import partial
+from pathlib import Path
+from typing import Any
+from urllib.parse import urljoin
+
+import httpx
+from playwright.async_api import BrowserContext, TimeoutError as PWTimeoutError, async_playwright
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+API_FILE = Cache(Path(__file__).parent / "caches" / "watchfty_api.json", exp=28_800)
+
+CACHE_FILE = Cache(Path(__file__).parent / "caches" / "watchfty.json", exp=10_800)
+
+MIRRORS = [
+    "https://www.watchfooty.cc",
+    "https://www.watchfooty.vip",
+    "https://www.watchfooty.live",
+]
+
+SPORT_ENDPOINTS = [
+    "football",
+    "american-football",
+    "hockey",
+    "basketball",
+    "baseball",
+    "racing",
+    "fighting",
+    "golf",
+]
+
+
+async def get_api_data(
+    client: httpx.AsyncClient,
+    url: str,
+) -> list[dict[str, Any]]:
+    try:
+        r = await client.get(url, timeout=10)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}": {e}')
+        return []
+
+    return r.json()
+
+
+async def refresh_api_cache(
+    client: httpx.AsyncClient,
+    url: str,
+) -> list[dict[str, Any]]:
+    log.info("Refreshing API cache")
+
+    tasks = [
+        get_api_data(
+            client,
+            urljoin(url, f"api/v1/matches/{sport}"),
+        )
+        for sport in SPORT_ENDPOINTS
+    ]
+
+    results = await asyncio.gather(*tasks)
+
+    data = [event for sublist in results if sublist for event in sublist]
+    if data:
+        data[0]["timestamp"] = Time.now().timestamp()
+
+    return data
+
+
+async def process_event(
+    url: str,
+    url_num: int,
+    context: BrowserContext,
+) -> str | None:
+    page = await context.new_page()
+
+    captured: list[str] = []
+
+    got_one = asyncio.Event()
+
+    handler = partial(network.capture_req, captured=captured, got_one=got_one)
+
+    page.on("request", handler)
+
+    try:
+        await page.goto(
+            url,
+            wait_until="domcontentloaded",
+            timeout=15_000,
+        )
+
+        try:
+            header = await page.wait_for_selector(
+                "text=/Stream Links/i",
+                timeout=5_000,
+            )
+
+            text = await header.inner_text()
+        except PWTimeoutError:
+            return
+
+        match = re.search(r"\((\d+)\)", text)
+
+        if not match or int(match[1]) == 0:
+            return
+
+        try:
+            frame_el = await page.wait_for_selector(
+                "iframe[src*='embed.best-sports.stream']",
+                timeout=4_000,
+            )
+        except PWTimeoutError:
+            log.warning("No iframe found — exiting early.")
+            return
+
+        iframe = await frame_el.content_frame()
+
+        if not iframe:
+            return
+
+        try:
+            await iframe.wait_for_selector(
+                "button.vds-play-button",
+                state="visible",
+                timeout=4_000,
+            )
+
+            await iframe.click("button.vds-play-button")
+        except PWTimeoutError:
+            log.warning("Play button not found inside iframe.")
+
+        wait_task = asyncio.create_task(got_one.wait())
+
+        try:
+            await asyncio.wait_for(wait_task, timeout=10)
+        except asyncio.TimeoutError:
+            log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
+            return
+
+        finally:
+            if not wait_task.done():
+                wait_task.cancel()
+
+                try:
+                    await wait_task
+                except asyncio.CancelledError:
+                    pass
+
+        if captured:
+            log.info(f"URL {url_num}) Captured M3U8")
+
+            return captured[-1]
+
+        log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
+        return
+
+    except Exception as e:
+        log.warning(f"URL {url_num}) Exception while processing: {e}")
+        return
+
+    finally:
+        page.remove_listener("request", handler)
+        await page.close()
+
+
+async def get_events(
+    client: httpx.AsyncClient,
+    base_url: str,
+    cached_keys: set[str],
+) -> list[dict[str, str]]:
+
+    if not (api_data := API_FILE.load(per_entry=False, index=True)):
+        api_data = await refresh_api_cache(client, base_url)
+
+        API_FILE.write(api_data)
+
+    events: list[dict[str, str]] = []
+
+    now = Time.clean(Time.now())
+    start_dt = now.delta(minutes=-30)
+    end_dt = now.delta(minutes=30)
+    pattern = re.compile(r"\-+|\(")
+
+    for event in api_data:
+        match_id = event["matchId"]
+        name = event["title"]
+        league = event["league"]
+
+        if not (date := event.get("date")):
+            continue
+
+        event_dt = Time.from_str(date)
+
+        if not start_dt <= event_dt <= end_dt:
+            continue
+
+        sport = pattern.split(league, 1)[0].strip()
+
+        logo = urljoin(base_url, poster) if (poster := event.get("poster")) else None
+
+        key = f"[{sport}] {name} (WFTY)"
+
+        if key in cached_keys:
+            continue
+
+        events.append(
+            {
+                "sport": sport,
+                "event": name,
+                "link": f"https://www.watchfooty.live/en/stream/{match_id}",
+                "logo": logo,
+                "timestamp": event_dt.timestamp(),
+            }
+        )
+
+    return events
+
+
+async def scrape(client: httpx.AsyncClient) -> None:
+    cached_urls = CACHE_FILE.load()
+    cached_count = len(cached_urls)
+    urls.update(cached_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    if not (base_url := await network.get_base(MIRRORS)):
+        log.warning("No working WatchFooty mirrors")
+        CACHE_FILE.write(cached_urls)
+        return
+
+    log.info(f'Scraping from "{base_url}"')
+
+    events = await get_events(
+        client,
+        base_url,
+        set(cached_urls.keys()),
+    )
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    # breakpoint()
+
+    async with async_playwright() as p:
+        browser, context = await network.browser(p)
+
+        for i, ev in enumerate(events, start=1):
+            url = await network.safe_process(
+                lambda: process_event(
+                    ev["link"],
+                    url_num=i,
+                    context=context,
+                ),
+                url_num=i,
+                log=log,
+            )
+
+            if url:
+                sport, event, logo, ts = (
+                    ev["sport"],
+                    ev["event"],
+                    ev["logo"],
+                    ev["timestamp"],
+                )
+
+                key = f"[{sport}] {event} (WFTY)"
+
+                tvg_id, pic = leagues.get_tvg_info(sport, event)
+
+                entry = {
+                    "url": url,
+                    "logo": logo or pic,
+                    "base": base_url,
+                    "timestamp": ts,
+                    "id": tvg_id or "Live.Event.us",
+                }
+
+                urls[key] = cached_urls[key] = entry
+
+        await browser.close()
+
+    if new_count := len(cached_urls) - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
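
A minimal sketch for exercising the new scraper on its own. Assumptions not in
the patch: a throwaway httpx.AsyncClient stands in for the shared
network.client that fetch.py normally passes, and Playwright browsers are
already installed.

    import asyncio

    import httpx

    from scrapers import watchfooty


    async def main() -> None:
        # fetch.py normally passes the shared network.client here
        async with httpx.AsyncClient() as client:
            await watchfooty.scrape(client)

        # scrape() fills the module-level urls dict, keyed "[sport] event (WFTY)"
        for key, entry in watchfooty.urls.items():
            print(key, entry["url"])


    asyncio.run(main())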