From 00000d94d0b9563e9b1973c9fa40cd6ce20331cb Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Tue, 25 Nov 2025 22:55:03 -0500
Subject: [PATCH] Replace sport9 scraper with vuen

---
 M3U8/fetch.py                        |   6 +-
 M3U8/scrapers/strmd.py               |   2 +-
 M3U8/scrapers/{sport9.py => vuen.py} | 125 ++++++++++++++++++++-------
 M3U8/scrapers/watchfooty.py          |   2 +-
 4 files changed, 97 insertions(+), 38 deletions(-)
 rename M3U8/scrapers/{sport9.py => vuen.py} (50%)

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index cd5f2cf..53c28e0 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -9,12 +9,12 @@ from scrapers import (
     ppv,
     roxie,
     shark,
-    sport9,
     streambtw,
     streameast,
     streamfree,
     strmd,
     tvpass,
+    vuen,
     watchfooty,
 )
 from scrapers.utils import get_logger, network
@@ -49,12 +49,12 @@ async def main() -> None:
         asyncio.create_task(ppv.scrape(network.client)),
         asyncio.create_task(roxie.scrape(network.client)),
         asyncio.create_task(shark.scrape(network.client)),
-        asyncio.create_task(sport9.scrape(network.client)),
         asyncio.create_task(streambtw.scrape(network.client)),
         asyncio.create_task(streameast.scrape(network.client)),
         asyncio.create_task(streamfree.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
         asyncio.create_task(tvpass.scrape(network.client)),
+        asyncio.create_task(vuen.scrape(network.client)),
         asyncio.create_task(watchfooty.scrape(network.client)),
     ]

@@ -66,12 +66,12 @@ async def main() -> None:
         | ppv.urls
         | roxie.urls
         | shark.urls
-        | sport9.urls
         | streambtw.urls
         | streameast.urls
         | strmd.urls
         | streamfree.urls
         | tvpass.urls
+        | vuen.urls
         | watchfooty.urls
     )

diff --git a/M3U8/scrapers/strmd.py b/M3U8/scrapers/strmd.py
index 1929ffc..3d9c454 100644
--- a/M3U8/scrapers/strmd.py
+++ b/M3U8/scrapers/strmd.py
@@ -144,7 +144,7 @@ async def get_events(
         if not (ts := event["date"]):
             continue

-        start_ts = int(str(ts)[:-3])
+        start_ts = int(f"{ts}"[:-3])

         event_dt = Time.from_ts(start_ts)

diff --git a/M3U8/scrapers/sport9.py b/M3U8/scrapers/vuen.py
similarity index 50%
rename from M3U8/scrapers/sport9.py
rename to M3U8/scrapers/vuen.py
index a0fae72..aa6d622 100644
--- a/M3U8/scrapers/sport9.py
+++ b/M3U8/scrapers/vuen.py
@@ -1,3 +1,5 @@
+import json
+import re
 from functools import partial
 from urllib.parse import urljoin

@@ -11,50 +13,104 @@ log = get_logger(__name__)

 urls: dict[str, dict[str, str | float]] = {}

-CACHE_FILE = Cache("sport9.json", exp=3_600)
+CACHE_FILE = Cache("vuen.json", exp=3_600)

-BASE_URL = "https://sport9.ru"
+API_FILE = Cache("vuen-html.json", exp=28_800)
+
+BASE_URL = "https://vuen.link"
+
+
+async def refresh_api_cache(
+    client: httpx.AsyncClient,
+    url: str,
+) -> dict[str, list[dict] | float]:
+    now = Time.now()
+
+    log.info("Refreshing API cache")
+
+    data = {}
+
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}": {e}')
+
+        return data
+
+    soup = HTMLParser(r.text)
+
+    for script in soup.css("script"):
+        if not script:
+            continue
+
+        content = script.text(strip=True)
+
+        if not (
+            match := re.search(
+                r"window\.matches\s*=\s*JSON\.parse\(`(.*?)`\)",
+                content,
+                re.DOTALL,
+            )
+        ):
+            continue
+
+        data["matches"] = json.loads(match[1])
+
+        data["timestamp"] = now.timestamp()
+
+        break
+
+    return data


 async def get_events(
     client: httpx.AsyncClient,
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
-    try:
-        r = await client.get(BASE_URL)
-        r.raise_for_status()
-    except Exception as e:
-        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+    if not (api_data := API_FILE.load(per_entry=False)):
+        api_data = await refresh_api_cache(client, BASE_URL)

-        return []
-
-    soup = HTMLParser(r.text)
+        API_FILE.write(api_data)

     events = []

-    for card in soup.css("a.match-card"):
-        live_badge = card.css_first(".live-badge")
+    now = Time.clean(Time.now())
+    start_dt = now.delta(minutes=-30)
+    end_dt = now.delta(minutes=30)

-        if not live_badge or live_badge.text(strip=True) != "Live":
+    for match in api_data.get("matches", []):
+        if not (ts := match.get("startTimestamp")):
             continue

-        if not (sport_node := card.css_first(".tournament-name")):
+        start_ts = int(f"{ts}"[:-3])
+
+        event_dt = Time.from_ts(start_ts)
+
+        if not start_dt <= event_dt <= end_dt:
             continue

-        team_1_node = card.css_first(".teams-container2 .team1")
-        team_2_node = card.css_first(".teams-container2 .team2")
+        event_name = match["matchstr"]

-        if not (team_1_node and team_2_node):
+        sport = match["league"]
+
+        channels: list[dict[str, str | list[str]]] = match.get("channels", [])
+
+        event_link = None
+
+        if not channels:
+            event_link = urljoin(BASE_URL, match["slug"])
+
+        for channel in channels:
+            event_link = (channel.get("oldLinks") or channel.get("links") or [None])[0]
+
+            if event_link:
+                break
+
+        if not event_link:
             continue

-        if not (href := card.attributes.get("href")):
-            continue
-
-        sport = sport_node.text(strip=True)
-        team_1 = team_1_node.text(strip=True)
-        team_2 = team_2_node.text(strip=True)
-
-        key = f"[{sport}] {team_1} vs {team_2} (SPRT9)"
+        key = f"[{sport}] {event_name} (VUEN)"

         if cached_keys & {key}:
             continue
@@ -62,8 +118,9 @@ async def get_events(
         events.append(
             {
                 "sport": sport,
-                "event": f"{team_1} vs {team_2}",
-                "link": urljoin(BASE_URL, href),
+                "event": event_name,
+                "link": event_link,
+                "timestamp": now.timestamp(),
             }
         )

@@ -84,10 +141,8 @@ async def scrape(client: httpx.AsyncClient) -> None:
     log.info(f"Processing {len(events)} new URL(s)")

     if events:
-        now = Time.now().timestamp()
-
         async with async_playwright() as p:
-            browser, context = await network.browser(p, browser="brave")
+            browser, context = await network.browser(p)

             for i, ev in enumerate(events, start=1):
                 handler = partial(
@@ -105,9 +160,13 @@ async def scrape(client: httpx.AsyncClient) -> None:
                 )

                 if url:
-                    sport, event = ev["sport"], ev["event"]
+                    sport, event, ts = (
+                        ev["sport"],
+                        ev["event"],
+                        ev["timestamp"],
+                    )

-                    key = f"[{sport}] {event} (SPRT9)"
+                    key = f"[{sport}] {event} (VUEN)"

                     tvg_id, logo = leagues.get_tvg_info(sport, event)

@@ -115,7 +174,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
                         "url": url,
                         "logo": logo,
                         "base": "https://vividmosaica.com/",
-                        "timestamp": now,
+                        "timestamp": ts,
                         "id": tvg_id or "Live.Event.us",
                     }

diff --git a/M3U8/scrapers/watchfooty.py b/M3U8/scrapers/watchfooty.py
index 6439ce5..04e589c 100644
--- a/M3U8/scrapers/watchfooty.py
+++ b/M3U8/scrapers/watchfooty.py
@@ -183,7 +183,7 @@ async def get_events(
         if not (ts := event.get("ts")):
             continue

-        start_ts = int(str(ts)[:-3])
+        start_ts = int(f"{ts}"[:-3])

         event_dt = Time.from_ts(start_ts)
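-- 
Reviewer note: the heart of the new scraper is the inline bootstrap that
vuen.link ships in a <script> tag, window.matches = JSON.parse(`...`), which
refresh_api_cache() lifts out with a regex instead of executing the page.
Below is a minimal standalone sketch of that technique, assuming HTMLParser
here is selectolax's parser and reusing the patch's regex; fetch_matches and
MATCHES_RE are illustrative names, not repo APIs:

    import asyncio
    import json
    import re

    import httpx
    from selectolax.parser import HTMLParser

    # The site serializes its schedule into an inline script:
    #   window.matches = JSON.parse(`[...]`)
    MATCHES_RE = re.compile(
        r"window\.matches\s*=\s*JSON\.parse\(`(.*?)`\)",
        re.DOTALL,
    )

    async def fetch_matches(url: str) -> list[dict]:
        async with httpx.AsyncClient(timeout=15) as client:
            r = await client.get(url)
            r.raise_for_status()

        for script in HTMLParser(r.text).css("script"):
            if m := MATCHES_RE.search(script.text(strip=True)):
                # Group 1 is the template-literal body, i.e. a JSON array.
                return json.loads(m[1])

        return []

    # e.g. asyncio.run(fetch_matches("https://vuen.link"))

Caching the parsed payload for 8 hours (API_FILE, exp=28_800) while keeping
the per-stream cache at 1 hour (CACHE_FILE, exp=3_600) means the site itself
is fetched at most a few times a day, while resolved stream URLs still
expire quickly.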
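Reviewer note: int(f"{ts}"[:-3]) (also adopted in strmd.py and
watchfooty.py) turns a 13-digit millisecond timestamp into seconds by
slicing off the last three digits; the arithmetic equivalent is
int(ts) // 1000 (e.g. 1764115200000 -> 1764115200). A self-contained
illustration of the ±30-minute gate used in get_events(), with stdlib
datetime standing in for the repo's Time helper (in_window is an
illustrative name):

    from datetime import datetime, timedelta, timezone

    def in_window(start_ms: int, now: datetime) -> bool:
        # Millisecond epoch -> seconds; integer division mirrors the
        # string slice int(f"{ts}"[:-3]) used in the patch.
        event = datetime.fromtimestamp(start_ms // 1000, tz=timezone.utc)
        slack = timedelta(minutes=30)
        return now - slack <= event <= now + slack

    now = datetime.now(tz=timezone.utc)
    print(in_window(int(now.timestamp() * 1000), now))  # True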