From 00000d9049bfdc9d894d2d48dc99d333b95ddec2 Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Wed, 26 Nov 2025 19:53:42 -0500
Subject: [PATCH] e

---
 M3U8/fetch.py               |   3 -
 M3U8/scrapers/streamfree.py | 131 +++++--------------------
 M3U8/scrapers/vuen.py       | 188 ------------------------------------
 3 files changed, 23 insertions(+), 299 deletions(-)
 delete mode 100644 M3U8/scrapers/vuen.py

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 302553e..cd5f2cf 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -15,7 +15,6 @@ from scrapers import (
     streamfree,
     strmd,
     tvpass,
-    vuen,
     watchfooty,
 )
 from scrapers.utils import get_logger, network
@@ -56,7 +55,6 @@ async def main() -> None:
         asyncio.create_task(streamfree.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
         asyncio.create_task(tvpass.scrape(network.client)),
-        asyncio.create_task(vuen.scrape(network.client)),
         asyncio.create_task(watchfooty.scrape(network.client)),
     ]

@@ -74,7 +72,6 @@
         | strmd.urls
         | streamfree.urls
         | tvpass.urls
-        | vuen.urls
         | watchfooty.urls
     )

diff --git a/M3U8/scrapers/streamfree.py b/M3U8/scrapers/streamfree.py
index fff48f4..09a0440 100644
--- a/M3U8/scrapers/streamfree.py
+++ b/M3U8/scrapers/streamfree.py
@@ -1,18 +1,14 @@
-from functools import partial
 from urllib.parse import urljoin

 import httpx
-from playwright.async_api import async_playwright

-from .utils import Cache, Time, get_logger, leagues, network
+from .utils import Cache, Time, get_logger, leagues

 log = get_logger(__name__)

 urls: dict[str, dict[str, str | float]] = {}

-CACHE_FILE = Cache("streamfree.json", exp=10_800)
-
-API_FILE = Cache("streamfree-api.json", exp=19_800)
+CACHE_FILE = Cache("streamfree.json", exp=19_800)

 BASE_URL = "https://streamfree.to"

@@ -21,8 +17,6 @@ async def refresh_api_cache(
     client: httpx.AsyncClient,
     url: str,
 ) -> dict[str, dict[str, list]]:
-    log.info("Refreshing API cache")
-
     try:
         r = await client.get(url)
         r.raise_for_status()
@@ -31,127 +25,48 @@ async def refresh_api_cache(

         return {}

-    data = r.json()
-
-    data["timestamp"] = Time.now().timestamp()
-
-    return data
+    return r.json()


-async def get_events(
-    client: httpx.AsyncClient,
-    url: str,
-    cached_keys: set[str],
-) -> list[dict[str, str]]:
+async def get_events(client: httpx.AsyncClient) -> dict[str, dict[str, str | float]]:
+    api_data = await refresh_api_cache(client, urljoin(BASE_URL, "streams"))

-    if not (api_data := API_FILE.load(per_entry=False)):
-        api_data = await refresh_api_cache(
-            client,
-            urljoin(url, "streams"),
-        )
+    events = {}

-        API_FILE.write(api_data)
-
-    events = []
-
-    now = Time.clean(Time.now())
-    start_dt = now.delta(hours=-1)
-    end_dt = now.delta(minutes=10)
-
-    for category, streams in api_data.get("streams", {}).items():
+    for _, streams in api_data.get("streams", {}).items():
         if not streams:
             continue

         for stream in streams:
-            event_dt = Time.from_ts(stream["match_timestamp"])
-
-            if not start_dt <= event_dt <= end_dt:
-                continue
-
             sport, name = stream["league"], stream["name"]

             key = f"[{sport}] {name} (STRMFR)"

-            if cached_keys & {key}:
-                continue
+            tvg_id, logo = leagues.get_tvg_info(sport, name)

-            stream_url = stream["stream_key"]
-
-            events.append(
-                {
-                    "sport": sport,
-                    "event": name,
-                    "link": urljoin(url, f"player/{category}/{stream_url}"),
-                    "timestamp": event_dt.timestamp(),
-                }
-            )
+            events[key] = {
+                "url": urljoin(BASE_URL, f"live/{stream['stream_key']}720p/index.m3u8"),
+                "logo": logo,
+                "base": BASE_URL,
+                "timestamp": Time.now().timestamp(),
+ "id": tvg_id or "Live.Event.us", + } return events async def scrape(client: httpx.AsyncClient) -> None: - cached_urls = CACHE_FILE.load() - cached_count = len(cached_urls) - urls.update(cached_urls) - - log.info(f"Loaded {cached_count} event(s) from cache") + if cached := CACHE_FILE.load(): + urls.update(cached) + log.info(f"Loaded {len(urls)} event(s) from cache") + return log.info(f'Scraping from "{BASE_URL}"') - events = await get_events( - client, - BASE_URL, - set(cached_urls.keys()), - ) + events = await get_events(client) - log.info(f"Processing {len(events)} new URL(s)") + urls.update(events) - if events: - async with async_playwright() as p: - browser, context = await network.browser(p, browser="brave") + CACHE_FILE.write(urls) - for i, ev in enumerate(events, start=1): - handler = partial( - network.process_event, - url=ev["link"], - url_num=i, - context=context, - timeout=6, - log=log, - ) - - url = await network.safe_process( - handler, - url_num=i, - log=log, - ) - - if url: - sport, event, ts = ( - ev["sport"], - ev["event"], - ev["timestamp"], - ) - - key = f"[{sport}] {event} (STRMFR)" - - tvg_id, logo = leagues.get_tvg_info(sport, event) - - entry = { - "url": url.replace("540p", "720p"), - "logo": logo, - "base": BASE_URL, - "timestamp": ts, - "id": tvg_id or "Live.Event.us", - } - - urls[key] = cached_urls[key] = entry - - await browser.close() - - if new_count := len(cached_urls) - cached_count: - log.info(f"Collected and cached {new_count} new event(s)") - else: - log.info("No new events found") - - CACHE_FILE.write(cached_urls) + log.info(f"Collected and cached {len(urls)} new event(s)") diff --git a/M3U8/scrapers/vuen.py b/M3U8/scrapers/vuen.py deleted file mode 100644 index 17a0d15..0000000 --- a/M3U8/scrapers/vuen.py +++ /dev/null @@ -1,188 +0,0 @@ -import json -import re -from functools import partial -from urllib.parse import urljoin - -import httpx -from playwright.async_api import async_playwright -from selectolax.parser import HTMLParser - -from .utils import Cache, Time, get_logger, leagues, network - -log = get_logger(__name__) - -urls: dict[str, dict[str, str | float]] = {} - -CACHE_FILE = Cache("vuen.json", exp=3_600) - -API_FILE = Cache("vuen-html.json", exp=28_800) - -BASE_URL = "https://vuen.link" - - -async def refresh_api_cache( - client: httpx.AsyncClient, - url: str, -) -> dict[str, list[dict]]: - log.info("Refreshing API cache") - - data = {} - - try: - r = await client.get(url) - r.raise_for_status() - except Exception as e: - log.error(f'Failed to fetch "{url}": {e}') - - return data - - soup = HTMLParser(r.text) - - for script in soup.css("script"): - if not script: - continue - - content = script.text(strip=True) - - if not ( - match := re.search( - r"window\.matches\s*=\s*JSON\.parse\(`(.*?)`\)", - content, - re.DOTALL, - ) - ): - continue - - data["matches"] = json.loads(match[1]) - - data["timestamp"] = Time.now().timestamp() - - break - - return data - - -async def get_events( - client: httpx.AsyncClient, - cached_keys: set[str], -) -> list[dict[str, str]]: - if not (api_data := API_FILE.load(per_entry=False)): - api_data = await refresh_api_cache(client, BASE_URL) - - API_FILE.write(api_data) - - events = [] - - now = Time.clean(Time.now()) - start_dt = now.delta(minutes=-30) - end_dt = now.delta(minutes=30) - - for match in api_data.get("matches", []): - if not (ts := match.get("startTimestamp")): - continue - - start_ts = int(f"{ts}"[:-3]) - - event_dt = Time.from_ts(start_ts) - - if not start_dt <= event_dt <= end_dt: - continue - - 
event_name = match["matchstr"] - - sport = match["league"] - - channels: list[dict[str, str | list[str]]] = match.get("channels", []) - - event_link = None - - if not channels: - event_link = urljoin(BASE_URL, match["slug"]) - - for channel in channels: - event_link = (channel.get("oldLinks") or channel.get("links") or [None])[0] - - if event_link: - break - - if not event_link: - continue - - key = f"[{sport}] {event_name} (VUEN)" - - if cached_keys & {key}: - continue - - events.append( - { - "sport": sport, - "event": event_name, - "link": event_link, - "timestamp": now.timestamp(), - } - ) - - return events - - -async def scrape(client: httpx.AsyncClient) -> None: - cached_urls = CACHE_FILE.load() - cached_count = len(cached_urls) - urls.update(cached_urls) - - log.info(f"Loaded {cached_count} event(s) from cache") - - log.info(f'Scraping from "{BASE_URL}"') - - events = await get_events(client, set(cached_urls.keys())) - - log.info(f"Processing {len(events)} new URL(s)") - - if events: - async with async_playwright() as p: - browser, context = await network.browser(p) - - for i, ev in enumerate(events, start=1): - handler = partial( - network.process_event, - url=ev["link"], - url_num=i, - context=context, - log=log, - ) - - url = await network.safe_process( - handler, - url_num=i, - log=log, - ) - - if url: - sport, event, ts = ( - ev["sport"], - ev["event"], - ev["timestamp"], - ) - - key = f"[{sport}] {event} (VUEN)" - - tvg_id, logo = leagues.get_tvg_info(sport, event) - - entry = { - "url": url, - "logo": logo, - "base": "https://vividmosaica.com/", - "timestamp": ts, - "id": tvg_id or "Live.Event.us", - } - - urls[key] = cached_urls[key] = entry - - await browser.close() - - if new_count := len(cached_urls) - cached_count: - log.info(f"Collected and cached {new_count} new event(s)") - else: - log.info("No new events found") - - CACHE_FILE.write(cached_urls)