From 00000d9362283e4284adfc3dd32d1117c6c14e7b Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Tue, 30 Sep 2025 17:27:42 -0400
Subject: [PATCH] Add streamed.site scraper and default tvg_id fallback

---
 M3U8/fetch.py                 |   3 +-
 M3U8/scrapers/ace.py          |   2 +-
 M3U8/scrapers/fstv.py         |   2 +-
 M3U8/scrapers/livetvsx.py     |   5 +-
 M3U8/scrapers/ppv.py          |   3 +
 M3U8/scrapers/streambtw.py    |   2 +-
 M3U8/scrapers/streameast.py   |   2 +-
 M3U8/scrapers/streamed.py     | 214 ++++++++++++++++++++++++++++++++++
 M3U8/scrapers/tvpass.py       |   2 +-
 M3U8/scrapers/utils/config.py |   4 +-
 10 files changed, 230 insertions(+), 9 deletions(-)
 create mode 100644 M3U8/scrapers/streamed.py

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 5115ff5..5dee3d7 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -2,7 +2,7 @@
 import asyncio
 from pathlib import Path
 
-from scrapers import fstv, livetvsx, ppv, streambtw, streameast, tvpass
+from scrapers import fstv, livetvsx, ppv, streambtw, streameast, streamed, tvpass
 from scrapers.utils import CLIENT, UA, get_logger
 
 log = get_logger(__name__)
@@ -42,6 +42,7 @@ async def main() -> None:
         | ppv.urls
         | streambtw.urls
         | streameast.urls
+        | streamed.urls
         | tvpass.urls
     )
 
diff --git a/M3U8/scrapers/ace.py b/M3U8/scrapers/ace.py
index 257bd22..807e053 100644
--- a/M3U8/scrapers/ace.py
+++ b/M3U8/scrapers/ace.py
@@ -116,7 +116,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
             entry = {
                 "url": link,
                 "logo": logo,
-                "id": tvg_id or "Live.Event.us",
+                "id": tvg_id,
             }
 
             urls[key] = entry
diff --git a/M3U8/scrapers/fstv.py b/M3U8/scrapers/fstv.py
index e3b0494..ff84943 100644
--- a/M3U8/scrapers/fstv.py
+++ b/M3U8/scrapers/fstv.py
@@ -164,7 +164,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
                 "logo": logo,
                 "base": base_url,
                 "timestamp": now.timestamp(),
-                "id": tvg_id or "Live.Event.us",
+                "id": tvg_id,
                 "href": ev["href"],
             }
 
diff --git a/M3U8/scrapers/livetvsx.py b/M3U8/scrapers/livetvsx.py
index 5fe3402..46df6c2 100644
--- a/M3U8/scrapers/livetvsx.py
+++ b/M3U8/scrapers/livetvsx.py
@@ -240,6 +240,9 @@ async def get_events(
                 elem.clear()
                 continue
 
+            if start_dt.date() != now.date():
+                continue
+
             if not start_dt <= dt <= end_dt:
                 elem.clear()
                 continue
@@ -316,7 +319,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
             entry = {
                 "url": url,
                 "logo": logo,
-                "id": tvg_id or "Live.Event.us",
+                "id": tvg_id,
                 "base": "https://livetv.sx/enx/",
                 "timestamp": now.timestamp(),
             }
diff --git a/M3U8/scrapers/ppv.py b/M3U8/scrapers/ppv.py
index 1cfad0f..5d7dece 100644
--- a/M3U8/scrapers/ppv.py
+++ b/M3U8/scrapers/ppv.py
@@ -173,6 +173,9 @@ async def get_events(
 
         end_dt = datetime.fromtimestamp(end_ts, tz=TZ) + timedelta(minutes=30)
 
+        if start_dt.date() != now.date():
+            continue
+
         if not start_dt <= now < end_dt:
             continue
 
diff --git a/M3U8/scrapers/streambtw.py b/M3U8/scrapers/streambtw.py
index 8a7795f..aeb3df1 100644
--- a/M3U8/scrapers/streambtw.py
+++ b/M3U8/scrapers/streambtw.py
@@ -103,7 +103,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
                 "logo": logo,
                 "base": BASE_URL,
                 "timestamp": now.timestamp(),
-                "id": tvg_id or "Live.Event.us",
+                "id": tvg_id,
             }
 
             urls[key] = entry
diff --git a/M3U8/scrapers/streameast.py b/M3U8/scrapers/streameast.py
index 9407a5d..c5a1218 100644
--- a/M3U8/scrapers/streameast.py
+++ b/M3U8/scrapers/streameast.py
@@ -206,7 +206,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
                 "logo": logo,
                 "base": base_url,
                 "timestamp": now.timestamp(),
-                "id": tvg_id or "Live.Event.us",
+                "id": tvg_id,
             }
 
             urls[key] = cached_urls[key] = entry
diff --git a/M3U8/scrapers/streamed.py b/M3U8/scrapers/streamed.py
new file mode 100644
index 0000000..e7d70e1
--- /dev/null
+++ b/M3U8/scrapers/streamed.py
@@ -0,0 +1,214 @@
+import asyncio
+from datetime import datetime, timedelta
+from functools import partial
+from pathlib import Path
+
+import httpx
+from playwright.async_api import async_playwright
+from selectolax.parser import HTMLParser
+
+from .utils import (
+    TZ,
+    capture_req,
+    get_logger,
+    leagues,
+    load_cache,
+    new_browser,
+    now,
+    safe_process_event,
+    write_cache,
+)
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+CACHE_FILE = Path(__file__).parent / "caches" / "streamed.json"
+
+HTML_CACHE = Path(__file__).parent / "caches" / "streamed_php.json"
+
+base_url = "https://streamed.site/webmaster.php"
+
+
+def get_date(s: str) -> datetime:
+    try:
+        return datetime.strptime(s, "%Y-%m-%d %H:%M %Z").astimezone(TZ)
+    except ValueError:
+        s = s.replace("ET", "").strip()
+        return datetime.strptime(s, "%Y-%m-%d %H:%M").astimezone(TZ)
+
+
+async def process_event(url: str, url_num: int) -> str | None:
+    async with async_playwright() as p:
+        browser, context = await new_browser(p, browser="brave")
+
+        page = await context.new_page()
+
+        captured: list[str] = []
+
+        got_one = asyncio.Event()
+
+        handler = partial(capture_req, captured=captured, got_one=got_one)
+
+        page.on("request", handler)
+
+        try:
+            await page.goto(url, wait_until="domcontentloaded", timeout=15_000)
+
+            wait_task = asyncio.create_task(got_one.wait())
+
+            try:
+                await asyncio.wait_for(wait_task, timeout=10)
+            except asyncio.TimeoutError:
+                log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
+                return
+
+            finally:
+                if not wait_task.done():
+                    wait_task.cancel()
+
+                    try:
+                        await wait_task
+                    except asyncio.CancelledError:
+                        pass
+
+            if captured:
+                log.info(f"URL {url_num}) Captured M3U8")
+
+                return captured[-1]
+
+            log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
+            return
+
+        except Exception as e:
+            log.warning(f"URL {url_num}) Exception while processing: {e}")
+            return
+
+        finally:
+            page.remove_listener("request", handler)
+            await page.close()
+            await browser.close()
+
+
+async def refresh_html_cache(
+    client: httpx.AsyncClient, url: str
+) -> dict[str, dict[str, str | float]]:
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}"\n{e}')
+
+        return {}
+
+    soup = HTMLParser(r.text)
+    events = {}
+
+    for row in soup.css("div.wrap div.row"):
+        if not (date := row.css_first("div.date")):
+            continue
+
+        event_dt = get_date(date.text(strip=True))
+
+        if event_dt.date() != now.date():
+            continue
+
+        league = row.css_first("div.league")
+
+        title = row.css_first("div.title")
+
+        hds_a = row.css_first("div.hds a")
+
+        if not (league and title and hds_a):
+            continue
+
+        sport, event = league.text(strip=True), title.text(strip=True)
+
+        onclick = hds_a.attributes.get("onclick", "")
+
+        if not (chnl_id := "".join(s for s in onclick if s.isdigit())):
+            continue
+
+        key = f"[{sport}] {event} (STRMD)"
+
+        events[key] = {
+            "sport": sport,
+            "event": event,
+            "link": f"https://streamed.site/set.php?{chnl_id}",
+            "ts": event_dt.timestamp(),
+        }
+
+    return events
+
+
+async def get_events(
+    client: httpx.AsyncClient,
+    url: str,
+    cached_keys: set[str],
+) -> list[dict[str, str | float]]:
+
+    if not (events := load_cache(HTML_CACHE, exp=10_800)):
+        events = await refresh_html_cache(client, url)
+        write_cache(HTML_CACHE, events)
+
+    live = []
+    start_ts = (now - timedelta(minutes=30)).timestamp()
+    end_ts = (now + timedelta(minutes=30)).timestamp()
+
+    for k, v in events.items():
+        if k in cached_keys:
+            continue
+
+        if not start_ts <= v["ts"] < end_ts:
+            continue
+
+        live.append({**v})
+
+    return live
+
+
+async def scrape(client: httpx.AsyncClient) -> None:
+    cached_urls = load_cache(CACHE_FILE, exp=10_800)
+    cached_count = len(cached_urls)
+    urls.update(cached_urls)
+
+    log.info(f"Collected {cached_count} event(s) from cache")
+
+    log.info(f'Scraping from "{base_url}"')
+
+    events = await get_events(
+        client,
+        base_url,
+        set(cached_urls.keys()),
+    )
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    for i, ev in enumerate(events, start=1):
+        url = await safe_process_event(
+            lambda: process_event(ev["link"], url_num=i),
+            url_num=i,
+            log=log,
+        )
+
+        if url:
+            sport, event = ev["sport"], ev["event"]
+
+            tvg_id, logo = leagues.info(sport)
+
+            key = f"[{sport}] {event} (STRMD)"
+
+            entry = {
+                "url": url,
+                "logo": logo,
+                "base": "https://streamed.site/",
+                "timestamp": now.timestamp(),
+                "id": tvg_id,
+            }
+
+            urls[key] = cached_urls[key] = entry
+
+    if new_count := len(cached_urls) - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+    else:
+        log.info("No new events found")
+
+    write_cache(CACHE_FILE, cached_urls)
diff --git a/M3U8/scrapers/tvpass.py b/M3U8/scrapers/tvpass.py
index 5312fc7..703b487 100644
--- a/M3U8/scrapers/tvpass.py
+++ b/M3U8/scrapers/tvpass.py
@@ -56,7 +56,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
         entry = {
             "url": f"http://origin.thetvapp.to/hls/{channel}/mono.m3u8",
             "logo": logo,
-            "id": tvg_id or "Live.Event.us",
+            "id": tvg_id,
             "base": "https://tvpass.org",
             "timestamp": now.timestamp(),
         }
diff --git a/M3U8/scrapers/utils/config.py b/M3U8/scrapers/utils/config.py
index b2d00bf..8d01b99 100644
--- a/M3U8/scrapers/utils/config.py
+++ b/M3U8/scrapers/utils/config.py
@@ -21,7 +21,7 @@ class Leagues:
     def teams(self, league: str) -> list[str]:
         return self.data["teams"].get(league, [])
 
-    def info(self, name: str) -> tuple[str | None, str]:
+    def info(self, name: str) -> tuple[str, str]:
         name = name.upper()
 
         if match := next(
@@ -38,7 +38,7 @@ class Leagues:
 
             return (tvg_id, logo or live_img)
 
-        return (None, live_img)
+        return ("Live.Event.us", live_img)
 
     def is_valid(self, event: str, league: str) -> bool:
         if match := re.search(r"(\-|vs.?)", event):