From 00000d9ef12842ade8371f8ee1f167ba6a1ac48e Mon Sep 17 00:00:00 2001 From: doms9 <96013514+doms9@users.noreply.github.com> Date: Thu, 11 Sep 2025 14:55:53 -0400 Subject: [PATCH] e --- M3U8/fetch.py | 13 +- M3U8/scrapers/fstv.py | 8 +- M3U8/scrapers/livetvsx.py | 58 ++++----- M3U8/scrapers/ppv.py | 9 +- M3U8/scrapers/streameast.py | 206 ++++++++++++++++++++++++++++++++ M3U8/scrapers/utils/__init__.py | 4 + M3U8/scrapers/utils/config.py | 64 +++++++++- 7 files changed, 318 insertions(+), 44 deletions(-) create mode 100644 M3U8/scrapers/streameast.py diff --git a/M3U8/fetch.py b/M3U8/fetch.py index fa89b75..dd76016 100644 --- a/M3U8/fetch.py +++ b/M3U8/fetch.py @@ -3,8 +3,8 @@ import asyncio from pathlib import Path import httpx -from scrapers import livetvsx, ppv, streambtw, tvpass -from scrapers.utils import get_logger +from scrapers import livetvsx, ppv, streambtw, streameast, tvpass +from scrapers.utils import UA, get_logger log = get_logger(__name__) @@ -15,9 +15,7 @@ M3U8_FILE = Path(__file__).parent / "TV.m3u8" CLIENT = httpx.AsyncClient( timeout=5, follow_redirects=True, - headers={ - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0" - }, + headers={"User-Agent": UA}, ) @@ -41,6 +39,7 @@ async def main() -> None: asyncio.create_task(livetvsx.main(CLIENT)), asyncio.create_task(ppv.main(CLIENT)), asyncio.create_task(streambtw.main(CLIENT)), + asyncio.create_task(streameast.main(CLIENT)), asyncio.create_task(tvpass.main(CLIENT)), vanilla_fetch(), ] @@ -49,7 +48,9 @@ async def main() -> None: base_m3u8, tvg_chno = results[-1] - additions = livetvsx.urls | ppv.urls | streambtw.urls | tvpass.urls + additions = ( + livetvsx.urls | ppv.urls | streambtw.urls | streameast.urls | tvpass.urls + ) lines = [ f'#EXTINF:-1 tvg-chno="{chnl_num}" tvg-id="(N/A)" tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}\n{info["url"]}' diff --git a/M3U8/scrapers/fstv.py b/M3U8/scrapers/fstv.py index 1abda54..1ab4012 100644 --- a/M3U8/scrapers/fstv.py +++ b/M3U8/scrapers/fstv.py @@ -18,7 +18,7 @@ MIRRORS = [ ] -async def get_hrefs(client: httpx.AsyncClient, base_url: str) -> list[tuple[str, str]]: +async def get_events(client: httpx.AsyncClient, base_url: str) -> list[tuple[str, str]]: log.info(f'Scraping from "{base_url}"') try: @@ -55,7 +55,7 @@ async def get_hrefs(client: httpx.AsyncClient, base_url: str) -> list[tuple[str, return events.items() -async def fetch_m3u8(client: httpx.AsyncClient, url: str) -> tuple[str, list[str]]: +async def process_events(client: httpx.AsyncClient, url: str) -> tuple[str, list[str]]: try: r = await client.get(url) r.raise_for_status() @@ -89,9 +89,9 @@ async def main(client: httpx.AsyncClient) -> None: log.warning("No working FSTV mirrors") return - events = await get_hrefs(client, base_url) + events = await get_events(client, base_url) - tasks = [fetch_m3u8(client, href) for _, href in events if href] + tasks = [process_events(client, href) for _, href in events if href] results = await asyncio.gather(*tasks) for (event, _), (match_name, m3u8_urls) in zip(events, results): diff --git a/M3U8/scrapers/livetvsx.py b/M3U8/scrapers/livetvsx.py index 7afc54e..0c062b0 100644 --- a/M3U8/scrapers/livetvsx.py +++ b/M3U8/scrapers/livetvsx.py @@ -14,6 +14,7 @@ from .utils import ( LOGOS, TZ, capture_req, + firefox, get_logger, load_cache, now, @@ -96,11 +97,7 @@ async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO | No async def 
process_event(url: str, url_num: int) -> str | None: async with async_playwright() as p: - browser = await p.firefox.launch(headless=True) - - context = await browser.new_context( - ignore_https_errors=True # website doesn't send valid certs - ) + browser, context = await firefox(p, ignore_https_errors=True) page = await context.new_page() @@ -217,7 +214,9 @@ async def get_events( ) -> list[dict[str, str]]: events: list[dict[str, str]] = [] - window_start, window_end = now - timedelta(hours=1), now + timedelta(minutes=30) + + start_dt = now - timedelta(minutes=30) + end_dt = now + timedelta(minutes=30) if buffer := await fetch_xml_stream(url, ssl_ctx): pub_date_format = "%a, %d %b %Y %H:%M:%S %z" @@ -236,30 +235,33 @@ async def get_events( elem.clear() continue - if window_start <= dt <= window_end: - sport, event = ( - ( - desc.split(".")[0].strip(), - " ".join(p.strip() for p in desc.split(".")[1:]), - ) - if desc - else ("", "") + if not start_dt <= dt <= end_dt: + elem.clear() + continue + + sport, event = ( + ( + desc.split(".")[0].strip(), + " ".join(p.strip() for p in desc.split(".")[1:]), ) + if desc + else ("", "") + ) - key = f"[{sport}: {event}] {title}" + key = f"[{sport}: {event}] {title}" - if key in cached_keys: - elem.clear() - continue + if key in cached_keys: + elem.clear() + continue - events.append( - { - "sport": sport, - "event": event, - "title": title, - "link": link, - } - ) + events.append( + { + "sport": sport, + "event": event, + "title": title, + "link": link, + } + ) elem.clear() @@ -312,8 +314,8 @@ async def main(client: httpx.AsyncClient) -> None: urls[key] = cached_urls[key] = entry if new_count := len(cached_urls) - cached_count: - CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8") - log.info(f"Collected and cached {new_count} new event(s)") else: log.info("No new events found") + + CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8") diff --git a/M3U8/scrapers/ppv.py b/M3U8/scrapers/ppv.py index 66be2e8..fbe1da5 100644 --- a/M3U8/scrapers/ppv.py +++ b/M3U8/scrapers/ppv.py @@ -14,6 +14,7 @@ from playwright.async_api import async_playwright from .utils import ( TZ, capture_req, + firefox, get_base, get_logger, load_cache, @@ -55,9 +56,7 @@ async def refresh_api_cache( async def process_event(url: str, url_num: int) -> str | None: async with async_playwright() as p: - browser = await p.firefox.launch(headless=True) - - context = await browser.new_context() + browser, context = await firefox(p) page = await context.new_page() @@ -210,11 +209,11 @@ async def main(client: httpx.AsyncClient) -> None: urls[key] = cached_urls[key] = entry if new_count := len(cached_urls) - cached_count: - CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8") - log.info(f"Collected and cached {new_count} new event(s)") else: log.info("No new events found") + CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8") + # works if no cloudflare bot detection diff --git a/M3U8/scrapers/streameast.py b/M3U8/scrapers/streameast.py new file mode 100644 index 0000000..706d3f3 --- /dev/null +++ b/M3U8/scrapers/streameast.py @@ -0,0 +1,206 @@ +import asyncio +import json +from datetime import datetime, timedelta +from functools import partial +from pathlib import Path +from urllib.parse import urljoin + +import httpx +from playwright.async_api import async_playwright +from selectolax.parser import HTMLParser + +from .utils import ( + LOGOS, + TZ, + capture_req, + firefox, + get_base, + get_logger, + 
load_cache, + now, + safe_process_event, +) + +log = get_logger(__name__) + +urls: dict[str, dict[str, str | float]] = {} + +CACHE_FILE = Path(__file__).parent / "caches" / "streameast.json" + +MIRRORS = [ + "https://streameast.ga", + "https://streameast.tw", + "https://streameast.ph", + "https://streameast.sg", + "https://streameast.ch", + "https://streameast.ec", + "https://streameast.fi", + "https://streameast.ms", + "https://streameast.ps", + "https://streameast.cf", + "https://streameast.sk", + "https://thestreameast.co", + "https://thestreameast.fun", + "https://thestreameast.ru", + "https://thestreameast.su", +] + +LOGOS["CFB"] = LOGOS["NCAAF"] +LOGOS["CBB"] = LOGOS["NCAAB"] + + +async def process_event(url: str, url_num: int) -> str | None: + async with async_playwright() as p: + browser, context = await firefox(p) + + page = await context.new_page() + + captured: list[str] = [] + + got_one = asyncio.Event() + + handler = partial(capture_req, captured=captured, got_one=got_one) + + page.on("request", handler) + + try: + await page.goto(url, wait_until="domcontentloaded", timeout=15_000) + + wait_task = asyncio.create_task(got_one.wait()) + + try: + await asyncio.wait_for(wait_task, timeout=10) + except asyncio.TimeoutError: + log.warning(f"URL {url_num}) Timed out waiting for M3U8.") + return + + finally: + if not wait_task.done(): + wait_task.cancel() + + try: + await wait_task + except asyncio.CancelledError: + pass + + if captured: + log.info(f"URL {url_num}) Captured M3U8") + + return captured[-1] + + log.warning(f"URL {url_num}) No M3U8 captured after waiting.") + return + + except Exception as e: + log.warning(f"URL {url_num}) Exception while processing: {e}") + return + + finally: + page.remove_listener("request", handler) + await page.close() + await browser.close() + + +async def get_events( + client: httpx.AsyncClient, + url: str, + cached_keys: list[str], +) -> list[dict[str, str]]: + try: + r = await client.get(url) + r.raise_for_status() + except Exception as e: + log.error(f'Failed to fetch "{url}"\n{e}') + + return [] + + soup = HTMLParser(r.text) + events = [] + + start_dt = now - timedelta(minutes=30) + end_dt = now + timedelta(minutes=30) + + for li in soup.css("li.f1-podium--item"): + a = li.css_first("a.f1-podium--link") + + if not a: + continue + + href = urljoin(url, a.attributes.get("href", "")) + + sport = a.css_first(".MacBaslikKat").text(strip=True) + + name = a.css_first(".MacIsimleri").text(strip=True) + + time_span = a.css_first(".f1-podium--time") + time_text = time_span.text(strip=True) + timestamp = int(time_span.attributes.get("data-zaman")) + + key = f"[{sport}] {name}" + + if key in cached_keys: + continue + + event_dt = datetime.fromtimestamp(timestamp, TZ) + + if time_text == "LIVE" or (start_dt <= event_dt < end_dt): + events.append( + { + "sport": sport, + "event": name, + "link": href, + "logo": LOGOS.get( + sport, + "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png", + ), + } + ) + + return events + + +async def main(client: httpx.AsyncClient) -> None: + cached_urls = load_cache(CACHE_FILE, exp=14400) + cached_count = len(cached_urls) + urls.update(cached_urls) + + log.info(f"Collected {cached_count} event(s) from cache") + + if not (base_url := await get_base(client, MIRRORS)): + log.warning("No working StreamEast mirrors") + return + + log.info(f'Scraping from "{base_url}"') + + events = await get_events( + client, + base_url, + set(cached_urls.keys()), + ) + + log.info(f"Processing {len(events)} new URL(s)") + + for i, ev in 
enumerate(events, start=1): + url = await safe_process_event( + lambda: process_event(ev["link"], url_num=i), + url_num=i, + log=log, + ) + + if url: + entry = { + "url": url, + "logo": ev["logo"], + "timestamp": now.timestamp(), + } + + key = f"[{ev['sport']}] {ev['event']}" + + urls[key] = cached_urls[key] = entry + + if new_count := len(cached_urls) - cached_count: + log.info(f"Collected and cached {new_count} new event(s)") + else: + log.info("No new events found") + + CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8") diff --git a/M3U8/scrapers/utils/__init__.py b/M3U8/scrapers/utils/__init__.py index de1bf57..855bdb0 100644 --- a/M3U8/scrapers/utils/__init__.py +++ b/M3U8/scrapers/utils/__init__.py @@ -1,7 +1,9 @@ from .config import ( LOGOS, TZ, + UA, capture_req, + firefox, get_base, get_logger, load_cache, @@ -12,7 +14,9 @@ from .config import ( __all__ = [ "LOGOS", "TZ", + "UA", "capture_req", + "firefox", "get_base", "get_logger", "load_cache", diff --git a/M3U8/scrapers/utils/config.py b/M3U8/scrapers/utils/config.py index a3eaaae..5731e0f 100644 --- a/M3U8/scrapers/utils/config.py +++ b/M3U8/scrapers/utils/config.py @@ -9,12 +9,18 @@ from typing import Any import httpx import pytz -from playwright.async_api import Request +from playwright.async_api import Request, Playwright, Browser, BrowserContext TZ = pytz.timezone("America/New_York") now = datetime.now(TZ) +UA = ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0" +) + LOGOS = { "MLB": "https://i.gyazo.com/0fe7865ef2f06c9507791b24f04dbca8.png", "NBA": "https://i.gyazo.com/773c23570f095a5d549c23b9401d83f4.png", @@ -162,3 +168,59 @@ def capture_req( if valid_m3u8.search(req.url): captured.append(req.url) got_one.set() + + +async def firefox( + playwright: Playwright, ignore_https_errors: bool = False +) -> tuple[Browser, BrowserContext]: + browser = await playwright.firefox.launch(headless=True) + + context = await browser.new_context( + user_agent=UA, + viewport={"width": 1366, "height": 768}, + device_scale_factor=1, + locale="en-US", + timezone_id="America/New_York", + color_scheme="dark", + permissions=["geolocation"], + extra_http_headers={ + "Accept-Language": "en-US,en;q=0.9", + "Upgrade-Insecure-Requests": "1", + }, + ignore_https_errors=ignore_https_errors, + ) + + await context.add_init_script( + """ + Object.defineProperty(navigator, 'webdriver', {get: () => undefined}); + + Object.defineProperty(navigator, 'languages', { + get: () => ['en-US', 'en'] + }); + + Object.defineProperty(navigator, 'plugins', { + get: () => [1, 2, 3, 4] + }); + + const elementDescriptor = Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'offsetHeight'); + Object.defineProperty(HTMLDivElement.prototype, 'offsetHeight', { + ...elementDescriptor, + get: function() { + if (this.id === 'modernizr') { return 24; } + return elementDescriptor.get.apply(this); + } + }); + + Object.defineProperty(window.screen, 'width', { get: () => 1366 }); + Object.defineProperty(window.screen, 'height', { get: () => 768 }); + + const getParameter = WebGLRenderingContext.prototype. getParameter; + WebGLRenderingContext.prototype.getParameter = function (param) { + if (param === 37445) return "Intel Inc."; // UNMASKED_VENDOR_WEBGL + if (param === 37446) return "Intel Iris OpenGL Engine"; // UNMASKED_RENDERER_WEBGL + return getParameter.apply(this, [param]); + }; + """ + ) + + return browser, context
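
Usage sketch: the scrapers touched above (ppv.py, livetvsx.py, and the new streameast.py) all pair the new firefox() helper with the existing capture_req handler to sniff the first M3U8 request off an event page. A minimal standalone version of that pattern, using an illustrative placeholder URL and timeouts rather than a real mirror, looks roughly like this:

import asyncio
from functools import partial

from playwright.async_api import async_playwright

from scrapers.utils import capture_req, firefox


async def grab_m3u8(url: str) -> str | None:
    async with async_playwright() as p:
        # firefox() launches headless Firefox and returns a hardened context
        # (shared UA, spoofed navigator/WebGL properties, en-US locale/timezone).
        browser, context = await firefox(p)
        page = await context.new_page()

        captured: list[str] = []
        got_one = asyncio.Event()

        # capture_req appends any request whose URL looks like an M3U8 playlist
        # and sets got_one so the caller can stop waiting early.
        handler = partial(capture_req, captured=captured, got_one=got_one)
        page.on("request", handler)

        try:
            await page.goto(url, wait_until="domcontentloaded", timeout=15_000)
            try:
                await asyncio.wait_for(got_one.wait(), timeout=10)
            except asyncio.TimeoutError:
                return None
            return captured[-1] if captured else None
        finally:
            page.remove_listener("request", handler)
            await page.close()
            await browser.close()


if __name__ == "__main__":
    # Placeholder URL for illustration only; real callers pass scraped event links.
    print(asyncio.run(grab_m3u8("https://example.stream/event")))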