commit 00000d9ef1
parent 8796e2dfc6
Author: doms9
Date:   2025-09-11 14:55:53 -04:00

7 changed files with 318 additions and 44 deletions

View file

@@ -3,8 +3,8 @@ import asyncio
 from pathlib import Path

 import httpx

-from scrapers import livetvsx, ppv, streambtw, tvpass
-from scrapers.utils import get_logger
+from scrapers import livetvsx, ppv, streambtw, streameast, tvpass
+from scrapers.utils import UA, get_logger

 log = get_logger(__name__)
@@ -15,9 +15,7 @@ M3U8_FILE = Path(__file__).parent / "TV.m3u8"
 CLIENT = httpx.AsyncClient(
     timeout=5,
     follow_redirects=True,
-    headers={
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
-    },
+    headers={"User-Agent": UA},
 )
@@ -41,6 +39,7 @@ async def main() -> None:
         asyncio.create_task(livetvsx.main(CLIENT)),
         asyncio.create_task(ppv.main(CLIENT)),
         asyncio.create_task(streambtw.main(CLIENT)),
+        asyncio.create_task(streameast.main(CLIENT)),
         asyncio.create_task(tvpass.main(CLIENT)),
         vanilla_fetch(),
     ]
@@ -49,7 +48,9 @@ async def main() -> None:
     base_m3u8, tvg_chno = results[-1]

-    additions = livetvsx.urls | ppv.urls | streambtw.urls | tvpass.urls
+    additions = (
+        livetvsx.urls | ppv.urls | streambtw.urls | streameast.urls | tvpass.urls
+    )

     lines = [
         f'#EXTINF:-1 tvg-chno="{chnl_num}" tvg-id="(N/A)" tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}\n{info["url"]}'
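
The comprehension's tail falls outside this excerpt. As a minimal sketch of how the merged additions mapping becomes playlist entries, assuming the obvious for event, info in additions.items() iteration and a hypothetical starting channel number:

# Sketch only, not the commit's code verbatim: the iteration and the
# starting channel number are assumptions for illustration.
additions = {
    "[NBA] Example vs Example": {
        "url": "https://example.com/stream.m3u8",
        "logo": "https://example.com/logo.png",
    },
}
chnl_num = 1000  # hypothetical
lines = [
    f'#EXTINF:-1 tvg-chno="{chnl_num}" tvg-id="(N/A)" tvg-name="{event}" '
    f'tvg-logo="{info["logo"]}" group-title="Live Events",{event}\n{info["url"]}'
    for event, info in additions.items()
]
print("\n".join(lines))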

View file

@@ -18,7 +18,7 @@ MIRRORS = [
 ]

-async def get_hrefs(client: httpx.AsyncClient, base_url: str) -> list[tuple[str, str]]:
+async def get_events(client: httpx.AsyncClient, base_url: str) -> list[tuple[str, str]]:
     log.info(f'Scraping from "{base_url}"')

     try:
@@ -55,7 +55,7 @@ async def get_hrefs(client: httpx.AsyncClient, base_url: str) -> list[tuple[str, str]]:
     return events.items()

-async def fetch_m3u8(client: httpx.AsyncClient, url: str) -> tuple[str, list[str]]:
+async def process_events(client: httpx.AsyncClient, url: str) -> tuple[str, list[str]]:
     try:
         r = await client.get(url)
         r.raise_for_status()
@@ -89,9 +89,9 @@ async def main(client: httpx.AsyncClient) -> None:
         log.warning("No working FSTV mirrors")
         return

-    events = await get_hrefs(client, base_url)
+    events = await get_events(client, base_url)

-    tasks = [fetch_m3u8(client, href) for _, href in events if href]
+    tasks = [process_events(client, href) for _, href in events if href]

     results = await asyncio.gather(*tasks)

     for (event, _), (match_name, m3u8_urls) in zip(events, results):
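
The zip(events, results) pairing above relies on asyncio.gather returning results in the order the tasks were passed in, regardless of completion order; note that the if href filter can silently misalign the pairing whenever an event lacks a link, since events keeps entries that tasks drops. A quick self-contained illustration of the ordering guarantee:

import asyncio

async def tag(i: int) -> int:
    # Later tasks sleep less, so they finish first.
    await asyncio.sleep(0.05 * (5 - i))
    return i

async def demo() -> None:
    results = await asyncio.gather(*(tag(i) for i in range(5)))
    print(results)  # [0, 1, 2, 3, 4]: input order preserved

asyncio.run(demo())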

View file

@@ -14,6 +14,7 @@ from .utils import (
     LOGOS,
     TZ,
     capture_req,
+    firefox,
     get_logger,
     load_cache,
     now,
@@ -96,11 +97,7 @@ async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO | None:
 async def process_event(url: str, url_num: int) -> str | None:
     async with async_playwright() as p:
-        browser = await p.firefox.launch(headless=True)
-        context = await browser.new_context(
-            ignore_https_errors=True  # website doesn't send valid certs
-        )
+        browser, context = await firefox(p, ignore_https_errors=True)

         page = await context.new_page()
@@ -217,7 +214,9 @@ async def get_events(
 ) -> list[dict[str, str]]:
     events: list[dict[str, str]] = []

-    window_start, window_end = now - timedelta(hours=1), now + timedelta(minutes=30)
+    start_dt = now - timedelta(minutes=30)
+    end_dt = now + timedelta(minutes=30)

     if buffer := await fetch_xml_stream(url, ssl_ctx):
         pub_date_format = "%a, %d %b %Y %H:%M:%S %z"
@@ -236,7 +235,10 @@ async def get_events(
                 elem.clear()
                 continue

-            if window_start <= dt <= window_end:
+            if not start_dt <= dt <= end_dt:
+                elem.clear()
+                continue
+
             sport, event = (
                 (
                     desc.split(".")[0].strip(),
@@ -312,8 +314,8 @@ async def main(client: httpx.AsyncClient) -> None:
             urls[key] = cached_urls[key] = entry

     if new_count := len(cached_urls) - cached_count:
-        CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")
         log.info(f"Collected and cached {new_count} new event(s)")
     else:
         log.info("No new events found")

+    CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")
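
Here and in the next file, the cache write now happens unconditionally after the counters are logged, so the file on disk always reflects the post-expiry state rather than only changing when new events arrive. load_cache's body is not part of this diff; a hypothetical reading of its contract, inferred from the "timestamp" field each entry carries and the exp=14400 argument the new scraper passes:

import json
import time
from pathlib import Path

def load_cache(path: Path, exp: int) -> dict:
    # Hypothetical sketch: drop entries whose "timestamp" is older than
    # `exp` seconds; the real helper may differ.
    if not path.exists():
        return {}
    data = json.loads(path.read_text(encoding="utf-8"))
    cutoff = time.time() - exp
    return {k: v for k, v in data.items() if v.get("timestamp", 0) >= cutoff}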

View file

@@ -14,6 +14,7 @@ from playwright.async_api import async_playwright
 from .utils import (
     TZ,
     capture_req,
+    firefox,
     get_base,
     get_logger,
     load_cache,
@@ -55,9 +56,7 @@ async def refresh_api_cache(
 async def process_event(url: str, url_num: int) -> str | None:
     async with async_playwright() as p:
-        browser = await p.firefox.launch(headless=True)
-        context = await browser.new_context()
+        browser, context = await firefox(p)

         page = await context.new_page()
@@ -210,11 +209,11 @@ async def main(client: httpx.AsyncClient) -> None:
             urls[key] = cached_urls[key] = entry

     if new_count := len(cached_urls) - cached_count:
-        CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")
         log.info(f"Collected and cached {new_count} new event(s)")
     else:
         log.info("No new events found")

+    CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")

     # works if no cloudflare bot detection

M3U8/scrapers/streameast.py (new file, 206 additions)
View file

@@ -0,0 +1,206 @@
+import asyncio
+import json
+from datetime import datetime, timedelta
+from functools import partial
+from pathlib import Path
+from urllib.parse import urljoin
+
+import httpx
+from playwright.async_api import async_playwright
+from selectolax.parser import HTMLParser
+
+from .utils import (
+    LOGOS,
+    TZ,
+    capture_req,
+    firefox,
+    get_base,
+    get_logger,
+    load_cache,
+    now,
+    safe_process_event,
+)
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+CACHE_FILE = Path(__file__).parent / "caches" / "streameast.json"
+
+MIRRORS = [
+    "https://streameast.ga",
+    "https://streameast.tw",
+    "https://streameast.ph",
+    "https://streameast.sg",
+    "https://streameast.ch",
+    "https://streameast.ec",
+    "https://streameast.fi",
+    "https://streameast.ms",
+    "https://streameast.ps",
+    "https://streameast.cf",
+    "https://streameast.sk",
+    "https://thestreameast.co",
+    "https://thestreameast.fun",
+    "https://thestreameast.ru",
+    "https://thestreameast.su",
+]
+
+LOGOS["CFB"] = LOGOS["NCAAF"]
+LOGOS["CBB"] = LOGOS["NCAAB"]
+
+
+async def process_event(url: str, url_num: int) -> str | None:
+    async with async_playwright() as p:
+        browser, context = await firefox(p)
+        page = await context.new_page()
+
+        captured: list[str] = []
+        got_one = asyncio.Event()
+        handler = partial(capture_req, captured=captured, got_one=got_one)
+        page.on("request", handler)
+
+        try:
+            await page.goto(url, wait_until="domcontentloaded", timeout=15_000)
+
+            wait_task = asyncio.create_task(got_one.wait())
+
+            try:
+                await asyncio.wait_for(wait_task, timeout=10)
+            except asyncio.TimeoutError:
+                log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
+                return
+            finally:
+                if not wait_task.done():
+                    wait_task.cancel()
+                    try:
+                        await wait_task
+                    except asyncio.CancelledError:
+                        pass
+
+            if captured:
+                log.info(f"URL {url_num}) Captured M3U8")
+                return captured[-1]
+
+            log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
+            return
+        except Exception as e:
+            log.warning(f"URL {url_num}) Exception while processing: {e}")
+            return
+        finally:
+            page.remove_listener("request", handler)
+            await page.close()
+            await browser.close()
+
+
+async def get_events(
+    client: httpx.AsyncClient,
+    url: str,
+    cached_keys: set[str],
+) -> list[dict[str, str]]:
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}"\n{e}')
+        return []
+
+    soup = HTMLParser(r.text)
+    events = []
+
+    start_dt = now - timedelta(minutes=30)
+    end_dt = now + timedelta(minutes=30)
+
+    for li in soup.css("li.f1-podium--item"):
+        a = li.css_first("a.f1-podium--link")
+
+        if not a:
+            continue
+
+        href = urljoin(url, a.attributes.get("href", ""))
+        sport = a.css_first(".MacBaslikKat").text(strip=True)
+        name = a.css_first(".MacIsimleri").text(strip=True)
+
+        time_span = a.css_first(".f1-podium--time")
+        time_text = time_span.text(strip=True)
+        timestamp = int(time_span.attributes.get("data-zaman"))
+
+        key = f"[{sport}] {name}"
+
+        if key in cached_keys:
+            continue
+
+        event_dt = datetime.fromtimestamp(timestamp, TZ)
+
+        if time_text == "LIVE" or (start_dt <= event_dt < end_dt):
+            events.append(
+                {
+                    "sport": sport,
+                    "event": name,
+                    "link": href,
+                    "logo": LOGOS.get(
+                        sport,
+                        "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png",
+                    ),
+                }
+            )
+
+    return events
+
+
+async def main(client: httpx.AsyncClient) -> None:
+    cached_urls = load_cache(CACHE_FILE, exp=14400)
+    cached_count = len(cached_urls)
+
+    urls.update(cached_urls)
+    log.info(f"Collected {cached_count} event(s) from cache")
+
+    if not (base_url := await get_base(client, MIRRORS)):
+        log.warning("No working StreamEast mirrors")
+        return
+
+    log.info(f'Scraping from "{base_url}"')
+
+    events = await get_events(
+        client,
+        base_url,
+        set(cached_urls.keys()),
+    )
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    for i, ev in enumerate(events, start=1):
+        url = await safe_process_event(
+            lambda: process_event(ev["link"], url_num=i),
+            url_num=i,
+            log=log,
+        )
+
+        if url:
+            entry = {
+                "url": url,
+                "logo": ev["logo"],
+                "timestamp": now.timestamp(),
+            }
+
+            key = f"[{ev['sport']}] {ev['event']}"
+            urls[key] = cached_urls[key] = entry
+
+    if new_count := len(cached_urls) - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")
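
A quick way to exercise the new scraper in isolation: a sketch assuming it is run from the repo's M3U8 directory, with client settings mirroring the main script's:

import asyncio

import httpx

from scrapers import streameast
from scrapers.utils import UA

async def run() -> None:
    async with httpx.AsyncClient(
        timeout=5, follow_redirects=True, headers={"User-Agent": UA}
    ) as client:
        await streameast.main(client)
    # Module-level dict populated by main(): key -> {"url", "logo", "timestamp"}
    print(streameast.urls)

asyncio.run(run())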

View file

@@ -1,7 +1,9 @@
 from .config import (
     LOGOS,
     TZ,
+    UA,
     capture_req,
+    firefox,
     get_base,
     get_logger,
     load_cache,
@@ -12,7 +14,9 @@ from .config import (
 __all__ = [
     "LOGOS",
     "TZ",
+    "UA",
     "capture_req",
+    "firefox",
     "get_base",
     "get_logger",
     "load_cache",
View file

@@ -9,12 +9,18 @@ from typing import Any
 import httpx
 import pytz
-from playwright.async_api import Request
+from playwright.async_api import Request, Playwright, Browser, BrowserContext

 TZ = pytz.timezone("America/New_York")
 now = datetime.now(TZ)

+UA = (
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+    "AppleWebKit/537.36 (KHTML, like Gecko) "
+    "Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
+)
+
 LOGOS = {
     "MLB": "https://i.gyazo.com/0fe7865ef2f06c9507791b24f04dbca8.png",
     "NBA": "https://i.gyazo.com/773c23570f095a5d549c23b9401d83f4.png",
@@ -162,3 +168,59 @@ def capture_req(
     if valid_m3u8.search(req.url):
         captured.append(req.url)
         got_one.set()
+
+
+async def firefox(
+    playwright: Playwright, ignore_https_errors: bool = False
+) -> tuple[Browser, BrowserContext]:
+    browser = await playwright.firefox.launch(headless=True)
+
+    context = await browser.new_context(
+        user_agent=UA,
+        viewport={"width": 1366, "height": 768},
+        device_scale_factor=1,
+        locale="en-US",
+        timezone_id="America/New_York",
+        color_scheme="dark",
+        permissions=["geolocation"],
+        extra_http_headers={
+            "Accept-Language": "en-US,en;q=0.9",
+            "Upgrade-Insecure-Requests": "1",
+        },
+        ignore_https_errors=ignore_https_errors,
+    )
+
+    await context.add_init_script(
+        """
+        Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
+
+        Object.defineProperty(navigator, 'languages', {
+            get: () => ['en-US', 'en']
+        });
+
+        Object.defineProperty(navigator, 'plugins', {
+            get: () => [1, 2, 3, 4]
+        });
+
+        const elementDescriptor = Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'offsetHeight');
+
+        Object.defineProperty(HTMLDivElement.prototype, 'offsetHeight', {
+            ...elementDescriptor,
+            get: function() {
+                if (this.id === 'modernizr') { return 24; }
+                return elementDescriptor.get.apply(this);
+            }
+        });
+
+        Object.defineProperty(window.screen, 'width', { get: () => 1366 });
+        Object.defineProperty(window.screen, 'height', { get: () => 768 });
+
+        const getParameter = WebGLRenderingContext.prototype.getParameter;
+        WebGLRenderingContext.prototype.getParameter = function (param) {
+            if (param === 37445) return "Intel Inc."; // UNMASKED_VENDOR_WEBGL
+            if (param === 37446) return "Intel Iris OpenGL Engine"; // UNMASKED_RENDERER_WEBGL
+            return getParameter.apply(this, [param]);
+        };
+        """
+    )
+
+    return browser, context
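
A minimal sanity check for the new stealth context (hypothetical snippet, not part of the commit): once firefox() has applied the init script, navigator.webdriver should evaluate to undefined (None on the Python side) instead of the true that automated browsers normally report.

import asyncio

from playwright.async_api import async_playwright

from scrapers.utils import firefox

async def check() -> None:
    async with async_playwright() as p:
        browser, context = await firefox(p)
        page = await context.new_page()
        # The init script overrides the getter, so this should print None.
        print(await page.evaluate("navigator.webdriver"))
        print(await page.evaluate("navigator.languages"))  # ['en-US', 'en']
        await browser.close()

asyncio.run(check())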