doms9 2025-09-02 18:06:35 -04:00
parent 7617aa4bc6
commit 00000d941c
6 changed files with 243 additions and 159 deletions

.gitignore vendored
View file

@@ -12,3 +12,4 @@ wheels/
# Misc
.python-version
stuff/
cached-ca.pem

View file

@@ -7,11 +7,11 @@ from scrape import ace, fstv, livetvsx, logger, tvpass
log = logger.get_logger(__name__)
base_url = "https://s.id/ePwXT"
BASE_URL = "https://s.id/ePwXT"
m3u8_file = Path(__file__).parent / "TV.m3u8"
M3U8_FILE = Path(__file__).parent / "TV.m3u8"
client = httpx.AsyncClient(
CLIENT = httpx.AsyncClient(
timeout=5,
follow_redirects=True,
headers={
@@ -24,10 +24,10 @@ async def vanilla_fetch() -> tuple[list[str], int]:
log.info("Fetching base M3U8")
try:
r = await client.get(base_url)
r = await CLIENT.get(BASE_URL)
r.raise_for_status()
except Exception as e:
log.error(f'Failed to fetch "{base_url}"\n{e}')
log.error(f'Failed to fetch "{BASE_URL}"\n{e}')
raise SystemExit(e) from e
d = r.text.splitlines()[1:]
@@ -41,8 +41,8 @@ async def main() -> None:
tasks = [
# ace.main(client),
# fstv.main(client),
livetvsx.main(),
tvpass.main(client),
livetvsx.main(CLIENT),
tvpass.main(CLIENT),
]
await asyncio.gather(*tasks)
@@ -59,7 +59,7 @@ async def main() -> None:
)
]
m3u8_file.write_text(
M3U8_FILE.write_text(
'#EXTM3U url-tvg="https://raw.githubusercontent.com/doms9/iptv/refs/heads/default/EPG/TV.xml"\n'
+ "\n".join(base_m3u8)
+ "\n"
@@ -68,7 +68,7 @@ async def main() -> None:
encoding="utf-8",
)
log.info(f"M3U8 saved to {m3u8_file.name}")
log.info(f"M3U8 saved to {M3U8_FILE.name}")
if __name__ == "__main__":

View file

@@ -13,7 +13,7 @@ log = get_logger(__name__)
urls: dict[str, dict[str, str]] = {}
mirrors = [
MIRRORS = [
"https://aceztrims.pages.dev/",
"https://acestrlms.pages.dev/",
]
@@ -97,7 +97,7 @@ async def get_m3u8_links(client: httpx.AsyncClient, url: str) -> list[str]:
async def main(client: httpx.AsyncClient) -> None:
if not (base_url := await get_base(client, mirrors)):
if not (base_url := await get_base(client, MIRRORS)):
log.warning("No working ace mirrors")
return

View file

@@ -11,7 +11,7 @@ log = get_logger(__name__)
urls: dict[str, dict[str, str]] = {}
mirrors = [
MIRRORS = [
"https://fstv.online",
"https://fstv.space",
"https://fstv.zip",
@@ -103,7 +103,7 @@ async def fetch_m3u8(client: httpx.AsyncClient, url: str) -> tuple[str, list[str
async def main(client: httpx.AsyncClient) -> None:
if not (base_url := await get_base(client, mirrors)):
if not (base_url := await get_base(client, MIRRORS)):
log.warning("No working FSTV mirrors")
return

View file

@@ -1,20 +1,34 @@
#!/usr/bin/env python3
import asyncio
import io
import ssl
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
from urllib.parse import urljoin
from playwright.async_api import BrowserContext, Request, async_playwright
import httpx
from playwright.async_api import Request, async_playwright
from .logger import get_logger
from .tvpass import logos
base_url = "https://livetv.sx/enx/"
from .tvpass import TZ, logos
log = get_logger(__name__)
urls: dict[str, str] = {}
tvp_sports = set(logos.keys())
urls: dict[str, str] = {}
BASE_URL = "https://cdn.livetv861.me/rss/upcoming_en.xml"
CERT_BUNDL_URLS = [
"https://curl.se/ca/cacert.pem",
"https://ssl.com/repo/certs/Cloudflare-TLS-I-E1.pem",
"https://ssl.com/repo/certs/SSL.com-TLS-T-ECC-R2.pem",
"https://ssl.com/repo/certs/Sectigo-AAA-Root.pem",
]
CERT_FILE = Path(__file__).parent / "cached-ca.pem"
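# on-disk cache for the combined CA bundle (added to .gitignore above)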
async def safe_process_event(fn, timeout_sec=20) -> Any | None:
@@ -24,11 +38,113 @@ async def safe_process_event(fn, timeout_sec=20) -> Any | None:
log.warning(f"Timed out after {timeout_sec}s, skipping event")
async def process_event(
ev: dict[str, str],
context: BrowserContext,
max_wait_ms=15_000,
) -> str | None:
async def write_to_cert(client: httpx.AsyncClient, url: str, cert: Path) -> None:
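# download one PEM bundle and append it to the shared cache file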
try:
r = await client.get(url)
r.raise_for_status()
except Exception as e:
log.error(f"Failed to fetch cert bundle from {url}: {e}")
return
with cert.open("a", encoding="utf-8") as f:
f.write(f"{r.text}\n")
async def refresh_cert_cache(client: httpx.AsyncClient) -> None:
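# rebuild the cache from scratch, downloading every bundle concurrently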
CERT_FILE.unlink(missing_ok=True)
tasks = [write_to_cert(client, url, CERT_FILE) for url in CERT_BUNDL_URLS]
await asyncio.gather(*tasks)
async def get_cert(client: httpx.AsyncClient) -> ssl.SSLContext:
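# reuse the cached bundle while it is under 30 days old, otherwise refresh it first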
if CERT_FILE.is_file():
mtime = datetime.fromtimestamp(CERT_FILE.stat().st_mtime)
if datetime.now() - mtime < timedelta(days=30):
return ssl.create_default_context(cafile=CERT_FILE)
log.info("Refreshing cached certificate")
await refresh_cert_cache(client)
return ssl.create_default_context(cafile=CERT_FILE)
async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO:
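# stream the feed into an in-memory buffer so ET.iterparse can read it without a temp file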
buffer = io.BytesIO()
try:
async with httpx.AsyncClient(timeout=10, verify=ssl_ctx) as client:
async with client.stream("GET", url) as r:
r.raise_for_status()
async for chunk in r.aiter_bytes(8192):
buffer.write(chunk)
buffer.seek(0)
return buffer
except Exception as e:
log.error(f"Failed to fetch {url}: {e}")
return io.BytesIO(b"")
async def parse_feed(url: str, ssl_ctx: ssl.SSLContext) -> list[dict[str, str]]:
events = []
pub_date_format = "%a, %d %b %Y %H:%M:%S %z"
now = datetime.now(TZ)
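# keep items whose pubDate falls between 3 hours ago and 1 hour from now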
window_start, window_end = now - timedelta(hours=3), now + timedelta(hours=1)
buffer = await fetch_xml_stream(url, ssl_ctx)
for _, elem in ET.iterparse(buffer, events=("end",)):
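# incremental parse; elem.clear() frees each element once it has been handled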
if elem.tag == "item":
title = elem.findtext("title")
desc = elem.findtext("description")
pub_date = elem.findtext("pubDate")
link = elem.findtext("link")
try:
dt = datetime.strptime(pub_date, pub_date_format)
dt = dt.astimezone(TZ)
except Exception:
elem.clear()
continue
if window_start <= dt <= window_end:
sport, event = (
(
desc.split(".")[0].strip(),
" ".join(p.strip() for p in desc.split(".")[1:]),
)
if desc
else ("", "")
)
events.append(
{
"sport": sport,
"event": event,
"title": title,
"link": link,
}
)
elem.clear()
return events
async def process_event(url: str, max_wait_ms=15_000) -> str | None:
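# load the event page in headless Firefox and sniff network requests for an m3u8 URL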
async with async_playwright() as p:
browser = await p.firefox.launch(headless=True)
context = await browser.new_context(
ignore_https_errors=True # website doesn't send valid certs
)
ev_page = await context.new_page()
captured: list[str] = []
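# m3u8 request URLs recorded by the capture_req listener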
@@ -49,7 +165,11 @@ async def process_event(
popup = None
try:
await ev_page.goto(ev["href"], wait_until="domcontentloaded", timeout=30_000)
await ev_page.goto(
url,
wait_until="domcontentloaded",
timeout=30_000,
)
btn = await ev_page.query_selector(".lnkhdr > tbody > tr > td:nth-child(2)")
@@ -123,8 +243,6 @@ async def process_event(
log.warning("No m3u8 captured in popup or inline playback.")
except Exception as e:
log.error(f"Error processing {ev['name']}: {e}")
try:
ev_page.remove_listener("request", capture_req)
@@ -137,65 +255,29 @@ async def process_event(
except Exception:
pass
await browser.close()
async def main() -> None:
log.info(f'Scraping from "{base_url}"')
async with async_playwright() as p:
browser = await p.firefox.launch(headless=True)
async def main(client: httpx.AsyncClient) -> None:
log.info(f'Scraping from "{BASE_URL}"')
context = await browser.new_context(
ignore_https_errors=True # website doesn't send valid certs
)
cert = await get_cert(client)
page = await context.new_page()
events = await parse_feed(BASE_URL, cert)
await page.goto(base_url, wait_until="domcontentloaded", timeout=60_000)
rows = await page.query_selector_all("#upcoming table tr")
events = []
seen_hrefs = set()
for row in rows:
img = await row.query_selector("img")
league = (await img.get_attribute("alt") or "").strip() if img else ""
live_anchor = None
for a in await row.query_selector_all("a.live"):
txt = (await a.text_content() or "").strip()
if txt:
live_anchor = a
break
if live_anchor:
href = await live_anchor.get_attribute("href")
full_url = urljoin(base_url, href)
if full_url in seen_hrefs:
continue
seen_hrefs.add(full_url)
text = (await live_anchor.text_content() or "").strip()
events.append({"name": text, "href": full_url, "league": league})
log.info(f"Processing {len(events)} events")
for ev in events:
if (
sport := ev["league"].split(".")[-1].strip()
) in tvp_sports: # already in tvpass
if tvp_sports & {
sport := ev["sport"],
event := ev["event"],
}: # already in tvpass
continue
url = await safe_process_event(lambda: process_event(ev, context))
url = await safe_process_event(lambda: process_event(ev["link"]))
if url:
urls[f"[{sport}] {ev['name']}"] = {
urls[f"[{sport}: {event}] {ev['title']}"] = {
"url": url,
"logo": logos.get(
sport,
@@ -203,6 +285,7 @@ async def main() -> None:
),
}
await browser.close()
log.info(f"Collected {len(urls)} live events")
# add caching

View file

@@ -26,10 +26,10 @@ logos = {
"WNBA": "https://i.gyazo.com/02d665a5704118d195dbcd5fa20d5462.png",
}
TZ = pytz.timezone("America/New_York")
def load_cache() -> dict[str, str]:
TZ = pytz.timezone("America/New_York")
try:
data = json.loads(base_file.read_text(encoding="utf-8"))