From 00000d941c4bfbe969341c306175e809c7dc3415 Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Tue, 2 Sep 2025 18:06:35 -0400
Subject: [PATCH] livetvsx: parse the RSS feed directly and cache the CA bundle

---
 .gitignore              |   1 +
 M3U8/fetch.py           |  18 +-
 M3U8/scrape/ace.py      |   4 +-
 M3U8/scrape/fstv.py     |   4 +-
 M3U8/scrape/livetvsx.py | 371 ++++++++++++++++++++++++----------------
 M3U8/scrape/tvpass.py   |   4 +-
 6 files changed, 243 insertions(+), 159 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5037e3b..2e6430b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ wheels/
 # Misc
 .python-version
 stuff/
+cached-ca.pem
diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 3fee23f..1090c34 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -7,11 +7,11 @@ from scrape import ace, fstv, livetvsx, logger, tvpass
 
 log = logger.get_logger(__name__)
 
-base_url = "https://s.id/ePwXT"
+BASE_URL = "https://s.id/ePwXT"
 
-m3u8_file = Path(__file__).parent / "TV.m3u8"
+M3U8_FILE = Path(__file__).parent / "TV.m3u8"
 
-client = httpx.AsyncClient(
+CLIENT = httpx.AsyncClient(
     timeout=5,
     follow_redirects=True,
     headers={
@@ -24,10 +24,10 @@ async def vanilla_fetch() -> tuple[list[str], int]:
     log.info("Fetching base M3U8")
 
     try:
-        r = await client.get(base_url)
+        r = await CLIENT.get(BASE_URL)
         r.raise_for_status()
     except Exception as e:
-        log.error(f'Failed to fetch "{base_url}"\n{e}')
+        log.error(f'Failed to fetch "{BASE_URL}"\n{e}')
         raise SystemExit(e) from e
 
     d = r.text.splitlines()[1:]
@@ -41,8 +41,8 @@ async def main() -> None:
     tasks = [
-        # ace.main(client),
-        # fstv.main(client),
-        livetvsx.main(),
-        tvpass.main(client),
+        # ace.main(CLIENT),
+        # fstv.main(CLIENT),
+        livetvsx.main(CLIENT),
+        tvpass.main(CLIENT),
     ]
 
     await asyncio.gather(*tasks)
@@ -59,7 +59,7 @@
         )
     ]
 
-    m3u8_file.write_text(
+    M3U8_FILE.write_text(
         '#EXTM3U url-tvg="https://raw.githubusercontent.com/doms9/iptv/refs/heads/default/EPG/TV.xml"\n'
         + "\n".join(base_m3u8)
         + "\n"
@@ -68,7 +68,7 @@
         encoding="utf-8",
     )
 
-    log.info(f"M3U8 saved to {m3u8_file.name}")
+    log.info(f"M3U8 saved to {M3U8_FILE.name}")
 
 
 if __name__ == "__main__":
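Note: every scraper module in this patch follows the same contract: it exposes an async `main(client)` entry point and fills a module-level `urls: dict[str, dict[str, str]]`, which fetch.py reads after `asyncio.gather()` when assembling TV.m3u8. A minimal sketch of that contract, with placeholder URLs and channel names (not a real scraper from this repo):

    import httpx

    urls: dict[str, dict[str, str]] = {}  # read by fetch.py after gather()


    async def main(client: httpx.AsyncClient) -> None:
        r = await client.get("https://example.com/schedule")  # placeholder URL
        r.raise_for_status()

        # parse the response, then register one entry per stream
        urls["[Sport] Example Event"] = {
            "url": "https://example.com/stream.m3u8",
            "logo": "https://example.com/logo.png",
        }
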
diff --git a/M3U8/scrape/ace.py b/M3U8/scrape/ace.py
index 256502d..48c0d84 100644
--- a/M3U8/scrape/ace.py
+++ b/M3U8/scrape/ace.py
@@ -13,7 +13,7 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str]] = {}
 
-mirrors = [
+MIRRORS = [
     "https://aceztrims.pages.dev/",
     "https://acestrlms.pages.dev/",
 ]
@@ -97,7 +97,7 @@ async def get_m3u8_links(client: httpx.AsyncClient, url: str) -> list[str]:
 
 
 async def main(client: httpx.AsyncClient) -> None:
-    if not (base_url := await get_base(client, mirrors)):
+    if not (base_url := await get_base(client, MIRRORS)):
         log.warning("No working ace mirrors")
         return
 
diff --git a/M3U8/scrape/fstv.py b/M3U8/scrape/fstv.py
index 631d16c..ef03466 100644
--- a/M3U8/scrape/fstv.py
+++ b/M3U8/scrape/fstv.py
@@ -11,7 +11,7 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str]] = {}
 
-mirrors = [
+MIRRORS = [
     "https://fstv.online",
     "https://fstv.space",
     "https://fstv.zip",
@@ -103,7 +103,7 @@ async def fetch_m3u8(client: httpx.AsyncClient, url: str) -> tuple[str, list[str
 
 
 async def main(client: httpx.AsyncClient) -> None:
-    if not (base_url := await get_base(client, mirrors)):
+    if not (base_url := await get_base(client, MIRRORS)):
         log.warning("No working FSTV mirrors")
         return
 
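Both modules depend on a shared `get_base` helper that this patch does not touch; judging by its call sites, it returns the first mirror that responds (or a falsy value when none do). A hedged sketch of that behavior, not the repo's actual implementation:

    import httpx


    async def get_base(client: httpx.AsyncClient, mirrors: list[str]) -> str | None:
        for url in mirrors:
            try:
                r = await client.get(url)
                r.raise_for_status()
            except Exception:
                continue  # dead mirror, try the next one

            return url  # first reachable mirror wins

        return None
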
diff --git a/M3U8/scrape/livetvsx.py b/M3U8/scrape/livetvsx.py
index 23b85de..c2bd0bf 100644
--- a/M3U8/scrape/livetvsx.py
+++ b/M3U8/scrape/livetvsx.py
@@ -1,20 +1,34 @@
 #!/usr/bin/env python3
 import asyncio
+import io
+import ssl
+import xml.etree.ElementTree as ET
+from datetime import datetime, timedelta
+from pathlib import Path
 from typing import Any
-from urllib.parse import urljoin
 
-from playwright.async_api import BrowserContext, Request, async_playwright
+import httpx
+from playwright.async_api import Request, async_playwright
 
 from .logger import get_logger
-from .tvpass import logos
-
-base_url = "https://livetv.sx/enx/"
+from .tvpass import TZ, logos
 
 log = get_logger(__name__)
 
+urls: dict[str, dict[str, str]] = {}
+
 tvp_sports = set(logos.keys())
 
-urls: dict[str, str] = {}
+BASE_URL = "https://cdn.livetv861.me/rss/upcoming_en.xml"
+
+CERT_BUNDLE_URLS = [
+    "https://curl.se/ca/cacert.pem",
+    "https://ssl.com/repo/certs/Cloudflare-TLS-I-E1.pem",
+    "https://ssl.com/repo/certs/SSL.com-TLS-T-ECC-R2.pem",
+    "https://ssl.com/repo/certs/Sectigo-AAA-Root.pem",
+]
+
+CERT_FILE = Path(__file__).parent / "cached-ca.pem"
 
 
 async def safe_process_event(fn, timeout_sec=20) -> Any | None:
@@ -24,108 +38,196 @@
         log.warning(f"Timed out after {timeout_sec}s, skipping event")
 
 
-async def process_event(
-    ev: dict[str, str],
-    context: BrowserContext,
-    max_wait_ms=15_000,
-) -> str | None:
-    ev_page = await context.new_page()
-
-    captured: list[str] = []
-
-    got_one = asyncio.Event()
-
-    def capture_req(req: Request) -> None:
-        if (
-            ".m3u8" in req.url
-            and "amazonaws" not in req.url
-            and "knitcdn" not in req.url
-            and not captured
-        ):
-            captured.append(req.url)
-
-            got_one.set()
-
-    popup = None
-
-    try:
-        await ev_page.goto(ev["href"], wait_until="domcontentloaded", timeout=30_000)
-
-        btn = await ev_page.query_selector(".lnkhdr > tbody > tr > td:nth-child(2)")
-
-        if btn:
-            try:
-                await btn.click()
-
-                await ev_page.wait_for_timeout(500)
-            except Exception as e:
-                log.debug(f"Failed to click Browser Links tab: {e}")
-        else:
-            log.warning("Browser Links tab not found")
-
-        link_img = await ev_page.query_selector(
-            "tr:nth-child(2) > td:nth-child(1) td:nth-child(6) img"
-        )
-
-        if not link_img:
-            log.warning("No browser link to click.")
-
-        ev_page.on("request", capture_req)
-
-        try:
-            async with ev_page.expect_popup(timeout=5_000) as popup_info:
-                try:
-                    await link_img.click()
-                except Exception as e:
-                    log.debug(
-                        f"Click failed (popup might have already been opened): {e}"
-                    )
-
-            popup = await popup_info.value
-
-            popup.on("request", capture_req)
-        except Exception:
-            try:
-                await link_img.click()
-            except Exception as e:
-                log.debug(f"Fallback click failed: {e}")
-
-        wait_task = asyncio.create_task(got_one.wait())
-
-        try:
-            await asyncio.wait_for(wait_task, timeout=max_wait_ms / 1000)
-
-        except asyncio.TimeoutError:
-            log.warning("Timed out waiting for m3u8.")
-
-        finally:
-            if not wait_task.done():
-                wait_task.cancel()
-
-                try:
-                    await wait_task
-                except asyncio.CancelledError:
-                    pass
-
-        ev_page.remove_listener("request", capture_req)
-
-        if popup:
-            popup.remove_listener("request", capture_req)
-
-            await popup.close()
-
-        await ev_page.close()
-
-        if captured:
-            return captured[-1]
-
-        log.warning("No m3u8 captured in popup or inline playback.")
-
-    except Exception as e:
-        log.error(f"Error processing {ev['name']}: {e}")
-
-        try:
-            ev_page.remove_listener("request", capture_req)
-
-            if popup:
-                popup.remove_listener("request", capture_req)
-
-                await popup.close()
-
-            await ev_page.close()
-        except Exception:
-            pass
-
-
-async def main() -> None:
-    log.info(f'Scraping from "{base_url}"')
-
-    async with async_playwright() as p:
-        browser = await p.firefox.launch(headless=True)
-
-        context = await browser.new_context(
-            ignore_https_errors=True  # website doesn't send valid certs
-        )
-
-        page = await context.new_page()
-
-        await page.goto(base_url, wait_until="domcontentloaded", timeout=60_000)
-
-        rows = await page.query_selector_all("#upcoming table tr")
-
-        events = []
-
-        seen_hrefs = set()
-
-        for row in rows:
-            img = await row.query_selector("img")
-
-            league = (await img.get_attribute("alt") or "").strip() if img else ""
-
-            live_anchor = None
-
-            for a in await row.query_selector_all("a.live"):
-                txt = (await a.text_content() or "").strip()
-
-                if txt:
-                    live_anchor = a
-                    break
-
-            if live_anchor:
-                href = await live_anchor.get_attribute("href")
-
-                full_url = urljoin(base_url, href)
-
-                if full_url in seen_hrefs:
-                    continue
-
-                seen_hrefs.add(full_url)
-
-                text = (await live_anchor.text_content() or "").strip()
-
-                events.append({"name": text, "href": full_url, "league": league})
-
-        for ev in events:
-            if (
-                sport := ev["league"].split(".")[-1].strip()
-            ) in tvp_sports:  # already in tvpass
-                continue
-
-            url = await safe_process_event(lambda: process_event(ev, context))
-
-            if url:
-                urls[f"[{sport}] {ev['name']}"] = {
-                    "url": url,
-                    "logo": logos.get(
-                        sport,
-                        "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png",
-                    ),
-                }
-
-        await browser.close()
-
-    log.info(f"Collected {len(urls)} live events")
+async def write_to_cert(client: httpx.AsyncClient, url: str, cert: Path) -> None:
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}": {e}')
+        return
+
+    with cert.open("a", encoding="utf-8") as f:
+        f.write(f"{r.text}\n")
+
+
+async def refresh_cert_cache(client: httpx.AsyncClient) -> None:
+    CERT_FILE.unlink(missing_ok=True)
+
+    tasks = [write_to_cert(client, url, CERT_FILE) for url in CERT_BUNDLE_URLS]
+
+    await asyncio.gather(*tasks)
+
+
+async def get_cert(client: httpx.AsyncClient) -> ssl.SSLContext:
+    if CERT_FILE.is_file():
+        mtime = datetime.fromtimestamp(CERT_FILE.stat().st_mtime)
+        if datetime.now() - mtime < timedelta(days=30):
+            return ssl.create_default_context(cafile=CERT_FILE)
+
+    log.info("Refreshing cached certificate")
+
+    await refresh_cert_cache(client)
+
+    return ssl.create_default_context(cafile=CERT_FILE)
+
+
+async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO:
+    buffer = io.BytesIO()
+
+    try:
+        async with httpx.AsyncClient(timeout=10, verify=ssl_ctx) as client:
+            async with client.stream("GET", url) as r:
+                r.raise_for_status()
+
+                async for chunk in r.aiter_bytes(8192):
+                    buffer.write(chunk)
+
+        buffer.seek(0)
+
+        return buffer
+    except Exception as e:
+        log.error(f"Failed to fetch {url}: {e}")
+
+        return io.BytesIO(b"")
+
+
+async def parse_feed(url: str, ssl_ctx: ssl.SSLContext) -> list[dict[str, str]]:
+    events = []
+    pub_date_format = "%a, %d %b %Y %H:%M:%S %z"
+    now = datetime.now(TZ)
+
+    window_start, window_end = now - timedelta(hours=3), now + timedelta(hours=1)
+
+    buffer = await fetch_xml_stream(url, ssl_ctx)
+
+    for _, elem in ET.iterparse(buffer, events=("end",)):
+        if elem.tag == "item":
+            title = elem.findtext("title")
+            desc = elem.findtext("description")
+            pub_date = elem.findtext("pubDate")
+            link = elem.findtext("link")
+
+            try:
+                dt = datetime.strptime(pub_date, pub_date_format)
+                dt = dt.astimezone(TZ)
+            except Exception:
+                elem.clear()
+                continue
+
+            if window_start <= dt <= window_end:
+                sport, event = (
+                    (
+                        desc.split(".")[0].strip(),
+                        " ".join(p.strip() for p in desc.split(".")[1:]),
+                    )
+                    if desc
+                    else ("", "")
+                )
+
+                events.append(
+                    {
+                        "sport": sport,
+                        "event": event,
+                        "title": title,
+                        "link": link,
+                    }
+                )
+
+        elem.clear()
+
+    return events
+
+
+async def process_event(url: str, max_wait_ms=15_000) -> str | None:
+    async with async_playwright() as p:
+        browser = await p.firefox.launch(headless=True)
+
+        context = await browser.new_context(
+            ignore_https_errors=True  # website doesn't send valid certs
+        )
+        ev_page = await context.new_page()
+
+        captured: list[str] = []
+
+        got_one = asyncio.Event()
+
+        def capture_req(req: Request) -> None:
+            if (
+                ".m3u8" in req.url
+                and "amazonaws" not in req.url
+                and "knitcdn" not in req.url
+                and not captured
+            ):
+                captured.append(req.url)
+
+                got_one.set()
+
+        popup = None
+
+        try:
+            await ev_page.goto(
+                url,
+                wait_until="domcontentloaded",
+                timeout=30_000,
+            )
+
+            btn = await ev_page.query_selector(".lnkhdr > tbody > tr > td:nth-child(2)")
+
+            if btn:
+                try:
+                    await btn.click()
+
+                    await ev_page.wait_for_timeout(500)
+                except Exception as e:
+                    log.debug(f"Failed to click Browser Links tab: {e}")
+            else:
+                log.warning("Browser Links tab not found")
+
+            link_img = await ev_page.query_selector(
+                "tr:nth-child(2) > td:nth-child(1) td:nth-child(6) img"
+            )
+
+            if not link_img:
+                log.warning("No browser link to click.")
+
+            ev_page.on("request", capture_req)
+
+            try:
+                async with ev_page.expect_popup(timeout=5_000) as popup_info:
+                    try:
+                        await link_img.click()
+                    except Exception as e:
+                        log.debug(
+                            f"Click failed (popup might have already been opened): {e}"
+                        )
+
+                popup = await popup_info.value
+
+                popup.on("request", capture_req)
+            except Exception:
+                try:
+                    await link_img.click()
+                except Exception as e:
+                    log.debug(f"Fallback click failed: {e}")
+
+            wait_task = asyncio.create_task(got_one.wait())
+
+            try:
+                await asyncio.wait_for(wait_task, timeout=max_wait_ms / 1000)
+
+            except asyncio.TimeoutError:
+                log.warning("Timed out waiting for m3u8.")
+
+            finally:
+                if not wait_task.done():
+                    wait_task.cancel()
+
+                    try:
+                        await wait_task
+                    except asyncio.CancelledError:
+                        pass
+
+            ev_page.remove_listener("request", capture_req)
+
+            if popup:
+                popup.remove_listener("request", capture_req)
+
+                await popup.close()
+
+            await ev_page.close()
+
+            if captured:
+                return captured[-1]
+
+            log.warning("No m3u8 captured in popup or inline playback.")
+
+        except Exception as e:
+            log.error(f"Error processing {url}: {e}")
+            try:
+                ev_page.remove_listener("request", capture_req)
+                if popup:
+                    popup.remove_listener("request", capture_req)
+
+                    await popup.close()
+
+                await ev_page.close()
+            except Exception:
+                pass
+
+        await browser.close()
+
+
+async def main(client: httpx.AsyncClient) -> None:
+    log.info(f'Scraping from "{BASE_URL}"')
+
+    cert = await get_cert(client)
+
+    events = await parse_feed(BASE_URL, cert)
+
+    log.info(f"Processing {len(events)} events")
+
+    for ev in events:
+        if tvp_sports & {
+            sport := ev["sport"],
+            event := ev["event"],
+        }:  # already in tvpass
+            continue
+
+        url = await safe_process_event(lambda: process_event(ev["link"]))
+
+        if url:
+            urls[f"[{sport}: {event}] {ev['title']}"] = {
+                "url": url,
+                "logo": logos.get(
+                    sport,
+                    "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png",
+                ),
+            }
+
+    log.info(f"Collected {len(urls)} live events")
+
+
+# TODO: add caching
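The new flow composes in three steps: `get_cert()` builds an `ssl.SSLContext` from a cached CA bundle (re-downloaded once the file is over 30 days old), `parse_feed()` keeps only RSS items whose `pubDate` falls between 3 hours back and 1 hour ahead, and `process_event()` only then spins up Playwright per surviving event to sniff the m3u8. A minimal standalone sketch of the first two steps, assuming it runs from the M3U8 directory like fetch.py (which imports the package as `scrape`):

    import asyncio

    import httpx

    from scrape import livetvsx


    async def demo() -> None:
        async with httpx.AsyncClient(timeout=5, follow_redirects=True) as client:
            ssl_ctx = await livetvsx.get_cert(client)  # serves/refreshes cached-ca.pem
            events = await livetvsx.parse_feed(livetvsx.BASE_URL, ssl_ctx)

            for ev in events:  # items inside the -3h/+1h window only
                print(f"{ev['sport']}: {ev['title']} -> {ev['link']}")


    asyncio.run(demo())
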
diff --git a/M3U8/scrape/tvpass.py b/M3U8/scrape/tvpass.py
index 487e2b6..752fddd 100644
--- a/M3U8/scrape/tvpass.py
+++ b/M3U8/scrape/tvpass.py
@@ -26,10 +26,10 @@
     "WNBA": "https://i.gyazo.com/02d665a5704118d195dbcd5fa20d5462.png",
 }
 
+TZ = pytz.timezone("America/New_York")
+
 
 def load_cache() -> dict[str, str]:
-    TZ = pytz.timezone("America/New_York")
-
     try:
         data = json.loads(base_file.read_text(encoding="utf-8"))