From 00000d98552f8aaac73d4877b5151850efbd14d4 Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Wed, 3 Sep 2025 00:00:22 -0400
Subject: [PATCH] Cache livetvsx events; run vanilla_fetch concurrently

---
 M3U8/fetch.py           |   9 +--
 M3U8/scrape/ace.py      |   2 +-
 M3U8/scrape/fstv.py     |   5 +-
 M3U8/scrape/livetvsx.py | 120 ++++++++++++++++++++++++++++------------
 M3U8/scrape/tvpass.py   |   4 +-
 5 files changed, 98 insertions(+), 42 deletions(-)

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 1090c34..80f1a3d 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -41,13 +41,14 @@ async def main() -> None:
     tasks = [
         # ace.main(client),
         # fstv.main(client),
-        livetvsx.main(CLIENT),
-        tvpass.main(CLIENT),
+        asyncio.create_task(livetvsx.main(CLIENT)),
+        asyncio.create_task(tvpass.main(CLIENT)),
+        vanilla_fetch(),
     ]
 
-    await asyncio.gather(*tasks)
+    results = await asyncio.gather(*tasks)
 
-    base_m3u8, tvg_chno = await vanilla_fetch()
+    base_m3u8, tvg_chno = results[-1]
 
     additions = ace.urls | fstv.urls | livetvsx.urls | tvpass.urls
 
diff --git a/M3U8/scrape/ace.py b/M3U8/scrape/ace.py
index 48c0d84..3080531 100644
--- a/M3U8/scrape/ace.py
+++ b/M3U8/scrape/ace.py
@@ -50,7 +50,7 @@ async def get_schedule(client: httpx.AsyncClient, base_url: str) -> list[dict]:
             )
 
             if " - " in after_time:
-                sport, event_name = [x.strip() for x in after_time.split(" - ", 1)]
+                sport, event_name = (x.strip() for x in after_time.split(" - ", 1))
             else:
                 sport, event_name = "", after_time
 
diff --git a/M3U8/scrape/fstv.py b/M3U8/scrape/fstv.py
index ef03466..8e6ebb1 100644
--- a/M3U8/scrape/fstv.py
+++ b/M3U8/scrape/fstv.py
@@ -131,4 +131,7 @@ async def main(client: httpx.AsyncClient) -> None:
             ),
         }
 
-    log.info(f"Collected {len(urls)} live events")
+    log.info(f"Collected {len(urls)} live event(s)")
+
+
+# add caching
diff --git a/M3U8/scrape/livetvsx.py b/M3U8/scrape/livetvsx.py
index c2bd0bf..0ef6cfe 100644
--- a/M3U8/scrape/livetvsx.py
+++ b/M3U8/scrape/livetvsx.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import asyncio
 import io
+import json
 import ssl
 import xml.etree.ElementTree as ET
 from datetime import datetime, timedelta
@@ -30,12 +31,15 @@ CERT_BUNDL_URLS = [
 
 CERT_FILE = Path(__file__).parent / "cached-ca.pem"
 
+CACHE_FILE = Path(__file__).parent / "livetvsx.json"
 
-async def safe_process_event(fn, timeout_sec=20) -> Any | None:
+
+async def safe_process_event(fn, url_num: int, timeout=20) -> Any | None:
     try:
-        return await asyncio.wait_for(fn(), timeout=timeout_sec)
+        return await asyncio.wait_for(fn(), timeout=timeout)
     except asyncio.TimeoutError:
-        log.warning(f"Timed out after {timeout_sec}s, skipping event")
+        log.warning(f"URL {url_num}) Timed out after {timeout}s, skipping event")
+        return
 
 
 async def write_to_cert(client: httpx.AsyncClient, url: str, cert: Path) -> None:
@@ -71,6 +75,21 @@ async def get_cert(client: httpx.AsyncClient) -> ssl.SSLContext:
     return ssl.create_default_context(cafile=CERT_FILE)
 
 
+def load_cache() -> dict[str, dict[str, str | float]]:
+    try:
+        data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
+
+        now = datetime.now().timestamp()
+
+        return {
+            k: v
+            for k, v in data.items()
+            if now - v.get("timestamp", 0) < timedelta(hours=4).total_seconds()
+        }
+    except (FileNotFoundError, json.JSONDecodeError):
+        return {}
+
+
 async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO:
     buffer = io.BytesIO()
 
@@ -91,8 +110,13 @@ async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO:
     return io.BytesIO(b"")
 
 
-async def parse_feed(url: str, ssl_ctx: ssl.SSLContext) -> dict[str, dict[str, str]]:
-    events = []
+async def parse_feed( + url: str, + ssl_ctx: ssl.SSLContext, + cached_keys: set[str], +) -> list[dict[str, str]]: + + events: list[dict[str, str]] = [] pub_date_format = "%a, %d %b %Y %H:%M:%S %z" now = datetime.now(TZ) @@ -124,21 +148,28 @@ async def parse_feed(url: str, ssl_ctx: ssl.SSLContext) -> dict[str, dict[str, s else ("", "") ) - events.append( - { - "sport": sport, - "event": event, - "title": title, - "link": link, - } - ) + key = f"[{sport}: {event}] {title}" + + if key in cached_keys: + elem.clear() + continue + + elif not tvp_sports & {sport, event}: + events.append( + { + "sport": sport, + "event": event, + "title": title, + "link": link, + } + ) elem.clear() return events -async def process_event(url: str, max_wait_ms=15_000) -> str | None: +async def process_event(url: str, url_num: int, max_wait_ms=15_000) -> str | None: async with async_playwright() as p: browser = await p.firefox.launch(headless=True) @@ -179,16 +210,18 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None: await ev_page.wait_for_timeout(500) except Exception as e: - log.debug(f"Failed to click Browser Links tab: {e}") + log.debug(f"URL {url_num}) Failed to click Browser Links tab: {e}") + return else: - log.warning("Browser Links tab not found") + log.warning(f"URL {url_num}) Browser Links tab not found") link_img = await ev_page.query_selector( "tr:nth-child(2) > td:nth-child(1) td:nth-child(6) img" ) if not link_img: - log.warning("No browser link to click.") + log.warning(f"URL {url_num}) No browser link to click.") + return ev_page.on("request", capture_req) @@ -198,7 +231,7 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None: await link_img.click() except Exception as e: log.debug( - f"Click failed (popup might have already been opened): {e}" + f"URL {url_num}) Click failed (popup might have already been opened): {e}" ) popup = await popup_info.value @@ -209,7 +242,8 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None: try: await link_img.click() except Exception as e: - log.debug(f"Fallback click failed: {e}") + log.debug(f"URL {url_num}) Fallback click failed: {e}") + return wait_task = asyncio.create_task(got_one.wait()) @@ -217,7 +251,8 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None: await asyncio.wait_for(wait_task, timeout=max_wait_ms / 1000) except asyncio.TimeoutError: - log.warning("Timed out waiting for m3u8.") + log.warning(f"URL {url_num}) Timed out waiting for m3u8.") + return finally: if not wait_task.done(): @@ -238,10 +273,12 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None: await ev_page.close() if captured: + log.info(f"URL {url_num}) Captured M3U8") + return captured[-1] - log.warning("No m3u8 captured in popup or inline playback.") - + log.warning(f"URL {url_num}) No m3u8 captured in popup or inline playback.") + return except Exception as e: try: ev_page.remove_listener("request", capture_req) @@ -263,29 +300,44 @@ async def main(client: httpx.AsyncClient) -> None: cert = await get_cert(client) - events = await parse_feed(BASE_URL, cert) + cached_urls = load_cache() + cached_keys = set(cached_urls.keys()) + cached_count = len(cached_urls) - log.info(f"Processing {len(events)} events") + events = await parse_feed(BASE_URL, cert, cached_keys) - for ev in events: - if tvp_sports & { - sport := ev["sport"], - event := ev["event"], - }: # already in tvpass - continue + log.info(f"Processing {len(events)} URLs") - url = await safe_process_event(lambda: process_event(ev["link"])) + 
now_ts = datetime.now().timestamp() + + for num, ev in enumerate(events, start=1): + sport = ev["sport"] + event = ev["event"] + title = ev["title"] + link = ev["link"] + + key = f"[{sport}: {event}] {title}" + + url = await safe_process_event( + lambda: process_event(link, url_num=num), url_num=num + ) if url: - urls[f"[{sport}: {event}] {ev['title']}"] = { + entry = { "url": url, "logo": logos.get( sport, "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png", ), + "timestamp": now_ts, } - log.info(f"Collected {len(urls)} live events") + urls[key] = cached_urls[key] = entry + CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8") -# add caching + new_count = len(cached_urls) - cached_count + + log.info(f"Cached {cached_count} event(s)") + + log.info(f"Collected {new_count} new event(s)") diff --git a/M3U8/scrape/tvpass.py b/M3U8/scrape/tvpass.py index 752fddd..cc2262e 100644 --- a/M3U8/scrape/tvpass.py +++ b/M3U8/scrape/tvpass.py @@ -51,7 +51,7 @@ async def fetch_m3u8(client: httpx.AsyncClient) -> list[str] | None: async def main(client: httpx.AsyncClient) -> None: if cached := load_cache(): urls.update(cached) - log.info(f"Collected {len(urls)} events from cache") + log.info(f"Collected {len(urls)} event(s) from cache") return log.info(f'Scraping from "{base_url}"') @@ -87,4 +87,4 @@ async def main(client: httpx.AsyncClient) -> None: if urls: base_file.write_text(json.dumps(urls, indent=2), encoding="utf-8") - log.info(f"Cached {len(urls)} events") + log.info(f"Cached {len(urls)} event(s)")
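
A note on the fetch.py hunk above: asyncio.gather() returns results in the same
order the awaitables were passed in, which is why results[-1] can be unpacked as
(base_m3u8, tvg_chno) even though the scrapers run concurrently. A minimal,
self-contained sketch under that assumption; scraper() and vanilla_fetch() here
are stand-ins, not the project's real functions:

    import asyncio


    async def scraper() -> None:
        # stand-in for livetvsx.main(CLIENT) / tvpass.main(CLIENT), which
        # fill module-level dicts and return nothing
        await asyncio.sleep(0.2)


    async def vanilla_fetch() -> tuple[str, int]:
        # stand-in for the real vanilla_fetch(); returns (base_m3u8, tvg_chno)
        await asyncio.sleep(0.1)
        return "#EXTM3U", 1


    async def main() -> None:
        tasks = [
            asyncio.create_task(scraper()),
            asyncio.create_task(scraper()),
            vanilla_fetch(),
        ]

        # gather() preserves input order, so the last slot always holds
        # vanilla_fetch()'s return value even if it finishes first
        results = await asyncio.gather(*tasks)

        base_m3u8, tvg_chno = results[-1]
        print(base_m3u8, tvg_chno)


    asyncio.run(main())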
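The livetvsx cache introduced above is a JSON file keyed by event title, where
each entry carries a "timestamp" and anything older than four hours is dropped
on load. A standalone sketch of that round trip, assuming the same livetvsx.json
layout; save_cache() is a hypothetical helper (the patch writes the file inline
in main()), and the sample entry is illustrative only:

    import json
    from datetime import datetime, timedelta
    from pathlib import Path

    CACHE_FILE = Path(__file__).parent / "livetvsx.json"
    TTL = timedelta(hours=4)


    def load_cache() -> dict[str, dict[str, str | float]]:
        # read the cache and keep only entries newer than the TTL
        try:
            data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
        except (FileNotFoundError, json.JSONDecodeError):
            return {}

        now = datetime.now().timestamp()

        return {
            key: entry
            for key, entry in data.items()
            if now - entry.get("timestamp", 0) < TTL.total_seconds()
        }


    def save_cache(entries: dict[str, dict[str, str | float]]) -> None:
        # hypothetical helper; the patch calls CACHE_FILE.write_text() directly
        CACHE_FILE.write_text(json.dumps(entries, indent=2), encoding="utf-8")


    if __name__ == "__main__":
        cache = load_cache()

        cache["[Soccer: Demo League] Home vs Away"] = {
            "url": "https://example.com/stream.m3u8",
            "logo": "https://example.com/logo.png",
            "timestamp": datetime.now().timestamp(),
        }

        save_cache(cache)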