From 00000d9ef768e9211d58e4e92bbf4359b3040b33 Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Thu, 16 Apr 2026 17:42:28 -0400
Subject: [PATCH] e - change caching process for select sites

---
 M3U8/scrapers/cdnlivetv.py    |  18 ++----
 M3U8/scrapers/ovogoal.py      |  26 +++-----
 M3U8/scrapers/pawa.py         |  26 +++-----
 M3U8/scrapers/roxie.py        |  26 +++-----
 M3U8/scrapers/shark.py        | 116 ++++++++++++++--------------------
 M3U8/scrapers/streamcenter.py |  37 +++++------
 M3U8/scrapers/streamtpnew.py  |  26 +++-----
 M3U8/scrapers/tvapp.py        |  45 +++++++------
 M3U8/scrapers/webcast.py      |  30 +++------
 9 files changed, 138 insertions(+), 212 deletions(-)

diff --git a/M3U8/scrapers/cdnlivetv.py b/M3U8/scrapers/cdnlivetv.py
index d4182c5c..41a8d419 100644
--- a/M3U8/scrapers/cdnlivetv.py
+++ b/M3U8/scrapers/cdnlivetv.py
@@ -30,8 +30,11 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 
         if r := await network.request(
             urljoin(API_URL, "api/v1/events/sports"),
+            params={
+                "user": "cdnlivetv",
+                "plan": "free",
+            },
             log=log,
-            params={"user": "cdnlivetv", "plan": "free"},
         ):
             api_data = r.json().get("cdn-live-tv")
 
@@ -68,22 +71,11 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 
             event_links: list[str] = [channel["url"] for channel in channels]
 
-            # if not (
-            #     link := (
-            #         event_links[0]
-            #         if len(event_links) == 1
-            #         else await network.get_base(event_links)
-            #     )
-            # ):
-            #     continue
-
-            link = event_links[0]
-
             events.append(
                 {
                     "sport": league,
                     "event": name,
-                    "link": link,
+                    "link": event_links[0],
                     "timestamp": event_dt.timestamp(),
                 }
             )
diff --git a/M3U8/scrapers/ovogoal.py b/M3U8/scrapers/ovogoal.py
index beeacbf6..4f419e14 100644
--- a/M3U8/scrapers/ovogoal.py
+++ b/M3U8/scrapers/ovogoal.py
@@ -53,7 +53,7 @@ async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]
     return match[3], iframe_src
 
 
-async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+async def get_events() -> list[dict[str, str]]:
     events = []
 
     if not (html_data := await network.request(BASE_URL, log=log)):
@@ -76,9 +76,6 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 
         event_name = event_name_elem.text(strip=True)
 
-        if f"[{sport}] {event_name} ({TAG})" in cached_keys:
-            continue
-
         events.append(
             {
                 "sport": sport,
@@ -91,20 +88,17 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 
 
 async def scrape() -> None:
-    cached_urls = CACHE_FILE.load()
+    if cached_urls := CACHE_FILE.load():
+        urls.update({k: v for k, v in cached_urls.items() if v["url"]})
 
-    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+        log.info(f"Loaded {len(urls)} event(s) from cache")
 
-    valid_count = cached_count = len(valid_urls)
-
-    urls.update(valid_urls)
-
-    log.info(f"Loaded {cached_count} event(s) from cache")
+        return
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    if events := await get_events(cached_urls.keys()):
-        log.info(f"Processing {len(events)} new URL(s)")
+    if events := await get_events():
+        log.info(f"Processing {len(events)} URL(s)")
 
         now = Time.clean(Time.now())
 
@@ -140,13 +134,11 @@ async def scrape() -> None:
             cached_urls[key] = entry
 
             if url:
-                valid_count += 1
-
                 urls[key] = entry
 
-        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")
+        log.info(f"Collected and cached {len(urls)} event(s)")
 
     else:
-        log.info("No new events found")
+        log.info("No events found")
 
     CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/pawa.py b/M3U8/scrapers/pawa.py
index ddcce199..a4cf6f96 100644
--- a/M3U8/scrapers/pawa.py
+++ b/M3U8/scrapers/pawa.py
@@ -55,7 +55,7 @@ async def process_event(url: str, url_num: int) -> str | None:
     return m3u.split("&remote")[0]
 
 
-async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+async def get_events() -> list[dict[str, str]]:
     events = []
 
     if not (html_data := await network.request(BASE_URL, log=log)):
@@ -74,9 +74,6 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 
         title = title.replace(" v ", " vs ")
 
-        if f"[{sport}] {title} ({TAG})" in cached_keys:
-            continue
-
         events.append(
             {
                 "sport": sport,
@@ -89,20 +86,17 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 
 
 async def scrape() -> None:
-    cached_urls = CACHE_FILE.load()
+    if cached_urls := CACHE_FILE.load():
+        urls.update({k: v for k, v in cached_urls.items() if v["url"]})
 
-    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+        log.info(f"Loaded {len(urls)} event(s) from cache")
 
-    valid_count = cached_count = len(valid_urls)
-
-    urls.update(valid_urls)
-
-    log.info(f"Loaded {cached_count} event(s) from cache")
+        return
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    if events := await get_events(cached_urls.keys()):
-        log.info(f"Processing {len(events)} new URL(s)")
+    if events := await get_events():
+        log.info(f"Processing {len(events)} URL(s)")
 
         now = Time.clean(Time.now())
 
@@ -138,13 +132,11 @@ async def scrape() -> None:
             cached_urls[key] = entry
 
             if url:
-                valid_count += 1
-
                 urls[key] = entry
 
-        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")
+        log.info(f"Collected and cached {len(urls)} event(s)")
 
     else:
-        log.info("No new events found")
+        log.info("No events found")
 
     CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py
index 51aa23dc..7851a305 100644
--- a/M3U8/scrapers/roxie.py
+++ b/M3U8/scrapers/roxie.py
@@ -78,7 +78,7 @@ async def process_event(
         return
 
 
-async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+async def get_events() -> list[dict[str, str]]:
     tasks = [network.request(url, log=log) for url in SPORT_URLS.values()]
 
     results = await asyncio.gather(*tasks)
@@ -102,9 +102,6 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
             if not (href := a_tag.attributes.get("href")):
                 continue
 
-            if f"[{sport}] {event} ({TAG})" in cached_keys:
-                continue
-
             events.append(
                 {
                     "sport": sport,
@@ -117,20 +114,17 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 
 
 async def scrape(browser: Browser) -> None:
-    cached_urls = CACHE_FILE.load()
+    if cached_urls := CACHE_FILE.load():
+        urls.update({k: v for k, v in cached_urls.items() if v["url"]})
 
-    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+        log.info(f"Loaded {len(urls)} event(s) from cache")
 
-    valid_count = cached_count = len(valid_urls)
-
-    urls.update(valid_urls)
-
-    log.info(f"Loaded {cached_count} event(s) from cache")
+        return
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    if events := await get_events(cached_urls.keys()):
-        log.info(f"Processing {len(events)} new URL(s)")
+    if events := await get_events():
+        log.info(f"Processing {len(events)} URL(s)")
 
         now = Time.clean(Time.now())
 
@@ -169,13 +163,11 @@ async def scrape(browser: Browser) -> None:
                     cached_urls[key] = entry
 
                     if url:
-                        valid_count += 1
-
                         urls[key] = entry
 
-        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")
+        log.info(f"Collected and cached {len(urls)} event(s)")
 
     else:
-        log.info("No new events found")
+        log.info("No events found")
 
     CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/shark.py b/M3U8/scrapers/shark.py
index a0985383..4bbfff09 100644
--- a/M3U8/scrapers/shark.py
+++ b/M3U8/scrapers/shark.py
@@ -11,9 +11,7 @@ urls: dict[str, dict[str, str | float]] = {}
 
 TAG = "SHARK"
 
-CACHE_FILE = Cache(TAG, exp=10_800)
-
-HTML_FILE = Cache(f"{TAG}-html", exp=19_800)
+CACHE_FILE = Cache(TAG, exp=19_800)
 
 BASE_URL = "https://sharkstreams.net"
 
@@ -38,8 +36,10 @@ async def process_event(url: str, url_num: int) -> str | None:
     return pattern.sub(r"chunks.m3u8", urls[0])
 
 
-async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
-    events = {}
+async def get_events() -> list[dict[str, str | float]]:
+    now = Time.clean(Time.now())
+
+    events = []
 
     if not (html_data := await network.request(BASE_URL, log=log)):
         return events
@@ -59,6 +59,9 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]
 
         event_dt = Time.from_str(date_node.text(strip=True), timezone="EST")
 
+        if event_dt.date() != now.date():
+            continue
+
         sport = sport_node.text(strip=True)
 
         event_name = name_node.text(strip=True)
@@ -73,59 +76,30 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]
 
         link = match[1].replace("player.php", "get-stream.php")
 
-        key = f"[{sport}] {event_name} ({TAG})"
-
-        events[key] = {
-            "sport": sport,
-            "event": event_name,
-            "link": link,
-            "event_ts": event_dt.timestamp(),
-            "timestamp": now_ts,
-        }
+        events.append(
+            {
+                "sport": sport,
+                "event": event_name,
+                "link": link,
+                "timestamp": now.timestamp(),
+            }
+        )
 
     return events
 
 
-async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
-    now = Time.clean(Time.now())
-
-    if not (events := HTML_FILE.load()):
-        log.info("Refreshing HTML cache")
-
-        events = await refresh_html_cache(now.timestamp())
-
-        HTML_FILE.write(events)
-
-    live = []
-
-    start_ts = now.delta(hours=-1).timestamp()
-    end_ts = now.delta(minutes=10).timestamp()
-
-    for k, v in events.items():
-        if k in cached_keys:
-            continue
-
-        if not start_ts <= v["event_ts"] <= end_ts:
-            continue
-
-        live.append(v)
-
-    return live
-
-
 async def scrape() -> None:
-    cached_urls = CACHE_FILE.load()
+    if cached_urls := CACHE_FILE.load():
+        urls.update({k: v for k, v in cached_urls.items() if v["url"]})
 
-    cached_count = len(cached_urls)
+        log.info(f"Loaded {len(urls)} event(s) from cache")
 
-    urls.update(cached_urls)
-
-    log.info(f"Loaded {cached_count} event(s) from cache")
+        return
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    if events := await get_events(cached_urls.keys()):
-        log.info(f"Processing {len(events)} new URL(s)")
+    if events := await get_events():
+        log.info(f"Processing {len(events)} URL(s)")
 
         for i, ev in enumerate(events, start=1):
             handler = partial(
@@ -141,31 +115,33 @@ async def scrape() -> None:
                 log=log,
             )
 
+            sport, event, ts = (
+                ev["sport"],
+                ev["event"],
+                ev["timestamp"],
+            )
+
+            tvg_id, logo = leagues.get_tvg_info(sport, event)
+
+            key = f"[{sport}] {event} ({TAG})"
+
+            entry = {
+                "url": url,
+                "logo": logo,
+                "base": BASE_URL,
+                "timestamp": ts,
+                "id": tvg_id or "Live.Event.us",
+                "link": link,
+            }
+
+            cached_urls[key] = entry
+
             if url:
-                sport, event, ts = (
-                    ev["sport"],
-                    ev["event"],
-                    ev["event_ts"],
-                )
+                urls[key] = entry
 
-                tvg_id, logo = leagues.get_tvg_info(sport, event)
-
-                key = f"[{sport}] {event} ({TAG})"
-
-                entry = {
-                    "url": url,
-                    "logo": logo,
-                    "base": BASE_URL,
-                    "timestamp": ts,
-                    "id": tvg_id or "Live.Event.us",
-                    "link": link,
-                }
-
-                urls[key] = cached_urls[key] = entry
-
-        log.info(f"Collected and cached {len(cached_urls) - cached_count} new event(s)")
+        log.info(f"Collected and cached {len(urls)} event(s)")
 
     else:
-        log.info("No new events found")
+        log.info("No events found")
 
     CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/streamcenter.py b/M3U8/scrapers/streamcenter.py
index 6bd83498..3fd6a946 100644
--- a/M3U8/scrapers/streamcenter.py
+++ b/M3U8/scrapers/streamcenter.py
@@ -46,7 +46,7 @@ async def process_event(url: str, url_num: int) -> str | None:
     return f"https://mainstreams.pro/hls/{iframe_src.rsplit("=", 1)[-1]}.m3u8"
 
 
-async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+async def get_events() -> list[dict[str, str]]:
     now = Time.clean(Time.now())
 
     events = []
@@ -82,14 +82,12 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
         if not (sport := CATEGORIES.get(category_id)):
             continue
 
-        if f"[{sport}] {name} ({TAG})" in cached_keys:
-            continue
-
         events.append(
             {
                 "sport": sport,
                 "event": name,
                 "link": iframe.split("<")[0],
+                "timestamp": now.timestamp(),
             }
         )
 
@@ -97,22 +95,17 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 
 
 async def scrape() -> None:
-    cached_urls = CACHE_FILE.load()
+    if cached_urls := CACHE_FILE.load():
+        urls.update({k: v for k, v in cached_urls.items() if v["url"]})
 
-    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+        log.info(f"Loaded {len(urls)} event(s) from cache")
 
-    valid_count = cached_count = len(valid_urls)
-
-    urls.update(valid_urls)
-
-    log.info(f"Loaded {cached_count} event(s) from cache")
+        return
 
     log.info('Scraping from "https://streamcenter.xyz"')
 
-    if events := await get_events(cached_urls.keys()):
-        log.info(f"Processing {len(events)} new URL(s)")
-
-        now = Time.clean(Time.now())
+    if events := await get_events():
+        log.info(f"Processing {len(events)} URL(s)")
 
         for i, ev in enumerate(events, start=1):
             handler = partial(
@@ -128,7 +121,11 @@ async def scrape() -> None:
                 log=log,
             )
 
-            sport, event = ev["sport"], ev["event"]
+            sport, event, ts = (
+                ev["sport"],
+                ev["event"],
+                ev["timestamp"],
+            )
 
             key = f"[{sport}] {event} ({TAG})"
 
@@ -138,7 +135,7 @@ async def scrape() -> None:
                 "url": url,
                 "logo": logo,
                 "base": "https://streamcenter.xyz",
-                "timestamp": now.timestamp(),
+                "timestamp": ts,
                 "id": tvg_id or "Live.Event.us",
                 "link": link,
             }
@@ -146,13 +143,11 @@ async def scrape() -> None:
             cached_urls[key] = entry
 
             if url:
-                valid_count += 1
-
                 urls[key] = entry
 
-        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")
+        log.info(f"Collected and cached {len(urls)} event(s)")
 
     else:
-        log.info("No new events found")
+        log.info("No events found")
 
     CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/streamtpnew.py b/M3U8/scrapers/streamtpnew.py
index e6dfae86..f5da675f 100644
--- a/M3U8/scrapers/streamtpnew.py
+++ b/M3U8/scrapers/streamtpnew.py
@@ -50,7 +50,7 @@ async def process_event(url: str, url_num: int) -> str | None:
     return m3u8.split("ip=")[0]
 
 
-async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+async def get_events() -> list[dict[str, str]]:
     events = []
 
     if not (api_req := await network.request(API_URL, log=log)):
@@ -70,9 +70,6 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
         if (sport := event.get("category")) and sport == "Other":
             sport = "Live Event"
 
-        if f"[{sport}] {name} ({TAG})" in cached_keys:
-            continue
-
         events.append(
             {
                 "sport": sport,
@@ -85,20 +82,17 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 
 
 async def scrape() -> None:
-    cached_urls = CACHE_FILE.load()
+    if cached_urls := CACHE_FILE.load():
+        urls.update({k: v for k, v in cached_urls.items() if v["url"]})
 
-    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+        log.info(f"Loaded {len(urls)} event(s) from cache")
 
-    valid_count = cached_count = len(valid_urls)
-
-    urls.update(valid_urls)
-
-    log.info(f"Loaded {cached_count} event(s) from cache")
+        return
 
     log.info('Scraping from "https://streamtpnew.com"')
 
-    if events := await get_events(cached_urls.keys()):
-        log.info(f"Processing {len(events)} new URL(s)")
+    if events := await get_events():
+        log.info(f"Processing {len(events)} URL(s)")
 
         now = Time.clean(Time.now())
 
@@ -134,13 +128,11 @@ async def scrape() -> None:
             cached_urls[key] = entry
 
             if url:
-                valid_count += 1
-
                 urls[key] = entry
 
-        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")
+        log.info(f"Collected and cached {len(urls)} event(s)")
 
     else:
-        log.info("No new events found")
+        log.info("No events found")
 
     CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/tvapp.py b/M3U8/scrapers/tvapp.py
index e541991a..f89a6b82 100644
--- a/M3U8/scrapers/tvapp.py
+++ b/M3U8/scrapers/tvapp.py
@@ -74,8 +74,8 @@ async def get_events() -> list[dict[str, str]]:
 
 
 async def scrape() -> None:
-    if cached := CACHE_FILE.load():
-        urls.update(cached)
+    if cached_urls := CACHE_FILE.load():
+        urls.update({k: v for k, v in cached_urls.items() if v["url"]})
 
         log.info(f"Loaded {len(urls)} event(s) from cache")
 
@@ -84,7 +84,7 @@ async def scrape() -> None:
     log.info(f'Scraping from "{BASE_URL}"')
 
     if events := await get_events():
-        log.info(f"Processing {len(events)} new URL(s)")
+        log.info(f"Processing {len(events)} URL(s)")
 
         now = Time.clean(Time.now())
 
@@ -102,24 +102,29 @@ async def scrape() -> None:
                 log=log,
             )
 
+            sport, event = ev["sport"], ev["event"]
+
+            key = f"[{sport}] {event} ({TAG})"
+
+            tvg_id, logo = leagues.get_tvg_info(sport, event)
+
+            entry = {
+                "url": url,
+                "logo": logo,
+                "base": BASE_URL,
+                "timestamp": now.timestamp(),
+                "id": tvg_id or "Live.Event.us",
+                "link": link,
+            }
+
+            cached_urls[key] = entry
+
             if url:
-                sport, event = ev["sport"], ev["event"]
-
-                key = f"[{sport}] {event} ({TAG})"
-
-                tvg_id, logo = leagues.get_tvg_info(sport, event)
-
-                entry = {
-                    "url": url,
-                    "logo": logo,
-                    "base": BASE_URL,
-                    "timestamp": now.timestamp(),
-                    "id": tvg_id or "Live.Event.us",
-                    "link": link,
-                }
-
                 urls[key] = entry
 
-    log.info(f"Collected and cached {len(urls)} new event(s)")
+        log.info(f"Collected and cached {len(urls)} event(s)")
 
-    CACHE_FILE.write(urls)
+    else:
+        log.info("No events found")
+
+    CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/webcast.py b/M3U8/scrapers/webcast.py
index 95d6f63f..df5ef690 100644
--- a/M3U8/scrapers/webcast.py
+++ b/M3U8/scrapers/webcast.py
@@ -91,7 +91,7 @@ async def process_event(
     return data.get("url")
 
 
-async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+async def get_events() -> list[dict[str, str]]:
     tasks = [network.request(url, log=log) for url in BASE_URLS.values()]
 
     results = await asyncio.gather(*tasks)
@@ -120,15 +120,10 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
             if not (href := vs_node.attributes.get("href")):
                 continue
 
-            event = fix_event(event_name)
-
-            if f"[{sport}] {event} ({TAG})" in cached_keys:
-                continue
-
             events.append(
                 {
                     "sport": sport,
-                    "event": event,
+                    "event": fix_event(event_name),
                     "link": href,
                 }
             )
@@ -137,20 +132,17 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 
 
 async def scrape() -> None:
-    cached_urls = CACHE_FILE.load()
+    if cached_urls := CACHE_FILE.load():
+        urls.update({k: v for k, v in cached_urls.items() if v["url"]})
 
-    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+        log.info(f"Loaded {len(urls)} event(s) from cache")
 
-    valid_count = cached_count = len(valid_urls)
-
-    urls.update(valid_urls)
-
-    log.info(f"Loaded {cached_count} event(s) from cache")
+        return
 
     log.info(f'Scraping from "{' & '.join(BASE_URLS.values())}"')
 
-    if events := await get_events(cached_urls.keys()):
-        log.info(f"Processing {len(events)} new URL(s)")
+    if events := await get_events():
+        log.info(f"Processing {len(events)} URL(s)")
 
         now = Time.clean(Time.now())
 
@@ -187,13 +179,11 @@ async def scrape() -> None:
             cached_urls[key] = entry
 
             if url:
-                valid_count += 1
-
                 urls[key] = entry
 
-        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")
+        log.info(f"Collected and cached {len(urls)} event(s)")
 
     else:
-        log.info("No new events found")
+        log.info("No events found")
 
     CACHE_FILE.write(cached_urls)