- change scrape window for istreameast.py
- harden scraping method for roxie.py
- catch nulls for ppv.py
- change scraping method for totalsportek.py
- miscellaneous cleanup across remaining files
This commit is contained in:
doms9 2026-02-18 15:47:50 -05:00
parent 0fe9c5b1dd
commit 00000d940c
7 changed files with 141 additions and 87 deletions

View file

@ -17,7 +17,7 @@ CACHE_FILE = Cache(TAG, exp=10_800)
HTML_CACHE = Cache(f"{TAG}-html", exp=28_800)
MIRRORS = ["https://streamhub.pro", "https://livesports4u.net"]
BASE_URL = "https://livesports4u.net"
CATEGORIES = {
"Soccer": "sport_68c02a4464a38",
@ -35,7 +35,6 @@ CATEGORIES = {
async def refresh_html_cache(
url: str,
date: str,
sport_id: str,
ts: float,
@ -45,7 +44,7 @@ async def refresh_html_cache(
if not (
html_data := await network.request(
urljoin(url, f"events/{date}"),
urljoin(BASE_URL, f"events/{date}"),
log=log,
params={"sport_id": sport_id},
)
@ -95,7 +94,7 @@ async def refresh_html_cache(
return events
async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now())
if not (events := HTML_CACHE.load()):
@ -103,7 +102,6 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
tasks = [
refresh_html_cache(
url,
date,
sport_id,
now.timestamp(),
@ -146,16 +144,9 @@ async def scrape(browser: Browser) -> None:
log.info(f"Loaded {cached_count} event(s) from cache")
if not (base_url := await network.get_base(MIRRORS)):
log.warning("No working PPV mirrors")
log.info(f'Scraping from "{BASE_URL}"')
CACHE_FILE.write(cached_urls)
return
log.info(f'Scraping from "{base_url}"')
events = await get_events(base_url, cached_urls.keys())
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")