e

fix streamhub scraping; misc edits
2026-06-14 12:36:27 +02:00 · 2025-12-15 15:53:36 -05:00 · 2025-12-15 15:53:36 -05:00 · 00000d9ebc
commit 00000d9ebc
parent 86a88e206e
7 changed files with 33 additions and 29 deletions
--- a/M3U8/scrapers/pixel.py
+++ b/M3U8/scrapers/pixel.py
@@ -1,5 +1,4 @@
 import json
-import re

 from playwright.async_api import async_playwright

@@ -49,8 +48,6 @@ async def get_events() -> dict[str, dict[str, str | float]]:

    events = {}

-    pattern = re.compile(r"https?://[^\s'\"]+?\.m3u8(?:\?[^\s'\"]*)?", re.IGNORECASE)
-
    for event in api_data.get("events", []):
        event_dt = Time.from_str(event["date"], timezone="UTC")

@@ -66,19 +63,18 @@ async def get_events() -> dict[str, dict[str, str | float]]:
        stream_urls = [(i, f"server{i}URL") for i in range(1, 4)]

        for z, stream_url in stream_urls:
-            if stream_link := channel_info.get(stream_url):
-                if pattern.search(stream_link):
-                    key = f"[{sport}] {event_name} {z} ({TAG})"
+            if (stream_link := channel_info.get(stream_url)) and stream_link != "null":
+                key = f"[{sport}] {event_name} {z} ({TAG})"

-                    tvg_id, logo = leagues.get_tvg_info(sport, event_name)
+                tvg_id, logo = leagues.get_tvg_info(sport, event_name)

-                    events[key] = {
-                        "url": stream_link,
-                        "logo": logo,
-                        "base": "https://pixelsport.tv",
-                        "timestamp": now.timestamp(),
-                        "id": tvg_id or "Live.Event.us",
-                    }
+                events[key] = {
+                    "url": stream_link,
+                    "logo": logo,
+                    "base": "https://pixelsport.tv",
+                    "timestamp": now.timestamp(),
+                    "id": tvg_id or "Live.Event.us",
+                }

    return events

--- a/M3U8/scrapers/shark.py
+++ b/M3U8/scrapers/shark.py
@@ -47,7 +47,6 @@ async def process_event(
 async def refresh_html_cache(
    client: httpx.AsyncClient, now_ts: float
 ) -> dict[str, dict[str, str | float]]:
-
    log.info("Refreshing HTML cache")

    try:
--- a/M3U8/scrapers/sport9.py
+++ b/M3U8/scrapers/sport9.py
@@ -29,7 +29,7 @@ async def get_html_data(
        r = await client.get(url, params={"date": date})
        r.raise_for_status()
    except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')

        return b""

--- a/M3U8/scrapers/streamcenter.py
+++ b/M3U8/scrapers/streamcenter.py
@@ -42,7 +42,7 @@ async def refresh_api_cache(
        r = await client.get(BASE_URL, params={"pageNumber": 1, "pageSize": 500})
        r.raise_for_status()
    except Exception as e:
-        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')

        return []

--- a/M3U8/scrapers/streamfree.py
+++ b/M3U8/scrapers/streamfree.py
@@ -17,12 +17,10 @@ BASE_URL = "https://streamfree.to/"

 async def refresh_api_cache(client: httpx.AsyncClient) -> dict[str, dict[str, list]]:
    try:
-        url = urljoin(BASE_URL, "streams")
-
-        r = await client.get(url)
+        r = await client.get(urljoin(BASE_URL, "streams"))
        r.raise_for_status()
    except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')

        return {}

--- a/M3U8/scrapers/streamhub.py
+++ b/M3U8/scrapers/streamhub.py
@@ -36,15 +36,21 @@ CATEGORIES = {
 }


-async def get_html_data(client: httpx.AsyncClient, sport_id: str) -> bytes:
-    try:
-        url = urljoin(BASE_URL, f"events/{Time.now().date()}")
+async def get_html_data(
+    client: httpx.AsyncClient,
+    date: str,
+    sport_id: str,
+) -> bytes:

-        r = await client.get(url, params={"sport_id": sport_id})
+    try:
+        r = await client.get(
+            urljoin(BASE_URL, f"events/{date}"),
+            params={"sport_id": sport_id},
+        )

        r.raise_for_status()
    except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')

        return b""

@@ -53,11 +59,12 @@ async def get_html_data(client: httpx.AsyncClient, sport_id: str) -> bytes:

 async def refresh_html_cache(
    client: httpx.AsyncClient,
+    date: str,
    sport_id: str,
    ts: float,
 ) -> dict[str, dict[str, str | float]]:

-    html_data = await get_html_data(client, sport_id)
+    html_data = await get_html_data(client, date, sport_id)

    soup = HTMLParser(html_data)

@@ -113,12 +120,16 @@ async def get_events(
    if not (events := HTML_CACHE.load()):
        log.info("Refreshing HTML cache")

+        dates = [now.date(), now.delta(days=1).date()]
+
        tasks = [
            refresh_html_cache(
                client,
+                date,
                sport_id,
                now.timestamp(),
            )
+            for date in dates
            for sport_id in CATEGORIES.values()
        ]

@@ -184,7 +195,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
                    ev["event"],
                    ev["logo"],
                    ev["link"],
-                    ev["timestamp"],
+                    ev["event_ts"],
                )

                key = f"[{sport}] {event} ({TAG})"