Compare commits

...

36 commits

| Author | SHA1 | Message | Date |
| ------ | ---- | ------- | ---- |
| doms9 | 00000d9ebc | e (fix streamhub scraping; misc edits) | 2025-12-15 15:53:36 -05:00 |
| GitHub Actions Bot | 86a88e206e | health log | 2025-12-15 20:44:37 +00:00 |
| GitHub Actions Bot | 9ceaf58464 | update M3U8 | 2025-12-15 15:31:33 -05:00 |
| GitHub Actions Bot | 38d0b4789b | update M3U8 | 2025-12-15 15:01:42 -05:00 |
| GitHub Actions Bot | 1ed6ae2fa2 | update M3U8 | 2025-12-15 14:32:02 -05:00 |
| GitHub Actions Bot | 25099a00bc | update M3U8 | 2025-12-15 14:01:36 -05:00 |
| GitHub Actions Bot | 45b9e13357 | update EPG | 2025-12-15 18:59:48 +00:00 |
| GitHub Actions Bot | c2176bfa6c | update M3U8 | 2025-12-15 13:31:33 -05:00 |
| GitHub Actions Bot | 290177daaa | update M3U8 | 2025-12-15 13:01:38 -05:00 |
| GitHub Actions Bot | aaa01c8496 | update M3U8 | 2025-12-15 12:02:30 -05:00 |
| GitHub Actions Bot | 33129a8005 | update M3U8 | 2025-12-15 11:01:32 -05:00 |
| GitHub Actions Bot | d846a24f62 | update M3U8 | 2025-12-15 10:02:04 -05:00 |
| GitHub Actions Bot | 6cc57b8353 | health log | 2025-12-15 14:51:09 +00:00 |
| GitHub Actions Bot | 7404d016da | update M3U8 | 2025-12-15 09:00:40 -05:00 |
| GitHub Actions Bot | ed889f0c52 | update M3U8 | 2025-12-15 08:01:54 -05:00 |
| GitHub Actions Bot | 3cde03ff54 | update EPG | 2025-12-15 10:56:56 +00:00 |
| GitHub Actions Bot | 345f6df1d6 | health log | 2025-12-15 08:53:42 +00:00 |
| doms9 | 00000d9cc1 | e (cache all events for streamhub instead of live events) | 2025-12-15 02:06:46 -05:00 |
| GitHub Actions Bot | f755ffc78b | update M3U8 | 2025-12-14 23:30:26 -05:00 |
| GitHub Actions Bot | 91e4994c32 | update M3U8 | 2025-12-14 23:01:04 -05:00 |
| GitHub Actions Bot | 2f47e80d83 | update EPG | 2025-12-15 03:56:37 +00:00 |
| GitHub Actions Bot | 31f5671034 | health log | 2025-12-15 03:52:12 +00:00 |
| GitHub Actions Bot | 783953d797 | update M3U8 | 2025-12-14 22:30:45 -05:00 |
| GitHub Actions Bot | a953d526df | update M3U8 | 2025-12-14 22:00:51 -05:00 |
| GitHub Actions Bot | 5302dccdac | update M3U8 | 2025-12-14 21:30:30 -05:00 |
| GitHub Actions Bot | 58d4140a2e | update M3U8 | 2025-12-14 21:01:09 -05:00 |
| GitHub Actions Bot | 6f5f9c45fd | update M3U8 | 2025-12-14 20:30:51 -05:00 |
| GitHub Actions Bot | 7fdcefb0c1 | update M3U8 | 2025-12-14 20:01:11 -05:00 |
| GitHub Actions Bot | 819b3f5f1f | update M3U8 | 2025-12-14 19:31:07 -05:00 |
| GitHub Actions Bot | 65a5e11448 | update M3U8 | 2025-12-14 19:01:52 -05:00 |
| GitHub Actions Bot | 19cb160712 | update M3U8 | 2025-12-14 18:31:37 -05:00 |
| GitHub Actions Bot | d5f714251e | update M3U8 | 2025-12-14 18:02:27 -05:00 |
| GitHub Actions Bot | 69d67c467c | update M3U8 | 2025-12-14 17:31:25 -05:00 |
| GitHub Actions Bot | 3e1cac41c1 | update M3U8 | 2025-12-14 17:01:45 -05:00 |
| GitHub Actions Bot | bb3600ede9 | update M3U8 | 2025-12-14 16:32:15 -05:00 |
| GitHub Actions Bot | a1b593d216 | update M3U8 | 2025-12-14 16:02:56 -05:00 |
12 changed files with 89623 additions and 91548 deletions

EPG/TV.xml (175318)

File diff suppressed because one or more lines are too long.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

```diff
@@ -62,7 +62,7 @@ async def main() -> None:
         asyncio.create_task(streamhub.scrape(network.client)),
         asyncio.create_task(streamsgate.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
-        # asyncio.create_task(timstreams.scrape(network.client)),
+        asyncio.create_task(timstreams.scrape(network.client)),
         asyncio.create_task(tvpass.scrape(network.client)),
         asyncio.create_task(watchfooty.scrape(network.client)),
         asyncio.create_task(webcast.scrape(network.client)),
```
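The hunk above re-enables the timstreams scraper so it runs alongside the other modules; each scraper coroutine is started as its own task and awaited together. Below is a minimal sketch of that fan-out pattern; the placeholder coroutines and string client are hypothetical stand-ins for the real scraper modules and network.client:

```python
import asyncio


# Placeholder coroutines standing in for the real scraper modules.
async def scrape_streamhub(client: str) -> str:
    await asyncio.sleep(0.1)  # simulate network I/O
    return "streamhub done"


async def scrape_timstreams(client: str) -> str:
    await asyncio.sleep(0.1)
    return "timstreams done"


async def main() -> None:
    client = "shared-http-client"  # stand-in for the shared network.client

    # Launch every scraper concurrently, then wait for all of them.
    tasks = [
        asyncio.create_task(scrape_streamhub(client)),
        asyncio.create_task(scrape_timstreams(client)),
    ]
    results = await asyncio.gather(*tasks)
    print(results)


asyncio.run(main())
```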

```diff
@@ -1,5 +1,4 @@
 import json
-import re
 
 from playwright.async_api import async_playwright
@@ -49,8 +48,6 @@ async def get_events() -> dict[str, dict[str, str | float]]:
     events = {}
 
-    pattern = re.compile(r"https?://[^\s'\"]+?\.m3u8(?:\?[^\s'\"]*)?", re.IGNORECASE)
-
     for event in api_data.get("events", []):
         event_dt = Time.from_str(event["date"], timezone="UTC")
@@ -66,19 +63,18 @@ async def get_events() -> dict[str, dict[str, str | float]]:
         stream_urls = [(i, f"server{i}URL") for i in range(1, 4)]
 
         for z, stream_url in stream_urls:
-            if stream_link := channel_info.get(stream_url):
-                if pattern.search(stream_link):
-                    key = f"[{sport}] {event_name} {z} ({TAG})"
-
-                    tvg_id, logo = leagues.get_tvg_info(sport, event_name)
-
-                    events[key] = {
-                        "url": stream_link,
-                        "logo": logo,
-                        "base": "https://pixelsport.tv",
-                        "timestamp": now.timestamp(),
-                        "id": tvg_id or "Live.Event.us",
-                    }
+            if (stream_link := channel_info.get(stream_url)) and stream_link != "null":
+                key = f"[{sport}] {event_name} {z} ({TAG})"
+
+                tvg_id, logo = leagues.get_tvg_info(sport, event_name)
+
+                events[key] = {
+                    "url": stream_link,
+                    "logo": logo,
+                    "base": "https://pixelsport.tv",
+                    "timestamp": now.timestamp(),
+                    "id": tvg_id or "Live.Event.us",
+                }
 
     return events
```

```diff
@@ -57,6 +57,18 @@ async def process_event(
     return match[1]
 
 
+async def get_html_data(client: httpx.AsyncClient, url: str) -> bytes:
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}": {e}')
+        return b""
+
+    return r.content
+
+
 async def refresh_html_cache(
     client: httpx.AsyncClient,
     url: str,
@@ -64,15 +76,9 @@ async def refresh_html_cache(
     now_ts: float,
 ) -> dict[str, dict[str, str | float]]:
-    try:
-        r = await client.get(url)
-        r.raise_for_status()
-
-    except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
-        return {}
-
-    soup = HTMLParser(r.content)
+    html_data = await get_html_data(client, url)
+
+    soup = HTMLParser(html_data)
 
     events = {}
@@ -108,16 +114,15 @@
 async def get_events(
-    client: httpx.AsyncClient,
-    sport_urls: dict[str, str],
-    cached_keys: set[str],
+    client: httpx.AsyncClient, cached_keys: set[str]
 ) -> list[dict[str, str]]:
     now = Time.clean(Time.now())
 
     if not (events := HTML_CACHE.load()):
         log.info("Refreshing HTML cache")
 
+        sport_urls = {sport: urljoin(BASE_URL, sport) for sport in SPORT_ENDPOINTS}
+
         tasks = [
             refresh_html_cache(
                 client,
@@ -160,13 +165,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
     log.info(f'Scraping from "{BASE_URL}"')
 
-    sport_urls = {sport: urljoin(BASE_URL, sport) for sport in SPORT_ENDPOINTS}
-
-    events = await get_events(
-        client,
-        sport_urls,
-        set(cached_urls.keys()),
-    )
+    events = await get_events(client, set(cached_urls.keys()))
 
     log.info(f"Processing {len(events)} new URL(s)")
```

```diff
@@ -47,7 +47,6 @@ async def process_event(
 async def refresh_html_cache(
     client: httpx.AsyncClient, now_ts: float
 ) -> dict[str, dict[str, str | float]]:
-
     log.info("Refreshing HTML cache")
 
     try:
```

```diff
@@ -29,7 +29,7 @@ async def get_html_data(
         r = await client.get(url, params={"date": date})
         r.raise_for_status()
 
     except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')
         return b""
```

```diff
@@ -42,7 +42,7 @@ async def refresh_api_cache(
         r = await client.get(BASE_URL, params={"pageNumber": 1, "pageSize": 500})
         r.raise_for_status()
 
     except Exception as e:
-        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')
         return []
```

```diff
@@ -17,12 +17,10 @@ BASE_URL = "https://streamfree.to/"
 async def refresh_api_cache(client: httpx.AsyncClient) -> dict[str, dict[str, list]]:
     try:
-        url = urljoin(BASE_URL, "streams")
-
-        r = await client.get(url)
+        r = await client.get(urljoin(BASE_URL, "streams"))
         r.raise_for_status()
 
     except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')
         return {}
```

```diff
@@ -1,5 +1,6 @@
 import asyncio
 from functools import partial
+from urllib.parse import urljoin
 
 import httpx
 from playwright.async_api import async_playwright
@@ -15,7 +16,9 @@ TAG = "STRMHUB"
 CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=28_800)
 
-BASE_URL = "https://streamhub.pro/live-now"
+BASE_URL = "https://streamhub.pro/"
 
 CATEGORIES = {
@@ -33,69 +36,126 @@ CATEGORIES = {
 }
 
 
-async def get_html_data(client: httpx.AsyncClient, sport: str) -> bytes:
+async def get_html_data(
+    client: httpx.AsyncClient,
+    date: str,
+    sport_id: str,
+) -> bytes:
     try:
-        r = await client.get(BASE_URL, params={"sport_id": sport})
+        r = await client.get(
+            urljoin(BASE_URL, f"events/{date}"),
+            params={"sport_id": sport_id},
+        )
         r.raise_for_status()
 
     except Exception as e:
-        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')
        return b""
 
     return r.content
 
 
-async def get_events(
-    client: httpx.AsyncClient, cached_keys: set[str]
-) -> list[dict[str, str]]:
-    tasks = [get_html_data(client, sport) for sport in CATEGORIES.values()]
-    results = await asyncio.gather(*tasks)
-
-    soups = [HTMLParser(html) for html in results]
-
-    events = []
-
-    for soup in soups:
-        for section in soup.css(".events-section"):
-            if not (sport_node := section.css_first(".section-titlte")):
-                continue
-
-            sport = sport_node.text(strip=True)
-
-            logo = section.css_first(".league-icon img").attributes.get("src")
-
-            for event in section.css(".section-event"):
-                event_name = "Live Event"
-
-                if teams := event.css_first(".event-competitors"):
-                    home, away = teams.text(strip=True).split("vs.")
-
-                    event_name = f"{away} vs {home}"
-
-                if not (event_button := event.css_first("div.event-button a")) or not (
-                    href := event_button.attributes.get("href")
-                ):
-                    continue
-
-                key = f"[{sport}] {event_name} ({TAG})"
-
-                if cached_keys & {key}:
-                    continue
-
-                events.append(
-                    {
-                        "sport": sport,
-                        "event": event_name,
-                        "link": href,
-                        "logo": logo,
-                    }
-                )
-
-    return events
+async def refresh_html_cache(
+    client: httpx.AsyncClient,
+    date: str,
+    sport_id: str,
+    ts: float,
+) -> dict[str, dict[str, str | float]]:
+    html_data = await get_html_data(client, date, sport_id)
+
+    soup = HTMLParser(html_data)
+
+    events = {}
+
+    for section in soup.css(".events-section"):
+        if not (sport_node := section.css_first(".section-titlte")):
+            continue
+
+        sport = sport_node.text(strip=True)
+
+        logo = section.css_first(".league-icon img").attributes.get("src")
+
+        for event in section.css(".section-event"):
+            event_name = "Live Event"
+
+            if teams := event.css_first(".event-competitors"):
+                home, away = teams.text(strip=True).split("vs.")
+
+                event_name = f"{away} vs {home}"
+
+            if not (event_button := event.css_first(".event-button a")) or not (
+                href := event_button.attributes.get("href")
+            ):
+                continue
+
+            event_date = event.css_first(".event-countdown").attributes.get(
+                "data-start"
+            )
+
+            event_dt = Time.from_str(event_date, timezone="UTC")
+
+            key = f"[{sport}] {event_name} ({TAG})"
+
+            events[key] = {
+                "sport": sport,
+                "event": event_name,
+                "link": href,
+                "logo": logo,
+                "timestamp": ts,
+                "event_ts": event_dt.timestamp(),
+            }
+
+    return events
+
+
+async def get_events(
+    client: httpx.AsyncClient,
+    cached_keys: set[str],
+) -> list[dict[str, str]]:
+    now = Time.clean(Time.now())
+
+    if not (events := HTML_CACHE.load()):
+        log.info("Refreshing HTML cache")
+
+        dates = [now.date(), now.delta(days=1).date()]
+
+        tasks = [
+            refresh_html_cache(
+                client,
+                date,
+                sport_id,
+                now.timestamp(),
+            )
+            for date in dates
+            for sport_id in CATEGORIES.values()
+        ]
+
+        results = await asyncio.gather(*tasks)
+
+        events = {k: v for data in results for k, v in data.items()}
+
+        HTML_CACHE.write(events)
+
+    live = []
+
+    start_ts = now.delta(hours=-1).timestamp()
+    end_ts = now.delta(minutes=5).timestamp()
+
+    for k, v in events.items():
+        if cached_keys & {k}:
+            continue
+
+        if not start_ts <= v["event_ts"] <= end_ts:
+            continue
+
+        live.append({**v})
+
+    return live
 
 
 async def scrape(client: httpx.AsyncClient) -> None:
     cached_urls = CACHE_FILE.load()
 
     valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
```
```diff
@@ -111,8 +171,6 @@ async def scrape(client: httpx.AsyncClient) -> None:
     log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
-        now = Time.now().timestamp()
-
         async with async_playwright() as p:
             browser, context = await network.browser(p)
```
```diff
@@ -132,11 +190,12 @@ async def scrape(client: httpx.AsyncClient) -> None:
                     log=log,
                 )
 
-                sport, event, logo, link = (
+                sport, event, logo, link, ts = (
                     ev["sport"],
                     ev["event"],
                     ev["logo"],
                     ev["link"],
+                    ev["event_ts"],
                 )
 
                 key = f"[{sport}] {event} ({TAG})"
```
```diff
@@ -147,7 +206,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
                     "url": url,
                     "logo": logo or pic,
                     "base": "https://storytrench.net/",
-                    "timestamp": now,
+                    "timestamp": ts,
                     "id": tvg_id or "Live.Event.us",
                     "link": link,
                 }
```
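Taken together with the "cache all events for streamhub instead of live events" commit, the rewrite above caches every upcoming streamhub event (today and tomorrow, per category) and only selects the live ones when get_events runs: events that started within the last hour or start within the next five minutes. Below is a minimal sketch of that selection window, using plain datetime in place of the repo's Time and Cache helpers; the CACHED_EVENTS sample data and the live_events function are hypothetical stand-ins:

```python
from datetime import datetime, timedelta, timezone

# Hypothetical stand-in for the repo's HTML_CACHE contents.
CACHED_EVENTS: dict[str, dict[str, object]] = {
    "[Soccer] Team A vs Team B (STRMHUB)": {
        "link": "/event/123",
        "event_ts": datetime.now(timezone.utc).timestamp() + 120,  # starts in 2 minutes
    },
    "[Basketball] Team C vs Team D (STRMHUB)": {
        "link": "/event/456",
        "event_ts": datetime.now(timezone.utc).timestamp() + 86_400,  # starts tomorrow
    },
}


def live_events(cached_keys: set[str]) -> list[dict[str, object]]:
    """Keep only events that started within the last hour or start within
    the next five minutes, skipping keys that were already scraped."""
    now = datetime.now(timezone.utc)
    start_ts = (now - timedelta(hours=1)).timestamp()
    end_ts = (now + timedelta(minutes=5)).timestamp()

    live = []
    for key, event in CACHED_EVENTS.items():
        if key in cached_keys:
            continue
        if not start_ts <= event["event_ts"] <= end_ts:
            continue
        live.append({**event})
    return live


# Only the match starting in two minutes falls inside the live window.
print(live_events(cached_keys=set()))
```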

````diff
@@ -1,12 +1,10 @@
-## Base Log @ 2025-12-14 20:40 UTC
+## Base Log @ 2025-12-15 20:44 UTC
 
-### ✅ Working Streams: 143<br>❌ Dead Streams: 3
+### ✅ Working Streams: 145<br>❌ Dead Streams: 1
 
 | Channel | Error (Code) | Link |
 | ------- | ------------ | ---- |
 | FDSN Florida | HTTP Error (403) | `http://cord-cutter.net:8080/k4Svp2/645504/46794` |
-| Spectrum SportsNet LA Dodgers | HTTP Error (502) | `http://cord-cutter.net:8080/k4Svp2/645504/31636` |
-| getTV | HTTP Error (403) | `http://cord-cutter.net:8080/k4Svp2/645504/18366` |
 
 ---
 
 #### Base Channels URL
 ```
````