fix scraping for streamhub
This commit is contained in:
doms9 2025-12-13 21:29:13 -05:00
parent 15ea61dcb3
commit 00000d9c19
4 changed files with 63 additions and 38 deletions

View file

@@ -19,7 +19,7 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
 BASE_URL = "https://sport9.ru"


-async def get_html(
+async def get_html_data(
     client: httpx.AsyncClient,
     url: str,
     date: str,
@@ -43,7 +43,7 @@ async def get_events(
     now = Time.now()

     tasks = [
-        get_html(client, BASE_URL, str(d.date()))
+        get_html_data(client, BASE_URL, str(d.date()))
         for d in [
             now.delta(days=-1),
             now,

View file

@@ -18,7 +18,7 @@ API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 BASE_URL = "https://backendstreamcenter.youshop.pro:488/api/Parties"


-categories = {
+CATEGORIES = {
     4: "Basketball",
     9: "Football",
     13: "Baseball",
@@ -87,7 +87,7 @@ async def get_events(
         if not start_dt <= event_dt <= end_dt:
             continue

-        if not (sport := categories.get(category_id)):
+        if not (sport := CATEGORIES.get(category_id)):
             continue

         key = f"[{sport}] {name} ({TAG})"

View file

@@ -1,3 +1,4 @@
+import asyncio
 from functools import partial

 import httpx
@@ -17,42 +18,67 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
 BASE_URL = "https://streamhub.pro/live-now"

-async def get_events(
-    client: httpx.AsyncClient, cached_keys: set[str]
-) -> list[dict[str, str]]:
+CATEGORIES = {
+    "Soccer": "sport_68c02a4464a38",
+    "American Football": "sport_68c02a4465113",
+    # "Baseball": "sport_68c02a446582f",
+    "Basketball": "sport_68c02a4466011",
+    # "Cricket": "sport_68c02a44669f3",
+    "Hockey": "sport_68c02a4466f56",
+    "MMA": "sport_68c02a44674e9",
+    "Racing": "sport_68c02a4467a48",
+    # "Rugby": "sport_68c02a4467fc1",
+    # "Tennis": "sport_68c02a4468cf7",
+    # "Volleyball": "sport_68c02a4469422",
+}
+
+
+async def get_html_data(client: httpx.AsyncClient, sport: str) -> bytes:
     try:
-        r = await client.get(BASE_URL)
+        r = await client.get(BASE_URL, params={"sport_id": sport})
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{BASE_URL}": {e}')
-        return []
+        return b""

-    soup = HTMLParser(r.content)
+    return r.content
+
+
+async def get_events(
+    client: httpx.AsyncClient, cached_keys: set[str]
+) -> list[dict[str, str]]:
+    tasks = [get_html_data(client, sport) for sport in CATEGORIES.values()]
+    results = await asyncio.gather(*tasks)
+
+    soups = [HTMLParser(html) for html in results]

     events = []

-    for event in soup.css(".events-section"):
-        if not (title_node := event.css_first(".section-titlte")):
-            continue
+    for soup in soups:
+        for section in soup.css(".events-section"):
+            if not (sport_node := section.css_first(".section-titlte")):
+                continue

-        sport = title_node.text(strip=True)
+            sport = sport_node.text(strip=True)
+            logo = section.css_first(".league-icon img").attributes.get("src")

-        if not event.css_first(".event-competitors"):
-            continue
+            for event in section.css(".section-event"):
+                event_name = "Live Event"

-        home_team = event.css_first(".event-home-team").text(strip=True)
-        away_team = event.css_first(".event-visitor-team").text(strip=True)
-        logo = event.css_first(".league-icon img").attributes.get("src")
+                if teams := event.css_first(".event-competitors"):
+                    home, away = teams.text(strip=True).split("vs.")
+                    event_name = f"{away} vs {home}"

-        if not (event_button := event.css_first("div.event-button a")) or not (
-            href := event_button.attributes.get("href")
-        ):
-            continue
+                if not (event_button := event.css_first("div.event-button a")) or not (
+                    href := event_button.attributes.get("href")
+                ):
+                    continue

-        event_name = f"{away_team} vs {home_team}"
-        key = f"[{sport}] {event_name} ({TAG})"
+                key = f"[{sport}] {event_name} ({TAG})"

-        if cached_keys & {key}:
+                if cached_keys & {key}:

View file

@@ -22,8 +22,7 @@ BASE_MIRRORS = [
     "https://timstreams.top",
 ]

-
-sport_genres = {
+SPORT_GENRES = {
     1: "Soccer",
     2: "Motorsport",
     3: "MMA",
@@ -79,7 +78,7 @@ async def get_events(
         if (genre := ev["genre"]) in {16, 17}:
             continue

-        sport = sport_genres.get(genre, "Live Event")
+        sport = SPORT_GENRES.get(genre, "Live Event")

         streams: list[dict[str, str]] = ev["streams"]