e

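fix scraping for streamhub (fetch each sport's live-now page separately); rename module-level constants and the sport9 HTML fetch helper for consistency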
2026-03-11 11:57:38 +01:00 · 2025-12-13 21:29:13 -05:00 · 2025-12-13 21:29:13 -05:00 · 00000d9c19
commit 00000d9c19
parent 15ea61dcb3
4 changed files with 63 additions and 38 deletions
--- a/M3U8/scrapers/sport9.py
+++ b/M3U8/scrapers/sport9.py
@ -19,7 +19,7 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
 BASE_URL = "https://sport9.ru"


-async def get_html(
+async def get_html_data(
    client: httpx.AsyncClient,
    url: str,
    date: str,
@ -43,7 +43,7 @@ async def get_events(
    now = Time.now()

    tasks = [
-        get_html(client, BASE_URL, str(d.date()))
+        get_html_data(client, BASE_URL, str(d.date()))
        for d in [
            now.delta(days=-1),
            now,
--- a/M3U8/scrapers/streamcenter.py
+++ b/M3U8/scrapers/streamcenter.py
@ -18,7 +18,7 @@ API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 BASE_URL = "https://backendstreamcenter.youshop.pro:488/api/Parties"


-categories = {
+CATEGORIES = {
    4: "Basketball",
    9: "Football",
    13: "Baseball",
@ -87,7 +87,7 @@ async def get_events(
        if not start_dt <= event_dt <= end_dt:
            continue

-        if not (sport := categories.get(category_id)):
+        if not (sport := CATEGORIES.get(category_id)):
            continue

        key = f"[{sport}] {name} ({TAG})"
--- a/M3U8/scrapers/streamhub.py
+++ b/M3U8/scrapers/streamhub.py
@ -1,3 +1,4 @@
+import asyncio
 from functools import partial

 import httpx
@ -17,42 +18,67 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
 BASE_URL = "https://streamhub.pro/live-now"


-async def get_events(
-    client: httpx.AsyncClient, cached_keys: set[str]
-) -> list[dict[str, str]]:
+CATEGORIES = {
+    "Soccer": "sport_68c02a4464a38",
+    "American Football": "sport_68c02a4465113",
+    # "Baseball": "sport_68c02a446582f",
+    "Basketball": "sport_68c02a4466011",
+    # "Cricket": "sport_68c02a44669f3",
+    "Hockey": "sport_68c02a4466f56",
+    "MMA": "sport_68c02a44674e9",
+    "Racing": "sport_68c02a4467a48",
+    # "Rugby": "sport_68c02a4467fc1",
+    # "Tennis": "sport_68c02a4468cf7",
+    # "Volleyball": "sport_68c02a4469422",
+}
+
+
+async def get_html_data(client: httpx.AsyncClient, sport: str) -> bytes:
    try:
-        r = await client.get(BASE_URL)
+        r = await client.get(BASE_URL, params={"sport_id": sport})
        r.raise_for_status()
    except Exception as e:
        log.error(f'Failed to fetch "{BASE_URL}": {e}')

-        return []
+        return b""

-    soup = HTMLParser(r.content)
+    return r.content
+
+
+async def get_events(
+    client: httpx.AsyncClient, cached_keys: set[str]
+) -> list[dict[str, str]]:
+
+    tasks = [get_html_data(client, sport) for sport in CATEGORIES.values()]
+
+    results = await asyncio.gather(*tasks)
+
+    soups = [HTMLParser(html) for html in results]

    events = []

-    for event in soup.css(".events-section"):
-        if not (title_node := event.css_first(".section-titlte")):
+    for soup in soups:
+        for section in soup.css(".events-section"):
+            if not (sport_node := section.css_first(".section-titlte")):
                continue

-        sport = title_node.text(strip=True)
+            sport = sport_node.text(strip=True)

-        if not event.css_first(".event-competitors"):
-            continue
+            logo = section.css_first(".league-icon img").attributes.get("src")

-        home_team = event.css_first(".event-home-team").text(strip=True)
-        away_team = event.css_first(".event-visitor-team").text(strip=True)
+            for event in section.css(".section-event"):
+                event_name = "Live Event"

-        logo = event.css_first(".league-icon img").attributes.get("src")
+                if teams := event.css_first(".event-competitors"):
+                    home, away = teams.text(strip=True).split("vs.")
+
+                    event_name = f"{away} vs {home}"

                if not (event_button := event.css_first("div.event-button a")) or not (
                    href := event_button.attributes.get("href")
                ):
                    continue

-        event_name = f"{away_team} vs {home_team}"
-
                key = f"[{sport}] {event_name} ({TAG})"

                if cached_keys & {key}:
--- a/M3U8/scrapers/timstreams.py
+++ b/M3U8/scrapers/timstreams.py
@ -22,8 +22,7 @@ BASE_MIRRORS = [
    "https://timstreams.top",
 ]

-
-sport_genres = {
+SPORT_GENRES = {
    1: "Soccer",
    2: "Motorsport",
    3: "MMA",
@ -79,7 +78,7 @@ async def get_events(
            if (genre := ev["genre"]) in {16, 17}:
                continue

-            sport = sport_genres.get(genre, "Live Event")
+            sport = SPORT_GENRES.get(genre, "Live Event")

            streams: list[dict[str, str]] = ev["streams"]