From 00000d9c193eaafa6b84a02551e11631d122ff5e Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Sat, 13 Dec 2025 21:29:13 -0500
Subject: [PATCH] fix scraping for streamhub

---
 M3U8/scrapers/sport9.py       |  4 +-
 M3U8/scrapers/streamcenter.py |  4 +-
 M3U8/scrapers/streamhub.py    | 88 +++++++++++++++++++++++------------
 M3U8/scrapers/timstreams.py   |  5 +-
 4 files changed, 63 insertions(+), 38 deletions(-)

diff --git a/M3U8/scrapers/sport9.py b/M3U8/scrapers/sport9.py
index a835aad..6056edd 100644
--- a/M3U8/scrapers/sport9.py
+++ b/M3U8/scrapers/sport9.py
@@ -19,7 +19,7 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
 BASE_URL = "https://sport9.ru"
 
 
-async def get_html(
+async def get_html_data(
     client: httpx.AsyncClient,
     url: str,
     date: str,
@@ -43,7 +43,7 @@ async def get_events(
     now = Time.now()
 
     tasks = [
-        get_html(client, BASE_URL, str(d.date()))
+        get_html_data(client, BASE_URL, str(d.date()))
         for d in [
             now.delta(days=-1),
             now,
diff --git a/M3U8/scrapers/streamcenter.py b/M3U8/scrapers/streamcenter.py
index 2cf5a1e..7d36041 100644
--- a/M3U8/scrapers/streamcenter.py
+++ b/M3U8/scrapers/streamcenter.py
@@ -18,7 +18,7 @@ API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 BASE_URL = "https://backendstreamcenter.youshop.pro:488/api/Parties"
 
 
-categories = {
+CATEGORIES = {
     4: "Basketball",
     9: "Football",
     13: "Baseball",
@@ -87,7 +87,7 @@ async def get_events(
         if not start_dt <= event_dt <= end_dt:
             continue
 
-        if not (sport := categories.get(category_id)):
+        if not (sport := CATEGORIES.get(category_id)):
             continue
 
         key = f"[{sport}] {name} ({TAG})"
diff --git a/M3U8/scrapers/streamhub.py b/M3U8/scrapers/streamhub.py
index 98bc5b2..c8f0bbf 100644
--- a/M3U8/scrapers/streamhub.py
+++ b/M3U8/scrapers/streamhub.py
@@ -1,3 +1,4 @@
+import asyncio
 from functools import partial
 
 import httpx
@@ -17,55 +18,80 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
 BASE_URL = "https://streamhub.pro/live-now"
 
 
-async def get_events(
-    client: httpx.AsyncClient, cached_keys: set[str]
-) -> list[dict[str, str]]:
+CATEGORIES = {
+    "Soccer": "sport_68c02a4464a38",
+    "American Football": "sport_68c02a4465113",
+    # "Baseball": "sport_68c02a446582f",
+    "Basketball": "sport_68c02a4466011",
+    # "Cricket": "sport_68c02a44669f3",
+    "Hockey": "sport_68c02a4466f56",
+    "MMA": "sport_68c02a44674e9",
+    "Racing": "sport_68c02a4467a48",
+    # "Rugby": "sport_68c02a4467fc1",
+    # "Tennis": "sport_68c02a4468cf7",
+    # "Volleyball": "sport_68c02a4469422",
+}
+
+
+async def get_html_data(client: httpx.AsyncClient, sport: str) -> bytes:
     try:
-        r = await client.get(BASE_URL)
+        r = await client.get(BASE_URL, params={"sport_id": sport})
 
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{BASE_URL}": {e}')
-        return []
+        return b""
 
-    soup = HTMLParser(r.content)
+    return r.content
+
+
+async def get_events(
+    client: httpx.AsyncClient, cached_keys: set[str]
+) -> list[dict[str, str]]:
+
+    tasks = [get_html_data(client, sport) for sport in CATEGORIES.values()]
+
+    results = await asyncio.gather(*tasks)
+
+    soups = [HTMLParser(html) for html in results]
 
     events = []
 
-    for event in soup.css(".events-section"):
-        if not (title_node := event.css_first(".section-titlte")):
-            continue
+    for soup in soups:
+        for section in soup.css(".events-section"):
+            if not (sport_node := section.css_first(".section-titlte")):
+                continue
 
-        sport = title_node.text(strip=True)
+            sport = sport_node.text(strip=True)
 
-        if not event.css_first(".event-competitors"):
-            continue
+            logo = section.css_first(".league-icon img").attributes.get("src")
 
-        home_team = event.css_first(".event-home-team").text(strip=True)
-        away_team = event.css_first(".event-visitor-team").text(strip=True)
+            for event in section.css(".section-event"):
+                event_name = "Live Event"
 
-        logo = event.css_first(".league-icon img").attributes.get("src")
+                if teams := event.css_first(".event-competitors"):
+                    home, away = teams.text(strip=True).split("vs.")
 
-        if not (event_button := event.css_first("div.event-button a")) or not (
-            href := event_button.attributes.get("href")
-        ):
-            continue
+                    event_name = f"{away} vs {home}"
 
-        event_name = f"{away_team} vs {home_team}"
+                if not (event_button := event.css_first("div.event-button a")) or not (
+                    href := event_button.attributes.get("href")
+                ):
+                    continue
 
-        key = f"[{sport}] {event_name} ({TAG})"
+                key = f"[{sport}] {event_name} ({TAG})"
 
-        if cached_keys & {key}:
-            continue
+                if cached_keys & {key}:
+                    continue
 
-        events.append(
-            {
-                "sport": sport,
-                "event": event_name,
-                "link": href,
-                "logo": logo,
-            }
-        )
+                events.append(
+                    {
+                        "sport": sport,
+                        "event": event_name,
+                        "link": href,
+                        "logo": logo,
+                    }
+                )
 
     return events
 
diff --git a/M3U8/scrapers/timstreams.py b/M3U8/scrapers/timstreams.py
index 6c7bdf5..b4d72ee 100644
--- a/M3U8/scrapers/timstreams.py
+++ b/M3U8/scrapers/timstreams.py
@@ -22,8 +22,7 @@ BASE_MIRRORS = [
     "https://timstreams.top",
 ]
 
-
-sport_genres = {
+SPORT_GENRES = {
     1: "Soccer",
     2: "Motorsport",
     3: "MMA",
@@ -79,7 +78,7 @@ async def get_events(
         if (genre := ev["genre"]) in {16, 17}:
             continue
 
-        sport = sport_genres.get(genre, "Live Event")
+        sport = SPORT_GENRES.get(genre, "Live Event")
 
         streams: list[dict[str, str]] = ev["streams"]
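
For reference, the following is a minimal, standalone sketch of the per-category fetch pattern the streamhub rewrite adopts: one request per sport_id issued concurrently via asyncio.gather, with each response parsed and walked for event links. It assumes HTMLParser comes from selectolax (as the .css()/.attributes usage above suggests); the two-entry SPORT_IDS subset, the fetch_category and main names, and the 15-second timeout are illustrative only, not part of the patch.

# sketch.py -- illustrative only, not part of the repository
import asyncio

import httpx
from selectolax.parser import HTMLParser

BASE_URL = "https://streamhub.pro/live-now"

# Subset of the ids from the patch's CATEGORIES table.
SPORT_IDS = {
    "Soccer": "sport_68c02a4464a38",
    "Basketball": "sport_68c02a4466011",
}


async def fetch_category(client: httpx.AsyncClient, sport_id: str) -> bytes:
    # Mirror get_html_data(): return empty bytes on failure so one bad
    # category never aborts the whole gather().
    try:
        r = await client.get(BASE_URL, params={"sport_id": sport_id})
        r.raise_for_status()
    except Exception:
        return b""
    return r.content


async def main() -> None:
    async with httpx.AsyncClient(timeout=15) as client:
        # One request per sport_id, run concurrently.
        pages = await asyncio.gather(
            *(fetch_category(client, sid) for sid in SPORT_IDS.values())
        )

    for sport, html in zip(SPORT_IDS, pages):
        soup = HTMLParser(html)
        for section in soup.css(".events-section"):
            for event in section.css(".section-event"):
                # Same selectors the patched scraper uses for event links.
                button = event.css_first("div.event-button a")
                if button and (href := button.attributes.get("href")):
                    print(sport, href)


if __name__ == "__main__":
    asyncio.run(main())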