fix scraping for streamhub
This commit is contained in:
doms9 2025-12-13 21:29:13 -05:00
parent 15ea61dcb3
commit 00000d9c19
4 changed files with 63 additions and 38 deletions

View file

@@ -19,7 +19,7 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
 BASE_URL = "https://sport9.ru"


-async def get_html(
+async def get_html_data(
     client: httpx.AsyncClient,
     url: str,
     date: str,
@@ -43,7 +43,7 @@ async def get_events(
     now = Time.now()

     tasks = [
-        get_html(client, BASE_URL, str(d.date()))
+        get_html_data(client, BASE_URL, str(d.date()))
         for d in [
             now.delta(days=-1),
             now,

View file

@@ -18,7 +18,7 @@ API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 BASE_URL = "https://backendstreamcenter.youshop.pro:488/api/Parties"


-categories = {
+CATEGORIES = {
     4: "Basketball",
     9: "Football",
     13: "Baseball",
@@ -87,7 +87,7 @@ async def get_events(
         if not start_dt <= event_dt <= end_dt:
             continue

-        if not (sport := categories.get(category_id)):
+        if not (sport := CATEGORIES.get(category_id)):
             continue

         key = f"[{sport}] {name} ({TAG})"

View file

@@ -1,3 +1,4 @@
+import asyncio
 from functools import partial

 import httpx
@@ -17,42 +18,67 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
 BASE_URL = "https://streamhub.pro/live-now"

-async def get_events(
-    client: httpx.AsyncClient, cached_keys: set[str]
-) -> list[dict[str, str]]:
+CATEGORIES = {
+    "Soccer": "sport_68c02a4464a38",
+    "American Football": "sport_68c02a4465113",
+    # "Baseball": "sport_68c02a446582f",
+    "Basketball": "sport_68c02a4466011",
+    # "Cricket": "sport_68c02a44669f3",
+    "Hockey": "sport_68c02a4466f56",
+    "MMA": "sport_68c02a44674e9",
+    "Racing": "sport_68c02a4467a48",
+    # "Rugby": "sport_68c02a4467fc1",
+    # "Tennis": "sport_68c02a4468cf7",
+    # "Volleyball": "sport_68c02a4469422",
+}
+
+
+async def get_html_data(client: httpx.AsyncClient, sport: str) -> bytes:
     try:
-        r = await client.get(BASE_URL)
+        r = await client.get(BASE_URL, params={"sport_id": sport})
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{BASE_URL}": {e}')
-        return []
+        return b""

-    soup = HTMLParser(r.content)
+    return r.content
+
+
+async def get_events(
+    client: httpx.AsyncClient, cached_keys: set[str]
+) -> list[dict[str, str]]:
+    tasks = [get_html_data(client, sport) for sport in CATEGORIES.values()]
+    results = await asyncio.gather(*tasks)
+
+    soups = [HTMLParser(html) for html in results]

     events = []

-    for event in soup.css(".events-section"):
-        if not (title_node := event.css_first(".section-titlte")):
-            continue
+    for soup in soups:
+        for section in soup.css(".events-section"):
+            if not (sport_node := section.css_first(".section-titlte")):
+                continue

-        sport = title_node.text(strip=True)
+            sport = sport_node.text(strip=True)
+            logo = section.css_first(".league-icon img").attributes.get("src")

-        if not event.css_first(".event-competitors"):
-            continue
+            for event in section.css(".section-event"):
+                event_name = "Live Event"

-        home_team = event.css_first(".event-home-team").text(strip=True)
-        away_team = event.css_first(".event-visitor-team").text(strip=True)
-        logo = event.css_first(".league-icon img").attributes.get("src")
+                if teams := event.css_first(".event-competitors"):
+                    home, away = teams.text(strip=True).split("vs.")
+                    event_name = f"{away} vs {home}"

-        if not (event_button := event.css_first("div.event-button a")) or not (
-            href := event_button.attributes.get("href")
-        ):
-            continue
+                if not (event_button := event.css_first("div.event-button a")) or not (
+                    href := event_button.attributes.get("href")
+                ):
+                    continue

-        event_name = f"{away_team} vs {home_team}"
-        key = f"[{sport}] {event_name} ({TAG})"
+                key = f"[{sport}] {event_name} ({TAG})"

-        if cached_keys & {key}:
+                if cached_keys & {key}:

View file

@@ -22,8 +22,7 @@ BASE_MIRRORS = [
     "https://timstreams.top",
 ]

-
-sport_genres = {
+SPORT_GENRES = {
     1: "Soccer",
     2: "Motorsport",
     3: "MMA",
@@ -79,7 +78,7 @@ async def get_events(
         if (genre := ev["genre"]) in {16, 17}:
             continue

-        sport = sport_genres.get(genre, "Live Event")
+        sport = SPORT_GENRES.get(genre, "Live Event")

         streams: list[dict[str, str]] = ev["streams"]