fix scraping for streamhub
This commit is contained in:
doms9 2025-12-13 21:29:13 -05:00
parent 15ea61dcb3
commit 00000d9c19
4 changed files with 63 additions and 38 deletions

View file

@ -19,7 +19,7 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
BASE_URL = "https://sport9.ru"
async def get_html(
async def get_html_data(
client: httpx.AsyncClient,
url: str,
date: str,
@ -43,7 +43,7 @@ async def get_events(
now = Time.now()
tasks = [
get_html(client, BASE_URL, str(d.date()))
get_html_data(client, BASE_URL, str(d.date()))
for d in [
now.delta(days=-1),
now,

View file

@ -18,7 +18,7 @@ API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
BASE_URL = "https://backendstreamcenter.youshop.pro:488/api/Parties"
categories = {
CATEGORIES = {
4: "Basketball",
9: "Football",
13: "Baseball",
@ -87,7 +87,7 @@ async def get_events(
if not start_dt <= event_dt <= end_dt:
continue
if not (sport := categories.get(category_id)):
if not (sport := CATEGORIES.get(category_id)):
continue
key = f"[{sport}] {name} ({TAG})"

View file

@ -1,3 +1,4 @@
import asyncio
from functools import partial
import httpx
@ -17,42 +18,67 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
BASE_URL = "https://streamhub.pro/live-now"
async def get_events(
client: httpx.AsyncClient, cached_keys: set[str]
) -> list[dict[str, str]]:
CATEGORIES = {
"Soccer": "sport_68c02a4464a38",
"American Football": "sport_68c02a4465113",
# "Baseball": "sport_68c02a446582f",
"Basketball": "sport_68c02a4466011",
# "Cricket": "sport_68c02a44669f3",
"Hockey": "sport_68c02a4466f56",
"MMA": "sport_68c02a44674e9",
"Racing": "sport_68c02a4467a48",
# "Rugby": "sport_68c02a4467fc1",
# "Tennis": "sport_68c02a4468cf7",
# "Volleyball": "sport_68c02a4469422",
}
async def get_html_data(client: httpx.AsyncClient, sport: str) -> bytes:
try:
r = await client.get(BASE_URL)
r = await client.get(BASE_URL, params={"sport_id": sport})
r.raise_for_status()
except Exception as e:
log.error(f'Failed to fetch "{BASE_URL}": {e}')
return []
return b""
soup = HTMLParser(r.content)
return r.content
async def get_events(
client: httpx.AsyncClient, cached_keys: set[str]
) -> list[dict[str, str]]:
tasks = [get_html_data(client, sport) for sport in CATEGORIES.values()]
results = await asyncio.gather(*tasks)
soups = [HTMLParser(html) for html in results]
events = []
for event in soup.css(".events-section"):
if not (title_node := event.css_first(".section-titlte")):
for soup in soups:
for section in soup.css(".events-section"):
if not (sport_node := section.css_first(".section-titlte")):
continue
sport = title_node.text(strip=True)
sport = sport_node.text(strip=True)
if not event.css_first(".event-competitors"):
continue
logo = section.css_first(".league-icon img").attributes.get("src")
home_team = event.css_first(".event-home-team").text(strip=True)
away_team = event.css_first(".event-visitor-team").text(strip=True)
for event in section.css(".section-event"):
event_name = "Live Event"
logo = event.css_first(".league-icon img").attributes.get("src")
if teams := event.css_first(".event-competitors"):
home, away = teams.text(strip=True).split("vs.")
event_name = f"{away} vs {home}"
if not (event_button := event.css_first("div.event-button a")) or not (
href := event_button.attributes.get("href")
):
continue
event_name = f"{away_team} vs {home_team}"
key = f"[{sport}] {event_name} ({TAG})"
if cached_keys & {key}:

View file

@ -22,8 +22,7 @@ BASE_MIRRORS = [
"https://timstreams.top",
]
sport_genres = {
SPORT_GENRES = {
1: "Soccer",
2: "Motorsport",
3: "MMA",
@ -79,7 +78,7 @@ async def get_events(
if (genre := ev["genre"]) in {16, 17}:
continue
sport = sport_genres.get(genre, "Live Event")
sport = SPORT_GENRES.get(genre, "Live Event")
streams: list[dict[str, str]] = ev["streams"]