fix streambtw.py scraping
This commit is contained in:
doms9 2026-02-07 12:52:02 -05:00
parent f3f1f3cd82
commit 00000d98b2

View file

@@ -1,7 +1,7 @@
import base64 import base64
import json
import re import re
from functools import partial from functools import partial
from urllib.parse import urljoin
from selectolax.parser import HTMLParser from selectolax.parser import HTMLParser
@@ -53,26 +53,47 @@ async def get_events() -> list[dict[str, str]]:
soup = HTMLParser(html_data.content) soup = HTMLParser(html_data.content)
for card in soup.css(".league"): script_text = None
if not (league_elem := card.css_first(".league-title")):
continue
for event in card.css(".match"): for s in soup.css("script"):
if not (match_elem := event.css_first(".match-name")): t = s.text() or ""
continue
if (not (watch_btn := event.css_first("a.watch-btn"))) or ( if "const DATA" in t:
not (href := watch_btn.attributes.get("href")) script_text = t
break
if not script_text:
return events
if not (
match := re.search(r"const\s+DATA\s*=\s*(\[\s*.*?\s*\]);", script_text, re.S)
): ):
continue return events
league, name = league_elem.text(strip=True), match_elem.text(strip=True) data_js = match[1].replace("\n ", "").replace("\n ", "")
s1 = re.sub(r"{\s", '{"', data_js)
s2 = re.sub(r':"', '":"', s1)
s3 = re.sub(r":\[", '":[', s2)
s4 = re.sub(r"},\]", "}]", s3)
s5 = re.sub(r'",\s', '","', s4)
data: list[dict[str, str]] = json.loads(s5)
for matches in data:
league = matches["title"]
items: list[dict[str, str]] = matches["items"]
for info in items:
title = info["title"]
url = info["url"]
events.append( events.append(
{ {
"sport": fix_league(league), "sport": fix_league(league),
"event": name, "event": title,
"link": urljoin(BASE_URL, href), "link": url,
} }
) )