mirror of
https://github.com/doms9/iptv.git
synced 2026-03-07 11:18:25 +01:00
e
fix streambtw.py scraping
This commit is contained in:
parent
f3f1f3cd82
commit
00000d98b2
1 changed file with 35 additions and 14 deletions
|
|
@@ -1,7 +1,7 @@
|
|||
import base64
|
||||
import json
|
||||
import re
|
||||
from functools import partial
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from selectolax.parser import HTMLParser
|
||||
|
||||
|
|
@@ -53,26 +53,47 @@ async def get_events() -> list[dict[str, str]]:
|
|||
|
||||
soup = HTMLParser(html_data.content)
|
||||
|
||||
for card in soup.css(".league"):
|
||||
if not (league_elem := card.css_first(".league-title")):
|
||||
continue
|
||||
script_text = None
|
||||
|
||||
for event in card.css(".match"):
|
||||
if not (match_elem := event.css_first(".match-name")):
|
||||
continue
|
||||
for s in soup.css("script"):
|
||||
t = s.text() or ""
|
||||
|
||||
if (not (watch_btn := event.css_first("a.watch-btn"))) or (
|
||||
not (href := watch_btn.attributes.get("href"))
|
||||
):
|
||||
continue
|
||||
if "const DATA" in t:
|
||||
script_text = t
|
||||
break
|
||||
|
||||
league, name = league_elem.text(strip=True), match_elem.text(strip=True)
|
||||
if not script_text:
|
||||
return events
|
||||
|
||||
if not (
|
||||
match := re.search(r"const\s+DATA\s*=\s*(\[\s*.*?\s*\]);", script_text, re.S)
|
||||
):
|
||||
return events
|
||||
|
||||
data_js = match[1].replace("\n ", "").replace("\n ", "")
|
||||
s1 = re.sub(r"{\s", '{"', data_js)
|
||||
s2 = re.sub(r':"', '":"', s1)
|
||||
s3 = re.sub(r":\[", '":[', s2)
|
||||
s4 = re.sub(r"},\]", "}]", s3)
|
||||
s5 = re.sub(r'",\s', '","', s4)
|
||||
|
||||
data: list[dict[str, str]] = json.loads(s5)
|
||||
|
||||
for matches in data:
|
||||
league = matches["title"]
|
||||
|
||||
items: list[dict[str, str]] = matches["items"]
|
||||
|
||||
for info in items:
|
||||
title = info["title"]
|
||||
|
||||
url = info["url"]
|
||||
|
||||
events.append(
|
||||
{
|
||||
"sport": fix_league(league),
|
||||
"event": name,
|
||||
"link": urljoin(BASE_URL, href),
|
||||
"event": title,
|
||||
"link": url,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue