e

fix streambtw.py scraping
2026-06-14 12:36:27 +02:00 · 2026-02-07 12:52:02 -05:00 · 2026-02-07 12:52:02 -05:00 · 00000d98b2
commit 00000d98b2
parent f3f1f3cd82
1 changed file with 35 additions and 14 deletions
--- a/M3U8/scrapers/streambtw.py
+++ b/M3U8/scrapers/streambtw.py
@@ -1,7 +1,7 @@
 import base64
 import json
 import re
 from functools import partial
 from urllib.parse import urljoin
 from selectolax.parser import HTMLParser
@@ -53,26 +53,47 @@ async def get_events() -> list[dict[str, str]]:
    soup = HTMLParser(html_data.content)
-    for card in soup.css(".league"):
+    script_text = None
        if not (league_elem := card.css_first(".league-title")):
            continue
-        for event in card.css(".match"):
+    for s in soup.css("script"):
-            if not (match_elem := event.css_first(".match-name")):
+        t = s.text() or ""
                continue
-            if (not (watch_btn := event.css_first("a.watch-btn"))) or (
+        if "const DATA" in t:
-                not (href := watch_btn.attributes.get("href"))
+            script_text = t
            break
    if not script_text:
        return events
    if not (
        match := re.search(r"const\s+DATA\s*=\s*(\[\s*.*?\s*\]);", script_text, re.S)
    ):
-                continue
+        return events
-            league, name = league_elem.text(strip=True), match_elem.text(strip=True)
+    data_js = match[1].replace("\n      ", "").replace("\n    ", "")
    s1 = re.sub(r"{\s", '{"', data_js)
    s2 = re.sub(r':"', '":"', s1)
    s3 = re.sub(r":\[", '":[', s2)
    s4 = re.sub(r"},\]", "}]", s3)
    s5 = re.sub(r'",\s', '","', s4)
    data: list[dict[str, str]] = json.loads(s5)
    for matches in data:
        league = matches["title"]
        items: list[dict[str, str]] = matches["items"]
        for info in items:
            title = info["title"]
            url = info["url"]
            events.append(
                {
                    "sport": fix_league(league),
-                    "event": name,
+                    "event": title,
-                    "link": urljoin(BASE_URL, href),
+                    "link": url,
                }
            )