e

parse 'important games' section for totalsportek.py
2026-03-07 11:18:25 +01:00 · 2026-01-20 15:18:17 -05:00 · 2026-01-20 15:18:17 -05:00 · 00000d9553
commit 00000d9553
parent 0b7a4b1ae5
1 changed files with 35 additions and 36 deletions
--- a/M3U8/scrapers/totalsportek.py
+++ b/M3U8/scrapers/totalsportek.py
@@ -1,6 +1,6 @@
 import re
 from functools import partial
-from urllib.parse import urljoin
+from urllib.parse import urljoin, urlparse
 from selectolax.parser import HTMLParser
@@ -26,7 +26,9 @@ MIRRORS = [
 ]
-def fix_league(s: str) -> str:
+def fix_txt(s: str) -> str:
    s = " ".join(s.split())
    return s.upper() if s.islower() else s
@@ -34,9 +36,9 @@ async def process_event(href: str, url_num: int) -> tuple[str | None, str | None
    valid_m3u8 = re.compile(r'var\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)
    for x, mirror in enumerate(MIRRORS, start=1):
-        base = mirror["base"]
+        base: str = mirror["base"]
-        hex_decode = mirror["hex_decode"]
+        hex_decode: bool = mirror["hex_decode"]
        url = urljoin(base, href)
@@ -61,11 +63,10 @@ async def process_event(href: str, url_num: int) -> tuple[str | None, str | None
            log.warning(f"M{x} | URL {url_num}) No Clappr source found.")
            continue
-        raw = match[2]
+        raw: str = match[2]
        try:
            m3u8_url = bytes.fromhex(raw).decode("utf-8") if hex_decode else raw
        except Exception as e:
            log.warning(f"M{x} | URL {url_num}) Decoding failed: {e}")
            continue
@@ -75,10 +76,7 @@ async def process_event(href: str, url_num: int) -> tuple[str | None, str | None
            return m3u8_url, iframe_src
-        else:
+        log.warning(f"M{x} | URL {url_num}) No M3U8 found")
            log.warning(f"M{x} | URL {url_num}) No M3U8 found")
            return None, None
    return None, None
@@ -93,41 +91,42 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
    sport = "Live Event"
-    for box in soup.css(".div-main-box"):
+    for node in soup.css("a"):
-        for node in box.iter():
+        if not node.attributes.get("class"):
-            if not (node_class := node.attributes.get("class")):
+            continue
                continue
-            if "my-1" in node_class:
+        if (parent := node.parent) and "my-1" in parent.attributes.get("class", ""):
-                if span := node.css_first("span"):
+            if span := node.css_first("span"):
-                    sport = span.text(strip=True)
+                sport = span.text(strip=True)
-            if node.tag == "a" and "nav-link2" in node_class:
+        sport = fix_txt(sport)
                if not (time_node := node.css_first(".col-3")):
                    continue
-                if time_node.text(strip=True) != "MatchStarted":
+        if not (teams := [t.text(strip=True) for t in node.css(".col-7 .col-12")]):
-                    continue
+            continue
-                if not (href := node.attributes.get("href")) or href.startswith("http"):
+        if not (href := node.attributes.get("href")):
-                    continue
+            continue
-                sport = fix_league(sport)
+        href = urlparse(href).path if href.startswith("http") else href
-                teams = [t.text(strip=True) for t in node.css(".col-7 .col-12")]
+        if not (time_node := node.css_first(".col-3 span")):
            continue
-                event_name = " vs ".join(teams)
+        if time_node.text(strip=True) != "MatchStarted":
            continue
-                if f"[{sport}] {event_name} ({TAG})" in cached_keys:
+        event_name = fix_txt(" vs ".join(teams))
                    continue
-                events.append(
+        if f"[{sport}] {event_name} ({TAG})" in cached_keys:
-                    {
+            continue
-                        "sport": sport,
+
-                        "event": event_name,
+        events.append(
-                        "href": href,
+            {
-                    }
+                "sport": sport,
-                )
+                "event": event_name,
                "href": href,
            }
        )
    return events