doms9 2025-10-08 15:48:16 -04:00
parent 6a93221b78
commit 00000d9729

@@ -1,118 +1,118 @@
import re
from pathlib import Path
from urllib.parse import urljoin

import httpx
from selectolax.parser import HTMLParser

from .utils import Cache, Time, get_logger, leagues, network

log = get_logger(__name__)

urls: dict[str, dict[str, str]] = {}

BASE_URL = "https://streambtw.com/"

CACHE_FILE = Cache(Path(__file__).parent / "caches" / "streambtw.json", exp=86_400)


async def process_event(
    client: httpx.AsyncClient,
    url: str,
    url_num: int,
) -> str | None:
    try:
        r = await client.get(url)
        r.raise_for_status()
    except Exception as e:
        log.error(f'URL {url_num}) Failed to fetch "{url}"\n{e}')
        return

    valid_m3u8 = re.compile(
-        r'var\s+randomM3u8\s*=\s*[\'"]([^\'"]+)[\'"]',
+        r'var\s+(\w+)\s*=\s*["\']?(https?:\/\/[^"\'\s>]+\.m3u8)["\']?',
        re.IGNORECASE,
    )

    if match := valid_m3u8.search(r.text):
        log.info(f"URL {url_num}) Captured M3U8")
-        return match[1]
+        return match[2]

    log.info(f"URL {url_num}) No M3U8 found")


async def get_events(client: httpx.AsyncClient) -> list[dict[str, str]]:
    try:
        r = await client.get(BASE_URL)
        r.raise_for_status()
    except Exception as e:
        log.error(f'Failed to fetch "{BASE_URL}": {e}')
        return []

    soup = HTMLParser(r.text)

    events = []

    for card in soup.css("div.container div.card"):
        sport = card.css_first("h5.card-title").text(strip=True)
        name = card.css_first("p.card-text").text(strip=True)
        link = card.css_first("a.btn.btn-primary")

        if not (href := link.attrs.get("href")):
            continue

        events.append(
            {
                "sport": sport,
                "event": name,
                "link": urljoin(BASE_URL, href),
            }
        )

    return events
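
get_events is untouched by this commit. For context, a sketch of the card markup its selectors expect; the structure below is inferred from the selectors, not taken from the live page, and the content is invented:

from urllib.parse import urljoin

from selectolax.parser import HTMLParser

# Assumed markup shape; class names come from the selectors above.
html = """
<div class="container">
  <div class="card">
    <h5 class="card-title">NFL</h5>
    <p class="card-text">Team A vs Team B</p>
    <a class="btn btn-primary" href="/nfl1.php">Watch</a>
  </div>
</div>
"""

soup = HTMLParser(html)

for card in soup.css("div.container div.card"):
    sport = card.css_first("h5.card-title").text(strip=True)
    name = card.css_first("p.card-text").text(strip=True)
    href = card.css_first("a.btn.btn-primary").attrs.get("href")
    print(sport, name, urljoin("https://streambtw.com/", href))
    # -> NFL Team A vs Team B https://streambtw.com/nfl1.php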


async def scrape(client: httpx.AsyncClient) -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)
        log.info(f"Loaded {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')

    events = await get_events(client)

    log.info(f"Processing {len(events)} new URL(s)")

    now = Time.now().timestamp()

    for i, ev in enumerate(events, start=1):
        url = await network.safe_process(
            lambda: process_event(client, url=ev["link"], url_num=i),
            url_num=i,
            log=log,
            timeout=10,
        )

        if url:
            sport, event = ev["sport"], ev["event"]
            key = f"[{sport}] {event} (SBTW)"
            tvg_id, logo = leagues.info(sport)

            entry = {
                "url": url,
                "logo": logo,
                "base": BASE_URL,
                "timestamp": now,
                "id": tvg_id or "Live.Event.us",
            }

            urls[key] = entry

    log.info(f"Collected {len(urls)} event(s)")

    CACHE_FILE.write(urls)
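
scrape is also unchanged here. For reference, a minimal driver sketch; the import path and client settings are assumptions, since the real entry point is not part of this diff:

import asyncio

import httpx

from scrapers.streambtw import scrape, urls  # hypothetical import path


async def main() -> None:
    # One shared client for the run; scrape() fills the module-level `urls` dict
    # either from the 24h cache or by fetching and processing each event page.
    async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
        await scrape(client)

    for key, entry in urls.items():
        print(key, entry["url"])


if __name__ == "__main__":
    asyncio.run(main())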