"""Scrape live-event M3U8 links from streambtw.com, with a daily cache."""

import re
from pathlib import Path
from urllib.parse import urljoin

import httpx
from selectolax.parser import HTMLParser

from .utils import (
    get_logger,
    league_info,
    load_cache,
    now,
    safe_process_event,
    write_cache,
)

log = get_logger(__name__)

# Collected events, keyed by "[sport] event (SBTW)". Entry values mix str
# fields with a float "timestamp", hence the union in the annotation.
urls: dict[str, dict[str, str | float]] = {}

BASE_URL = "https://streambtw.com/"
CACHE_FILE = Path(__file__).parent / "caches" / "streambtw.json"


async def process_event(
    client: httpx.AsyncClient,
    url: str,
    url_num: int,
) -> str | None:
    """Fetch an event page and extract its M3U8 playlist URL, if present."""
    try:
        r = await client.get(url)
        r.raise_for_status()
    except Exception as e:
        log.error(f'URL {url_num}) Failed to fetch "{url}"\n{e}')
        return None

    # The page embeds the playlist URL in an inline script:
    #   var randomM3u8 = "https://..."
    valid_m3u8 = re.compile(
        r'var\s+randomM3u8\s*=\s*[\'"]([^\'"]+)[\'"]',
        re.IGNORECASE,
    )
    if match := valid_m3u8.search(r.text):
        log.info(f"URL {url_num}) Captured M3U8")
        return match[1]

    log.info(f"URL {url_num}) No M3U8 found")
    return None


async def get_events(client: httpx.AsyncClient) -> list[dict[str, str | None]]:
    """Scrape the landing page and return one dict per event card."""
    try:
        r = await client.get(BASE_URL)
        r.raise_for_status()
    except Exception as e:
        log.error(f'Failed to fetch "{BASE_URL}"\n{e}')
        return []

    soup = HTMLParser(r.text)
    events = []
    for card in soup.css("div.container div.card"):
        img = card.css_first("img.league-logo")
        logo = img.attrs.get("src") if img else None

        title = card.css_first("h5.card-title")
        text = card.css_first("p.card-text")
        link = card.css_first("a.btn.btn-primary")
        if title is None or text is None or link is None:
            continue  # skip cards that don't match the expected layout

        if href := link.attrs.get("href"):
            events.append(
                {
                    "sport": title.text(strip=True),
                    "event": text.text(strip=True),
                    "link": urljoin(BASE_URL, href),
                    "logo": logo,
                }
            )
    return events


async def main(client: httpx.AsyncClient) -> None:
    # Serve from the cache (expires after 24h, rounded to the nearest hour)
    # instead of re-scraping on every run.
    if cached := load_cache(CACHE_FILE, exp=86400, nearest_hr=True):
        urls.update(cached)
        log.info(f"Collected {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')
    events = await get_events(client)
    log.info(f"Processing {len(events)} new URL(s)")

    for i, ev in enumerate(events, start=1):
        # The lambda defers coroutine creation so safe_process_event decides
        # when (and whether) to run it; it is awaited within this iteration,
        # so capturing `ev` and `i` by closure is safe.
        url = await safe_process_event(
            lambda: process_event(client, url=ev["link"], url_num=i),
            url_num=i,
            log=log,
        )
        if url:
            sport, event = ev["sport"], ev["event"]
            key = f"[{sport}] {event} (SBTW)"
            urls[key] = {
                "url": url,
                "logo": ev["logo"] or league_info(sport)["logo"],
                "base": BASE_URL,
                "timestamp": now.timestamp(),
                "tvg-id": league_info(sport)["id"],
            }

    log.info(f"Collected {len(urls)} event(s)")
    write_cache(CACHE_FILE, urls)
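

# --- Usage sketch (illustrative; everything outside this module is an
# assumption). A minimal driver showing how ``main`` is meant to be called:
# the caller owns the ``httpx.AsyncClient`` so one client can be shared
# across scrapers. This file uses a relative ``.utils`` import, so it must
# live in a package; the ``myiptv`` package name below is hypothetical.
#
#     import asyncio
#     import httpx
#     from myiptv import streambtw
#
#     async def run() -> None:
#         async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
#             await streambtw.main(client)
#             print(f"{len(streambtw.urls)} event(s) collected")
#
#     asyncio.run(run())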