doms9 2025-10-08 15:48:16 -04:00
parent 6a93221b78
commit 00000d9729

@@ -1,118 +1,118 @@
import re
from pathlib import Path
from urllib.parse import urljoin

import httpx
from selectolax.parser import HTMLParser

from .utils import Cache, Time, get_logger, leagues, network

log = get_logger(__name__)

urls: dict[str, dict[str, str]] = {}

BASE_URL = "https://streambtw.com/"

CACHE_FILE = Cache(Path(__file__).parent / "caches" / "streambtw.json", exp=86_400)


async def process_event(
    client: httpx.AsyncClient,
    url: str,
    url_num: int,
) -> str | None:
    try:
        r = await client.get(url)
        r.raise_for_status()
    except Exception as e:
        log.error(f'URL {url_num}) Failed to fetch "{url}"\n{e}')
        return

    valid_m3u8 = re.compile(
-        r'var\s+randomM3u8\s*=\s*[\'"]([^\'"]+)[\'"]',
+        r'var\s+(\w+)\s*=\s*["\']?(https?:\/\/[^"\'\s>]+\.m3u8)["\']?',
        re.IGNORECASE,
    )

    if match := valid_m3u8.search(r.text):
        log.info(f"URL {url_num}) Captured M3U8")
-        return match[1]
+        return match[2]

    log.info(f"URL {url_num}) No M3U8 found")


async def get_events(client: httpx.AsyncClient) -> list[dict[str, str]]:
    try:
        r = await client.get(BASE_URL)
        r.raise_for_status()
    except Exception as e:
        log.error(f'Failed to fetch "{BASE_URL}": {e}')
        return []

    soup = HTMLParser(r.text)

    events = []

    for card in soup.css("div.container div.card"):
        sport = card.css_first("h5.card-title").text(strip=True)
        name = card.css_first("p.card-text").text(strip=True)
        link = card.css_first("a.btn.btn-primary")

        if not (href := link.attrs.get("href")):
            continue

        events.append(
            {
                "sport": sport,
                "event": name,
                "link": urljoin(BASE_URL, href),
            }
        )

    return events
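
get_events is untouched by this commit. For context, a sketch of the card markup its selectors expect; the structure below is inferred from the selectors, not taken from the live page, and the content is invented:

from urllib.parse import urljoin

from selectolax.parser import HTMLParser

# Assumed markup shape; class names come from the selectors above.
html = """
<div class="container">
  <div class="card">
    <h5 class="card-title">NFL</h5>
    <p class="card-text">Team A vs Team B</p>
    <a class="btn btn-primary" href="/nfl1.php">Watch</a>
  </div>
</div>
"""

soup = HTMLParser(html)

for card in soup.css("div.container div.card"):
    sport = card.css_first("h5.card-title").text(strip=True)
    name = card.css_first("p.card-text").text(strip=True)
    href = card.css_first("a.btn.btn-primary").attrs.get("href")
    print(sport, name, urljoin("https://streambtw.com/", href))
    # -> NFL Team A vs Team B https://streambtw.com/nfl1.php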


async def scrape(client: httpx.AsyncClient) -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)
        log.info(f"Loaded {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')

    events = await get_events(client)

    log.info(f"Processing {len(events)} new URL(s)")

    now = Time.now().timestamp()

    for i, ev in enumerate(events, start=1):
        url = await network.safe_process(
            lambda: process_event(client, url=ev["link"], url_num=i),
            url_num=i,
            log=log,
            timeout=10,
        )

        if url:
            sport, event = ev["sport"], ev["event"]
            key = f"[{sport}] {event} (SBTW)"
            tvg_id, logo = leagues.info(sport)

            entry = {
                "url": url,
                "logo": logo,
                "base": BASE_URL,
                "timestamp": now,
                "id": tvg_id or "Live.Event.us",
            }

            urls[key] = entry

    log.info(f"Collected {len(urls)} event(s)")

    CACHE_FILE.write(urls)
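
scrape is also unchanged here. For reference, a minimal driver sketch; the import path and client settings are assumptions, since the real entry point is not part of this diff:

import asyncio

import httpx

from scrapers.streambtw import scrape, urls  # hypothetical import path


async def main() -> None:
    # One shared client for the run; scrape() fills the module-level `urls` dict
    # either from the 24h cache or by fetching and processing each event page.
    async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
        await scrape(client)

    for key, entry in urls.items():
        print(key, entry["url"])


if __name__ == "__main__":
    asyncio.run(main())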