import json
from pathlib import Path
from urllib.parse import unquote, urljoin

import httpx
from selectolax.parser import HTMLParser

from .utils import LOGOS, get_base, get_logger, load_cache, now, safe_process_event

log = get_logger(__name__)

# Accumulated {channel key -> stream entry} mapping shared with the rest of the package.
urls: dict[str, dict[str, str]] = {}

# Candidate FSTV mirrors handed to `get_base`, which selects a working one.
MIRRORS = [
    "https://fstv.zip",
    "https://fstv.space",
    "https://fstv.online",
    "https://fstv.us",
]

CACHE_FILE = Path(__file__).parent / "caches" / "fstv.json"


async def get_events(
    client: httpx.AsyncClient,
    base_url: str,
    cached_hrefs: set[str],
) -> list[dict[str, str]]:
    """Scrape the fixtures page and return events not already present in the cache."""
    log.info(f'Scraping from "{base_url}"')

    try:
        r = await client.get(base_url)
        r.raise_for_status()
    except Exception as e:
        log.error(f'Failed to fetch "{base_url}"\n{e}')
        return []

    soup = HTMLParser(r.text)
    events = []

    for wrpr in soup.css("div.fixtures-live-wrapper"):
        for games in wrpr.css(".match-table-item"):
            league_name = games.css_first(".league-info a.league-name")
            league_match = games.css_first(".common-table-row a[href*='/match/']")

            if league_name and league_match:
                full_text = league_name.text(strip=True)

                # Drop a leading bracketed prefix from the league name, if present.
                if "]" in full_text:
                    event_name = full_text.split("]", 1)[1].strip()
                else:
                    event_name = full_text

                href = league_match.attributes.get("href")
                link = urljoin(base_url, href)

                # Skip matches that are already cached.
                if href in cached_hrefs:
                    continue

                events.append(
                    {
                        "sport": event_name,
                        "link": link,
                        "logo": LOGOS.get(
                            event_name,
                            "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png",
                        ),
                        "href": href,
                    }
                )

    return events


async def process_event(
    client: httpx.AsyncClient,
    url: str,
    url_num: int,
) -> tuple[str, str]:
    """Resolve a single match page to a (match name, M3U8 URL) pair."""
    try:
        r = await client.get(url)
        r.raise_for_status()
    except Exception as e:
        log.error(f'URL {url_num}) Failed to fetch "{url}"\n{e}')
        return "", ""

    soup = HTMLParser(r.text)

    # The last breadcrumb entry usually holds the match name.
    if category_links := soup.css(".common-list-category .category-item a"):
        match_name = category_links[-1].text(strip=True)
    else:
        match_name = None

    # Fall back to the og:title meta tag when the breadcrumb is missing or just "vs".
    if not match_name or match_name.lower() == "vs":
        if og_title := soup.css_first("meta[property='og:title']"):
            match_name = (
                og_title.attributes.get("content", "").split(" start on")[0].strip()
            )

    # The stream URL is passed to the player iframe as a `link=` query parameter.
    if ifr := soup.css_first("iframe"):
        if src := ifr.attributes.get("src", ""):
            log.info(f"URL {url_num}) Captured M3U8")
            return match_name or "", unquote(src).split("link=")[-1]

    log.info(f"URL {url_num}) No M3U8 found")
    return "", ""


async def main(client: httpx.AsyncClient) -> None:
    """Merge cached events, scrape and process new ones, then persist the cache."""
    cached_urls = load_cache(CACHE_FILE, exp=14400)
    cached_hrefs = {entry["href"] for entry in cached_urls.values()}
    cached_count = len(cached_urls)
    urls.update(cached_urls)
    log.info(f"Collected {cached_count} event(s) from cache")

    if not (base_url := await get_base(client, MIRRORS)):
        log.warning("No working FSTV mirrors")
        return

    events = await get_events(
        client,
        base_url,
        cached_hrefs,
    )
    log.info(f"Processing {len(events)} new URL(s)")

    for i, ev in enumerate(events, start=1):
        match_name, url = await safe_process_event(
            lambda: process_event(
                client,
                ev["link"],
                url_num=i,
            ),
            url_num=i,
            log=log,
        )

        if url:
            key = (
                f"[{ev['sport']}] {match_name} (FSTV)"
                if match_name
                else f"[{ev['sport']}] (FSTV)"
            )
            entry = {
                "url": url,
                "logo": ev["logo"],
                "base": base_url,
                "timestamp": now.timestamp(),
                "href": ev["href"],
            }
            urls[key] = cached_urls[key] = entry

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

    CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")
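

# --- Hypothetical usage sketch (assumption: not part of the original module) ---
# `main` only needs a shared httpx.AsyncClient; the client settings below are
# illustrative defaults, not values taken from the project. Because the module
# uses a relative import, run it from within its package, e.g. `python -m <package>.fstv`.
if __name__ == "__main__":
    import asyncio

    async def _run() -> None:
        async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
            await main(client)

    asyncio.run(_run())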