From 00000d94e2519246ea65282ff8d6ef3d5c07b696 Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Wed, 26 Nov 2025 02:38:29 -0500
Subject: [PATCH] e

---
 M3U8/fetch.py           |   3 +
 M3U8/scrapers/sport9.py | 132 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 M3U8/scrapers/sport9.py

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 53c28e0..302553e 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -9,6 +9,7 @@ from scrapers import (
     ppv,
     roxie,
     shark,
+    sport9,
     streambtw,
     streameast,
     streamfree,
@@ -49,6 +50,7 @@ async def main() -> None:
         asyncio.create_task(ppv.scrape(network.client)),
         asyncio.create_task(roxie.scrape(network.client)),
         asyncio.create_task(shark.scrape(network.client)),
+        asyncio.create_task(sport9.scrape(network.client)),
         asyncio.create_task(streambtw.scrape(network.client)),
         asyncio.create_task(streameast.scrape(network.client)),
         asyncio.create_task(streamfree.scrape(network.client)),
@@ -66,6 +68,7 @@ async def main() -> None:
         | ppv.urls
         | roxie.urls
         | shark.urls
+        | sport9.urls
         | streambtw.urls
         | streameast.urls
         | strmd.urls
diff --git a/M3U8/scrapers/sport9.py b/M3U8/scrapers/sport9.py
new file mode 100644
index 0000000..2f0352d
--- /dev/null
+++ b/M3U8/scrapers/sport9.py
@@ -0,0 +1,132 @@
+from functools import partial
+from urllib.parse import urljoin
+
+import httpx
+from playwright.async_api import async_playwright
+from selectolax.parser import HTMLParser
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+CACHE_FILE = Cache("sport9.json", exp=3_600)
+
+BASE_URL = "https://sport9.ru"
+
+
+async def get_events(
+    client: httpx.AsyncClient,
+    cached_keys: set[str],
+) -> list[dict[str, str]]:
+    try:
+        r = await client.get(BASE_URL)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+
+        return []
+
+    soup = HTMLParser(r.text)
+
+    events = []
+
+    for card in soup.css("a.match-card"):
+        live_badge = card.css_first(".live-badge")
+
+        if not live_badge or live_badge.text(strip=True) != "Live":
+            continue
+
+        if not (sport_node := card.css_first(".tournament-name")):
+            continue
+
+        team_1_node = card.css_first(".teams-container2 .team1")
+        team_2_node = card.css_first(".teams-container2 .team2")
+
+        if not (team_1_node and team_2_node):
+            continue
+
+        if not (href := card.attributes.get("href")):
+            continue
+
+        sport = sport_node.text(strip=True)
+        team_1 = team_1_node.text(strip=True)
+        team_2 = team_2_node.text(strip=True)
+
+        key = f"[{sport}] {team_1} vs {team_2} (SPRT9)"
+
+        if cached_keys & {key}:
+            continue
+
+        events.append(
+            {
+                "sport": sport,
+                "event": f"{team_1} vs {team_2}",
+                "link": urljoin(BASE_URL, href),
+            }
+        )
+
+    return events
+
+
+async def scrape(client: httpx.AsyncClient) -> None:
+    cached_urls = CACHE_FILE.load()
+    cached_count = len(cached_urls)
+    urls.update(cached_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info(f'Scraping from "{BASE_URL}"')
+
+    events = await get_events(client, set(cached_urls.keys()))
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    if events:
+        now = Time.now().timestamp()
+
+        async with async_playwright() as p:
+            browser, context = await network.browser(p, browser="brave")
+
+            for i, ev in enumerate(events, start=1):
+                handler = partial(
+                    network.process_event,
+                    client=client,
+                    url=ev["link"],
+                    url_num=i,
+                    context=context,
+                    log=log,
+                )
+
+                url = await network.safe_process(
+                    handler,
+                    url_num=i,
+                    log=log,
+                )
+
+                if url:
+                    sport, event = ev["sport"], ev["event"]
+
+                    key = f"[{sport}] {event} (SPRT9)"
+
+                    tvg_id, logo = leagues.get_tvg_info(sport, event)
+
+                    entry = {
+                        "url": url,
+                        "logo": logo,
+                        "base": "https://vividmosaica.com/",
+                        "timestamp": now,
+                        "id": tvg_id or "Live.Event.us",
+                    }
+
+                    urls[key] = cached_urls[key] = entry
+
+            await browser.close()
+
+    if new_count := len(cached_urls) - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
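
A minimal way to exercise the new scraper outside the full fetch.py pipeline (a sketch only, not part of this patch; it assumes the M3U8/ package layout above and substitutes a plain httpx.AsyncClient for the shared network.client that main() normally passes in):

# smoke_sport9.py -- hypothetical standalone runner, not included in this patch.
# Run from the M3U8/ directory so the `scrapers` package is importable.
import asyncio

import httpx

from scrapers import sport9


async def main() -> None:
    # The real pipeline supplies the shared network.client; a plain client is
    # assumed to be compatible here for a quick smoke test.
    async with httpx.AsyncClient(timeout=30) as client:
        await sport9.scrape(client)

    # scrape() fills the module-level sport9.urls mapping in place.
    for key, entry in sport9.urls.items():
        print(key, entry["url"])


if __name__ == "__main__":
    asyncio.run(main())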