From 00000d9da5bc64ae7189958bfd2cd0f2e0566536 Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Thu, 27 Nov 2025 03:03:17 -0500
Subject: [PATCH] pass bytes to HTMLParser; fetch sport9 days concurrently

---
 M3U8/scrapers/fawa.py       |  2 +-
 M3U8/scrapers/old/fstv.py   |  4 ++--
 M3U8/scrapers/roxie.py      |  2 +-
 M3U8/scrapers/shark.py      |  2 +-
 M3U8/scrapers/sport9.py     | 95 ++++++++++++++++++++++++------------
 M3U8/scrapers/streambtw.py  |  2 +-
 M3U8/scrapers/streameast.py |  2 +-
 7 files changed, 70 insertions(+), 39 deletions(-)

diff --git a/M3U8/scrapers/fawa.py b/M3U8/scrapers/fawa.py
index bb3dffe..bbc7965 100644
--- a/M3U8/scrapers/fawa.py
+++ b/M3U8/scrapers/fawa.py
@@ -52,7 +52,7 @@ async def get_events(
 
         return []
 
-    soup = HTMLParser(r.text)
+    soup = HTMLParser(r.content)
 
     valid_event = re.compile(r"\d{1,2}:\d{1,2}")
     clean_event = re.compile(r"\s+-+\s+\w{1,4}")
diff --git a/M3U8/scrapers/old/fstv.py b/M3U8/scrapers/old/fstv.py
index a640e95..13fb6b6 100644
--- a/M3U8/scrapers/old/fstv.py
+++ b/M3U8/scrapers/old/fstv.py
@@ -29,7 +29,7 @@ async def process_event(
 
         return "", ""
 
-    soup = HTMLParser(r.text)
+    soup = HTMLParser(r.content)
 
     if category_links := soup.css(".common-list-category .category-item a"):
         match_name = category_links[-1].text(strip=True)
@@ -65,7 +65,7 @@ async def get_events(
 
         return []
 
-    soup = HTMLParser(r.text)
+    soup = HTMLParser(r.content)
 
     events = []
 
diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py
index 3635dde..e82b44b 100644
--- a/M3U8/scrapers/roxie.py
+++ b/M3U8/scrapers/roxie.py
@@ -68,7 +68,7 @@ async def refresh_html_cache(
 
         return {}
 
-    soup = HTMLParser(r.text)
+    soup = HTMLParser(r.content)
 
     events = {}
 
diff --git a/M3U8/scrapers/shark.py b/M3U8/scrapers/shark.py
index 471a6c0..5efb898 100644
--- a/M3U8/scrapers/shark.py
+++ b/M3U8/scrapers/shark.py
@@ -56,7 +56,7 @@ async def refresh_html_cache(
 
         return {}
 
-    soup = HTMLParser(r.text)
+    soup = HTMLParser(r.content)
 
     events = {}
 
diff --git a/M3U8/scrapers/sport9.py b/M3U8/scrapers/sport9.py
index a31c311..e6942d4 100644
--- a/M3U8/scrapers/sport9.py
+++ b/M3U8/scrapers/sport9.py
@@ -1,3 +1,4 @@
+import asyncio
 from functools import partial
 from urllib.parse import urljoin
 
@@ -16,56 +17,86 @@ CACHE_FILE = Cache("sport9.json", exp=3_600)
 BASE_URL = "https://sport9.ru"
 
 
+async def get_html(
+    client: httpx.AsyncClient,
+    url: str,
+    date: str,
+) -> bytes:
+    try:
+        r = await client.get(url, params={"date": date})
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}": {e}')
+
+        return b""
+
+    return r.content
+
+
 async def get_events(
     client: httpx.AsyncClient,
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
-    try:
-        r = await client.get(BASE_URL)
-        r.raise_for_status()
-    except Exception as e:
-        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+    now = Time.now()
 
-        return []
+    tasks = [
+        get_html(client, BASE_URL, str(d.date()))
+        for d in [
+            now.delta(days=-1),
+            now,
+            now.delta(days=1),
+        ]
+    ]
 
-    soup = HTMLParser(r.text)
+    results = await asyncio.gather(*tasks)
+
+    soups = [HTMLParser(html) for html in results]
 
     events = []
 
-    for card in soup.css("a.match-card"):
-        live_badge = card.css_first(".live-badge")
+    for soup in soups:
+        for card in soup.css("a.match-card"):
+            live_badge = card.css_first(".live-badge")
 
-        if not live_badge or live_badge.text(strip=True) != "Live":
-            continue
+            if not live_badge or live_badge.text(strip=True) != "Live":
+                continue
 
-        if not (sport_node := card.css_first(".tournament-name")):
-            continue
+            if not (sport_node := card.css_first(".tournament-name")):
+                continue
 
-        team_1_node = card.css_first(".teams-container2 .team1")
-        team_2_node = card.css_first(".teams-container2 .team2")
+            sport = sport_node.text(strip=True)
+            team_1_node = card.css_first(".team1 .team-name")
+            team_2_node = card.css_first(".team2 .team-name")
 
-        if not (team_1_node and team_2_node):
-            continue
+            if team_1_node and not team_2_node:
+                event = team_1_node.text(strip=True)
 
-        if not (href := card.attributes.get("href")):
-            continue
+            elif team_2_node and not team_1_node:
+                event = team_2_node.text(strip=True)
 
-        sport = sport_node.text(strip=True)
-        team_1 = team_1_node.text(strip=True)
-        team_2 = team_2_node.text(strip=True)
+            elif team_1_node and team_2_node:
+                event = (
+                    f"{team_1_node.text(strip=True)} vs {team_2_node.text(strip=True)}"
+                )
 
-        key = f"[{sport}] {team_1} vs {team_2} (SPRT9)"
+            else:
+                continue
 
-        if cached_keys & {key}:
-            continue
+            if not (href := card.attributes.get("href")):
+                continue
 
-        events.append(
-            {
-                "sport": sport,
-                "event": f"{team_1} vs {team_2}",
-                "link": urljoin(BASE_URL, href),
-            }
-        )
+            key = f"[{sport}] {event} (SPRT9)"
+
+            if cached_keys & {key}:
+                continue
+
+            events.append(
+                {
+                    "sport": sport,
+                    "event": event,
+                    "link": urljoin(BASE_URL, href),
+                }
+            )
 
     return events
 
diff --git a/M3U8/scrapers/streambtw.py b/M3U8/scrapers/streambtw.py
index d28f51c..c22e652 100644
--- a/M3U8/scrapers/streambtw.py
+++ b/M3U8/scrapers/streambtw.py
@@ -50,7 +50,7 @@ async def get_events(client: httpx.AsyncClient) -> list[dict[str, str]]:
 
         return []
 
-    soup = HTMLParser(r.text)
+    soup = HTMLParser(r.content)
 
     events = []
 
diff --git a/M3U8/scrapers/streameast.py b/M3U8/scrapers/streameast.py
index 5bd1282..7a5493f 100644
--- a/M3U8/scrapers/streameast.py
+++ b/M3U8/scrapers/streameast.py
@@ -49,7 +49,7 @@ async def get_events(
 
         return []
 
-    soup = HTMLParser(r.text)
+    soup = HTMLParser(r.content)
 
     events = []
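
Note on the HTMLParser(r.text) -> HTMLParser(r.content) change repeated across fawa, fstv, roxie, shark, streambtw, and streameast: httpx decodes r.text from the charset advertised in the Content-Type header, guessing when that header is missing or wrong, while r.content is the raw body. Handing bytes to the parser lets it resolve the encoding from the document itself (a <meta charset> tag, for example), which is more robust against misconfigured servers. A minimal standalone sketch of the pattern, assuming the scrapers' HTMLParser is selectolax's parser (the css()/css_first()/text(strip=True) calls above match that API) and relying on it accepting bytes as well as str; fetch_title and the example URL are illustrative only:

    import asyncio

    import httpx
    from selectolax.parser import HTMLParser


    async def fetch_title(client: httpx.AsyncClient, url: str) -> str | None:
        # Raw bytes let the parser detect the document's own encoding
        # instead of trusting httpx's header-based guess.
        r = await client.get(url)
        r.raise_for_status()

        soup = HTMLParser(r.content)
        node = soup.css_first("title")

        return node.text(strip=True) if node else None


    async def main() -> None:
        async with httpx.AsyncClient(timeout=10) as client:
            print(await fetch_title(client, "https://example.com"))


    if __name__ == "__main__":
        asyncio.run(main())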
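The sport9.py rewrite swaps a single fetch of the landing page for three concurrent fetches of yesterday, today, and tomorrow, joined with asyncio.gather; get_html degrades to b"" on failure, so one bad day cannot cancel the other two, and an empty page simply yields no match cards. A standalone sketch of that fetch pattern follows. The repo's Time.now()/.delta() helper is not shown in the patch, so stdlib datetime stands in for it here; the timeout is an assumed value, and the "date" query parameter is taken from the patch itself:

    import asyncio
    from datetime import datetime, timedelta

    import httpx

    BASE_URL = "https://sport9.ru"


    async def get_html(client: httpx.AsyncClient, url: str, date: str) -> bytes:
        # Failures degrade to empty bytes instead of raising, so one bad
        # day cannot cancel the requests gathered alongside it.
        try:
            r = await client.get(url, params={"date": date})
            r.raise_for_status()
        except Exception:
            return b""

        return r.content


    async def fetch_window() -> list[bytes]:
        now = datetime.now()
        dates = [(now + timedelta(days=d)).date().isoformat() for d in (-1, 0, 1)]

        async with httpx.AsyncClient(timeout=10) as client:
            # gather() runs the three requests concurrently and returns
            # the bodies in the same order the dates were listed.
            return await asyncio.gather(
                *(get_html(client, BASE_URL, d) for d in dates)
            )


    if __name__ == "__main__":
        print([len(page) for page in asyncio.run(fetch_window())])

Keeping the per-day failure handling inside get_html, rather than wrapping the gather() call, is what lets the scraper treat each day independently while still deduplicating against cached_keys in a single pass.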