From 00000d905bebe4e7f2d9410a490ddbf65ae3bdb4 Mon Sep 17 00:00:00 2001 From: doms9 <96013514+doms9@users.noreply.github.com> Date: Sat, 11 Apr 2026 11:40:47 -0400 Subject: [PATCH] e - edit scraping for ovogoal.py - misc edits. --- M3U8/scrapers/fawa.py | 2 +- M3U8/scrapers/ovogoal.py | 46 +++++++++++++++-------------------- M3U8/scrapers/totalsportek.py | 2 +- M3U8/scrapers/tvapp.py | 2 +- 4 files changed, 23 insertions(+), 29 deletions(-) diff --git a/M3U8/scrapers/fawa.py b/M3U8/scrapers/fawa.py index c6062dae..1722e58b 100644 --- a/M3U8/scrapers/fawa.py +++ b/M3U8/scrapers/fawa.py @@ -76,7 +76,7 @@ async def get_events(cached_hrefs: set[str]) -> list[dict[str, str]]: { "sport": sport, "event": clean_event.sub("", event_name), - "link": urljoin(BASE_URL, href), + "link": urljoin(f"{html_data.url}", href), "href": href, } ) diff --git a/M3U8/scrapers/ovogoal.py b/M3U8/scrapers/ovogoal.py index f9274b37..beeacbf6 100644 --- a/M3U8/scrapers/ovogoal.py +++ b/M3U8/scrapers/ovogoal.py @@ -1,5 +1,6 @@ import re from functools import partial +from urllib.parse import urljoin from selectolax.parser import HTMLParser @@ -16,10 +17,6 @@ CACHE_FILE = Cache(TAG, exp=28_800) BASE_URL = "https://ovogoaal.com" -def fix_league(s: str) -> str: - return " ".join(x.capitalize() for x in s.split()) if len(s) > 5 else s.upper() - - async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]: nones = None, None @@ -64,34 +61,31 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: soup = HTMLParser(html_data.content) - sport = None + sport = "Live Event" - for node in soup.css(".wrapper *"): - if (cls := node.attributes.get("class")) == "section-title": - sport = fix_league(node.text(strip=True)) + for card in soup.css(".main-content .stream-row"): + if (not (watch_btn_elem := card.css_first(".watch-btn"))) or ( + not (onclick := watch_btn_elem.attributes.get("onclick")) + ): + continue - if node.tag == "a" and cls == "match": - if not sport: - continue + if not (event_name_elem := card.css_first(".stream-info")): + continue - if not (team_elems := node.css(".team")): - continue + href = onclick.split(".href=")[-1].replace("'", "") - if not (href := node.attributes.get("href")): - continue + event_name = event_name_elem.text(strip=True) - event_name = " vs ".join(team.text(strip=True) for team in team_elems) + if f"[{sport}] {event_name} ({TAG})" in cached_keys: + continue - if f"[{sport}] {event_name} ({TAG})" in cached_keys: - continue - - events.append( - { - "sport": sport, - "event": event_name, - "link": href, - } - ) + events.append( + { + "sport": sport, + "event": event_name, + "link": urljoin(f"{html_data.url}", href), + } + ) return events diff --git a/M3U8/scrapers/totalsportek.py b/M3U8/scrapers/totalsportek.py index a3d6370c..ca4304fa 100644 --- a/M3U8/scrapers/totalsportek.py +++ b/M3U8/scrapers/totalsportek.py @@ -113,7 +113,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: { "sport": sport, "event": event_name, - "link": urljoin(BASE_URL, href), + "link": urljoin(f"{html_data.url}", href), } ) diff --git a/M3U8/scrapers/tvapp.py b/M3U8/scrapers/tvapp.py index 60ac53d2..e541991a 100644 --- a/M3U8/scrapers/tvapp.py +++ b/M3U8/scrapers/tvapp.py @@ -66,7 +66,7 @@ async def get_events() -> list[dict[str, str]]: { "sport": sport, "event": event_name, - "link": urljoin(BASE_URL, href), + "link": urljoin(f"{html_data.url}", href), } )