"""Scraper for live-event streams listed on the OvoGoal site (``BASE_URL``).

This module is the new file ``M3U8/scrapers/ovogoal.py``.  The same change
registers it in ``M3U8/fetch.py`` in three places:

* ``ovogoal`` is added to the ``from scrapers import (...)`` list,
* ``asyncio.create_task(ovogoal.scrape())`` is added to the tasks started in
  ``main()``,
* ``ovogoal.urls`` is added to the ``|``-merged mapping of scraper results.

Flow: ``scrape()`` -> ``get_events()`` (listing page) -> ``process_event()``
(one event page and its embedded iframe) -> results stored in ``urls`` and in
the on-disk cache.
"""

import re
from functools import partial
from urllib.parse import urljoin

from selectolax.parser import HTMLParser

from .utils import Cache, Time, get_logger, leagues, network

log = get_logger(__name__)

# Module-level result mapping: event key -> entry dict.  ``fetch.py`` reads it
# as ``ovogoal.urls`` after ``scrape()`` has run.
urls: dict[str, dict[str, str | float]] = {}

# Short source tag; used for the cache name and as a suffix of every event key.
TAG = "OVO"

# NOTE(review): exp=28_800 is 8 hours if Cache measures expiry in seconds -
# confirm against the Cache implementation.
CACHE_FILE = Cache(TAG, exp=28_800)

BASE_URL = "https://ovogoaal.com"

# Matches the first `var NAME = "..."` / `const NAME = "..."` assignment in
# the iframe document; capture group 3 is the quoted value that is returned as
# the stream URL.  Compiled once here rather than on every process_event call.
VALID_M3U8 = re.compile(r'(var|const)\s+(\w+)\s*=\s*"([^"]*)"', re.I)


def _href_from_onclick(onclick: str) -> str:
    """Return the navigation target embedded in an ``onclick`` handler.

    Takes whatever follows the last ``.href=`` and removes quote characters,
    surrounding whitespace and a trailing ``;`` so that
    ``location.href='/x';`` yields ``/x`` rather than ``/x;``.
    """
    target = onclick.split(".href=")[-1].strip().rstrip(";")

    return target.replace("'", "").replace('"', "").strip()


async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]:
    """Resolve one event page to a stream URL and the iframe that serves it.

    Args:
        url: Address of the event page.
        url_num: 1-based position of the event, used only in log messages.

    Returns:
        ``(stream_url, iframe_url)`` on success, ``(None, None)`` when the
        page cannot be loaded, has no ``<iframe src>``, the iframe cannot be
        loaded, or the iframe document contains no matching assignment.
    """
    nones = None, None

    if not (html_data := await network.request(url, log=log)):
        log.warning(f"URL {url_num}) Failed to load url.")
        return nones

    soup = HTMLParser(html_data.content)

    iframe = soup.css_first("iframe")

    if not iframe or not (iframe_src := iframe.attributes.get("src")):
        log.warning(f"URL {url_num}) No iframe element found.")
        return nones

    # The src may be relative or protocol-relative ("//host/embed"); resolve
    # it against the event page so the follow-up request gets a full URL.
    # An already-absolute src is returned unchanged by urljoin.
    iframe_src = urljoin(url, iframe_src)

    if not (
        iframe_src_data := await network.request(
            iframe_src,
            headers={"Referer": url},
            log=log,
        )
    ):
        log.warning(f"URL {url_num}) Failed to load iframe source.")
        return nones

    if not (match := VALID_M3U8.search(iframe_src_data.text)):
        log.warning(f"URL {url_num}) No Clappr source found.")
        return nones

    log.info(f"URL {url_num}) Captured M3U8")

    return match[3], iframe_src


async def get_events() -> list[dict[str, str]]:
    """Collect the events advertised on the site's landing page.

    Returns:
        One dict per usable ``.stream-row`` card with the keys ``sport``
        (always ``"Live Event"``), ``event`` (text of ``.stream-info``) and
        ``link`` (absolute event-page URL).  Empty when the landing page
        cannot be loaded or no card is usable.
    """
    events: list[dict[str, str]] = []

    if not (html_data := await network.request(BASE_URL, log=log)):
        return events

    soup = HTMLParser(html_data.content)

    sport = "Live Event"

    # Links are resolved against the URL the response reports for itself.
    page_url = f"{html_data.url}"

    for card in soup.css(".main-content .stream-row"):
        watch_btn_elem = card.css_first(".watch-btn")

        if not watch_btn_elem:
            continue

        # The target page is only available inside the button's JS handler.
        if not (onclick := watch_btn_elem.attributes.get("onclick")):
            continue

        if not (event_name_elem := card.css_first(".stream-info")):
            continue

        # An empty target would resolve to the listing page itself.
        if not (href := _href_from_onclick(onclick)):
            continue

        events.append(
            {
                "sport": sport,
                "event": event_name_elem.text(strip=True),
                "link": urljoin(page_url, href),
            }
        )

    return events


async def scrape() -> None:
    """Populate ``urls`` from the cache or, failing that, from the site.

    When the cache yields entries, those with a truthy ``"url"`` are copied
    into ``urls`` and nothing is fetched.  Otherwise every event returned by
    ``get_events()`` is processed in order; each result is recorded in the
    cache mapping (including events without a stream URL), and only events
    with a stream URL are published in ``urls``.  The cache mapping is written
    back at the end of every non-cached run.
    """
    # NOTE(review): Cache.load() is defined elsewhere; `or {}` keeps the later
    # item assignment valid should it return None instead of an empty dict.
    cached_urls = CACHE_FILE.load() or {}

    if cached_urls:
        urls.update({k: v for k, v in cached_urls.items() if v.get("url")})

        log.info(f"Loaded {len(urls)} event(s) from cache")

        return

    log.info(f'Scraping from "{BASE_URL}"')

    if events := await get_events():
        log.info(f"Processing {len(events)} URL(s)")

        timestamp = Time.clean(Time.now()).timestamp()

        for i, ev in enumerate(events, start=1):
            link = ev["link"]

            handler = partial(process_event, url=link, url_num=i)

            result = await network.safe_process(
                handler,
                url_num=i,
                semaphore=network.HTTP_S,
                log=log,
            )

            # NOTE(review): safe_process is defined elsewhere; if it yields
            # None (e.g. on timeout or error) instead of process_event's
            # tuple, treat the event as failed rather than crash the run.
            url, iframe = result or (None, None)

            sport, event = ev["sport"], ev["event"]

            key = f"[{sport}] {event} ({TAG})"

            tvg_id, logo = leagues.get_tvg_info(sport, event)

            entry = {
                "url": url,
                "logo": logo,
                "base": iframe,
                "timestamp": timestamp,
                "id": tvg_id or "Live.Event.us",
                "link": link,
            }

            # Failed events are cached too; only resolved ones are published.
            cached_urls[key] = entry

            if url:
                urls[key] = entry

        log.info(f"Collected and cached {len(urls)} event(s)")

    else:
        log.info("No events found")

    CACHE_FILE.write(cached_urls)