From 00000d927c7d5f103a196e4fa794cefdaeb2273d Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Sun, 7 Jun 2026 19:37:38 -0400
Subject: [PATCH] e

- remove ovogoal.py
- re-add mainportal.py
---
 M3U8/fetch.py               |   6 +-
 M3U8/scrapers/mainportal.py | 234 ++++++++++++++++++++++++++++++++++++
 M3U8/scrapers/ovogoal.py    | 144 ----------------------
 3 files changed, 237 insertions(+), 147 deletions(-)
 create mode 100644 M3U8/scrapers/mainportal.py
 delete mode 100644 M3U8/scrapers/ovogoal.py

Note: the mainportal.py hunk was revised after export (pages are paired
with their sport by request order, stateshot parsing tolerates trailing
JavaScript, docstrings added), so the blob hash on its index line is stale.

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 55d06dfc..0f06ecdf 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -10,7 +10,7 @@ from scrapers import (
     fawa,
     fsports,
     istreameast,
-    ovogoal,
+    mainportal,
     roxie,
     shark,
     streamcenter,
@@ -64,7 +64,7 @@ async def main() -> None:
     httpx_tasks = [
         asyncio.create_task(fawa.scrape()),
         asyncio.create_task(istreameast.scrape()),
-        # asyncio.create_task(ovogoal.scrape()),
+        asyncio.create_task(mainportal.scrape()),
         asyncio.create_task(shark.scrape()),
         asyncio.create_task(streamcenter.scrape()),
         asyncio.create_task(streamsgate.scrape()),
@@ -93,7 +93,7 @@ async def main() -> None:
         | fawa.urls
         | fsports.urls
         | istreameast.urls
-        | ovogoal.urls
+        | mainportal.urls
         | roxie.urls
         | shark.urls
         | streamcenter.urls
diff --git a/M3U8/scrapers/mainportal.py b/M3U8/scrapers/mainportal.py
new file mode 100644
index 00000000..fc159b2c
--- /dev/null
+++ b/M3U8/scrapers/mainportal.py
@@ -0,0 +1,234 @@
+import asyncio
+import json
+import re
+from functools import partial
+from urllib.parse import urljoin
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+TAG = "MP66"
+
+CACHE_FILE = Cache(TAG, exp=10_800)
+
+API_URLS = {
+    sport: f"https://api.{sport.lower()}24all.ir"
+    for sport in [
+        "MLB",
+        # "NBA",
+        # "NFL",
+        "NHL",
+    ]
+}
+
+BASE_URLS = {sport: url.replace("api.", "") for sport, url in API_URLS.items()}
+
+
+async def process_event(
+    sport: str,
+    flavor_id: str,
+    media_id: int,
+    url_num: int,
+) -> str | None:
+    """Request the M3U8 stream URL for a single media event.
+
+    Posts ``flavor_id`` and ``media_id`` to the ``generate_stream_info``
+    endpoint of the API host for ``sport``.
+
+    Returns the ``url`` field of the JSON reply, or ``None`` when the reply
+    status is not 200 or the field is missing or empty. ``url_num`` is only
+    used to label log lines.
+    """
+
+    r = await network.client.post(
+        urljoin(API_URLS[sport], "api/v2/generate_stream_info"),
+        headers={"Referer": BASE_URLS[sport]},
+        json={"flavor_id": flavor_id, "media_event_id": media_id},
+    )
+
+    if r.status_code != 200:
+        log.warning(f"URL {url_num}) Failed to create post request.")
+        return None
+
+    data: dict[str, str] = r.json()
+
+    if not (m3u8_url := data.get("url")):
+        log.warning(f"URL {url_num}) No M3U8 found")
+        return None
+
+    log.info(f"URL {url_num}) Captured M3U8")
+
+    return m3u8_url
+
+
+async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+    """Collect free live games that are not in the cache yet.
+
+    Fetches the landing page of every sport in ``BASE_URLS``, reads the
+    ``stateshot`` JSON object embedded in each page and keeps the games
+    whose ``datetime`` lies between one hour before and one minute after
+    the current time, whose cache key is not in ``cached_keys`` and whose
+    media event maps to a flavor id starting with ``free.live``.
+
+    Returns one dict per kept game with the keys ``sport``, ``event``,
+    ``timestamp``, ``flavor_id`` and ``media_id``.
+    """
+
+    sports = list(BASE_URLS)
+
+    tasks = [network.request(BASE_URLS[sport], log=log) for sport in sports]
+
+    # gather() keeps task order, so every page is paired with the sport it
+    # was requested for. Matching the final response URL back to BASE_URLS
+    # breaks after a redirect and yields a sport that has no API host.
+    results = await asyncio.gather(*tasks)
+
+    events = []
+
+    pages = [(sport, html.text) for sport, html in zip(sports, results) if html]
+
+    if not pages:
+        return events
+
+    now = Time.clean(Time.now())
+
+    stateshot_ptrn = re.compile(r"var\s+stateshot\s+=\s+(.*);", re.I)
+
+    decoder = json.JSONDecoder()
+
+    start_dt = now.delta(hours=-1)
+    end_dt = now.delta(minutes=1)
+
+    for sport, content in pages:
+        if not (match := stateshot_ptrn.search(content)):
+            continue
+
+        try:
+            # raw_decode() stops after the first JSON value, so JavaScript
+            # swallowed by the greedy capture group does not break parsing.
+            data, _ = decoder.raw_decode(match[1].lstrip())
+        except json.JSONDecodeError:
+            data = None
+
+        if not isinstance(data, dict):
+            log.warning(f"[{sport}] Failed to parse stateshot data")
+            continue
+
+        team_names: dict[int, str] = {
+            t.get("id"): t.get("name") for t in data.get("teams", [])
+        }
+
+        event_to_flavor_id: dict[int, str] = {
+            event_id: flavor["id"]
+            for flavor in data.get("flavors", [])
+            for event_id in flavor.get("media_event_ids", [])
+        }
+
+        game_to_media_id: dict[int, int] = {
+            x.get("game_id"): x.get("id") for x in data.get("media_events", [])
+        }
+
+        for game in data.get("games", []):
+            event_dt = Time.fromisoformat(game["datetime"]).to_tz("EST")
+
+            if not start_dt <= event_dt <= end_dt:
+                continue
+
+            away = team_names.get(game["away_team_id"])
+            home = team_names.get(game["home_team_id"])
+
+            event_name = f"{away} vs {home}"
+
+            if f"[{sport}] {event_name} ({TAG})" in cached_keys:
+                continue
+
+            media_id = game_to_media_id.get(game["id"], 0)
+
+            flavor_id = event_to_flavor_id.get(media_id)
+
+            if not flavor_id or not flavor_id.lower().startswith("free.live"):
+                continue
+
+            events.append(
+                {
+                    "sport": sport,
+                    "event": event_name,
+                    "timestamp": event_dt.timestamp(),
+                    "flavor_id": flavor_id,
+                    "media_id": media_id,
+                }
+            )
+
+    return events
+
+
+async def scrape() -> None:
+    """Refresh ``urls`` with cached entries plus newly captured streams.
+
+    Cached entries that have a URL are loaded into ``urls`` first. Every
+    new event is then resolved through ``process_event`` and written to the
+    cache whether or not a URL was captured; only entries with a URL are
+    added to ``urls``.
+    """
+
+    cached_urls = CACHE_FILE.load()
+
+    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+
+    valid_count = cached_count = len(valid_urls)
+
+    urls.update(valid_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info('Scraping from "https://mainportal66.com"')
+
+    if events := await get_events(cached_urls.keys()):
+        log.info(f"Processing {len(events)} new URL(s)")
+
+        for i, ev in enumerate(events, start=1):
+            sport, event, ts = ev["sport"], ev["event"], ev["timestamp"]
+
+            handler = partial(
+                process_event,
+                sport=sport,
+                flavor_id=ev["flavor_id"],
+                media_id=ev["media_id"],
+                url_num=i,
+            )
+
+            url = await network.safe_process(
+                handler,
+                url_num=i,
+                semaphore=network.HTTP_S,
+                log=log,
+            )
+
+            key = f"[{sport}] {event} ({TAG})"
+
+            tvg_id, logo = leagues.get_tvg_info(sport, event)
+
+            entry = {
+                "url": url,
+                "logo": logo,
+                "base": BASE_URLS[sport],
+                "timestamp": ts,
+                "id": tvg_id or "Live.Event.us",
+            }
+
+            cached_urls[key] = entry
+
+            if url:
+                valid_count += 1
+
+                urls[key] = entry
+
+        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")
+
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/ovogoal.py b/M3U8/scrapers/ovogoal.py
deleted file mode 100644
index 97fec349..00000000
--- a/M3U8/scrapers/ovogoal.py
+++ /dev/null
@@ -1,144 +0,0 @@
-import re
-from functools import partial
-from urllib.parse import urljoin
-
-from selectolax.parser import HTMLParser
-
-from .utils import Cache, Time, get_logger, leagues, network
-
-log = get_logger(__name__)
-
-urls: dict[str, dict[str, str | float]] = {}
-
-TAG = "OVO"
-
-CACHE_FILE = Cache(TAG, exp=28_800)
-
-BASE_URL = "https://ovogoalz.top"
-
-
-async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]:
-    nones = None, None
-
-    if not (html_data := await network.request(url, log=log)):
-        log.warning(f"URL {url_num}) Failed to load url.")
-        return nones
-
-    soup = HTMLParser(html_data.content)
-
-    iframe = soup.css_first("iframe")
-
-    if not iframe or not (iframe_src := iframe.attributes.get("src")):
-        log.warning(f"URL {url_num}) No iframe element found.")
-        return nones
-
-    if not (
-        iframe_src_data := await network.request(
-            iframe_src,
-            headers={"Referer": url},
-            log=log,
-        )
-    ):
-        log.warning(f"URL {url_num}) Failed to load iframe source.")
-        return nones
-
-    valid_m3u8 = re.compile(r'(var|const)\s+(\w+)\s+=\s+"([^"]*)"', re.I)
-
-    if not (match := valid_m3u8.search(iframe_src_data.text)):
-        log.warning(f"URL {url_num}) No Clappr source found.")
-        return nones
-
-    log.info(f"URL {url_num}) Captured M3U8")
-
-    return match[3], iframe_src
-
-
-async def get_events() -> list[dict[str, str]]:
-    events = []
-
-    if not (html_data := await network.request(BASE_URL, log=log)):
-        return events
-
-    soup = HTMLParser(html_data.content)
-
-    sport = "Live Event"
-
-    for card in soup.css(".main-content .stream-row"):
-        if (not (watch_btn_elem := card.css_first(".watch-btn"))) or (
-            not (onclick := watch_btn_elem.attributes.get("onclick"))
-        ):
-            continue
-
-        if not (event_name_elem := card.css_first(".stream-info")):
-            continue
-
-        href = onclick.split(".href=")[-1].replace("'", "")
-
-        event_name = event_name_elem.text(strip=True)
-
-        events.append(
-            {
-                "sport": sport,
-                "event": event_name,
-                "link": urljoin(f"{html_data.url}", href),
-            }
-        )
-
-    return events
-
-
-async def scrape() -> None:
-    if cached_urls := CACHE_FILE.load():
-        urls.update({k: v for k, v in cached_urls.items() if v["url"]})
-
-        log.info(f"Loaded {len(urls)} event(s) from cache")
-
-        return
-
-    log.info(f'Scraping from "{BASE_URL}"')
-
-    if events := await get_events():
-        log.info(f"Processing {len(events)} URL(s)")
-
-        now = Time.clean(Time.now())
-
-        for i, ev in enumerate(events, start=1):
-            handler = partial(
-                process_event,
-                url=(link := ev["link"]),
-                url_num=i,
-            )
-
-            url, iframe = await network.safe_process(
-                handler,
-                url_num=i,
-                semaphore=network.HTTP_S,
-                log=log,
-            )
-
-            sport, event = ev["sport"], ev["event"]
-
-            key = f"[{sport}] {event} ({TAG})"
-
-            tvg_id, logo = leagues.get_tvg_info(sport, event)
-
-            entry = {
-                "url": url,
-                "logo": logo,
-                "base": iframe,
-                "timestamp": now.timestamp(),
-                "id": tvg_id or "Live.Event.us",
-                "link": link,
-            }
-
-            cached_urls[key] = entry
-
-            if url:
-                urls[key] = entry
-
-        log.info(f"Collected and cached {len(urls)} event(s)")
-
-    else:
-        log.info("No events found")
-
-    CACHE_FILE.write(cached_urls)