From 00000d9288a0792f778ddda5b6dc478d00720848 Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Wed, 1 Apr 2026 23:21:43 -0400
Subject: [PATCH] e - add listapreta.py

---
 M3U8/fetch.py                  |   3 +
 M3U8/scrapers/listapreta.py    | 207 +++++++++++++++++++++++++++++++++
 M3U8/scrapers/utils/webwork.py |   4 +-
 3 files changed, 213 insertions(+), 1 deletion(-)
 create mode 100644 M3U8/scrapers/listapreta.py

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 0ece5fcc..eba9be88 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -10,6 +10,7 @@ from scrapers import (
     fawa,
     fsports,
     istreameast,
+    listapreta,
     livetvsx,
     ovogoal,
     pawa,
@@ -73,6 +74,7 @@ async def main() -> None:
     httpx_tasks = [
         asyncio.create_task(fawa.scrape()),
         asyncio.create_task(istreameast.scrape()),
+        asyncio.create_task(listapreta.scrape()),
         # asyncio.create_task(ovogoal.scrape()),
         asyncio.create_task(pawa.scrape()),
         asyncio.create_task(shark.scrape()),
@@ -101,6 +103,7 @@ async def main() -> None:
         | fawa.urls
         | fsports.urls
         | istreameast.urls
+        | listapreta.urls
         | livetvsx.urls
         | ovogoal.urls
         | pawa.urls
diff --git a/M3U8/scrapers/listapreta.py b/M3U8/scrapers/listapreta.py
new file mode 100644
index 00000000..b1a68846
--- /dev/null
+++ b/M3U8/scrapers/listapreta.py
@@ -0,0 +1,207 @@
+from functools import partial
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+TAG = "LISTA"
+
+CACHE_FILE = Cache(TAG, exp=10_800)
+
+API_FILE = Cache(f"{TAG}-api", exp=19_800)
+
+API_URL = "https://listapreta.site/sports-widget/events.php"
+
+
+async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]:
+    """Resolve an event link into its M3U8 URL and player referer.
+
+    Fetches a token for the event id, then requests the M3U8 endpoint
+    without following redirects and reads the stream URL from the
+    ``Location`` header.
+
+    Returns ``(m3u8_url, referer)``, or ``(None, None)`` on any failure.
+    """
+
+    nones = None, None
+
+    event_id = url.split("id=")[-1]
+
+    if not (
+        token_req := await network.request(
+            "https://lista-preta-tv.site/generate_token.php",
+            params={"id": event_id},
+            log=log,
+        )
+    ):
+        log.warning(f"URL {url_num}) Failed to load token data.")
+        return nones
+
+    if not (token_data := token_req.json()):
+        log.warning(f"URL {url_num}) No token data available.")
+        return nones
+
+    elif not (token := token_data.get("token")) or not (exp := token_data.get("exp")):
+        log.warning(f"URL {url_num}) No token data available.")
+        return nones
+
+    ref = f"https://lista-preta-tv.site/player-all.html?id={event_id}"
+
+    if not (
+        m3u8_req := await network.request(
+            "https://lista-preta-tv.site/m3u8.php",
+            headers={"Referer": ref},
+            params={"id": event_id, "token": token, "exp": exp},
+            follow_redirects=False,
+            log=log,
+        )
+    ):
+        log.warning(f"URL {url_num}) Unable to fetch M3U8 request.")
+        return nones
+
+    elif not (m3u8 := m3u8_req.headers.get("Location")):
+        log.warning(f"URL {url_num}) Unable to fetch M3U8 request.")
+        return nones
+
+    log.info(f"URL {url_num}) Captured M3U8")
+
+    return m3u8, ref
+
+
+async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+    """Return uncached events that start within 30 minutes of now.
+
+    API data is read from ``API_FILE``; when that yields nothing it is
+    re-fetched from ``API_URL`` and written back to ``API_FILE``.
+    """
+
+    now = Time.clean(Time.now())
+
+    if not (api_data := API_FILE.load(per_entry=False, index=-1)):
+        log.info("Refreshing API cache")
+
+        api_data = [{"timestamp": now.timestamp()}]
+
+        if r := await network.request(API_URL, log=log):
+            # Keep the placeholder when the API returns an empty payload;
+            # indexing [-1] on an empty list would raise IndexError.
+            if fetched := r.json():
+                api_data: list[dict[str, str]] = fetched
+
+                api_data[-1]["timestamp"] = now.timestamp()
+
+        API_FILE.write(api_data)
+
+    events = []
+
+    start_dt = now.delta(minutes=-30)
+    end_dt = now.delta(minutes=30)
+
+    for event in api_data:
+        sport = event.get("sport")
+
+        t1, t2 = event.get("home"), event.get("away")
+
+        if not (sport and t1 and t2):
+            continue
+
+        event_name = f"{t1} vs {t2}"
+
+        if f"[{sport}] {event_name} ({TAG})" in cached_keys:
+            continue
+
+        event_dt = Time.from_str(event["start"], timezone="UTC")
+
+        # Compare the event's own start time; `now` is always in the window.
+        if not start_dt <= event_dt <= end_dt:
+            continue
+
+        if not (channels := event.get("channels")):
+            continue
+
+        event_links: list[str] = [channel["url"] for channel in channels]
+
+        link = event_links[0]
+
+        events.append(
+            {
+                "sport": sport,
+                "event": event_name,
+                "link": link,
+                "timestamp": event_dt.timestamp(),
+            }
+        )
+
+    return events
+
+
+async def scrape() -> None:
+    """Load cached events, resolve new ones and persist the merged cache.
+
+    Entries with a resolved stream URL are published through ``urls``;
+    every processed event is written to ``CACHE_FILE``, resolved or not.
+    """
+
+    cached_urls = CACHE_FILE.load()
+
+    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+
+    valid_count = cached_count = len(valid_urls)
+
+    urls.update(valid_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info('Scraping from "https://listapreta.site"')
+
+    if events := await get_events(cached_urls.keys()):
+        log.info(f"Processing {len(events)} new URL(s)")
+
+        for i, ev in enumerate(events, start=1):
+            handler = partial(
+                process_event,
+                url=(link := ev["link"]),
+                url_num=i,
+            )
+
+            url, iframe = await network.safe_process(
+                handler,
+                url_num=i,
+                semaphore=network.HTTP_S,
+                log=log,
+            )
+
+            sport, event, ts = (
+                ev["sport"],
+                ev["event"],
+                ev["timestamp"],
+            )
+
+            key = f"[{sport}] {event} ({TAG})"
+
+            tvg_id, logo = leagues.get_tvg_info(sport, event)
+
+            entry = {
+                "url": url,
+                "logo": logo,
+                "base": iframe,
+                "timestamp": ts,
+                "id": tvg_id or "Live.Event.us",
+                "link": link,
+            }
+
+            cached_urls[key] = entry
+
+            if url:
+                valid_count += 1
+
+                urls[key] = entry
+
+        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")
+
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/utils/webwork.py b/M3U8/scrapers/utils/webwork.py
index e3ac6cd9..618ae4be 100644
--- a/M3U8/scrapers/utils/webwork.py
+++ b/M3U8/scrapers/utils/webwork.py
@@ -57,7 +57,9 @@ class Network:
         try:
             r = await self.client.get(url, **kwargs)
 
-            r.raise_for_status()
+            # Only 4xx/5xx are errors; redirects are returned to the caller.
+            if r.status_code >= 400:
+                r.raise_for_status()
 
             return r
         except (httpx.HTTPError, httpx.TimeoutException) as e: