From 00000d92478f03cf8c044e405461f60cb5b7cbeb Mon Sep 17 00:00:00 2001 From: doms9 <96013514+doms9@users.noreply.github.com> Date: Wed, 11 Feb 2026 23:22:53 -0500 Subject: [PATCH] e --- M3U8/scrapers/cdnlivetv.py | 5 +++-- M3U8/scrapers/embedhd.py | 5 +++-- M3U8/scrapers/pawa.py | 2 +- M3U8/scrapers/pixel.py | 9 +++++---- M3U8/scrapers/ppv.py | 9 +++++---- M3U8/scrapers/sport9.py | 2 +- M3U8/scrapers/streamhub.py | 20 +++++++++++++++----- M3U8/scrapers/totalsportek.py | 2 +- 8 files changed, 34 insertions(+), 20 deletions(-) diff --git a/M3U8/scrapers/cdnlivetv.py b/M3U8/scrapers/cdnlivetv.py index 69e9291f..c5c54b18 100644 --- a/M3U8/scrapers/cdnlivetv.py +++ b/M3U8/scrapers/cdnlivetv.py @@ -1,4 +1,5 @@ from functools import partial +from urllib.parse import urljoin from playwright.async_api import Browser @@ -14,7 +15,7 @@ CACHE_FILE = Cache(TAG, exp=10_800) API_FILE = Cache(f"{TAG}-api", exp=19_800) -API_URL = "https://api.cdn-live.tv/api/v1/events/sports" +API_URL = "https://api.cdn-live.tv" async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: @@ -26,7 +27,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: log.info("Refreshing API cache") if r := await network.request( - API_URL, + urljoin(API_URL, "api/v1/events/sports"), log=log, params={"user": "cdnlivetv", "plan": "free"}, ): diff --git a/M3U8/scrapers/embedhd.py b/M3U8/scrapers/embedhd.py index e7c24a3a..9e6a2280 100644 --- a/M3U8/scrapers/embedhd.py +++ b/M3U8/scrapers/embedhd.py @@ -1,4 +1,5 @@ from functools import partial +from urllib.parse import urljoin from playwright.async_api import Browser @@ -14,7 +15,7 @@ CACHE_FILE = Cache(TAG, exp=5_400) API_CACHE = Cache(f"{TAG}-api", exp=28_800) -BASE_URL = "https://embedhd.org/api-event.php" +BASE_URL = "https://embedhd.org" def fix_league(s: str) -> str: @@ -29,7 +30,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: api_data = {"timestamp": now.timestamp()} - if r := await 
network.request(BASE_URL, log=log): + if r := await network.request(urljoin(BASE_URL, "api-event.php"), log=log): api_data: dict = r.json() api_data["timestamp"] = now.timestamp() diff --git a/M3U8/scrapers/pawa.py b/M3U8/scrapers/pawa.py index a29bf5af..51d6f333 100644 --- a/M3U8/scrapers/pawa.py +++ b/M3U8/scrapers/pawa.py @@ -15,7 +15,7 @@ TAG = "PAWA" CACHE_FILE = Cache(TAG, exp=10_800) -BASE_URL = "https://pawastreams.net/feed/" +BASE_URL = "https://pawastreams.net/feed" async def process_event(url: str, url_num: int) -> str | None: diff --git a/M3U8/scrapers/pixel.py b/M3U8/scrapers/pixel.py index f00cc851..d779a1d6 100644 --- a/M3U8/scrapers/pixel.py +++ b/M3U8/scrapers/pixel.py @@ -1,5 +1,6 @@ import json from functools import partial +from urllib.parse import urljoin from playwright.async_api import Browser, Page @@ -13,20 +14,20 @@ TAG = "PIXEL" CACHE_FILE = Cache(TAG, exp=19_800) -BASE_URL = "https://pixelsport.tv/backend/livetv/events" +BASE_URL = "https://pixelsport.tv" async def get_api_data(page: Page) -> dict[str, list[dict, str, str]]: try: await page.goto( - BASE_URL, + url := urljoin(BASE_URL, "backend/livetv/events"), wait_until="domcontentloaded", timeout=10_000, ) raw_json = await page.locator("pre").inner_text(timeout=5_000) except Exception as e: - log.error(f'Failed to fetch "{BASE_URL}": {e}') + log.error(f'Failed to fetch "{url}": {e}') return {} @@ -65,7 +66,7 @@ async def get_events(page: Page) -> dict[str, dict[str, str | float]]: events[key] = { "url": stream_link, "logo": logo, - "base": "https://pixelsport.tv", + "base": BASE_URL, "timestamp": now.timestamp(), "id": tvg_id or "Live.Event.us", } diff --git a/M3U8/scrapers/ppv.py b/M3U8/scrapers/ppv.py index 08568a8a..2a9220b8 100644 --- a/M3U8/scrapers/ppv.py +++ b/M3U8/scrapers/ppv.py @@ -1,4 +1,5 @@ from functools import partial +from urllib.parse import urljoin from playwright.async_api import Browser @@ -15,9 +16,9 @@ CACHE_FILE = Cache(TAG, exp=10_800) API_FILE = 
Cache(f"{TAG}-api", exp=19_800) MIRRORS = [ - "https://old.ppv.to/api/streams", - "https://api.ppvs.su/api/streams", - "https://api.ppv.to/api/streams", + "https://old.ppv.to", + "https://api.ppvs.su", + "https://api.ppv.to", ] @@ -29,7 +30,7 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]: api_data = {"timestamp": now.timestamp()} - if r := await network.request(url, log=log): + if r := await network.request(urljoin(url, "api/streams"), log=log): api_data: dict = r.json() API_FILE.write(api_data) diff --git a/M3U8/scrapers/sport9.py b/M3U8/scrapers/sport9.py index fc3e4466..0c188548 100644 --- a/M3U8/scrapers/sport9.py +++ b/M3U8/scrapers/sport9.py @@ -15,7 +15,7 @@ TAG = "SPORT9" CACHE_FILE = Cache(TAG, exp=5_400) -BASE_URL = "https://sport9.ru/" +BASE_URL = "https://sport9.ru" async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: diff --git a/M3U8/scrapers/streamhub.py b/M3U8/scrapers/streamhub.py index 3bc4b600..c2c87244 100644 --- a/M3U8/scrapers/streamhub.py +++ b/M3U8/scrapers/streamhub.py @@ -17,7 +17,7 @@ CACHE_FILE = Cache(TAG, exp=10_800) HTML_CACHE = Cache(f"{TAG}-html", exp=28_800) -BASE_URL = "https://streamhub.pro/" +MIRRORS = ["https://streamhub.pro", "https://livesports4u.net"] CATEGORIES = { "Soccer": "sport_68c02a4464a38", @@ -35,15 +35,17 @@ CATEGORIES = { async def refresh_html_cache( + url: str, date: str, sport_id: str, ts: float, ) -> dict[str, dict[str, str | float]]: + events = {} if not ( html_data := await network.request( - urljoin(BASE_URL, f"events/{date}"), + urljoin(url, f"events/{date}"), log=log, params={"sport_id": sport_id}, ) @@ -93,7 +95,7 @@ async def refresh_html_cache( return events -async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: +async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]: now = Time.clean(Time.now()) if not (events := HTML_CACHE.load()): @@ -101,6 +103,7 @@ async def get_events(cached_keys: list[str]) -> 
list[dict[str, str]]: tasks = [ refresh_html_cache( + url, date, sport_id, now.timestamp(), @@ -143,9 +146,16 @@ async def scrape(browser: Browser) -> None: log.info(f"Loaded {cached_count} event(s) from cache") - log.info(f'Scraping from "{BASE_URL}"') + if not (base_url := await network.get_base(MIRRORS)): + log.warning("No working STREAMHUB mirrors") - events = await get_events(cached_urls.keys()) + CACHE_FILE.write(cached_urls) + + return + + log.info(f'Scraping from "{base_url}"') + + events = await get_events(base_url, cached_urls.keys()) log.info(f"Processing {len(events)} new URL(s)") diff --git a/M3U8/scrapers/totalsportek.py b/M3U8/scrapers/totalsportek.py index 5fc0bc57..ec8c41f7 100644 --- a/M3U8/scrapers/totalsportek.py +++ b/M3U8/scrapers/totalsportek.py @@ -14,7 +14,7 @@ TAG = "TOTALSPRTK" CACHE_FILE = Cache(TAG, exp=28_800) -BASE_URL = "https://live3.totalsportek777.com/" +BASE_URL = "https://live3.totalsportek777.com" def fix_txt(s: str) -> str: