From 00000d9ab1fb5935f6222988bb9ab77c18bae65f Mon Sep 17 00:00:00 2001 From: doms9 <96013514+doms9@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:02:26 -0500 Subject: [PATCH] e - edit livetvsx.py scraping --- M3U8/scrapers/livetvsx.py | 55 ++++++++++++++++++++-------------- M3U8/scrapers/pixel.py | 7 ++++- M3U8/scrapers/roxie.py | 6 +++- M3U8/scrapers/utils/webwork.py | 7 ++++- M3U8/scrapers/watchfooty.py | 6 +++- 5 files changed, 55 insertions(+), 26 deletions(-) diff --git a/M3U8/scrapers/livetvsx.py b/M3U8/scrapers/livetvsx.py index 96d1d372..76866f9f 100644 --- a/M3U8/scrapers/livetvsx.py +++ b/M3U8/scrapers/livetvsx.py @@ -1,4 +1,5 @@ import asyncio +import re from functools import partial import feedparser @@ -24,9 +25,22 @@ VALID_SPORTS = [ "Basketball", "Football", "Ice Hockey", + "Wrestling", ] +def fix_url(s: str) -> str | None: + pattern = re.compile(r"eventinfo\/(\d*)", re.I) + + if not (match := pattern.search(s)): + return + + elif not (event_id := match[1]).isalnum(): + return + + return f"https://cdn.livetv872.me/cache/links/en.{event_id}.html" + + async def process_event( url: str, url_num: int, @@ -43,43 +57,40 @@ async def process_event( got_one=got_one, ) + event_id_pattern = re.compile(r"&c=(\d*)", re.I) + page.on("request", handler) try: - await page.goto( + resp = await page.goto( url, wait_until="domcontentloaded", timeout=10_000, ) - await page.wait_for_timeout(1_500) + if resp.status != 200: + log.warning(f"URL {url_num}) status code: {resp.status}") + return - buttons = await page.query_selector_all(".lnktbj a[href*='webplayer']") + try: + event_a = page.locator('a[title*="Aliez"]').first - labels = await page.eval_on_selector_all( - ".lnktyt span", - "elements => elements.map(el => el.textContent.trim().toLowerCase())", - ) + href = await event_a.get_attribute("href", timeout=1_250) - for btn, label in zip(buttons, labels): - if label in ["web", "youtube"]: - continue - - if not (href := await btn.get_attribute("href")): - continue - - break - - else: + except TimeoutError: log.warning(f"URL {url_num}) No valid sources found.") return - href = href if href.startswith("http") else f"https:{href}" + if match := event_id_pattern.search(href): + event_id = match[1] - href.replace("livetv.sx", "livetv873.me") + event_url = f"https://emb.apl392.me/player/live.php?id={event_id}" + + else: + event_url = href if href.startswith("http") else f"https:{href}" await page.goto( - href, + event_url, wait_until="domcontentloaded", timeout=5_000, ) @@ -130,7 +141,7 @@ async def refresh_xml_cache(now_ts: float) -> dict[str, dict[str, str | float]]: if not (date := entry.get("published")): continue - if not (link := entry.get("link")): + if (not (link := entry.get("link"))) or (not (fixed_link := fix_url(link))): continue if not (title := entry.get("title")): @@ -151,7 +162,7 @@ async def refresh_xml_cache(now_ts: float) -> dict[str, dict[str, str | float]]: "sport": sport, "league": league, "event": title, - "link": link.replace("livetv.sx", "livetv873.me"), + "link": fixed_link, "event_ts": event_dt.timestamp(), "timestamp": now_ts, } diff --git a/M3U8/scrapers/pixel.py b/M3U8/scrapers/pixel.py index 560cad64..28a47b97 100644 --- a/M3U8/scrapers/pixel.py +++ b/M3U8/scrapers/pixel.py @@ -19,12 +19,17 @@ BASE_URL = "https://pixelsport.tv" async def get_api_data(page: Page) -> dict[str, list[dict, str, str]]: try: - await page.goto( + resp = await page.goto( url := urljoin(BASE_URL, "backend/livetv/events"), wait_until="domcontentloaded", timeout=6_000, ) + if resp.status != 200: + log.warning(f"{url} status code: {resp.status}") + + return {} + raw_json = await page.locator("pre").inner_text(timeout=5_000) except Exception as e: log.error(f'Failed to fetch "{url}": {e}') diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py index f1af53b6..96d553b9 100644 --- a/M3U8/scrapers/roxie.py +++ b/M3U8/scrapers/roxie.py @@ -93,12 +93,16 @@ async def process_event( page.on("request", handler) try: - await page.goto( + resp = await page.goto( url, wait_until="domcontentloaded", timeout=6_000, ) + if resp.status != 200: + log.warning(f"URL {url_num}) status code: {resp.status}") + return + try: if btn := await page.wait_for_selector( "button.streambutton:nth-of-type(1)", diff --git a/M3U8/scrapers/utils/webwork.py b/M3U8/scrapers/utils/webwork.py index 55ab510f..31f26b32 100644 --- a/M3U8/scrapers/utils/webwork.py +++ b/M3U8/scrapers/utils/webwork.py @@ -250,12 +250,17 @@ class Network: page.on("request", handler) try: - await page.goto( + resp = await page.goto( url, wait_until="domcontentloaded", timeout=6_000, ) + if resp.status != 200: + log.warning(f"URL {url_num}) status code: {resp.status}") + + return + wait_task = asyncio.create_task(got_one.wait()) try: diff --git a/M3U8/scrapers/watchfooty.py b/M3U8/scrapers/watchfooty.py index 0a93b3ce..dbd5aa24 100644 --- a/M3U8/scrapers/watchfooty.py +++ b/M3U8/scrapers/watchfooty.py @@ -89,12 +89,16 @@ async def process_event( page.on("request", handler) try: - await page.goto( + resp = await page.goto( url, wait_until="domcontentloaded", timeout=8_000, ) + if resp.status != 200: + log.warning(f"URL {url_num}) status code: {resp.status}") + return + await page.wait_for_timeout(2_000) try: