From 00000d9a4dea35d2c39ebba01316cee859124d8d Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Sun, 16 Nov 2025 12:59:29 -0500
Subject: [PATCH] e

---
 M3U8/fetch.py          |  2 +-
 M3U8/scrapers/pixel.py | 44 ++++++++++++++++++++++++++----------------
 M3U8/scrapers/roxie.py |  6 +-----
 3 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 8dce558..978addb 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -46,7 +46,7 @@ async def main() -> None:
     tasks = [
         asyncio.create_task(fawa.scrape(network.client)),
         asyncio.create_task(lotus.scrape(network.client)),
-        asyncio.create_task(pixel.scrape(network.client)),
+        asyncio.create_task(pixel.scrape()),
         asyncio.create_task(ppv.scrape(network.client)),
         asyncio.create_task(roxie.scrape(network.client)),
         asyncio.create_task(streambtw.scrape(network.client)),
diff --git a/M3U8/scrapers/pixel.py b/M3U8/scrapers/pixel.py
index bff2dae..60b9221 100644
--- a/M3U8/scrapers/pixel.py
+++ b/M3U8/scrapers/pixel.py
@@ -1,8 +1,9 @@
+import json
 import re
 
-import httpx
+from playwright.async_api import async_playwright
 
-from .utils import Cache, Time, get_logger, leagues
+from .utils import Cache, Time, get_logger, leagues, network
 
 log = get_logger(__name__)
 
@@ -16,36 +17,45 @@ BASE_URL = "https://pixelsport.tv/backend/livetv/events"
 
 
 async def refresh_api_cache(
-    client: httpx.AsyncClient,
     url: str,
     ts: float,
 ) -> dict[str, list[dict, str, str]]:
     log.info("Refreshing API cache")
 
-    try:
-        r = await client.get(url)
-        r.raise_for_status()
-    except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+    async with async_playwright() as p:
+        try:
+            browser, context = await network.browser(p)
 
-        return {}
+            page = await context.new_page()
 
-    data = r.json()
+            await page.goto(
+                url,
+                wait_until="domcontentloaded",
+                timeout=10_000,
+            )
+
+            raw_json = await page.locator("pre").inner_text()
+
+        except Exception as e:
+            log.error(f'Failed to fetch "{url}": {e}')
+
+            return {}
+
+        finally:
+            await browser.close()
+
+    data = json.loads(raw_json)
 
     data["timestamp"] = ts
 
     return data
 
 
-async def get_events(
-    client: httpx.AsyncClient,
-    cached_keys: set[str],
-) -> dict[str, str | float]:
+async def get_events(cached_keys: set[str]) -> dict[str, str | float]:
     now = Time.clean(Time.now())
 
     if not (api_data := API_FILE.load(per_entry=False)):
         api_data = await refresh_api_cache(
-            client,
             BASE_URL,
             now.timestamp(),
         )
@@ -100,7 +110,7 @@ async def get_events(
     return events
 
 
-async def scrape(client: httpx.AsyncClient) -> None:
+async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
     cached_count = len(cached_urls)
     urls.update(cached_urls)
@@ -109,7 +119,7 @@
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    events = await get_events(client, set(cached_urls.keys()))
+    events = await get_events(set(cached_urls.keys()))
 
     if events:
         for d in (urls, cached_urls):
diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py
index 271f9c6..517b145 100644
--- a/M3U8/scrapers/roxie.py
+++ b/M3U8/scrapers/roxie.py
@@ -64,11 +64,7 @@ async def refresh_html_cache(
 
     h2_title = soup.css_first("h2").text(strip=True)
 
-    if sport_name := pattern.search(h2_title):
-        sport = sport_name[1]
-    else:
-        sport = "Event"
-
+    sport = sport_name[1] if (sport_name := pattern.search(h2_title)) else "Event"
     events = {}
 
     for row in soup.css("table#eventsTable tbody tr"):
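
Note on the new refresh_api_cache: navigating a real browser to the JSON endpoint and reading page.locator("pre").inner_text() works because Chromium wraps a bare JSON response in a <pre> element. One hazard in the patch as written: if network.browser(p) itself raises, `browser` is never bound, and the `finally: await browser.close()` then raises NameError, masking the original failure. A minimal sketch of a guarded teardown, assuming network.browser returns a (browser, context) pair as the patch implies:

    import json

    from playwright.async_api import async_playwright

    from .utils import get_logger, network

    log = get_logger(__name__)


    async def refresh_api_cache(url: str, ts: float) -> dict:
        log.info("Refreshing API cache")

        async with async_playwright() as p:
            browser = None

            try:
                browser, context = await network.browser(p)

                page = await context.new_page()

                await page.goto(url, wait_until="domcontentloaded", timeout=10_000)

                # Chromium renders a bare JSON response inside a <pre> element.
                raw_json = await page.locator("pre").inner_text()

            except Exception as e:
                log.error(f'Failed to fetch "{url}": {e}')

                return {}

            finally:
                # Guard: network.browser() may fail before `browser` is bound.
                if browser is not None:
                    await browser.close()

        data = json.loads(raw_json)
        data["timestamp"] = ts

        return data

Initializing browser to None before the try keeps the cleanup path safe no matter where inside the block the failure occurs.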
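
An alternative worth considering: Playwright's BrowserContext.request (an APIRequestContext) can fetch the endpoint directly, sharing the context's cookies and headers without scraping the rendered <pre>. It does not execute in-page JavaScript, though, so if this endpoint sits behind a browser challenge, the page-navigation approach in the patch may be the deliberate choice. A sketch under that caveat, reusing the repo's network.browser helper and the same guarded teardown:

    from playwright.async_api import async_playwright

    from .utils import network


    async def fetch_json(url: str) -> dict:
        async with async_playwright() as p:
            browser = None

            try:
                browser, context = await network.browser(p)

                # APIRequestContext shares the context's cookies and headers,
                # but runs no in-page JavaScript (e.g. challenge pages).
                response = await context.request.get(url, timeout=10_000)

                return await response.json()

            finally:
                if browser is not None:
                    await browser.close()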
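
On the roxie.py change: the walrus operator inside a conditional expression is valid because Python evaluates the condition before either branch, so sport_name is already bound when sport_name[1] runs. A self-contained illustration with a hypothetical regex (roxie.py's actual `pattern` is outside this hunk):

    import re

    pattern = re.compile(r"^(\w+)")

    # In `A if (B := ...) else C`, the condition is evaluated first,
    # so the walrus binding is available inside the true branch.
    sport = m[1] if (m := pattern.search("NBA: Lakers vs Celtics")) else "Event"
    assert sport == "NBA"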