add semaphores to scrapers
(maybe) fix hanging on watchfooty
misc. edits
doms9 2025-12-23 03:17:48 -05:00
parent 6e9729bf8c
commit 00000d920a
20 changed files with 103 additions and 73 deletions
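
For reference, a minimal standalone sketch of the throttling pattern this commit threads through the scrapers: every unit of work acquires a shared asyncio.Semaphore before running, so spawning many tasks no longer means running them all at once. The limits match the ones added in the diff below (10 for plain HTTP, 3 for Playwright); the fetch_one coroutine is a hypothetical stand-in for a scraper handler, not code from the repo.

import asyncio

HTTP_S = asyncio.Semaphore(10)  # plain HTTP work
PW_S = asyncio.Semaphore(3)     # heavier Playwright work


async def fetch_one(i: int) -> str:
    # Hypothetical stand-in for a per-URL scraper handler.
    async with HTTP_S:  # at most 10 of these bodies execute concurrently
        await asyncio.sleep(0.1)
        return f"url-{i}"


async def main() -> None:
    # Creating 50 tasks is fine; the semaphore, not the task count, bounds concurrency.
    results = await asyncio.gather(*(fetch_one(i) for i in range(50)))
    print(len(results))


if __name__ == "__main__":
    asyncio.run(main())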

View file

@@ -22,7 +22,7 @@ epg_urls = [
 ]

 client = httpx.AsyncClient(
-    timeout=5,
+    timeout=httpx.Timeout(5.0),
     follow_redirects=True,
     http2=True,
     headers={

View file

@@ -64,7 +64,7 @@ async def main() -> None:
         asyncio.create_task(streamsgate.scrape()),
         asyncio.create_task(strmd.scrape()),
         asyncio.create_task(tvpass.scrape()),
-        # asyncio.create_task(watchfooty.scrape()),
+        asyncio.create_task(watchfooty.scrape()),
         asyncio.create_task(webcast.scrape()),
     ]

View file

@@ -101,6 +101,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )

View file

@@ -114,8 +114,8 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.HTTP_S,
             log=log,
-            timeout=10,
         )

         if url:

View file

@@ -131,8 +131,8 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.HTTP_S,
             log=log,
-            timeout=10,
         )

         if url:

View file

@@ -1,6 +1,7 @@
 import json
+from functools import partial

-from playwright.async_api import async_playwright
+from playwright.async_api import BrowserContext, async_playwright

 from .utils import Cache, Time, get_logger, leagues, network
@@ -15,36 +16,29 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=19_800)
 BASE_URL = "https://pixelsport.tv/backend/livetv/events"


-async def get_api_data() -> dict[str, list[dict, str, str]]:
-    async with async_playwright() as p:
-        try:
-            browser, context = await network.browser(p)
-            page = await context.new_page()
-
-            await page.goto(
-                BASE_URL,
-                wait_until="domcontentloaded",
-                timeout=10_000,
-            )
-
-            raw_json = await page.locator("pre").inner_text(timeout=5_000)
-        except Exception as e:
-            log.error(f'Failed to fetch "{BASE_URL}": {e}')
-
-            return {}
-        finally:
-            await browser.close()
+async def get_api_data(context: BrowserContext) -> dict[str, list[dict, str, str]]:
+    try:
+        page = await context.new_page()
+
+        await page.goto(
+            BASE_URL,
+            wait_until="domcontentloaded",
+            timeout=10_000,
+        )
+
+        raw_json = await page.locator("pre").inner_text(timeout=5_000)
+    except Exception as e:
+        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+
+        return {}

     return json.loads(raw_json)


-async def get_events() -> dict[str, dict[str, str | float]]:
+async def get_events(context: BrowserContext) -> dict[str, dict[str, str | float]]:
     now = Time.clean(Time.now())

-    api_data = await get_api_data()
+    api_data = await get_api_data(context)

     events = {}
@@ -91,9 +85,21 @@ async def scrape() -> None:
     log.info(f'Scraping from "{BASE_URL}"')

-    events = await get_events()
+    async with async_playwright() as p:
+        browser, context = await network.browser(p)
+
+        handler = partial(get_events, context=context)
+
+        events = await network.safe_process(
+            handler,
+            url_num=1,
+            semaphore=network.PW_S,
+            log=log,
+        )
+
+        await browser.close()

-    urls.update(events)
+    urls.update(events or {})

     CACHE_FILE.write(urls)
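
A small sketch of the handler shape used in the hunk above: functools.partial binds the shared browser context into a zero-argument coroutine factory, which is what safe_process's fn: Callable[[], Awaitable[T]] parameter expects. FakeContext stands in for Playwright's BrowserContext purely for illustration.

import asyncio
from functools import partial


class FakeContext:
    # Illustrative stand-in for playwright.async_api.BrowserContext.
    async def new_page_title(self) -> str:
        return "events"


async def get_events(context: FakeContext) -> dict[str, str]:
    title = await context.new_page_title()
    return {"page": title}


async def main() -> None:
    context = FakeContext()
    handler = partial(get_events, context=context)  # callable with no arguments
    print(await handler())  # {'page': 'events'}


if __name__ == "__main__":
    asyncio.run(main())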

View file

@@ -123,6 +123,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )

View file

@@ -159,6 +159,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.HTTP_S,
             log=log,
         )

View file

@@ -137,6 +137,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.HTTP_S,
             log=log,
         )

View file

@@ -121,6 +121,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )

View file

@@ -66,9 +66,7 @@ async def get_events() -> list[dict[str, str]]:
         ):
             continue

-        league = league_elem.text(strip=True)
-        name = event_elem.text(strip=True)
+        league, name = league_elem.text(strip=True), event_elem.text(strip=True)

         events.append(
             {
@@ -108,8 +106,8 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.HTTP_S,
             log=log,
-            timeout=10,
         )

         if url:

View file

@@ -119,6 +119,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )

View file

@@ -77,7 +77,14 @@ async def scrape() -> None:
     log.info(f'Scraping from "{BASE_URL}"')

-    urls.update(await get_events())
+    events = await network.safe_process(
+        get_events,
+        url_num=1,
+        semaphore=network.HTTP_S,
+        log=log,
+    )
+
+    urls.update(events or {})

     CACHE_FILE.write(urls)

View file

@@ -164,6 +164,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )

View file

@@ -151,6 +151,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )

View file

@@ -151,6 +151,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )

View file

@@ -66,7 +66,14 @@ async def scrape() -> None:
     log.info(f'Scraping from "{BASE_URL}"')

-    urls.update(await get_events())
+    events = await network.safe_process(
+        get_events,
+        url_num=1,
+        semaphore=network.HTTP_S,
+        log=log,
+    )
+
+    urls.update(events or {})

     CACHE_FILE.write(urls)

View file

@@ -24,11 +24,15 @@ class Network:
         "Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
     )

+    HTTP_S = asyncio.Semaphore(10)
+    PW_S = asyncio.Semaphore(3)
+
     proxy_base = "https://stream.nvrmind.xyz"

     def __init__(self) -> None:
         self.client = httpx.AsyncClient(
-            timeout=5,
+            timeout=httpx.Timeout(5.0),
             follow_redirects=True,
             headers={"User-Agent": Network.UA},
             http2=True,
@@ -85,34 +89,39 @@ class Network:
     async def safe_process(
         fn: Callable[[], Awaitable[T]],
         url_num: int,
-        timeout: int | float = 15,
+        semaphore: asyncio.Semaphore,
+        timeout: int | float = 10,
         log: logging.Logger | None = None,
     ) -> T | None:
         log = log or logger

-        task = asyncio.create_task(fn())
-
-        try:
-            return await asyncio.wait_for(task, timeout=timeout)
-        except asyncio.TimeoutError:
-            log.warning(f"URL {url_num}) Timed out after {timeout}s, skipping event")
-
-            task.cancel()
-
-            try:
-                await task
-            except asyncio.CancelledError:
-                pass
-            except Exception as e:
-                log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")
-
-            return
-        except Exception as e:
-            log.error(f"URL {url_num}) Unexpected error: {e}")
-
-            return
+        async with semaphore:
+            task = asyncio.create_task(fn())
+
+            try:
+                return await asyncio.wait_for(task, timeout=timeout)
+            except asyncio.TimeoutError:
+                log.warning(
+                    f"URL {url_num}) Timed out after {timeout}s, skipping event"
+                )
+
+                task.cancel()
+
+                try:
+                    await task
+                except asyncio.CancelledError:
+                    pass
+                except Exception as e:
+                    log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")
+
+                return
+            except Exception as e:
+                log.error(f"URL {url_num}) Unexpected error: {e}")
+
+                return

     @staticmethod
     def capture_req(
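
The timeout handling that safe_process wraps inside the semaphore above boils down to: wait_for the task, and on timeout cancel it and await it so the CancelledError is absorbed instead of leaking into the event loop. A stripped-down, runnable sketch of that shape (slow_job and run_with_timeout are illustrative names, not repo code):

import asyncio


async def slow_job() -> str:
    # Hypothetical handler that hangs far longer than the timeout.
    await asyncio.sleep(60)
    return "never reached"


async def run_with_timeout(timeout: float = 1.0) -> str | None:
    task = asyncio.create_task(slow_job())

    try:
        return await asyncio.wait_for(task, timeout=timeout)
    except asyncio.TimeoutError:
        task.cancel()
        try:
            await task  # absorb the cancellation so nothing propagates later
        except asyncio.CancelledError:
            pass
        return None


if __name__ == "__main__":
    print(asyncio.run(run_with_timeout()))  # prints None after ~1s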

View file

@@ -78,12 +78,12 @@ async def process_event(
     pattern = re.compile(r"\((\d+)\)")

-    page = await context.new_page()
-
     captured: list[str] = []
     got_one = asyncio.Event()

+    page = await context.new_page()
+
     handler = partial(
         network.capture_req,
         captured=captured,
@@ -102,10 +102,7 @@ async def process_event(
     await page.wait_for_timeout(2_000)

     try:
-        header = await page.wait_for_selector(
-            "text=/Stream Links/i",
-            timeout=5_000,
-        )
+        header = await page.wait_for_selector("text=/Stream Links/i", timeout=5_000)

         text = await header.inner_text()
     except TimeoutError:
@@ -120,8 +117,7 @@ async def process_event(
     try:
         first_available = await page.wait_for_selector(
-            'a[href*="/stream/"]',
-            timeout=3_000,
+            'a[href*="/stream/"]', timeout=3_000
         )
     except TimeoutError:
         log.warning(f"URL {url_num}) No available stream links.")
@@ -133,22 +129,18 @@ async def process_event(
         return None, None

+    embed = re.sub(
+        pattern=r"^.*\/stream",
+        repl="https://spiderembed.top/embed",
+        string=href,
+    )
+
     await page.goto(
-        href,
+        embed,
         wait_until="domcontentloaded",
         timeout=5_000,
     )

-    if not (iframe := await page.query_selector("iframe")):
-        log.warning(f"URL {url_num}) No iframe found.")
-
-        return None, None
-
-    if not (iframe_src := await iframe.get_attribute("src")):
-        log.warning(f"URL {url_num}) No iframe source found.")
-
-        return None, None
-
     wait_task = asyncio.create_task(got_one.wait())

     try:
@@ -170,7 +162,7 @@ async def process_event(
     if captured:
         log.info(f"URL {url_num}) Captured M3U8")

-        return captured[-1], iframe_src
+        return captured[0], embed

     log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
@@ -282,6 +274,7 @@ async def scrape() -> None:
         url, iframe = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )

View file

@@ -141,6 +141,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )