From 00000d920aa169bde4f8c0a622e5bd65dbb7c2dd Mon Sep 17 00:00:00 2001
From: doms9 <96013514+doms9@users.noreply.github.com>
Date: Tue, 23 Dec 2025 03:17:48 -0500
Subject: [PATCH] add semaphores to scrapers

(maybe) fix hanging on watchfooty

misc. edits
---
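Notes: every scraper now hands a shared asyncio.Semaphore into
network.safe_process(), capping concurrency per transport: at most 10
httpx-based handlers (HTTP_S) and 3 Playwright-based handlers (PW_S) run
at once. A minimal sketch of the gating pattern, with a simplified
signature (an illustration of the idea, not the full implementation
below):

    import asyncio

    HTTP_S = asyncio.Semaphore(10)  # cap concurrent httpx handlers
    PW_S = asyncio.Semaphore(3)     # cap concurrent Playwright pages

    async def safe_process(fn, semaphore, timeout=10):
        # hold a slot for the handler's whole lifetime so at most N run
        # at once; wait_for still bounds each handler's runtime
        async with semaphore:
            try:
                return await asyncio.wait_for(fn(), timeout=timeout)
            except asyncio.TimeoutError:
                return None  # skip the event; the slot frees on exit
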
 EPG/fetch.py                   |  2 +-
 M3U8/fetch.py                  |  2 +-
 M3U8/scrapers/embedhd.py       |  1 +
 M3U8/scrapers/fawa.py          |  2 +-
 M3U8/scrapers/istreameast.py   |  2 +-
 M3U8/scrapers/pixel.py         | 54 +++++++++++++++++++---------------
 M3U8/scrapers/ppv.py           |  1 +
 M3U8/scrapers/roxie.py         |  1 +
 M3U8/scrapers/shark.py         |  1 +
 M3U8/scrapers/sport9.py        |  1 +
 M3U8/scrapers/streambtw.py     |  6 ++--
 M3U8/scrapers/streamcenter.py  |  1 +
 M3U8/scrapers/streamfree.py    |  9 +++++-
 M3U8/scrapers/streamhub.py     |  1 +
 M3U8/scrapers/streamsgate.py   |  1 +
 M3U8/scrapers/strmd.py         |  1 +
 M3U8/scrapers/tvpass.py        |  9 +++++-
 M3U8/scrapers/utils/webwork.py | 47 +++++++++++++++++------------
 M3U8/scrapers/watchfooty.py    | 33 ++++++++------------
 M3U8/scrapers/webcast.py       |  1 +
 20 files changed, 103 insertions(+), 73 deletions(-)

diff --git a/EPG/fetch.py b/EPG/fetch.py
index 5d7983b..fbd916e 100644
--- a/EPG/fetch.py
+++ b/EPG/fetch.py
@@ -22,7 +22,7 @@ epg_urls = [
 ]
 
 client = httpx.AsyncClient(
-    timeout=5,
+    timeout=httpx.Timeout(5.0),
     follow_redirects=True,
     http2=True,
     headers={
diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 75de2ff..135eb2b 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -64,7 +64,7 @@ async def main() -> None:
         asyncio.create_task(streamsgate.scrape()),
         asyncio.create_task(strmd.scrape()),
         asyncio.create_task(tvpass.scrape()),
-        # asyncio.create_task(watchfooty.scrape()),
+        asyncio.create_task(watchfooty.scrape()),
         asyncio.create_task(webcast.scrape()),
     ]
 
diff --git a/M3U8/scrapers/embedhd.py b/M3U8/scrapers/embedhd.py
index 5ff3cf8..30c7622 100644
--- a/M3U8/scrapers/embedhd.py
+++ b/M3U8/scrapers/embedhd.py
@@ -101,6 +101,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )
 
diff --git a/M3U8/scrapers/fawa.py b/M3U8/scrapers/fawa.py
index 8d61668..aec9edd 100644
--- a/M3U8/scrapers/fawa.py
+++ b/M3U8/scrapers/fawa.py
@@ -114,8 +114,8 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.HTTP_S,
             log=log,
-            timeout=10,
         )
 
         if url:
diff --git a/M3U8/scrapers/istreameast.py b/M3U8/scrapers/istreameast.py
index c4ce5b7..d8a33c3 100644
--- a/M3U8/scrapers/istreameast.py
+++ b/M3U8/scrapers/istreameast.py
@@ -131,8 +131,8 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.HTTP_S,
             log=log,
-            timeout=10,
         )
 
         if url:
diff --git a/M3U8/scrapers/pixel.py b/M3U8/scrapers/pixel.py
index de9ab0a..14a2b73 100644
--- a/M3U8/scrapers/pixel.py
+++ b/M3U8/scrapers/pixel.py
@@ -1,6 +1,7 @@
 import json
+from functools import partial
 
-from playwright.async_api import async_playwright
+from playwright.async_api import BrowserContext, async_playwright
 
 from .utils import Cache, Time, get_logger, leagues, network
 
@@ -15,36 +16,29 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=19_800)
 BASE_URL = "https://pixelsport.tv/backend/livetv/events"
 
 
-async def get_api_data() -> dict[str, list[dict, str, str]]:
-    async with async_playwright() as p:
-        try:
-            browser, context = await network.browser(p)
+async def get_api_data(context: BrowserContext) -> dict[str, list[dict, str, str]]:
+    try:
+        page = await context.new_page()
 
-            page = await context.new_page()
+        await page.goto(
+            BASE_URL,
+            wait_until="domcontentloaded",
+            timeout=10_000,
+        )
 
-            await page.goto(
-                BASE_URL,
-                wait_until="domcontentloaded",
-                timeout=10_000,
-            )
+        raw_json = await page.locator("pre").inner_text(timeout=5_000)
+    except Exception as e:
+        log.error(f'Failed to fetch "{BASE_URL}": {e}')
 
-            raw_json = await page.locator("pre").inner_text(timeout=5_000)
-
-        except Exception as e:
-            log.error(f'Failed to fetch "{BASE_URL}": {e}')
-
-            return {}
-
-        finally:
-            await browser.close()
+        return {}
 
     return json.loads(raw_json)
 
 
-async def get_events() -> dict[str, dict[str, str | float]]:
+async def get_events(context: BrowserContext) -> dict[str, dict[str, str | float]]:
     now = Time.clean(Time.now())
 
-    api_data = await get_api_data()
+    api_data = await get_api_data(context)
 
     events = {}
 
@@ -91,9 +85,21 @@ async def scrape() -> None:
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    events = await get_events()
+    async with async_playwright() as p:
+        browser, context = await network.browser(p)
 
-    urls.update(events)
+        handler = partial(get_events, context=context)
+
+        events = await network.safe_process(
+            handler,
+            url_num=1,
+            semaphore=network.PW_S,
+            log=log,
+        )
+
+        await browser.close()
+
+    urls.update(events or {})
 
     CACHE_FILE.write(urls)
 
diff --git a/M3U8/scrapers/ppv.py b/M3U8/scrapers/ppv.py
index d4ad414..5bc31b4 100644
--- a/M3U8/scrapers/ppv.py
+++ b/M3U8/scrapers/ppv.py
@@ -123,6 +123,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )
 
diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py
index 868c2e8..d04f848 100644
--- a/M3U8/scrapers/roxie.py
+++ b/M3U8/scrapers/roxie.py
@@ -159,6 +159,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.HTTP_S,
             log=log,
         )
 
diff --git a/M3U8/scrapers/shark.py b/M3U8/scrapers/shark.py
index 26297f2..bc42cc3 100644
--- a/M3U8/scrapers/shark.py
+++ b/M3U8/scrapers/shark.py
@@ -137,6 +137,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.HTTP_S,
             log=log,
         )
 
diff --git a/M3U8/scrapers/sport9.py b/M3U8/scrapers/sport9.py
index f3b7ab6..f795d45 100644
--- a/M3U8/scrapers/sport9.py
+++ b/M3U8/scrapers/sport9.py
@@ -121,6 +121,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )
 
diff --git a/M3U8/scrapers/streambtw.py b/M3U8/scrapers/streambtw.py
index e6029b4..78cc3e5 100644
--- a/M3U8/scrapers/streambtw.py
+++ b/M3U8/scrapers/streambtw.py
@@ -66,9 +66,7 @@ async def get_events() -> list[dict[str, str]]:
         ):
             continue
 
-        league = league_elem.text(strip=True)
-
-        name = event_elem.text(strip=True)
+        league, name = league_elem.text(strip=True), event_elem.text(strip=True)
 
         events.append(
             {
@@ -108,8 +106,8 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.HTTP_S,
             log=log,
-            timeout=10,
         )
 
         if url:
diff --git a/M3U8/scrapers/streamcenter.py b/M3U8/scrapers/streamcenter.py
index 86719db..acf3622 100644
--- a/M3U8/scrapers/streamcenter.py
+++ b/M3U8/scrapers/streamcenter.py
@@ -119,6 +119,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )
 
diff --git a/M3U8/scrapers/streamfree.py b/M3U8/scrapers/streamfree.py
index f065ccd..050fd1c 100644
--- a/M3U8/scrapers/streamfree.py
+++ b/M3U8/scrapers/streamfree.py
@@ -77,7 +77,14 @@ async def scrape() -> None:
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    urls.update(await get_events())
+    events = await network.safe_process(
+        get_events,
+        url_num=1,
+        semaphore=network.HTTP_S,
+        log=log,
+    )
+
+    urls.update(events or {})
 
     CACHE_FILE.write(urls)
 
diff --git a/M3U8/scrapers/streamhub.py b/M3U8/scrapers/streamhub.py
index ad051ad..9eda928 100644
--- a/M3U8/scrapers/streamhub.py
+++ b/M3U8/scrapers/streamhub.py
@@ -164,6 +164,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )
 
diff --git a/M3U8/scrapers/streamsgate.py b/M3U8/scrapers/streamsgate.py
index 66fe4e8..1a9ee6e 100644
--- a/M3U8/scrapers/streamsgate.py
+++ b/M3U8/scrapers/streamsgate.py
@@ -151,6 +151,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )
 
diff --git a/M3U8/scrapers/strmd.py b/M3U8/scrapers/strmd.py
index a882152..21b5b44 100644
--- a/M3U8/scrapers/strmd.py
+++ b/M3U8/scrapers/strmd.py
@@ -151,6 +151,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )
 
diff --git a/M3U8/scrapers/tvpass.py b/M3U8/scrapers/tvpass.py
index 560734c..f99db2e 100644
--- a/M3U8/scrapers/tvpass.py
+++ b/M3U8/scrapers/tvpass.py
@@ -66,7 +66,14 @@ async def scrape() -> None:
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    urls.update(await get_events())
+    events = await network.safe_process(
+        get_events,
+        url_num=1,
+        semaphore=network.HTTP_S,
+        log=log,
+    )
+
+    urls.update(events or {})
 
     CACHE_FILE.write(urls)
 
diff --git a/M3U8/scrapers/utils/webwork.py b/M3U8/scrapers/utils/webwork.py
index 942e0ef..097a02b 100644
--- a/M3U8/scrapers/utils/webwork.py
+++ b/M3U8/scrapers/utils/webwork.py
@@ -24,11 +24,15 @@ class Network:
         "Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
     )
 
+    HTTP_S = asyncio.Semaphore(10)
+
+    PW_S = asyncio.Semaphore(3)
+
     proxy_base = "https://stream.nvrmind.xyz"
 
     def __init__(self) -> None:
         self.client = httpx.AsyncClient(
-            timeout=5,
+            timeout=httpx.Timeout(5.0),
             follow_redirects=True,
             headers={"User-Agent": Network.UA},
             http2=True,
@@ -85,34 +89,39 @@ class Network:
     async def safe_process(
         fn: Callable[[], Awaitable[T]],
         url_num: int,
-        timeout: int | float = 15,
+        semaphore: asyncio.Semaphore,
+        timeout: int | float = 10,
         log: logging.Logger | None = None,
     ) -> T | None:
         log = log or logger
 
-        task = asyncio.create_task(fn())
+        async with semaphore:
+            task = asyncio.create_task(fn())
 
-        try:
-            return await asyncio.wait_for(task, timeout=timeout)
-        except asyncio.TimeoutError:
-            log.warning(f"URL {url_num}) Timed out after {timeout}s, skipping event")
+            try:
+                return await asyncio.wait_for(task, timeout=timeout)
+            except asyncio.TimeoutError:
+                log.warning(
+                    f"URL {url_num}) Timed out after {timeout}s, skipping event"
+                )
 
-            task.cancel()
+                task.cancel()
 
-            try:
-                await task
-            except asyncio.CancelledError:
-                pass
+                try:
+                    await task
+                except asyncio.CancelledError:
+                    pass
 
-            except Exception as e:
-                log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")
+                except Exception as e:
+                    log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")
 
-            return
-        except Exception as e:
-            log.error(f"URL {url_num}) Unexpected error: {e}")
+                return
 
-            return
+            except Exception as e:
+                log.error(f"URL {url_num}) Unexpected error: {e}")
+
+            return
 
     @staticmethod
     def capture_req(
diff --git a/M3U8/scrapers/watchfooty.py b/M3U8/scrapers/watchfooty.py
index 3b90307..4367ac8 100644
--- a/M3U8/scrapers/watchfooty.py
+++ b/M3U8/scrapers/watchfooty.py
@@ -78,12 +78,12 @@ async def process_event(
 
     pattern = re.compile(r"\((\d+)\)")
 
-    page = await context.new_page()
-
     captured: list[str] = []
 
     got_one = asyncio.Event()
 
+    page = await context.new_page()
+
     handler = partial(
         network.capture_req,
         captured=captured,
@@ -102,10 +102,7 @@ async def process_event(
     await page.wait_for_timeout(2_000)
 
     try:
-        header = await page.wait_for_selector(
-            "text=/Stream Links/i",
-            timeout=5_000,
-        )
+        header = await page.wait_for_selector("text=/Stream Links/i", timeout=5_000)
 
         text = await header.inner_text()
     except TimeoutError:
@@ -120,8 +117,7 @@ async def process_event(
 
     try:
         first_available = await page.wait_for_selector(
-            'a[href*="/stream/"]',
-            timeout=3_000,
+            'a[href*="/stream/"]', timeout=3_000
         )
     except TimeoutError:
         log.warning(f"URL {url_num}) No available stream links.")
@@ -133,22 +129,18 @@ async def process_event(
 
         return None, None
 
+    embed = re.sub(
+        pattern=r"^.*\/stream",
+        repl="https://spiderembed.top/embed",
+        string=href,
+    )
+
     await page.goto(
-        href,
+        embed,
         wait_until="domcontentloaded",
         timeout=5_000,
     )
 
-    if not (iframe := await page.query_selector("iframe")):
-        log.warning(f"URL {url_num}) No iframe found.")
-
-        return None, None
-
-    if not (iframe_src := await iframe.get_attribute("src")):
-        log.warning(f"URL {url_num}) No iframe source found.")
-
-        return None, None
-
     wait_task = asyncio.create_task(got_one.wait())
 
     try:
@@ -170,7 +162,7 @@ async def process_event(
     if captured:
         log.info(f"URL {url_num}) Captured M3U8")
 
-        return captured[-1], iframe_src
+        return captured[0], embed
 
     log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
 
@@ -282,6 +274,7 @@ async def scrape() -> None:
         url, iframe = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )
 
diff --git a/M3U8/scrapers/webcast.py b/M3U8/scrapers/webcast.py
index 1b9ee12..8a5623b 100644
--- a/M3U8/scrapers/webcast.py
+++ b/M3U8/scrapers/webcast.py
@@ -141,6 +141,7 @@ async def scrape() -> None:
         url = await network.safe_process(
             handler,
             url_num=i,
+            semaphore=network.PW_S,
             log=log,
         )