doms9 2025-12-18 04:14:54 -05:00
parent 8b2d8cc1fc
commit 00000d939c
20 changed files with 119 additions and 12 deletions

View file

@@ -20,6 +20,7 @@ BASE_URL = "http://www.fawanews.sc/"
async def process_event(url: str, url_num: int) -> str | None:
    if not (html_data := await network.request(url, log=log)):
        log.info(f"URL {url_num}) Failed to load url.")
        return

    valid_m3u8 = re.compile(
@@ -29,9 +30,11 @@ async def process_event(url: str, url_num: int) -> str | None:
    if not (match := valid_m3u8.search(html_data.text)):
        log.info(f"URL {url_num}) No M3U8 found")
        return

    log.info(f"URL {url_num}) Captured M3U8")
    return match[2]
@@ -83,8 +86,11 @@ async def get_events(cached_hrefs: set[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_hrefs = {entry["href"] for entry in cached_urls.values()}
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -137,6 +143,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -21,27 +21,33 @@ async def process_event(url: str, url_num: int) -> str | None:
    if not (event_data := await network.request(url, log=log)):
        log.info(f"URL {url_num}) Failed to load url.")
        return

    soup = HTMLParser(event_data.content)

    if not (iframe := soup.css_first("iframe#wp_player")):
        log.warning(f"URL {url_num}) No iframe element found.")
        return

    if not (iframe_src := iframe.attributes.get("src")):
        log.warning(f"URL {url_num}) No iframe source found.")
        return

    if not (iframe_src_data := await network.request(iframe_src, log=log)):
        log.info(f"URL {url_num}) Failed to load iframe source.")
        return

    if not (match := pattern.search(iframe_src_data.text)):
        log.warning(f"URL {url_num}) No Clappr source found.")
        return

    log.info(f"URL {url_num}) Captured M3U8")
    return base64.b64decode(match[1]).decode("utf-8")
@@ -98,7 +104,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -137,6 +145,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -72,7 +72,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -129,6 +131,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -55,7 +55,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
            continue

        event_name = event["match_name"]

        channel_info: dict[str, str] = event["channel"]
        category: dict[str, str] = channel_info["TVCategory"]
        sport = category["name"]
@@ -82,7 +84,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
async def scrape() -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)
        log.info(f"Loaded {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')

View file

@@ -39,6 +39,7 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str]]:
        API_FILE.write(api_data)

    now = Time.clean(Time.now())
    start_dt = now.delta(minutes=-30)
    end_dt = now.delta(minutes=30)
@@ -50,8 +51,11 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str]]:
        for event in stream_group.get("streams", []):
            name = event.get("name")
            start_ts = event.get("starts_at")
            logo = event.get("poster")
            iframe = event.get("iframe")

            if not (name and start_ts and iframe):
@@ -80,7 +84,9 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -148,6 +154,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -41,9 +41,11 @@ async def process_event(url: str, url_num: int) -> str | None:
    if not (match := valid_m3u8.search(html_data.text)):
        log.info(f"URL {url_num}) No M3U8 found")
        return

    log.info(f"URL {url_num}) Captured M3U8")
    return match[1]
@@ -133,7 +135,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -183,6 +187,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -21,15 +21,18 @@ BASE_URL = "https://sharkstreams.net"
async def process_event(url: str, url_num: int) -> str | None:
    if not (r := await network.request(url, log=log)):
        log.info(f"URL {url_num}) Failed to load url.")
        return

    data: dict[str, list[str]] = r.json()

    if not (urls := data.get("urls")):
        log.info(f"URL {url_num}) No M3U8 found")
        return

    log.info(f"URL {url_num}) Captured M3U8")
    return urls[0]
@@ -47,6 +50,7 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
    for row in soup.css(".row"):
        date_node = row.css_first(".ch-date")
        sport_node = row.css_first(".ch-category")
        name_node = row.css_first(".ch-name")
@@ -54,7 +58,9 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
            continue

        event_dt = Time.from_str(date_node.text(strip=True), timezone="EST")
        sport = sport_node.text(strip=True)
        event_name = name_node.text(strip=True)
        embed_btn = row.css_first("a.hd-link.secondary")
@@ -107,7 +113,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -157,6 +165,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -52,7 +52,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
            continue

        sport = sport_node.text(strip=True)

        team_1_node = card.css_first(".team1 .team-name")
        team_2_node = card.css_first(".team2 .team-name")

        if team_1_node and team_2_node:
@@ -88,7 +90,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -146,6 +150,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -91,7 +91,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -148,6 +150,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -70,7 +70,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
async def scrape() -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)
        log.info(f"Loaded {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')

View file

@@ -135,7 +135,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -194,6 +196,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -125,7 +125,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -182,6 +184,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -117,7 +117,9 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -180,6 +182,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -26,7 +26,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
    for i, line in enumerate(data, start=1):
        if line.startswith("#EXTINF"):
            tvg_id_match = re.search(r'tvg-id="([^"]*)"', line)
            tvg_name_match = re.search(r'tvg-name="([^"]*)"', line)
            group_title_match = re.search(r'group-title="([^"]*)"', line)

            tvg = tvg_id_match[1] if tvg_id_match else None
@@ -57,7 +59,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
async def scrape() -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)
        log.info(f"Loaded {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')
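A self-contained version of the #EXTINF parsing shown above, run against a small hypothetical playlist; the attribute regexes match the hunk, while the sample channels are invented:

import re

# Hypothetical playlist snippet; the real input is the remote M3U file.
PLAYLIST = """#EXTM3U
#EXTINF:-1 tvg-id="espn.us" tvg-name="ESPN" group-title="Sports",ESPN
https://example.com/espn.m3u8
#EXTINF:-1 tvg-name="Unnamed" group-title="News",Some Channel
https://example.com/news.m3u8
"""

data = PLAYLIST.splitlines()

for i, line in enumerate(data, start=1):
    if line.startswith("#EXTINF"):
        tvg_id_match = re.search(r'tvg-id="([^"]*)"', line)
        tvg_name_match = re.search(r'tvg-name="([^"]*)"', line)
        group_title_match = re.search(r'group-title="([^"]*)"', line)

        tvg = tvg_id_match[1] if tvg_id_match else None
        name = tvg_name_match[1] if tvg_name_match else None
        group = group_title_match[1] if group_title_match else None

        # With a 1-based enumerate, data[i] is the line after the #EXTINF entry,
        # i.e. the stream URL.
        url = data[i] if i < len(data) else None
        print(tvg, name, group, url)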

View file

@@ -7,7 +7,9 @@ from .config import Time
class Cache:
    def __init__(self, file: str, exp: int | float) -> None:
        self.file = Path(__file__).parent.parent / "caches" / file
        self.exp = exp
        self.now_ts = Time.now().timestamp()

    def is_fresh(self, entry: dict) -> bool:
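The constructor now keeps the expiry and a single now_ts snapshot; one way is_fresh and load could build on that, assuming entries carry an epoch "timestamp" field and using time.time() in place of the project's Time helper:

import json
import time
from pathlib import Path


class Cache:
    """Minimal sketch: a JSON-file cache whose entries carry a 'timestamp' field."""

    def __init__(self, file: str, exp: int | float) -> None:
        self.file = Path(__file__).parent / "caches" / file
        self.exp = exp  # maximum entry age in seconds
        self.now_ts = time.time()

    def is_fresh(self, entry: dict) -> bool:
        # Assumed entry shape: {"timestamp": <epoch seconds>, ...}
        return self.now_ts - entry.get("timestamp", 0) < self.exp

    def load(self) -> dict:
        if not self.file.exists():
            return {}
        cached = json.loads(self.file.read_text())
        # Drop anything older than the expiry window.
        return {k: v for k, v in cached.items() if self.is_fresh(v)}


cache = Cache("events.json", exp=3 * 60 * 60)  # three-hour expiry
print(cache.load())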

View file

@@ -45,11 +45,13 @@ class Time(datetime):
    def to_tz(self, tzone: str) -> "Time":
        dt = self.astimezone(self.ZONES[tzone])
        return self.__class__.fromtimestamp(dt.timestamp(), tz=self.ZONES[tzone])

    @classmethod
    def _to_class_tz(cls, dt) -> "Time":
        dt = dt.astimezone(cls.TZ)
        return cls.fromtimestamp(dt.timestamp(), tz=cls.TZ)

    @classmethod

View file

@@ -22,9 +22,13 @@ COLORS = {
class ColorFormatter(logging.Formatter):
    def format(self, record) -> str:
        color = COLORS.get(record.levelname, COLORS["reset"])
        levelname = record.levelname

        record.levelname = f"{color}{levelname:<8}{COLORS['reset']}"
        formatted = super().format(record)
        record.levelname = levelname

        return formatted
@@ -38,10 +42,15 @@ def get_logger(name: str | None = None) -> logging.Logger:
    if not logger.hasHandlers():
        handler = logging.StreamHandler()
        formatter = ColorFormatter(LOG_FMT, datefmt="%Y-%m-%d | %H:%M:%S")

        handler.setFormatter(formatter)
        logger.addHandler(handler)

        logger.setLevel(logging.INFO)
        logger.propagate = False

    return logger
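The formatter swaps in a colourized, padded levelname, renders the record, then restores the original value so other handlers are unaffected; get_logger wires it up once per logger. A runnable sketch with an assumed COLORS table and LOG_FMT string:

import logging

# Assumed ANSI colour table; the project defines its own COLORS mapping.
COLORS = {
    "INFO": "\033[32m",
    "WARNING": "\033[33m",
    "ERROR": "\033[31m",
    "reset": "\033[0m",
}

LOG_FMT = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"


class ColorFormatter(logging.Formatter):
    def format(self, record) -> str:
        color = COLORS.get(record.levelname, COLORS["reset"])
        levelname = record.levelname

        # Swap in the coloured, padded levelname just for this render...
        record.levelname = f"{color}{levelname:<8}{COLORS['reset']}"
        formatted = super().format(record)
        # ...then restore it so other handlers see the plain value.
        record.levelname = levelname

        return formatted


def get_logger(name: str | None = None) -> logging.Logger:
    logger = logging.getLogger(name)

    if not logger.hasHandlers():
        handler = logging.StreamHandler()
        handler.setFormatter(ColorFormatter(LOG_FMT, datefmt="%Y-%m-%d | %H:%M:%S"))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
        logger.propagate = False

    return logger


get_logger("demo").info("colourized output")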

View file

@@ -12,6 +12,8 @@ from playwright.async_api import Browser, BrowserContext, Playwright, Request
from .logger import get_logger

logger = get_logger(__name__)

T = TypeVar("T")
@@ -32,8 +34,6 @@ class Network:
            http2=True,
        )
        self._logger = get_logger("network")

    @staticmethod
    def build_proxy_url(
        tag: str,
@@ -58,16 +58,18 @@ class Network:
        **kwargs,
    ) -> httpx.Response | None:
        log = log or self._logger
        log = log or logger

        try:
            r = await self.client.get(url, **kwargs)
            r.raise_for_status()
        except Exception as e:
            log.error(f'Failed to fetch "{url}": {e}\n{kwargs = }')
            return ""
        return r
            r.raise_for_status()
            return r
        except (httpx.HTTPError, httpx.TimeoutException) as e:
            log.error(f'Failed to fetch "{url}": {e}')
            return ""

    async def get_base(self, mirrors: list[str]) -> str | None:
        random.shuffle(mirrors)
@@ -89,7 +91,7 @@ class Network:
        log: logging.Logger | None = None,
    ) -> T | None:
        log = log or get_logger("network")
        log = log or logger

        task = asyncio.create_task(fn())
@@ -104,13 +106,15 @@ class Network:
                await task
            except asyncio.CancelledError:
                pass
            except Exception as e:
                log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")

            return None
            return
        except Exception as e:
            log.error(f"URL {url_num}) Unexpected error: {e}")
            return None
            return

    @staticmethod
    def capture_req(
@@ -141,7 +145,7 @@ class Network:
        log: logging.Logger | None = None,
    ) -> str | None:
        log = log or self._logger
        log = log or logger

        page = await context.new_page()
@@ -170,6 +174,7 @@ class Network:
                await asyncio.wait_for(wait_task, timeout=timeout)
            except asyncio.TimeoutError:
                log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
                return
            finally:
@@ -183,17 +188,21 @@ class Network:
            if captured:
                log.info(f"URL {url_num}) Captured M3U8")
                return captured[0]

            log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
            return
        except Exception as e:
            log.warning(f"URL {url_num}) Exception while processing: {e}")
            return
        finally:
            page.remove_listener("request", handler)
            await page.close()

    @staticmethod
@@ -205,7 +214,9 @@ class Network:
        if browser == "brave":
            brwsr = await playwright.chromium.connect_over_cdp("http://localhost:9222")
            context = brwsr.contexts[0]
        else:
            brwsr = await playwright.firefox.launch(headless=True)
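The request wrapper now falls back to a module-level logger and narrows its exception handling around the httpx call. A sketch of that shape; note that httpx.TimeoutException already subclasses httpx.HTTPError, so catching HTTPError alone covers timeouts, and this version returns None on failure to match the httpx.Response | None annotation (the diff's new code still returns an empty string):

import asyncio
import logging

import httpx

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class Network:
    def __init__(self) -> None:
        # The real client also enables http2=True, which needs the h2 extra.
        self.client = httpx.AsyncClient(follow_redirects=True, timeout=10)

    async def request(
        self,
        url: str,
        log: logging.Logger | None = None,
        **kwargs,
    ) -> httpx.Response | None:
        log = log or logger  # fall back to the module-level logger

        try:
            r = await self.client.get(url, **kwargs)
            r.raise_for_status()
            return r
        except httpx.HTTPError as e:
            # Transport errors, timeouts and bad status codes all land here.
            log.error(f'Failed to fetch "{url}": {e}')
            return None


async def main() -> None:
    network = Network()
    r = await network.request("https://example.com")
    print(r.status_code if r else "request failed")
    await network.client.aclose()


asyncio.run(main())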

View file

@@ -104,12 +104,14 @@ async def process_event(
            text = await header.inner_text()
        except TimeoutError:
            log.warning(f"URL {url_num}) Can't find stream links header.")
            return

        match = re.search(r"\((\d+)\)", text)

        if not match or int(match[1]) == 0:
            log.warning(f"URL {url_num}) No available stream links.")
            return

        first_available = await page.wait_for_selector(
@@ -124,6 +126,7 @@ async def process_event(
            await asyncio.wait_for(wait_task, timeout=6)
        except asyncio.TimeoutError:
            log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
            return
        finally:
@@ -137,17 +140,21 @@ async def process_event(
        if captured:
            log.info(f"URL {url_num}) Captured M3U8")
            return captured[-1]

        log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
        return
    except Exception as e:
        log.warning(f"URL {url_num}) Exception while processing: {e}")
        return
    finally:
        page.remove_listener("request", handler)
        await page.close()
@@ -213,8 +220,11 @@ async def get_events(
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
    valid_count = cached_count = len(valid_urls)

    urls.update(valid_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -225,7 +235,9 @@ async def scrape() -> None:
    if not (base_url and api_url):
        log.warning("No working Watch Footy mirrors")
        CACHE_FILE.write(cached_urls)
        return

    log.info(f'Scraping from "{base_url}"')
@@ -287,6 +299,7 @@ async def scrape() -> None:
    if new_count := valid_count - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -112,7 +112,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -169,6 +171,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")