diff --git a/M3U8/scrapers/fawa.py b/M3U8/scrapers/fawa.py
index 2dba43f..8d61668 100644
--- a/M3U8/scrapers/fawa.py
+++ b/M3U8/scrapers/fawa.py
@@ -20,6 +20,7 @@ BASE_URL = "http://www.fawanews.sc/"
 async def process_event(url: str, url_num: int) -> str | None:
     if not (html_data := await network.request(url, log=log)):
         log.info(f"URL {url_num}) Failed to load url.")
+
         return
 
     valid_m3u8 = re.compile(
@@ -29,9 +30,11 @@ async def process_event(url: str, url_num: int) -> str | None:
 
     if not (match := valid_m3u8.search(html_data.text)):
         log.info(f"URL {url_num}) No M3U8 found")
+
         return
 
     log.info(f"URL {url_num}) Captured M3U8")
+
     return match[2]
@@ -83,8 +86,11 @@ async def get_events(cached_hrefs: set[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_hrefs = {entry["href"] for entry in cached_urls.values()}
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -137,6 +143,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/istreameast.py b/M3U8/scrapers/istreameast.py
index 255992c..be8410d 100644
--- a/M3U8/scrapers/istreameast.py
+++ b/M3U8/scrapers/istreameast.py
@@ -21,27 +21,33 @@ async def process_event(url: str, url_num: int) -> str | None:
     if not (event_data := await network.request(url, log=log)):
         log.info(f"URL {url_num}) Failed to load url.")
+
         return
 
     soup = HTMLParser(event_data.content)
 
     if not (iframe := soup.css_first("iframe#wp_player")):
         log.warning(f"URL {url_num}) No iframe element found.")
+
         return
 
     if not (iframe_src := iframe.attributes.get("src")):
         log.warning(f"URL {url_num}) No iframe source found.")
+
         return
 
     if not (iframe_src_data := await network.request(iframe_src, log=log)):
         log.info(f"URL {url_num}) Failed to load iframe source.")
+
         return
 
     if not (match := pattern.search(iframe_src_data.text)):
         log.warning(f"URL {url_num}) No Clappr source found.")
+
         return
 
     log.info(f"URL {url_num}) Captured M3U8")
+
     return base64.b64decode(match[1]).decode("utf-8")
@@ -98,7 +104,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -137,6 +145,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/lotus.py b/M3U8/scrapers/lotus.py
index f4be812..cc7e1a2 100644
--- a/M3U8/scrapers/lotus.py
+++ b/M3U8/scrapers/lotus.py
@@ -72,7 +72,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -129,6 +131,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/pixel.py b/M3U8/scrapers/pixel.py
index 3933435..de9ab0a 100644
--- a/M3U8/scrapers/pixel.py
+++ b/M3U8/scrapers/pixel.py
@@ -55,7 +55,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
             continue
 
         event_name = event["match_name"]
+
         channel_info: dict[str, str] = event["channel"]
+
         category: dict[str, str] = channel_info["TVCategory"]
 
         sport = category["name"]
@@ -82,7 +84,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
 async def scrape() -> None:
     if cached := CACHE_FILE.load():
         urls.update(cached)
+
         log.info(f"Loaded {len(urls)} event(s) from cache")
+
         return
 
     log.info(f'Scraping from "{BASE_URL}"')
diff --git a/M3U8/scrapers/ppv.py b/M3U8/scrapers/ppv.py
index 74dc5ad..5a0ca62 100644
--- a/M3U8/scrapers/ppv.py
+++ b/M3U8/scrapers/ppv.py
@@ -39,6 +39,7 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str
     API_FILE.write(api_data)
 
     now = Time.clean(Time.now())
+
     start_dt = now.delta(minutes=-30)
     end_dt = now.delta(minutes=30)
@@ -50,8 +51,11 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str
         for event in stream_group.get("streams", []):
             name = event.get("name")
+
             start_ts = event.get("starts_at")
+
             logo = event.get("poster")
+
             iframe = event.get("iframe")
 
             if not (name and start_ts and iframe):
@@ -80,7 +84,9 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -148,6 +154,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py
index daf4b7d..868c2e8 100644
--- a/M3U8/scrapers/roxie.py
+++ b/M3U8/scrapers/roxie.py
@@ -41,9 +41,11 @@ async def process_event(url: str, url_num: int) -> str | None:
 
     if not (match := valid_m3u8.search(html_data.text)):
         log.info(f"URL {url_num}) No M3U8 found")
+
         return
 
     log.info(f"URL {url_num}) Captured M3U8")
+
     return match[1]
@@ -133,7 +135,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -183,6 +187,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/shark.py b/M3U8/scrapers/shark.py
index 79f0596..26297f2 100644
--- a/M3U8/scrapers/shark.py
+++ b/M3U8/scrapers/shark.py
@@ -21,15 +21,18 @@ BASE_URL = "https://sharkstreams.net"
 async def process_event(url: str, url_num: int) -> str | None:
     if not (r := await network.request(url, log=log)):
         log.info(f"URL {url_num}) Failed to load url.")
+
         return
 
     data: dict[str, list[str]] = r.json()
 
     if not (urls := data.get("urls")):
         log.info(f"URL {url_num}) No M3U8 found")
+
         return
 
     log.info(f"URL {url_num}) Captured M3U8")
+
     return urls[0]
@@ -47,6 +50,7 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]
     for row in soup.css(".row"):
         date_node = row.css_first(".ch-date")
+
         sport_node = row.css_first(".ch-category")
         name_node = row.css_first(".ch-name")
@@ -54,7 +58,9 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]
             continue
 
         event_dt = Time.from_str(date_node.text(strip=True), timezone="EST")
+
         sport = sport_node.text(strip=True)
+
         event_name = name_node.text(strip=True)
 
         embed_btn = row.css_first("a.hd-link.secondary")
@@ -107,7 +113,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -157,6 +165,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/sport9.py b/M3U8/scrapers/sport9.py
index 4e62361..70b19e5 100644
--- a/M3U8/scrapers/sport9.py
+++ b/M3U8/scrapers/sport9.py
@@ -52,7 +52,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
             continue
 
         sport = sport_node.text(strip=True)
+
         team_1_node = card.css_first(".team1 .team-name")
+
         team_2_node = card.css_first(".team2 .team-name")
 
         if team_1_node and team_2_node:
@@ -88,7 +90,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -146,6 +150,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/streamcenter.py b/M3U8/scrapers/streamcenter.py
index 159f5df..968519a 100644
--- a/M3U8/scrapers/streamcenter.py
+++ b/M3U8/scrapers/streamcenter.py
@@ -91,7 +91,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -148,6 +150,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/streamfree.py b/M3U8/scrapers/streamfree.py
index bfb4fee..f065ccd 100644
--- a/M3U8/scrapers/streamfree.py
+++ b/M3U8/scrapers/streamfree.py
@@ -70,7 +70,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
 async def scrape() -> None:
     if cached := CACHE_FILE.load():
         urls.update(cached)
+
         log.info(f"Loaded {len(urls)} event(s) from cache")
+
         return
 
     log.info(f'Scraping from "{BASE_URL}"')
diff --git a/M3U8/scrapers/streamhub.py b/M3U8/scrapers/streamhub.py
index 7d10f9d..7268cc4 100644
--- a/M3U8/scrapers/streamhub.py
+++ b/M3U8/scrapers/streamhub.py
@@ -135,7 +135,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -194,6 +196,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/streamsgate.py b/M3U8/scrapers/streamsgate.py
index ffa5806..c328b99 100644
--- a/M3U8/scrapers/streamsgate.py
+++ b/M3U8/scrapers/streamsgate.py
@@ -125,7 +125,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -182,6 +184,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/strmd.py b/M3U8/scrapers/strmd.py
index 54bfd6c..c78b2d4 100644
--- a/M3U8/scrapers/strmd.py
+++ b/M3U8/scrapers/strmd.py
@@ -117,7 +117,9 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -180,6 +182,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/tvpass.py b/M3U8/scrapers/tvpass.py
index ae46218..85c15f6 100644
--- a/M3U8/scrapers/tvpass.py
+++ b/M3U8/scrapers/tvpass.py
@@ -26,7 +26,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
     for i, line in enumerate(data, start=1):
         if line.startswith("#EXTINF"):
             tvg_id_match = re.search(r'tvg-id="([^"]*)"', line)
+
             tvg_name_match = re.search(r'tvg-name="([^"]*)"', line)
+
             group_title_match = re.search(r'group-title="([^"]*)"', line)
 
             tvg = tvg_id_match[1] if tvg_id_match else None
@@ -57,7 +59,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
 async def scrape() -> None:
     if cached := CACHE_FILE.load():
         urls.update(cached)
+
         log.info(f"Loaded {len(urls)} event(s) from cache")
+
         return
 
     log.info(f'Scraping from "{BASE_URL}"')
diff --git a/M3U8/scrapers/utils/caching.py b/M3U8/scrapers/utils/caching.py
index 0da3521..7e7ebbb 100644
--- a/M3U8/scrapers/utils/caching.py
+++ b/M3U8/scrapers/utils/caching.py
@@ -7,7 +7,9 @@ from .config import Time
 class Cache:
     def __init__(self, file: str, exp: int | float) -> None:
         self.file = Path(__file__).parent.parent / "caches" / file
+
         self.exp = exp
+
         self.now_ts = Time.now().timestamp()
 
     def is_fresh(self, entry: dict) -> bool:
diff --git a/M3U8/scrapers/utils/config.py b/M3U8/scrapers/utils/config.py
index 77103a5..e0062a1 100644
--- a/M3U8/scrapers/utils/config.py
+++ b/M3U8/scrapers/utils/config.py
@@ -45,11 +45,13 @@ class Time(datetime):
     def to_tz(self, tzone: str) -> "Time":
         dt = self.astimezone(self.ZONES[tzone])
+
         return self.__class__.fromtimestamp(dt.timestamp(), tz=self.ZONES[tzone])
 
     @classmethod
     def _to_class_tz(cls, dt) -> "Time":
         dt = dt.astimezone(cls.TZ)
+
         return cls.fromtimestamp(dt.timestamp(), tz=cls.TZ)
 
     @classmethod
diff --git a/M3U8/scrapers/utils/logger.py b/M3U8/scrapers/utils/logger.py
index e0b2455..51c4082 100644
--- a/M3U8/scrapers/utils/logger.py
+++ b/M3U8/scrapers/utils/logger.py
@@ -22,9 +22,13 @@ COLORS = {
 class ColorFormatter(logging.Formatter):
     def format(self, record) -> str:
         color = COLORS.get(record.levelname, COLORS["reset"])
+
         levelname = record.levelname
+
         record.levelname = f"{color}{levelname:<8}{COLORS['reset']}"
+
         formatted = super().format(record)
+
         record.levelname = levelname
 
         return formatted
@@ -38,10 +42,15 @@ def get_logger(name: str | None = None) -> logging.Logger:
     if not logger.hasHandlers():
         handler = logging.StreamHandler()
+
         formatter = ColorFormatter(LOG_FMT, datefmt="%Y-%m-%d | %H:%M:%S")
+
         handler.setFormatter(formatter)
+
         logger.addHandler(handler)
+
         logger.setLevel(logging.INFO)
+
         logger.propagate = False
 
     return logger
diff --git a/M3U8/scrapers/utils/webwork.py b/M3U8/scrapers/utils/webwork.py
index 6da6ae2..d3c00db 100644
--- a/M3U8/scrapers/utils/webwork.py
+++ b/M3U8/scrapers/utils/webwork.py
@@ -12,6 +12,8 @@
 from playwright.async_api import Browser, BrowserContext, Playwright, Request
 from .logger import get_logger
 
+logger = get_logger(__name__)
+
 T = TypeVar("T")
@@ -32,8 +34,6 @@ class Network:
             http2=True,
         )
 
-        self._logger = get_logger("network")
-
     @staticmethod
     def build_proxy_url(
         tag: str,
@@ -58,16 +58,18 @@ class Network:
         **kwargs,
     ) -> httpx.Response | None:
-        log = log or self._logger
+        log = log or logger
 
         try:
             r = await self.client.get(url, **kwargs)
 
-            r.raise_for_status()
-        except Exception as e:
-            log.error(f'Failed to fetch "{url}": {e}\n{kwargs = }')
-            return ""
-        return r
+            r.raise_for_status()
+
+            return r
+        except (httpx.HTTPError, httpx.TimeoutException) as e:
+            log.error(f'Failed to fetch "{url}": {e}')
+
+            return ""
 
     async def get_base(self, mirrors: list[str]) -> str | None:
         random.shuffle(mirrors)
@@ -89,7 +91,7 @@ class Network:
         log: logging.Logger | None = None,
     ) -> T | None:
-        log = log or get_logger("network")
+        log = log or logger
 
         task = asyncio.create_task(fn())
@@ -104,13 +106,15 @@ class Network:
                 await task
             except asyncio.CancelledError:
                 pass
+
             except Exception as e:
                 log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")
 
-            return None
+            return
         except Exception as e:
             log.error(f"URL {url_num}) Unexpected error: {e}")
-            return None
+
+            return
 
     @staticmethod
     def capture_req(
@@ -141,7 +145,7 @@ class Network:
         log: logging.Logger | None = None,
     ) -> str | None:
-        log = log or self._logger
+        log = log or logger
 
         page = await context.new_page()
@@ -170,6 +174,7 @@ class Network:
                 await asyncio.wait_for(wait_task, timeout=timeout)
             except asyncio.TimeoutError:
                 log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
+
                 return
 
             finally:
@@ -183,17 +188,21 @@ class Network:
             if captured:
                 log.info(f"URL {url_num}) Captured M3U8")
+
                 return captured[0]
 
             log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
+
            return
         except Exception as e:
             log.warning(f"URL {url_num}) Exception while processing: {e}")
+
             return
         finally:
             page.remove_listener("request", handler)
+
             await page.close()
 
     @staticmethod
@@ -205,7 +214,9 @@
         if browser == "brave":
             brwsr = await playwright.chromium.connect_over_cdp("http://localhost:9222")
+
             context = brwsr.contexts[0]
+
         else:
             brwsr = await playwright.firefox.launch(headless=True)
diff --git a/M3U8/scrapers/watchfooty.py b/M3U8/scrapers/watchfooty.py
index dbbbcb4..49aa03e 100644
--- a/M3U8/scrapers/watchfooty.py
+++ b/M3U8/scrapers/watchfooty.py
@@ -104,12 +104,14 @@ async def process_event(
             text = await header.inner_text()
         except TimeoutError:
             log.warning(f"URL {url_num}) Can't find stream links header.")
+
             return
 
         match = re.search(r"\((\d+)\)", text)
 
         if not match or int(match[1]) == 0:
             log.warning(f"URL {url_num}) No available stream links.")
+
             return
 
         first_available = await page.wait_for_selector(
@@ -124,6 +126,7 @@ async def process_event(
             await asyncio.wait_for(wait_task, timeout=6)
         except asyncio.TimeoutError:
             log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
+
             return
 
         finally:
@@ -137,17 +140,21 @@ async def process_event(
         if captured:
             log.info(f"URL {url_num}) Captured M3U8")
+
             return captured[-1]
 
         log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
+
         return
     except Exception as e:
         log.warning(f"URL {url_num}) Exception while processing: {e}")
+
         return
     finally:
         page.remove_listener("request", handler)
+
         await page.close()
@@ -213,8 +220,11 @@ async def get_events(
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+
     valid_count = cached_count = len(valid_urls)
+
     urls.update(valid_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -225,7 +235,9 @@ async def scrape() -> None:
     if not (base_url and api_url):
         log.warning("No working Watch Footy mirrors")
+
         CACHE_FILE.write(cached_urls)
+
         return
 
     log.info(f'Scraping from "{base_url}"')
@@ -287,6 +299,7 @@ async def scrape() -> None:
     if new_count := valid_count - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")
diff --git a/M3U8/scrapers/webcast.py b/M3U8/scrapers/webcast.py
index 40d9d7e..1b9ee12 100644
--- a/M3U8/scrapers/webcast.py
+++ b/M3U8/scrapers/webcast.py
@@ -112,7 +112,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
+
     cached_count = len(cached_urls)
+
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -169,6 +171,7 @@ async def scrape() -> None:
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
+
     else:
         log.info("No new events found")