diff --git a/M3U8/scrape/caches/ppv.json b/M3U8/scrape/caches/ppv.json
deleted file mode 100644
index 1f0d085..0000000
--- a/M3U8/scrape/caches/ppv.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-  "[Baseball] Miami Marlins vs. Washington Nationals": {
-    "url": "https://gg.poocloud.in/nationals/index.m3u8",
-    "logo": "https://ppv.to/assets/thumb/bad1eb8c65c4a28ddc70454d76d70568-thumbnail.jpg"
-  },
-  "[Baseball] New York Mets vs. Detroit Tigers": {
-    "url": "https://gg.poocloud.in/tigers/index.m3u8",
-    "logo": "https://ppv.to/assets/thumb/a5f2e902529c2d54e844d2bb9812a6ea-thumbnail.jpg"
-  },
-  "[Basketball] Finland vs Germany (Tampere Finland)": {
-    "url": "https://gg.poocloud.in/courtside-1891-207182/index.m3u8?md5=RQ_pDO7V2EkcFAgE78sCNw&expires=1756933200&net=MjA5LjYuMTc5LjE5Nw%3D%3D",
-    "logo": "https://ppv.to/assets/thumb/bfdced3b90f5a92b880285b406504422-thumbnail.jpg"
-  },
-  "[Basketball] T\u00fcrkiye vs Serbia (Riga Latvia)": {
-    "url": "https://gg.poocloud.in/courtside-1891-207184/index.m3u8?md5=lzBYrzi_vsI3wmLvOpo0pQ&expires=1756935900&net=MjA5LjYuMTc5LjE5Nw%3D%3D",
-    "logo": "https://ppv.to/assets/thumb/ed52e46b484e2a7d939aca2be2e6af23-thumbnail.jpg"
-  }
-}
\ No newline at end of file
diff --git a/M3U8/scrape/livetvsx.py b/M3U8/scrape/livetvsx.py
index 87e7e53..0ef1f88 100644
--- a/M3U8/scrape/livetvsx.py
+++ b/M3U8/scrape/livetvsx.py
@@ -9,7 +9,7 @@ from pathlib import Path
 
 import httpx
 from playwright.async_api import Request, async_playwright
 
-from .utils import LOGOS, TZ, get_logger, safe_process_event
+from .utils import LOGOS, TZ, get_logger, now, safe_process_event
 
 log = get_logger(__name__)
@@ -54,7 +54,7 @@ async def get_cert(client: httpx.AsyncClient) -> ssl.SSLContext:
     if CERT_FILE.is_file():
-        mtime = datetime.fromtimestamp(CERT_FILE.stat().st_mtime)
+        mtime = datetime.fromtimestamp(CERT_FILE.stat().st_mtime, TZ)
 
-        if datetime.now() - mtime < timedelta(days=30):
+        if now - mtime < timedelta(days=30):
             return ssl.create_default_context(cafile=CERT_FILE)
 
     log.info("Refreshing cached certificate")
@@ -68,9 +68,7 @@ def load_cache() -> dict[str, dict[str, str | str]]:
     try:
         data: dict = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
 
-        now = datetime.now(TZ).timestamp()
-
-        age: float = now - data.get("timestamp", 0)
+        age: float = now.timestamp() - data.get("timestamp", 0)
 
         return {k: v for k, v in data.items() if age < 14400}  # 4 hours
     except (FileNotFoundError, json.JSONDecodeError):
@@ -229,7 +227,6 @@ async def get_events(
     events: list[dict[str, str]] = []
 
     pub_date_format = "%a, %d %b %Y %H:%M:%S %z"
-    now = datetime.now(TZ)
 
     window_start, window_end = now - timedelta(hours=3), now + timedelta(hours=1)
 
@@ -294,8 +291,6 @@ async def main(client: httpx.AsyncClient) -> None:
 
     log.info(f"Processing {len(events)} URLs")
 
-    now_ts = datetime.now(TZ).timestamp()
-
     for i, ev in enumerate(events, start=1):
         sport = ev["sport"]
         event = ev["event"]
@@ -317,15 +312,11 @@ async def main(client: httpx.AsyncClient) -> None:
                 sport,
                 "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png",
             ),
-            "timestamp": now_ts,
+            "timestamp": now.timestamp(),
         }
 
         urls[key] = cached_urls[key] = entry
 
     CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")
 
-    new_count = len(cached_urls) - cached_count
-
-    log.info(f"Cached {cached_count} event(s)")
-
-    log.info(f"Collected {new_count} new event(s)")
+    log.info(f"Collected {len(cached_urls) - cached_count} event(s)")
diff --git a/M3U8/scrape/ppv.py b/M3U8/scrape/ppv.py
index dcc41bb..7144cad 100644
--- a/M3U8/scrape/ppv.py
+++ b/M3U8/scrape/ppv.py
@@ -10,7 +10,7 @@ from urllib.parse import urljoin
 
 import httpx
 from playwright.async_api import Request, async_playwright
 
-from .utils import TZ, get_base, get_logger, safe_process_event
+from .utils import TZ, get_base, get_logger, now, safe_process_event
 
 log = get_logger(__name__)
@@ -38,7 +38,11 @@ async def refresh_api_cache(client: httpx.AsyncClient, url: str) -> dict:
 
 def load_cache() -> dict[str, dict[str, str | str]]:
     try:
-        return json.loads(CACHE_FILE.read_text(encoding="utf-8"))
+        data: dict = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
+
+        age: float = now.timestamp() - data.get("timestamp", 0)
+
+        return {k: v for k, v in data.items() if age < 14400}  # 4 hours
     except (FileNotFoundError, json.JSONDecodeError):
         return {}
 
@@ -47,7 +51,7 @@ def load_api_cache() -> dict[str, dict[str, str | str]]:
     try:
         data: dict = json.loads(API_FILE.read_text(encoding="utf-8"))
 
-        age: float = datetime.now(TZ).timestamp() - data.get("timestamp", 0)
+        age: float = now.timestamp() - data.get("timestamp", 0)
 
         return data if age < 86400 else {}  # 24 hours
     except (FileNotFoundError, json.JSONDecodeError):
@@ -78,7 +82,7 @@ async def process_event(url: str, url_num: int) -> str | None:
     page.on("request", capture_req)
 
     try:
-        await page.goto(url, wait_until="domcontentloaded", timeout=10_000)
+        await page.goto(url, wait_until="domcontentloaded", timeout=15_000)
 
         wait_task = asyncio.create_task(got_one.wait())
 
@@ -125,8 +129,6 @@ async def get_events(
 
     base_url = re.match(r"(https?://.+?)/", api_url)[1]
 
-    now = datetime.now(TZ)
-
     if not (api_data := load_api_cache()):
         api_data = await refresh_api_cache(client, api_url)
         API_FILE.write_text(json.dumps(api_data, indent=2), encoding="utf-8")
@@ -202,6 +204,7 @@ async def main(client: httpx.AsyncClient) -> None:
         entry = {
             "url": url,
             "logo": ev["logo"],
+            "timestamp": now.timestamp(),
         }
 
         key = f"[{ev['sport']}] {ev['event']}"
@@ -210,11 +213,7 @@ async def main(client: httpx.AsyncClient) -> None:
 
     CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")
 
-    new_count = len(cached_urls) - cached_count
-
-    log.info(f"Cached {cached_count} event(s)")
-
-    log.info(f"Collected {new_count} new event(s)")
+    log.info(f"Collected {len(cached_urls) - cached_count} event(s)")
 
 
 # works if no cloudflare bot detection
diff --git a/M3U8/scrape/tvpass.py b/M3U8/scrape/tvpass.py
index d15f92e..7b075dc 100644
--- a/M3U8/scrape/tvpass.py
+++ b/M3U8/scrape/tvpass.py
@@ -1,11 +1,10 @@
 import json
 import re
-from datetime import datetime
 from pathlib import Path
 
 import httpx
 
-from .utils import LOGOS, TZ, get_logger
+from .utils import LOGOS, get_logger, now
 
 log = get_logger(__name__)
 
@@ -19,7 +18,7 @@ CACHE_FILE = Path(__file__).parent / "caches" / "tvpass.json"
 def load_cache() -> dict[str, str]:
     try:
         data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
-        return {} if 8 <= datetime.now(TZ).hour <= 12 else data
+        return {} if 8 <= now.hour <= 12 else data
     except (FileNotFoundError, json.JSONDecodeError):
         return {}
 
diff --git a/M3U8/scrape/utils/__init__.py b/M3U8/scrape/utils/__init__.py
index 50cbf88..d7cfcb4 100644
--- a/M3U8/scrape/utils/__init__.py
+++ b/M3U8/scrape/utils/__init__.py
@@ -1,3 +1,10 @@
-from .config import LOGOS, TZ, get_base, get_logger, safe_process_event
+from .config import LOGOS, TZ, get_base, get_logger, now, safe_process_event
 
-__all__ = ["LOGOS", "TZ", "get_base", "get_logger", "safe_process_event"]
+__all__ = [
+    "LOGOS",
+    "TZ",
+    "get_base",
+    "get_logger",
+    "now",
+    "safe_process_event",
+]
diff --git a/M3U8/scrape/utils/config.py b/M3U8/scrape/utils/config.py
index 8e64fe1..862df3f 100644
--- a/M3U8/scrape/utils/config.py
+++ b/M3U8/scrape/utils/config.py
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+from datetime import datetime
 from pathlib import Path
 from typing import Any
 
@@ -8,6 +9,8 @@ import pytz
 
 TZ = pytz.timezone("America/New_York")
 
+now = datetime.now(TZ)
+
 LOGOS = {
     "MLB": "https://i.gyazo.com/0fe7865ef2f06c9507791b24f04dbca8.png",
     "NBA": "https://i.gyazo.com/773c23570f095a5d549c23b9401d83f4.png",
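
Two notes on the caching changes above, based only on the hunks shown:

* The new module-level "now" in utils/config.py is evaluated once, at import
  time. That is fine while each scraper run is a fresh short-lived process,
  but in a long-lived process every cache-age check, and the event window in
  get_events(), would be measured against a stale clock; a small now()
  function returning datetime.now(TZ) would avoid that.

* Both rewritten load_cache() functions derive a single "age" from the
  top-level data.get("timestamp", 0), yet these hunks only ever write
  per-entry timestamps (entry["timestamp"]). With no top-level key present,
  age is the full Unix epoch and the cache is discarded on every run. A
  minimal per-entry sketch of what may have been intended (an assumption,
  not the author's code):

      def load_cache() -> dict[str, dict[str, str | float]]:
          try:
              data: dict = json.loads(CACHE_FILE.read_text(encoding="utf-8"))

              # Keep each entry only while it is younger than 4 hours,
              # judged by the per-entry timestamp written in main().
              return {
                  k: v
                  for k, v in data.items()
                  if now.timestamp() - v.get("timestamp", 0) < 14400
              }
          except (FileNotFoundError, json.JSONDecodeError):
              return {}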