This commit is contained in:
doms9 2025-09-04 09:59:19 -04:00
parent 1257f84f58
commit 00000d940e
6 changed files with 29 additions and 48 deletions

View file

@ -10,7 +10,7 @@ from urllib.parse import urljoin
import httpx
from playwright.async_api import Request, async_playwright
from .utils import TZ, get_base, get_logger, safe_process_event
from .utils import TZ, get_base, get_logger, now, safe_process_event
log = get_logger(__name__)
@ -38,7 +38,11 @@ async def refresh_api_cache(client: httpx.AsyncClient, url: str) -> dict:
def load_cache() -> dict[str, dict[str, str | str]]:
try:
return json.loads(CACHE_FILE.read_text(encoding="utf-8"))
data: dict = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
age: float = now.timestamp() - data.get("timestamp", 0)
return {k: v for k, v in data.items() if age < 14400} # 4 hours
except (FileNotFoundError, json.JSONDecodeError):
return {}
@ -47,7 +51,7 @@ def load_api_cache() -> dict[str, dict[str, str | str]]:
try:
data: dict = json.loads(API_FILE.read_text(encoding="utf-8"))
age: float = datetime.now(TZ).timestamp() - data.get("timestamp", 0)
age: float = now.timestamp() - data.get("timestamp", 0)
return data if age < 86400 else {} # 24 hours
except (FileNotFoundError, json.JSONDecodeError):
@ -78,7 +82,7 @@ async def process_event(url: str, url_num: int) -> str | None:
page.on("request", capture_req)
try:
await page.goto(url, wait_until="domcontentloaded", timeout=10_000)
await page.goto(url, wait_until="domcontentloaded", timeout=15_000)
wait_task = asyncio.create_task(got_one.wait())
@ -125,8 +129,6 @@ async def get_events(
base_url = re.match(r"(https?://.+?)/", api_url)[1]
now = datetime.now(TZ)
if not (api_data := load_api_cache()):
api_data = await refresh_api_cache(client, api_url)
API_FILE.write_text(json.dumps(api_data, indent=2), encoding="utf-8")
@ -202,6 +204,7 @@ async def main(client: httpx.AsyncClient) -> None:
entry = {
"url": url,
"logo": ev["logo"],
"timestamp": now.timestamp(),
}
key = f"[{ev['sport']}] {ev['event']}"
@ -210,11 +213,7 @@ async def main(client: httpx.AsyncClient) -> None:
CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")
new_count = len(cached_urls) - cached_count
log.info(f"Cached {cached_count} event(s)")
log.info(f"Collected {new_count} new event(s)")
log.info(f"Collected {len(cached_urls) - cached_count} event(s)")
# Works if there is no Cloudflare bot detection