Fix scraper crashing when the API URL(s) are unreachable
misc. edits
This commit is contained in:
doms9 2025-12-29 09:57:40 -05:00
parent 51598ce2a2
commit 00000d903e
9 changed files with 18 additions and 24 deletions

View file

@ -18,12 +18,14 @@ API_URL = "https://api.cdn-live.tv/api/v1/events/sports"
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now())
events = []
if not (api_data := API_FILE.load(per_entry=False)):
log.info("Refreshing API cache")
api_data = {}
api_data = {"timestamp": now.timestamp()}
if r := await network.request(
API_URL,
@ -32,13 +34,13 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
):
api_data: dict = r.json()
api_data["timestamp"] = now.timestamp()
API_FILE.write(api_data)
if not (data := api_data.get("cdn-live-tv")):
return events
now = Time.clean(Time.now())
start_dt = now.delta(minutes=-30)
end_dt = now.delta(minutes=30)

View file

@ -27,7 +27,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
if not (api_data := API_CACHE.load(per_entry=False)):
log.info("Refreshing API cache")
api_data = {}
api_data = {"timestamp": now.timestamp()}
if r := await network.request(BASE_URL, log=log):
api_data: dict = r.json()

View file

@ -28,19 +28,19 @@ BASE_MIRRORS = [
async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str]]:
events = []
now = Time.clean(Time.now())
if not (api_data := API_FILE.load(per_entry=False)):
log.info("Refreshing API cache")
api_data = {}
api_data = {"timestamp": now.timestamp()}
if r := await network.request(api_url, log=log):
api_data: dict = r.json()
API_FILE.write(api_data)
now = Time.clean(Time.now())
events = []
start_dt = now.delta(minutes=-30)
end_dt = now.delta(minutes=30)

View file

@ -14,7 +14,7 @@ CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
BASE_URL = "https://backendstreamcenter.youshop.pro:488/api/Parties"
BASE_URL = "https://backend.streamcenter.live/api/Parties"
CATEGORIES = {
4: "Basketball",
@ -37,7 +37,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
if not (api_data := API_FILE.load(per_entry=False, index=-1)):
log.info("Refreshing API cache")
api_data = []
api_data = [{"timestamp": now.timestamp()}]
if r := await network.request(
BASE_URL,

View file

@ -59,7 +59,7 @@ async def refresh_api_cache(now_ts: float) -> list[dict[str, Any]]:
results = await asyncio.gather(*tasks)
if not (data := [*chain.from_iterable(r.json() for r in results if r)]):
return []
return [{"timestamp": now_ts}]
for ev in data:
ev["ts"] = ev.pop("timestamp")

View file

@ -39,7 +39,7 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
if not (api_data := API_FILE.load(per_entry=False, index=-1)):
log.info("Refreshing API cache")
api_data = []
api_data = [{"timestamp": now.timestamp()}]
if r := await network.request(
urljoin(url, "api/matches/all-today"),

View file

@ -82,13 +82,10 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
if not (time_node := node.css_first(".col-3")):
continue
if not time_node.text(strip=True) == "MatchStarted":
if time_node.text(strip=True) != "MatchStarted":
continue
if not (href := node.attributes.get("href")):
continue
if href.startswith("http"):
if not (href := node.attributes.get("href")) or href.startswith("http"):
continue
sport = fix_league(sport)

View file

@ -60,7 +60,7 @@ async def refresh_api_cache(now: Time) -> list[dict[str, Any]]:
results = await asyncio.gather(*tasks)
if not (data := [*chain.from_iterable(r.json() for r in results if r)]):
return []
return [{"timestamp": now.timestamp()}]
for ev in data:
ev["ts"] = ev.pop("timestamp")

View file

@ -1,4 +1,3 @@
import asyncio
from functools import partial
from playwright.async_api import async_playwright
@ -33,10 +32,6 @@ async def refresh_html_cache() -> dict[str, dict[str, str | float]]:
soup = HTMLParser(html_data.content)
title = soup.css_first("title").text(strip=True)
sport = "NFL" if "NFL" in title else "NHL"
date_text = now.strftime("%B %d, %Y")
if date_row := soup.css_first("tr.mdatetitle"):
@ -66,10 +61,10 @@ async def refresh_html_cache() -> dict[str, dict[str, str | float]]:
event = fix_event(event_name)
key = f"[{sport}] {event} ({TAG})"
key = f"[NHL] {event} ({TAG})"
events[key] = {
"sport": sport,
"sport": "NHL",
"event": event,
"link": href,
"event_ts": event_dt.timestamp(),