doms9 2025-12-06 00:26:59 -05:00
parent a979f0bf75
commit 00000d97a5
8 changed files with 40 additions and 29 deletions

View file

@@ -36,11 +36,12 @@ async def process_event(
         re.IGNORECASE,
     )
-    if match := valid_m3u8.search(r.text):
-        log.info(f"URL {url_num}) Captured M3U8")
-        return match[2]
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return
-    log.info(f"URL {url_num}) No M3U8 found")
+    log.info(f"URL {url_num}) Captured M3U8")
+    return match[2]
 async def get_events(
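Note on this hunk (the same inversion appears twice more in later files): the match check is flipped into an early return so the failure path exits first and the success path runs unindented. A minimal sketch of the pattern, using a hypothetical regex and logger rather than the repo's actual process_event:

import logging
import re

log = logging.getLogger(__name__)
valid_m3u8 = re.compile(r"(https?://\S+\.m3u8)", re.IGNORECASE)

def extract_m3u8(text: str, url_num: int) -> str | None:
    # Guard clause: bail out on the failure case first.
    if not (match := valid_m3u8.search(text)):
        log.info(f"URL {url_num}) No M3U8 found")
        return None

    log.info(f"URL {url_num}) Captured M3U8")
    return match[1]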

View file

@@ -48,11 +48,12 @@ async def process_event(
         re.IGNORECASE,
     )
-    if match := valid_m3u8.search(r.text):
-        log.info(f"URL {url_num}) Captured M3U8")
-        return match[1]
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return
-    log.info(f"URL {url_num}) No M3U8 found")
+    log.info(f"URL {url_num}) Captured M3U8")
+    return match[1]
 async def refresh_html_cache(
@@ -62,8 +63,6 @@ async def refresh_html_cache(
     now_ts: float,
 ) -> dict[str, dict[str, str | float]]:
-    log.info("Refreshing HTML cache")
     try:
         r = await client.get(url)
         r.raise_for_status()
@@ -116,6 +115,8 @@ async def get_events(
     now = Time.clean(Time.now())
     if not (events := HTML_CACHE.load()):
+        log.info("Refreshing HTML cache")
         tasks = [
             refresh_html_cache(
                 client,
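These two hunks move the "Refreshing HTML cache" log out of refresh_html_cache and into the caller, presumably so it fires once per cache miss rather than once per URL the gather fans out to. A rough sketch under that assumption, with stubbed-in names standing in for the real helpers:

import asyncio
import logging

import httpx

log = logging.getLogger(__name__)

async def refresh_html_cache(client: httpx.AsyncClient, url: str) -> dict:
    # Hypothetical stub standing in for the real fetch-and-parse helper.
    return {}

async def refresh_all(client: httpx.AsyncClient, urls: list[str]) -> list[dict]:
    log.info("Refreshing HTML cache")  # logged once at the call site
    tasks = [refresh_html_cache(client, url) for url in urls]
    return await asyncio.gather(*tasks)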

View file

@@ -60,6 +60,8 @@ async def refresh_html_cache(
         return {}
+    pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)
     soup = HTMLParser(r.content)
     events = {}
@@ -81,8 +83,6 @@ async def refresh_html_cache(
         if not embed_btn or not (onclick := embed_btn.attributes.get("onclick")):
             continue
-        pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)
         if not (match := pattern.search(onclick)):
             continue
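Here the openEmbed pattern compile is hoisted out of the per-button loop so it is built once per refresh instead of once per event. A small self-contained illustration with made-up onclick values:

import re

# Compiled once, outside the loop.
pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)

onclick_values = [
    "openEmbed('https://example.com/embed/1')",  # hypothetical sample data
    "window.open('not-an-embed')",
]

embeds = []
for onclick in onclick_values:
    if not (match := pattern.search(onclick)):
        continue
    embeds.append(match[1])

print(embeds)  # ['https://example.com/embed/1']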

View file

@@ -40,13 +40,14 @@ async def process_event(
     valid_m3u8 = re.compile(r'var\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)
-    if match := valid_m3u8.search(r.text):
-        encoded = match[2][::-1]
-        decoded = base64.b64decode(encoded[::-1]).decode("utf-8")
-        log.info(f"URL {url_num}) Captured M3U8")
-        return decoded
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return
-    log.info(f"URL {url_num}) No M3U8 found")
+    encoded = match[2][::-1]
+    decoded = base64.b64decode(encoded[::-1]).decode("utf-8")
+    log.info(f"URL {url_num}) Captured M3U8")
+    return decoded
 async def get_events(client: httpx.AsyncClient) -> list[dict[str, str]]:
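One detail worth flagging in this hunk: encoded is the reversed capture and it is reversed again before base64.b64decode, so the two slices cancel and match[2] is decoded unchanged. A quick check with a made-up value:

import base64

# Hypothetical captured group, not taken from the source.
captured = base64.b64encode(b"https://example.com/stream.m3u8").decode()

encoded = captured[::-1]
decoded = base64.b64decode(encoded[::-1]).decode("utf-8")

assert decoded == base64.b64decode(captured).decode("utf-8")
print(decoded)  # https://example.com/stream.m3u8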

View file

@@ -118,7 +118,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
         log.info(f"Loaded {cached_count} event(s) from cache")
-    log.info(f'Scraping from "{BASE_URL}"')
+    log.info(f'Scraping from "https://streamcenter.xyz"')
     events = await get_events(client, set(cached_urls.keys()))

View file

@@ -37,7 +37,7 @@ TAG = "STRMSG8"
 def get_event(t1: str, t2: str) -> str:
     match t1:
         case "RED ZONE":
-            return "Red Zone"
+            return "NFL RedZone"
         case "TBD":
             return "TBD"
@@ -58,7 +58,9 @@ async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, An
     return r.json()
-async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
+async def refresh_api_cache(
+    client: httpx.AsyncClient, ts: float
+) -> list[dict[str, Any]]:
     log.info("Refreshing API cache")
     tasks = [
@@ -73,7 +75,7 @@ async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
     for ev in data:
         ev["ts"] = ev.pop("timestamp")
-    data[-1]["timestamp"] = Time.now().timestamp()
+    data[-1]["timestamp"] = ts
     return data
@@ -81,14 +83,15 @@ async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
 async def get_events(
     client: httpx.AsyncClient, cached_keys: set[str]
 ) -> list[dict[str, str]]:
+    now = Time.clean(Time.now())
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
-        api_data = await refresh_api_cache(client)
+        api_data = await refresh_api_cache(client, now.timestamp())
         API_FILE.write(api_data)
     events = []
-    now = Time.clean(Time.now())
     start_dt = now.delta(minutes=-30)
     end_dt = now.delta(minutes=30)
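The refresh_api_cache changes in this file (and the mirrored ones below) thread a single now reading from get_events into the helper instead of calling Time.now() again inside it, so the cache stamp and the +/-30-minute event window share one clock reading. A minimal sketch of that shape, using time.time() and placeholder data in place of the repo's Time helper and API fetch:

import asyncio
import time
from typing import Any

async def refresh_api_cache(client: Any, ts: float) -> list[dict[str, Any]]:
    # Hypothetical stand-in for the real fetch: the caller now supplies the
    # timestamp, so the helper no longer reads the clock itself.
    data: list[dict[str, Any]] = [{"event": "placeholder"}]
    data[-1]["timestamp"] = ts
    return data

async def get_events(client: Any) -> list[dict[str, Any]]:
    now = time.time()  # one reading, reused for the cache stamp and the event window
    return await refresh_api_cache(client, now)

if __name__ == "__main__":
    print(asyncio.run(get_events(client=None)))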

View file

@@ -39,7 +39,9 @@ def fix_sport(s: str) -> str:
 async def refresh_api_cache(
     client: httpx.AsyncClient,
     url: str,
+    ts: float,
 ) -> list[dict[str, Any]]:
     log.info("Refreshing API cache")
     try:
@@ -52,7 +54,7 @@ async def refresh_api_cache(
     data = r.json()
-    data[-1]["timestamp"] = Time.now().timestamp()
+    data[-1]["timestamp"] = ts
     return data
@@ -126,16 +128,19 @@ async def get_events(
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
+    now = Time.clean(Time.now())
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
         api_data = await refresh_api_cache(
-            client, urljoin(url, "api/matches/all-today")
+            client,
+            urljoin(url, "api/matches/all-today"),
+            now.timestamp(),
         )
         API_FILE.write(api_data)
     events = []
-    now = Time.clean(Time.now())
     start_dt = now.delta(minutes=-30)
     end_dt = now.delta(minutes=30)
     pattern = re.compile(r"[\n\r]+|\s{2,}")

View file

@@ -27,8 +27,6 @@ def fix_event(s: str) -> str:
 async def refresh_html_cache(
     client: httpx.AsyncClient, url: str
 ) -> dict[str, dict[str, str | float]]:
-    log.info("Refreshing HTML cache")
     try:
         r = await client.get(url)
         r.raise_for_status()
@@ -95,6 +93,8 @@ async def get_events(
     now = Time.clean(Time.now())
     if not (events := HTML_CACHE.load()):
+        log.info("Refreshing HTML cache")
         tasks = [refresh_html_cache(client, url) for url in BASE_URLS.values()]
         results = await asyncio.gather(*tasks)