doms9 2025-12-06 00:26:59 -05:00
parent a979f0bf75
commit 00000d97a5
8 changed files with 40 additions and 29 deletions

View file

@@ -36,11 +36,12 @@ async def process_event(
         re.IGNORECASE,
     )
-    if match := valid_m3u8.search(r.text):
-        log.info(f"URL {url_num}) Captured M3U8")
-        return match[2]
-    log.info(f"URL {url_num}) No M3U8 found")
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return
+    log.info(f"URL {url_num}) Captured M3U8")
+    return match[2]
 
 async def get_events(
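Side note: the same early-return rewrite recurs in several of the extractors below. A minimal standalone sketch of the shape, with a made-up regex and print standing in for the repo's logger:

    import re

    def extract_m3u8(text: str, url_num: int) -> str | None:
        # Hypothetical pared-down extractor; only the guard-clause shape matches the diff.
        valid_m3u8 = re.compile(r"(https?://\S+\.m3u8)", re.IGNORECASE)

        if not (match := valid_m3u8.search(text)):
            print(f"URL {url_num}) No M3U8 found")
            return None

        print(f"URL {url_num}) Captured M3U8")
        return match[1]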

View file

@@ -48,11 +48,12 @@ async def process_event(
         re.IGNORECASE,
     )
-    if match := valid_m3u8.search(r.text):
-        log.info(f"URL {url_num}) Captured M3U8")
-        return match[1]
-    log.info(f"URL {url_num}) No M3U8 found")
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return
+    log.info(f"URL {url_num}) Captured M3U8")
+    return match[1]
 
 async def refresh_html_cache(
@@ -62,8 +63,6 @@ async def refresh_html_cache(
     now_ts: float,
 ) -> dict[str, dict[str, str | float]]:
-    log.info("Refreshing HTML cache")
-
     try:
         r = await client.get(url)
         r.raise_for_status()
@@ -116,6 +115,8 @@ async def get_events(
     now = Time.clean(Time.now())
 
     if not (events := HTML_CACHE.load()):
+        log.info("Refreshing HTML cache")
+
         tasks = [
             refresh_html_cache(
                 client,

View file

@@ -60,6 +60,8 @@ async def refresh_html_cache(
         return {}
 
+    pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)
+
     soup = HTMLParser(r.content)
 
     events = {}
@@ -81,8 +83,6 @@ async def refresh_html_cache(
         if not embed_btn or not (onclick := embed_btn.attributes.get("onclick")):
             continue
 
-        pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)
-
         if not (match := pattern.search(onclick)):
             continue
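Side note: hoisting the openEmbed regex out of the per-button loop compiles it once per refresh instead of once per row (the re module caches compiled patterns, so the gain is mostly readability). A small self-contained illustration with made-up onclick values:

    import re

    pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)

    onclick_values = [  # hypothetical sample data, not taken from the site
        "openEmbed('https://example.com/embed/123')",
        "toggleMenu()",
    ]

    for onclick in onclick_values:
        if not (match := pattern.search(onclick)):
            continue
        print(match[1])  # -> https://example.com/embed/123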

View file

@@ -40,13 +40,14 @@ async def process_event(
     valid_m3u8 = re.compile(r'var\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)
 
-    if match := valid_m3u8.search(r.text):
-        encoded = match[2][::-1]
-        decoded = base64.b64decode(encoded[::-1]).decode("utf-8")
-        log.info(f"URL {url_num}) Captured M3U8")
-        return decoded
-    log.info(f"URL {url_num}) No M3U8 found")
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return
+
+    encoded = match[2][::-1]
+    decoded = base64.b64decode(encoded[::-1]).decode("utf-8")
+    log.info(f"URL {url_num}) Captured M3U8")
+    return decoded
 
 async def get_events(client: httpx.AsyncClient) -> list[dict[str, str]]:
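Side note on the decode step above: the captured value is reversed once and reversed again before base64-decoding, so the two reversals cancel and the decoder sees the captured string as-is. A standalone illustration with a made-up value (the real one comes from the scraped page):

    import base64

    # Hypothetical captured value, pre-encoded here so the example is runnable.
    captured = base64.b64encode(b"https://example.com/stream.m3u8").decode()

    encoded = captured[::-1]                                     # reverse once
    decoded = base64.b64decode(encoded[::-1]).decode("utf-8")    # reverse back, then decode

    print(decoded)  # https://example.com/stream.m3u8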

View file

@@ -118,7 +118,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
         log.info(f"Loaded {cached_count} event(s) from cache")
 
-    log.info(f'Scraping from "{BASE_URL}"')
+    log.info(f'Scraping from "https://streamcenter.xyz"')
 
     events = await get_events(client, set(cached_urls.keys()))

View file

@@ -37,7 +37,7 @@ TAG = "STRMSG8"
 def get_event(t1: str, t2: str) -> str:
     match t1:
         case "RED ZONE":
-            return "Red Zone"
+            return "NFL RedZone"
         case "TBD":
             return "TBD"
@@ -58,7 +58,9 @@ async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, An
     return r.json()
 
-async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
+async def refresh_api_cache(
+    client: httpx.AsyncClient, ts: float
+) -> list[dict[str, Any]]:
     log.info("Refreshing API cache")
 
     tasks = [
@@ -73,7 +75,7 @@ async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
     for ev in data:
         ev["ts"] = ev.pop("timestamp")
 
-    data[-1]["timestamp"] = Time.now().timestamp()
+    data[-1]["timestamp"] = ts
 
     return data
@@ -81,14 +83,15 @@ async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
 async def get_events(
     client: httpx.AsyncClient, cached_keys: set[str]
 ) -> list[dict[str, str]]:
+    now = Time.clean(Time.now())
+
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
-        api_data = await refresh_api_cache(client)
+        api_data = await refresh_api_cache(client, now.timestamp())
         API_FILE.write(api_data)
 
     events = []
-    now = Time.clean(Time.now())
 
     start_dt = now.delta(minutes=-30)
     end_dt = now.delta(minutes=30)
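Side note: the refresh_api_cache change threads a single ts value in from the caller rather than reading the clock again inside the refresher, so the timestamp written into the cache and the +/-30 minute event window come from the same instant. A minimal sketch of that pattern with stand-in names (the repo's Time and API_FILE helpers are not shown here):

    from datetime import datetime, timezone
    from typing import Any

    def refresh_cache(ts: float) -> list[dict[str, Any]]:
        data: list[dict[str, Any]] = [{"event": "example"}]  # placeholder payload
        data[-1]["timestamp"] = ts  # stamp the cache with the caller's clock
        return data

    now = datetime.now(timezone.utc)
    cache = refresh_cache(now.timestamp())   # same instant used for the cache stamp...
    start = now.timestamp() - 30 * 60        # ...and for the event window bounds
    end = now.timestamp() + 30 * 60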

View file

@@ -39,7 +39,9 @@ def fix_sport(s: str) -> str:
 async def refresh_api_cache(
     client: httpx.AsyncClient,
     url: str,
+    ts: float,
 ) -> list[dict[str, Any]]:
     log.info("Refreshing API cache")
 
     try:
@@ -52,7 +54,7 @@ async def refresh_api_cache(
     data = r.json()
 
-    data[-1]["timestamp"] = Time.now().timestamp()
+    data[-1]["timestamp"] = ts
 
     return data
@@ -126,16 +128,19 @@ async def get_events(
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
+    now = Time.clean(Time.now())
+
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
         api_data = await refresh_api_cache(
-            client, urljoin(url, "api/matches/all-today")
+            client,
+            urljoin(url, "api/matches/all-today"),
+            now.timestamp(),
         )
         API_FILE.write(api_data)
 
     events = []
-    now = Time.clean(Time.now())
 
     start_dt = now.delta(minutes=-30)
     end_dt = now.delta(minutes=30)
 
     pattern = re.compile(r"[\n\r]+|\s{2,}")

View file

@@ -27,8 +27,6 @@ def fix_event(s: str) -> str:
 async def refresh_html_cache(
     client: httpx.AsyncClient, url: str
 ) -> dict[str, dict[str, str | float]]:
-    log.info("Refreshing HTML cache")
-
     try:
         r = await client.get(url)
         r.raise_for_status()
@@ -95,6 +93,8 @@ async def get_events(
     now = Time.clean(Time.now())
 
     if not (events := HTML_CACHE.load()):
+        log.info("Refreshing HTML cache")
+
         tasks = [refresh_html_cache(client, url) for url in BASE_URLS.values()]
         results = await asyncio.gather(*tasks)