This commit is contained in:
parent a979f0bf75
commit 00000d97a5
8 changed files with 40 additions and 29 deletions
@@ -36,11 +36,12 @@ async def process_event(
         re.IGNORECASE,
     )
 
-    if match := valid_m3u8.search(r.text):
-        log.info(f"URL {url_num}) Captured M3U8")
-        return match[2]
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return
 
-    log.info(f"URL {url_num}) No M3U8 found")
+    log.info(f"URL {url_num}) Captured M3U8")
+    return match[2]
 
 
 async def get_events(
@@ -48,11 +48,12 @@ async def process_event(
         re.IGNORECASE,
     )
 
-    if match := valid_m3u8.search(r.text):
-        log.info(f"URL {url_num}) Captured M3U8")
-        return match[1]
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return
 
-    log.info(f"URL {url_num}) No M3U8 found")
+    log.info(f"URL {url_num}) Captured M3U8")
+    return match[1]
 
 
 async def refresh_html_cache(
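For context, a minimal standalone sketch of the guard-clause refactor the two hunks above apply: log and bail out as soon as the regex misses, then log the capture and return on the unindented happy path. The function name, the regex, and the synchronous signature are illustrative stand-ins; the project's process_event is async and compiles its pattern from context not shown in these hunks.

import logging
import re

log = logging.getLogger(__name__)

def extract_m3u8(text: str, url_num: int) -> str | None:
    # Hypothetical pattern; the real one is defined just above these hunks.
    valid_m3u8 = re.compile(r"(https?://\S+\.m3u8)", re.IGNORECASE)

    # Guard clause: handle the "no match" case first and return early.
    if not (match := valid_m3u8.search(text)):
        log.info(f"URL {url_num}) No M3U8 found")
        return None

    log.info(f"URL {url_num}) Captured M3U8")
    return match[1]

print(extract_m3u8("player https://example.com/live.m3u8 end", 1))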
@@ -62,8 +63,6 @@ async def refresh_html_cache(
     now_ts: float,
 ) -> dict[str, dict[str, str | float]]:
 
-    log.info("Refreshing HTML cache")
-
     try:
         r = await client.get(url)
         r.raise_for_status()
@@ -116,6 +115,8 @@ async def get_events(
     now = Time.clean(Time.now())
 
     if not (events := HTML_CACHE.load()):
+        log.info("Refreshing HTML cache")
+
         tasks = [
             refresh_html_cache(
                 client,
@@ -60,6 +60,8 @@ async def refresh_html_cache(
 
         return {}
 
+    pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)
+
     soup = HTMLParser(r.content)
 
     events = {}
@@ -81,8 +83,6 @@ async def refresh_html_cache(
         if not embed_btn or not (onclick := embed_btn.attributes.get("onclick")):
             continue
 
-        pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)
-
         if not (match := pattern.search(onclick)):
             continue
 
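The two hunks above hoist the openEmbed pattern out of the per-element loop, so it is compiled once per refresh instead of once per node. A small sketch of the same idea with made-up onclick values:

import re

# Compile once, before the loop, rather than on every iteration.
pattern = re.compile(r"openEmbed\('([^']+)'\)", re.IGNORECASE)

onclicks = ["openEmbed('https://example.com/embed/1')", "toggleMenu()"]  # hypothetical values

for onclick in onclicks:
    if not (match := pattern.search(onclick)):
        continue
    print(match[1])  # -> https://example.com/embed/1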
@@ -40,13 +40,14 @@ async def process_event(
 
     valid_m3u8 = re.compile(r'var\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)
 
-    if match := valid_m3u8.search(r.text):
-        encoded = match[2][::-1]
-        decoded = base64.b64decode(encoded[::-1]).decode("utf-8")
-        log.info(f"URL {url_num}) Captured M3U8")
-        return decoded
+    if not (match := valid_m3u8.search(r.text)):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return
 
-    log.info(f"URL {url_num}) No M3U8 found")
+    encoded = match[2][::-1]
+    decoded = base64.b64decode(encoded[::-1]).decode("utf-8")
+    log.info(f"URL {url_num}) Captured M3U8")
+    return decoded
 
 
 async def get_events(client: httpx.AsyncClient) -> list[dict[str, str]]:
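For reference, a sketch of the capture-and-decode step this hunk rearranges, using a made-up page snippet in the var NAME = "VALUE" shape the regex expects. Note that the two [::-1] slices reverse the captured value and then reverse it back, so the string reaching b64decode is the capture unchanged.

import base64
import re

valid_m3u8 = re.compile(r'var\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)

# Hypothetical page body; the value is base64 of https://example.com/stream.m3u8
page = 'var token = "aHR0cHM6Ly9leGFtcGxlLmNvbS9zdHJlYW0ubTN1OA==";'

if match := valid_m3u8.search(page):
    encoded = match[2][::-1]                                    # reverse the captured value
    decoded = base64.b64decode(encoded[::-1]).decode("utf-8")   # reverse back, then decode
    print(decoded)  # -> https://example.com/stream.m3u8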
@@ -118,7 +118,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
 
     log.info(f"Loaded {cached_count} event(s) from cache")
 
-    log.info(f'Scraping from "{BASE_URL}"')
+    log.info(f'Scraping from "https://streamcenter.xyz"')
 
     events = await get_events(client, set(cached_urls.keys()))
 
@ -37,7 +37,7 @@ TAG = "STRMSG8"
|
|||
def get_event(t1: str, t2: str) -> str:
|
||||
match t1:
|
||||
case "RED ZONE":
|
||||
return "Red Zone"
|
||||
return "NFL RedZone"
|
||||
|
||||
case "TBD":
|
||||
return "TBD"
|
||||
|
|
@ -58,7 +58,9 @@ async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, An
|
|||
return r.json()
|
||||
|
||||
|
||||
async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
|
||||
async def refresh_api_cache(
|
||||
client: httpx.AsyncClient, ts: float
|
||||
) -> list[dict[str, Any]]:
|
||||
log.info("Refreshing API cache")
|
||||
|
||||
tasks = [
|
||||
|
|
@ -73,7 +75,7 @@ async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
|
|||
for ev in data:
|
||||
ev["ts"] = ev.pop("timestamp")
|
||||
|
||||
data[-1]["timestamp"] = Time.now().timestamp()
|
||||
data[-1]["timestamp"] = ts
|
||||
|
||||
return data
|
||||
|
||||
|
|
@ -81,14 +83,15 @@ async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
|
|||
async def get_events(
|
||||
client: httpx.AsyncClient, cached_keys: set[str]
|
||||
) -> list[dict[str, str]]:
|
||||
now = Time.clean(Time.now())
|
||||
|
||||
if not (api_data := API_FILE.load(per_entry=False, index=-1)):
|
||||
api_data = await refresh_api_cache(client)
|
||||
api_data = await refresh_api_cache(client, now.timestamp())
|
||||
|
||||
API_FILE.write(api_data)
|
||||
|
||||
events = []
|
||||
|
||||
now = Time.clean(Time.now())
|
||||
start_dt = now.delta(minutes=-30)
|
||||
end_dt = now.delta(minutes=30)
|
||||
|
||||
|
|
|
|||
|
|
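The hunks just above thread a single caller-supplied timestamp into refresh_api_cache instead of calling Time.now() inside it, so the cache's freshness marker and the now used for the event window come from the same instant. A minimal sketch of the idea with illustrative names (the project's version is async and fetches over httpx):

import time
from typing import Any, Callable

def refresh_cache(fetch: Callable[[], list[dict[str, Any]]], ts: float) -> list[dict[str, Any]]:
    data = fetch()
    data[-1]["timestamp"] = ts  # stamp the batch with the caller's clock reading
    return data

now_ts = time.time()
data = refresh_cache(lambda: [{"id": 1}, {"id": 2}], now_ts)
assert data[-1]["timestamp"] == now_ts  # marker matches the caller's "now" exactly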
@ -39,7 +39,9 @@ def fix_sport(s: str) -> str:
|
|||
async def refresh_api_cache(
|
||||
client: httpx.AsyncClient,
|
||||
url: str,
|
||||
ts: float,
|
||||
) -> list[dict[str, Any]]:
|
||||
|
||||
log.info("Refreshing API cache")
|
||||
|
||||
try:
|
||||
|
|
@@ -52,7 +54,7 @@ async def refresh_api_cache(
 
     data = r.json()
 
-    data[-1]["timestamp"] = Time.now().timestamp()
+    data[-1]["timestamp"] = ts
 
     return data
 
@@ -126,16 +128,19 @@ async def get_events(
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
 
+    now = Time.clean(Time.now())
+
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
         api_data = await refresh_api_cache(
-            client, urljoin(url, "api/matches/all-today")
+            client,
+            urljoin(url, "api/matches/all-today"),
+            now.timestamp(),
         )
 
         API_FILE.write(api_data)
 
     events = []
 
-    now = Time.clean(Time.now())
     start_dt = now.delta(minutes=-30)
     end_dt = now.delta(minutes=30)
     pattern = re.compile(r"[\n\r]+|\s{2,}")
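A quick illustration of the reshaped call in this hunk: urljoin resolves the API path against the site's base URL, and the caller's timestamp now rides along as the third argument. The base URL below is a made-up stand-in:

from urllib.parse import urljoin

url = "https://example.com/"  # hypothetical base; the project supplies its own
print(urljoin(url, "api/matches/all-today"))  # -> https://example.com/api/matches/all-today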
@ -27,8 +27,6 @@ def fix_event(s: str) -> str:
|
|||
async def refresh_html_cache(
|
||||
client: httpx.AsyncClient, url: str
|
||||
) -> dict[str, dict[str, str | float]]:
|
||||
log.info("Refreshing HTML cache")
|
||||
|
||||
try:
|
||||
r = await client.get(url)
|
||||
r.raise_for_status()
|
||||
|
|
@@ -95,6 +93,8 @@ async def get_events(
     now = Time.clean(Time.now())
 
     if not (events := HTML_CACHE.load()):
+        log.info("Refreshing HTML cache")
+
         tasks = [refresh_html_cache(client, url) for url in BASE_URLS.values()]
 
         results = await asyncio.gather(*tasks)
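Finally, a compact sketch of the cache-miss branch these last hunks converge on: the "Refreshing HTML cache" log now lives at the call site, and one refresh task per base URL runs concurrently via asyncio.gather. BASE_URLS and the returned shape are illustrative; the project's refresh_html_cache parses events out of the response.

import asyncio
import logging

import httpx

log = logging.getLogger(__name__)
BASE_URLS = {"example_com": "https://example.com", "example_org": "https://example.org"}  # hypothetical

async def refresh_html_cache(client: httpx.AsyncClient, url: str) -> dict[str, str]:
    r = await client.get(url)
    r.raise_for_status()
    return {"url": url, "html": r.text}

async def get_events(client: httpx.AsyncClient) -> list[dict[str, str]]:
    log.info("Refreshing HTML cache")  # logged once, at the caller, on a cache miss

    tasks = [refresh_html_cache(client, url) for url in BASE_URLS.values()]
    return list(await asyncio.gather(*tasks))

async def main() -> None:
    async with httpx.AsyncClient() as client:
        results = await get_events(client)
        print(len(results), "page(s) refreshed")

asyncio.run(main())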