doms9 2025-10-30 19:54:06 -04:00
parent ff4daf915f
commit 00000d91dc
3 changed files with 52 additions and 37 deletions

View file

@@ -1,10 +1,10 @@
 import asyncio
+import re
 from functools import partial
 from pathlib import Path
 from urllib.parse import urljoin

 import httpx
-from playwright.async_api import async_playwright
 from selectolax.parser import HTMLParser

 from .utils import Cache, Time, get_logger, leagues, network
@@ -25,6 +25,31 @@ CACHE_FILE = Cache(Path(__file__).parent / "caches" / "roxie.json", exp=10_800)
 HTML_CACHE = Cache(Path(__file__).parent / "caches" / "roxie_html.json", exp=28_800)


+async def process_event(
+    client: httpx.AsyncClient,
+    url: str,
+    url_num: int,
+) -> str | None:
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+
+    except Exception as e:
+        log.error(f'URL {url_num}) Failed to fetch "{url}": {e}')
+        return
+
+    valid_m3u8 = re.compile(
+        r"showPlayer\(['\"]clappr['\"],\s*['\"]([^'\"]+?\.m3u8(?:\?[^'\"]*)?)['\"]\)",
+        re.IGNORECASE,
+    )
+
+    if match := valid_m3u8.search(r.text):
+        log.info(f"URL {url_num}) Captured M3U8")
+        return match[1]
+
+    log.info(f"URL {url_num}) No M3U8 found")
+
+
 async def refresh_html_cache(
     client: httpx.AsyncClient,
     url: str,
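For reference, a minimal standalone sketch of what the new extraction does: the compiled pattern pulls the .m3u8 URL out of the page's showPlayer('clappr', ...) call. The sample markup and URL below are invented purely for illustration.

import re

valid_m3u8 = re.compile(
    r"showPlayer\(['\"]clappr['\"],\s*['\"]([^'\"]+?\.m3u8(?:\?[^'\"]*)?)['\"]\)",
    re.IGNORECASE,
)

# Hypothetical page snippet; real pages embed a call like this in a <script> tag.
sample_html = """
<script>
  showPlayer('clappr', 'https://cdn.example.com/live/stream.m3u8?token=abc123');
</script>
"""

if match := valid_m3u8.search(sample_html):
    print(match[1])  # -> https://cdn.example.com/live/stream.m3u8?token=abc123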
@@ -135,43 +160,36 @@ async def scrape(client: httpx.AsyncClient) -> None:
     log.info(f"Processing {len(events)} new URL(s)")

     if events:
-        async with async_playwright() as p:
-            browser, context = await network.browser(p)
-
-            for i, ev in enumerate(events, start=1):
-                handler = partial(
-                    network.process_event,
-                    url=ev["link"],
-                    url_num=i,
-                    context=context,
-                    timeout=15,
-                    log=log,
-                )
-
-                url = await network.safe_process(
-                    handler,
-                    url_num=i,
-                    log=log,
-                )
-
-                if url:
-                    sport, event, ts = ev["sport"], ev["event"], ev["event_ts"]
-
-                    tvg_id, logo = leagues.info(sport)
-
-                    key = f"[{sport}] {event} (ROXIE)"
-
-                    entry = {
-                        "url": url,
-                        "logo": logo,
-                        "base": "",
-                        "timestamp": ts,
-                        "id": tvg_id or "Live.Event.us",
-                    }
-
-                    urls[key] = cached_urls[key] = entry
-
-            await browser.close()
+        for i, ev in enumerate(events, start=1):
+            handler = partial(
+                process_event,
+                client=client,
+                url=ev["link"],
+                url_num=i,
+            )
+
+            url = await network.safe_process(
+                handler,
+                url_num=i,
+                log=log,
+            )
+
+            if url:
+                sport, event, ts = ev["sport"], ev["event"], ev["event_ts"]
+
+                tvg_id, logo = leagues.info(sport)
+
+                key = f"[{sport}] {event} (ROXIE)"
+
+                entry = {
+                    "url": url,
+                    "logo": logo,
+                    "base": "",
+                    "timestamp": ts,
+                    "id": tvg_id or "Live.Event.us",
+                }
+
+                urls[key] = cached_urls[key] = entry

     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")

View file

@@ -124,10 +124,7 @@ async def scrape(client: httpx.AsyncClient) -> None:

     if events:
         async with async_playwright() as p:
-            try:
-                browser, context = await network.browser(p, browser="brave")
-            except Exception:
-                browser, context = await network.browser(p)
+            browser, context = await network.browser(p, browser="brave")

             for i, ev in enumerate(events, start=1):
                 handler = partial(

View file

@@ -150,7 +150,7 @@ async def scrape(client: httpx.AsyncClient) -> None:

     if events:
         async with async_playwright() as p:
-            browser, context = await network.browser(p, "brave")
+            browser, context = await network.browser(p, browser="brave")

             for i, ev in enumerate(events, start=1):
                 handler = partial(