e

This commit is contained in:
parent a02d30459a
commit 00000d9855

5 changed files with 98 additions and 42 deletions
@@ -50,7 +50,7 @@ async def get_schedule(client: httpx.AsyncClient, base_url: str) -> list[dict]:
             )

             if " - " in after_time:
-                sport, event_name = [x.strip() for x in after_time.split(" - ", 1)]
+                sport, event_name = (x.strip() for x in after_time.split(" - ", 1))
             else:
                 sport, event_name = "", after_time
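For context on the one-line change above: tuple unpacking accepts any iterable, so swapping the list comprehension for a generator expression changes nothing observable. A minimal standalone sketch, using a made-up schedule string (not from the real feed):

# Hypothetical input string, for illustration only.
after_time = "Football - Arsenal vs Chelsea"
sport, event_name = (x.strip() for x in after_time.split(" - ", 1))
assert (sport, event_name) == ("Football", "Arsenal vs Chelsea")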
@@ -131,4 +131,7 @@ async def main(client: httpx.AsyncClient) -> None:
                 ),
             }

-    log.info(f"Collected {len(urls)} live events")
+    log.info(f"Collected {len(urls)} live event(s)")
+
+
+    # add caching
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import asyncio
 import io
+import json
 import ssl
 import xml.etree.ElementTree as ET
 from datetime import datetime, timedelta
@@ -30,12 +31,15 @@ CERT_BUNDL_URLS = [
 CERT_FILE = Path(__file__).parent / "cached-ca.pem"

+CACHE_FILE = Path(__file__).parent / "livetvsx.json"
+

-async def safe_process_event(fn, timeout_sec=20) -> Any | None:
+async def safe_process_event(fn, url_num: int, timeout=20) -> Any | None:
     try:
-        return await asyncio.wait_for(fn(), timeout=timeout_sec)
+        return await asyncio.wait_for(fn(), timeout=timeout)
     except asyncio.TimeoutError:
-        log.warning(f"Timed out after {timeout_sec}s, skipping event")
+        log.warning(f"URL {url_num}) Timed out after {timeout}s, skipping event")
         return


 async def write_to_cert(client: httpx.AsyncClient, url: str, cert: Path) -> None:
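A self-contained sketch of the pattern the reworked `safe_process_event` follows (the names `safe_call`, `slow`, and `demo` are illustrative, not from the repository): `asyncio.wait_for` cancels the wrapped coroutine after `timeout` seconds, and the wrapper logs a numbered warning and returns `None` instead of raising.

import asyncio
import logging

log = logging.getLogger(__name__)

async def safe_call(fn, url_num: int, timeout: float = 20):
    # fn is a zero-argument callable returning a coroutine, matching the
    # diff's `lambda: process_event(...)` usage; give up after `timeout` seconds.
    try:
        return await asyncio.wait_for(fn(), timeout=timeout)
    except asyncio.TimeoutError:
        log.warning(f"URL {url_num}) Timed out after {timeout}s, skipping event")
        return None

async def demo() -> None:
    async def slow() -> str:
        await asyncio.sleep(60)
        return "never reached"

    print(await safe_call(slow, url_num=1, timeout=0.1))  # prints None after ~0.1s

if __name__ == "__main__":
    asyncio.run(demo())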
@@ -71,6 +75,21 @@ async def get_cert(client: httpx.AsyncClient) -> ssl.SSLContext:
     return ssl.create_default_context(cafile=CERT_FILE)


+def load_cache() -> dict[str, dict[str, str | str]]:
+    try:
+        data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
+
+        now = datetime.now().timestamp()
+
+        return {
+            k: v
+            for k, v in data.items()
+            if now - v.get("timestamp", 0) < timedelta(hours=4).total_seconds()
+        }
+    except (FileNotFoundError, json.JSONDecodeError):
+        return {}
+
+
 async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO:
     buffer = io.BytesIO()
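The new `load_cache` reads the JSON cache and drops entries older than four hours, based on the per-entry `timestamp` field that `main` now writes. A standalone sketch of the same TTL filter, with the file path and data shape assumed from the diff:

import json
from datetime import datetime, timedelta
from pathlib import Path

CACHE_FILE = Path("livetvsx.json")  # illustrative path; the diff anchors it next to the script
MAX_AGE_SEC = timedelta(hours=4).total_seconds()

def load_cache() -> dict[str, dict]:
    # A missing or corrupt cache file is treated as an empty cache.
    try:
        data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
    except (FileNotFoundError, json.JSONDecodeError):
        return {}
    now = datetime.now().timestamp()
    # Keep only entries whose "timestamp" is less than four hours old.
    return {k: v for k, v in data.items() if now - v.get("timestamp", 0) < MAX_AGE_SEC}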
@@ -91,8 +110,13 @@ async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO:
         return io.BytesIO(b"")


-async def parse_feed(url: str, ssl_ctx: ssl.SSLContext) -> dict[str, dict[str, str]]:
-    events = []
+async def parse_feed(
+    url: str,
+    ssl_ctx: ssl.SSLContext,
+    cached_keys: set[str],
+) -> list[dict[str, str]]:
+    events: list[dict[str, str]] = []
     pub_date_format = "%a, %d %b %Y %H:%M:%S %z"
     now = datetime.now(TZ)
@@ -124,21 +148,28 @@ async def parse_feed(url: str, ssl_ctx: ssl.SSLContext) -> dict[str, dict[str, str]]:
                 else ("", "")
             )

-            events.append(
-                {
-                    "sport": sport,
-                    "event": event,
-                    "title": title,
-                    "link": link,
-                }
-            )
+            key = f"[{sport}: {event}] {title}"
+
+            if key in cached_keys:
+                elem.clear()
+                continue
+
+            elif not tvp_sports & {sport, event}:
+                events.append(
+                    {
+                        "sport": sport,
+                        "event": event,
+                        "title": title,
+                        "link": link,
+                    }
+                )

             elem.clear()

     return events


-async def process_event(url: str, max_wait_ms=15_000) -> str | None:
+async def process_event(url: str, url_num: int, max_wait_ms=15_000) -> str | None:
     async with async_playwright() as p:
         browser = await p.firefox.launch(headless=True)
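To make the new filter order in `parse_feed` explicit: an event is skipped when its composite key is already cached, and otherwise kept only when neither its sport nor its event name intersects `tvp_sports`. A toy run with invented values:

# All values here are invented for illustration.
tvp_sports = {"NBA", "NHL"}
cached_keys = {"[Soccer: Premier League] Arsenal - Chelsea"}

for sport, event, title in [
    ("Soccer", "Premier League", "Arsenal - Chelsea"),  # already cached -> skipped
    ("NBA", "Regular Season", "Lakers - Celtics"),      # covered by tvp_sports -> skipped
    ("Tennis", "ATP 500", "Final"),                     # new and not on tvpass -> kept
]:
    key = f"[{sport}: {event}] {title}"
    if key in cached_keys:
        print(f"skip (cached): {key}")
    elif not tvp_sports & {sport, event}:
        print(f"keep: {key}")
    else:
        print(f"skip (tvpass): {key}")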
@@ -179,16 +210,18 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                 await ev_page.wait_for_timeout(500)
             except Exception as e:
-                log.debug(f"Failed to click Browser Links tab: {e}")
+                log.debug(f"URL {url_num}) Failed to click Browser Links tab: {e}")
                 return
         else:
-            log.warning("Browser Links tab not found")
+            log.warning(f"URL {url_num}) Browser Links tab not found")

         link_img = await ev_page.query_selector(
             "tr:nth-child(2) > td:nth-child(1) td:nth-child(6) img"
         )

         if not link_img:
-            log.warning("No browser link to click.")
+            log.warning(f"URL {url_num}) No browser link to click.")
             return

         ev_page.on("request", capture_req)
@@ -198,7 +231,7 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                 await link_img.click()
             except Exception as e:
                 log.debug(
-                    f"Click failed (popup might have already been opened): {e}"
+                    f"URL {url_num}) Click failed (popup might have already been opened): {e}"
                 )

             popup = await popup_info.value
@@ -209,7 +242,8 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
             try:
                 await link_img.click()
             except Exception as e:
-                log.debug(f"Fallback click failed: {e}")
+                log.debug(f"URL {url_num}) Fallback click failed: {e}")
                 return

             wait_task = asyncio.create_task(got_one.wait())
@@ -217,7 +251,8 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                 await asyncio.wait_for(wait_task, timeout=max_wait_ms / 1000)

             except asyncio.TimeoutError:
-                log.warning("Timed out waiting for m3u8.")
+                log.warning(f"URL {url_num}) Timed out waiting for m3u8.")
                 return

             finally:
                 if not wait_task.done():
@@ -238,10 +273,12 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
             await ev_page.close()

             if captured:
+                log.info(f"URL {url_num}) Captured M3U8")
+
                 return captured[-1]

-            log.warning("No m3u8 captured in popup or inline playback.")
+            log.warning(f"URL {url_num}) No m3u8 captured in popup or inline playback.")
             return
         except Exception as e:
             try:
                 ev_page.remove_listener("request", capture_req)
@@ -263,29 +300,44 @@ async def main(client: httpx.AsyncClient) -> None:
     cert = await get_cert(client)

-    events = await parse_feed(BASE_URL, cert)
+    cached_urls = load_cache()
+    cached_keys = set(cached_urls.keys())
+    cached_count = len(cached_urls)

-    log.info(f"Processing {len(events)} events")
+    events = await parse_feed(BASE_URL, cert, cached_keys)

-    for ev in events:
-        if tvp_sports & {
-            sport := ev["sport"],
-            event := ev["event"],
-        }:  # already in tvpass
-            continue
+    log.info(f"Processing {len(events)} URLs")

-        url = await safe_process_event(lambda: process_event(ev["link"]))
+    now_ts = datetime.now().timestamp()
+
+    for num, ev in enumerate(events, start=1):
+        sport = ev["sport"]
+        event = ev["event"]
+        title = ev["title"]
+        link = ev["link"]
+
+        key = f"[{sport}: {event}] {title}"
+
+        url = await safe_process_event(
+            lambda: process_event(link, url_num=num), url_num=num
+        )

         if url:
-            urls[f"[{sport}: {event}] {ev['title']}"] = {
+            entry = {
                 "url": url,
                 "logo": logos.get(
                     sport,
                     "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png",
                 ),
+                "timestamp": now_ts,
             }

-    log.info(f"Collected {len(urls)} live events")
+            urls[key] = cached_urls[key] = entry
+
+    CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")

-    # add caching
+    new_count = len(cached_urls) - cached_count
+
+    log.info(f"Cached {cached_count} event(s)")
+
+    log.info(f"Collected {new_count} new event(s)")
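The reworked `main` numbers events with `enumerate(..., start=1)`, writes each successful entry into both the in-memory `urls` dict and the persistent `cached_urls` dict, then derives the "new" count from how much the cache grew. A toy sketch of that bookkeeping, with invented keys and URLs:

# Invented data; only the bookkeeping pattern mirrors the diff.
cached_urls = {"[Tennis: ATP 500] Final": {"url": "old.m3u8", "timestamp": 0.0}}
cached_count = len(cached_urls)
urls: dict[str, dict] = {}

for num, key in enumerate(["[Soccer: EPL] A - B", "[Darts: PDC] C - D"], start=1):
    entry = {"url": f"stream-{num}.m3u8", "timestamp": 0.0}
    urls[key] = cached_urls[key] = entry  # one assignment keeps both dicts in sync

new_count = len(cached_urls) - cached_count
print(f"Cached {cached_count} event(s)")      # 1
print(f"Collected {new_count} new event(s)")  # 2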
@@ -51,7 +51,7 @@ async def fetch_m3u8(client: httpx.AsyncClient) -> list[str] | None:
 async def main(client: httpx.AsyncClient) -> None:
     if cached := load_cache():
         urls.update(cached)
-        log.info(f"Collected {len(urls)} events from cache")
+        log.info(f"Collected {len(urls)} event(s) from cache")
         return

     log.info(f'Scraping from "{base_url}"')
@@ -87,4 +87,4 @@ async def main(client: httpx.AsyncClient) -> None:
     if urls:
         base_file.write_text(json.dumps(urls, indent=2), encoding="utf-8")

-        log.info(f"Cached {len(urls)} events")
+        log.info(f"Cached {len(urls)} event(s)")