doms9 2025-10-01 11:57:49 -04:00
parent 7103b0f1c4
commit 00000d9937
17 changed files with 597 additions and 524 deletions

@@ -2,24 +2,14 @@ import asyncio
import io
import ssl
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from datetime import timedelta
from functools import partial
from pathlib import Path
import httpx
from playwright.async_api import async_playwright
from .utils import (
TZ,
capture_req,
get_logger,
leagues,
load_cache,
new_browser,
now,
safe_process_event,
write_cache,
)
from .utils import Cache, Time, get_logger, leagues, network
log = get_logger(__name__)
@@ -36,7 +26,7 @@ CERT_BUNDLE_URLS = [
CERT_FILE = Path(__file__).parent / "caches" / "cached-cert.pem"
CACHE_FILE = Path(__file__).parent / "caches" / "livetvsx.json"
CACHE_FILE = Cache(Path(__file__).parent / "caches" / "livetvsx.json", exp=10_800)
async def write_to_cert(
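
CACHE_FILE is now a Cache object rather than a bare Path, with a 10_800-second (3-hour) expiry baked in. A minimal sketch of what such a wrapper could look like, assuming it stores a JSON dict keyed by event and drops entries whose timestamp field is older than exp seconds (the real utils.Cache may differ):

```python
import json
import time
from pathlib import Path


class Cache:
    """Hypothetical sketch of the Cache helper: a JSON file with per-entry expiry."""

    def __init__(self, path: Path, exp: int) -> None:
        self.path = path
        self.exp = exp  # maximum entry age, in seconds

    def load(self) -> dict[str, dict]:
        if not self.path.is_file():
            return {}
        data = json.loads(self.path.read_text())
        cutoff = time.time() - self.exp
        # keep only entries whose stored timestamp is still inside the window
        return {k: v for k, v in data.items() if v.get("timestamp", 0) >= cutoff}

    def write(self, data: dict[str, dict]) -> None:
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.write_text(json.dumps(data, indent=2))
```
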
@@ -64,9 +54,9 @@ async def refresh_cert_cache(client: httpx.AsyncClient) -> ssl.SSLContext:
async def get_cert(client: httpx.AsyncClient) -> ssl.SSLContext:
if CERT_FILE.is_file():
mtime = datetime.fromtimestamp(CERT_FILE.stat().st_mtime, TZ)
mtime = Time.from_ts(CERT_FILE.stat().st_mtime)
if now - mtime < timedelta(days=30):
if Time.now() - mtime < timedelta(days=30):
return ssl.create_default_context(cafile=CERT_FILE)
log.info("Refreshing cached certificate")
@@ -101,7 +91,7 @@ async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO | None:
async def process_event(url: str, url_num: int) -> str | None:
async with async_playwright() as p:
browser, context = await new_browser(p, ignore_https_errors=True)
browser, context = await network.browser(p, ignore_https_errors=True)
page = await context.new_page()
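
process_event now goes through network.browser instead of the old new_browser helper. A plausible stand-in, assuming it launches headless Chromium and hands back both the browser and a context configured with the passed keyword arguments:

```python
from playwright.async_api import Browser, BrowserContext, Playwright


async def browser(p: Playwright, **context_kwargs) -> tuple[Browser, BrowserContext]:
    # hypothetical sketch of network.browser(); the real helper may set extra
    # launch flags, a user agent, or a proxy
    b = await p.chromium.launch(headless=True)
    ctx = await b.new_context(**context_kwargs)
    return b, ctx
```
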
@@ -109,7 +99,7 @@ async def process_event(url: str, url_num: int) -> str | None:
got_one = asyncio.Event()
handler = partial(capture_req, captured=captured, got_one=got_one)
handler = partial(network.capture_req, captured=captured, got_one=got_one)
popup = None
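
The request handler is still bound with functools.partial, just relocated to network.capture_req. A hedged sketch of what it presumably does: remember every HLS (.m3u8) request the page fires and signal the waiter:

```python
import asyncio

from playwright.async_api import Request


def capture_req(req: Request, *, captured: list[str], got_one: asyncio.Event) -> None:
    # hypothetical sketch of network.capture_req
    if ".m3u8" in req.url:
        captured.append(req.url)
        got_one.set()  # wake whoever is awaiting the first capture
```
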
@@ -149,9 +139,7 @@ async def process_event(url: str, url_num: int) -> str | None:
try:
await link_img.click()
except Exception as e:
log.debug(
f"URL {url_num}) Click failed (popup might have already been opened): {e}"
)
log.debug(f"URL {url_num}) Click failed: {e}")
popup = await popup_info.value
@@ -194,8 +182,9 @@ async def process_event(url: str, url_num: int) -> str | None:
return captured[-1]
log.warning(f"URL {url_num}) No M3U8 captured in popup or inline playback.")
log.warning(f"URL {url_num}) No M3U8 captured")
return
except Exception:
try:
page.remove_listener("request", handler)
@@ -220,73 +209,84 @@ async def get_events(
events: list[dict[str, str]] = []
start_dt = now - timedelta(minutes=30)
end_dt = now + timedelta(minutes=30)
now = Time.now()
if buffer := await fetch_xml_stream(url, ssl_ctx):
pub_date_format = "%a, %d %b %Y %H:%M:%S %z"
start_dt = now.delta(minutes=-30)
end_dt = now.delta(minutes=30)
for _, elem in ET.iterparse(buffer, events=("end",)):
if elem.tag == "item":
title = elem.findtext("title")
desc = elem.findtext("description")
pub_date = elem.findtext("pubDate")
link = elem.findtext("link")
if not (buffer := await fetch_xml_stream(url, ssl_ctx)):
return events
try:
dt = datetime.strptime(pub_date, pub_date_format)
dt = dt.astimezone(TZ)
except Exception:
elem.clear()
continue
pub_date_format = "%a, %d %b %Y %H:%M:%S %z"
if not start_dt <= dt <= end_dt:
elem.clear()
continue
sport, event = (
(
desc.split(".")[0].strip(),
" ".join(p.strip() for p in desc.split(".")[1:]),
)
if desc
else ("", "")
)
key = f"[{sport}: {event}] {title} (LTVSX)"
if cached_keys & {key}:
elem.clear()
continue
events.append(
{
"sport": sport,
"event": event,
"title": title,
"link": link,
}
)
for _, elem in ET.iterparse(buffer, events=("end",)):
if elem.tag == "item":
title = elem.findtext("title") or ""
desc = elem.findtext("description") or ""
pub_date = elem.findtext("pubDate") or ""
link = elem.findtext("link") or ""
if not all([title, pub_date, link]):
elem.clear()
continue
try:
event_dt = Time.from_str(pub_date, pub_date_format)
except Exception:
elem.clear()
continue
if not start_dt <= event_dt <= end_dt:
elem.clear()
continue
if desc:
parts = desc.split(".")
sport = parts[0].strip()
event = parts[1].strip() if len(parts) > 1 else ""
else:
sport, event = "", ""
key = f"[{sport}: {event}] {title} (LTVSX)"
if cached_keys & {key}:
elem.clear()
continue
events.append(
{
"sport": sport,
"event": event,
"title": title,
"link": link,
"timestamp": event_dt.timestamp(),
}
)
elem.clear()
return events
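
get_events now guards every field, skips items missing a title, pubDate, or link, and carries the parsed timestamp along with each event; elem.clear() after every <item> keeps memory flat on large feeds. The iterparse-plus-clear pattern in isolation, on a tiny made-up feed:

```python
import io
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta, timezone

feed = io.BytesIO(b"""<rss><channel>
<item><title>Team A - Team B</title>
<pubDate>Wed, 01 Oct 2025 15:30:00 +0000</pubDate>
<link>https://example.com/event/1</link></item>
</channel></rss>""")  # hypothetical feed for illustration

now = datetime(2025, 10, 1, 15, 45, tzinfo=timezone.utc)
window = timedelta(minutes=30)

for _, elem in ET.iterparse(feed, events=("end",)):
    if elem.tag == "item":
        pub = datetime.strptime(elem.findtext("pubDate"), "%a, %d %b %Y %H:%M:%S %z")
        if now - window <= pub <= now + window:
            print(elem.findtext("title"), elem.findtext("link"))
        elem.clear()  # release the parsed element so the tree never grows
```
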
async def scrape(client: httpx.AsyncClient) -> None:
cached_urls = load_cache(CACHE_FILE, exp=10_800)
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
urls.update(cached_urls)
log.info(f"Collected {cached_count} event(s) from cache")
log.info(f"Loaded {cached_count} event(s) from cache")
log.info(f'Scraping from "{BASE_URL}"')
cert = await get_cert(client)
ssl_ctx = await get_cert(client)
if not ssl_ctx:
log.error("Failed to create SSL context, aborting")
CACHE_FILE.write(cached_urls)
return
events = await get_events(
BASE_URL,
cert,
ssl_ctx,
set(cached_urls.keys()),
)
@@ -295,16 +295,19 @@ async def scrape(client: httpx.AsyncClient) -> None:
for i, ev in enumerate(events, start=1):
link = ev["link"]
url = await safe_process_event(
url = await network.safe_process(
lambda: process_event(link, url_num=i),
url_num=i,
log=log,
)
if url:
sport = ev["sport"]
event = ev["event"]
title = ev["title"]
sport, event, title, ts = (
ev["sport"],
ev["event"],
ev["title"],
ev["timestamp"],
)
key = f"[{sport}: {event}] {title} (LTVSX)"
@@ -316,9 +319,9 @@ async def scrape(client: httpx.AsyncClient) -> None:
entry = {
"url": url,
"logo": logo,
"id": tvg_id,
"id": tvg_id or "Live.Event.us",
"base": "https://livetv.sx/enx/",
"timestamp": now.timestamp(),
"timestamp": ts,
}
urls[key] = cached_urls[key] = entry
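
Per-event scraping is wrapped by network.safe_process, which replaces safe_process_event. A hedged sketch of what such a wrapper might do: await one event's coroutine, log the failure with its URL number, and never let an exception kill the whole scrape:

```python
import logging
from collections.abc import Awaitable, Callable


async def safe_process(
    fn: Callable[[], Awaitable[str | None]],
    *,
    url_num: int,
    log: logging.Logger,
) -> str | None:
    # hypothetical sketch of network.safe_process; the real helper may also
    # retry or enforce a timeout
    try:
        return await fn()
    except Exception as e:
        log.warning(f"URL {url_num}) processing failed: {e}")
        return None
```
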
@@ -328,4 +331,4 @@ async def scrape(client: httpx.AsyncClient) -> None:
else:
log.info("No new events found")
write_cache(CACHE_FILE, cached_urls)
CACHE_FILE.write(cached_urls)