doms9 2025-09-03 00:00:22 -04:00
parent a02d30459a
commit 00000d9855
5 changed files with 98 additions and 42 deletions

View file

@@ -41,13 +41,14 @@ async def main() -> None:
     tasks = [
         # ace.main(client),
         # fstv.main(client),
-        livetvsx.main(CLIENT),
-        tvpass.main(CLIENT),
+        asyncio.create_task(livetvsx.main(CLIENT)),
+        asyncio.create_task(tvpass.main(CLIENT)),
+        vanilla_fetch(),
     ]

-    await asyncio.gather(*tasks)
+    results = await asyncio.gather(*tasks)

-    base_m3u8, tvg_chno = await vanilla_fetch()
+    base_m3u8, tvg_chno = results[-1]

     additions = ace.urls | fstv.urls | livetvsx.urls | tvpass.urls
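
The hunk above wraps the two long-running scrapers in asyncio.create_task() and moves vanilla_fetch() into the same asyncio.gather() call; gather() preserves argument order, so the tuple returned by vanilla_fetch() comes back as results[-1]. A minimal, self-contained sketch of that pattern, with placeholder coroutines standing in for the repo's real modules:

import asyncio


async def scraper(name: str) -> None:
    # stand-in for livetvsx.main() / tvpass.main(), which fill module-level dicts
    await asyncio.sleep(0.1)


async def vanilla_fetch() -> tuple[str, int]:
    # stand-in for the playlist fetch; returns (base_m3u8, tvg_chno)
    await asyncio.sleep(0.1)
    return "#EXTM3U", 42


async def main() -> None:
    tasks = [
        asyncio.create_task(scraper("livetvsx")),
        asyncio.create_task(scraper("tvpass")),
        vanilla_fetch(),  # bare coroutine; gather() awaits it alongside the tasks
    ]
    results = await asyncio.gather(*tasks)
    base_m3u8, tvg_chno = results[-1]  # gather() keeps input order
    print(base_m3u8, tvg_chno)


asyncio.run(main())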

View file

@@ -50,7 +50,7 @@ async def get_schedule(client: httpx.AsyncClient, base_url: str) -> list[dict]:
         )

         if " - " in after_time:
-            sport, event_name = [x.strip() for x in after_time.split(" - ", 1)]
+            sport, event_name = (x.strip() for x in after_time.split(" - ", 1))
         else:
             sport, event_name = "", after_time
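
The only change here is swapping a list comprehension for a generator expression on the unpacking line; tuple unpacking accepts any iterable of the right length, so behavior is unchanged. A quick illustration with a made-up schedule string:

after_time = "NHL - Rangers vs Devils"

sport, event_name = [x.strip() for x in after_time.split(" - ", 1)]  # old form
assert (sport, event_name) == ("NHL", "Rangers vs Devils")

sport, event_name = (x.strip() for x in after_time.split(" - ", 1))  # new form
assert (sport, event_name) == ("NHL", "Rangers vs Devils")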

View file

@@ -131,4 +131,7 @@ async def main(client: httpx.AsyncClient) -> None:
             ),
         }

-    log.info(f"Collected {len(urls)} live events")
+    log.info(f"Collected {len(urls)} live event(s)")
+
+    # add caching

View file

@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import asyncio
 import io
+import json
 import ssl
 import xml.etree.ElementTree as ET
 from datetime import datetime, timedelta
@@ -30,12 +31,15 @@ CERT_BUNDL_URLS = [
 CERT_FILE = Path(__file__).parent / "cached-ca.pem"
+CACHE_FILE = Path(__file__).parent / "livetvsx.json"


-async def safe_process_event(fn, timeout_sec=20) -> Any | None:
+async def safe_process_event(fn, url_num: int, timeout=20) -> Any | None:
     try:
-        return await asyncio.wait_for(fn(), timeout=timeout_sec)
+        return await asyncio.wait_for(fn(), timeout=timeout)
     except asyncio.TimeoutError:
-        log.warning(f"Timed out after {timeout_sec}s, skipping event")
+        log.warning(f"URL {url_num}) Timed out after {timeout}s, skipping event")
+        return


 async def write_to_cert(client: httpx.AsyncClient, url: str, cert: Path) -> None:
@@ -71,6 +75,21 @@ async def get_cert(client: httpx.AsyncClient) -> ssl.SSLContext:
     return ssl.create_default_context(cafile=CERT_FILE)


+def load_cache() -> dict[str, dict[str, str | str]]:
+    try:
+        data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
+        now = datetime.now().timestamp()
+
+        return {
+            k: v
+            for k, v in data.items()
+            if now - v.get("timestamp", 0) < timedelta(hours=4).total_seconds()
+        }
+
+    except (FileNotFoundError, json.JSONDecodeError):
+        return {}
+
+
 async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO:
     buffer = io.BytesIO()
@@ -91,8 +110,13 @@ async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO:
         return io.BytesIO(b"")


-async def parse_feed(url: str, ssl_ctx: ssl.SSLContext) -> dict[str, dict[str, str]]:
-    events = []
+async def parse_feed(
+    url: str,
+    ssl_ctx: ssl.SSLContext,
+    cached_keys: set[str],
+) -> list[dict[str, str]]:
+    events: list[dict[str, str]] = []

     pub_date_format = "%a, %d %b %Y %H:%M:%S %z"
     now = datetime.now(TZ)
@@ -124,21 +148,28 @@ async def parse_feed(url: str, ssl_ctx: ssl.SSLContext) -> dict[str, dict[str, str]]:
             else ("", "")
         )

-        events.append(
-            {
-                "sport": sport,
-                "event": event,
-                "title": title,
-                "link": link,
-            }
-        )
+        key = f"[{sport}: {event}] {title}"
+
+        if key in cached_keys:
+            elem.clear()
+            continue
+
+        elif not tvp_sports & {sport, event}:
+            events.append(
+                {
+                    "sport": sport,
+                    "event": event,
+                    "title": title,
+                    "link": link,
+                }
+            )

         elem.clear()

     return events


-async def process_event(url: str, max_wait_ms=15_000) -> str | None:
+async def process_event(url: str, url_num: int, max_wait_ms=15_000) -> str | None:
     async with async_playwright() as p:
         browser = await p.firefox.launch(headless=True)
@@ -179,16 +210,18 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                 await ev_page.wait_for_timeout(500)
             except Exception as e:
-                log.debug(f"Failed to click Browser Links tab: {e}")
+                log.debug(f"URL {url_num}) Failed to click Browser Links tab: {e}")
+                return
         else:
-            log.warning("Browser Links tab not found")
+            log.warning(f"URL {url_num}) Browser Links tab not found")

         link_img = await ev_page.query_selector(
             "tr:nth-child(2) > td:nth-child(1) td:nth-child(6) img"
         )

         if not link_img:
-            log.warning("No browser link to click.")
+            log.warning(f"URL {url_num}) No browser link to click.")
+            return

         ev_page.on("request", capture_req)
@@ -198,7 +231,7 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                 await link_img.click()
             except Exception as e:
                 log.debug(
-                    f"Click failed (popup might have already been opened): {e}"
+                    f"URL {url_num}) Click failed (popup might have already been opened): {e}"
                 )

             popup = await popup_info.value
@@ -209,7 +242,8 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                 try:
                     await link_img.click()
                 except Exception as e:
-                    log.debug(f"Fallback click failed: {e}")
+                    log.debug(f"URL {url_num}) Fallback click failed: {e}")
+                    return

             wait_task = asyncio.create_task(got_one.wait())
@@ -217,7 +251,8 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                 await asyncio.wait_for(wait_task, timeout=max_wait_ms / 1000)
             except asyncio.TimeoutError:
-                log.warning("Timed out waiting for m3u8.")
+                log.warning(f"URL {url_num}) Timed out waiting for m3u8.")
+                return
             finally:
                 if not wait_task.done():
@@ -238,10 +273,12 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
             await ev_page.close()

             if captured:
+                log.info(f"URL {url_num}) Captured M3U8")
                 return captured[-1]

-            log.warning("No m3u8 captured in popup or inline playback.")
+            log.warning(f"URL {url_num}) No m3u8 captured in popup or inline playback.")
+            return

         except Exception as e:
             try:
                 ev_page.remove_listener("request", capture_req)
@@ -263,29 +300,44 @@ async def main(client: httpx.AsyncClient) -> None:
     cert = await get_cert(client)

-    events = await parse_feed(BASE_URL, cert)
+    cached_urls = load_cache()
+    cached_keys = set(cached_urls.keys())
+    cached_count = len(cached_urls)

-    log.info(f"Processing {len(events)} events")
+    events = await parse_feed(BASE_URL, cert, cached_keys)

-    for ev in events:
-        if tvp_sports & {
-            sport := ev["sport"],
-            event := ev["event"],
-        }:  # already in tvpass
-            continue
+    log.info(f"Processing {len(events)} URLs")

-        url = await safe_process_event(lambda: process_event(ev["link"]))
+    now_ts = datetime.now().timestamp()
+
+    for num, ev in enumerate(events, start=1):
+        sport = ev["sport"]
+        event = ev["event"]
+        title = ev["title"]
+        link = ev["link"]
+
+        key = f"[{sport}: {event}] {title}"
+
+        url = await safe_process_event(
+            lambda: process_event(link, url_num=num), url_num=num
+        )

         if url:
-            urls[f"[{sport}: {event}] {ev['title']}"] = {
+            entry = {
                 "url": url,
                 "logo": logos.get(
                     sport,
                     "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png",
                 ),
+                "timestamp": now_ts,
             }

-    log.info(f"Collected {len(urls)} live events")
+            urls[key] = cached_urls[key] = entry
+
+    CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")

-    # add caching
+    new_count = len(cached_urls) - cached_count
+
+    log.info(f"Cached {cached_count} event(s)")
+    log.info(f"Collected {new_count} new event(s)")

View file

@@ -51,7 +51,7 @@ async def fetch_m3u8(client: httpx.AsyncClient) -> list[str] | None:
 async def main(client: httpx.AsyncClient) -> None:
     if cached := load_cache():
         urls.update(cached)
-        log.info(f"Collected {len(urls)} events from cache")
+        log.info(f"Collected {len(urls)} event(s) from cache")
         return

     log.info(f'Scraping from "{base_url}"')
@@ -87,4 +87,4 @@ async def main(client: httpx.AsyncClient) -> None:
     if urls:
         base_file.write_text(json.dumps(urls, indent=2), encoding="utf-8")

-        log.info(f"Cached {len(urls)} events")
+        log.info(f"Cached {len(urls)} event(s)")