This commit is contained in:
parent a02d30459a
commit 00000d9855

5 changed files with 98 additions and 42 deletions
@@ -41,13 +41,14 @@ async def main() -> None:
     tasks = [
         # ace.main(client),
         # fstv.main(client),
-        livetvsx.main(CLIENT),
-        tvpass.main(CLIENT),
+        asyncio.create_task(livetvsx.main(CLIENT)),
+        asyncio.create_task(tvpass.main(CLIENT)),
+        vanilla_fetch(),
     ]
 
-    await asyncio.gather(*tasks)
+    results = await asyncio.gather(*tasks)
 
-    base_m3u8, tvg_chno = await vanilla_fetch()
+    base_m3u8, tvg_chno = results[-1]
 
     additions = ace.urls | fstv.urls | livetvsx.urls | tvpass.urls
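Side note on the gather() change above: asyncio.gather returns results in the order the awaitables were passed, not the order they finish, so results[-1] is always whatever vanilla_fetch() returned even when the other scrapers finish first. A minimal, self-contained sketch of that guarantee (slow() and demo() are made-up helpers, not code from this repo):

```python
import asyncio


async def slow(n: int) -> int:
    await asyncio.sleep(0.01 * (3 - n))  # later-listed awaitables finish first
    return n


async def demo() -> None:
    tasks = [
        asyncio.create_task(slow(1)),
        asyncio.create_task(slow(2)),
        slow(3),  # a bare coroutine mixed in, like vanilla_fetch() above
    ]
    results = await asyncio.gather(*tasks)
    print(results)      # [1, 2, 3] -- argument order, not completion order
    print(results[-1])  # 3: the awaitable passed last


asyncio.run(demo())
```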
@@ -50,7 +50,7 @@ async def get_schedule(client: httpx.AsyncClient, base_url: str) -> list[dict]:
         )
 
         if " - " in after_time:
-            sport, event_name = [x.strip() for x in after_time.split(" - ", 1)]
+            sport, event_name = (x.strip() for x in after_time.split(" - ", 1))
         else:
             sport, event_name = "", after_time
 
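The list-comprehension-to-generator change above is behaviour-preserving: tuple unpacking only needs an iterable that yields exactly two items, which split(" - ", 1) guarantees once the " - " membership check has passed. A tiny illustration with made-up input:

```python
# Hypothetical schedule text, not taken from the scraped site.
after_time = "Football -  Arsenal vs Spurs "
sport, event_name = (x.strip() for x in after_time.split(" - ", 1))
print(sport, "|", event_name)  # Football | Arsenal vs Spurs
```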
@@ -131,4 +131,7 @@ async def main(client: httpx.AsyncClient) -> None:
         ),
     }
 
-    log.info(f"Collected {len(urls)} live events")
+    log.info(f"Collected {len(urls)} live event(s)")
+
+
+# add caching
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import asyncio
 import io
+import json
 import ssl
 import xml.etree.ElementTree as ET
 from datetime import datetime, timedelta
@@ -30,12 +31,15 @@ CERT_BUNDL_URLS = [
 
 CERT_FILE = Path(__file__).parent / "cached-ca.pem"
 
+CACHE_FILE = Path(__file__).parent / "livetvsx.json"
+
 
-async def safe_process_event(fn, timeout_sec=20) -> Any | None:
+async def safe_process_event(fn, url_num: int, timeout=20) -> Any | None:
     try:
-        return await asyncio.wait_for(fn(), timeout=timeout_sec)
+        return await asyncio.wait_for(fn(), timeout=timeout)
     except asyncio.TimeoutError:
-        log.warning(f"Timed out after {timeout_sec}s, skipping event")
+        log.warning(f"URL {url_num}) Timed out after {timeout}s, skipping event")
+        return
 
 
 async def write_to_cert(client: httpx.AsyncClient, url: str, cert: Path) -> None:
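The reworked safe_process_event wrapper above bounds each Playwright scrape with asyncio.wait_for and turns a timeout into a logged skip plus a None result. A self-contained sketch of that pattern, with a dummy never_finishes() coroutine, a plain safe() stand-in, and a deliberately short timeout (illustrative names, not repo code):

```python
import asyncio


async def never_finishes() -> str:
    await asyncio.sleep(60)  # stands in for a Playwright scrape that hangs
    return "m3u8"


async def safe(fn, url_num: int, timeout: float = 0.1):
    try:
        # wait_for cancels the wrapped coroutine once the timeout expires
        return await asyncio.wait_for(fn(), timeout=timeout)
    except asyncio.TimeoutError:
        print(f"URL {url_num}) Timed out after {timeout}s, skipping event")
        return None


print(asyncio.run(safe(never_finishes, url_num=1)))  # warning, then None
```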
@@ -71,6 +75,21 @@ async def get_cert(client: httpx.AsyncClient) -> ssl.SSLContext:
     return ssl.create_default_context(cafile=CERT_FILE)
 
 
+def load_cache() -> dict[str, dict[str, str | str]]:
+    try:
+        data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
+
+        now = datetime.now().timestamp()
+
+        return {
+            k: v
+            for k, v in data.items()
+            if now - v.get("timestamp", 0) < timedelta(hours=4).total_seconds()
+        }
+    except (FileNotFoundError, json.JSONDecodeError):
+        return {}
+
+
 async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO:
     buffer = io.BytesIO()
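The new load_cache above treats livetvsx.json as a TTL cache: on load, entries whose timestamp is older than four hours are dropped, and a missing or corrupt file degrades to an empty dict. A standalone sketch of the same filter against a throwaway file (the demo path and event names are invented):

```python
import json
import tempfile
from datetime import datetime, timedelta
from pathlib import Path

# Fabricated cache contents: one fresh entry, one five-hour-old entry.
now = datetime.now().timestamp()
cache_file = Path(tempfile.gettempdir()) / "livetvsx-demo.json"
cache_file.write_text(
    json.dumps(
        {
            "fresh event": {"url": "http://example.test/a.m3u8", "timestamp": now},
            "stale event": {"url": "http://example.test/b.m3u8", "timestamp": now - 5 * 3600},
        }
    ),
    encoding="utf-8",
)

data = json.loads(cache_file.read_text(encoding="utf-8"))
kept = {
    k: v
    for k, v in data.items()
    if now - v.get("timestamp", 0) < timedelta(hours=4).total_seconds()
}
print(list(kept))  # ['fresh event'] -- the stale entry is filtered out on load
```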
@@ -91,8 +110,13 @@ async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO:
         return io.BytesIO(b"")
 
 
-async def parse_feed(url: str, ssl_ctx: ssl.SSLContext) -> dict[str, dict[str, str]]:
-    events = []
+async def parse_feed(
+    url: str,
+    ssl_ctx: ssl.SSLContext,
+    cached_keys: set[str],
+) -> list[dict[str, str]]:
+
+    events: list[dict[str, str]] = []
     pub_date_format = "%a, %d %b %Y %H:%M:%S %z"
     now = datetime.now(TZ)
@@ -124,21 +148,28 @@ async def parse_feed(url: str, ssl_ctx: ssl.SSLContext) -> dict[str, dict[str, str]]:
                 else ("", "")
             )
 
-            events.append(
-                {
-                    "sport": sport,
-                    "event": event,
-                    "title": title,
-                    "link": link,
-                }
-            )
+            key = f"[{sport}: {event}] {title}"
+
+            if key in cached_keys:
+                elem.clear()
+                continue
+
+            elif not tvp_sports & {sport, event}:
+                events.append(
+                    {
+                        "sport": sport,
+                        "event": event,
+                        "title": title,
+                        "link": link,
+                    }
+                )
 
             elem.clear()
 
     return events
 
 
-async def process_event(url: str, max_wait_ms=15_000) -> str | None:
+async def process_event(url: str, url_num: int, max_wait_ms=15_000) -> str | None:
     async with async_playwright() as p:
         browser = await p.firefox.launch(headless=True)
@@ -179,16 +210,18 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                     await ev_page.wait_for_timeout(500)
                 except Exception as e:
-                    log.debug(f"Failed to click Browser Links tab: {e}")
+                    log.debug(f"URL {url_num}) Failed to click Browser Links tab: {e}")
+                    return
             else:
-                log.warning("Browser Links tab not found")
+                log.warning(f"URL {url_num}) Browser Links tab not found")
 
             link_img = await ev_page.query_selector(
                 "tr:nth-child(2) > td:nth-child(1) td:nth-child(6) img"
             )
 
             if not link_img:
-                log.warning("No browser link to click.")
+                log.warning(f"URL {url_num}) No browser link to click.")
+                return
 
             ev_page.on("request", capture_req)
@@ -198,7 +231,7 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                     await link_img.click()
                 except Exception as e:
                     log.debug(
-                        f"Click failed (popup might have already been opened): {e}"
+                        f"URL {url_num}) Click failed (popup might have already been opened): {e}"
                     )
 
             popup = await popup_info.value
@@ -209,7 +242,8 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                 try:
                     await link_img.click()
                 except Exception as e:
-                    log.debug(f"Fallback click failed: {e}")
+                    log.debug(f"URL {url_num}) Fallback click failed: {e}")
+                    return
 
             wait_task = asyncio.create_task(got_one.wait())
 
@@ -217,7 +251,8 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
                 await asyncio.wait_for(wait_task, timeout=max_wait_ms / 1000)
 
             except asyncio.TimeoutError:
-                log.warning("Timed out waiting for m3u8.")
+                log.warning(f"URL {url_num}) Timed out waiting for m3u8.")
+                return
 
             finally:
                 if not wait_task.done():
@@ -238,10 +273,12 @@ async def process_event(url: str, max_wait_ms=15_000) -> str | None:
             await ev_page.close()
 
             if captured:
+                log.info(f"URL {url_num}) Captured M3U8")
+
                 return captured[-1]
 
-            log.warning("No m3u8 captured in popup or inline playback.")
+            log.warning(f"URL {url_num}) No m3u8 captured in popup or inline playback.")
+            return
         except Exception as e:
             try:
                 ev_page.remove_listener("request", capture_req)
@@ -263,29 +300,44 @@ async def main(client: httpx.AsyncClient) -> None:
 
     cert = await get_cert(client)
 
-    events = await parse_feed(BASE_URL, cert)
+    cached_urls = load_cache()
+    cached_keys = set(cached_urls.keys())
+    cached_count = len(cached_urls)
+
+    events = await parse_feed(BASE_URL, cert, cached_keys)
 
-    log.info(f"Processing {len(events)} events")
+    log.info(f"Processing {len(events)} URLs")
 
-    for ev in events:
-        if tvp_sports & {
-            sport := ev["sport"],
-            event := ev["event"],
-        }:  # already in tvpass
-            continue
-
-        url = await safe_process_event(lambda: process_event(ev["link"]))
+    now_ts = datetime.now().timestamp()
+
+    for num, ev in enumerate(events, start=1):
+        sport = ev["sport"]
+        event = ev["event"]
+        title = ev["title"]
+        link = ev["link"]
+
+        key = f"[{sport}: {event}] {title}"
+
+        url = await safe_process_event(
+            lambda: process_event(link, url_num=num), url_num=num
+        )
 
         if url:
-            urls[f"[{sport}: {event}] {ev['title']}"] = {
+            entry = {
                 "url": url,
                 "logo": logos.get(
                     sport,
                     "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png",
                 ),
+                "timestamp": now_ts,
             }
 
-    log.info(f"Collected {len(urls)} live events")
-
-
-# add caching
+            urls[key] = cached_urls[key] = entry
+
+    CACHE_FILE.write_text(json.dumps(cached_urls, indent=2), encoding="utf-8")
+
+    new_count = len(cached_urls) - cached_count
+
+    log.info(f"Cached {cached_count} event(s)")
+
+    log.info(f"Collected {new_count} new event(s)")
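In the reworked main() above, the chained assignment urls[key] = cached_urls[key] = entry keeps the in-memory results and the persisted cache in step, so new_count is simply how much the cache grew during this run. A toy illustration with fabricated entries:

```python
# Fabricated cache state, purely to show the bookkeeping.
cached_urls = {"[Soccer: EPL] Old match": {"url": "old.m3u8", "timestamp": 0}}
urls: dict[str, dict] = {}
cached_count = len(cached_urls)

entry = {"url": "new.m3u8", "timestamp": 123.0}
urls["[Soccer: EPL] New match"] = cached_urls["[Soccer: EPL] New match"] = entry

new_count = len(cached_urls) - cached_count
print(new_count)  # 1 new event on top of the 1 already cached
```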
@@ -51,7 +51,7 @@ async def fetch_m3u8(client: httpx.AsyncClient) -> list[str] | None:
 async def main(client: httpx.AsyncClient) -> None:
     if cached := load_cache():
         urls.update(cached)
-        log.info(f"Collected {len(urls)} events from cache")
+        log.info(f"Collected {len(urls)} event(s) from cache")
         return
 
     log.info(f'Scraping from "{base_url}"')
@@ -87,4 +87,4 @@ async def main(client: httpx.AsyncClient) -> None:
     if urls:
         base_file.write_text(json.dumps(urls, indent=2), encoding="utf-8")
 
-        log.info(f"Cached {len(urls)} events")
+        log.info(f"Cached {len(urls)} event(s)")