doms9 2025-10-13 14:17:56 -04:00
parent e49b2217c0
commit 00000d914b
2 changed files with 384 additions and 369 deletions

View file

@@ -2,23 +2,16 @@
 import asyncio
 from pathlib import Path
-from scrapers import (
-    fstv,
-    livetvsx,
-    streambtw,
-    streameast,
-    streamed,
-    strmd,
-    tvpass,
-    watchfooty,
-)
+from scrapers import fstv, streambtw, streameast, streamed, strmd, tvpass, watchfooty
 from scrapers.utils import get_logger, network
 log = get_logger(__name__)
 BASE_FILE = Path(__file__).parent / "base.m3u8"
-M3U8_FILE = Path(__file__).parent / "TV.m3u8"
+EVENTS_FILE = Path(__file__).parent / "events.m3u8"
+COMBINED_FILE = Path(__file__).parent / "TV.m3u8"
 def load_base() -> tuple[list[str], int]:
@@ -36,7 +29,6 @@ async def main() -> None:
     tasks = [
         asyncio.create_task(fstv.scrape(network.client)),
-        # asyncio.create_task(livetvsx.scrape(network.client)),
         asyncio.create_task(streambtw.scrape(network.client)),
         asyncio.create_task(streameast.scrape(network.client)),
         asyncio.create_task(streamed.scrape(network.client)),
@@ -49,7 +41,6 @@ async def main() -> None:
     additions = (
         fstv.urls
-        | livetvsx.urls
         | streambtw.urls
         | streameast.urls
         | streamed.urls
@@ -58,25 +49,49 @@ async def main() -> None:
         | watchfooty.urls
     )
-    live_events = []
-    for chnl_num, (event, info) in enumerate(
+    live_events: list[str] = []
+    combined_channels: list[str] = []
+    for i, (event, info) in enumerate(
         sorted(additions.items()),
-        start=tvg_chno + 1,
+        start=1,
     ):
-        live_events.extend(
-            (
-                f'\n#EXTINF:-1 tvg-chno="{chnl_num}" tvg-id="{info["id"]}" tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}',
-                f'#EXTVLCOPT:http-referrer={info["base"]}',
-                f'#EXTVLCOPT:http-origin={info["base"]}',
-                f"#EXTVLCOPT:http-user-agent={network.UA}",
-                info["url"],
-            )
-        )
-    M3U8_FILE.write_text("\n".join(base_m3u8 + live_events), encoding="utf-8")
-    log.info(f"M3U8 saved to {M3U8_FILE.name}")
+        extinf_all = (
+            f'#EXTINF:-1 tvg-chno="{tvg_chno + i}" tvg-id="{info["id"]}" '
+            f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}'
+        )
+        extinf_live = (
+            f'#EXTINF:-1 tvg-chno="{i}" tvg-id="{info["id"]}" '
+            f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}'
+        )
+        vlc_block = [
+            f'#EXTVLCOPT:http-referrer={info["base"]}',
+            f'#EXTVLCOPT:http-origin={info["base"]}',
+            f"#EXTVLCOPT:http-user-agent={network.UA}",
+            info["url"],
+        ]
+        combined_channels.extend(["\n" + extinf_all, *vlc_block])
+        live_events.extend(["\n" + extinf_live, *vlc_block])
+    COMBINED_FILE.write_text(
+        "\n".join(base_m3u8 + combined_channels),
+        encoding="utf-8",
+    )
+    log.info(f"Base + Events saved to {COMBINED_FILE.name}")
+    EVENTS_FILE.write_text(
+        '#EXTM3U url-tvg="https://raw.githubusercontent.com/doms9/iptv/refs/heads/default/EPG/TV.xml"\n'
+        + "\n".join(live_events),
+        encoding="utf-8",
+    )
+    log.info(f"Events saved to {EVENTS_FILE.name}")
 if __name__ == "__main__":
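
For context, a minimal standalone sketch of the entry format this loop now emits into both playlists. The event name, logo, base URL, and stream URL below are made-up placeholders, not values produced by the scrapers, and UA stands in for network.UA from scrapers.utils:

# Sketch: render one "Live Events" block the way main() builds them above.
UA = "Mozilla/5.0"  # placeholder for network.UA

event = "Example FC vs Sample United"
info = {
    "id": "Live.Event.us",
    "logo": "https://example.com/logo.png",
    "base": "https://example.com/",
    "url": "https://example.com/stream/index.m3u8",
}

extinf = (
    f'#EXTINF:-1 tvg-chno="1" tvg-id="{info["id"]}" '
    f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}'
)
vlc_block = [
    f'#EXTVLCOPT:http-referrer={info["base"]}',
    f'#EXTVLCOPT:http-origin={info["base"]}',
    f"#EXTVLCOPT:http-user-agent={UA}",
    info["url"],
]

# Joining with newlines yields the block written to TV.m3u8 / events.m3u8:
#   #EXTINF:-1 tvg-chno="1" tvg-id="Live.Event.us" tvg-name="Example FC vs Sample United" ...
#   #EXTVLCOPT:http-referrer=https://example.com/
#   #EXTVLCOPT:http-origin=https://example.com/
#   #EXTVLCOPT:http-user-agent=Mozilla/5.0
#   https://example.com/stream/index.m3u8
print("\n".join([extinf, *vlc_block]))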

View file

@@ -1,343 +1,343 @@
import asyncio
import io
import ssl
import xml.etree.ElementTree as ET
from datetime import timedelta
from functools import partial
from pathlib import Path

import httpx
from playwright.async_api import BrowserContext, async_playwright

from .utils import Cache, Time, get_logger, leagues, network

log = get_logger(__name__)

urls: dict[str, dict[str, str | float]] = {}

BASE_URL = "https://cdn.livetv861.me/rss/upcoming_en.xml"

CERT_BUNDLE_URLS = [
    "https://curl.se/ca/cacert.pem",
    "https://ssl.com/repo/certs/Cloudflare-TLS-I-E1.pem",
    "https://ssl.com/repo/certs/SSL.com-TLS-T-ECC-R2.pem",
    "https://ssl.com/repo/certs/Sectigo-AAA-Root.pem",
]

CERT_FILE = Path(__file__).parent / "caches" / "cached-cert.pem"

CACHE_FILE = Cache(Path(__file__).parent / "caches" / "livetvsx.json", exp=10_800)


async def write_to_cert(
    client: httpx.AsyncClient,
    url: str,
    cert: Path,
) -> None:
    try:
        r = await client.get(url)
        r.raise_for_status()
    except Exception as e:
        # Bail out on failure: `r` may be unbound here and there is nothing to append.
        log.error(f"Failed to fetch cert bundle {url}: {e}")
        return

    with cert.open("a", encoding="utf-8") as f:
        f.write(f"{r.text}\n")


async def refresh_cert_cache(client: httpx.AsyncClient) -> None:
    CERT_FILE.unlink(missing_ok=True)

    tasks = [write_to_cert(client, url, CERT_FILE) for url in CERT_BUNDLE_URLS]

    await asyncio.gather(*tasks)


async def get_cert(client: httpx.AsyncClient) -> ssl.SSLContext:
    if CERT_FILE.is_file():
        mtime = Time.from_ts(CERT_FILE.stat().st_mtime)

        if Time.now() - mtime < timedelta(days=30):
            return ssl.create_default_context(cafile=CERT_FILE)

    log.info("Refreshing cached certificate")

    await refresh_cert_cache(client)

    return ssl.create_default_context(cafile=CERT_FILE)
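
# ---------------------------------------------------------------------------
# Editor's sketch, not part of livetvsx.py: Time.from_ts / Time.now come from
# scrapers.utils and are not shown in this diff. The 30-day freshness check in
# get_cert() is equivalent to this stdlib form.
# ---------------------------------------------------------------------------
from datetime import datetime


def cert_is_fresh_sketch(cert_path: Path, max_age_days: int = 30) -> bool:
    mtime = datetime.fromtimestamp(cert_path.stat().st_mtime)
    return datetime.now() - mtime < timedelta(days=max_age_days)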


async def fetch_xml_stream(url: str, ssl_ctx: ssl.SSLContext) -> io.BytesIO | None:
    buffer = io.BytesIO()

    try:
        async with httpx.AsyncClient(
            timeout=10,
            verify=ssl_ctx,
            follow_redirects=True,
        ) as client:
            async with client.stream("GET", url) as r:
                r.raise_for_status()

                async for chunk in r.aiter_bytes(8192):
                    buffer.write(chunk)

        buffer.seek(0)

        return buffer

    except Exception as e:
        log.error(f"Failed to fetch {url}: {e}")
        return


async def process_event(
    url: str,
    url_num: int,
    context: BrowserContext,
) -> str | None:
    page = await context.new_page()

    captured: list[str] = []

    got_one = asyncio.Event()

    handler = partial(network.capture_req, captured=captured, got_one=got_one)

    popup = None

    try:
        await page.goto(
            url,
            wait_until="domcontentloaded",
            timeout=10_000,
        )

        btn = await page.query_selector(".lnkhdr > tbody > tr > td:nth-child(2)")

        if btn:
            try:
                await btn.click()
                await page.wait_for_timeout(500)
            except Exception as e:
                log.debug(f"URL {url_num}) Failed to click Browser Links tab: {e}")
                return
        else:
            log.warning(f"URL {url_num}) Browser Links tab not found")
            return

        link_img = await page.query_selector(
            "tr:nth-child(2) > td:nth-child(1) td:nth-child(6) img"
        )

        if not link_img:
            log.warning(f"URL {url_num}) No browser link to click.")
            return

        page.on("request", handler)

        try:
            async with page.expect_popup(timeout=5_000) as popup_info:
                try:
                    await link_img.click()
                except Exception as e:
                    log.debug(f"URL {url_num}) Click failed: {e}")

            popup = await popup_info.value
            popup.on("request", handler)
        except Exception:
            try:
                await link_img.click()
            except Exception as e:
                log.debug(f"URL {url_num}) Fallback click failed: {e}")

        wait_task = asyncio.create_task(got_one.wait())

        try:
            await asyncio.wait_for(wait_task, timeout=15)
        except asyncio.TimeoutError:
            log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
            return
        finally:
            if not wait_task.done():
                wait_task.cancel()
                try:
                    await wait_task
                except asyncio.CancelledError:
                    pass

            page.remove_listener("request", handler)

            if popup:
                popup.remove_listener("request", handler)
                await popup.close()

            await page.close()

        if captured:
            log.info(f"URL {url_num}) Captured M3U8")
            return captured[-1]

        log.warning(f"URL {url_num}) No M3U8 captured")
        return

    except Exception:
        try:
            page.remove_listener("request", handler)

            if popup:
                popup.remove_listener("request", handler)
                await popup.close()

            await page.close()
        except Exception:
            pass
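
# ---------------------------------------------------------------------------
# Editor's sketch, not part of livetvsx.py: network.capture_req comes from
# scrapers.utils and is not shown in this diff. Given how process_event() binds
# it with functools.partial and registers it via page.on("request", ...), a
# compatible handler could look roughly like the one below: record request URLs
# that look like HLS playlists and signal the asyncio.Event so the caller stops
# waiting. The name and the ".m3u8" filter are assumptions.
# ---------------------------------------------------------------------------
from playwright.async_api import Request


def capture_req_sketch(
    request: Request,
    *,
    captured: list[str],
    got_one: asyncio.Event,
) -> None:
    # Hypothetical matching rule; the real helper may filter differently.
    if ".m3u8" in request.url:
        captured.append(request.url)
        got_one.set()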


async def get_events(
    url: str,
    ssl_ctx: ssl.SSLContext,
    cached_keys: set[str],
) -> list[dict[str, str]]:
    events: list[dict[str, str]] = []

    now = Time.clean(Time.now())
    start_dt = now.delta(minutes=-30)
    end_dt = now.delta(minutes=30)

    if not (buffer := await fetch_xml_stream(url, ssl_ctx)):
        return events

    for _, elem in ET.iterparse(buffer, events=("end",)):
        if elem.tag == "item":
            title = elem.findtext("title") or ""
            desc = elem.findtext("description") or ""
            pub_date = elem.findtext("pubDate") or ""
            link = elem.findtext("link") or ""

            if not all([title, pub_date, link]):
                elem.clear()
                continue

            try:
                event_dt = Time.from_str(pub_date)
            except Exception:
                elem.clear()
                continue

            if not start_dt <= event_dt <= end_dt:
                elem.clear()
                continue
            if desc:
                parts = desc.split(".")
                sport = parts[0].strip()
                # Guard against descriptions without a second "." segment.
                event = parts[1].strip() if len(parts) > 1 else ""
            else:
                sport, event = "", ""

            key = f"[{sport}: {event}] {title} (LTVSX)"

            if key in cached_keys:
                elem.clear()
                continue
            events.append(
                {
                    "sport": sport,
                    "event": event,
                    "title": title,
                    "link": link,
                    "timestamp": event_dt.timestamp(),
                }
            )

        elem.clear()

    return events
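
# ---------------------------------------------------------------------------
# Editor's sketch, not part of livetvsx.py: the Time helper (Time.clean,
# Time.now, .delta, Time.from_str) lives in scrapers.utils and is not shown in
# this diff. The +/- 30 minute window that get_events() applies is equivalent
# to the stdlib version below, assuming the RSS pubDate is RFC 2822 with a
# timezone (e.g. "Mon, 13 Oct 2025 18:00:00 GMT").
# ---------------------------------------------------------------------------
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime


def within_window_sketch(pub_date: str, minutes: int = 30) -> bool:
    event_dt = parsedate_to_datetime(pub_date)
    now = datetime.now(timezone.utc)
    return now - timedelta(minutes=minutes) <= event_dt <= now + timedelta(minutes=minutes)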


async def scrape(client: httpx.AsyncClient) -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update({k: v for k, v in cached_urls.items() if v["url"]})

    log.info(f"Loaded {cached_count} event(s) from cache")

    log.info(f'Scraping from "{BASE_URL}"')

    ssl_ctx = await get_cert(client)

    if not ssl_ctx:
        log.error("Failed to create SSL context, aborting")
        CACHE_FILE.write(cached_urls)
        return

    events = await get_events(
        BASE_URL,
        ssl_ctx,
        set(cached_urls.keys()),
    )

    log.info(f"Processing {len(events)} new URL(s)")

    async with async_playwright() as p:
        browser, context = await network.browser(p, ignore_https_errors=True)

        for i, ev in enumerate(events, start=1):
            link = ev["link"]

            url = await network.safe_process(
                lambda: process_event(
                    link,
                    url_num=i,
                    context=context,
                ),
                url_num=i,
                log=log,
            )

            sport, event, title, ts = (
                ev["sport"],
                ev["event"],
                ev["title"],
                ev["timestamp"],
            )

            key = f"[{sport}: {event}] {title} (LTVSX)"

            tvg_id, logo = leagues.info(event)

            if not tvg_id:
                tvg_id, logo = leagues.info(sport)

            entry = {
                "url": url,
                "logo": logo,
                "id": tvg_id or "Live.Event.us",
                "base": "https://livetv.sx/enx/",
                "timestamp": ts,
            }

            cached_urls[key] = entry

            if url:
                urls[key] = entry

        await browser.close()

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

    CACHE_FILE.write(cached_urls)
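
The Cache helper used above (CACHE_FILE.load() / CACHE_FILE.write()) is defined in scrapers.utils and is not part of this commit. As a rough sketch of the contract scrape() appears to rely on, under the assumption that expiry is keyed off each entry's stored timestamp (the real implementation may differ):

import json
import time
from pathlib import Path


class CacheSketch:
    """Hypothetical stand-in for scrapers.utils.Cache, not the real implementation."""

    def __init__(self, path: Path, exp: int) -> None:
        self.path = path
        self.exp = exp  # max age in seconds, e.g. 10_800 for livetvsx

    def load(self) -> dict[str, dict]:
        if not self.path.is_file():
            return {}
        data = json.loads(self.path.read_text(encoding="utf-8"))
        cutoff = time.time() - self.exp
        # Assumes entries carry the event timestamp, as livetvsx.py stores above.
        return {k: v for k, v in data.items() if v.get("timestamp", 0) >= cutoff}

    def write(self, data: dict[str, dict]) -> None:
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.write_text(json.dumps(data, indent=2), encoding="utf-8")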