edit urls for websites
This commit is contained in:
doms9 2026-02-28 15:42:50 -05:00
parent b6f835b575
commit 00000d9595
5 changed files with 75 additions and 48 deletions

View file

@ -1,3 +1,4 @@
import re
from functools import partial
from playwright.async_api import Browser
@ -14,7 +15,7 @@ CACHE_FILE = Cache(TAG, exp=10_800)
API_FILE = Cache(f"{TAG}-api", exp=19_800)
MIRRORS = [
API_MIRRORS = [
"https://api.ppv.to/api/streams",
"https://api.ppv.cx/api/streams",
"https://api.ppv.sh/api/streams",
@ -22,6 +23,12 @@ MIRRORS = [
]
# Compiled once at import time so repeated fix_url calls reuse the same
# pattern object instead of going through re.compile on every invocation.
_INDEX_M3U8_RE = re.compile(r"index\.m3u8$", re.I)


def fix_url(s: str) -> str:
    """Rewrite a trailing ``index.m3u8`` to ``tracks-v1a1/mono.ts.m3u8``.

    The match is case-insensitive and anchored to the end of the string, so
    an ``index.m3u8`` that appears earlier in the URL (or is followed by
    other characters such as a query string) is left untouched.

    Args:
        s: The URL to rewrite.

    Returns:
        The rewritten URL, or ``s`` unchanged when it does not end with
        ``index.m3u8``.
    """
    return _INDEX_M3U8_RE.sub(r"tracks-v1a1/mono.ts.m3u8", s)
async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now())
@ -90,16 +97,16 @@ async def scrape(browser: Browser) -> None:
log.info(f"Loaded {cached_count} event(s) from cache")
if not (base_url := await network.get_base(MIRRORS)):
if not (api_url := await network.get_base(API_MIRRORS)):
log.warning("No working PPV mirrors")
CACHE_FILE.write(cached_urls)
return
log.info(f'Scraping from "{base_url}"')
log.info(f'Scraping from "{api_url}"')
events = await get_events(base_url, cached_urls.keys())
events = await get_events(api_url, cached_urls.keys())
if events:
log.info(f"Processing {len(events)} new URL(s)")
@ -148,6 +155,8 @@ async def scrape(browser: Browser) -> None:
if url:
valid_count += 1
entry["url"] = fix_url(url)
urls[key] = entry
if new_count := valid_count - cached_count: