edit urls for websites
doms9 2026-02-28 15:42:50 -05:00
parent b6f835b575
commit 00000d9595
5 changed files with 75 additions and 48 deletions

View file

@@ -1,3 +1,4 @@
+import re
 from functools import partial
 from playwright.async_api import Browser
@@ -14,7 +15,7 @@ CACHE_FILE = Cache(TAG, exp=10_800)
 API_FILE = Cache(f"{TAG}-api", exp=19_800)
-MIRRORS = [
+API_MIRRORS = [
     "https://api.ppv.to/api/streams",
     "https://api.ppv.cx/api/streams",
     "https://api.ppv.sh/api/streams",
@@ -22,6 +23,12 @@ MIRRORS = [
 ]
+def fix_url(s: str) -> str:
+    pattern = re.compile(r"index\.m3u8$", re.I)
+    return pattern.sub(r"tracks-v1a1/mono.ts.m3u8", s)
 async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
     now = Time.clean(Time.now())
@@ -90,16 +97,16 @@ async def scrape(browser: Browser) -> None:
     log.info(f"Loaded {cached_count} event(s) from cache")
-    if not (base_url := await network.get_base(MIRRORS)):
+    if not (api_url := await network.get_base(API_MIRRORS)):
         log.warning("No working PPV mirrors")
         CACHE_FILE.write(cached_urls)
         return
-    log.info(f'Scraping from "{base_url}"')
+    log.info(f'Scraping from "{api_url}"')
-    events = await get_events(base_url, cached_urls.keys())
+    events = await get_events(api_url, cached_urls.keys())
     if events:
         log.info(f"Processing {len(events)} new URL(s)")
@@ -148,6 +155,8 @@ async def scrape(browser: Browser) -> None:
         if url:
             valid_count += 1
+            entry["url"] = fix_url(url)
             urls[key] = entry
     if new_count := valid_count - cached_count:
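For reference, the new fix_url helper is self-contained; a minimal sketch of how it behaves, using a made-up stream URL (the domain and path are hypothetical, only the trailing index.m3u8 matters):

import re

def fix_url(s: str) -> str:
    # Swap a trailing "index.m3u8" (case-insensitive) for the
    # mono-track rendition path these mirrors serve.
    pattern = re.compile(r"index\.m3u8$", re.I)
    return pattern.sub(r"tracks-v1a1/mono.ts.m3u8", s)

print(fix_url("https://example.com/hls/abc/index.m3u8"))
# -> https://example.com/hls/abc/tracks-v1a1/mono.ts.m3u8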

View file

@@ -31,9 +31,11 @@ async def process_event(url: str, url_num: int) -> str | None:
         return
+    pattern = re.compile(r"playlist\.m3u8\?.*$", re.I)
     log.info(f"URL {url_num}) Captured M3U8")
-    return urls[0]
+    return pattern.sub(r"chunks.m3u8", urls[0])
 async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
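The same rewrite idea applies here on the capture side; a small sketch of the substitution, with a hypothetical captured playlist URL (the query parameters are invented):

import re

pattern = re.compile(r"playlist\.m3u8\?.*$", re.I)

url = "https://cdn.example.com/live/playlist.m3u8?token=abc&expires=99"
print(pattern.sub(r"chunks.m3u8", url))
# -> https://cdn.example.com/live/chunks.m3u8

Because the pattern anchors at the end of the string, the token and any other query parameters are dropped along with the playlist segment name.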

View file

@@ -92,9 +92,11 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape(browser: Browser) -> None:
     cached_urls = CACHE_FILE.load()
-    cached_count = len(cached_urls)
-    urls.update(cached_urls)
+    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+    valid_count = cached_count = len(valid_urls)
+    urls.update(valid_urls)
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -123,29 +125,35 @@ async def scrape(browser: Browser) -> None:
             log=log,
         )
+        sport, event, ts = (
+            ev["sport"],
+            ev["event"],
+            ev["timestamp"],
+        )
+        key = f"[{sport}] {event} ({TAG})"
+        tvg_id, logo = leagues.get_tvg_info(sport, event)
+        entry = {
+            "url": url,
+            "logo": logo,
+            "base": "https://streamcenter.xyz",
+            "timestamp": ts,
+            "id": tvg_id or "Live.Event.us",
+            "link": link,
+        }
+        cached_urls[key] = entry
         if url:
-            sport, event, ts = (
-                ev["sport"],
-                ev["event"],
-                ev["timestamp"],
-            )
-            key = f"[{sport}] {event} ({TAG})"
-            tvg_id, logo = leagues.get_tvg_info(sport, event)
-            entry = {
-                "url": url,
-                "logo": logo,
-                "base": "https://streamcenter.xyz",
-                "timestamp": ts,
-                "id": tvg_id or "Live.Event.us",
-                "link": link,
-            }
-            urls[key] = cached_urls[key] = entry
+            valid_count += 1
+            entry["url"] = url.split("?")[0]
+            urls[key] = entry
-    if new_count := len(cached_urls) - cached_count:
+    if new_count := valid_count - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
     else:
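The bookkeeping change is easiest to see in isolation; a rough sketch with invented cache entries (the keys and URLs are placeholders). Entries whose "url" is empty stay in cached_urls but no longer count as valid or reach the playlist:

cached_urls = {
    "[NBA] A vs B (SC)": {"url": "https://e.x/a.m3u8"},  # still valid
    "[NHL] C vs D (SC)": {"url": ""},                    # metadata only
}
urls: dict[str, dict] = {}

valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
valid_count = cached_count = len(valid_urls)  # 1, not 2
urls.update(valid_urls)

# A freshly captured URL is cached with its query string stripped:
url = "https://e.x/b.m3u8?token=xyz"
entry = {"url": url.split("?")[0]}  # "https://e.x/b.m3u8"

Writing entry into cached_urls before the if url: check appears to be the point of the restructuring: events whose capture failed are still remembered (with an empty url), so they are filtered out of get_events on the next run instead of being scraped again.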

View file

@@ -123,9 +123,11 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
 async def scrape(browser: Browser) -> None:
     cached_urls = CACHE_FILE.load()
-    cached_count = len(cached_urls)
-    urls.update(cached_urls)
+    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+    valid_count = cached_count = len(valid_urls)
+    urls.update(valid_urls)
     log.info(f"Loaded {cached_count} event(s) from cache")
@@ -154,29 +156,35 @@ async def scrape(browser: Browser) -> None:
             log=log,
         )
+        sport, event, ts = (
+            ev["sport"],
+            ev["event"],
+            ev["timestamp"],
+        )
+        key = f"[{sport}] {event} ({TAG})"
+        tvg_id, logo = leagues.get_tvg_info(sport, event)
+        entry = {
+            "url": url,
+            "logo": logo,
+            "base": "https://instreams.click/",
+            "timestamp": ts,
+            "id": tvg_id or "Live.Event.us",
+            "link": link,
+        }
+        cached_urls[key] = entry
         if url:
-            sport, event, ts = (
-                ev["sport"],
-                ev["event"],
-                ev["timestamp"],
-            )
-            key = f"[{sport}] {event} ({TAG})"
-            tvg_id, logo = leagues.get_tvg_info(sport, event)
-            entry = {
-                "url": url,
-                "logo": logo,
-                "base": "https://instreams.click/",
-                "timestamp": ts,
-                "id": tvg_id or "Live.Event.us",
-                "link": link,
-            }
-            urls[key] = cached_urls[key] = entry
+            valid_count += 1
+            entry["url"] = url.split("&e")[0]
+            urls[key] = entry
-    if new_count := len(cached_urls) - cached_count:
+    if new_count := valid_count - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
     else:
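This scraper trims with split("&e") rather than stripping the whole query string; a one-line sketch with a hypothetical URL:

url = "https://cdn.example.com/hls/ch.m3u8?id=42&e=1700000000&s=ab"
print(url.split("&e")[0])
# -> https://cdn.example.com/hls/ch.m3u8?id=42

Note that str.split cuts at the first literal "&e", which keeps any leading parameter but would also truncate at something like &expires=; presumably these URLs only ever carry &e= and later tokens.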

View file

@@ -15,7 +15,7 @@ TAG = "TOTALSPRTK"
 CACHE_FILE = Cache(TAG, exp=28_800)
-BASE_URL = "https://live3.totalsportek777.com"
+BASE_URL = "https://live3.totalsportekarmy.com"
 def fix_txt(s: str) -> str: