- change scrape window for istreameast.py
- harden scraping method for roxie.py
- catch nulls for ppv.py
- change scraping method for totalsportek.py
- miscellaneous cleanup across remaining files
This commit is contained in:
doms9 2026-02-18 15:47:50 -05:00
parent 0fe9c5b1dd
commit 00000d940c
7 changed files with 141 additions and 87 deletions

View file

@ -17,7 +17,7 @@ CACHE_FILE = Cache(TAG, exp=10_800)
HTML_CACHE = Cache(f"{TAG}-html", exp=28_800)
MIRRORS = ["https://streamhub.pro", "https://livesports4u.net"]
BASE_URL = "https://livesports4u.net"
CATEGORIES = {
"Soccer": "sport_68c02a4464a38",
@ -35,7 +35,6 @@ CATEGORIES = {
async def refresh_html_cache(
url: str,
date: str,
sport_id: str,
ts: float,
@ -45,7 +44,7 @@ async def refresh_html_cache(
if not (
html_data := await network.request(
urljoin(url, f"events/{date}"),
urljoin(BASE_URL, f"events/{date}"),
log=log,
params={"sport_id": sport_id},
)
@ -95,7 +94,7 @@ async def refresh_html_cache(
return events
async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now())
if not (events := HTML_CACHE.load()):
@ -103,7 +102,6 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
tasks = [
refresh_html_cache(
url,
date,
sport_id,
now.timestamp(),
@ -146,16 +144,9 @@ async def scrape(browser: Browser) -> None:
log.info(f"Loaded {cached_count} event(s) from cache")
if not (base_url := await network.get_base(MIRRORS)):
log.warning("No working PPV mirrors")
log.info(f'Scraping from "{BASE_URL}"')
CACHE_FILE.write(cached_urls)
return
log.info(f'Scraping from "{base_url}"')
events = await get_events(base_url, cached_urls.keys())
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")