- add tflix.py
- remove strmd.py
- modify playwright browser/context usage
- misc. edits
This commit is contained in:
doms9 2026-01-23 23:44:59 -05:00
parent 1aa60a8ce1
commit 00000d9638
24 changed files with 481 additions and 462 deletions

View file

@ -1,7 +1,7 @@
import asyncio
from functools import partial
from playwright.async_api import async_playwright
from playwright.async_api import BrowserContext
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
@ -12,9 +12,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "WEBCAST"
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
CACHE_FILE = Cache(TAG, exp=10_800)
HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=86_400)
HTML_CACHE = Cache(f"{TAG}-html", exp=86_400)
BASE_URLS = {"NFL": "https://nflwebcast.com", "NHL": "https://slapstreams.com"}
@ -110,7 +110,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return live
async def scrape() -> None:
async def scrape(browser: BrowserContext) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
@ -126,16 +126,14 @@ async def scrape() -> None:
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with async_playwright() as p:
browser, context = await network.browser(p)
try:
for i, ev in enumerate(events, start=1):
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
context=context,
page=page,
log=log,
)
@ -169,9 +167,6 @@ async def scrape() -> None:
urls[key] = cached_urls[key] = entry
finally:
await browser.close()
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")