fix watchfooty scraping
This commit is contained in:
doms9 2025-12-19 13:25:40 -05:00
parent b4f5824bb5
commit 00000d91c7
2 changed files with 28 additions and 13 deletions

View file

@@ -67,6 +67,14 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
if not (rank_elem := li_item.css_first(".f1-podium--rank")):
continue
if not (time_elem := li_item.css_first(".SaatZamanBilgisi")):
continue
time_text = time_elem.text(strip=True)
if not pattern.search(time_text):
continue
sport = rank_elem.text(strip=True)
if not (driver_elem := li_item.css_first(".f1-podium--driver")):
@@ -83,14 +91,6 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
if not (href := link.attributes.get("href")):
continue
if not (time_elem := li_item.css_first(".SaatZamanBilgisi")):
continue
time_text = time_elem.text(strip=True)
if not pattern.search(time_text):
continue
events.append(
{
"sport": sport,

View file

@@ -17,7 +17,7 @@ TAG = "WATCHFTY"
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
API_FILE = Cache(f"{TAG.lower()}-api.json", exp=19_800)
API_URL = "https://api.watchfooty.st"
@@ -118,11 +118,26 @@ async def process_event(
return
first_available = await page.wait_for_selector(
'a[href*="/stream/"]', timeout=3_000
)
try:
first_available = await page.wait_for_selector(
'a[href*="/stream/"]',
timeout=3_000,
)
except TimeoutError:
log.warning(f"URL {url_num}) No available stream links.")
await first_available.click()
return
if not (href := await first_available.get_attribute("href")):
log.warning(f"URL {url_num}) No available stream links.")
return
await page.goto(
href,
wait_until="domcontentloaded",
timeout=5_000,
)
wait_task = asyncio.create_task(got_one.wait())