doms9 2025-11-16 02:31:30 -05:00
parent a4338b96ad
commit 00000d9b09

@@ -47,7 +47,6 @@ async def process_event(
 async def refresh_html_cache(
     client: httpx.AsyncClient,
     url: str,
-    sport: str,
     now_ts: float,
 ) -> dict[str, str | float]:
@@ -61,6 +60,15 @@ async def refresh_html_cache(
     soup = HTMLParser(r.text)

+    pattern = re.compile(r'Upcoming ([^"]*) Events', re.IGNORECASE)
+
+    h2_title = soup.css_first("h2").text(strip=True)
+
+    if sport_name := pattern.search(h2_title):
+        sport = sport_name[1]
+    else:
+        sport = "Event"
+
     events = {}

     for row in soup.css("table#eventsTable tbody tr"):
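This hunk moves sport detection into the parser itself: instead of receiving the sport as an argument, refresh_html_cache now reads it from the page's h2 heading. A minimal sketch of that extraction, assuming HTMLParser here is selectolax's (consistent with the css_first/css calls in the diff) and using a sample header string in place of the fetched page:

import re

from selectolax.parser import HTMLParser

# Sample header standing in for the scraped response body.
html = "<h2>Upcoming Soccer Events</h2>"

pattern = re.compile(r'Upcoming ([^"]*) Events', re.IGNORECASE)

h2_title = HTMLParser(html).css_first("h2").text(strip=True)

if sport_name := pattern.search(h2_title):
    sport = sport_name[1]  # first capture group, e.g. "Soccer"
else:
    sport = "Event"  # fallback when the heading does not match

print(sport)  # -> Soccer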
@@ -96,7 +104,7 @@ async def refresh_html_cache(
 async def get_events(
     client: httpx.AsyncClient,
-    sport_urls: dict[str, str],
+    sport_urls: list[str],
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
@@ -107,10 +115,9 @@ async def get_events(
         refresh_html_cache(
             client,
             url,
-            sport,
             now.timestamp(),
         )
-        for sport, url in sport_urls.items()
+        for url in sport_urls
     ]

     results = await asyncio.gather(*tasks)
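With the sport name gone from the call, the fan-out reduces to one coroutine per URL, gathered concurrently. A self-contained sketch of the same asyncio.gather pattern, with a hypothetical stub standing in for the real HTTP fetch and placeholder URLs:

import asyncio
import time

# Stub with the new, slimmer signature; the real version fetches
# and parses the page.
async def refresh_html_cache(url: str, now_ts: float) -> dict[str, str | float]:
    await asyncio.sleep(0)  # stands in for the HTTP round trip
    return {"url": url, "cached_at": now_ts}

async def main() -> None:
    sport_urls = ["https://example.com/mlb", "https://example.com/nba"]
    tasks = [refresh_html_cache(url, time.time()) for url in sport_urls]
    results = await asyncio.gather(*tasks)  # results keep task order
    print(results)

asyncio.run(main())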
@@ -145,10 +152,10 @@ async def scrape(client: httpx.AsyncClient) -> None:
     log.info(f'Scraping from "{BASE_URL}"')

-    sport_urls = {
-        sport: urljoin(BASE_URL, sport.lower())
-        for sport in ["Soccer", "MLB", "NBA", "NFL", "Fighting", "Motorsports"]
-    }
+    sport_urls = [
+        urljoin(BASE_URL, sport)
+        for sport in ["fighting", "mlb", "motorsports", "nba", "nfl", "soccer"]
+    ]

     events = await get_events(
         client,
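Since the slugs are now stored lowercase, the comprehension no longer needs sport.lower(). A quick sketch of what the list build produces, with an illustrative base URL (the real BASE_URL is defined elsewhere in the module):

from urllib.parse import urljoin

BASE_URL = "https://example.com/"  # placeholder value

sport_urls = [
    urljoin(BASE_URL, sport)
    for sport in ["fighting", "mlb", "motorsports", "nba", "nfl", "soccer"]
]

print(sport_urls[0])  # https://example.com/fighting

One caveat worth noting: urljoin drops the last path segment of a base that lacks a trailing slash, so a BASE_URL with a path component should end in "/" for the slugs to append as expected.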