e
This commit is contained in:
parent
a4338b96ad
commit
00000d9b09
1 changed file with 15 additions and 8 deletions
|
|
@@ -47,7 +47,6 @@ async def process_event(
|
||||||
async def refresh_html_cache(
|
async def refresh_html_cache(
|
||||||
client: httpx.AsyncClient,
|
client: httpx.AsyncClient,
|
||||||
url: str,
|
url: str,
|
||||||
sport: str,
|
|
||||||
now_ts: float,
|
now_ts: float,
|
||||||
) -> dict[str, str | float]:
|
) -> dict[str, str | float]:
|
||||||
|
|
||||||
|
|
@@ -61,6 +60,15 @@ async def refresh_html_cache(
|
||||||
|
|
||||||
soup = HTMLParser(r.text)
|
soup = HTMLParser(r.text)
|
||||||
|
|
||||||
|
pattern = re.compile(r'Upcoming ([^"]*) Events', re.IGNORECASE)
|
||||||
|
|
||||||
|
h2_title = soup.css_first("h2").text(strip=True)
|
||||||
|
|
||||||
|
if sport_name := pattern.search(h2_title):
|
||||||
|
sport = sport_name[1]
|
||||||
|
else:
|
||||||
|
sport = "Event"
|
||||||
|
|
||||||
events = {}
|
events = {}
|
||||||
|
|
||||||
for row in soup.css("table#eventsTable tbody tr"):
|
for row in soup.css("table#eventsTable tbody tr"):
|
||||||
|
|
@@ -96,7 +104,7 @@ async def refresh_html_cache(
|
||||||
|
|
||||||
async def get_events(
|
async def get_events(
|
||||||
client: httpx.AsyncClient,
|
client: httpx.AsyncClient,
|
||||||
sport_urls: dict[str, str],
|
sport_urls: list[str],
|
||||||
cached_keys: set[str],
|
cached_keys: set[str],
|
||||||
) -> list[dict[str, str]]:
|
) -> list[dict[str, str]]:
|
||||||
|
|
||||||
|
|
@@ -107,10 +115,9 @@ async def get_events(
|
||||||
refresh_html_cache(
|
refresh_html_cache(
|
||||||
client,
|
client,
|
||||||
url,
|
url,
|
||||||
sport,
|
|
||||||
now.timestamp(),
|
now.timestamp(),
|
||||||
)
|
)
|
||||||
for sport, url in sport_urls.items()
|
for url in sport_urls
|
||||||
]
|
]
|
||||||
|
|
||||||
results = await asyncio.gather(*tasks)
|
results = await asyncio.gather(*tasks)
|
||||||
|
|
@@ -145,10 +152,10 @@ async def scrape(client: httpx.AsyncClient) -> None:
|
||||||
|
|
||||||
log.info(f'Scraping from "{BASE_URL}"')
|
log.info(f'Scraping from "{BASE_URL}"')
|
||||||
|
|
||||||
sport_urls = {
|
sport_urls = [
|
||||||
sport: urljoin(BASE_URL, sport.lower())
|
urljoin(BASE_URL, sport)
|
||||||
for sport in ["Soccer", "MLB", "NBA", "NFL", "Fighting", "Motorsports"]
|
for sport in ["fighting", "mlb", "motorsports", "nba", "nfl", "soccer"]
|
||||||
}
|
]
|
||||||
|
|
||||||
events = await get_events(
|
events = await get_events(
|
||||||
client,
|
client,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue