doms9 2025-11-18 00:38:40 -05:00
parent f04c9a4b0f
commit 00000d9cbf
2 changed files with 19 additions and 14 deletions

View file

@@ -54,7 +54,7 @@ async def main() -> None:
         asyncio.create_task(streamfree.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
         asyncio.create_task(tvpass.scrape(network.client)),
-        #asyncio.create_task(volo.scrape(network.client)),
+        # asyncio.create_task(volo.scrape(network.client)),
         asyncio.create_task(watchfooty.scrape(network.client)),
     ]
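
For context, a flat task list like this is typically drained in one go with asyncio.gather (the second file below does exactly that). A minimal, self-contained sketch of the pattern; the scraper names here are stand-ins, not the project's modules:

    import asyncio

    async def fake_scrape(name: str) -> str:
        # Stand-in for one scraper coroutine, e.g. tvpass.scrape(client).
        await asyncio.sleep(0)
        return name

    async def main() -> None:
        # Same shape as the list above: schedule every scraper eagerly,
        # then wait for all of them together, so one slow site does not
        # block the others.
        tasks = [
            asyncio.create_task(fake_scrape(site))
            for site in ("streamfree", "strmd", "tvpass", "watchfooty")
        ]
        print(await asyncio.gather(*tasks))

    asyncio.run(main())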

View file

@@ -18,6 +18,15 @@ HTML_CACHE = Cache("roxie-html.json", exp=19_800)
 BASE_URL = "https://roxiestreams.live"
 
+valid_sports = {
+    "fighting": "Fighting",
+    "mlb": "MLB",
+    "motorsports": "Racing",
+    "nba": "NBA",
+    "nfl": "American Football",
+    "soccer": "Soccer",
+}
+
 
 async def process_event(
     client: httpx.AsyncClient,
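
The new valid_sports table does double duty: its keys are the site's URL slugs, its values the display names used later when building event keys. A small sketch of both lookups, assuming urljoin is used the same way as elsewhere in this file:

    from urllib.parse import urljoin

    BASE_URL = "https://roxiestreams.live"

    valid_sports = {
        "fighting": "Fighting",
        "mlb": "MLB",
        "motorsports": "Racing",
        "nba": "NBA",
        "nfl": "American Football",
        "soccer": "Soccer",
    }

    # Slug -> page URL, and slug -> display name.
    print(urljoin(BASE_URL, "motorsports"))  # https://roxiestreams.live/motorsports
    print(valid_sports["motorsports"])       # Racing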
@@ -47,6 +56,7 @@ async def process_event(
 async def refresh_html_cache(
     client: httpx.AsyncClient,
     url: str,
+    sport: str,
     now_ts: float,
 ) -> dict[str, str | float]:
@@ -60,11 +70,6 @@ async def refresh_html_cache(
     soup = HTMLParser(r.text)
 
-    pattern = re.compile(r'Upcoming ([^"]*) Events', re.IGNORECASE)
-
-    h2_title = soup.css_first("h2").text(strip=True)
-
-    sport = sport_name[1] if (sport_name := pattern.search(h2_title)) else "Event"
 
     events = {}
 
     for row in soup.css("table#eventsTable tbody tr"):
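
The lines removed here were the old way of learning the sport: regex-match the page's <h2> heading and fall back to "Event" on a miss. Passing the slug in as a parameter and mapping it through valid_sports avoids that silent fallback and allows relabeling (the site's "motorsports" page can be shown as "Racing"). A sketch of the removed approach against a sample heading; the heading text is an assumption:

    import re

    # The removed approach: derive the sport from the page heading.
    pattern = re.compile(r'Upcoming ([^"]*) Events', re.IGNORECASE)

    h2_title = "Upcoming NBA Events"  # assumed shape of the site's heading
    sport = m[1] if (m := pattern.search(h2_title)) else "Event"
    print(sport)  # NBA

    # Any heading the regex misses silently degrades to "Event",
    # and "Motorsports" could never be relabeled "Racing" this way.
    print(pattern.search("Today's Schedule"))  # None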
@@ -85,10 +90,12 @@ async def refresh_html_cache(
         event_dt = Time.from_str(data_start, timezone="PST")
 
-        key = f"[{sport}] {event} (ROXIE)"
+        event_sport = valid_sports[sport]
+
+        key = f"[{event_sport}] {event} (ROXIE)"
 
         events[key] = {
-            "sport": sport,
+            "sport": event_sport,
             "event": event,
             "link": href,
             "event_ts": event_dt.timestamp(),
@@ -100,7 +107,7 @@ async def refresh_html_cache(
 
 async def get_events(
     client: httpx.AsyncClient,
-    sport_urls: list[str],
+    sport_urls: dict[str, str],
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
@@ -111,9 +118,10 @@ async def get_events(
         refresh_html_cache(
             client,
             url,
+            sport,
             now.timestamp(),
         )
-        for url in sport_urls
+        for sport, url in sport_urls.items()
     ]
 
     results = await asyncio.gather(*tasks)
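
Iterating sport_urls.items() keeps each slug paired with its URL, which is how refresh_html_cache now receives the sport explicitly. A reduced sketch of the fan-out; the coroutine body and the dropped client argument are stand-ins:

    import asyncio
    import time

    async def refresh_html_cache(url: str, sport: str, now_ts: float) -> dict:
        # Stand-in body; the real coroutine fetches and parses the page.
        return {"sport": sport, "url": url, "ts": now_ts}

    async def get_events(sport_urls: dict[str, str]) -> list[dict]:
        now_ts = time.time()
        tasks = [
            refresh_html_cache(url, sport, now_ts)
            for sport, url in sport_urls.items()
        ]
        return await asyncio.gather(*tasks)

    print(asyncio.run(get_events({"nba": "https://example.com/nba"})))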
@@ -148,10 +156,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
     log.info(f'Scraping from "{BASE_URL}"')
 
-    sport_urls = [
-        urljoin(BASE_URL, sport)
-        for sport in ["fighting", "mlb", "motorsports", "nba", "nfl", "soccer"]
-    ]
+    sport_urls = {sport: urljoin(BASE_URL, sport) for sport in valid_sports}
 
     events = await get_events(
         client,
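
The closing comprehension derives the URL map straight from valid_sports, so the list of supported sports now lives in one place. A sketch of the resulting mapping:

    from urllib.parse import urljoin

    BASE_URL = "https://roxiestreams.live"
    valid_sports = {"fighting": "Fighting", "mlb": "MLB", "soccer": "Soccer"}

    sport_urls = {sport: urljoin(BASE_URL, sport) for sport in valid_sports}
    print(sport_urls)
    # {'fighting': 'https://roxiestreams.live/fighting',
    #  'mlb': 'https://roxiestreams.live/mlb',
    #  'soccer': 'https://roxiestreams.live/soccer'}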