e
This commit is contained in:
parent
f04c9a4b0f
commit
00000d9cbf
2 changed files with 19 additions and 14 deletions
|
|
@ -54,7 +54,7 @@ async def main() -> None:
|
||||||
asyncio.create_task(streamfree.scrape(network.client)),
|
asyncio.create_task(streamfree.scrape(network.client)),
|
||||||
asyncio.create_task(strmd.scrape(network.client)),
|
asyncio.create_task(strmd.scrape(network.client)),
|
||||||
asyncio.create_task(tvpass.scrape(network.client)),
|
asyncio.create_task(tvpass.scrape(network.client)),
|
||||||
#asyncio.create_task(volo.scrape(network.client)),
|
# asyncio.create_task(volo.scrape(network.client)),
|
||||||
asyncio.create_task(watchfooty.scrape(network.client)),
|
asyncio.create_task(watchfooty.scrape(network.client)),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,15 @@ HTML_CACHE = Cache("roxie-html.json", exp=19_800)
|
||||||
|
|
||||||
BASE_URL = "https://roxiestreams.live"
|
BASE_URL = "https://roxiestreams.live"
|
||||||
|
|
||||||
|
valid_sports = {
|
||||||
|
"fighting": "Fighting",
|
||||||
|
"mlb": "MLB",
|
||||||
|
"motorsports": "Racing",
|
||||||
|
"nba": "NBA",
|
||||||
|
"nfl": "American Football",
|
||||||
|
"soccer": "Soccer",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
async def process_event(
|
async def process_event(
|
||||||
client: httpx.AsyncClient,
|
client: httpx.AsyncClient,
|
||||||
|
|
@ -47,6 +56,7 @@ async def process_event(
|
||||||
async def refresh_html_cache(
|
async def refresh_html_cache(
|
||||||
client: httpx.AsyncClient,
|
client: httpx.AsyncClient,
|
||||||
url: str,
|
url: str,
|
||||||
|
sport: str,
|
||||||
now_ts: float,
|
now_ts: float,
|
||||||
) -> dict[str, str | float]:
|
) -> dict[str, str | float]:
|
||||||
|
|
||||||
|
|
@ -60,11 +70,6 @@ async def refresh_html_cache(
|
||||||
|
|
||||||
soup = HTMLParser(r.text)
|
soup = HTMLParser(r.text)
|
||||||
|
|
||||||
pattern = re.compile(r'Upcoming ([^"]*) Events', re.IGNORECASE)
|
|
||||||
|
|
||||||
h2_title = soup.css_first("h2").text(strip=True)
|
|
||||||
|
|
||||||
sport = sport_name[1] if (sport_name := pattern.search(h2_title)) else "Event"
|
|
||||||
events = {}
|
events = {}
|
||||||
|
|
||||||
for row in soup.css("table#eventsTable tbody tr"):
|
for row in soup.css("table#eventsTable tbody tr"):
|
||||||
|
|
@ -85,10 +90,12 @@ async def refresh_html_cache(
|
||||||
|
|
||||||
event_dt = Time.from_str(data_start, timezone="PST")
|
event_dt = Time.from_str(data_start, timezone="PST")
|
||||||
|
|
||||||
key = f"[{sport}] {event} (ROXIE)"
|
event_sport = valid_sports[sport]
|
||||||
|
|
||||||
|
key = f"[{event_sport}] {event} (ROXIE)"
|
||||||
|
|
||||||
events[key] = {
|
events[key] = {
|
||||||
"sport": sport,
|
"sport": event_sport,
|
||||||
"event": event,
|
"event": event,
|
||||||
"link": href,
|
"link": href,
|
||||||
"event_ts": event_dt.timestamp(),
|
"event_ts": event_dt.timestamp(),
|
||||||
|
|
@ -100,7 +107,7 @@ async def refresh_html_cache(
|
||||||
|
|
||||||
async def get_events(
|
async def get_events(
|
||||||
client: httpx.AsyncClient,
|
client: httpx.AsyncClient,
|
||||||
sport_urls: list[str],
|
sport_urls: dict[str, str],
|
||||||
cached_keys: set[str],
|
cached_keys: set[str],
|
||||||
) -> list[dict[str, str]]:
|
) -> list[dict[str, str]]:
|
||||||
|
|
||||||
|
|
@ -111,9 +118,10 @@ async def get_events(
|
||||||
refresh_html_cache(
|
refresh_html_cache(
|
||||||
client,
|
client,
|
||||||
url,
|
url,
|
||||||
|
sport,
|
||||||
now.timestamp(),
|
now.timestamp(),
|
||||||
)
|
)
|
||||||
for url in sport_urls
|
for sport, url in sport_urls.items()
|
||||||
]
|
]
|
||||||
|
|
||||||
results = await asyncio.gather(*tasks)
|
results = await asyncio.gather(*tasks)
|
||||||
|
|
@ -148,10 +156,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
|
||||||
|
|
||||||
log.info(f'Scraping from "{BASE_URL}"')
|
log.info(f'Scraping from "{BASE_URL}"')
|
||||||
|
|
||||||
sport_urls = [
|
sport_urls = {sport: urljoin(BASE_URL, sport) for sport in valid_sports}
|
||||||
urljoin(BASE_URL, sport)
|
|
||||||
for sport in ["fighting", "mlb", "motorsports", "nba", "nfl", "soccer"]
|
|
||||||
]
|
|
||||||
|
|
||||||
events = await get_events(
|
events = await get_events(
|
||||||
client,
|
client,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue