commit 00000d9a4d (parent 308d607660)

3 changed files with 29 additions and 23 deletions
@@ -46,7 +46,7 @@ async def main() -> None:
     tasks = [
         asyncio.create_task(fawa.scrape(network.client)),
         asyncio.create_task(lotus.scrape(network.client)),
-        asyncio.create_task(pixel.scrape(network.client)),
+        asyncio.create_task(pixel.scrape()),
         asyncio.create_task(ppv.scrape(network.client)),
         asyncio.create_task(roxie.scrape(network.client)),
         asyncio.create_task(streambtw.scrape(network.client)),
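The only change in this first hunk is that `pixel.scrape()` no longer receives the shared `httpx` client; as the later hunks show, the Pixelsport scraper now opens its own Playwright browser session, while the other scrapers keep using `network.client`. A minimal sketch of the pattern, assuming `main()` simply gathers the task list afterwards (the lines following this hunk are not shown in the diff):

```python
import asyncio

async def main() -> None:
    # Sketch only: hypothetical stand-ins for the real scraper coroutines.
    async def pixel_scrape() -> None: ...               # manages its own browser
    async def ppv_scrape(client: object) -> None: ...   # still takes network.client

    shared_client = object()  # placeholder for network.client
    tasks = [
        asyncio.create_task(pixel_scrape()),
        asyncio.create_task(ppv_scrape(shared_client)),
    ]
    # Assumption: the real main() awaits the tasks after building the list;
    # those lines fall outside this hunk.
    await asyncio.gather(*tasks)

asyncio.run(main())
```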
@@ -1,8 +1,9 @@
+import json
 import re
 
-import httpx
+from playwright.async_api import async_playwright
 
-from .utils import Cache, Time, get_logger, leagues
+from .utils import Cache, Time, get_logger, leagues, network
 
 log = get_logger(__name__)
 
@@ -16,36 +17,45 @@ BASE_URL = "https://pixelsport.tv/backend/livetv/events"
 
 
 async def refresh_api_cache(
-    client: httpx.AsyncClient,
     url: str,
     ts: float,
 ) -> dict[str, list[dict, str, str]]:
     log.info("Refreshing API cache")
 
-    try:
-        r = await client.get(url)
-        r.raise_for_status()
-    except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
-
-        return {}
-
-    data = r.json()
+    async with async_playwright() as p:
+        try:
+            browser, context = await network.browser(p)
+
+            page = await context.new_page()
+
+            await page.goto(
+                url,
+                wait_until="domcontentloaded",
+                timeout=10_000,
+            )
+
+            raw_json = await page.locator("pre").inner_text()
+
+        except Exception as e:
+            log.error(f'Failed to fetch "{url}": {e}')
+
+            return {}
+
+        finally:
+            await browser.close()
+
+    data = json.loads(raw_json)
 
     data["timestamp"] = ts
 
     return data
 
 
-async def get_events(
-    client: httpx.AsyncClient,
-    cached_keys: set[str],
-) -> dict[str, str | float]:
+async def get_events(cached_keys: set[str]) -> dict[str, str | float]:
     now = Time.clean(Time.now())
 
     if not (api_data := API_FILE.load(per_entry=False)):
         api_data = await refresh_api_cache(
-            client,
             BASE_URL,
             now.timestamp(),
         )
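The rewritten `refresh_api_cache` loads the endpoint in a headless browser and reads the JSON out of the rendered `<pre>` element (which is how Chromium displays a raw JSON response), instead of issuing a plain `httpx` GET. The `network.browser` helper itself is not part of this diff, so the sketch below is only an assumption of what it might look like: it launches Chromium through the Playwright instance it is handed and returns the browser together with a fresh context, so the caller can close the browser in its `finally` block.

```python
# Hypothetical sketch of the network.browser helper used above; the real
# implementation lives in the .utils package and is not shown in this diff.
from playwright.async_api import Browser, BrowserContext, Playwright


async def browser(p: Playwright) -> tuple[Browser, BrowserContext]:
    # Launch a headless Chromium and hand back both handles so the caller
    # can close the browser itself, as refresh_api_cache does.
    b = await p.chromium.launch(headless=True)
    context = await b.new_context()
    return b, context
```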
@@ -100,7 +110,7 @@ async def get_events(
     return events
 
 
-async def scrape(client: httpx.AsyncClient) -> None:
+async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
     cached_count = len(cached_urls)
     urls.update(cached_urls)
@@ -109,7 +119,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    events = await get_events(client, set(cached_urls.keys()))
+    events = await get_events(set(cached_urls.keys()))
 
     if events:
         for d in (urls, cached_urls):
@@ -64,11 +64,7 @@ async def refresh_html_cache(
 
     h2_title = soup.css_first("h2").text(strip=True)
 
-    if sport_name := pattern.search(h2_title):
-        sport = sport_name[1]
-    else:
-        sport = "Event"
+    sport = sport_name[1] if (sport_name := pattern.search(h2_title)) else "Event"
 
     events = {}
 
     for row in soup.css("table#eventsTable tbody tr"):
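The last hunk folds the `if`/`else` into a conditional expression with a walrus assignment. This is safe because the condition of a conditional expression is evaluated before either branch, so `sport_name` is already bound by the time `sport_name[1]` would run, and the subscript only executes when the search actually matched. A small self-contained sketch of the same idiom, using a hypothetical pattern in place of the module-level `pattern` from the diff:

```python
import re

# Hypothetical pattern standing in for the module-level `pattern` in the diff.
pattern = re.compile(r"Live\s+(\w+)")

for h2_title in ("Live Soccer Streams", "Tonight's Schedule"):
    # The condition (the walrus binding) is evaluated first; the true branch
    # indexes the match object only when search() returned one.
    sport = sport_name[1] if (sport_name := pattern.search(h2_title)) else "Event"
    print(sport)  # "Soccer", then "Event"
```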