mirror of
https://github.com/doms9/iptv.git
synced 2026-03-07 11:18:25 +01:00
e
edit tvapp.py scraping
This commit is contained in:
parent
166fb66aa1
commit
00000d9812
1 changed file with 9 additions and 17 deletions
|
|
@@ -25,7 +25,7 @@ def fix_url(s: str) -> str:
|
||||||
return urljoin(f"http://{base}", parsed.path.replace("tracks-v1a1/", ""))
|
return urljoin(f"http://{base}", parsed.path.replace("tracks-v1a1/", ""))
|
||||||
|
|
||||||
|
|
||||||
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
|
async def get_events() -> list[dict[str, str]]:
|
||||||
events = []
|
events = []
|
||||||
|
|
||||||
if not (html_data := await network.request(BASE_URL, log=log)):
|
if not (html_data := await network.request(BASE_URL, log=log)):
|
||||||
|
|
@@ -45,9 +45,6 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
|
||||||
for a in row.css("a.list-group-item[href]"):
|
for a in row.css("a.list-group-item[href]"):
|
||||||
event_name = a.text(strip=True).split(":", 1)[0]
|
event_name = a.text(strip=True).split(":", 1)[0]
|
||||||
|
|
||||||
if f"[{sport}] {event_name} ({TAG})" in cached_keys:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not (href := a.attributes.get("href")):
|
if not (href := a.attributes.get("href")):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@@ -63,17 +60,16 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
|
||||||
|
|
||||||
|
|
||||||
async def scrape(browser: Browser) -> None:
|
async def scrape(browser: Browser) -> None:
|
||||||
cached_urls = CACHE_FILE.load()
|
if cached := CACHE_FILE.load():
|
||||||
|
urls.update(cached)
|
||||||
|
|
||||||
cached_count = len(cached_urls)
|
log.info(f"Loaded {len(urls)} event(s) from cache")
|
||||||
|
|
||||||
urls.update(cached_urls)
|
return
|
||||||
|
|
||||||
log.info(f"Loaded {cached_count} event(s) from cache")
|
|
||||||
|
|
||||||
log.info(f'Scraping from "{BASE_URL}"')
|
log.info(f'Scraping from "{BASE_URL}"')
|
||||||
|
|
||||||
events = await get_events(cached_urls.keys())
|
events = await get_events()
|
||||||
|
|
||||||
log.info(f"Processing {len(events)} new URL(s)")
|
log.info(f"Processing {len(events)} new URL(s)")
|
||||||
|
|
||||||
|
|
@@ -118,12 +114,8 @@ async def scrape(browser: Browser) -> None:
|
||||||
"link": link,
|
"link": link,
|
||||||
}
|
}
|
||||||
|
|
||||||
urls[key] = cached_urls[key] = entry
|
urls[key] = entry
|
||||||
|
|
||||||
if new_count := len(cached_urls) - cached_count:
|
log.info(f"Collected and cached {len(urls)} new event(s)")
|
||||||
log.info(f"Collected and cached {new_count} new event(s)")
|
|
||||||
|
|
||||||
else:
|
CACHE_FILE.write(urls)
|
||||||
log.info("No new events found")
|
|
||||||
|
|
||||||
CACHE_FILE.write(cached_urls)
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue