diff --git a/M3U8/scrapers/ace.py b/M3U8/scrapers/ace.py
deleted file mode 100644
index 807e053..0000000
--- a/M3U8/scrapers/ace.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import asyncio
-import re
-from urllib.parse import urljoin
-
-import httpx
-from selectolax.parser import HTMLParser, Node
-
-from .utils import get_base, get_logger, leagues
-
-log = get_logger(__name__)
-
-urls: dict[str, dict[str, str]] = {}
-
-MIRRORS = ["https://aceztrims.pages.dev/", "https://acestrlms.pages.dev/"]
-
-
-def is_valid_href(a: Node) -> bool:
-    href = a.attributes.get("href", "")
-    return href.startswith("/") and href != "/news/"
-
-
-async def get_schedule(client: httpx.AsyncClient, base_url: str) -> list[dict]:
-    log.info(f'Scraping from "{base_url}"')
-
-    try:
-        r = await client.get(base_url)
-        r.raise_for_status()
-    except Exception as e:
-        log.error(f'Failed to fetch "{base_url}": {e}')
-        return []
-
-    html = re.sub(r"<!--.*?-->", "", r.text, flags=re.DOTALL)
-
-    tree = HTMLParser(html)
-
-    events = []
-
-    for a in filter(is_valid_href, tree.css("a[href]")):
-        href = a.attributes.get("href", "")
-
-        title_text = a.text(strip=True)
-
-        after_time = (
-            title_text.split("//", 1)[1].strip() if "//" in title_text else title_text
-        )
-
-        if " - " in after_time:
-            sport, event_name = (x.strip() for x in after_time.split(" - ", 1))
-        else:
-            sport, event_name = "", after_time
-
-        events.append(
-            {"sport": sport, "event": event_name, "href": urljoin(base_url, href)}
-        )
-
-    return events
-
-
-async def get_m3u8_links(client: httpx.AsyncClient, url: str) -> list[str]:
-    try:
-        r = await client.get(url)
-        r.raise_for_status()
-    except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
-        return []
-
-    html = re.sub(r"<!--.*?-->", "", r.text, flags=re.DOTALL)
-
-    soup = HTMLParser(html)
-
-    m3u8_links = []
-
-    for btn in soup.css("button[onclick]"):
-        onclick = btn.attributes.get("onclick", "")
-
-        if match := re.search(r"src\s*=\s*['\"](.*?)['\"]", onclick):
-            link = match[1]
-
-            if ".m3u8" in link:
-                m3u8_links.append(link)
-
-    if iframe := soup.css_first("iframe#iframe"):
-        src = iframe.attributes.get("src", "")
-
-        if ".m3u8" in src and src not in m3u8_links:
-            m3u8_links.insert(
-                0,
-                src.split("cors.ricohspaces.app/")[-1],
-            )
-
-    return m3u8_links
-
-
-async def scrape(client: httpx.AsyncClient) -> None:
-    if not (base_url := await get_base(client, MIRRORS)):
-        log.warning("No working ace mirrors")
-        return
-
-    schedule = await get_schedule(client, base_url)
-
-    tasks = [get_m3u8_links(client, item["href"]) for item in schedule]
-
-    results = await asyncio.gather(*tasks)
-
-    for item, m3u8_urls in zip(schedule, results):
-        if not m3u8_urls:
-            continue
-
-        for i, link in enumerate(m3u8_urls, start=1):
-            sport, event = item["sport"], item["event"]
-
-            key = f"[{sport}] {event} (S{i})"
-
-            tvg_id, logo = leagues.info(sport)
-
-            entry = {
-                "url": link,
-                "logo": logo,
-                "id": tvg_id,
-            }
-
-            urls[key] = entry
-
-    log.info(f"Collected {len(urls)} events")
-
-
-# add caching
diff --git a/M3U8/scrapers/ppv.py b/M3U8/scrapers/ppv.py
index 1cfad0f..489ed0f 100644
--- a/M3U8/scrapers/ppv.py
+++ b/M3U8/scrapers/ppv.py
@@ -140,7 +140,7 @@ async def get_events(
     if not (
         api_data := load_cache(
             API_FILE,
-            exp=86400,
+            exp=86_400,
             nearest_hr=True,
             per_entry=False,
         )
@@ -238,6 +238,3 @@ async def scrape(client: httpx.AsyncClient) -> None:
         log.info("No new events found")
 
     write_cache(CACHE_FILE, cached_urls)
-
-
-# works if no cloudflare bot detection
diff --git a/M3U8/scrapers/streamed.py b/M3U8/scrapers/streamed.py
index 67fd929..8d9a956 100644
--- a/M3U8/scrapers/streamed.py
+++ b/M3U8/scrapers/streamed.py
@@ -134,7 +134,8 @@ async def refresh_html_cache(client: httpx.AsyncClient, url: str) -> dict[str, s
             "sport": sport,
             "event": event,
             "link": f"https://streamed.site/set.php?{m[1]}",
-            "ts": event_dt.timestamp(),
+            "event_ts": event_dt.timestamp(),
+            "timestamp": now.timestamp(),
         }
 
     return events
@@ -146,7 +147,7 @@ async def get_events(
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
 
-    if not (events := load_cache(HTML_CACHE, exp=10_800)):
+    if not (events := load_cache(HTML_CACHE, exp=86_400)):
         events = await refresh_html_cache(client, url)
         write_cache(HTML_CACHE, events)
 
@@ -158,7 +159,7 @@ async def get_events(
         if cached_keys & {k}:
             continue
 
-        if not start_ts <= v["ts"] < end_ts:
+        if not start_ts <= v["event_ts"] < end_ts:
             continue
 
         live.append({**v})