diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 407da34..7fc19b0 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -4,6 +4,7 @@ import re
 from pathlib import Path
 
 from scrapers import (
+    cdnlivetv,
     embedhd,
     fawa,
     istreameast,
@@ -50,6 +51,7 @@ async def main() -> None:
     base_m3u8, tvg_chno = load_base()
 
     tasks = [
+        asyncio.create_task(cdnlivetv.scrape()),
         asyncio.create_task(embedhd.scrape()),
         asyncio.create_task(fawa.scrape()),
         asyncio.create_task(istreameast.scrape()),
@@ -74,7 +76,8 @@ async def main() -> None:
     await watchfooty.scrape()
 
     additions = (
-        embedhd.urls
+        cdnlivetv.urls
+        | embedhd.urls
        | fawa.urls
        | istreameast.urls
        | pixel.urls
diff --git a/M3U8/scrapers/cdnlivetv.py b/M3U8/scrapers/cdnlivetv.py
new file mode 100644
index 0000000..7cb925e
--- /dev/null
+++ b/M3U8/scrapers/cdnlivetv.py
@@ -0,0 +1,154 @@
+from functools import partial
+
+from playwright.async_api import async_playwright
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+TAG = "CDNTV"
+
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
+
+API_URL = "https://api.cdn-live.tv/api/v1/events/sports"
+
+
+async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+    events = []
+
+    if not (api_data := API_FILE.load(per_entry=False)):
+        api_data = {}
+
+        if r := await network.request(
+            API_URL,
+            log=log,
+            params={"user": "cdnlivetv", "plan": "free"},
+        ):
+            api_data: dict = r.json()
+
+            API_FILE.write(api_data)
+
+    if not (data := api_data.get("cdn-live-tv")):
+        return events
+
+    now = Time.clean(Time.now())
+
+    start_dt = now.delta(minutes=-30)
+    end_dt = now.delta(minutes=30)
+
+    sports = [key for key in data.keys() if not key.islower()]
+
+    for sport in sports:
+        event_info = data[sport]
+
+        for event in event_info:
+            t1, t2 = event["awayTeam"], event["homeTeam"]
+
+            name = f"{t1} vs {t2}"
+
+            league = event["tournament"]
+
+            if f"[{league}] {name} ({TAG})" in cached_keys:
+                continue
+
+            event_dt = Time.from_str(event["start"], timezone="UTC")
+
+            if not start_dt <= event_dt <= end_dt:
+                continue
+
+            channels: list[str] = [channel["url"] for channel in event["channels"]]
+
+            # if not (
+            #     link := (
+            #         channels[0]
+            #         if len(channels) == 1
+            #         else await network.get_base(channels)
+            #     )
+            # ):
+            #     continue
+
+            events.append(
+                {
+                    "sport": league,
+                    "event": name,
+                    "link": channels[0],  # link
+                    "timestamp": event_dt.timestamp(),
+                }
+            )
+
+    return events
+
+
+async def scrape() -> None:
+    cached_urls = CACHE_FILE.load()
+
+    cached_count = len(cached_urls)
+
+    urls.update(cached_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info(f'Scraping from "{API_URL}"')
+
+    events = await get_events(cached_urls.keys())
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    if events:
+        async with async_playwright() as p:
+            browser, context = await network.browser(p)
+
+            try:
+                for i, ev in enumerate(events, start=1):
+                    handler = partial(
+                        network.process_event,
+                        url=ev["link"],
+                        url_num=i,
+                        context=context,
+                        log=log,
+                    )
+
+                    url = await network.safe_process(
+                        handler,
+                        url_num=i,
+                        semaphore=network.PW_S,
+                        log=log,
+                    )
+
+                    if url:
+                        sport, event, ts, link = (
+                            ev["sport"],
+                            ev["event"],
+                            ev["timestamp"],
+                            ev["link"],
+                        )
+
+                        key = f"[{sport}] {event} ({TAG})"
+
+                        tvg_id, logo = leagues.get_tvg_info(sport, event)
+
+                        entry = {
+                            "url": url,
+                            "logo": logo,
+                            "base": "",
+                            "timestamp": ts,
+                            "id": tvg_id or "Live.Event.us",
+                            "link": link,
+                        }
+
+                        urls[key] = cached_urls[key] = entry
+
+            finally:
+                await browser.close()
+
+    if new_count := len(cached_urls) - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/streambtw.py b/M3U8/scrapers/streambtw.py
index 8bf8b37..f23bcb2 100644
--- a/M3U8/scrapers/streambtw.py
+++ b/M3U8/scrapers/streambtw.py
@@ -15,7 +15,7 @@ TAG = "STRMBTW"
 
 CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
 
-MIRRORS = ["https://hiteasport.info/", "https://streambtw.com/"]
+BASE_URL = "https://hiteasport.info/"
 
 
 def fix_league(s: str) -> str:
@@ -45,10 +45,10 @@ async def process_event(url: str, url_num: int) -> str | None:
     return stream_link
 
 
-async def get_events(url: str) -> list[dict[str, str]]:
+async def get_events() -> list[dict[str, str]]:
     events = []
 
-    if not (html_data := await network.request(url, log=log)):
+    if not (html_data := await network.request(BASE_URL, log=log)):
         return events
 
     soup = HTMLParser(html_data.content)
@@ -72,7 +72,7 @@ async def get_events(url: str) -> list[dict[str, str]]:
             {
                 "sport": fix_league(league),
                 "event": name,
-                "link": urljoin(url, href),
+                "link": urljoin(BASE_URL, href),
             }
         )
 
@@ -87,14 +87,9 @@ async def scrape() -> None:
 
         return
 
-    if not (base_url := await network.get_base(MIRRORS)):
-        log.warning("No working StreamBTW mirrors")
-
-        return
-
-    log.info(f'Scraping from "{base_url}"')
-
-    events = await get_events(base_url)
+    log.info(f'Scraping from "{BASE_URL}"')
+
+    events = await get_events()
 
     log.info(f"Processing {len(events)} new URL(s)")