diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index cd5f2cf..8186196 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -5,6 +5,7 @@ from pathlib import Path
 
 from scrapers import (
     fawa,
+    lotus,
     pixel,
     ppv,
     roxie,
@@ -45,6 +46,7 @@ async def main() -> None:
 
     tasks = [
         asyncio.create_task(fawa.scrape(network.client)),
+        asyncio.create_task(lotus.scrape(network.client)),
         asyncio.create_task(pixel.scrape()),
         asyncio.create_task(ppv.scrape(network.client)),
         asyncio.create_task(roxie.scrape(network.client)),
@@ -62,6 +64,7 @@ async def main() -> None:
 
     additions = (
         fawa.urls
+        | lotus.urls
         | pixel.urls
         | ppv.urls
         | roxie.urls
diff --git a/M3U8/scrapers/fawa.py b/M3U8/scrapers/fawa.py
index 292b434..34507fb 100644
--- a/M3U8/scrapers/fawa.py
+++ b/M3U8/scrapers/fawa.py
@@ -15,6 +15,8 @@ CACHE_FILE = Cache("fawa.json", exp=10_800)
 
 BASE_URL = "http://www.fawanews.sc/"
 
+TAG = "FAWA"
+
 
 async def process_event(
     client: httpx.AsyncClient,
@@ -130,7 +132,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
             if url:
                 sport, event = ev["sport"], ev["event"]
 
-                key = f"[{sport}] {event} (FAWA)"
+                key = f"[{sport}] {event} ({TAG})"
 
                 tvg_id, logo = leagues.get_tvg_info(sport, event)
 
diff --git a/M3U8/scrapers/lotus.py b/M3U8/scrapers/lotus.py
new file mode 100644
index 0000000..c849b38
--- /dev/null
+++ b/M3U8/scrapers/lotus.py
@@ -0,0 +1,163 @@
+from functools import partial
+
+import httpx
+from playwright.async_api import async_playwright
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+CACHE_FILE = Cache("lotus.json", exp=5_400)
+
+API_CACHE = Cache("lotus-api.json", exp=28_800)
+
+BASE_URL = "https://lotusgamehd.xyz/api-event.php"
+
+TAG = "LOTUS"
+
+
+def fix_league(s: str) -> str:
+    return " ".join(x.capitalize() for x in s.split()) if len(s) > 5 else s.upper()
+
+
+async def refresh_api_cache(
+    client: httpx.AsyncClient,
+    url: str,
+    ts: float,
+) -> dict[str, dict[str, str]]:
+    log.info("Refreshing API cache")
+
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}": {e}')
+
+        return {}
+
+    data = r.json()
+
+    data["timestamp"] = ts
+
+    return data
+
+
+async def get_events(
+    client: httpx.AsyncClient,
+    url: str,
+    cached_keys: set[str],
+) -> list[dict[str, str]]:
+    now = Time.now()
+
+    if not (api_data := API_CACHE.load(per_entry=False)):
+        api_data = await refresh_api_cache(
+            client,
+            url,
+            now.timestamp(),
+        )
+
+        API_CACHE.write(api_data)
+
+    events = []
+
+    for info in api_data.get("days", []):
+        event_dt = Time.from_str(info["day_et"], timezone="ET")
+
+        if now.date() != event_dt.date():
+            continue
+
+        for event in info["items"]:
+            event_league = event["league"]
+
+            if event_league == "channel tv":
+                continue
+
+            event_streams: list[dict[str, str]] = event["streams"]
+
+            if not (event_link := event_streams[0].get("link")):
+                continue
+
+            sport = fix_league(event_league)
+            event_name = event["title"]
+
+            key = f"[{sport}] {event_name} ({TAG})"
+
+            if cached_keys & {key}:
+                continue
+
+            events.append(
+                {
+                    "sport": sport,
+                    "event": event_name,
+                    "link": event_link,
+                }
+            )
+
+    return events
+
+
+async def scrape(client: httpx.AsyncClient) -> None:
+    cached_urls = CACHE_FILE.load()
+    cached_count = len(cached_urls)
+    urls.update(cached_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info(f'Scraping from "{BASE_URL}"')
+
+    events = await get_events(
+        client,
+        BASE_URL,
+        set(cached_urls.keys()),
+    )
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    if events:
+        now = Time.now().timestamp()
+
+        async with async_playwright() as p:
+            browser, context = await network.browser(p)
+
+            for i, ev in enumerate(events, start=1):
+                handler = partial(
+                    network.process_event,
+                    url=ev["link"],
+                    url_num=i,
+                    context=context,
+                    log=log,
+                )
+
+                url = await network.safe_process(
+                    handler,
+                    url_num=i,
+                    log=log,
+                )
+
+                if url:
+                    sport, event = ev["sport"], ev["event"]
+
+                    tvg_id, logo = leagues.get_tvg_info(sport, event)
+
+                    key = f"[{sport}] {event} ({TAG})"
+
+                    entry = {
+                        "url": url,
+                        "logo": logo,
+                        "base": "https://vividmosaica.com/",
+                        "timestamp": now,
+                        "id": tvg_id or "Live.Event.us",
+                    }
+
+                    urls[key] = cached_urls[key] = entry
+
+            await browser.close()
+
+    if new_count := len(cached_urls) - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/old/fstv.py b/M3U8/scrapers/old/fstv.py
new file mode 100644
index 0000000..c08141b
--- /dev/null
+++ b/M3U8/scrapers/old/fstv.py
@@ -0,0 +1,181 @@
+from functools import partial
+from urllib.parse import unquote, urljoin
+
+import httpx
+from selectolax.parser import HTMLParser
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+CACHE_FILE = Cache("fstv.json", exp=10_800)
+
+MIRRORS = ["https://fstv.zip", "https://fstv.space"]
+
+TAG = "FSTV"
+
+
+async def process_event(
+    client: httpx.AsyncClient,
+    url: str,
+    url_num: int,
+) -> tuple[str, str]:
+
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'URL {url_num}) Failed to fetch "{url}": {e}')
+
+        return "", ""
+
+    soup = HTMLParser(r.content)
+
+    match_name = None
+
+    if category_links := soup.css(".common-list-category .category-item a"):
+        match_name = category_links[-1].text(strip=True)
+
+    if not match_name or match_name.lower() == "vs":
+        if og_title := soup.css_first("meta[property='og:title']"):
+            match_name = (
+                og_title.attributes.get("content", "").split(" start on")[0].strip()
+            )
+
+    if not (ifr := soup.css_first("iframe")):
+        log.info(f"URL {url_num}) No M3U8 found")
+        return "", ""
+
+    if src := ifr.attributes.get("src"):
+        log.info(f"URL {url_num}) Captured M3U8")
+        return match_name or "", unquote(src).split("link=")[-1]
+
+
+async def get_events(
+    client: httpx.AsyncClient,
+    url: str,
+    cached_hrefs: set[str],
+) -> list[dict[str, str]]:
+
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}": {e}')
+
+        return []
+
+    soup = HTMLParser(r.content)
+
+    events = []
+
+    for wrpr in soup.css("div.fixtures-live-wrapper"):
+        for league_block in wrpr.css(".match-table-item > .league-info-wrapper"):
+            if not (
+                league_name_el := league_block.css_first(".league-info a.league-name")
+            ):
+                continue
+
+            full_text = league_name_el.text(strip=True)
+
+            if "]" in full_text:
+                event_name = full_text.split("]", 1)[1].strip()
+
+            else:
+                event_name = full_text
+
+            parent_item = league_block.parent
+
+            for game in parent_item.css(".common-table-row a[href*='/match/']"):
+                if not (href := game.attributes.get("href")):
+                    continue
+
+                if cached_hrefs & {href}:
+                    continue
+
+                cached_hrefs.add(href)
+
+                events.append(
+                    {
+                        "sport": event_name,
+                        "link": urljoin(url, href),
+                        "href": href,
+                    }
+                )
+
+    return events
+
+
+async def scrape(client: httpx.AsyncClient) -> None:
+    cached_urls = CACHE_FILE.load()
+    cached_hrefs = {entry["href"] for entry in cached_urls.values()}
+    cached_count = len(cached_urls)
+    urls.update(cached_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    if not (base_url := await network.get_base(MIRRORS)):
+        log.warning("No working FSTV mirrors")
+        CACHE_FILE.write(cached_urls)
+        return
+
+    log.info(f'Scraping from "{base_url}"')
+
+    events = await get_events(
+        client,
+        base_url,
+        cached_hrefs,
+    )
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    if events:
+        now = Time.now().timestamp()
+
+        for i, ev in enumerate(events, start=1):
+            handler = partial(
+                process_event,
+                client=client,
+                url=ev["link"],
+                url_num=i,
+            )
+
+            match_name, url = await network.safe_process(
+                handler,
+                url_num=i,
+                log=log,
+            )
+
+            if url:
+                sport = ev["sport"]
+
+                key = (
+                    f"[{sport}] {match_name} ({TAG})"
+                    if match_name
+                    else f"[{sport}] ({TAG})"
+                )
+
+                tvg_id, logo = leagues.info(sport)
+
+                entry = {
+                    "url": url,
+                    "logo": logo,
+                    "base": base_url,
+                    "timestamp": now,
+                    "id": tvg_id or "Live.Event.us",
+                    "href": ev["href"],
+                }
+
+                urls[key] = cached_urls[key] = entry
+
+    if new_count := len(cached_urls) - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
+
+
+# cloudflare bot check added
diff --git a/M3U8/scrapers/pixel.py b/M3U8/scrapers/pixel.py
index d67697d..9338317 100644
--- a/M3U8/scrapers/pixel.py
+++ b/M3U8/scrapers/pixel.py
@@ -13,6 +13,8 @@ CACHE_FILE = Cache("pixel.json", exp=19_800)
 
 BASE_URL = "https://pixelsport.tv/backend/livetv/events"
 
+TAG = "PIXL"
+
 
 async def get_api_data(url: str) -> dict[str, list[dict, str, str]]:
     async with async_playwright() as p:
@@ -69,7 +71,7 @@ async def get_events() -> dict[str, dict[str, str | float]]:
         for z, stream_url in stream_urls:
             if stream_link := channel_info.get(stream_url):
                 if pattern.search(stream_link):
-                    key = f"[{sport}] {event_name} {z} (PIXL)"
+                    key = f"[{sport}] {event_name} {z} ({TAG})"
 
                     tvg_id, logo = leagues.get_tvg_info(sport, event_name)
 
diff --git a/M3U8/scrapers/ppv.py b/M3U8/scrapers/ppv.py
index 81b5ed4..2e0faeb 100644
--- a/M3U8/scrapers/ppv.py
+++ b/M3U8/scrapers/ppv.py
@@ -16,6 +16,8 @@ API_FILE = Cache("ppv-api.json", exp=19_800)
 
 BASE_URL = "https://ppv.to"
 
+TAG = "PPV"
+
 
 async def refresh_api_cache(
     client: httpx.AsyncClient,
@@ -70,7 +72,7 @@ async def get_events(
             if not (name and start_ts and iframe):
                 continue
 
-            key = f"[{sport}] {name} (PPV)"
+            key = f"[{sport}] {name} ({TAG})"
 
             if cached_keys & {key}:
                 continue
@@ -134,7 +136,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
                     ev["timestamp"],
                 )
 
-                key = f"[{sport}] {event} (PPV)"
+                key = f"[{sport}] {event} ({TAG})"
 
                 tvg_id, pic = leagues.get_tvg_info(sport, event)
 
diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py
index e82b44b..8540394 100644
--- a/M3U8/scrapers/roxie.py
+++ b/M3U8/scrapers/roxie.py
@@ -27,6 +27,8 @@ valid_sports = {
     "soccer": "Soccer",
 }
 
+TAG = "ROXIE"
+
 
 async def process_event(
     client: httpx.AsyncClient,
@@ -90,7 +92,7 @@ async def refresh_html_cache(
 
         event_sport = valid_sports[sport]
 
-        key = f"[{event_sport}] {event} (ROXIE)"
+        key = f"[{event_sport}] {event} ({TAG})"
 
         events[key] = {
             "sport": event_sport,
@@ -184,7 +186,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
 
                 tvg_id, logo = leagues.get_tvg_info(sport, event)
 
-                key = f"[{sport}] {event} (ROXIE)"
+                key = f"[{sport}] {event} ({TAG})"
 
                 entry = {
                     "url": url,
diff --git a/M3U8/scrapers/shark.py b/M3U8/scrapers/shark.py
index d6c3e13..9875d15 100644
--- a/M3U8/scrapers/shark.py
+++ b/M3U8/scrapers/shark.py
@@ -16,6 +16,8 @@ HTML_CACHE = Cache("shark-html.json", exp=19_800)
 
 BASE_URL = "https://sharkstreams.net"
 
+TAG = "SHARK"
+
 
 async def process_event(
     client: httpx.AsyncClient,
@@ -84,7 +86,7 @@ async def refresh_html_cache(
 
         link = match[1].replace("player.php", "get-stream.php")
 
-        key = f"[{sport}] {event_name} (SHARK)"
+        key = f"[{sport}] {event_name} ({TAG})"
 
         events[key] = {
             "sport": sport,
@@ -162,7 +164,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
 
                 tvg_id, logo = leagues.get_tvg_info(sport, event)
 
-                key = f"[{sport}] {event} (SHARK)"
+                key = f"[{sport}] {event} ({TAG})"
 
                 entry = {
                     "url": url,
diff --git a/M3U8/scrapers/sport9.py b/M3U8/scrapers/sport9.py
index 1804a0f..717c100 100644
--- a/M3U8/scrapers/sport9.py
+++ b/M3U8/scrapers/sport9.py
@@ -16,6 +16,8 @@ CACHE_FILE = Cache("sport9.json", exp=3_600)
 
 BASE_URL = "https://sport9.ru"
 
+TAG = "SPRT9"
+
 
 async def get_html(
     client: httpx.AsyncClient,
@@ -86,7 +88,7 @@ async def get_events(
         if not (href := card.attributes.get("href")):
             continue
 
-        key = f"[{sport}] {event} (SPRT9)"
+        key = f"[{sport}] {event} ({TAG})"
 
         if cached_keys & {key}:
             continue
@@ -139,7 +141,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
             if url:
                 sport, event = ev["sport"], ev["event"]
 
-                key = f"[{sport}] {event} (SPRT9)"
+                key = f"[{sport}] {event} ({TAG})"
 
                 tvg_id, logo = leagues.get_tvg_info(sport, event)
 
diff --git a/M3U8/scrapers/streambtw.py b/M3U8/scrapers/streambtw.py
index 383501d..3ad69ca 100644
--- a/M3U8/scrapers/streambtw.py
+++ b/M3U8/scrapers/streambtw.py
@@ -16,6 +16,8 @@ CACHE_FILE = Cache("streambtw.json", exp=19_800)
 
 BASE_URL = "https://streambtw.com"
 
+TAG = "SBTW"
+
 
 async def process_event(
     client: httpx.AsyncClient,
@@ -108,7 +110,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
             if url:
                 sport, event = ev["sport"], ev["event"]
 
-                key = f"[{sport}] {event} (SBTW)"
+                key = f"[{sport}] {event} ({TAG})"
 
                 tvg_id, logo = leagues.get_tvg_info(sport, event)
 
diff --git a/M3U8/scrapers/streameast.py b/M3U8/scrapers/streameast.py
index 4fe40a5..870ba9a 100644
--- a/M3U8/scrapers/streameast.py
+++ b/M3U8/scrapers/streameast.py
@@ -35,6 +35,8 @@ MIRRORS = [
     *[f"https://thestreameast.{ext}" for ext in prefixes if prefixes[ext] == "the"],
 ]
 
+TAG = "STRMEST"
+
 
 async def get_events(
     client: httpx.AsyncClient,
@@ -86,7 +88,7 @@ async def get_events(
 
             timestamp = int(a.attributes.get("data-time", Time.default_8()))
 
-            key = f"[{sport}] {name} (SEAST)"
+            key = f"[{sport}] {name} ({TAG})"
 
             if cached_keys & {key}:
                 continue
@@ -152,7 +154,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
 
                 tvg_id, logo = leagues.get_tvg_info(sport, event)
 
-                key = f"[{sport}] {event} (SEAST)"
+                key = f"[{sport}] {event} ({TAG})"
 
                 entry = {
                     "url": url,
diff --git a/M3U8/scrapers/streamfree.py b/M3U8/scrapers/streamfree.py
index 09a0440..f34b9e9 100644
--- a/M3U8/scrapers/streamfree.py
+++ b/M3U8/scrapers/streamfree.py
@@ -12,6 +12,8 @@ CACHE_FILE = Cache("streamfree.json", exp=19_800)
 
 BASE_URL = "https://streamfree.to"
 
+TAG = "STRMFR"
+
 
 async def refresh_api_cache(
     client: httpx.AsyncClient,
@@ -40,7 +42,7 @@ async def get_events(client: httpx.AsyncClient) -> dict[str, dict[str, str | flo
     for stream in streams:
         sport, name = stream["league"], stream["name"]
 
-        key = f"[{sport}] {name} (STRMFR)"
+        key = f"[{sport}] {name} ({TAG})"
 
         tvg_id, logo = leagues.get_tvg_info(sport, name)
 
diff --git a/M3U8/scrapers/strmd.py b/M3U8/scrapers/strmd.py
index 774d300..ac192de 100644
--- a/M3U8/scrapers/strmd.py
+++ b/M3U8/scrapers/strmd.py
@@ -17,7 +17,13 @@ CACHE_FILE = Cache("strmd.json", exp=10_800)
 
 API_FILE = Cache("strmd-api.json", exp=28_800)
 
-MIRRORS = ["https://streami.su", "https://streamed.st", "https://streamed.pk"]
+MIRRORS = [
+    "https://streami.su",
+    "https://streamed.st",
+    "https://streamed.pk",
+]
+
+TAG = "STRMD"
 
 
 def fix_sport(s: str) -> str:
@@ -116,17 +122,14 @@ async def process_event(
 
 async def get_events(
     client: httpx.AsyncClient,
-    base_url: str,
+    url: str,
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
 
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
         api_data = await refresh_api_cache(
             client,
-            urljoin(
-                base_url,
-                "api/matches/all-today",
-            ),
+            urljoin(url, "api/matches/all-today"),
         )
 
         API_FILE.write(api_data)
@@ -157,9 +160,9 @@ async def get_events(
         parts = pattern.split(event["title"].strip())
         name = " | ".join(p.strip() for p in parts if p.strip())
 
-        logo = urljoin(base_url, poster) if (poster := event.get("poster")) else None
+        logo = urljoin(url, poster) if (poster := event.get("poster")) else None
 
-        key = f"[{sport}] {name} (STRMD)"
+        key = f"[{sport}] {name} ({TAG})"
 
         if cached_keys & {key}:
             continue
@@ -244,7 +247,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
                     ev["timestamp"],
                 )
 
-                key = f"[{sport}] {event} (STRMD)"
+                key = f"[{sport}] {event} ({TAG})"
 
                 tvg_id, pic = leagues.get_tvg_info(sport, event)
 
diff --git a/M3U8/scrapers/tvpass.py b/M3U8/scrapers/tvpass.py
index e549516..3cfd844 100644
--- a/M3U8/scrapers/tvpass.py
+++ b/M3U8/scrapers/tvpass.py
@@ -12,6 +12,8 @@ CACHE_FILE = Cache("tvpass.json", exp=86_400)
 
 BASE_URL = "https://tvpass.org/playlist/m3u"
 
+TAG = "TVP"
+
 
 async def get_data(client: httpx.AsyncClient) -> list[str]:
     try:
@@ -46,7 +48,7 @@ async def get_events(client: httpx.AsyncClient) -> dict[str, dict[str, str | flo
 
         event = "(".join(tvg_name.split("(")[:-1]).strip()
 
-        key = f"[{sport}] {event} (TVP)"
+        key = f"[{sport}] {event} ({TAG})"
 
         channel = url.split("/")[-2]
 
diff --git a/M3U8/scrapers/watchfooty.py b/M3U8/scrapers/watchfooty.py
index 18fd339..eb0e689 100644
--- a/M3U8/scrapers/watchfooty.py
+++ b/M3U8/scrapers/watchfooty.py
@@ -37,6 +37,8 @@ SPORT_ENDPOINTS = [
     # "volleyball",
 ]
 
+TAG = "WFTY"
+
 
 async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, Any]]:
     try:
@@ -164,12 +166,12 @@ async def process_event(
 
 async def get_events(
     client: httpx.AsyncClient,
-    base_url: str,
+    url: str,
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
 
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
-        api_data = await refresh_api_cache(client, base_url)
+        api_data = await refresh_api_cache(client, url)
 
         API_FILE.write(api_data)
 
@@ -200,9 +202,9 @@ async def get_events(
 
             sport = pattern.split(league, 1)[0].strip()
 
-            logo = urljoin(base_url, poster) if (poster := event.get("poster")) else None
+            logo = urljoin(url, poster) if (poster := event.get("poster")) else None
 
-            key = f"[{sport}] {name} (WFTY)"
+            key = f"[{sport}] {name} ({TAG})"
 
             if cached_keys & {key}:
                 continue
@@ -211,7 +213,7 @@ async def get_events(
                 {
                     "sport": sport,
                     "event": name,
-                    "link": urljoin(base_url, f"stream/{match_id}"),
+                    "link": urljoin(url, f"stream/{match_id}"),
                     "logo": logo,
                     "timestamp": event_dt.timestamp(),
                 }
@@ -268,7 +270,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
                     ev["timestamp"],
                 )
 
-                key = f"[{sport}] {event} (WFTY)"
+                key = f"[{sport}] {event} ({TAG})"
 
                 tvg_id, pic = leagues.get_tvg_info(sport, event)