diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index cddfd93..dd74d69 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -14,6 +14,7 @@ from scrapers import (
     streambtw,
     streamcenter,
     streamfree,
+    streamhub,
     streamsgate,
     strmd,
     timstreams,
@@ -58,6 +59,7 @@ async def main() -> None:
         asyncio.create_task(streambtw.scrape(network.client)),
         asyncio.create_task(streamcenter.scrape(network.client)),
         asyncio.create_task(streamfree.scrape(network.client)),
+        asyncio.create_task(streamhub.scrape(network.client)),
         asyncio.create_task(streamsgate.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
         asyncio.create_task(timstreams.scrape(network.client)),
@@ -80,6 +82,7 @@ async def main() -> None:
         | streamcenter.urls
         | strmd.urls
         | streamfree.urls
+        | streamhub.urls
         | streamsgate.urls
         | timstreams.urls
         | tvpass.urls
diff --git a/M3U8/scrapers/fawa.py b/M3U8/scrapers/fawa.py
index a80b444..83db6b8 100644
--- a/M3U8/scrapers/fawa.py
+++ b/M3U8/scrapers/fawa.py
@@ -11,12 +11,12 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("fawa.json", exp=10_800)
+TAG = "FAWA"
+
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
 
 BASE_URL = "http://www.fawanews.sc/"
 
-TAG = "FAWA"
-
 
 async def process_event(
     client: httpx.AsyncClient,
diff --git a/M3U8/scrapers/lotus.py b/M3U8/scrapers/lotus.py
index 84e9087..831d674 100644
--- a/M3U8/scrapers/lotus.py
+++ b/M3U8/scrapers/lotus.py
@@ -9,14 +9,14 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("lotus.json", exp=5_400)
+TAG = "LOTUS"
 
-API_CACHE = Cache("lotus-api.json", exp=28_800)
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=5_400)
+
+API_CACHE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 
 BASE_URL = "https://lotusgamehd.xyz/api-event.php"
 
-TAG = "LOTUS"
-
 
 def fix_league(s: str) -> str:
     return " ".join(x.capitalize() for x in s.split()) if len(s) > 5 else s.upper()
@@ -46,16 +46,14 @@ async def refresh_api_cache(
 
 
 async def get_events(
-    client: httpx.AsyncClient,
-    url: str,
-    cached_keys: set[str],
+    client: httpx.AsyncClient, cached_keys: set[str]
 ) -> list[dict[str, str]]:
     now = Time.now()
 
     if not (api_data := API_CACHE.load(per_entry=False)):
         api_data = await refresh_api_cache(
             client,
-            url,
+            BASE_URL,
             now.timestamp(),
         )
@@ -108,11 +106,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
 
     log.info(f'Scraping from "{BASE_URL}"')
 
-    events = await get_events(
-        client,
-        BASE_URL,
-        set(cached_urls.keys()),
-    )
+    events = await get_events(client, set(cached_urls.keys()))
 
     log.info(f"Processing {len(events)} new URL(s)")
diff --git a/M3U8/scrapers/pixel.py b/M3U8/scrapers/pixel.py
index 0e6944c..0eab75f 100644
--- a/M3U8/scrapers/pixel.py
+++ b/M3U8/scrapers/pixel.py
@@ -9,14 +9,14 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("pixel.json", exp=19_800)
+TAG = "PIXEL"
+
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=19_800)
 
 BASE_URL = "https://pixelsport.tv/backend/livetv/events"
 
-TAG = "PIXL"
-
 
-async def get_api_data(url: str) -> dict[str, list[dict, str, str]]:
+async def get_api_data() -> dict[str, list[dict, str, str]]:
     async with async_playwright() as p:
         try:
             browser, context = await network.browser(p)
@@ -24,7 +24,7 @@ async def get_api_data(url: str) -> dict[str, list[dict, str, str]]:
 
             page = await context.new_page()
             await page.goto(
-                url,
+                BASE_URL,
                 wait_until="domcontentloaded",
                 timeout=10_000,
             )
@@ -32,7 +32,7 @@
             raw_json = await page.locator("pre").inner_text(timeout=5_000)
         except Exception as e:
-            log.error(f'Failed to fetch "{url}": {e}')
+            log.error(f'Failed to fetch "{BASE_URL}": {e}')
 
             return {}
@@ -45,7 +45,7 @@ async def get_api_data(url: str) -> dict[str, list[dict, str, str]]:
 
 async def get_events() -> dict[str, dict[str, str | float]]:
     now = Time.clean(Time.now())
 
-    api_data = await get_api_data(BASE_URL)
+    api_data = await get_api_data()
 
     events = {}
diff --git a/M3U8/scrapers/ppv.py b/M3U8/scrapers/ppv.py
index 243947d..8d020c6 100644
--- a/M3U8/scrapers/ppv.py
+++ b/M3U8/scrapers/ppv.py
@@ -9,9 +9,11 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("ppv.json", exp=10_800)
+TAG = "PPV"
 
-API_FILE = Cache("ppv-api.json", exp=19_800)
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=19_800)
 
 API_MIRRORS = [
     "https://old.ppv.to/api/streams",
@@ -25,8 +27,6 @@ BASE_MIRRORS = [
     "https://ppv.to",
 ]
 
-TAG = "PPV"
-
 
 async def refresh_api_cache(
     client: httpx.AsyncClient,
diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py
index ed99701..005b8ee 100644
--- a/M3U8/scrapers/roxie.py
+++ b/M3U8/scrapers/roxie.py
@@ -12,9 +12,11 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("roxie.json", exp=10_800)
+TAG = "ROXIE"
 
-HTML_CACHE = Cache("roxie-html.json", exp=19_800)
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=19_800)
 
 BASE_URL = "https://roxiestreams.live"
 
@@ -28,8 +30,6 @@ SPORT_ENDPOINTS = {
     "soccer": "Soccer",
 }
 
-TAG = "ROXIE"
-
 
 async def process_event(
     client: httpx.AsyncClient,
diff --git a/M3U8/scrapers/shark.py b/M3U8/scrapers/shark.py
index 197bfb7..54b4587 100644
--- a/M3U8/scrapers/shark.py
+++ b/M3U8/scrapers/shark.py
@@ -10,14 +10,14 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("shark.json", exp=10_800)
+TAG = "SHARK"
 
-HTML_CACHE = Cache("shark-html.json", exp=19_800)
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=19_800)
 
 BASE_URL = "https://sharkstreams.net"
 
-TAG = "SHARK"
-
 
 async def process_event(
     client: httpx.AsyncClient,
@@ -45,18 +45,16 @@ async def process_event(
 
 
 async def refresh_html_cache(
-    client: httpx.AsyncClient,
-    url: str,
-    now_ts: float,
+    client: httpx.AsyncClient, now_ts: float
 ) -> dict[str, dict[str, str | float]]:
     log.info("Refreshing HTML cache")
 
     try:
-        r = await client.get(url)
+        r = await client.get(BASE_URL)
         r.raise_for_status()
     except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{BASE_URL}": {e}')
 
         return {}
@@ -108,11 +106,7 @@ async def get_events(
     now = Time.clean(Time.now())
 
     if not (events := HTML_CACHE.load()):
-        events = await refresh_html_cache(
-            client,
-            BASE_URL,
-            now.timestamp(),
-        )
+        events = await refresh_html_cache(client, now.timestamp())
 
         HTML_CACHE.write(events)
diff --git a/M3U8/scrapers/sport9.py b/M3U8/scrapers/sport9.py
index 6583b09..a835aad 100644
--- a/M3U8/scrapers/sport9.py
+++ b/M3U8/scrapers/sport9.py
@@ -12,12 +12,12 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("sport9.json", exp=3_600)
+TAG = "SPRT9"
+
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
 
 BASE_URL = "https://sport9.ru"
 
-TAG = "SPRT9"
-
 
 async def get_html(
     client: httpx.AsyncClient,
diff --git a/M3U8/scrapers/streambtw.py b/M3U8/scrapers/streambtw.py
index 20ff73f..75e653b 100644
--- a/M3U8/scrapers/streambtw.py
+++ b/M3U8/scrapers/streambtw.py
@@ -12,12 +12,12 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("streambtw.json", exp=3_600)
+TAG = "STRMBTW"
+
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
 
 BASE_URL = "https://streambtw.com"
 
-TAG = "STRMBTW"
-
 
 def fix_league(s: str) -> str:
     pattern = re.compile(r"^\w*-\w*", re.IGNORECASE)
diff --git a/M3U8/scrapers/streamcenter.py b/M3U8/scrapers/streamcenter.py
index cd83b05..2cf5a1e 100644
--- a/M3U8/scrapers/streamcenter.py
+++ b/M3U8/scrapers/streamcenter.py
@@ -9,13 +9,14 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("streamcenter.json", exp=10_800)
+TAG = "STRMCNTR"
 
-API_FILE = Cache("streamcenter-api.json", exp=28_800)
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 
 BASE_URL = "https://backendstreamcenter.youshop.pro:488/api/Parties"
 
-TAG = "STRMCNTR"
 
 categories = {
     4: "Basketball",
@@ -33,17 +34,15 @@ categories = {
 
 
 async def refresh_api_cache(
-    client: httpx.AsyncClient,
-    url: str,
-    now_ts: float,
+    client: httpx.AsyncClient, now_ts: float
 ) -> list[dict[str, str | int]]:
     log.info("Refreshing API cache")
 
     try:
-        r = await client.get(url, params={"pageNumber": 1, "pageSize": 500})
+        r = await client.get(BASE_URL, params={"pageNumber": 1, "pageSize": 500})
         r.raise_for_status()
     except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{BASE_URL}": {e}')
 
         return []
@@ -62,11 +61,7 @@ async def get_events(
     now = Time.clean(Time.now())
 
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
-        api_data = await refresh_api_cache(
-            client,
-            BASE_URL,
-            now.timestamp(),
-        )
+        api_data = await refresh_api_cache(client, now.timestamp())
 
         API_FILE.write(api_data)
diff --git a/M3U8/scrapers/streamfree.py b/M3U8/scrapers/streamfree.py
index 5a4348d..49fbda0 100644
--- a/M3U8/scrapers/streamfree.py
+++ b/M3U8/scrapers/streamfree.py
@@ -8,18 +8,17 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("streamfree.json", exp=19_800)
+TAG = "STRMFREE"
 
-BASE_URL = "https://streamfree.to"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=19_800)
 
-TAG = "STRMFR"
+BASE_URL = "https://streamfree.to/"
 
 
-async def refresh_api_cache(
-    client: httpx.AsyncClient,
-    url: str,
-) -> dict[str, dict[str, list]]:
+async def refresh_api_cache(client: httpx.AsyncClient) -> dict[str, dict[str, list]]:
     try:
+        url = urljoin(BASE_URL, "streams")
+
         r = await client.get(url)
         r.raise_for_status()
     except Exception as e:
@@ -31,7 +30,7 @@ async def refresh_api_cache(
 
 
 async def get_events(client: httpx.AsyncClient) -> dict[str, dict[str, str | float]]:
-    api_data = await refresh_api_cache(client, urljoin(BASE_URL, "streams"))
+    api_data = await refresh_api_cache(client)
 
     events = {}
diff --git a/M3U8/scrapers/streamhub.py b/M3U8/scrapers/streamhub.py
new file mode 100644
index 0000000..98bc5b2
--- /dev/null
+++ b/M3U8/scrapers/streamhub.py
@@ -0,0 +1,142 @@
+from functools import partial
+
+import httpx
+from playwright.async_api import async_playwright
+from selectolax.parser import HTMLParser
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+TAG = "STRMHUB"
+
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+BASE_URL = "https://streamhub.pro/live-now"
+
+
+async def get_events(
+    client: httpx.AsyncClient, cached_keys: set[str]
+) -> list[dict[str, str]]:
+    try:
+        r = await client.get(BASE_URL)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+
+        return []
+
+    soup = HTMLParser(r.content)
+
+    events = []
+
+    for event in soup.css(".events-section"):
+        if not (title_node := event.css_first(".section-titlte")):
+            continue
+
+        sport = title_node.text(strip=True)
+
+        if not event.css_first(".event-competitors"):
+            continue
+
+        home_team = event.css_first(".event-home-team").text(strip=True)
+        away_team = event.css_first(".event-visitor-team").text(strip=True)
+
+        logo = event.css_first(".league-icon img").attributes.get("src")
+
+        if not (event_button := event.css_first("div.event-button a")) or not (
+            href := event_button.attributes.get("href")
+        ):
+            continue
+
+        event_name = f"{away_team} vs {home_team}"
+
+        key = f"[{sport}] {event_name} ({TAG})"
+
+        if cached_keys & {key}:
+            continue
+
+        events.append(
+            {
+                "sport": sport,
+                "event": event_name,
+                "link": href,
+                "logo": logo,
+            }
+        )
+
+    return events
+
+
+async def scrape(client: httpx.AsyncClient) -> None:
+    cached_urls = CACHE_FILE.load()
+    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+    valid_count = cached_count = len(valid_urls)
+    urls.update(valid_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info(f'Scraping from "{BASE_URL}"')
+
+    events = await get_events(client, set(cached_urls.keys()))
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    if events:
+        now = Time.now().timestamp()
+
+        async with async_playwright() as p:
+            browser, context = await network.browser(p)
+
+            for i, ev in enumerate(events, start=1):
+                handler = partial(
+                    network.process_event,
+                    url=ev["link"],
+                    url_num=i,
+                    context=context,
+                    timeout=5,
+                    log=log,
+                )
+
+                url = await network.safe_process(
+                    handler,
+                    url_num=i,
+                    log=log,
+                )
+
+                sport, event, logo, link = (
+                    ev["sport"],
+                    ev["event"],
+                    ev["logo"],
+                    ev["link"],
+                )
+
+                key = f"[{sport}] {event} ({TAG})"
+
+                tvg_id, pic = leagues.get_tvg_info(sport, event)
+
+                entry = {
+                    "url": url,
+                    "logo": logo or pic,
+                    "base": "https://storytrench.net/",
+                    "timestamp": now,
+                    "id": tvg_id or "Live.Event.us",
+                    "link": link,
+                }
+
+                cached_urls[key] = entry
+
+                if url:
+                    valid_count += 1
+                    urls[key] = entry
+
+            await browser.close()
+
+    if new_count := valid_count - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/streamsgate.py b/M3U8/scrapers/streamsgate.py
index 92a14dc..953f585 100644
--- a/M3U8/scrapers/streamsgate.py
+++ b/M3U8/scrapers/streamsgate.py
@@ -13,9 +13,11 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("streamsgate.json", exp=10_800)
+TAG = "STRMSG8"
 
-API_FILE = Cache("streamsgate-api.json", exp=28_800)
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 
 BASE_URL = "https://streamingon.org"
 
@@ -31,8 +33,6 @@ SPORT_ENDPOINTS = [
     "f1",
 ]
 
-TAG = "STRMSG8"
-
 
 def get_event(t1: str, t2: str) -> str:
     match t1:
@@ -94,8 +94,8 @@ async def get_events(
 
     events = []
 
-    start_dt = now.delta(minutes=-30)
-    end_dt = now.delta(minutes=30)
+    start_dt = now.delta(hours=-1)
+    end_dt = now.delta(minutes=10)
 
     for stream_group in api_data:
         event_ts = stream_group.get("ts")
diff --git a/M3U8/scrapers/strmd.py b/M3U8/scrapers/strmd.py
index d36cce0..6fc6645 100644
--- a/M3U8/scrapers/strmd.py
+++ b/M3U8/scrapers/strmd.py
@@ -12,9 +12,11 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("strmd.json", exp=10_800)
+TAG = "STRMD"
 
-API_FILE = Cache("strmd-api.json", exp=28_800)
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 
 MIRRORS = [
     "https://streami.su",
@@ -22,8 +24,6 @@ MIRRORS = [
     "https://streamed.pk",
 ]
 
-TAG = "STRMD"
-
 
 def fix_sport(s: str) -> str:
     if "-" in s:
diff --git a/M3U8/scrapers/timstreams.py b/M3U8/scrapers/timstreams.py
index 3997c37..3442438 100644
--- a/M3U8/scrapers/timstreams.py
+++ b/M3U8/scrapers/timstreams.py
@@ -10,7 +10,9 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("timstreams.json", exp=10_800)
+TAG = "TIM"
+
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
 
 API_URL = "https://api.timstreams.site/main"
 
@@ -21,7 +23,6 @@ BASE_MIRRORS = [
     "https://timstreams.top",
 ]
 
-TAG = "TIM"
 
 sport_genres = {
     1: "Soccer",
@@ -44,14 +45,12 @@ sport_genres = {
 }
 
 
-async def refresh_api_cache(
-    client: httpx.AsyncClient, url: str
-) -> list[dict[str, Any]]:
+async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
     try:
-        r = await client.get(url)
+        r = await client.get(API_URL)
         r.raise_for_status()
     except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{API_URL}": {e}')
 
         return []
@@ -59,12 +58,9 @@ async def refresh_api_cache(
 
 
 async def get_events(
-    client: httpx.AsyncClient,
-    url: str,
-    cached_keys: set[str],
+    client: httpx.AsyncClient, cached_keys: set[str]
 ) -> list[dict[str, str]]:
-
-    api_data = await refresh_api_cache(client, url)
+    api_data = await refresh_api_cache(client)
 
     now = Time.now().timestamp()
@@ -125,11 +121,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
 
     log.info(f'Scraping from "{base_url}"')
 
-    events = await get_events(
-        client,
-        API_URL,
-        set(cached_urls.keys()),
-    )
+    events = await get_events(client, set(cached_urls.keys()))
 
     log.info(f"Processing {len(events)} new URL(s)")
diff --git a/M3U8/scrapers/tvpass.py b/M3U8/scrapers/tvpass.py
index 3cfd844..2fb9b32 100644
--- a/M3U8/scrapers/tvpass.py
+++ b/M3U8/scrapers/tvpass.py
@@ -8,12 +8,12 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("tvpass.json", exp=86_400)
+TAG = "TVP"
+
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=86_400)
 
 BASE_URL = "https://tvpass.org/playlist/m3u"
 
-TAG = "TVP"
-
 
 async def get_data(client: httpx.AsyncClient) -> list[str]:
     try:
diff --git a/M3U8/scrapers/watchfooty.py b/M3U8/scrapers/watchfooty.py
index a7e7b98..29c02c1 100644
--- a/M3U8/scrapers/watchfooty.py
+++ b/M3U8/scrapers/watchfooty.py
@@ -14,9 +14,11 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("watchfty.json", exp=10_800)
+TAG = "WFTY"
 
-API_FILE = Cache("watchfty-api.json", exp=28_800)
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 
 API_MIRRORS = ["https://api.watchfooty.top", "https://api.watchfooty.st"]
 
@@ -39,8 +41,6 @@ SPORT_ENDPOINTS = [
     # "volleyball",
 ]
 
-TAG = "WFTY"
-
 
 async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, Any]]:
     try:
@@ -55,8 +55,7 @@ async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, An
 
 
 async def refresh_api_cache(
-    client: httpx.AsyncClient,
-    url: str,
+    client: httpx.AsyncClient, url: str
 ) -> list[dict[str, Any]]:
     log.info("Refreshing API cache")
diff --git a/M3U8/scrapers/webcast.py b/M3U8/scrapers/webcast.py
index d546b5c..07d8ec2 100644
--- a/M3U8/scrapers/webcast.py
+++ b/M3U8/scrapers/webcast.py
@@ -11,14 +11,14 @@ log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
-CACHE_FILE = Cache("webcast.json", exp=10_800)
+TAG = "WEBCST"
 
-HTML_CACHE = Cache("webcast-html.json", exp=86_400)
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=86_400)
 
 BASE_URLS = {"NFL": "https://nflwebcast.com", "NHL": "https://slapstreams.com"}
 
-TAG = "WEBCST"
-
 
 def fix_event(s: str) -> str:
     return " vs ".join(s.split("@"))