diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 849d0e9..77baaa7 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -11,13 +11,11 @@ from scrapers import (
     roxie,
     shark,
     sport9,
-    streambtw,
     streamcenter,
     streamfree,
     streamhub,
     streamsgate,
     strmd,
-    timstreams,
     tvpass,
     watchfooty,
     webcast,
@@ -56,13 +54,11 @@ async def main() -> None:
         asyncio.create_task(roxie.scrape(network.client)),
         asyncio.create_task(shark.scrape(network.client)),
         asyncio.create_task(sport9.scrape(network.client)),
-        # asyncio.create_task(streambtw.scrape(network.client)),
         asyncio.create_task(streamcenter.scrape(network.client)),
         asyncio.create_task(streamfree.scrape(network.client)),
         asyncio.create_task(streamhub.scrape(network.client)),
         asyncio.create_task(streamsgate.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
-        # asyncio.create_task(timstreams.scrape(network.client)),
         asyncio.create_task(tvpass.scrape(network.client)),
         asyncio.create_task(watchfooty.scrape(network.client)),
         asyncio.create_task(webcast.scrape(network.client)),
@@ -78,13 +74,11 @@ async def main() -> None:
         | roxie.urls
         | shark.urls
         | sport9.urls
-        | streambtw.urls
         | streamcenter.urls
         | strmd.urls
         | streamfree.urls
         | streamhub.urls
         | streamsgate.urls
-        | timstreams.urls
         | tvpass.urls
         | watchfooty.urls
         | webcast.urls
diff --git a/M3U8/scrapers/streambtw.py b/M3U8/scrapers/streambtw.py
deleted file mode 100644
index 75e653b..0000000
--- a/M3U8/scrapers/streambtw.py
+++ /dev/null
@@ -1,145 +0,0 @@
-import base64
-import re
-from functools import partial
-from urllib.parse import urljoin
-
-import httpx
-from selectolax.parser import HTMLParser
-
-from .utils import Cache, Time, get_logger, leagues, network
-
-log = get_logger(__name__)
-
-urls: dict[str, dict[str, str | float]] = {}
-
-TAG = "STRMBTW"
-
-CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
-
-BASE_URL = "https://streambtw.com"
-
-
-def fix_league(s: str) -> str:
-    pattern = re.compile(r"^\w*-\w*", re.IGNORECASE)
-
-    return " ".join(s.split("-")) if pattern.search(s) else s
-
-
-async def process_event(
-    client: httpx.AsyncClient,
-    url: str,
-    url_num: int,
-) -> str | None:
-
-    try:
-        r = await client.get(url)
-        r.raise_for_status()
-    except Exception as e:
-        log.error(f'URL {url_num}) Failed to fetch "{url}": {e}')
-        return
-
-    valid_m3u8 = re.compile(r'var\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)
-
-    if not (match := valid_m3u8.search(r.text)):
-        log.info(f"URL {url_num}) No M3U8 found")
-        return
-
-    stream_link: str = match[2]
-
-    if not stream_link.startswith("http"):
-        stream_link = base64.b64decode(stream_link).decode("utf-8")
-
-    log.info(f"URL {url_num}) Captured M3U8")
-
-    return stream_link
-
-
-async def get_events(client: httpx.AsyncClient) -> list[dict[str, str]]:
-    try:
-        r = await client.get(BASE_URL)
-        r.raise_for_status()
-    except Exception as e:
-        log.error(f'Failed to fetch "{BASE_URL}": {e}')
-
-        return []
-
-    soup = HTMLParser(r.content)
-
-    events = []
-
-    for card in soup.css("div.container div.card"):
-        link = card.css_first("a.btn.btn-primary")
-
-        if not (href := link.attrs.get("href")):
-            continue
-
-        league = card.css_first("h5.card-title").text(strip=True)
-
-        name = card.css_first("p.card-text").text(strip=True)
-
-        events.append(
-            {
-                "sport": fix_league(league),
-                "event": name,
-                "link": urljoin(BASE_URL, href),
-            }
-        )
-
-    return events
-
-
-async def scrape(client: httpx.AsyncClient) -> None:
-    if cached := CACHE_FILE.load():
-        urls.update(cached)
-        log.info(f"Loaded {len(urls)} event(s) from cache")
-        return
-
-    log.info(f'Scraping from "{BASE_URL}"')
-
-    events = await get_events(client)
-
-    log.info(f"Processing {len(events)} new URL(s)")
-
-    if events:
-        now = Time.now().timestamp()
-
-        for i, ev in enumerate(events, start=1):
-            handler = partial(
-                process_event,
-                client=client,
-                url=ev["link"],
-                url_num=i,
-            )
-
-            url = await network.safe_process(
-                handler,
-                url_num=i,
-                log=log,
-                timeout=10,
-            )
-
-            if url:
-                sport, event, link = (
-                    ev["sport"],
-                    ev["event"],
-                    ev["link"],
-                )
-
-                key = f"[{sport}] {event} ({TAG})"
-
-                tvg_id, logo = leagues.get_tvg_info(sport, event)
-
-                entry = {
-                    "url": url,
-                    "logo": logo,
-                    "base": link,
-                    "timestamp": now,
-                    "id": tvg_id or "Live.Event.us",
-                    "link": link,
-                }
-
-                urls[key] = entry
-
-    log.info(f"Collected {len(urls)} event(s)")
-
-    CACHE_FILE.write(urls)
diff --git a/M3U8/scrapers/streamhub.py b/M3U8/scrapers/streamhub.py
index cd0a827..6b259e5 100644
--- a/M3U8/scrapers/streamhub.py
+++ b/M3U8/scrapers/streamhub.py
@@ -142,12 +142,13 @@ async def get_events(
     live = []
 
     start_ts = now.delta(minutes=-30).timestamp()
+    end_ts = now.delta(minutes=5).timestamp()
 
     for k, v in events.items():
         if cached_keys & {k}:
             continue
 
-        if not start_ts <= v["event_ts"]:
+        if not start_ts <= v["event_ts"] <= end_ts:
             continue
 
         live.append({**v})
diff --git a/M3U8/scrapers/streamsgate.py b/M3U8/scrapers/streamsgate.py
index f35bfeb..39037cc 100644
--- a/M3U8/scrapers/streamsgate.py
+++ b/M3U8/scrapers/streamsgate.py
@@ -95,6 +95,7 @@ async def get_events(
     events = []
 
     start_dt = now.delta(minutes=-30)
+    end_dt = now.delta(minutes=5)
 
     for stream_group in api_data:
         event_ts = stream_group.get("ts")
@@ -108,7 +109,7 @@ async def get_events(
 
         event_dt = Time.from_ts(event_ts)
 
-        if not start_dt <= event_dt:
+        if not start_dt <= event_dt <= end_dt:
             continue
 
         event = get_event(t1, t2)
diff --git a/M3U8/scrapers/timstreams.py b/M3U8/scrapers/timstreams.py
deleted file mode 100644
index 1a93ce1..0000000
--- a/M3U8/scrapers/timstreams.py
+++ /dev/null
@@ -1,175 +0,0 @@
-from functools import partial
-from typing import Any
-
-import httpx
-from playwright.async_api import async_playwright
-
-from .utils import Cache, Time, get_logger, leagues, network
-
-log = get_logger(__name__)
-
-urls: dict[str, dict[str, str | float]] = {}
-
-TAG = "TIM"
-
-CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
-
-API_URL = "https://api.timstreams.site/main"
-
-BASE_MIRRORS = [
-    "https://timstreams.site",
-    "https://timstreams.space",
-    "https://timstreams.top",
-]
-
-SPORT_GENRES = {
-    1: "Soccer",
-    2: "Motorsport",
-    3: "MMA",
-    4: "Fight",
-    5: "Boxing",
-    6: "Wrestling",
-    7: "Basketball",
-    8: "American Football",
-    9: "Baseball",
-    10: "Tennis",
-    11: "Hockey",
-    12: "Darts",
-    13: "Cricket",
-    14: "Cycling",
-    15: "Rugby",
-    16: "Live Shows",
-    17: "Other",
-}
-
-
-async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
-    try:
-        r = await client.get(API_URL)
-        r.raise_for_status()
-    except Exception as e:
-        log.error(f'Failed to fetch "{API_URL}": {e}')
-
-        return []
-
-    return r.json()
-
-
-async def get_events(
-    client: httpx.AsyncClient, cached_keys: set[str]
-) -> list[dict[str, str]]:
-    api_data = await refresh_api_cache(client)
-
-    now = Time.now().timestamp()
-
-    events = []
-
-    for info in api_data:
-        if not (category := info.get("category")) or category != "Events":
-            continue
-
-        stream_events: list[dict[str, Any]] = info["events"]
-
-        for ev in stream_events:
-            name: str = ev["name"]
-
-            logo = ev.get("logo")
-
-            if (genre := ev["genre"]) in {16, 17}:
-                continue
-
-            sport = SPORT_GENRES.get(genre, "Live Event")
-
-            streams: list[dict[str, str]] = ev["streams"]
-
-            for stream in streams:
-                key = f"[{sport}] {name} ({TAG})"
-
-                if cached_keys & {key}:
-                    continue
-
-                if not (url := stream.get("url")):
-                    continue
-
-                events.append(
-                    {
-                        "key": key,
-                        "sport": sport,
-                        "event": name,
-                        "link": url,
-                        "logo": logo,
-                        "timestamp": now,
-                    }
-                )
-
-    return events
-
-
-async def scrape(client: httpx.AsyncClient) -> None:
-    cached_urls = CACHE_FILE.load()
-    cached_count = len(cached_urls)
-    urls.update(cached_urls)
-
-    log.info(f"Loaded {cached_count} event(s) from cache")
-
-    if not (base_url := await network.get_base(BASE_MIRRORS)):
-        log.warning("No working Timstreams mirrors")
-        CACHE_FILE.write(cached_urls)
-        return
-
-    log.info(f'Scraping from "{base_url}"')
-
-    events = await get_events(client, set(cached_urls.keys()))
-
-    log.info(f"Processing {len(events)} new URL(s)")
-
-    if events:
-        async with async_playwright() as p:
-            browser, context = await network.browser(p)
-
-            for i, ev in enumerate(events, start=1):
-                handler = partial(
-                    network.process_event,
-                    url=ev["link"],
-                    url_num=i,
-                    context=context,
-                    log=log,
-                )
-
-                url = await network.safe_process(
-                    handler,
-                    url_num=i,
-                    log=log,
-                )
-
-                if url:
-                    sport, event, logo, ts, link, key = (
-                        ev["sport"],
-                        ev["event"],
-                        ev["logo"],
-                        ev["timestamp"],
-                        ev["link"],
-                        ev["key"],
-                    )
-
-                    tvg_id, pic = leagues.get_tvg_info(sport, event)
-
-                    entry = {
-                        "url": url,
-                        "logo": logo or pic,
-                        "base": base_url,
-                        "timestamp": ts,
-                        "id": tvg_id or "Live.Event.us",
-                        "link": link,
-                    }
-
-                    urls[key] = cached_urls[key] = entry
-
-            await browser.close()
-
-    if new_count := len(cached_urls) - cached_count:
-        log.info(f"Collected and cached {new_count} new event(s)")
-    else:
-        log.info("No new events found")
-
-    CACHE_FILE.write(cached_urls)
diff --git a/M3U8/scrapers/watchfooty.py b/M3U8/scrapers/watchfooty.py
index 41b8d81..fcc9ce7 100644
--- a/M3U8/scrapers/watchfooty.py
+++ b/M3U8/scrapers/watchfooty.py
@@ -104,7 +104,7 @@ async def process_event(
             timeout=15_000,
         )
 
-        await page.wait_for_timeout(1_500)
+        await page.wait_for_timeout(2_000)
 
         try:
             header = await page.wait_for_selector(
@@ -178,6 +178,8 @@ async def get_events(
     now = Time.clean(Time.now())
 
     start_dt = now.delta(minutes=-30)
+    end_dt = now.delta(minutes=5)
+
     pattern = re.compile(r"\-+|\(")
 
     for event in api_data:
@@ -195,7 +197,7 @@ async def get_events(
 
         event_dt = Time.from_ts(start_ts)
 
-        if not start_dt <= event_dt:
+        if not start_dt <= event_dt <= end_dt:
             continue
 
         sport = pattern.split(league, 1)[0].strip()