From 00000d9eed0cb7dfaf2db7f528dc819c48999833 Mon Sep 17 00:00:00 2001 From: doms9 <96013514+doms9@users.noreply.github.com> Date: Wed, 4 Mar 2026 18:15:39 -0500 Subject: [PATCH] e - add totalsportek1.py - misc edits --- M3U8/fetch.py | 11 +- M3U8/scrapers/pawa.py | 4 +- M3U8/scrapers/roxie.py | 26 +-- M3U8/scrapers/streamhub.py | 25 +-- M3U8/scrapers/streamsgate.py | 31 ++-- M3U8/scrapers/totalsportek1.py | 170 ++++++++++++++++++ .../{totalsportek.py => totalsportek3.py} | 16 +- M3U8/scrapers/utils/config.py | 7 + M3U8/scrapers/volokit.py | 20 +-- M3U8/scrapers/xstreameast.py | 27 ++- 10 files changed, 259 insertions(+), 78 deletions(-) create mode 100644 M3U8/scrapers/totalsportek1.py rename M3U8/scrapers/{totalsportek.py => totalsportek3.py} (92%) diff --git a/M3U8/fetch.py b/M3U8/fetch.py index 741ff0d7..2a4037b7 100644 --- a/M3U8/fetch.py +++ b/M3U8/fetch.py @@ -22,7 +22,8 @@ from scrapers import ( streamhub, streamsgate, timstreams, - totalsportek, + totalsportek1, + totalsportek3, tvapp, volokit, watchfooty, @@ -80,10 +81,11 @@ async def main() -> None: asyncio.create_task(fawa.scrape()), asyncio.create_task(istreameast.scrape()), asyncio.create_task(ovogoal.scrape()), - # asyncio.create_task(pawa.scrape()), + asyncio.create_task(pawa.scrape()), asyncio.create_task(shark.scrape()), asyncio.create_task(streambtw.scrape()), - asyncio.create_task(totalsportek.scrape()), + asyncio.create_task(totalsportek1.scrape()), + asyncio.create_task(totalsportek3.scrape()), asyncio.create_task(tvapp.scrape()), asyncio.create_task(volokit.scrape()), # asyncio.create_task(xstreameast.scrape()), @@ -121,7 +123,8 @@ async def main() -> None: | streamhub.urls | streamsgate.urls | timstreams.urls - | totalsportek.urls + | totalsportek1.urls + | totalsportek3.urls | tvapp.urls | volokit.urls | watchfooty.urls diff --git a/M3U8/scrapers/pawa.py b/M3U8/scrapers/pawa.py index 55400e63..6e496dee 100644 --- a/M3U8/scrapers/pawa.py +++ b/M3U8/scrapers/pawa.py @@ -50,7 +50,9 @@ async 
def process_event(url: str, url_num: int) -> str | None: log.info(f"URL {url_num}) Captured M3U8") - return base64.b64decode(match[1]).decode("utf-8") + m3u = base64.b64decode(match[1]).decode("utf-8") + + return m3u.split("&remote")[0] async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: diff --git a/M3U8/scrapers/roxie.py b/M3U8/scrapers/roxie.py index 1b892607..8a176de0 100644 --- a/M3U8/scrapers/roxie.py +++ b/M3U8/scrapers/roxie.py @@ -19,14 +19,18 @@ HTML_CACHE = Cache(f"{TAG}-html", exp=19_800) BASE_URL = "https://roxiestreams.info" -SPORT_ENDPOINTS = { - "fighting": "Fighting", - "mlb": "MLB", - "motorsports": "Racing", - "nba": "NBA", - # "nfl": "American Football", - "nhl": "NHL", - "soccer": "Soccer", +SPORT_URLS = { + "Racing": urljoin(BASE_URL, "motorsports"), + # "American Football": urljoin(BASE_URL, "nfl"), +} | { + sport: urljoin(BASE_URL, sport.lower()) + for sport in [ + "Fighting", + "MLB", + "NBA", + "NHL", + "Soccer", + ] } @@ -59,7 +63,7 @@ async def refresh_html_cache( event_dt = Time.from_str(data_start, timezone="PST") - event_sport = SPORT_ENDPOINTS[sport] + event_sport = next((k for k, v in SPORT_URLS.items() if v == url), "Live Event") key = f"[{event_sport}] {event} ({TAG})" @@ -159,15 +163,13 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: if not (events := HTML_CACHE.load()): log.info("Refreshing HTML cache") - sport_urls = {sport: urljoin(BASE_URL, sport) for sport in SPORT_ENDPOINTS} - tasks = [ refresh_html_cache( url, sport, now.timestamp(), ) - for sport, url in sport_urls.items() + for sport, url in SPORT_URLS.items() ] results = await asyncio.gather(*tasks) diff --git a/M3U8/scrapers/streamhub.py b/M3U8/scrapers/streamhub.py index 61e5f704..ef9db254 100644 --- a/M3U8/scrapers/streamhub.py +++ b/M3U8/scrapers/streamhub.py @@ -19,16 +19,19 @@ HTML_CACHE = Cache(f"{TAG}-html", exp=19_800) BASE_URL = "https://livesports4u.net" -CATEGORIES = { - # "American Football": "sport_68c02a4465113", 
- "Baseball": "sport_68c02a446582f", - "Basketball": "sport_68c02a4466011", - "Hockey": "sport_68c02a4466f56", - "MMA": "sport_68c02a44674e9", - "Racing": "sport_68c02a4467a48", - "Soccer": "sport_68c02a4464a38", - "Tennis": "sport_68c02a4468cf7", -} +SPORT_ENDPOINTS = [ + f"sport_{sport_id}" + for sport_id in [ + # "68c02a4465113", # American Football + "68c02a446582f", # Baseball + "68c02a4466011", # Basketball + "68c02a4466f56", # Hockey + "68c02a44674e9", # MMA + "68c02a4467a48", # Racing + "68c02a4464a38", # Soccer + "68c02a4468cf7", # Tennis + ] +] async def refresh_html_cache( @@ -104,7 +107,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: now.timestamp(), ) for date in [now.date(), now.delta(days=1).date()] - for sport_id in CATEGORIES.values() + for sport_id in SPORT_ENDPOINTS ] results = await asyncio.gather(*tasks) diff --git a/M3U8/scrapers/streamsgate.py b/M3U8/scrapers/streamsgate.py index a94c0b1e..2a75cd73 100644 --- a/M3U8/scrapers/streamsgate.py +++ b/M3U8/scrapers/streamsgate.py @@ -20,16 +20,19 @@ API_FILE = Cache(f"{TAG}-api", exp=19_800) BASE_URL = "https://streamingon.org" -SPORT_ENDPOINTS = [ - "boxing", - # "cfb", - "f1", - "mlb", - "nba", - # "nfl", - "nhl", - "soccer", - "ufc", +SPORT_URLS = [ + urljoin(BASE_URL, f"data/{sport}.json") + for sport in [ + "boxing", + # "cfb", + "f1", + "mlb", + "nba", + # "nfl", + "nhl", + "soccer", + "ufc", + ] ] @@ -46,13 +49,7 @@ def get_event(t1: str, t2: str) -> str: async def refresh_api_cache(now_ts: float) -> list[dict[str, Any]]: - tasks = [ - network.request( - urljoin(BASE_URL, f"data/{sport}.json"), - log=log, - ) - for sport in SPORT_ENDPOINTS - ] + tasks = [network.request(url, log=log) for url in SPORT_URLS] results = await asyncio.gather(*tasks) diff --git a/M3U8/scrapers/totalsportek1.py b/M3U8/scrapers/totalsportek1.py new file mode 100644 index 00000000..74a7d3d5 --- /dev/null +++ b/M3U8/scrapers/totalsportek1.py @@ -0,0 +1,170 @@ +import re +from 
functools import partial +from urllib.parse import urljoin, urlparse + +from selectolax.parser import HTMLParser + +from .utils import Cache, Time, get_logger, leagues, network + +log = get_logger(__name__) + +urls: dict[str, dict[str, str | float]] = {} + +TAG = "TOTALSPRTK1" + +CACHE_FILE = Cache(TAG, exp=28_800) + +BASE_URL = "https://live.totalsportekarmy.com" + + +def fix_txt(s: str) -> str: + s = " ".join(s.split()) + + return s.upper() if s.islower() else s + + +async def process_event(url: str, url_num: int) -> str | None: + if not (event_data := await network.request(url, log=log)): + log.warning(f"URL {url_num}) Failed to load url.") + + return + + soup_1 = HTMLParser(event_data.content) + + if not (iframe := soup_1.css_first("iframe")): + log.warning(f"URL {url_num}) No iframe element found.") + + return + + if not (iframe_url := iframe.attributes.get("src")): + log.warning(f"URL {url_num}) No iframe source found.") + + return + + if not (iframe_src := await network.request(iframe_url, log=log)): + log.warning(f"URL {url_num}) Failed to load iframe source.") + + return + + valid_m3u8 = re.compile(r'const\s+hexEncoded\s+=\s+"([^"]*)"', re.I) + + if not (match := valid_m3u8.search(iframe_src.text)): + log.warning(f"URL {url_num}) No Clappr source found.") + + return + + log.info(f"URL {url_num}) Captured M3U8") + + return bytes.fromhex(match[1]).decode("utf-8") + + +async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: + events = [] + + if not (html_data := await network.request(BASE_URL, log=log)): + return events + + soup = HTMLParser(html_data.content) + + sport = "Live Event" + + for node in soup.css("a"): + if not node.attributes.get("class"): + continue + + if (parent := node.parent) and "my-1" in parent.attributes.get("class", ""): + if span := node.css_first("span"): + sport = span.text(strip=True) + + sport = fix_txt(sport) + + if not (teams := [t.text(strip=True) for t in node.css(".col-7 .col-12")]): + continue + + if not (href 
:= node.attributes.get("href")): + continue + + href = urlparse(href).path if href.startswith("http") else href + + if not (time_node := node.css_first(".col-3 span")): + continue + + if time_node.text(strip=True).lower() != "matchstarted": + continue + + event_name = fix_txt(" vs ".join(teams)) + + if f"[{sport}] {event_name} ({TAG})" in cached_keys: + continue + + events.append( + { + "sport": sport, + "event": event_name, + "link": urljoin(BASE_URL, href), + } + ) + + return events + + +async def scrape() -> None: + cached_urls = CACHE_FILE.load() + + valid_urls = {k: v for k, v in cached_urls.items() if v["url"]} + + valid_count = cached_count = len(valid_urls) + + urls.update(valid_urls) + + log.info(f"Loaded {cached_count} event(s) from cache") + + log.info(f'Scraping from "{BASE_URL}"') + + if events := await get_events(cached_urls.keys()): + log.info(f"Processing {len(events)} new URL(s)") + + now = Time.clean(Time.now()) + + for i, ev in enumerate(events, start=1): + handler = partial( + process_event, + url=(link := ev["link"]), + url_num=i, + ) + + url = await network.safe_process( + handler, + url_num=i, + semaphore=network.HTTP_S, + log=log, + ) + + sport, event = ev["sport"], ev["event"] + + key = f"[{sport}] {event} ({TAG})" + + tvg_id, logo = leagues.get_tvg_info(sport, event) + + entry = { + "url": url, + "logo": logo, + "base": link, + "timestamp": now.timestamp(), + "id": tvg_id or "Live.Event.us", + "link": link, + } + + cached_urls[key] = entry + + if url: + valid_count += 1 + + urls[key] = entry + + log.info(f"Collected and cached {valid_count - cached_count} new event(s)") + + else: + log.info("No new events found") + + CACHE_FILE.write(cached_urls) diff --git a/M3U8/scrapers/totalsportek.py b/M3U8/scrapers/totalsportek3.py similarity index 92% rename from M3U8/scrapers/totalsportek.py rename to M3U8/scrapers/totalsportek3.py index af6f7169..0c90d96d 100644 --- a/M3U8/scrapers/totalsportek.py +++ b/M3U8/scrapers/totalsportek3.py @@ -11,7 
+11,7 @@ log = get_logger(__name__) urls: dict[str, dict[str, str | float]] = {} -TAG = "TOTALSPRTK" +TAG = "TOTALSPRTK3" CACHE_FILE = Cache(TAG, exp=28_800) @@ -33,29 +33,29 @@ async def process_event(url: str, url_num: int) -> str | None: soup_1 = HTMLParser(event_data.content) if not (iframe_1 := soup_1.css_first("iframe")): - log.warning(f"URL {url_num}) No iframe element found.") + log.warning(f"URL {url_num}) No iframe element found. (IFR1)") return if not (iframe_1_src := iframe_1.attributes.get("src")): - log.warning(f"URL {url_num}) No iframe source found.") + log.warning(f"URL {url_num}) No iframe source found. (IFR1)") return if not (iframe_1_src_data := await network.request(iframe_1_src, log=log)): - log.warning(f"URL {url_num}) Failed to load iframe source.") + log.warning(f"URL {url_num}) Failed to load iframe source. (IFR1)") return soup_2 = HTMLParser(iframe_1_src_data.content) if not (iframe_2 := soup_2.css_first("iframe")): - log.warning(f"URL {url_num}) No iframe element found.") + log.warning(f"URL {url_num}) No iframe element found. (IFR2)") return if not (iframe_2_src := iframe_2.attributes.get("src")): - log.warning(f"URL {url_num}) No iframe source found.") + log.warning(f"URL {url_num}) No iframe source found. (IFR2)") return @@ -66,14 +66,14 @@ async def process_event(url: str, url_num: int) -> str | None: headers={"Referer": iframe_1_src}, ) ): - log.warning(f"URL {url_num}) Failed to load iframe source.") + log.warning(f"URL {url_num}) Failed to load iframe source. (IFR2)") return valid_m3u8 = re.compile(r'currentStreamUrl\s+=\s+"([^"]*)"', re.I) if not (match := valid_m3u8.search(iframe_2_src_data.text)): - log.warning(f"URL {url_num}) No Clappr source found.") + log.warning(f"URL {url_num}) No Clappr source found. 
(IFR2)") return diff --git a/M3U8/scrapers/utils/config.py b/M3U8/scrapers/utils/config.py index 4a43ae6d..5cda4646 100644 --- a/M3U8/scrapers/utils/config.py +++ b/M3U8/scrapers/utils/config.py @@ -194,6 +194,13 @@ class Leagues: else self.info("Hockey") ) + case "Baseball" | "MLB": + return ( + self.info("MLB") + if self.is_valid(event, "MLB") + else self.info("Baseball") + ) + case _: return self.info(sport) diff --git a/M3U8/scrapers/volokit.py b/M3U8/scrapers/volokit.py index ff5546ae..d77d17ea 100644 --- a/M3U8/scrapers/volokit.py +++ b/M3U8/scrapers/volokit.py @@ -17,10 +17,14 @@ CACHE_FILE = Cache(TAG, exp=19_800) BASE_URL = "http://volokit.xyz" -SPORT_ENDPOINTS = { - "mlb": "MLB", - # "nfl": "NFL", - "nhl": "NHL", +SPORT_URLS = { + sport: urljoin(BASE_URL, f"sport/{sport.lower()}/") + for sport in [ + "MLB", + "NHL", + # "NFL", + "WBC", + ] } @@ -70,11 +74,7 @@ async def process_event(url: str, url_num: int) -> str | None: async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: - sport_urls = { - sport.upper(): urljoin(BASE_URL, f"sport/{sport}/") for sport in SPORT_ENDPOINTS - } - - tasks = [network.request(url, log=log) for url in sport_urls.values()] + tasks = [network.request(url, log=log) for url in SPORT_URLS.values()] results = await asyncio.gather(*tasks) @@ -95,7 +95,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: name = fix_event(name_node.text(strip=True)) - sport = next((k for k, v in sport_urls.items() if v == url), "Live Event") + sport = next((k for k, v in SPORT_URLS.items() if v == url), "Live Event") if f"[{sport}] {name} ({TAG})" in cached_keys: continue diff --git a/M3U8/scrapers/xstreameast.py b/M3U8/scrapers/xstreameast.py index 593d6555..cf768b1a 100644 --- a/M3U8/scrapers/xstreameast.py +++ b/M3U8/scrapers/xstreameast.py @@ -17,14 +17,17 @@ CACHE_FILE = Cache(TAG, exp=10_800) BASE_URL = "https://xstreameast.com" -SPORT_ENDPOINTS = [ - # "mlb", - "mma", - "nba", - # "nfl", - # "nhl", - 
"soccer", - "wwe", +SPORT_URLS = [ + urljoin(BASE_URL, f"categories/{sport}/") + for sport in [ + # "mlb", + "mma", + "nba", + # "nfl", + # "nhl", + "soccer", + "wwe", + ] ] @@ -66,13 +69,7 @@ async def process_event(url: str, url_num: int) -> tuple[str | None, str | None] async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: - tasks = [ - network.request( - urljoin(BASE_URL, f"categories/{sport}/"), - log=log, - ) - for sport in SPORT_ENDPOINTS - ] + tasks = [network.request(url, log=log) for url in SPORT_URLS] results = await asyncio.gather(*tasks)