From 00000d9113b3109ac8e4991156629424a94dd70d Mon Sep 17 00:00:00 2001 From: doms9 <96013514+doms9@users.noreply.github.com> Date: Fri, 24 Apr 2026 20:40:43 -0400 Subject: [PATCH] e - remove ppv.py - remove ovogoal.py - re-add fsports.py - misc edits. --- M3U8/fetch.py | 9 +- M3U8/scrapers/embedhd.py | 2 +- M3U8/scrapers/fsports.py | 125 ++++++++++++++++++++++++++ M3U8/scrapers/ovogoal.py | 144 ----------------------------- M3U8/scrapers/ppv.py | 165 ---------------------------------- M3U8/scrapers/totalsportek.py | 7 +- 6 files changed, 134 insertions(+), 318 deletions(-) create mode 100644 M3U8/scrapers/fsports.py delete mode 100644 M3U8/scrapers/ovogoal.py delete mode 100644 M3U8/scrapers/ppv.py diff --git a/M3U8/fetch.py b/M3U8/fetch.py index 470e04b2..be93be88 100644 --- a/M3U8/fetch.py +++ b/M3U8/fetch.py @@ -8,12 +8,11 @@ from scrapers import ( cdnlivetv, embedhd, fawa, + fsports, istreameast, livetvsx, mainportal, - ovogoal, pawa, - ppv, roxie, shark, streamcenter, @@ -62,7 +61,7 @@ async def main() -> None: pw_tasks = [ asyncio.create_task(cdnlivetv.scrape(xtrnl_brwsr)), asyncio.create_task(embedhd.scrape(hdl_brwsr)), - # asyncio.create_task(ppv.scrape(xtrnl_brwsr)), + asyncio.create_task(fsports.scrape(xtrnl_brwsr)), asyncio.create_task(roxie.scrape(hdl_brwsr)), asyncio.create_task(streamhub.scrape(xtrnl_brwsr)), asyncio.create_task(watchfooty.scrape(xtrnl_brwsr)), @@ -72,7 +71,6 @@ async def main() -> None: asyncio.create_task(fawa.scrape()), asyncio.create_task(istreameast.scrape()), asyncio.create_task(mainportal.scrape()), - # asyncio.create_task(ovogoal.scrape()), asyncio.create_task(pawa.scrape()), asyncio.create_task(shark.scrape()), asyncio.create_task(streamcenter.scrape()), @@ -97,12 +95,11 @@ async def main() -> None: cdnlivetv.urls | embedhd.urls | fawa.urls + | fsports.urls | istreameast.urls | livetvsx.urls | mainportal.urls - | ovogoal.urls | pawa.urls - | ppv.urls | roxie.urls | shark.urls | streamcenter.urls diff --git a/M3U8/scrapers/embedhd.py b/M3U8/scrapers/embedhd.py index 7989cc25..72ea32ff 100644 --- a/M3U8/scrapers/embedhd.py +++ b/M3U8/scrapers/embedhd.py @@ -123,7 +123,7 @@ async def scrape(browser: Browser) -> None: entry = { "url": url, "logo": logo, - "base": "https://vividmosaica.com/", + "base": "https://exposestrat.com/", "timestamp": ts, "id": tvg_id or "Live.Event.us", "link": link, diff --git a/M3U8/scrapers/fsports.py b/M3U8/scrapers/fsports.py new file mode 100644 index 00000000..f02bbd07 --- /dev/null +++ b/M3U8/scrapers/fsports.py @@ -0,0 +1,125 @@ +from functools import partial +from urllib.parse import urljoin + +from playwright.async_api import Browser +from selectolax.parser import HTMLParser + +from .utils import Cache, Time, get_logger, leagues, network + +log = get_logger(__name__) + +urls: dict[str, dict[str, str | float]] = {} + +TAG = "FSPRTS" + +CACHE_FILE = Cache(TAG, exp=5_400) + +BASE_URL = "https://fsportshdd.club" + + +async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: + events = [] + + if not (html_data := await network.request(BASE_URL, log=log)): + return events + + soup = HTMLParser(html_data.content) + + sport = "Live Event" + + for card in soup.css(".media.btn.btn-default.btn-lg.btn-block"): + if card.css_first('[id^="countdown-"]'): + continue + + if not (name_elem := card.css_first("h4")): + continue + + if sport_elem := card.css_first("h5"): + if (sport := sport_elem.text(strip=True)).lower() == "no league": + sport = "Live Event" + + if not (a_elem := card.css_first("a")) or not ( + href := a_elem.attributes.get("href") + ): + continue + + name = name_elem.text(strip=True) + + if f"[{sport}] {name} ({TAG})" in cached_keys: + continue + + events.append( + { + "sport": sport, + "event": name, + "link": urljoin(BASE_URL, href), + } + ) + + return events + + +async def scrape(browser: Browser) -> None: + cached_urls = CACHE_FILE.load() + + valid_urls = {k: v for k, v in cached_urls.items() if v["url"]} + + valid_count = cached_count = len(valid_urls) + + urls.update(valid_urls) + + log.info(f"Loaded {cached_count} event(s) from cache") + + log.info(f'Scraping from "{BASE_URL}"') + + if events := await get_events(cached_urls.keys()): + log.info(f"Processing {len(events)} new URL(s)") + + now = Time.clean(Time.now()) + + async with network.event_context(browser, stealth=False) as context: + for i, ev in enumerate(events, start=1): + async with network.event_page(context) as page: + handler = partial( + network.process_event, + url=(link := ev["link"]), + url_num=i, + page=page, + log=log, + ) + + url = await network.safe_process( + handler, + url_num=i, + semaphore=network.PW_S, + log=log, + ) + + sport, event = ev["sport"], ev["event"] + + key = f"[{sport}] {event} ({TAG})" + + tvg_id, logo = leagues.get_tvg_info(sport, event) + + entry = { + "url": url, + "logo": logo, + "base": "https://exposestrat.com/", + "timestamp": now.timestamp(), + "id": tvg_id or "Live.Event.us", + "link": link, + } + + cached_urls[key] = entry + + if url: + valid_count += 1 + + urls[key] = entry + + log.info(f"Collected and cached {valid_count - cached_count} new event(s)") + + else: + log.info("No new events found") + + CACHE_FILE.write(cached_urls) diff --git a/M3U8/scrapers/ovogoal.py b/M3U8/scrapers/ovogoal.py deleted file mode 100644 index 71644b80..00000000 --- a/M3U8/scrapers/ovogoal.py +++ /dev/null @@ -1,144 +0,0 @@ -import re -from functools import partial -from urllib.parse import urljoin - -from selectolax.parser import HTMLParser - -from .utils import Cache, Time, get_logger, leagues, network - -log = get_logger(__name__) - -urls: dict[str, dict[str, str | float]] = {} - -TAG = "OVO" - -CACHE_FILE = Cache(TAG, exp=28_800) - -BASE_URL = "https://ovogoaal.com" - - -async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]: - nones = None, None - - if not (html_data := await network.request(url, log=log)): - log.warning(f"URL {url_num}) Failed to load url.") - return nones - - soup = HTMLParser(html_data.content) - - iframe = soup.css_first("iframe") - - if not iframe or not (iframe_src := iframe.attributes.get("src")): - log.warning(f"URL {url_num}) No iframe element found.") - return nones - - if not ( - iframe_src_data := await network.request( - iframe_src, - headers={"Referer": url}, - log=log, - ) - ): - log.warning(f"URL {url_num}) Failed to load iframe source.") - return nones - - valid_m3u8 = re.compile(r'(var|const)\s+(\w+)\s*=\s*"([^"]*)"', re.I) - - if not (match := valid_m3u8.search(iframe_src_data.text)): - log.warning(f"URL {url_num}) No Clappr source found.") - return nones - - log.info(f"URL {url_num}) Captured M3U8") - - return match[3], iframe_src - - -async def get_events() -> list[dict[str, str]]: - events = [] - - if not (html_data := await network.request(BASE_URL, log=log)): - return events - - soup = HTMLParser(html_data.content) - - sport = "Live Event" - - for card in soup.css(".main-content .stream-row"): - if (not (watch_btn_elem := card.css_first(".watch-btn"))) or ( - not (onclick := watch_btn_elem.attributes.get("onclick")) - ): - continue - - if not (event_name_elem := card.css_first(".stream-info")): - continue - - href = onclick.split(".href=")[-1].replace("'", "") - - event_name = event_name_elem.text(strip=True) - - events.append( - { - "sport": sport, - "event": event_name, - "link": urljoin(f"{html_data.url}", href), - } - ) - - return events - - -async def scrape() -> None: - if cached_urls := CACHE_FILE.load(): - urls.update({k: v for k, v in cached_urls.items() if v["url"]}) - - log.info(f"Loaded {len(urls)} event(s) from cache") - - return - - log.info(f'Scraping from "{BASE_URL}"') - - if events := await get_events(): - log.info(f"Processing {len(events)} URL(s)") - - now = Time.clean(Time.now()) - - for i, ev in enumerate(events, start=1): - handler = partial( - process_event, - url=(link := ev["link"]), - url_num=i, - ) - - url, iframe = await network.safe_process( - handler, - url_num=i, - semaphore=network.HTTP_S, - log=log, - ) - - sport, event = ev["sport"], ev["event"] - - key = f"[{sport}] {event} ({TAG})" - - tvg_id, logo = leagues.get_tvg_info(sport, event) - - entry = { - "url": url, - "logo": logo, - "base": iframe, - "timestamp": now.timestamp(), - "id": tvg_id or "Live.Event.us", - "link": link, - } - - cached_urls[key] = entry - - if url: - urls[key] = entry - - log.info(f"Collected and cached {len(urls)} event(s)") - - else: - log.info("No events found") - - CACHE_FILE.write(cached_urls) diff --git a/M3U8/scrapers/ppv.py b/M3U8/scrapers/ppv.py deleted file mode 100644 index 6b8ce1b4..00000000 --- a/M3U8/scrapers/ppv.py +++ /dev/null @@ -1,165 +0,0 @@ -import re -from functools import partial - -from playwright.async_api import Browser - -from .utils import Cache, Time, get_logger, leagues, network - -log = get_logger(__name__) - -urls: dict[str, dict[str, str | float]] = {} - -TAG = "PPV" - -CACHE_FILE = Cache(TAG, exp=10_800) - -API_FILE = Cache(f"{TAG}-api", exp=19_800) - -API_MIRRORS = [ - "https://api.ppv.to/api/streams", - "https://api.ppv.cx/api/streams", - # "https://api.ppv.sh/api/streams", - # "https://api.ppv.la/api/streams", -] - - -def fix_url(s: str) -> str: - pattern = re.compile(r"index\.m3u8$", re.I) - - return pattern.sub(r"tracks-v1a1/mono.ts.m3u8", s) - - -async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]: - now = Time.clean(Time.now()) - - if not (api_data := API_FILE.load(per_entry=False)): - log.info("Refreshing API cache") - - api_data = {"timestamp": now.timestamp()} - - if r := await network.request(url, log=log): - api_data: dict = r.json() - - API_FILE.write(api_data) - - events = [] - - start_dt = now.delta(hours=-1) - end_dt = now.delta(minutes=5) - - for stream_group in api_data.get("streams", []): - sport = stream_group["category"] - - if sport == "24/7 Streams": - continue - - for event in stream_group.get("streams", []): - name = event.get("name") - - start_ts = event.get("starts_at") - - logo = event.get("poster") - - iframe = event.get("iframe") - - if not (name and start_ts and iframe): - continue - - if f"[{sport}] {name} ({TAG})" in cached_keys: - continue - - event_dt = Time.from_ts(start_ts) - - if not start_dt <= event_dt <= end_dt: - continue - - events.append( - { - "sport": sport, - "event": name, - "link": f"{iframe}#player=clappr#autoplay=true", - "logo": logo, - "timestamp": event_dt.timestamp(), - } - ) - - return events - - -async def scrape(browser: Browser) -> None: - cached_urls = CACHE_FILE.load() - - valid_urls = {k: v for k, v in cached_urls.items() if v["url"]} - - valid_count = cached_count = len(valid_urls) - - urls.update(valid_urls) - - log.info(f"Loaded {cached_count} event(s) from cache") - - if not (api_url := await network.get_base(API_MIRRORS)): - log.warning("No working PPV mirrors") - - CACHE_FILE.write(cached_urls) - - return - - log.info(f'Scraping from "{api_url}"') - - if events := await get_events(api_url, cached_urls.keys()): - log.info(f"Processing {len(events)} new URL(s)") - - async with network.event_context(browser, stealth=False) as context: - for i, ev in enumerate(events, start=1): - async with network.event_page(context) as page: - handler = partial( - network.process_event, - url=(link := ev["link"]), - url_num=i, - page=page, - timeout=6, - log=log, - ) - - url = await network.safe_process( - handler, - url_num=i, - semaphore=network.PW_S, - log=log, - ) - - sport, event, logo, ts = ( - ev["sport"], - ev["event"], - ev["logo"], - ev["timestamp"], - ) - - key = f"[{sport}] {event} ({TAG})" - - tvg_id, pic = leagues.get_tvg_info(sport, event) - - entry = { - "url": url, - "logo": logo or pic, - "base": link, - "timestamp": ts, - "id": tvg_id or "Live.Event.us", - "link": link, - } - - cached_urls[key] = entry - - if url: - valid_count += 1 - - entry["url"] = fix_url(url) - - urls[key] = entry - - log.info(f"Collected and cached {valid_count - cached_count} new event(s)") - - else: - log.info("No new events found") - - CACHE_FILE.write(cached_urls) diff --git a/M3U8/scrapers/totalsportek.py b/M3U8/scrapers/totalsportek.py index 47e57de2..895bfcd2 100644 --- a/M3U8/scrapers/totalsportek.py +++ b/M3U8/scrapers/totalsportek.py @@ -51,7 +51,7 @@ async def process_ts3(ifr_src: str, url_num: int) -> str | None: soup_2 = HTMLParser(ifr_1_src_data.content) - ifr_2 = soup_2.css_first("iframe") + ifr_2 = soup_2.css_first("iframe[width='100%']") if not ifr_2 or not (ifr_2_src := ifr_2.attributes.get("src")): log.warning(f"URL {url_num}) No iframe element found. (IFR2)") @@ -130,7 +130,10 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: if not (time_node := node.css_first(".col-3 span")): continue - if time_node.text(strip=True).lower() != "matchstarted": + if time_node.text(strip=True).lower() not in [ + "matchstarted", + "1minfrom now", + ]: continue event_name = fix_txt(" vs ".join(teams))