doms9 2025-10-11 13:59:24 -04:00
parent 00000d9766
commit 00000d950a

@@ -1,279 +1,279 @@
import asyncio
import re
from functools import partial
from pathlib import Path
from typing import Any
from urllib.parse import urljoin

import httpx
from playwright.async_api import BrowserContext, async_playwright

from .utils import Cache, Time, get_logger, leagues, network

log = get_logger(__name__)

urls: dict[str, dict[str, str | float]] = {}

API_FILE = Cache(Path(__file__).parent / "caches" / "strmd_api.json", exp=28_800)
CACHE_FILE = Cache(Path(__file__).parent / "caches" / "strmd.json", exp=10_800)

MIRRORS = ["https://streamed.pk", "https://streami.su", "https://streamed.st"]

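The two cache expiry constants read more easily in hours; a quick check of that arithmetic (purely illustrative, not part of the file):

    assert 28_800 == 8 * 60 * 60   # strmd_api.json entries live for 8 hours
    assert 10_800 == 3 * 60 * 60   # strmd.json entries live for 3 hours
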
def validate_category(s: str) -> str:
    if "-" in s:
-        return " ".join([i.capitalize() for i in s.split("-")])
+        return " ".join(i.capitalize() for i in s.split("-"))
    elif s == "fight":
        return "Fight (UFC/Boxing)"
    return s.capitalize()

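As a sanity check on the tightened-up helper, a hypothetical interactive session (the input strings are assumed examples of the API's category slugs, not values taken from this diff):

    >>> validate_category("american-football")
    'American Football'
    >>> validate_category("fight")
    'Fight (UFC/Boxing)'
    >>> validate_category("basketball")
    'Basketball'
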
-def get_tvg_id(sport: str, event: str) -> tuple[str | None, str]:
+def get_tvg_info(sport: str, event: str) -> tuple[str | None, str]:
    match sport:
        case "American Football":
            if leagues.is_valid(event, "NFL"):
                return leagues.info("NFL")
            else:
                return leagues.info("NCAA")

        case "Basketball":
            if leagues.is_valid(event, "NBA"):
                return leagues.info("NBA")
            elif leagues.is_valid(event, "WNBA"):
                return leagues.info("WNBA")
            # NCAA
            else:
                return leagues.info("Basketball")

        case "Hockey":
            return leagues.info("NHL")

        case _:
            return leagues.info(sport)

async def refresh_api_cache(
    client: httpx.AsyncClient, url: str
) -> list[dict[str, Any]]:
    log.info("Refreshing API cache")

    try:
        r = await client.get(url)
        r.raise_for_status()
    except Exception as e:
        log.error(f'Failed to fetch "{url}"\n{e}')
        return {}

    data = r.json()
    data[0]["timestamp"] = Time.now().timestamp()

    return data

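Why the first element gets stamped: the Cache wrapper presumably compares that timestamp against its exp window when loading. A stand-in freshness check sketching that assumed contract only (Cache internals are not part of this diff):

    import time

    def is_fresh(data: list[dict], exp: int = 28_800) -> bool:
        # Assumed contract: refresh_api_cache stamps data[0]["timestamp"]
        # and the payload counts as stale once `exp` seconds have passed.
        if not data or "timestamp" not in data[0]:
            return False
        return (time.time() - data[0]["timestamp"]) < exp
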
async def process_event(
    url: str,
    url_num: int,
    context: BrowserContext,
) -> str | None:
    page = await context.new_page()

    captured: list[str] = []
    got_one = asyncio.Event()

    handler = partial(network.capture_req, captured=captured, got_one=got_one)
    page.on("request", handler)

    try:
        await page.goto(
            url,
            wait_until="domcontentloaded",
            timeout=15_000,
        )

        wait_task = asyncio.create_task(got_one.wait())

        try:
            await asyncio.wait_for(wait_task, timeout=10)
        except asyncio.TimeoutError:
            log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
            return
        finally:
            if not wait_task.done():
                wait_task.cancel()
                try:
                    await wait_task
                except asyncio.CancelledError:
                    pass

        if captured:
            log.info(f"URL {url_num}) Captured M3U8")
            return captured[-1]

        log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
        return

    except Exception as e:
        log.warning(f"URL {url_num}) Exception while processing: {e}")
        return

    finally:
        page.remove_listener("request", handler)
        await page.close()

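process_event delegates the actual sniffing to network.capture_req, which is outside this diff; all it needs is a Playwright request callback that appends matching URLs and sets the event. A minimal sketch of that contract, with the .m3u8 filter being an assumption:

    from asyncio import Event

    from playwright.async_api import Request

    def capture_req(request: Request, *, captured: list[str], got_one: Event) -> None:
        # Assumed filter: any request whose URL looks like an HLS playlist.
        if ".m3u8" in request.url:
            captured.append(request.url)
            got_one.set()
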
async def get_events(
    client: httpx.AsyncClient,
    base_url: str,
    cached_keys: set[str],
) -> list[dict[str, str]]:
    if not (api_data := API_FILE.load(per_entry=False, index=True)):
        api_data = await refresh_api_cache(
            client,
            urljoin(
                base_url,
                "api/matches/all-today",
            ),
        )

        API_FILE.write(api_data)

    events: list[dict[str, str]] = []

    now = Time.clean(Time.now())
    start_dt = now.delta(minutes=-30)
    end_dt = now.delta(minutes=30)

    pattern = re.compile(r"[\n\r]+|\s{2,}")

    for event in api_data:
        category = event["category"]

        if category == "other":
            continue

-        sport = validate_category(category)
-        parts = pattern.split(event["title"].strip())
-        name = " | ".join(p.strip() for p in parts if p.strip())
-        logo = urljoin(base_url, poster) if (poster := event.get("poster")) else None
-        key = f"[{sport}] {name} (STRMD)"
-        if cached_keys & {key}:
-            continue
-        if not (ts := event["date"]):
-            continue
-        start_ts = int(str(ts)[:-3])
-        event_dt = Time.from_ts(start_ts)
-        if not start_dt <= event_dt <= end_dt:
-            continue
+        if not (ts := event["date"]):
+            continue
+        start_ts = int(str(ts)[:-3])
+        event_dt = Time.from_ts(start_ts)
+        if not start_dt <= event_dt <= end_dt:
+            continue
+        sport = validate_category(category)
+        parts = pattern.split(event["title"].strip())
+        name = " | ".join(p.strip() for p in parts if p.strip())
+        logo = urljoin(base_url, poster) if (poster := event.get("poster")) else None
+        key = f"[{sport}] {name} (STRMD)"
+        if cached_keys & {key}:
+            continue

        sources: list[dict[str, str]] = event["sources"]

        if not sources:
            continue

        source = sources[0]
        # source = sources[1] if len(sources) > 1 else sources[0]

        source_type = source.get("source")
        stream_id = source.get("id")

        if not (source_type and stream_id):
            continue

        events.append(
            {
                "sport": sport,
                "event": name,
                "link": f"https://embedsports.top/embed/{source_type}/{stream_id}/1",
                "logo": logo,
                "timestamp": event_dt.timestamp(),
            }
        )

    return events

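The int(str(ts)[:-3]) line assumes the API's date field is a millisecond epoch: dropping the last three characters is string-level division by 1000. An equivalent, if that assumption holds (the sample value is illustrative):

    ts_ms = 1_760_205_600_000        # example millisecond epoch
    start_ts = ts_ms // 1000         # same result as int(str(ts_ms)[:-3])
    assert start_ts == int(str(ts_ms)[:-3])
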
async def scrape(client: httpx.AsyncClient) -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")

    if not (base_url := await network.get_base(MIRRORS)):
        log.warning("No working PPV mirrors")
        CACHE_FILE.write(cached_urls)
        return

    log.info(f'Scraping from "{base_url}"')

    events = await get_events(
        client,
        base_url,
        set(cached_urls.keys()),
    )

    log.info(f"Processing {len(events)} new URL(s)")

    async with async_playwright() as p:
        browser, context = await network.browser(p, "brave")

        for i, ev in enumerate(events, start=1):
            url = await network.safe_process(
                lambda: process_event(
                    ev["link"],
                    url_num=i,
                    context=context,
                ),
                url_num=i,
                log=log,
            )

            if url:
                sport, event, logo, ts = (
                    ev["sport"],
                    ev["event"],
                    ev["logo"],
                    ev["timestamp"],
                )

                key = f"[{sport}] {event} (STRMD)"
-                tvg_id, pic = get_tvg_id(sport, event)
+                tvg_id, pic = get_tvg_info(sport, event)

                entry = {
                    "url": url,
                    "logo": logo or pic,
                    "base": "https://embedsports.top/",
                    "timestamp": ts,
                    "id": tvg_id or "Live.Event.us",
                }

                urls[key] = cached_urls[key] = entry

        await browser.close()

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

    CACHE_FILE.write(cached_urls)
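
Nothing in this file shows how scrape() is driven; a minimal, assumed entry point would hand it a caller-managed httpx.AsyncClient (the real wiring presumably lives elsewhere in the project):

    import asyncio

    import httpx

    async def main() -> None:
        async with httpx.AsyncClient(timeout=30) as client:
            await scrape(client)

    if __name__ == "__main__":
        asyncio.run(main())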