doms9 2025-10-11 13:45:14 -04:00
parent 4291e8f407
commit 00000d9766

@@ -1,275 +1,279 @@
 import asyncio
 import re
 from functools import partial
 from pathlib import Path
 from typing import Any
 from urllib.parse import urljoin
 
 import httpx
 from playwright.async_api import BrowserContext, async_playwright
 
 from .utils import Cache, Time, get_logger, leagues, network
 
 log = get_logger(__name__)
 
 urls: dict[str, dict[str, str | float]] = {}
 
 API_FILE = Cache(Path(__file__).parent / "caches" / "strmd_api.json", exp=28_800)
 CACHE_FILE = Cache(Path(__file__).parent / "caches" / "strmd.json", exp=10_800)
 
 MIRRORS = ["https://streamed.pk", "https://streami.su", "https://streamed.st"]
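
Note: assuming the Cache helper's exp argument is a time-to-live in seconds (Cache itself is not shown in this diff), the raw API listing and the resolved stream cache expire on different schedules:

28_800 / 3_600   # 8.0 hours for strmd_api.json
10_800 / 3_600   # 3.0 hours for strmd.json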

 def validate_category(s: str) -> str:
     if "-" in s:
         return " ".join([i.capitalize() for i in s.split("-")])
     elif s == "fight":
         return "Fight (UFC/Boxing)"
 
     return s.capitalize()
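
For reference, validate_category maps API category slugs to display names; a few illustrative calls (the slugs are assumed examples, not taken from the API):

validate_category("american-football")   # -> 'American Football'
validate_category("fight")               # -> 'Fight (UFC/Boxing)'
validate_category("basketball")          # -> 'Basketball'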

-def get_tvg(sport: str, event: str) -> str:
+def get_tvg_id(sport: str, event: str) -> tuple[str | None, str]:
     match sport:
         case "American Football":
             if leagues.is_valid(event, "NFL"):
-                return "NFL.Dummy.us"
+                return leagues.info("NFL")
             else:
-                return "NCAA.Sports.Dummy.us"
+                return leagues.info("NCAA")
 
         case "Basketball":
             if leagues.is_valid(event, "NBA"):
-                return "NBA.Basketball.Dummy.us"
+                return leagues.info("NBA")
             elif leagues.is_valid(event, "WNBA"):
-                return "WNBA.dummy.us"
+                return leagues.info("WNBA")
             # NCAA
             else:
-                return "Basketball.Dummy.us"
+                return leagues.info("Basketball")
 
         case "Hockey":
-            return "NHL.Hockey.Dummy.us"
+            return leagues.info("NHL")
 
         case _:
-            return leagues.info(sport)[0]
+            return leagues.info(sport)
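
The renamed get_tvg_id now forwards whatever leagues.info(...) returns instead of hard-coded dummy IDs. Judging only from the old [0]/[1] indexing and the way scrape() below unpacks the result, leagues.info presumably yields a (tvg_id, logo_url) pair; a minimal sketch of a caller under that assumption:

# Assumed contract: leagues.info(name) -> (tvg_id, logo_url); not confirmed by this diff.
tvg_id, pic = get_tvg_id(sport, event)
channel_id = tvg_id or "Live.Event.us"   # same fallback scrape() applies
channel_logo = poster_logo or pic        # event poster first, league artwork second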

 async def refresh_api_cache(
     client: httpx.AsyncClient, url: str
 ) -> list[dict[str, Any]]:
     log.info("Refreshing API cache")
 
     try:
         r = await client.get(url)
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{url}"\n{e}')
         return {}
 
     data = r.json()
     data[0]["timestamp"] = Time.now().timestamp()
 
     return data
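
refresh_api_cache stamps the first element with a fetch timestamp and returns the parsed JSON. Based only on the keys get_events reads below, each element of the all-today response presumably looks something like the following (field names inferred, values invented):

example_match = {
    "category": "basketball",
    "title": "Team A vs Team B",
    "poster": "/api/images/poster/abc.webp",
    "date": 1760204700000,   # epoch milliseconds, judging by int(str(ts)[:-3]) below
    "sources": [{"source": "alpha", "id": "team-a-vs-team-b"}],
}

One small wrinkle left untouched by this commit: the except branch returns {} even though the annotation and the callers expect a list.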

 async def process_event(
     url: str,
     url_num: int,
     context: BrowserContext,
 ) -> str | None:
     page = await context.new_page()
 
     captured: list[str] = []
     got_one = asyncio.Event()
 
     handler = partial(network.capture_req, captured=captured, got_one=got_one)
     page.on("request", handler)
 
     try:
         await page.goto(
             url,
             wait_until="domcontentloaded",
             timeout=15_000,
         )
 
         wait_task = asyncio.create_task(got_one.wait())
 
         try:
             await asyncio.wait_for(wait_task, timeout=10)
         except asyncio.TimeoutError:
             log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
             return
         finally:
             if not wait_task.done():
                 wait_task.cancel()
 
                 try:
                     await wait_task
                 except asyncio.CancelledError:
                     pass
 
         if captured:
             log.info(f"URL {url_num}) Captured M3U8")
             return captured[-1]
 
         log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
         return
 
     except Exception as e:
         log.warning(f"URL {url_num}) Exception while processing: {e}")
         return
 
     finally:
         page.remove_listener("request", handler)
         await page.close()
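
network.capture_req is not part of this diff. Given the partial(...) wiring, the page.on("request", ...) hook, and the M3U8 log lines, a compatible callback might look roughly like the sketch below; the name, signature, and the .m3u8 filter are all assumptions:

from asyncio import Event
from playwright.async_api import Request

def capture_req(request: Request, *, captured: list[str], got_one: Event) -> None:
    # Hypothetical: record playlist URLs and wake the waiter in process_event.
    if ".m3u8" in request.url:
        captured.append(request.url)
        got_one.set()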

 async def get_events(
     client: httpx.AsyncClient,
     base_url: str,
     cached_keys: set[str],
 ) -> list[dict[str, str]]:
     if not (api_data := API_FILE.load(per_entry=False, index=True)):
         api_data = await refresh_api_cache(
             client,
             urljoin(
                 base_url,
                 "api/matches/all-today",
             ),
         )
 
         API_FILE.write(api_data)
 
     events: list[dict[str, str]] = []
 
     now = Time.clean(Time.now())
     start_dt = now.delta(minutes=-30)
     end_dt = now.delta(minutes=30)
 
     pattern = re.compile(r"[\n\r]+|\s{2,}")
 
     for event in api_data:
         category = event["category"]
 
         if category == "other":
             continue
 
         sport = validate_category(category)
 
         parts = pattern.split(event["title"].strip())
         name = " | ".join(p.strip() for p in parts if p.strip())
 
         logo = urljoin(base_url, poster) if (poster := event.get("poster")) else None
 
         key = f"[{sport}] {name} (STRMD)"
 
         if cached_keys & {key}:
             continue
 
         if not (ts := event["date"]):
             continue
 
         start_ts = int(str(ts)[:-3])
 
         event_dt = Time.from_ts(start_ts)
 
         if not start_dt <= event_dt <= end_dt:
             continue
 
         sources: list[dict[str, str]] = event["sources"]
 
         if not sources:
             continue
 
-        first_source = sources[0]
-        source_type = first_source.get("source")
-        stream_id = first_source.get("id")
+        source = sources[0]
+        # source = sources[1] if len(sources) > 1 else sources[0]
+        source_type = source.get("source")
+        stream_id = source.get("id")
 
         if not (source_type and stream_id):
             continue
 
         events.append(
             {
                 "sport": sport,
                 "event": name,
                 "link": f"https://embedsports.top/embed/{source_type}/{stream_id}/1",
                 "logo": logo,
                 "timestamp": event_dt.timestamp(),
             }
         )
 
     return events
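
get_events treats the date field as epoch milliseconds: int(str(ts)[:-3]) drops the last three digits rather than dividing by 1000, then keeps an event only if it starts within 30 minutes of now in either direction. A quick worked example with an illustrative value:

ts = 1760204700000            # milliseconds, as assumed above
start_ts = int(str(ts)[:-3])  # 1760204700 -> seconds
# kept only if now - 30 min <= Time.from_ts(start_ts) <= now + 30 min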

 async def scrape(client: httpx.AsyncClient) -> None:
     cached_urls = CACHE_FILE.load()
     cached_count = len(cached_urls)
     urls.update(cached_urls)
 
     log.info(f"Loaded {cached_count} event(s) from cache")
 
     if not (base_url := await network.get_base(MIRRORS)):
         log.warning("No working PPV mirrors")
         CACHE_FILE.write(cached_urls)
         return
 
     log.info(f'Scraping from "{base_url}"')
 
     events = await get_events(
         client,
         base_url,
         set(cached_urls.keys()),
     )
 
     log.info(f"Processing {len(events)} new URL(s)")
 
     async with async_playwright() as p:
         browser, context = await network.browser(p, "brave")
 
         for i, ev in enumerate(events, start=1):
             url = await network.safe_process(
                 lambda: process_event(
                     ev["link"],
                     url_num=i,
                     context=context,
                 ),
                 url_num=i,
                 log=log,
             )
 
             if url:
                 sport, event, logo, ts = (
                     ev["sport"],
                     ev["event"],
                     ev["logo"],
                     ev["timestamp"],
                 )
 
                 key = f"[{sport}] {event} (STRMD)"
+
+                tvg_id, pic = get_tvg_id(sport, event)
+
                 entry = {
                     "url": url,
-                    "logo": logo or leagues.info(sport)[1],
+                    "logo": logo or pic,
                     "base": "https://embedsports.top/",
                     "timestamp": ts,
-                    "id": get_tvg(sport, event) or "Live.Event.us",
+                    "id": tvg_id or "Live.Event.us",
                 }
 
                 urls[key] = cached_urls[key] = entry
 
         await browser.close()
 
     if new_count := len(cached_urls) - cached_count:
         log.info(f"Collected and cached {new_count} new event(s)")
     else:
         log.info("No new events found")
 
     CACHE_FILE.write(cached_urls)
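
The module exposes scrape(client) but no entry point of its own. A minimal standalone runner (hypothetical, not part of the repository) could look like:

import asyncio

import httpx

async def main() -> None:
    # scrape() only needs an open httpx.AsyncClient; the timeout value is arbitrary.
    async with httpx.AsyncClient(timeout=30) as client:
        await scrape(client)

if __name__ == "__main__":
    asyncio.run(main())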