doms9 2025-09-30 17:27:42 -04:00
parent e0e04c8635
commit 00000d9362
10 changed files with 230 additions and 9 deletions

View file

@@ -2,7 +2,7 @@
import asyncio
from pathlib import Path
-from scrapers import fstv, livetvsx, ppv, streambtw, streameast, tvpass
+from scrapers import fstv, livetvsx, ppv, streambtw, streameast, streamed, tvpass
from scrapers.utils import CLIENT, UA, get_logger
log = get_logger(__name__)
@@ -42,6 +42,7 @@ async def main() -> None:
| ppv.urls
| streambtw.urls
| streameast.urls
| streamed.urls
| tvpass.urls
)

View file

@@ -116,7 +116,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
entry = {
"url": link,
"logo": logo,
"id": tvg_id or "Live.Event.us",
"id": tvg_id,
}
urls[key] = entry

View file

@@ -164,7 +164,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
"logo": logo,
"base": base_url,
"timestamp": now.timestamp(),
"id": tvg_id or "Live.Event.us",
"id": tvg_id,
"href": ev["href"],
}

View file

@@ -240,6 +240,9 @@ async def get_events(
elem.clear()
continue
if start_dt.date() != now.date():
continue
if not start_dt <= dt <= end_dt:
elem.clear()
continue
@@ -316,7 +319,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
entry = {
"url": url,
"logo": logo,
"id": tvg_id or "Live.Event.us",
"id": tvg_id,
"base": "https://livetv.sx/enx/",
"timestamp": now.timestamp(),
}

View file

@@ -173,6 +173,9 @@ async def get_events(
end_dt = datetime.fromtimestamp(end_ts, tz=TZ) + timedelta(minutes=30)
if start_dt.date() != now.date():
continue
if not start_dt <= now < end_dt:
continue

View file

@@ -103,7 +103,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
"logo": logo,
"base": BASE_URL,
"timestamp": now.timestamp(),
"id": tvg_id or "Live.Event.us",
"id": tvg_id,
}
urls[key] = entry

View file

@@ -206,7 +206,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
"logo": logo,
"base": base_url,
"timestamp": now.timestamp(),
"id": tvg_id or "Live.Event.us",
"id": tvg_id,
}
urls[key] = cached_urls[key] = entry

M3U8/scrapers/streamed.py (new file, 214 additions)
View file

@@ -0,0 +1,214 @@
import asyncio
from datetime import datetime, timedelta
from functools import partial
from pathlib import Path
import httpx
from playwright.async_api import async_playwright
from selectolax.parser import HTMLParser
from .utils import (
TZ,
capture_req,
get_logger,
leagues,
load_cache,
new_browser,
now,
safe_process_event,
write_cache,
)
log = get_logger(__name__)
urls: dict[str, dict[str, str | float]] = {}
CACHE_FILE = Path(__file__).parent / "caches" / "streamed.json"
HTML_CACHE = Path(__file__).parent / "caches" / "streamed_php.json"
base_url = "https://streamed.site/webmaster.php"

def get_date(s: str) -> datetime:
try:
return datetime.strptime(s, "%Y-%m-%d %H:%M %Z").astimezone(TZ)
except ValueError:
s = s.replace("ET", "").strip()
return datetime.strptime(s, "%Y-%m-%d %H:%M").astimezone(TZ)

async def process_event(url: str, url_num: int) -> str | None:
async with async_playwright() as p:
browser, context = await new_browser(p, browser="brave")
page = await context.new_page()
captured: list[str] = []
got_one = asyncio.Event()
handler = partial(capture_req, captured=captured, got_one=got_one)
page.on("request", handler)
try:
await page.goto(url, wait_until="domcontentloaded", timeout=15_000)
wait_task = asyncio.create_task(got_one.wait())
try:
await asyncio.wait_for(wait_task, timeout=10)
except asyncio.TimeoutError:
log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
return
finally:
if not wait_task.done():
wait_task.cancel()
try:
await wait_task
except asyncio.CancelledError:
pass
if captured:
log.info(f"URL {url_num}) Captured M3U8")
return captured[-1]
log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
return
except Exception as e:
log.warning(f"URL {url_num}) Exception while processing: {e}")
return
finally:
page.remove_listener("request", handler)
await page.close()
await browser.close()

async def refresh_html_cache(client: httpx.AsyncClient, url: str) -> dict[str, dict[str, str | float]]:
try:
r = await client.get(url)
r.raise_for_status()
except Exception as e:
log.error(f'Failed to fetch "{url}"\n{e}')
return {}
soup = HTMLParser(r.text)
events = {}
for row in soup.css("div.wrap div.row"):
if not (date := row.css_first("div.date")):
continue
event_dt = get_date(date.text(strip=True))
if event_dt.date() != now.date():
continue
league = row.css_first("div.league")
title = row.css_first("div.title")
hds_a = row.css_first("div.hds a")
if not (league and title and hds_a):
continue
sport, event = league.text(strip=True), title.text(strip=True)
onclick = hds_a.attributes.get("onclick", "")
if not (chnl_id := "".join(s for s in onclick if s.isdigit())):
continue
key = f"[{sport}] {event} (STRMD)"
events[key] = {
"sport": sport,
"event": event,
"link": f"https://streamed.site/set.php?{chnl_id}",
"ts": event_dt.timestamp(),
}
return events

async def get_events(
client: httpx.AsyncClient,
url: str,
cached_keys: set[str],
) -> list[dict[str, str]]:
if not (events := load_cache(HTML_CACHE, exp=10_800)):
events = await refresh_html_cache(client, url)
write_cache(HTML_CACHE, events)
live = []
start_ts = (now - timedelta(minutes=30)).timestamp()
end_ts = (now + timedelta(minutes=30)).timestamp()
for k, v in events.items():
if cached_keys & {k}:
continue
if not start_ts <= v["ts"] < end_ts:
continue
live.append({**v})
return live

async def scrape(client: httpx.AsyncClient) -> None:
cached_urls = load_cache(CACHE_FILE, exp=10_800)
cached_count = len(cached_urls)
urls.update(cached_urls)
log.info(f"Collected {cached_count} event(s) from cache")
log.info(f'Scraping from "{base_url}"')
events = await get_events(
client,
base_url,
set(cached_urls.keys()),
)
log.info(f"Processing {len(events)} new URL(s)")
for i, ev in enumerate(events, start=1):
url = await safe_process_event(
lambda: process_event(ev["link"], url_num=i),
url_num=i,
log=log,
)
if url:
sport, event = ev["sport"], ev["event"]
tvg_id, logo = leagues.info(sport)
key = f"[{sport}] {event} (STRMD)"
entry = {
"url": url,
"logo": logo,
"base": "https://streamed.site/",
"timestamp": now.timestamp(),
"id": tvg_id,
}
urls[key] = cached_urls[key] = entry
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")
else:
log.info("No new events found")
write_cache(CACHE_FILE, cached_urls)
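
For orientation on the new scraper: streamed.py imports capture_req, load_cache, write_cache, new_browser and safe_process_event from scrapers.utils, none of which appear in this diff. The sketch below is a guess at how three of those helpers behave, inferred only from how this file calls them; the bodies are assumptions, not the repository's actual implementations.

# Hypothetical sketch of three scrapers.utils helpers; the real code may differ.
import json
import time
from asyncio import Event
from pathlib import Path

from playwright.async_api import Request


def capture_req(request: Request, *, captured: list[str], got_one: Event) -> None:
    # Assumed Playwright request handler: page.on("request", ...) invokes this for
    # every outgoing request; remember any .m3u8 URL and set the event so that
    # process_event stops waiting as soon as one stream manifest is seen.
    if ".m3u8" in request.url:
        captured.append(request.url)
        got_one.set()


def load_cache(path: Path, exp: int) -> dict:
    # Assumed cache reader: empty dict when the file is missing or older than exp seconds.
    if not path.is_file() or time.time() - path.stat().st_mtime > exp:
        return {}
    return json.loads(path.read_text())


def write_cache(path: Path, data: dict) -> None:
    # Assumed cache writer: persist the dict as JSON next to the other cache files.
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(data))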

View file

@@ -56,7 +56,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
entry = {
"url": f"http://origin.thetvapp.to/hls/{channel}/mono.m3u8",
"logo": logo,
"id": tvg_id or "Live.Event.us",
"id": tvg_id,
"base": "https://tvpass.org",
"timestamp": now.timestamp(),
}

View file

@@ -21,7 +21,7 @@ class Leagues:
def teams(self, league: str) -> list[str]:
return self.data["teams"].get(league, [])
-def info(self, name: str) -> tuple[str | None, str]:
+def info(self, name: str) -> tuple[str, str]:
name = name.upper()
if match := next(
@@ -38,7 +38,7 @@ class Leagues:
return (tvg_id, logo or live_img)
-return (None, live_img)
+return ("Live.Event.us", live_img)
def is_valid(self, event: str, league: str) -> bool:
if match := re.search(r"(\-|vs.?)", event):
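
The Leagues.info change above is what lets every call site in this commit drop the explicit or "Live.Event.us" fallback: unknown league names now come back with that id already filled in. A hedged usage sketch, with a made-up league name:

# Hypothetical example; the league name is illustrative only.
from scrapers.utils import leagues

tvg_id, logo = leagues.info("Some Obscure League")
# info() now returns ("Live.Event.us", live_img) for unknown names instead of
# (None, live_img), so entries can use the id directly:
entry = {"id": tvg_id, "logo": logo}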