add streamhub
rename cache files
doms9 2025-12-13 16:57:14 -05:00
parent 4d72d60227
commit 00000d930e
18 changed files with 230 additions and 112 deletions

M3U8/main.py

@@ -14,6 +14,7 @@ from scrapers import (
     streambtw,
     streamcenter,
     streamfree,
+    streamhub,
     streamsgate,
     strmd,
     timstreams,
@@ -58,6 +59,7 @@ async def main() -> None:
         asyncio.create_task(streambtw.scrape(network.client)),
         asyncio.create_task(streamcenter.scrape(network.client)),
         asyncio.create_task(streamfree.scrape(network.client)),
+        asyncio.create_task(streamhub.scrape(network.client)),
         asyncio.create_task(streamsgate.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
         asyncio.create_task(timstreams.scrape(network.client)),
@@ -80,6 +82,7 @@ async def main() -> None:
         | streamcenter.urls
         | strmd.urls
         | streamfree.urls
+        | streamhub.urls
         | streamsgate.urls
         | timstreams.urls
         | tvpass.urls

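Note: the three hunks above touch an import list, a task list, and a dict union of per-module results, which implies a uniform contract for every scraper. A minimal sketch of that contract as inferred from this diff alone (the urls attribute and the scrape signature appear in the hunks; everything else is an assumption):

    # sketch: what main.py appears to require of each scrapers/*.py module
    import httpx

    # module-level result store; main() merges these stores with "|"
    urls: dict[str, dict[str, str | float]] = {}

    async def scrape(client: httpx.AsyncClient) -> None:
        """Populate urls in place; main() runs this as an asyncio task."""
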
M3U8/scrapers/fawa.py

@@ -11,12 +11,12 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("fawa.json", exp=10_800)
+TAG = "FAWA"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
 BASE_URL = "http://www.fawanews.sc/"
-TAG = "FAWA"
 async def process_event(
     client: httpx.AsyncClient,

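Every scraper below gets the same reshuffle: TAG moves above the cache constants and the filename is derived from it, so the tag becomes the single source of truth. The "rename cache files" half of this commit falls out wherever tag and old filename disagreed (e.g. sport9.json becomes sprt9.json below). The pattern in isolation, assuming Cache(filename, exp=seconds) as used throughout:

    TAG = "FAWA"
    CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)  # -> "fawa.json", 3-hour expiry
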
M3U8/scrapers/lotus.py

@@ -9,14 +9,14 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("lotus.json", exp=5_400)
-API_CACHE = Cache("lotus-api.json", exp=28_800)
+TAG = "LOTUS"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=5_400)
+API_CACHE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 BASE_URL = "https://lotusgamehd.xyz/api-event.php"
-TAG = "LOTUS"
 def fix_league(s: str) -> str:
     return " ".join(x.capitalize() for x in s.split()) if len(s) > 5 else s.upper()
@@ -46,16 +46,14 @@ async def refresh_api_cache(
 async def get_events(
-    client: httpx.AsyncClient,
-    url: str,
-    cached_keys: set[str],
+    client: httpx.AsyncClient, cached_keys: set[str]
 ) -> list[dict[str, str]]:
     now = Time.now()
     if not (api_data := API_CACHE.load(per_entry=False)):
         api_data = await refresh_api_cache(
             client,
-            url,
+            BASE_URL,
             now.timestamp(),
         )
@@ -108,11 +106,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
     log.info(f'Scraping from "{BASE_URL}"')
-    events = await get_events(
-        client,
-        BASE_URL,
-        set(cached_urls.keys()),
-    )
+    events = await get_events(client, set(cached_urls.keys()))
     log.info(f"Processing {len(events)} new URL(s)")

M3U8/scrapers/pixel.py

@@ -9,14 +9,14 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("pixel.json", exp=19_800)
+TAG = "PIXEL"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=19_800)
 BASE_URL = "https://pixelsport.tv/backend/livetv/events"
-TAG = "PIXL"
-async def get_api_data(url: str) -> dict[str, list[dict, str, str]]:
+async def get_api_data() -> dict[str, list[dict, str, str]]:
     async with async_playwright() as p:
         try:
             browser, context = await network.browser(p)
@@ -24,7 +24,7 @@ async def get_api_data(url: str) -> dict[str, list[dict, str, str]]:
             page = await context.new_page()
             await page.goto(
-                url,
+                BASE_URL,
                 wait_until="domcontentloaded",
                 timeout=10_000,
             )
@@ -32,7 +32,7 @@ async def get_api_data(url: str) -> dict[str, list[dict, str, str]]:
             raw_json = await page.locator("pre").inner_text(timeout=5_000)
         except Exception as e:
-            log.error(f'Failed to fetch "{url}": {e}')
+            log.error(f'Failed to fetch "{BASE_URL}": {e}')
             return {}
@@ -45,7 +45,7 @@ async def get_api_data(url: str) -> dict[str, list[dict, str, str]]:
 async def get_events() -> dict[str, dict[str, str | float]]:
     now = Time.clean(Time.now())
-    api_data = await get_api_data(BASE_URL)
+    api_data = await get_api_data()
     events = {}

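get_api_data no longer accepts a url; it always drives a headless browser at BASE_URL and reads the JSON payload out of the <pre> element the endpoint renders. A condensed sketch of that fetch path, assuming network.browser is the repo helper seen in the hunks and that it yields a (browser, context) pair:

    from playwright.async_api import async_playwright

    async def fetch_raw_json() -> str:
        # condensed from the hunks above
        async with async_playwright() as p:
            browser, context = await network.browser(p)
            page = await context.new_page()
            await page.goto(BASE_URL, wait_until="domcontentloaded", timeout=10_000)
            raw_json = await page.locator("pre").inner_text(timeout=5_000)
            await browser.close()  # assumption: the real module cleans up elsewhere
            return raw_json
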
M3U8/scrapers/ppv.py

@@ -9,9 +9,11 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("ppv.json", exp=10_800)
-API_FILE = Cache("ppv-api.json", exp=19_800)
+TAG = "PPV"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=19_800)
 API_MIRRORS = [
     "https://old.ppv.to/api/streams",
@@ -25,8 +27,6 @@ BASE_MIRRORS = [
     "https://ppv.to",
 ]
-TAG = "PPV"
 async def refresh_api_cache(
     client: httpx.AsyncClient,

M3U8/scrapers/roxie.py

@@ -12,9 +12,11 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("roxie.json", exp=10_800)
-HTML_CACHE = Cache("roxie-html.json", exp=19_800)
+TAG = "ROXIE"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=19_800)
 BASE_URL = "https://roxiestreams.live"
@@ -28,8 +30,6 @@ SPORT_ENDPOINTS = {
     "soccer": "Soccer",
 }
-TAG = "ROXIE"
 async def process_event(
     client: httpx.AsyncClient,

M3U8/scrapers/shark.py

@@ -10,14 +10,14 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("shark.json", exp=10_800)
-HTML_CACHE = Cache("shark-html.json", exp=19_800)
+TAG = "SHARK"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=19_800)
 BASE_URL = "https://sharkstreams.net"
-TAG = "SHARK"
 async def process_event(
     client: httpx.AsyncClient,
@@ -45,18 +45,16 @@ async def process_event(
 async def refresh_html_cache(
-    client: httpx.AsyncClient,
-    url: str,
-    now_ts: float,
+    client: httpx.AsyncClient, now_ts: float
 ) -> dict[str, dict[str, str | float]]:
     log.info("Refreshing HTML cache")
     try:
-        r = await client.get(url)
+        r = await client.get(BASE_URL)
         r.raise_for_status()
     except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{BASE_URL}": {e}')
         return {}
@@ -108,11 +106,7 @@ async def get_events(
     now = Time.clean(Time.now())
     if not (events := HTML_CACHE.load()):
-        events = await refresh_html_cache(
-            client,
-            BASE_URL,
-            now.timestamp(),
-        )
+        events = await refresh_html_cache(client, now.timestamp())
         HTML_CACHE.write(events)

M3U8/scrapers/sport9.py

@@ -12,12 +12,12 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("sport9.json", exp=3_600)
+TAG = "SPRT9"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
 BASE_URL = "https://sport9.ru"
-TAG = "SPRT9"
 async def get_html(
     client: httpx.AsyncClient,

M3U8/scrapers/streambtw.py

@@ -12,12 +12,12 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("streambtw.json", exp=3_600)
+TAG = "STRMBTW"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
 BASE_URL = "https://streambtw.com"
-TAG = "STRMBTW"
 def fix_league(s: str) -> str:
     pattern = re.compile(r"^\w*-\w*", re.IGNORECASE)

M3U8/scrapers/streamcenter.py

@@ -9,13 +9,14 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("streamcenter.json", exp=10_800)
-API_FILE = Cache("streamcenter-api.json", exp=28_800)
+TAG = "STRMCNTR"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 BASE_URL = "https://backendstreamcenter.youshop.pro:488/api/Parties"
-TAG = "STRMCNTR"
 categories = {
     4: "Basketball",
@@ -33,17 +34,15 @@ categories = {
 async def refresh_api_cache(
-    client: httpx.AsyncClient,
-    url: str,
-    now_ts: float,
+    client: httpx.AsyncClient, now_ts: float
 ) -> list[dict[str, str | int]]:
     log.info("Refreshing API cache")
     try:
-        r = await client.get(url, params={"pageNumber": 1, "pageSize": 500})
+        r = await client.get(BASE_URL, params={"pageNumber": 1, "pageSize": 500})
         r.raise_for_status()
     except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{BASE_URL}": {e}')
         return []
@@ -62,11 +61,7 @@ async def get_events(
     now = Time.clean(Time.now())
     if not (api_data := API_FILE.load(per_entry=False, index=-1)):
-        api_data = await refresh_api_cache(
-            client,
-            BASE_URL,
-            now.timestamp(),
-        )
+        api_data = await refresh_api_cache(client, now.timestamp())
         API_FILE.write(api_data)

M3U8/scrapers/streamfree.py

@@ -8,18 +8,17 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("streamfree.json", exp=19_800)
-BASE_URL = "https://streamfree.to"
-TAG = "STRMFR"
+TAG = "STRMFREE"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=19_800)
+BASE_URL = "https://streamfree.to/"
-async def refresh_api_cache(
-    client: httpx.AsyncClient,
-    url: str,
-) -> dict[str, dict[str, list]]:
+async def refresh_api_cache(client: httpx.AsyncClient) -> dict[str, dict[str, list]]:
     try:
+        url = urljoin(BASE_URL, "streams")
         r = await client.get(url)
         r.raise_for_status()
     except Exception as e:
@@ -31,7 +30,7 @@ async def refresh_api_cache(
 async def get_events(client: httpx.AsyncClient) -> dict[str, dict[str, str | float]]:
-    api_data = await refresh_api_cache(client, urljoin(BASE_URL, "streams"))
+    api_data = await refresh_api_cache(client)
     events = {}

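refresh_api_cache now builds its own endpoint URL instead of receiving one, and BASE_URL gained a trailing slash. Assuming urljoin here is the standard urllib.parse.urljoin, the join resolves the same way the old call site did:

    from urllib.parse import urljoin

    urljoin("https://streamfree.to/", "streams")  # -> "https://streamfree.to/streams"
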
M3U8/scrapers/streamhub.py (new file, 142 lines)

@@ -0,0 +1,142 @@
+from functools import partial
+
+import httpx
+from playwright.async_api import async_playwright
+from selectolax.parser import HTMLParser
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+TAG = "STRMHUB"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+BASE_URL = "https://streamhub.pro/live-now"
+
+
+async def get_events(
+    client: httpx.AsyncClient, cached_keys: set[str]
+) -> list[dict[str, str]]:
+    try:
+        r = await client.get(BASE_URL)
+        r.raise_for_status()
+    except Exception as e:
+        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+        return []
+
+    soup = HTMLParser(r.content)
+    events = []
+
+    for event in soup.css(".events-section"):
+        if not (title_node := event.css_first(".section-titlte")):
+            continue
+
+        sport = title_node.text(strip=True)
+
+        if not event.css_first(".event-competitors"):
+            continue
+
+        home_team = event.css_first(".event-home-team").text(strip=True)
+        away_team = event.css_first(".event-visitor-team").text(strip=True)
+        logo = event.css_first(".league-icon img").attributes.get("src")
+
+        if not (event_button := event.css_first("div.event-button a")) or not (
+            href := event_button.attributes.get("href")
+        ):
+            continue
+
+        event_name = f"{away_team} vs {home_team}"
+        key = f"[{sport}] {event_name} ({TAG})"
+
+        if cached_keys & {key}:
+            continue
+
+        events.append(
+            {
+                "sport": sport,
+                "event": event_name,
+                "link": href,
+                "logo": logo,
+            }
+        )
+
+    return events
+
+
+async def scrape(client: httpx.AsyncClient) -> None:
+    cached_urls = CACHE_FILE.load()
+    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+    valid_count = cached_count = len(valid_urls)
+
+    urls.update(valid_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+    log.info(f'Scraping from "{BASE_URL}"')
+
+    events = await get_events(client, set(cached_urls.keys()))
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    if events:
+        now = Time.now().timestamp()
+
+        async with async_playwright() as p:
+            browser, context = await network.browser(p)
+
+            for i, ev in enumerate(events, start=1):
+                handler = partial(
+                    network.process_event,
+                    url=ev["link"],
+                    url_num=i,
+                    context=context,
+                    timeout=5,
+                    log=log,
+                )
+
+                url = await network.safe_process(
+                    handler,
+                    url_num=i,
+                    log=log,
+                )
+
+                sport, event, logo, link = (
+                    ev["sport"],
+                    ev["event"],
+                    ev["logo"],
+                    ev["link"],
+                )
+
+                key = f"[{sport}] {event} ({TAG})"
+                tvg_id, pic = leagues.get_tvg_info(sport, event)
+
+                entry = {
+                    "url": url,
+                    "logo": logo or pic,
+                    "base": "https://storytrench.net/",
+                    "timestamp": now,
+                    "id": tvg_id or "Live.Event.us",
+                    "link": link,
+                }
+
+                cached_urls[key] = entry
+
+                if url:
+                    valid_count += 1
+                    urls[key] = entry
+
+            await browser.close()
+
+    if new_count := valid_count - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)

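A minimal standalone driver for the new module, for anyone testing it outside main.py (hypothetical snippet, not part of this commit; main.py passes the shared network.client rather than a fresh AsyncClient):

    import asyncio

    import httpx

    from scrapers import streamhub

    async def demo() -> None:
        async with httpx.AsyncClient(timeout=10) as client:
            await streamhub.scrape(client)
        print(f"{len(streamhub.urls)} playable event(s) collected")

    asyncio.run(demo())
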
M3U8/scrapers/streamsgate.py

@@ -13,9 +13,11 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("streamsgate.json", exp=10_800)
-API_FILE = Cache("streamsgate-api.json", exp=28_800)
+TAG = "STRMSG8"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 BASE_URL = "https://streamingon.org"
@@ -31,8 +33,6 @@ SPORT_ENDPOINTS = [
     "f1",
 ]
-TAG = "STRMSG8"
 def get_event(t1: str, t2: str) -> str:
     match t1:
@@ -94,8 +94,8 @@ async def get_events(
     events = []
-    start_dt = now.delta(minutes=-30)
-    end_dt = now.delta(minutes=30)
+    start_dt = now.delta(hours=-1)
+    end_dt = now.delta(minutes=10)
     for stream_group in api_data:
         event_ts = stream_group.get("ts")

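The last hunk widens streamsgate's pickup window asymmetrically: instead of plus-or-minus 30 minutes around now, it accepts anything that started up to an hour ago or starts within the next 10 minutes, favoring events already in progress. Assuming now.delta() returns an offset time object and each stream group's ts is an epoch timestamp (both as used by the surrounding code), the filter amounts to:

    start_dt = now.delta(hours=-1)   # window opens 60 minutes in the past
    end_dt = now.delta(minutes=10)   # and closes 10 minutes ahead
    in_window = start_dt.timestamp() <= event_ts <= end_dt.timestamp()
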
M3U8/scrapers/strmd.py

@@ -12,9 +12,11 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("strmd.json", exp=10_800)
-API_FILE = Cache("strmd-api.json", exp=28_800)
+TAG = "STRMD"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 MIRRORS = [
     "https://streami.su",
@@ -22,8 +24,6 @@ MIRRORS = [
     "https://streamed.pk",
 ]
-TAG = "STRMD"
 def fix_sport(s: str) -> str:
     if "-" in s:

M3U8/scrapers/timstreams.py

@@ -10,7 +10,9 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("timstreams.json", exp=10_800)
+TAG = "TIM"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
 API_URL = "https://api.timstreams.site/main"
@@ -21,7 +23,6 @@ BASE_MIRRORS = [
     "https://timstreams.top",
 ]
-TAG = "TIM"
 sport_genres = {
     1: "Soccer",
@@ -44,14 +45,12 @@ sport_genres = {
 }
-async def refresh_api_cache(
-    client: httpx.AsyncClient, url: str
-) -> list[dict[str, Any]]:
+async def refresh_api_cache(client: httpx.AsyncClient) -> list[dict[str, Any]]:
     try:
-        r = await client.get(url)
+        r = await client.get(API_URL)
         r.raise_for_status()
     except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{API_URL}": {e}')
         return []
@@ -59,12 +58,9 @@ async def refresh_api_cache(
 async def get_events(
-    client: httpx.AsyncClient,
-    url: str,
-    cached_keys: set[str],
+    client: httpx.AsyncClient, cached_keys: set[str]
 ) -> list[dict[str, str]]:
-    api_data = await refresh_api_cache(client, url)
+    api_data = await refresh_api_cache(client)
     now = Time.now().timestamp()
@@ -125,11 +121,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
     log.info(f'Scraping from "{base_url}"')
-    events = await get_events(
-        client,
-        API_URL,
-        set(cached_urls.keys()),
-    )
+    events = await get_events(client, set(cached_urls.keys()))
     log.info(f"Processing {len(events)} new URL(s)")

M3U8/scrapers/tvpass.py

@@ -8,12 +8,12 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("tvpass.json", exp=86_400)
+TAG = "TVP"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=86_400)
 BASE_URL = "https://tvpass.org/playlist/m3u"
-TAG = "TVP"
 async def get_data(client: httpx.AsyncClient) -> list[str]:
     try:

M3U8/scrapers/watchfty.py

@@ -14,9 +14,11 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("watchfty.json", exp=10_800)
-API_FILE = Cache("watchfty-api.json", exp=28_800)
+TAG = "WFTY"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
 API_MIRRORS = ["https://api.watchfooty.top", "https://api.watchfooty.st"]
@@ -39,8 +41,6 @@ SPORT_ENDPOINTS = [
     # "volleyball",
 ]
-TAG = "WFTY"
 async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, Any]]:
     try:
@@ -55,8 +55,7 @@ async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, Any]]:
 async def refresh_api_cache(
-    client: httpx.AsyncClient,
-    url: str,
+    client: httpx.AsyncClient, url: str
 ) -> list[dict[str, Any]]:
     log.info("Refreshing API cache")

M3U8/scrapers/webcast.py

@@ -11,14 +11,14 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache("webcast.json", exp=10_800)
-HTML_CACHE = Cache("webcast-html.json", exp=86_400)
+TAG = "WEBCST"
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=86_400)
 BASE_URLS = {"NFL": "https://nflwebcast.com", "NHL": "https://slapstreams.com"}
-TAG = "WEBCST"
 def fix_event(s: str) -> str:
     return " vs ".join(s.split("@"))