Compare commits

...

36 commits

| Author | SHA1 | Message | Date |
| ------ | ---- | ------- | ---- |
| doms9 | 00000d9ebc | e (fix streamhub scraping; misc edits) | 2025-12-15 15:53:36 -05:00 |
| GitHub Actions Bot | 86a88e206e | health log | 2025-12-15 20:44:37 +00:00 |
| GitHub Actions Bot | 9ceaf58464 | update M3U8 | 2025-12-15 15:31:33 -05:00 |
| GitHub Actions Bot | 38d0b4789b | update M3U8 | 2025-12-15 15:01:42 -05:00 |
| GitHub Actions Bot | 1ed6ae2fa2 | update M3U8 | 2025-12-15 14:32:02 -05:00 |
| GitHub Actions Bot | 25099a00bc | update M3U8 | 2025-12-15 14:01:36 -05:00 |
| GitHub Actions Bot | 45b9e13357 | update EPG | 2025-12-15 18:59:48 +00:00 |
| GitHub Actions Bot | c2176bfa6c | update M3U8 | 2025-12-15 13:31:33 -05:00 |
| GitHub Actions Bot | 290177daaa | update M3U8 | 2025-12-15 13:01:38 -05:00 |
| GitHub Actions Bot | aaa01c8496 | update M3U8 | 2025-12-15 12:02:30 -05:00 |
| GitHub Actions Bot | 33129a8005 | update M3U8 | 2025-12-15 11:01:32 -05:00 |
| GitHub Actions Bot | d846a24f62 | update M3U8 | 2025-12-15 10:02:04 -05:00 |
| GitHub Actions Bot | 6cc57b8353 | health log | 2025-12-15 14:51:09 +00:00 |
| GitHub Actions Bot | 7404d016da | update M3U8 | 2025-12-15 09:00:40 -05:00 |
| GitHub Actions Bot | ed889f0c52 | update M3U8 | 2025-12-15 08:01:54 -05:00 |
| GitHub Actions Bot | 3cde03ff54 | update EPG | 2025-12-15 10:56:56 +00:00 |
| GitHub Actions Bot | 345f6df1d6 | health log | 2025-12-15 08:53:42 +00:00 |
| doms9 | 00000d9cc1 | e (cache all events for streamhub instead of live events) | 2025-12-15 02:06:46 -05:00 |
| GitHub Actions Bot | f755ffc78b | update M3U8 | 2025-12-14 23:30:26 -05:00 |
| GitHub Actions Bot | 91e4994c32 | update M3U8 | 2025-12-14 23:01:04 -05:00 |
| GitHub Actions Bot | 2f47e80d83 | update EPG | 2025-12-15 03:56:37 +00:00 |
| GitHub Actions Bot | 31f5671034 | health log | 2025-12-15 03:52:12 +00:00 |
| GitHub Actions Bot | 783953d797 | update M3U8 | 2025-12-14 22:30:45 -05:00 |
| GitHub Actions Bot | a953d526df | update M3U8 | 2025-12-14 22:00:51 -05:00 |
| GitHub Actions Bot | 5302dccdac | update M3U8 | 2025-12-14 21:30:30 -05:00 |
| GitHub Actions Bot | 58d4140a2e | update M3U8 | 2025-12-14 21:01:09 -05:00 |
| GitHub Actions Bot | 6f5f9c45fd | update M3U8 | 2025-12-14 20:30:51 -05:00 |
| GitHub Actions Bot | 7fdcefb0c1 | update M3U8 | 2025-12-14 20:01:11 -05:00 |
| GitHub Actions Bot | 819b3f5f1f | update M3U8 | 2025-12-14 19:31:07 -05:00 |
| GitHub Actions Bot | 65a5e11448 | update M3U8 | 2025-12-14 19:01:52 -05:00 |
| GitHub Actions Bot | 19cb160712 | update M3U8 | 2025-12-14 18:31:37 -05:00 |
| GitHub Actions Bot | d5f714251e | update M3U8 | 2025-12-14 18:02:27 -05:00 |
| GitHub Actions Bot | 69d67c467c | update M3U8 | 2025-12-14 17:31:25 -05:00 |
| GitHub Actions Bot | 3e1cac41c1 | update M3U8 | 2025-12-14 17:01:45 -05:00 |
| GitHub Actions Bot | bb3600ede9 | update M3U8 | 2025-12-14 16:32:15 -05:00 |
| GitHub Actions Bot | a1b593d216 | update M3U8 | 2025-12-14 16:02:56 -05:00 |
12 changed files with 89623 additions and 91548 deletions

EPG/TV.xml (175318)

File diff suppressed because one or more lines are too long.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

```diff
@@ -62,7 +62,7 @@ async def main() -> None:
         asyncio.create_task(streamhub.scrape(network.client)),
         asyncio.create_task(streamsgate.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
-        # asyncio.create_task(timstreams.scrape(network.client)),
+        asyncio.create_task(timstreams.scrape(network.client)),
         asyncio.create_task(tvpass.scrape(network.client)),
         asyncio.create_task(watchfooty.scrape(network.client)),
         asyncio.create_task(webcast.scrape(network.client)),
```
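The hunk above re-enables the timstreams scraper so it runs alongside the other modules; each scraper coroutine is started as its own task and awaited together. Below is a minimal sketch of that fan-out pattern; the placeholder coroutines and string client are hypothetical stand-ins for the real scraper modules and network.client:

```python
import asyncio


# Placeholder coroutines standing in for the real scraper modules.
async def scrape_streamhub(client: str) -> str:
    await asyncio.sleep(0.1)  # simulate network I/O
    return "streamhub done"


async def scrape_timstreams(client: str) -> str:
    await asyncio.sleep(0.1)
    return "timstreams done"


async def main() -> None:
    client = "shared-http-client"  # stand-in for the shared network.client

    # Launch every scraper concurrently, then wait for all of them.
    tasks = [
        asyncio.create_task(scrape_streamhub(client)),
        asyncio.create_task(scrape_timstreams(client)),
    ]
    results = await asyncio.gather(*tasks)
    print(results)


asyncio.run(main())
```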

```diff
@@ -1,5 +1,4 @@
 import json
-import re
 
 from playwright.async_api import async_playwright
@@ -49,8 +48,6 @@ async def get_events() -> dict[str, dict[str, str | float]]:
     events = {}
 
-    pattern = re.compile(r"https?://[^\s'\"]+?\.m3u8(?:\?[^\s'\"]*)?", re.IGNORECASE)
-
     for event in api_data.get("events", []):
         event_dt = Time.from_str(event["date"], timezone="UTC")
@@ -66,19 +63,18 @@ async def get_events() -> dict[str, dict[str, str | float]]:
         stream_urls = [(i, f"server{i}URL") for i in range(1, 4)]
 
         for z, stream_url in stream_urls:
-            if stream_link := channel_info.get(stream_url):
-                if pattern.search(stream_link):
-                    key = f"[{sport}] {event_name} {z} ({TAG})"
-
-                    tvg_id, logo = leagues.get_tvg_info(sport, event_name)
-
-                    events[key] = {
-                        "url": stream_link,
-                        "logo": logo,
-                        "base": "https://pixelsport.tv",
-                        "timestamp": now.timestamp(),
-                        "id": tvg_id or "Live.Event.us",
-                    }
+            if (stream_link := channel_info.get(stream_url)) and stream_link != "null":
+                key = f"[{sport}] {event_name} {z} ({TAG})"
+
+                tvg_id, logo = leagues.get_tvg_info(sport, event_name)
+
+                events[key] = {
+                    "url": stream_link,
+                    "logo": logo,
+                    "base": "https://pixelsport.tv",
+                    "timestamp": now.timestamp(),
+                    "id": tvg_id or "Live.Event.us",
+                }
 
     return events
```

```diff
@@ -57,6 +57,18 @@ async def process_event(
     return match[1]
 
 
+async def get_html_data(client: httpx.AsyncClient, url: str) -> bytes:
+    try:
+        r = await client.get(url)
+        r.raise_for_status()
+
+    except Exception as e:
+        log.error(f'Failed to fetch "{url}": {e}')
+        return b""
+
+    return r.content
+
+
 async def refresh_html_cache(
     client: httpx.AsyncClient,
     url: str,
@@ -64,15 +76,9 @@ async def refresh_html_cache(
     now_ts: float,
 ) -> dict[str, dict[str, str | float]]:
-    try:
-        r = await client.get(url)
-        r.raise_for_status()
-
-    except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
-        return {}
-
-    soup = HTMLParser(r.content)
+    html_data = await get_html_data(client, url)
+
+    soup = HTMLParser(html_data)
 
     events = {}
@@ -108,16 +114,15 @@
 async def get_events(
-    client: httpx.AsyncClient,
-    sport_urls: dict[str, str],
-    cached_keys: set[str],
+    client: httpx.AsyncClient, cached_keys: set[str]
 ) -> list[dict[str, str]]:
     now = Time.clean(Time.now())
 
     if not (events := HTML_CACHE.load()):
         log.info("Refreshing HTML cache")
 
+        sport_urls = {sport: urljoin(BASE_URL, sport) for sport in SPORT_ENDPOINTS}
+
         tasks = [
             refresh_html_cache(
                 client,
@@ -160,13 +165,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
     log.info(f'Scraping from "{BASE_URL}"')
 
-    sport_urls = {sport: urljoin(BASE_URL, sport) for sport in SPORT_ENDPOINTS}
-
-    events = await get_events(
-        client,
-        sport_urls,
-        set(cached_urls.keys()),
-    )
+    events = await get_events(client, set(cached_urls.keys()))
 
     log.info(f"Processing {len(events)} new URL(s)")
```

```diff
@@ -47,7 +47,6 @@ async def process_event(
 async def refresh_html_cache(
     client: httpx.AsyncClient, now_ts: float
 ) -> dict[str, dict[str, str | float]]:
-
     log.info("Refreshing HTML cache")
 
     try:
```

```diff
@@ -29,7 +29,7 @@ async def get_html_data(
         r = await client.get(url, params={"date": date})
         r.raise_for_status()
 
     except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')
         return b""
```

```diff
@@ -42,7 +42,7 @@ async def refresh_api_cache(
         r = await client.get(BASE_URL, params={"pageNumber": 1, "pageSize": 500})
         r.raise_for_status()
 
     except Exception as e:
-        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')
         return []
```

```diff
@@ -17,12 +17,10 @@ BASE_URL = "https://streamfree.to/"
 async def refresh_api_cache(client: httpx.AsyncClient) -> dict[str, dict[str, list]]:
     try:
-        url = urljoin(BASE_URL, "streams")
-
-        r = await client.get(url)
+        r = await client.get(urljoin(BASE_URL, "streams"))
         r.raise_for_status()
 
     except Exception as e:
-        log.error(f'Failed to fetch "{url}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')
         return {}
```

```diff
@@ -1,5 +1,6 @@
 import asyncio
 from functools import partial
+from urllib.parse import urljoin
 
 import httpx
 from playwright.async_api import async_playwright
@@ -15,7 +16,9 @@ TAG = "STRMHUB"
 CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=28_800)
 
-BASE_URL = "https://streamhub.pro/live-now"
+BASE_URL = "https://streamhub.pro/"
 
 CATEGORIES = {
@@ -33,69 +36,126 @@ CATEGORIES = {
 }
 
 
-async def get_html_data(client: httpx.AsyncClient, sport: str) -> bytes:
+async def get_html_data(
+    client: httpx.AsyncClient,
+    date: str,
+    sport_id: str,
+) -> bytes:
     try:
-        r = await client.get(BASE_URL, params={"sport_id": sport})
+        r = await client.get(
+            urljoin(BASE_URL, f"events/{date}"),
+            params={"sport_id": sport_id},
+        )
         r.raise_for_status()
 
     except Exception as e:
-        log.error(f'Failed to fetch "{BASE_URL}": {e}')
+        log.error(f'Failed to fetch "{r.url}": {e}')
        return b""
 
     return r.content
 
 
-async def get_events(
-    client: httpx.AsyncClient, cached_keys: set[str]
-) -> list[dict[str, str]]:
-    tasks = [get_html_data(client, sport) for sport in CATEGORIES.values()]
-    results = await asyncio.gather(*tasks)
-
-    soups = [HTMLParser(html) for html in results]
-
-    events = []
-
-    for soup in soups:
-        for section in soup.css(".events-section"):
-            if not (sport_node := section.css_first(".section-titlte")):
-                continue
-
-            sport = sport_node.text(strip=True)
-
-            logo = section.css_first(".league-icon img").attributes.get("src")
-
-            for event in section.css(".section-event"):
-                event_name = "Live Event"
-
-                if teams := event.css_first(".event-competitors"):
-                    home, away = teams.text(strip=True).split("vs.")
-
-                    event_name = f"{away} vs {home}"
-
-                if not (event_button := event.css_first("div.event-button a")) or not (
-                    href := event_button.attributes.get("href")
-                ):
-                    continue
-
-                key = f"[{sport}] {event_name} ({TAG})"
-
-                if cached_keys & {key}:
-                    continue
-
-                events.append(
-                    {
-                        "sport": sport,
-                        "event": event_name,
-                        "link": href,
-                        "logo": logo,
-                    }
-                )
-
-    return events
+async def refresh_html_cache(
+    client: httpx.AsyncClient,
+    date: str,
+    sport_id: str,
+    ts: float,
+) -> dict[str, dict[str, str | float]]:
+    html_data = await get_html_data(client, date, sport_id)
+
+    soup = HTMLParser(html_data)
+
+    events = {}
+
+    for section in soup.css(".events-section"):
+        if not (sport_node := section.css_first(".section-titlte")):
+            continue
+
+        sport = sport_node.text(strip=True)
+
+        logo = section.css_first(".league-icon img").attributes.get("src")
+
+        for event in section.css(".section-event"):
+            event_name = "Live Event"
+
+            if teams := event.css_first(".event-competitors"):
+                home, away = teams.text(strip=True).split("vs.")
+
+                event_name = f"{away} vs {home}"
+
+            if not (event_button := event.css_first(".event-button a")) or not (
+                href := event_button.attributes.get("href")
+            ):
+                continue
+
+            event_date = event.css_first(".event-countdown").attributes.get(
+                "data-start"
+            )
+
+            event_dt = Time.from_str(event_date, timezone="UTC")
+
+            key = f"[{sport}] {event_name} ({TAG})"
+
+            events[key] = {
+                "sport": sport,
+                "event": event_name,
+                "link": href,
+                "logo": logo,
+                "timestamp": ts,
+                "event_ts": event_dt.timestamp(),
+            }
+
+    return events
+
+
+async def get_events(
+    client: httpx.AsyncClient,
+    cached_keys: set[str],
+) -> list[dict[str, str]]:
+    now = Time.clean(Time.now())
+
+    if not (events := HTML_CACHE.load()):
+        log.info("Refreshing HTML cache")
+
+        dates = [now.date(), now.delta(days=1).date()]
+
+        tasks = [
+            refresh_html_cache(
+                client,
+                date,
+                sport_id,
+                now.timestamp(),
+            )
+            for date in dates
+            for sport_id in CATEGORIES.values()
+        ]
+
+        results = await asyncio.gather(*tasks)
+
+        events = {k: v for data in results for k, v in data.items()}
+
+        HTML_CACHE.write(events)
+
+    live = []
+
+    start_ts = now.delta(hours=-1).timestamp()
+    end_ts = now.delta(minutes=5).timestamp()
+
+    for k, v in events.items():
+        if cached_keys & {k}:
+            continue
+
+        if not start_ts <= v["event_ts"] <= end_ts:
+            continue
+
+        live.append({**v})
+
+    return live
 
 
 async def scrape(client: httpx.AsyncClient) -> None:
     cached_urls = CACHE_FILE.load()
 
     valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
```
```diff
@@ -111,8 +171,6 @@ async def scrape(client: httpx.AsyncClient) -> None:
     log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
-        now = Time.now().timestamp()
-
         async with async_playwright() as p:
             browser, context = await network.browser(p)
```
```diff
@@ -132,11 +190,12 @@ async def scrape(client: httpx.AsyncClient) -> None:
                     log=log,
                 )
 
-                sport, event, logo, link = (
+                sport, event, logo, link, ts = (
                     ev["sport"],
                     ev["event"],
                     ev["logo"],
                     ev["link"],
+                    ev["event_ts"],
                 )
 
                 key = f"[{sport}] {event} ({TAG})"
```
```diff
@@ -147,7 +206,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
                     "url": url,
                     "logo": logo or pic,
                     "base": "https://storytrench.net/",
-                    "timestamp": now,
+                    "timestamp": ts,
                     "id": tvg_id or "Live.Event.us",
                     "link": link,
                 }
```
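Taken together with the "cache all events for streamhub instead of live events" commit, the rewrite above caches every upcoming streamhub event (today and tomorrow, per category) and only selects the live ones when get_events runs: events that started within the last hour or start within the next five minutes. Below is a minimal sketch of that selection window, using plain datetime in place of the repo's Time and Cache helpers; the CACHED_EVENTS sample data and the live_events function are hypothetical stand-ins:

```python
from datetime import datetime, timedelta, timezone

# Hypothetical stand-in for the repo's HTML_CACHE contents.
CACHED_EVENTS: dict[str, dict[str, object]] = {
    "[Soccer] Team A vs Team B (STRMHUB)": {
        "link": "/event/123",
        "event_ts": datetime.now(timezone.utc).timestamp() + 120,  # starts in 2 minutes
    },
    "[Basketball] Team C vs Team D (STRMHUB)": {
        "link": "/event/456",
        "event_ts": datetime.now(timezone.utc).timestamp() + 86_400,  # starts tomorrow
    },
}


def live_events(cached_keys: set[str]) -> list[dict[str, object]]:
    """Keep only events that started within the last hour or start within
    the next five minutes, skipping keys that were already scraped."""
    now = datetime.now(timezone.utc)
    start_ts = (now - timedelta(hours=1)).timestamp()
    end_ts = (now + timedelta(minutes=5)).timestamp()

    live = []
    for key, event in CACHED_EVENTS.items():
        if key in cached_keys:
            continue
        if not start_ts <= event["event_ts"] <= end_ts:
            continue
        live.append({**event})
    return live


# Only the match starting in two minutes falls inside the live window.
print(live_events(cached_keys=set()))
```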

````diff
@@ -1,12 +1,10 @@
-## Base Log @ 2025-12-14 20:40 UTC
+## Base Log @ 2025-12-15 20:44 UTC
 
-### ✅ Working Streams: 143<br>❌ Dead Streams: 3
+### ✅ Working Streams: 145<br>❌ Dead Streams: 1
 
 | Channel | Error (Code) | Link |
 | ------- | ------------ | ---- |
 | FDSN Florida | HTTP Error (403) | `http://cord-cutter.net:8080/k4Svp2/645504/46794` |
-| Spectrum SportsNet LA Dodgers | HTTP Error (502) | `http://cord-cutter.net:8080/k4Svp2/645504/31636` |
-| getTV | HTTP Error (403) | `http://cord-cutter.net:8080/k4Svp2/645504/18366` |
 
 ---
 
 #### Base Channels URL
 ```
````