Compare commits

...

37 commits

Author SHA1 Message Date
GitHub Actions Bot
5cbfb6888e update EPG 2026-02-08 04:53:26 +00:00
GitHub Actions Bot
f8dd16100b health log 2026-02-08 04:41:44 +00:00
GitHub Actions Bot
320d7c76b0 update M3U8 2026-02-07 23:31:25 -05:00
GitHub Actions Bot
a84b5754e1 update M3U8 2026-02-07 23:02:17 -05:00
GitHub Actions Bot
21f2c4c518 update M3U8 2026-02-07 22:31:48 -05:00
GitHub Actions Bot
49b93b08e9 update M3U8 2026-02-07 22:02:40 -05:00
GitHub Actions Bot
9c93cce6f2 update M3U8 2026-02-07 21:31:22 -05:00
GitHub Actions Bot
6c71a7cfcb update M3U8 2026-02-07 21:03:13 -05:00
GitHub Actions Bot
4b3d7e094e update M3U8 2026-02-07 20:33:46 -05:00
GitHub Actions Bot
a0b78f4670 update M3U8 2026-02-07 20:05:37 -05:00
GitHub Actions Bot
8718d9e27b update M3U8 2026-02-07 19:32:28 -05:00
GitHub Actions Bot
de3ba7fade update M3U8 2026-02-07 19:06:17 -05:00
GitHub Actions Bot
f94e4e1e17 update M3U8 2026-02-07 18:33:21 -05:00
GitHub Actions Bot
b5943aab21 update M3U8 2026-02-07 18:06:02 -05:00
GitHub Actions Bot
046c3c0cc9 update M3U8 2026-02-07 17:31:41 -05:00
doms9
00000d991e e
fix tvpass.py scraping
2026-02-07 17:17:19 -05:00
GitHub Actions Bot
c1a3e4ba68 update M3U8 2026-02-07 17:04:10 -05:00
GitHub Actions Bot
c304ae3390 update M3U8 2026-02-07 16:34:19 -05:00
GitHub Actions Bot
66b0c3eaf2 update M3U8 2026-02-07 16:11:54 -05:00
GitHub Actions Bot
bc79f9b45a health log 2026-02-07 20:49:56 +00:00
GitHub Actions Bot
354ff4365b update M3U8 2026-02-07 15:33:09 -05:00
GitHub Actions Bot
a2747b0f15 update M3U8 2026-02-07 15:09:15 -05:00
doms9
00000d9f21 e
re-add totalsportek.py
fix xstreameast.py scraping
2026-02-07 14:49:12 -05:00
GitHub Actions Bot
bdc2048a26 update M3U8 2026-02-07 14:36:42 -05:00
GitHub Actions Bot
79226f6d75 update M3U8 2026-02-07 14:14:32 -05:00
GitHub Actions Bot
25d0a0811f update EPG 2026-02-07 19:04:28 +00:00
GitHub Actions Bot
be3e8a3a9e update M3U8 2026-02-07 13:35:17 -05:00
GitHub Actions Bot
f7ff7c67d0 update M3U8 2026-02-07 13:12:10 -05:00
doms9
00000d98b2 e
fix streambtw.py scraping
2026-02-07 12:52:02 -05:00
GitHub Actions Bot
f3f1f3cd82 update M3U8 2026-02-07 12:17:06 -05:00
GitHub Actions Bot
f2cc586921 update M3U8 2026-02-07 11:09:34 -05:00
GitHub Actions Bot
aef52b9c9e update M3U8 2026-02-07 10:26:56 -05:00
GitHub Actions Bot
43f67652c2 health log 2026-02-07 14:51:02 +00:00
GitHub Actions Bot
35e118f16f update M3U8 2026-02-07 09:12:33 -05:00
GitHub Actions Bot
39fe38b259 update M3U8 2026-02-07 08:19:34 -05:00
GitHub Actions Bot
d6c432c42c update EPG 2026-02-07 10:59:12 +00:00
GitHub Actions Bot
f8a3e5ef19 health log 2026-02-07 08:54:58 +00:00
13 changed files with 98450 additions and 101581 deletions

File diff suppressed because it is too large Load diff

196644
M3U8/TV.xml

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -18,9 +18,9 @@ from scrapers import (
sport9, sport9,
streambtw, streambtw,
streamcenter, streamcenter,
streamfree,
streamhub, streamhub,
streamsgate, streamsgate,
totalsportek,
tvpass, tvpass,
watchfooty, watchfooty,
webcast, webcast,
@ -70,6 +70,8 @@ async def main() -> None:
asyncio.create_task(streamcenter.scrape(xtrnl_brwsr)), asyncio.create_task(streamcenter.scrape(xtrnl_brwsr)),
asyncio.create_task(streamhub.scrape(xtrnl_brwsr)), asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
asyncio.create_task(streamsgate.scrape(xtrnl_brwsr)), asyncio.create_task(streamsgate.scrape(xtrnl_brwsr)),
asyncio.create_task(totalsportek.scrape(hdl_brwsr)),
asyncio.create_task(tvpass.scrape(hdl_brwsr)),
asyncio.create_task(webcast.scrape(hdl_brwsr)), asyncio.create_task(webcast.scrape(hdl_brwsr)),
] ]
@ -79,9 +81,7 @@ async def main() -> None:
asyncio.create_task(pawa.scrape()), asyncio.create_task(pawa.scrape()),
asyncio.create_task(shark.scrape()), asyncio.create_task(shark.scrape()),
asyncio.create_task(streambtw.scrape()), asyncio.create_task(streambtw.scrape()),
asyncio.create_task(streamfree.scrape()), asyncio.create_task(xstreameast.scrape()),
asyncio.create_task(tvpass.scrape()),
# asyncio.create_task(xstreameast.scrape()),
] ]
await asyncio.gather(*(pw_tasks + httpx_tasks)) await asyncio.gather(*(pw_tasks + httpx_tasks))
@ -111,9 +111,9 @@ async def main() -> None:
| sport9.urls | sport9.urls
| streambtw.urls | streambtw.urls
| streamcenter.urls | streamcenter.urls
| streamfree.urls
| streamhub.urls | streamhub.urls
| streamsgate.urls | streamsgate.urls
| totalsportek.urls
| tvpass.urls | tvpass.urls
| watchfooty.urls | watchfooty.urls
| webcast.urls | webcast.urls

View file

@ -1,7 +1,7 @@
import base64 import base64
import json
import re import re
from functools import partial from functools import partial
from urllib.parse import urljoin
from selectolax.parser import HTMLParser from selectolax.parser import HTMLParser
@ -53,26 +53,47 @@ async def get_events() -> list[dict[str, str]]:
soup = HTMLParser(html_data.content) soup = HTMLParser(html_data.content)
for card in soup.css(".league"): script_text = None
if not (league_elem := card.css_first(".league-title")):
continue
for event in card.css(".match"): for s in soup.css("script"):
if not (match_elem := event.css_first(".match-name")): t = s.text() or ""
continue
if (not (watch_btn := event.css_first("a.watch-btn"))) or ( if "const DATA" in t:
not (href := watch_btn.attributes.get("href")) script_text = t
): break
continue
league, name = league_elem.text(strip=True), match_elem.text(strip=True) if not script_text:
return events
if not (
match := re.search(r"const\s+DATA\s*=\s*(\[\s*.*?\s*\]);", script_text, re.S)
):
return events
data_js = match[1].replace("\n ", "").replace("\n ", "")
s1 = re.sub(r"{\s", '{"', data_js)
s2 = re.sub(r':"', '":"', s1)
s3 = re.sub(r":\[", '":[', s2)
s4 = re.sub(r"},\]", "}]", s3)
s5 = re.sub(r'",\s', '","', s4)
data: list[dict[str, str]] = json.loads(s5)
for matches in data:
league = matches["title"]
items: list[dict[str, str]] = matches["items"]
for info in items:
title = info["title"]
url = info["url"]
events.append( events.append(
{ {
"sport": fix_league(league), "sport": fix_league(league),
"event": name, "event": title,
"link": urljoin(BASE_URL, href), "link": url,
} }
) )

View file

@ -1,85 +0,0 @@
from urllib.parse import urljoin
from .utils import Cache, Time, get_logger, leagues, network
# Module-level logger named after this scraper module.
log = get_logger(__name__)
# Shared result map filled by scrape(); keys look like "[sport] name (TAG)".
urls: dict[str, dict[str, str | float]] = {}
# Short tag appended to every event key and used to name the cache.
TAG = "STRMFREE"
# NOTE(review): `exp` is presumably a lifetime in seconds (19_800 s = 5.5 h) — confirm against Cache.
CACHE_FILE = Cache(TAG, exp=19_800)
# Site root; the "streams" endpoint is resolved relative to it.
BASE_URL = "https://streamfree.to/"
async def get_events() -> dict[str, dict[str, str | float]]:
    """Fetch the ``streams`` endpoint and build one entry per usable stream.

    Returns:
        A mapping keyed by ``"[league] name (TAG)"``. It is empty when the
        request yields nothing. Streams missing a league, a name or a
        stream key are skipped.
    """
    collected = {}

    response = await network.request(
        urljoin(BASE_URL, "streams"),
        log=log,
    )

    if not response:
        return collected

    payload: dict = response.json()

    stamp = Time.clean(Time.now())

    for group in payload.get("streams", {}).values():
        # An empty (or null) group contributes nothing.
        for stream in group or ():
            sport = stream.get("league")
            name = stream.get("name")
            stream_key = stream.get("stream_key")

            if not sport or not name or not stream_key:
                continue

            tvg_id, logo = leagues.get_tvg_info(sport, name)

            proxy_url = network.build_proxy_url(
                tag=TAG,
                path=f"{stream_key}/index.m3u8",
                query={"stream_name": name},
            )

            collected[f"[{sport}] {name} ({TAG})"] = {
                "url": proxy_url,
                "logo": logo,
                "base": BASE_URL,
                "timestamp": stamp.timestamp(),
                "id": tvg_id or "Live.Event.us",
            }

    return collected
async def scrape() -> None:
    """Populate the module-level ``urls`` map, preferring the cache.

    When the cache file loads a non-empty result it is used as-is and no
    request is made. Otherwise events are fetched through
    ``network.safe_process`` and the resulting map is written to the cache.
    """
    cached = CACHE_FILE.load()

    if cached:
        urls.update(cached)

        log.info(f"Loaded {len(urls)} event(s) from cache")

        return

    log.info(f'Scraping from "{BASE_URL}"')

    fetched = await network.safe_process(
        get_events,
        url_num=1,
        semaphore=network.HTTP_S,
        log=log,
    )

    # A falsy result from safe_process is treated as "no events".
    if not fetched:
        fetched = {}

    urls.update(fetched)

    CACHE_FILE.write(urls)

    log.info(f"Collected and cached {len(urls)} new event(s)")

View file

@ -0,0 +1,147 @@
from functools import partial
from urllib.parse import urljoin, urlparse
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
# Module-level logger named after this scraper module.
log = get_logger(__name__)
# Shared result map filled by scrape(); keys look like "[sport] event (TAG)".
urls: dict[str, dict[str, str | float]] = {}
# Short tag appended to every event key and used to name the cache.
TAG = "TOTALSPRTK"
# NOTE(review): `exp` is presumably a lifetime in seconds (28_800 s = 8 h) — confirm against Cache.
CACHE_FILE = Cache(TAG, exp=28_800)
# Page that is scraped for events; relative event links are resolved against it.
BASE_URL = "https://live3.totalsportek777.com/"
def fix_txt(s: str) -> str:
    """Collapse whitespace runs to single spaces and upper-case all-lowercase text.

    Text that already contains an uppercase letter, or no cased letters at
    all, is returned with only its whitespace normalised.
    """
    collapsed = " ".join(s.split())

    if collapsed.islower():
        return collapsed.upper()

    return collapsed
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
    """Scrape ``BASE_URL`` for matches that are marked as started.

    The page's anchors are walked in document order. An anchor whose parent
    carries the ``my-1`` class is treated as a section heading and, when it
    contains a ``<span>``, sets the sport applied to the anchors after it
    (``"Live Event"`` until the first heading). An anchor is reported only
    when it has team names, an ``href`` and a time label reading
    ``MatchStarted``.

    Args:
        cached_keys: Event keys (``"[sport] event (TAG)"``) that are already
            cached; matching events are skipped.

    Returns:
        A list of ``{"sport", "event", "link"}`` dicts. It is empty when
        the page request yields nothing.
    """
    events = []

    if not (html_data := await network.request(BASE_URL, log=log)):
        return events

    soup = HTMLParser(html_data.content)

    sport = "Live Event"

    for node in soup.css("a"):
        if not node.attributes.get("class"):
            continue

        if parent := node.parent:
            # A valueless attribute (`<div class>`) is reported as None, so
            # `.get("class", "")` would hand None to `in`; fall back to "".
            parent_classes = parent.attributes.get("class") or ""

            if "my-1" in parent_classes and (span := node.css_first("span")):
                sport = fix_txt(span.text(strip=True))

        if not (teams := [t.text(strip=True) for t in node.css(".col-7 .col-12")]):
            continue

        if not (href := node.attributes.get("href")):
            continue

        # Absolute links are reduced to their path so they resolve against BASE_URL.
        href = urlparse(href).path if href.startswith("http") else href

        if not (time_node := node.css_first(".col-3 span")):
            continue

        if time_node.text(strip=True) != "MatchStarted":
            continue

        event_name = fix_txt(" vs ".join(teams))

        if f"[{sport}] {event_name} ({TAG})" in cached_keys:
            continue

        events.append(
            {
                "sport": sport,
                "event": event_name,
                "link": urljoin(BASE_URL, href),
            }
        )

    return events
async def scrape(browser: Browser) -> None:
    """Load cached events, resolve stream URLs for new ones, and re-cache.

    Cached entries with a truthy ``url`` are published through the
    module-level ``urls`` map. Each new event from ``get_events`` is opened
    in its own page and passed to ``network.process_event``. Every processed
    event is written to the cache, including those whose URL came back
    falsy, but only events with a URL are added to ``urls``.

    Args:
        browser: Playwright browser used to create the event context.
    """
    cached_urls = CACHE_FILE.load()

    valid_urls = {}

    for cache_key, cache_entry in cached_urls.items():
        if cache_entry["url"]:
            valid_urls[cache_key] = cache_entry

    cached_count = len(valid_urls)

    new_count = 0

    urls.update(valid_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

    log.info(f'Scraping from "{BASE_URL}"')

    # All cached keys are passed, so events cached without a URL are skipped too.
    events = await get_events(cached_urls.keys())

    log.info(f"Processing {len(events)} new URL(s)")

    if events:
        now = Time.clean(Time.now())

        async with network.event_context(browser) as context:
            for idx, ev in enumerate(events, start=1):
                sport, event, link = ev["sport"], ev["event"], ev["link"]

                async with network.event_page(context) as page:
                    url = await network.safe_process(
                        partial(
                            network.process_event,
                            url=link,
                            url_num=idx,
                            page=page,
                            log=log,
                        ),
                        url_num=idx,
                        semaphore=network.PW_S,
                        log=log,
                        timeout=6,
                    )

                    key = f"[{sport}] {event} ({TAG})"

                    tvg_id, logo = leagues.get_tvg_info(sport, event)

                    entry = {
                        "url": url,
                        "logo": logo,
                        "base": link,
                        "timestamp": now.timestamp(),
                        "id": tvg_id or "Live.Event.us",
                        "link": link,
                    }

                    cached_urls[key] = entry

                    if url:
                        new_count += 1

                        urls[key] = entry

    if new_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

    CACHE_FILE.write(cached_urls)

View file

@ -1,4 +1,8 @@
import re from functools import partial
from urllib.parse import urljoin
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network from .utils import Cache, Time, get_logger, leagues, network
@ -8,73 +12,144 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "TVPASS" TAG = "TVPASS"
CACHE_FILE = Cache(TAG, exp=86_400) CACHE_FILE = Cache(TAG, exp=10_800)
BASE_URL = "https://tvpass.org/playlist/m3u" HTML_CACHE = Cache(f"{TAG}-html", exp=28_800)
BASE_URL = "https://thetvapp.to"
async def get_events() -> dict[str, dict[str, str | float]]: async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
log.info("Refreshing HTML cache")
events = {} events = {}
if not (r := await network.request(BASE_URL, log=log)): if not (html_data := await network.request(BASE_URL, log=log)):
return events return events
now = Time.clean(Time.now()) soup = HTMLParser(html_data.content)
data = r.text.splitlines() for row in soup.css(".row"):
if not (h3_elem := row.css_first("h3")):
continue
for i, line in enumerate(data, start=1): sport = h3_elem.text(strip=True)
if line.startswith("#EXTINF"):
tvg_id_match = re.search(r'tvg-id="([^"]*)"', line)
tvg_name_match = re.search(r'tvg-name="([^"]*)"', line) if sport.lower() == "live tv channels":
continue
group_title_match = re.search(r'group-title="([^"]*)"', line) for a in row.css("a.list-group-item[href]"):
if not (href := a.attributes.get("href")):
continue
tvg = tvg_id_match[1] if tvg_id_match else None if not (span := a.css_first("span")):
continue
if not tvg and (url := data[i]).endswith("/sd"): event_time = span.text(strip=True)
if tvg_name := tvg_name_match[1]:
sport = group_title_match[1].upper().strip()
event = "(".join(tvg_name.split("(")[:-1]).strip() event_dt = Time.from_str(event_time, timezone="UTC")
key = f"[{sport}] {event} ({TAG})" event_name = a.text(strip=True).split(":")[0]
channel = url.split("/")[-2] key = f"[{sport}] {event_name} ({TAG})"
tvg_id, logo = leagues.info(sport) events[key] = {
"sport": sport,
events[key] = { "event": event_name,
"url": f"http://origin.thetvapp.to/hls/{channel}/mono.m3u8", "link": urljoin(BASE_URL, href),
"logo": logo, "event_ts": event_dt.timestamp(),
"id": tvg_id or "Live.Event.us", "timestamp": now_ts,
"base": "https://tvpass.org", }
"timestamp": now.timestamp(),
}
return events return events
async def scrape() -> None: async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
if cached := CACHE_FILE.load(): now = Time.clean(Time.now())
urls.update(cached)
log.info(f"Loaded {len(urls)} event(s) from cache") if not (events := HTML_CACHE.load()):
events = await refresh_html_cache(now.timestamp())
return HTML_CACHE.write(events)
live = []
start_ts = now.delta(minutes=-30).timestamp()
end_ts = now.delta(minutes=30).timestamp()
for k, v in events.items():
if k in cached_keys:
continue
if not start_ts <= v["event_ts"] <= end_ts:
continue
live.append({**v})
return live
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
urls.update(cached_urls)
log.info(f"Loaded {cached_count} event(s) from cache")
log.info(f'Scraping from "{BASE_URL}"') log.info(f'Scraping from "{BASE_URL}"')
events = await network.safe_process( events = await get_events(cached_urls.keys())
get_events,
url_num=1,
semaphore=network.HTTP_S,
log=log,
)
urls.update(events or {}) log.info(f"Processing {len(events)} new URL(s)")
CACHE_FILE.write(urls) if events:
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
log=log,
)
log.info(f"Collected and cached {len(urls)} new event(s)") url = await network.safe_process(
handler,
url_num=i,
semaphore=network.PW_S,
log=log,
)
if url:
sport, event, ts, link = (
ev["sport"],
ev["event"],
ev["event_ts"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})"
tvg_id, logo = leagues.get_tvg_info(sport, event)
entry = {
"url": url,
"logo": logo,
"base": BASE_URL,
"timestamp": ts,
"id": tvg_id or "Live.Event.us",
"link": link,
}
urls[key] = cached_urls[key] = entry
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")
else:
log.info("No new events found")
CACHE_FILE.write(cached_urls)

View file

@ -100,6 +100,7 @@ class Time(datetime):
"%Y-%m-%d %I:%M %p", "%Y-%m-%d %I:%M %p",
"%Y-%m-%d %H:%M %p", "%Y-%m-%d %H:%M %p",
"%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%SZ",
"%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%S.%fZ",
"%Y/%m/%d %H:%M", "%Y/%m/%d %H:%M",
"%Y/%m/%d %H:%M:%S", "%Y/%m/%d %H:%M:%S",

View file

@ -6,7 +6,6 @@ from collections.abc import Awaitable, Callable
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from functools import partial from functools import partial
from typing import AsyncGenerator, TypeVar from typing import AsyncGenerator, TypeVar
from urllib.parse import urlencode, urljoin
import httpx import httpx
from playwright.async_api import Browser, BrowserContext, Page, Playwright, Request from playwright.async_api import Browser, BrowserContext, Page, Playwright, Request
@ -39,21 +38,6 @@ class Network:
http2=True, http2=True,
) )
@staticmethod
def build_proxy_url(
tag: str,
path: str,
query: dict | None = None,
) -> str:
tag = tag.lower()
return (
f"{urljoin(network.proxy_base, f'{tag}/{path}')}?{urlencode(query)}"
if query
else urljoin(network.proxy_base, f"{tag}/{path}")
)
async def request( async def request(
self, self,
url: str, url: str,

View file

@ -76,7 +76,7 @@ async def process_event(
page: Page, page: Page,
) -> tuple[str | None, str | None]: ) -> tuple[str | None, str | None]:
nones = [None for _ in range(2)] nones = None, None
pattern = re.compile(r"\((\d+)\)") pattern = re.compile(r"\((\d+)\)")

View file

@ -30,9 +30,9 @@ SPORT_ENDPOINTS = [
async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]: async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]:
valid_m3u8 = re.compile(r'var\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE) valid_m3u8 = re.compile(r'(var|const)\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)
nones = [None for _ in range(2)] nones = None, None
if not (html_data := await network.request(url, log=log)): if not (html_data := await network.request(url, log=log)):
log.info(f"URL {url_num}) Failed to load url.") log.info(f"URL {url_num}) Failed to load url.")
@ -58,9 +58,12 @@ async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]
log.warning(f"URL {url_num}) No Clappr source found.") log.warning(f"URL {url_num}) No Clappr source found.")
return nones return nones
if len(encoded := match[2]) < 20:
encoded = match[3]
log.info(f"URL {url_num}) Captured M3U8") log.info(f"URL {url_num}) Captured M3U8")
return bytes.fromhex(match[2]).decode("utf-8"), iframe_src return bytes.fromhex(encoded).decode("utf-8"), iframe_src
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

View file

@ -1,11 +1,16 @@
## Base Log @ 2026-02-07 04:18 UTC ## Base Log @ 2026-02-08 04:41 UTC
### ✅ Working Streams: 144<br>❌ Dead Streams: 2 ### ✅ Working Streams: 139<br>❌ Dead Streams: 7
| Channel | Error (Code) | Link | | Channel | Error (Code) | Link |
| ------- | ------------ | ---- | | ------- | ------------ | ---- |
| CW | HTTP Error (404) | `https://fl1.moveonjoy.com/CW_ORLANDO/index.m3u8` | | CW | HTTP Error (404) | `https://fl1.moveonjoy.com/CW_ORLANDO/index.m3u8` |
| ESPN | HTTP Error (404) | `http://41.205.93.154/ESPN/index.m3u8` |
| FXX | HTTP Error (404) | `https://fl1.moveonjoy.com/FXX/index.m3u8` | | FXX | HTTP Error (404) | `https://fl1.moveonjoy.com/FXX/index.m3u8` |
| Spectrum SportsNet LA Dodgers | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/31636.m3u8` |
| Sportsnet 360 | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/2219.m3u8` |
| Sportsnet One | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/57297.m3u8` |
| TSN1 | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/57292.m3u8` |
--- ---
#### Base Channels URL #### Base Channels URL
``` ```