Compare commits

..

No commits in common. "5cbfb6888edc63db833f73b49592b9d9cb560da0" and "8252766a332ac0cb0cf1b9a57b1bf53dd62e0f14" have entirely different histories.

13 changed files with 101543 additions and 98412 deletions

File diff suppressed because it is too large Load diff

196364
M3U8/TV.xml

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -18,9 +18,9 @@ from scrapers import (
sport9,
streambtw,
streamcenter,
streamfree,
streamhub,
streamsgate,
totalsportek,
tvpass,
watchfooty,
webcast,
@ -70,8 +70,6 @@ async def main() -> None:
asyncio.create_task(streamcenter.scrape(xtrnl_brwsr)),
asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
asyncio.create_task(streamsgate.scrape(xtrnl_brwsr)),
asyncio.create_task(totalsportek.scrape(hdl_brwsr)),
asyncio.create_task(tvpass.scrape(hdl_brwsr)),
asyncio.create_task(webcast.scrape(hdl_brwsr)),
]
@ -81,7 +79,9 @@ async def main() -> None:
asyncio.create_task(pawa.scrape()),
asyncio.create_task(shark.scrape()),
asyncio.create_task(streambtw.scrape()),
asyncio.create_task(xstreameast.scrape()),
asyncio.create_task(streamfree.scrape()),
asyncio.create_task(tvpass.scrape()),
# asyncio.create_task(xstreameast.scrape()),
]
await asyncio.gather(*(pw_tasks + httpx_tasks))
@ -111,9 +111,9 @@ async def main() -> None:
| sport9.urls
| streambtw.urls
| streamcenter.urls
| streamfree.urls
| streamhub.urls
| streamsgate.urls
| totalsportek.urls
| tvpass.urls
| watchfooty.urls
| webcast.urls

View file

@ -1,7 +1,7 @@
import base64
import json
import re
from functools import partial
from urllib.parse import urljoin
from selectolax.parser import HTMLParser
@ -53,47 +53,26 @@ async def get_events() -> list[dict[str, str]]:
soup = HTMLParser(html_data.content)
script_text = None
for card in soup.css(".league"):
if not (league_elem := card.css_first(".league-title")):
continue
for s in soup.css("script"):
t = s.text() or ""
for event in card.css(".match"):
if not (match_elem := event.css_first(".match-name")):
continue
if "const DATA" in t:
script_text = t
break
if (not (watch_btn := event.css_first("a.watch-btn"))) or (
not (href := watch_btn.attributes.get("href"))
):
continue
if not script_text:
return events
if not (
match := re.search(r"const\s+DATA\s*=\s*(\[\s*.*?\s*\]);", script_text, re.S)
):
return events
data_js = match[1].replace("\n ", "").replace("\n ", "")
s1 = re.sub(r"{\s", '{"', data_js)
s2 = re.sub(r':"', '":"', s1)
s3 = re.sub(r":\[", '":[', s2)
s4 = re.sub(r"},\]", "}]", s3)
s5 = re.sub(r'",\s', '","', s4)
data: list[dict[str, str]] = json.loads(s5)
for matches in data:
league = matches["title"]
items: list[dict[str, str]] = matches["items"]
for info in items:
title = info["title"]
url = info["url"]
league, name = league_elem.text(strip=True), match_elem.text(strip=True)
events.append(
{
"sport": fix_league(league),
"event": title,
"link": url,
"event": name,
"link": urljoin(BASE_URL, href),
}
)

View file

@ -0,0 +1,85 @@
from urllib.parse import urljoin
from .utils import Cache, Time, get_logger, leagues, network
log = get_logger(__name__)
urls: dict[str, dict[str, str | float]] = {}
TAG = "STRMFREE"
CACHE_FILE = Cache(TAG, exp=19_800)
BASE_URL = "https://streamfree.to/"
async def get_events() -> dict[str, dict[str, str | float]]:
    """Fetch the streamfree.to stream listing and build proxied event entries.

    Returns a mapping of display key -> playlist metadata (proxy URL, logo,
    base site, scrape timestamp, tvg id). Empty when the request fails or
    when an entry lacks a league, name, or stream key.
    """
    events: dict[str, dict[str, str | float]] = {}
    response = await network.request(
        urljoin(BASE_URL, "streams"),
        log=log,
    )
    if not response:
        return events
    payload: dict = response.json()
    # One timestamp for the whole batch, taken after the request returns.
    now_ts = Time.clean(Time.now()).timestamp()
    for group in payload.get("streams", {}).values():
        for item in group or []:
            league = item.get("league")
            title = item.get("name")
            stream_key = item.get("stream_key")
            # All three fields are required to build a usable entry.
            if not (league and title and stream_key):
                continue
            tvg_id, logo = leagues.get_tvg_info(league, title)
            proxy_url = network.build_proxy_url(
                tag=TAG,
                path=f"{stream_key}/index.m3u8",
                query={"stream_name": title},
            )
            events[f"[{league}] {title} ({TAG})"] = {
                "url": proxy_url,
                "logo": logo,
                "base": BASE_URL,
                "timestamp": now_ts,
                "id": tvg_id or "Live.Event.us",
            }
    return events
async def scrape() -> None:
    """Populate the module-level ``urls`` mapping, preferring the cache.

    Loads cached events when present; otherwise scrapes the site,
    caches the result, and logs the outcome.
    """
    cached = CACHE_FILE.load()
    if cached:
        urls.update(cached)
        log.info(f"Loaded {len(urls)} event(s) from cache")
        return
    log.info(f'Scraping from "{BASE_URL}"')
    scraped = await network.safe_process(
        get_events,
        url_num=1,
        semaphore=network.HTTP_S,
        log=log,
    )
    if scraped:
        urls.update(scraped)
    CACHE_FILE.write(urls)
    log.info(f"Collected and cached {len(urls)} new event(s)")

View file

@ -1,147 +0,0 @@
from functools import partial
from urllib.parse import urljoin, urlparse
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
log = get_logger(__name__)
urls: dict[str, dict[str, str | float]] = {}
TAG = "TOTALSPRTK"
CACHE_FILE = Cache(TAG, exp=28_800)
BASE_URL = "https://live3.totalsportek777.com/"
def fix_txt(s: str) -> str:
    """Collapse runs of whitespace; upper-case strings that are fully lowercase."""
    collapsed = " ".join(s.split())
    if collapsed.islower():
        return collapsed.upper()
    return collapsed
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
    """Scrape the totalsportek listing page for started, not-yet-cached matches.

    Returns a list of ``{"sport", "event", "link"}`` dicts. Entries whose
    display key already appears in *cached_keys* are skipped.
    """
    events = []
    if not (html_data := await network.request(BASE_URL, log=log)):
        return events
    soup = HTMLParser(html_data.content)
    # Fallback sport label; updated whenever a section-header anchor is seen.
    sport = "Live Event"
    for node in soup.css("a"):
        # Anchors without a class attribute are not match/section links.
        if not node.attributes.get("class"):
            continue
        # A "my-1" parent marks a section header; its span names the sport.
        # NOTE(review): `sport` deliberately carries over to the following
        # match anchors until the next header is encountered.
        if (parent := node.parent) and "my-1" in parent.attributes.get("class", ""):
            if span := node.css_first("span"):
                sport = span.text(strip=True)
                sport = fix_txt(sport)
        # Match rows list the two team names under .col-7 .col-12.
        if not (teams := [t.text(strip=True) for t in node.css(".col-7 .col-12")]):
            continue
        if not (href := node.attributes.get("href")):
            continue
        # Normalize absolute links to a path so urljoin below rebases them.
        href = urlparse(href).path if href.startswith("http") else href
        if not (time_node := node.css_first(".col-3 span")):
            continue
        # Only matches currently in progress are kept.
        if time_node.text(strip=True) != "MatchStarted":
            continue
        event_name = fix_txt(" vs ".join(teams))
        # Skip events already present in the cache.
        if f"[{sport}] {event_name} ({TAG})" in cached_keys:
            continue
        events.append(
            {
                "sport": sport,
                "event": event_name,
                "link": urljoin(BASE_URL, href),
            }
        )
    return events
async def scrape(browser: Browser) -> None:
    """Resolve stream URLs for new totalsportek events via Playwright.

    Loads the cache, scrapes the listing for events not already cached,
    resolves each event's stream URL in its own page, and rewrites the cache.
    Only entries with a resolved (truthy) URL are exposed via ``urls``.
    """
    cached_urls = CACHE_FILE.load()
    # Entries that previously resolved to a real URL; failures stay cached
    # (so they are not retried) but are not exposed.
    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
    valid_count = cached_count = len(valid_urls)
    urls.update(valid_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
    log.info(f'Scraping from "{BASE_URL}"')
    events = await get_events(cached_urls.keys())
    log.info(f"Processing {len(events)} new URL(s)")
    if events:
        now = Time.clean(Time.now())
        async with network.event_context(browser) as context:
            for i, ev in enumerate(events, start=1):
                # Fresh page per event; closed by the context manager.
                async with network.event_page(context) as page:
                    handler = partial(
                        network.process_event,
                        url=ev["link"],
                        url_num=i,
                        page=page,
                        log=log,
                    )
                    # safe_process returns None on timeout/failure.
                    url = await network.safe_process(
                        handler,
                        url_num=i,
                        semaphore=network.PW_S,
                        log=log,
                        timeout=6,
                    )
                    sport, event, link = (
                        ev["sport"],
                        ev["event"],
                        ev["link"],
                    )
                    key = f"[{sport}] {event} ({TAG})"
                    tvg_id, logo = leagues.get_tvg_info(sport, event)
                    entry = {
                        "url": url,
                        "logo": logo,
                        "base": link,
                        "timestamp": now.timestamp(),
                        "id": tvg_id or "Live.Event.us",
                        "link": link,
                    }
                    # Cache every attempt (even failures); expose successes only.
                    cached_urls[key] = entry
                    if url:
                        valid_count += 1
                        urls[key] = entry
    if new_count := valid_count - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")
    CACHE_FILE.write(cached_urls)

View file

@ -1,8 +1,4 @@
from functools import partial
from urllib.parse import urljoin
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
import re
from .utils import Cache, Time, get_logger, leagues, network
@ -12,144 +8,73 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "TVPASS"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(TAG, exp=86_400)
HTML_CACHE = Cache(f"{TAG}-html", exp=28_800)
BASE_URL = "https://thetvapp.to"
BASE_URL = "https://tvpass.org/playlist/m3u"
async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
log.info("Refreshing HTML cache")
async def get_events() -> dict[str, dict[str, str | float]]:
events = {}
if not (html_data := await network.request(BASE_URL, log=log)):
if not (r := await network.request(BASE_URL, log=log)):
return events
soup = HTMLParser(html_data.content)
now = Time.clean(Time.now())
for row in soup.css(".row"):
if not (h3_elem := row.css_first("h3")):
continue
data = r.text.splitlines()
sport = h3_elem.text(strip=True)
for i, line in enumerate(data, start=1):
if line.startswith("#EXTINF"):
tvg_id_match = re.search(r'tvg-id="([^"]*)"', line)
if sport.lower() == "live tv channels":
continue
tvg_name_match = re.search(r'tvg-name="([^"]*)"', line)
for a in row.css("a.list-group-item[href]"):
if not (href := a.attributes.get("href")):
continue
group_title_match = re.search(r'group-title="([^"]*)"', line)
if not (span := a.css_first("span")):
continue
tvg = tvg_id_match[1] if tvg_id_match else None
event_time = span.text(strip=True)
if not tvg and (url := data[i]).endswith("/sd"):
if tvg_name := tvg_name_match[1]:
sport = group_title_match[1].upper().strip()
event_dt = Time.from_str(event_time, timezone="UTC")
event = "(".join(tvg_name.split("(")[:-1]).strip()
event_name = a.text(strip=True).split(":")[0]
key = f"[{sport}] {event} ({TAG})"
key = f"[{sport}] {event_name} ({TAG})"
channel = url.split("/")[-2]
events[key] = {
"sport": sport,
"event": event_name,
"link": urljoin(BASE_URL, href),
"event_ts": event_dt.timestamp(),
"timestamp": now_ts,
}
tvg_id, logo = leagues.info(sport)
events[key] = {
"url": f"http://origin.thetvapp.to/hls/{channel}/mono.m3u8",
"logo": logo,
"id": tvg_id or "Live.Event.us",
"base": "https://tvpass.org",
"timestamp": now.timestamp(),
}
return events
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now())
async def scrape() -> None:
if cached := CACHE_FILE.load():
urls.update(cached)
if not (events := HTML_CACHE.load()):
events = await refresh_html_cache(now.timestamp())
log.info(f"Loaded {len(urls)} event(s) from cache")
HTML_CACHE.write(events)
live = []
start_ts = now.delta(minutes=-30).timestamp()
end_ts = now.delta(minutes=30).timestamp()
for k, v in events.items():
if k in cached_keys:
continue
if not start_ts <= v["event_ts"] <= end_ts:
continue
live.append({**v})
return live
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
urls.update(cached_urls)
log.info(f"Loaded {cached_count} event(s) from cache")
return
log.info(f'Scraping from "{BASE_URL}"')
events = await get_events(cached_urls.keys())
events = await network.safe_process(
get_events,
url_num=1,
semaphore=network.HTTP_S,
log=log,
)
log.info(f"Processing {len(events)} new URL(s)")
urls.update(events or {})
if events:
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
log=log,
)
CACHE_FILE.write(urls)
url = await network.safe_process(
handler,
url_num=i,
semaphore=network.PW_S,
log=log,
)
if url:
sport, event, ts, link = (
ev["sport"],
ev["event"],
ev["event_ts"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})"
tvg_id, logo = leagues.get_tvg_info(sport, event)
entry = {
"url": url,
"logo": logo,
"base": BASE_URL,
"timestamp": ts,
"id": tvg_id or "Live.Event.us",
"link": link,
}
urls[key] = cached_urls[key] = entry
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")
else:
log.info("No new events found")
CACHE_FILE.write(cached_urls)
log.info(f"Collected and cached {len(urls)} new event(s)")

View file

@ -100,7 +100,6 @@ class Time(datetime):
"%Y-%m-%d %I:%M %p",
"%Y-%m-%d %H:%M %p",
"%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%SZ",
"%Y-%m-%dT%H:%M:%S.%fZ",
"%Y/%m/%d %H:%M",
"%Y/%m/%d %H:%M:%S",

View file

@ -6,6 +6,7 @@ from collections.abc import Awaitable, Callable
from contextlib import asynccontextmanager
from functools import partial
from typing import AsyncGenerator, TypeVar
from urllib.parse import urlencode, urljoin
import httpx
from playwright.async_api import Browser, BrowserContext, Page, Playwright, Request
@ -38,6 +39,21 @@ class Network:
http2=True,
)
@staticmethod
def build_proxy_url(
tag: str,
path: str,
query: dict | None = None,
) -> str:
tag = tag.lower()
return (
f"{urljoin(network.proxy_base, f'{tag}/{path}')}?{urlencode(query)}"
if query
else urljoin(network.proxy_base, f"{tag}/{path}")
)
async def request(
self,
url: str,

View file

@ -76,7 +76,7 @@ async def process_event(
page: Page,
) -> tuple[str | None, str | None]:
nones = None, None
nones = [None for _ in range(2)]
pattern = re.compile(r"\((\d+)\)")

View file

@ -30,9 +30,9 @@ SPORT_ENDPOINTS = [
async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]:
valid_m3u8 = re.compile(r'(var|const)\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)
valid_m3u8 = re.compile(r'var\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)
nones = None, None
nones = [None for _ in range(2)]
if not (html_data := await network.request(url, log=log)):
log.info(f"URL {url_num}) Failed to load url.")
@ -58,12 +58,9 @@ async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]
log.warning(f"URL {url_num}) No Clappr source found.")
return nones
if len(encoded := match[2]) < 20:
encoded = match[3]
log.info(f"URL {url_num}) Captured M3U8")
return bytes.fromhex(encoded).decode("utf-8"), iframe_src
return bytes.fromhex(match[2]).decode("utf-8"), iframe_src
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

View file

@ -1,16 +1,11 @@
## Base Log @ 2026-02-08 04:41 UTC
## Base Log @ 2026-02-07 04:18 UTC
### ✅ Working Streams: 139<br>❌ Dead Streams: 7
### ✅ Working Streams: 144<br>❌ Dead Streams: 2
| Channel | Error (Code) | Link |
| ------- | ------------ | ---- |
| CW | HTTP Error (404) | `https://fl1.moveonjoy.com/CW_ORLANDO/index.m3u8` |
| ESPN | HTTP Error (404) | `http://41.205.93.154/ESPN/index.m3u8` |
| FXX | HTTP Error (404) | `https://fl1.moveonjoy.com/FXX/index.m3u8` |
| Spectrum SportsNet LA Dodgers | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/31636.m3u8` |
| Sportsnet 360 | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/2219.m3u8` |
| Sportsnet One | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/57297.m3u8` |
| TSN1 | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/57292.m3u8` |
---
#### Base Channels URL
```