Compare commits

..

No commits in common. "5cbfb6888edc63db833f73b49592b9d9cb560da0" and "8252766a332ac0cb0cf1b9a57b1bf53dd62e0f14" have entirely different histories.

13 changed files with 101543 additions and 98412 deletions

File diff suppressed because it is too large Load diff

196364
M3U8/TV.xml

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -18,9 +18,9 @@ from scrapers import (
sport9, sport9,
streambtw, streambtw,
streamcenter, streamcenter,
streamfree,
streamhub, streamhub,
streamsgate, streamsgate,
totalsportek,
tvpass, tvpass,
watchfooty, watchfooty,
webcast, webcast,
@ -70,8 +70,6 @@ async def main() -> None:
asyncio.create_task(streamcenter.scrape(xtrnl_brwsr)), asyncio.create_task(streamcenter.scrape(xtrnl_brwsr)),
asyncio.create_task(streamhub.scrape(xtrnl_brwsr)), asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
asyncio.create_task(streamsgate.scrape(xtrnl_brwsr)), asyncio.create_task(streamsgate.scrape(xtrnl_brwsr)),
asyncio.create_task(totalsportek.scrape(hdl_brwsr)),
asyncio.create_task(tvpass.scrape(hdl_brwsr)),
asyncio.create_task(webcast.scrape(hdl_brwsr)), asyncio.create_task(webcast.scrape(hdl_brwsr)),
] ]
@ -81,7 +79,9 @@ async def main() -> None:
asyncio.create_task(pawa.scrape()), asyncio.create_task(pawa.scrape()),
asyncio.create_task(shark.scrape()), asyncio.create_task(shark.scrape()),
asyncio.create_task(streambtw.scrape()), asyncio.create_task(streambtw.scrape()),
asyncio.create_task(xstreameast.scrape()), asyncio.create_task(streamfree.scrape()),
asyncio.create_task(tvpass.scrape()),
# asyncio.create_task(xstreameast.scrape()),
] ]
await asyncio.gather(*(pw_tasks + httpx_tasks)) await asyncio.gather(*(pw_tasks + httpx_tasks))
@ -111,9 +111,9 @@ async def main() -> None:
| sport9.urls | sport9.urls
| streambtw.urls | streambtw.urls
| streamcenter.urls | streamcenter.urls
| streamfree.urls
| streamhub.urls | streamhub.urls
| streamsgate.urls | streamsgate.urls
| totalsportek.urls
| tvpass.urls | tvpass.urls
| watchfooty.urls | watchfooty.urls
| webcast.urls | webcast.urls

View file

@ -1,7 +1,7 @@
import base64 import base64
import json
import re import re
from functools import partial from functools import partial
from urllib.parse import urljoin
from selectolax.parser import HTMLParser from selectolax.parser import HTMLParser
@ -53,47 +53,26 @@ async def get_events() -> list[dict[str, str]]:
soup = HTMLParser(html_data.content) soup = HTMLParser(html_data.content)
script_text = None for card in soup.css(".league"):
if not (league_elem := card.css_first(".league-title")):
continue
for s in soup.css("script"): for event in card.css(".match"):
t = s.text() or "" if not (match_elem := event.css_first(".match-name")):
continue
if "const DATA" in t: if (not (watch_btn := event.css_first("a.watch-btn"))) or (
script_text = t not (href := watch_btn.attributes.get("href"))
break
if not script_text:
return events
if not (
match := re.search(r"const\s+DATA\s*=\s*(\[\s*.*?\s*\]);", script_text, re.S)
): ):
return events continue
data_js = match[1].replace("\n ", "").replace("\n ", "") league, name = league_elem.text(strip=True), match_elem.text(strip=True)
s1 = re.sub(r"{\s", '{"', data_js)
s2 = re.sub(r':"', '":"', s1)
s3 = re.sub(r":\[", '":[', s2)
s4 = re.sub(r"},\]", "}]", s3)
s5 = re.sub(r'",\s', '","', s4)
data: list[dict[str, str]] = json.loads(s5)
for matches in data:
league = matches["title"]
items: list[dict[str, str]] = matches["items"]
for info in items:
title = info["title"]
url = info["url"]
events.append( events.append(
{ {
"sport": fix_league(league), "sport": fix_league(league),
"event": title, "event": name,
"link": url, "link": urljoin(BASE_URL, href),
} }
) )

View file

@ -0,0 +1,85 @@
from urllib.parse import urljoin
from .utils import Cache, Time, get_logger, leagues, network
# Logger obtained for this module's name; passed to the network helpers below.
log = get_logger(__name__)
# Events collected by scrape(), keyed by "[league] name (TAG)".
urls: dict[str, dict[str, str | float]] = {}
# Source label: embedded in every event key and used for the cache and proxy path.
TAG = "STRMFREE"
# Cache for this source.
# NOTE(review): `exp` is presumably a lifetime in seconds (19_800 s = 5.5 h) — confirm against Cache.
CACHE_FILE = Cache(TAG, exp=19_800)
# Site root; get_events() resolves the "streams" endpoint relative to it.
BASE_URL = "https://streamfree.to/"
async def get_events() -> dict[str, dict[str, str | float]]:
    """Fetch the stream listing and build one playlist entry per stream.

    Requests the ``streams`` endpoint under ``BASE_URL`` and reads the
    ``"streams"`` member of its JSON body, iterating the values of that
    mapping as lists of stream dicts.  A stream is skipped when it lacks a
    ``league``, ``name`` or ``stream_key``.

    Returns:
        Mapping of ``"[league] name (TAG)"`` to an entry holding the keys
        ``url``, ``logo``, ``base``, ``timestamp`` and ``id``.  The mapping is
        empty when the request yields nothing or no usable stream is listed.
    """
    events = {}

    if not (
        r := await network.request(
            urljoin(BASE_URL, "streams"),
            log=log,
        )
    ):
        return events

    api_data: dict = r.json()

    # Every entry from one scrape shares the same timestamp, so compute it once.
    now_ts = Time.clean(Time.now()).timestamp()

    # `or {}` also covers an explicit `"streams": null`; a plain
    # `.get("streams", {})` would hand back None and fail on `.values()`.
    for streams in (api_data.get("streams") or {}).values():
        if not streams:
            continue

        for stream in streams:
            sport, name, stream_key = (
                stream.get("league"),
                stream.get("name"),
                stream.get("stream_key"),
            )

            # All three fields are needed to build the key and the proxy path.
            if not (sport and name and stream_key):
                continue

            key = f"[{sport}] {name} ({TAG})"

            tvg_id, logo = leagues.get_tvg_info(sport, name)

            events[key] = {
                "url": network.build_proxy_url(
                    tag=TAG,
                    path=f"{stream_key}/index.m3u8",
                    query={"stream_name": name},
                ),
                "logo": logo,
                "base": BASE_URL,
                "timestamp": now_ts,
                "id": tvg_id or "Live.Event.us",
            }

    return events
async def scrape() -> None:
    """Populate ``urls`` from the cache, or from a fresh scrape otherwise.

    When ``CACHE_FILE.load()`` yields something truthy it is merged into
    ``urls`` and the function returns without touching the network.
    Otherwise ``get_events`` is run through ``network.safe_process``, its
    result (or nothing, if it came back falsy) is merged into ``urls``, and
    ``urls`` is written back to the cache.
    """
    cached = CACHE_FILE.load()

    if cached:
        urls.update(cached)
        log.info(f"Loaded {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')

    fetched = await network.safe_process(
        get_events,
        url_num=1,
        semaphore=network.HTTP_S,
        log=log,
    )

    # A falsy result from safe_process contributes no entries.
    if fetched:
        urls.update(fetched)
    else:
        urls.update({})

    CACHE_FILE.write(urls)

    log.info(f"Collected and cached {len(urls)} new event(s)")

View file

@ -1,147 +0,0 @@
from functools import partial
from urllib.parse import urljoin, urlparse
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
# Logger obtained for this module's name; passed to the network helpers below.
log = get_logger(__name__)
# Events with a resolved stream URL, keyed by "[sport] event (TAG)"; filled by scrape().
urls: dict[str, dict[str, str | float]] = {}
# Source label embedded in every event key and used to name the cache.
TAG = "TOTALSPRTK"
# Cache for this source.
# NOTE(review): `exp` is presumably a lifetime in seconds (28_800 s = 8 h) — confirm against Cache.
CACHE_FILE = Cache(TAG, exp=28_800)
# Page fetched by get_events(); also the base for resolving event links.
BASE_URL = "https://live3.totalsportek777.com/"
def fix_txt(s: str) -> str:
    """Normalise a scraped label.

    Runs of whitespace (including newlines and tabs) are collapsed to single
    spaces and leading/trailing whitespace is dropped.  If the result is
    entirely lower-case it is upper-cased; text that already contains an
    upper-case letter, or no cased letters at all, is returned as collapsed.

    >>> fix_txt("  nba   finals ")
    'NBA FINALS'
    >>> fix_txt("Real  Madrid")
    'Real Madrid'
    """
    s = " ".join(s.split())

    return s.upper() if s.islower() else s
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
    """Collect started matches from the landing page that are not cached yet.

    Walks every ``<a>`` element that has a ``class`` value.  An anchor whose
    parent carries the ``my-1`` class and which contains a ``<span>`` updates
    the current sport heading; that heading applies to the anchors after it.
    An anchor becomes an event only when it has team names under
    ``.col-7 .col-12``, an ``href``, and a ``.col-3 span`` whose text is
    exactly ``"MatchStarted"``.

    Args:
        cached_keys: Keys of the form ``"[sport] event (TAG)"`` to skip.

    Returns:
        A list of dicts with the keys ``sport``, ``event`` and ``link``.
        Empty when the page request yields nothing.
    """
    events = []

    if not (html_data := await network.request(BASE_URL, log=log)):
        return events

    soup = HTMLParser(html_data.content)

    # Heading used until the first sport header is seen.
    sport = "Live Event"

    for node in soup.css("a"):
        if not node.attributes.get("class"):
            continue

        parent = node.parent

        # A value-less attribute (`<div class>`) is reported as None rather
        # than missing, so a `.get("class", "")` default would not protect the
        # membership test below from a TypeError.
        parent_classes = (parent.attributes.get("class") or "") if parent else ""

        if "my-1" in parent_classes and (span := node.css_first("span")):
            # fix_txt is idempotent, so normalising once here is enough.
            sport = fix_txt(span.text(strip=True))

        if not (teams := [t.text(strip=True) for t in node.css(".col-7 .col-12")]):
            continue

        if not (href := node.attributes.get("href")):
            continue

        # Keep only the path of absolute links so they resolve against BASE_URL.
        href = urlparse(href).path if href.startswith("http") else href

        if not (time_node := node.css_first(".col-3 span")):
            continue

        if time_node.text(strip=True) != "MatchStarted":
            continue

        event_name = fix_txt(" vs ".join(teams))

        if f"[{sport}] {event_name} ({TAG})" in cached_keys:
            continue

        events.append(
            {
                "sport": sport,
                "event": event_name,
                "link": urljoin(BASE_URL, href),
            }
        )

    return events
async def scrape(browser: Browser) -> None:
    """Refresh ``urls`` with cached entries plus newly started events.

    Cached entries that have a truthy ``url`` are published to ``urls``.
    ``get_events`` then supplies events whose keys are not in the cache; each
    one is opened in its own page and ``network.process_event`` is run
    through ``network.safe_process``.  Every processed event is stored in the
    cache, including those with no URL, but only events with a URL are
    published and counted as new.  The cache is rewritten at the end.

    NOTE(review): the nesting below was reconstructed from a listing with
    its indentation stripped — confirm that the entry is built inside the
    page context and that the summary log sits at function level.
    """
    cache = CACHE_FILE.load()

    # Entries with no URL stay cached but are not published.
    usable = {key: item for key, item in cache.items() if item["url"]}

    cached_count = len(usable)
    valid_count = cached_count

    urls.update(usable)

    log.info(f"Loaded {cached_count} event(s) from cache")

    log.info(f'Scraping from "{BASE_URL}"')

    events = await get_events(cache.keys())

    log.info(f"Processing {len(events)} new URL(s)")

    if events:
        now = Time.clean(Time.now())

        async with network.event_context(browser) as context:
            for idx, ev in enumerate(events, start=1):
                async with network.event_page(context) as page:
                    url = await network.safe_process(
                        partial(
                            network.process_event,
                            url=ev["link"],
                            url_num=idx,
                            page=page,
                            log=log,
                        ),
                        url_num=idx,
                        semaphore=network.PW_S,
                        log=log,
                        timeout=6,
                    )

                    sport = ev["sport"]
                    event = ev["event"]
                    link = ev["link"]

                    key = f"[{sport}] {event} ({TAG})"

                    tvg_id, logo = leagues.get_tvg_info(sport, event)

                    entry = {
                        "url": url,
                        "logo": logo,
                        "base": link,
                        "timestamp": now.timestamp(),
                        "id": tvg_id or "Live.Event.us",
                        "link": link,
                    }

                    # Cached whether or not a URL was found.
                    cache[key] = entry

                    if url:
                        valid_count += 1
                        urls[key] = entry

    new_count = valid_count - cached_count

    if new_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

    CACHE_FILE.write(cache)

View file

@ -1,8 +1,4 @@
from functools import partial import re
from urllib.parse import urljoin
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network from .utils import Cache, Time, get_logger, leagues, network
@ -12,144 +8,73 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "TVPASS" TAG = "TVPASS"
CACHE_FILE = Cache(TAG, exp=10_800) CACHE_FILE = Cache(TAG, exp=86_400)
HTML_CACHE = Cache(f"{TAG}-html", exp=28_800) BASE_URL = "https://tvpass.org/playlist/m3u"
BASE_URL = "https://thetvapp.to"
async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]: async def get_events() -> dict[str, dict[str, str | float]]:
log.info("Refreshing HTML cache")
events = {} events = {}
if not (html_data := await network.request(BASE_URL, log=log)): if not (r := await network.request(BASE_URL, log=log)):
return events return events
soup = HTMLParser(html_data.content)
for row in soup.css(".row"):
if not (h3_elem := row.css_first("h3")):
continue
sport = h3_elem.text(strip=True)
if sport.lower() == "live tv channels":
continue
for a in row.css("a.list-group-item[href]"):
if not (href := a.attributes.get("href")):
continue
if not (span := a.css_first("span")):
continue
event_time = span.text(strip=True)
event_dt = Time.from_str(event_time, timezone="UTC")
event_name = a.text(strip=True).split(":")[0]
key = f"[{sport}] {event_name} ({TAG})"
events[key] = {
"sport": sport,
"event": event_name,
"link": urljoin(BASE_URL, href),
"event_ts": event_dt.timestamp(),
"timestamp": now_ts,
}
return events
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now()) now = Time.clean(Time.now())
if not (events := HTML_CACHE.load()): data = r.text.splitlines()
events = await refresh_html_cache(now.timestamp())
HTML_CACHE.write(events) for i, line in enumerate(data, start=1):
if line.startswith("#EXTINF"):
tvg_id_match = re.search(r'tvg-id="([^"]*)"', line)
live = [] tvg_name_match = re.search(r'tvg-name="([^"]*)"', line)
start_ts = now.delta(minutes=-30).timestamp() group_title_match = re.search(r'group-title="([^"]*)"', line)
end_ts = now.delta(minutes=30).timestamp()
for k, v in events.items(): tvg = tvg_id_match[1] if tvg_id_match else None
if k in cached_keys:
continue
if not start_ts <= v["event_ts"] <= end_ts: if not tvg and (url := data[i]).endswith("/sd"):
continue if tvg_name := tvg_name_match[1]:
sport = group_title_match[1].upper().strip()
live.append({**v}) event = "(".join(tvg_name.split("(")[:-1]).strip()
return live
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
urls.update(cached_urls)
log.info(f"Loaded {cached_count} event(s) from cache")
log.info(f'Scraping from "{BASE_URL}"')
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
log=log,
)
url = await network.safe_process(
handler,
url_num=i,
semaphore=network.PW_S,
log=log,
)
if url:
sport, event, ts, link = (
ev["sport"],
ev["event"],
ev["event_ts"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})" key = f"[{sport}] {event} ({TAG})"
tvg_id, logo = leagues.get_tvg_info(sport, event) channel = url.split("/")[-2]
entry = { tvg_id, logo = leagues.info(sport)
"url": url,
events[key] = {
"url": f"http://origin.thetvapp.to/hls/{channel}/mono.m3u8",
"logo": logo, "logo": logo,
"base": BASE_URL,
"timestamp": ts,
"id": tvg_id or "Live.Event.us", "id": tvg_id or "Live.Event.us",
"link": link, "base": "https://tvpass.org",
"timestamp": now.timestamp(),
} }
urls[key] = cached_urls[key] = entry return events
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")
else: async def scrape() -> None:
log.info("No new events found") if cached := CACHE_FILE.load():
urls.update(cached)
CACHE_FILE.write(cached_urls) log.info(f"Loaded {len(urls)} event(s) from cache")
return
log.info(f'Scraping from "{BASE_URL}"')
events = await network.safe_process(
get_events,
url_num=1,
semaphore=network.HTTP_S,
log=log,
)
urls.update(events or {})
CACHE_FILE.write(urls)
log.info(f"Collected and cached {len(urls)} new event(s)")

View file

@ -100,7 +100,6 @@ class Time(datetime):
"%Y-%m-%d %I:%M %p", "%Y-%m-%d %I:%M %p",
"%Y-%m-%d %H:%M %p", "%Y-%m-%d %H:%M %p",
"%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%SZ",
"%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%S.%fZ",
"%Y/%m/%d %H:%M", "%Y/%m/%d %H:%M",
"%Y/%m/%d %H:%M:%S", "%Y/%m/%d %H:%M:%S",

View file

@ -6,6 +6,7 @@ from collections.abc import Awaitable, Callable
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from functools import partial from functools import partial
from typing import AsyncGenerator, TypeVar from typing import AsyncGenerator, TypeVar
from urllib.parse import urlencode, urljoin
import httpx import httpx
from playwright.async_api import Browser, BrowserContext, Page, Playwright, Request from playwright.async_api import Browser, BrowserContext, Page, Playwright, Request
@ -38,6 +39,21 @@ class Network:
http2=True, http2=True,
) )
@staticmethod
def build_proxy_url(
    tag: str,
    path: str,
    query: dict | None = None,
) -> str:
    """Build a URL under ``network.proxy_base`` for the given tag and path.

    The lower-cased *tag* and *path* are combined as ``"<tag>/<path>"`` and
    joined onto ``network.proxy_base`` with ``urljoin``.

    Args:
        tag: Source label; lower-cased before use.
        path: Path placed after the tag.
        query: Optional parameters; when truthy they are url-encoded and
            appended after a ``?``.

    Returns:
        The joined URL, with the encoded query string if one was given.
    """
    target = urljoin(network.proxy_base, f"{tag.lower()}/{path}")

    if not query:
        return target

    return f"{target}?{urlencode(query)}"
async def request( async def request(
self, self,
url: str, url: str,

View file

@ -76,7 +76,7 @@ async def process_event(
page: Page, page: Page,
) -> tuple[str | None, str | None]: ) -> tuple[str | None, str | None]:
nones = None, None nones = [None for _ in range(2)]
pattern = re.compile(r"\((\d+)\)") pattern = re.compile(r"\((\d+)\)")

View file

@ -30,9 +30,9 @@ SPORT_ENDPOINTS = [
async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]: async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]:
valid_m3u8 = re.compile(r'(var|const)\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE) valid_m3u8 = re.compile(r'var\s+(\w+)\s*=\s*"([^"]*)"', re.IGNORECASE)
nones = None, None nones = [None for _ in range(2)]
if not (html_data := await network.request(url, log=log)): if not (html_data := await network.request(url, log=log)):
log.info(f"URL {url_num}) Failed to load url.") log.info(f"URL {url_num}) Failed to load url.")
@ -58,12 +58,9 @@ async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]
log.warning(f"URL {url_num}) No Clappr source found.") log.warning(f"URL {url_num}) No Clappr source found.")
return nones return nones
if len(encoded := match[2]) < 20:
encoded = match[3]
log.info(f"URL {url_num}) Captured M3U8") log.info(f"URL {url_num}) Captured M3U8")
return bytes.fromhex(encoded).decode("utf-8"), iframe_src return bytes.fromhex(match[2]).decode("utf-8"), iframe_src
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

View file

@ -1,16 +1,11 @@
## Base Log @ 2026-02-08 04:41 UTC ## Base Log @ 2026-02-07 04:18 UTC
### ✅ Working Streams: 139<br>❌ Dead Streams: 7 ### ✅ Working Streams: 144<br>❌ Dead Streams: 2
| Channel | Error (Code) | Link | | Channel | Error (Code) | Link |
| ------- | ------------ | ---- | | ------- | ------------ | ---- |
| CW | HTTP Error (404) | `https://fl1.moveonjoy.com/CW_ORLANDO/index.m3u8` | | CW | HTTP Error (404) | `https://fl1.moveonjoy.com/CW_ORLANDO/index.m3u8` |
| ESPN | HTTP Error (404) | `http://41.205.93.154/ESPN/index.m3u8` |
| FXX | HTTP Error (404) | `https://fl1.moveonjoy.com/FXX/index.m3u8` | | FXX | HTTP Error (404) | `https://fl1.moveonjoy.com/FXX/index.m3u8` |
| Spectrum SportsNet LA Dodgers | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/31636.m3u8` |
| Sportsnet 360 | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/2219.m3u8` |
| Sportsnet One | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/57297.m3u8` |
| TSN1 | HTTP Error (403) | `http://mytvstream.net:8080/live/bn80NG/909467/57292.m3u8` |
--- ---
#### Base Channels URL #### Base Channels URL
``` ```