Compare commits


No commits in common. "9c158240dbb63af6590c0cb8b9b6124df9de49ac" and "c5b8bcda06513a7273ede2b0c3947b89d4b79852" have entirely different histories.

10 changed files with 126213 additions and 126855 deletions

File diff suppressed because it is too large

M3U8/TV.xml

File diff suppressed because one or more lines are too long


@@ -306,8 +306,8 @@ http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/749
#EXTINF:-1 tvg-chno="102" tvg-id="NBA.TV.HD.us2" tvg-name="NBA TV" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s32281_dark_360w_270h.png" group-title="TV",NBA TV
http://212.102.60.231/NBA_TV/index.m3u8
#EXTINF:-1 tvg-chno="103" tvg-id="WTVJ-DT.us_locals1" tvg-name="NBC" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s10991_dark_360w_270h.png" group-title="TV",NBC
http://stream.cammonitorplus.net/1804/index.m3u8
#EXTINF:-1 tvg-chno="103" tvg-id="WNBC-DT.us_locals1" tvg-name="NBC" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s10991_dark_360w_270h.png" group-title="TV",NBC
http://stream.cammonitorplus.net/1812/index.m3u8
#EXTINF:-1 tvg-chno="104" tvg-id="NBC.Sports.Bay.Area.HD.us2" tvg-name="NBC Sports Bay Area" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s63138_dark_360w_270h.png" group-title="TV",NBC Sports Bay Area
http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/45785
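The entries above are extended-M3U pairs: an `#EXTINF` line carrying `tvg-*` attributes plus a display name after the last comma, followed by the stream URL on the next line. A minimal sketch of parsing one such pair, assuming a simple `key="value"` attribute grammar (the helper and regex are illustrative, not code from this repo):

```python
import re

# Hypothetical parser for an "#EXTINF" playlist entry; not part of this repo.
EXTINF_ATTRS = re.compile(r'([\w-]+)="([^"]*)"')

def parse_extinf(extinf_line: str, url_line: str) -> dict[str, str]:
    """Split an #EXTINF line into its tvg-* attributes, display name, and URL."""
    attrs = dict(EXTINF_ATTRS.findall(extinf_line))
    name = extinf_line.rsplit(",", 1)[-1].strip()  # display name follows the last comma
    return {**attrs, "name": name, "url": url_line.strip()}

entry = parse_extinf(
    '#EXTINF:-1 tvg-chno="102" tvg-id="NBA.TV.HD.us2" tvg-name="NBA TV" group-title="TV",NBA TV',
    "http://212.102.60.231/NBA_TV/index.m3u8",
)
print(entry["tvg-chno"], entry["name"], entry["url"])  # 102 NBA TV http://212.102.60.231/...
```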

File diff suppressed because it is too large


@@ -24,6 +24,7 @@ from scrapers import (
     tvapp,
     watchfooty,
     webcast,
+    xstreameast,
 )
 from scrapers.utils import get_logger, network
@@ -64,7 +65,6 @@ async def main() -> None:
         asyncio.create_task(embedhd.scrape(hdl_brwsr)),
         asyncio.create_task(ppv.scrape(xtrnl_brwsr)),
         asyncio.create_task(roxie.scrape(hdl_brwsr)),
-        asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
     ]
     httpx_tasks = [
@@ -75,11 +75,13 @@ async def main() -> None:
         asyncio.create_task(pawa.scrape()),
         asyncio.create_task(shark.scrape()),
         asyncio.create_task(streamcenter.scrape()),
+        asyncio.create_task(streamhub.scrape()),
         asyncio.create_task(streamsgate.scrape()),
         asyncio.create_task(streamtpnew.scrape()),
         asyncio.create_task(totalsportek.scrape()),
         asyncio.create_task(tvapp.scrape()),
         asyncio.create_task(webcast.scrape()),
+        # asyncio.create_task(xstreameast.scrape()),
     ]
     await asyncio.gather(*(pw_tasks + httpx_tasks))
@@ -115,6 +117,7 @@ async def main() -> None:
         | tvapp.urls
         | watchfooty.urls
         | webcast.urls
+        | xstreameast.urls
     )
     live_events: list[str] = []
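This diff moves `streamhub` from the Playwright task list to the httpx task list and wires in the new `xstreameast` module; its `scrape()` task is still commented out at this commit even though `xstreameast.urls` is already merged into the union. The overall pattern is fan-out via `asyncio.gather`, then a dict-union of each module's module-level `urls`. A minimal sketch of that pattern, using hypothetical stub scrapers in place of the real modules:

```python
import asyncio

# Hypothetical stand-ins for scraper modules; each real module exposes
# an async scrape() plus a module-level `urls` dict that scrape() fills.
class StubScraper:
    def __init__(self, name: str) -> None:
        self.name = name
        self.urls: dict[str, dict[str, str]] = {}

    async def scrape(self) -> None:
        await asyncio.sleep(0)  # network work would happen here
        self.urls[f"[Demo] {self.name}"] = {"url": f"http://example.com/{self.name}.m3u8"}

async def main() -> None:
    a, b = StubScraper("a"), StubScraper("b")
    tasks = [asyncio.create_task(s.scrape()) for s in (a, b)]
    await asyncio.gather(*tasks)
    merged = a.urls | b.urls  # same dict-union merge as in the diff above
    print(merged)

asyncio.run(main())
```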


@@ -1,8 +1,8 @@
 import asyncio
 import re
 from functools import partial
-from urllib.parse import urljoin
+from urllib.parse import urljoin, urlparse
-from playwright.async_api import Browser, Page, TimeoutError
 from selectolax.parser import HTMLParser

 from .utils import Cache, Time, get_logger, leagues, network
@@ -34,86 +34,59 @@ SPORT_ENDPOINTS = [
 ]


-async def process_event(
-    url: str,
-    url_num: int,
-    page: Page,
-) -> str | None:
-    captured: list[str] = []
-    got_one = asyncio.Event()
-    handler = partial(
-        network.capture_req,
-        captured=captured,
-        got_one=got_one,
-    )
-    page.on("request", handler)
-    try:
-        resp = await page.goto(
-            url,
-            wait_until="domcontentloaded",
-            timeout=6_000,
-        )
-        if not resp or resp.status != 200:
-            log.warning(
-                f"URL {url_num}) Status Code: {resp.status if resp else 'None'}"
-            )
-            return
-        try:
-            btn = page.locator("button.btn.btn-sm.btn-success.streamLink")
-            iframe_src = await btn.get_attribute("data-src", timeout=1_250)
-        except TimeoutError:
-            log.warning(f"URL {url_num}) No iframe source found.")
-            return
-        await page.goto(
-            iframe_src,
-            wait_until="domcontentloaded",
-            timeout=5_000,
-        )
-        wait_task = asyncio.create_task(got_one.wait())
-        try:
-            await asyncio.wait_for(wait_task, timeout=10)
-        except asyncio.TimeoutError:
-            log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
-            return
-        finally:
-            if not wait_task.done():
-                wait_task.cancel()
-                try:
-                    await wait_task
-                except asyncio.CancelledError:
-                    pass
-        if captured:
-            log.info(f"URL {url_num}) Captured M3U8")
-            return captured[0]
-        log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
-        return
-    except Exception as e:
-        log.warning(f"URL {url_num}) {e}")
-        return
-    finally:
-        page.remove_listener("request", handler)
+async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]:
+    if not (event_data := await network.request(url, log=log)):
+        log.warning(f"URL {url_num}) Failed to load url.")
+        return
+    soup_1 = HTMLParser(event_data.content)
+    ifr_1 = soup_1.css_first("iframe#playerIframe")
+    if not ifr_1 or not (src := ifr_1.attributes.get("src")):
+        log.warning(f"URL {url_num}) No iframe element found.")
+        return
+    parsed = urlparse(src)
+    ifr_1_src = urljoin(
+        BASE_URL,
+        f"embed1/{parsed.path.split('/')[-1].split('_')[0]}.php",
+    )
+    if not (
+        ifr_1_src_data := await network.request(
+            ifr_1_src,
+            headers={"Referer": url},
+            log=log,
+        )
+    ):
+        log.warning(f"URL {url_num}) Failed to load iframe source. (IFR1)")
+        return
+    soup_2 = HTMLParser(ifr_1_src_data.content)
+    ifr_2 = soup_2.css_first("center iframe")
+    if not ifr_2 or not (ifr_2_src := ifr_2.attributes.get("src")):
+        log.warning(f"URL {url_num}) Unable to locate iframe. (IFR2)")
+        return
+    ifr_2_src = f"https:{ifr_2_src}" if ifr_2_src.startswith("//") else ifr_2_src
+    if not (ifr_2_src_data := await network.request(ifr_2_src, log=log)):
+        log.warning(f"URL {url_num}) Failed to load iframe source.")
+        return
+    valid_m3u8 = re.compile(r"src:\s+(\'|\")([^\']+)(\'|\")", re.I)
+    if not (match := valid_m3u8.search(ifr_2_src_data.text)):
+        log.warning(f"URL {url_num}) No source found.")
+        return
+    log.info(f"URL {url_num}) Captured M3U8")
+    return match[2]


 async def refresh_html_cache(
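The rewritten `process_event` above replaces browser-driven request capture with plain HTTP hops: it derives the first embed URL from the `iframe#playerIframe` src by taking the last path segment, keeping the part before the first underscore, and rebuilding an `embed1/<id>.php` URL on `BASE_URL`. A worked example of just that derivation, with a made-up base and iframe src (hostnames and paths are illustrative only):

```python
from urllib.parse import urljoin, urlparse

BASE_URL = "https://streamhub.example"           # placeholder, not the real base
src = "https://cdn.example/player/12345_hd.php"  # hypothetical iframe src

parsed = urlparse(src)
stream_id = parsed.path.split("/")[-1].split("_")[0]  # "12345_hd.php" -> "12345"
ifr_1_src = urljoin(BASE_URL, f"embed1/{stream_id}.php")
print(ifr_1_src)  # https://streamhub.example/embed1/12345.php
```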
@@ -212,7 +185,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
     return live


-async def scrape(browser: Browser) -> None:
+async def scrape() -> None:
     cached_urls = CACHE_FILE.load()
     valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
@@ -227,15 +200,13 @@ async def scrape(browser: Browser) -> None:
     if events := await get_events(cached_urls.keys()):
         log.info(f"Processing {len(events)} new URL(s)")
-        async with network.event_context(browser) as context:
         for i, ev in enumerate(events, start=1):
-            async with network.event_page(context) as page:
             handler = partial(
                 process_event,
                 url=(link := ev["link"]),
                 url_num=i,
-                page=page,
             )
             url = await network.safe_process(
@@ -262,6 +233,7 @@ async def scrape(browser: Browser) -> None:
                 "timestamp": ts,
                 "id": tvg_id or "Live.Event.us",
                 "link": link,
+                "UA": "curl/8.19.0",
             }
             cached_urls[key] = entry
@@ -269,8 +241,6 @@ async def scrape(browser: Browser) -> None:
             if url:
                 valid_count += 1
-                entry["url"] = url.split("?st")[0]
                 urls[key] = entry

     log.info(f"Collected and cached {valid_count - cached_count} new event(s)")


@@ -11,12 +11,11 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}

-CACHE_FILE = Cache("TSPRTK", exp=28_800)
+TAG = "TOTALSPRTK"
-BASES = {
-    "TSPRTK1": "https://live.totalsportek.fyi",
-    "TSPRTK3": "https://live3.totalsportek.fyi",
-}
+CACHE_FILE = Cache(TAG, exp=28_800)
+BASE_URL = "https://live3.totalsportek.fyi"


 def fix_txt(s: str) -> str:
@@ -25,43 +24,36 @@ def fix_txt(s: str) -> str:
     return s.upper() if s.islower() else s


-async def process_ts1(ifr_src: str, url_num: int) -> str | None:
-    if not (ifr_src_data := await network.request(ifr_src, log=log)):
-        log.info(f"URL {url_num}) Failed to load iframe source.")
+async def process_event(url: str, url_num: int) -> str | None:
+    if not (event_data := await network.request(url, log=log)):
+        log.warning(f"URL {url_num}) Failed to load url.")
         return
-    valid_m3u8 = re.compile(r'(var|const)\s+(\w+)\s*=\s*"([^"]*)"', re.I)
+    soup_1 = HTMLParser(event_data.content)
-    if not (match := valid_m3u8.search(ifr_src_data.text)):
-        log.warning(f"URL {url_num}) No Clappr source found.")
+    iframe_1 = soup_1.css_first("iframe")
+    if not iframe_1 or not (iframe_1_src := iframe_1.attributes.get("src")):
+        log.warning(f"URL {url_num}) No iframe element found. (IFR1)")
         return
-    if len(encoded := match[2]) < 20:
-        encoded = match[3]
-    log.info(f"URL {url_num}) Captured M3U8")
-    return bytes.fromhex(encoded).decode("utf-8")


-async def process_ts3(ifr_src: str, url_num: int) -> str | None:
-    if not (ifr_1_src_data := await network.request(ifr_src, log=log)):
+    if not (iframe_1_src_data := await network.request(iframe_1_src, log=log)):
         log.warning(f"URL {url_num}) Failed to load iframe source. (IFR1)")
         return
-    soup_2 = HTMLParser(ifr_1_src_data.content)
+    soup_2 = HTMLParser(iframe_1_src_data.content)
-    ifr_2 = soup_2.css_first("iframe")
+    iframe_2 = soup_2.css_first("iframe")
-    if not ifr_2 or not (ifr_2_src := ifr_2.attributes.get("src")):
+    if not iframe_2 or not (iframe_2_src := iframe_2.attributes.get("src")):
         log.warning(f"URL {url_num}) No iframe element found. (IFR2)")
         return
     if not (
-        ifr_2_src_data := await network.request(
-            ifr_2_src,
-            headers={"Referer": ifr_src},
+        iframe_2_src_data := await network.request(
+            iframe_2_src,
             log=log,
+            headers={"Referer": iframe_1_src},
         )
     ):
         log.warning(f"URL {url_num}) Failed to load iframe source. (IFR2)")
@@ -69,7 +61,7 @@ async def process_ts3(ifr_src: str, url_num: int) -> str | None:
     valid_m3u8 = re.compile(r'currentStreamUrl\s+=\s+"([^"]*)"', re.I)

-    if not (match := valid_m3u8.search(ifr_2_src_data.text)):
+    if not (match := valid_m3u8.search(iframe_2_src_data.text)):
         log.warning(f"URL {url_num}) No Clappr source found.")
         return
@@ -78,37 +70,16 @@ async def process_ts3(ifr_src: str, url_num: int) -> str | None:
     return json.loads(f'"{match[1]}"')


-async def process_event(url: str, url_num: int, tag: str) -> str | None:
-    if not (event_data := await network.request(url, log=log)):
-        log.warning(f"URL {url_num}) Failed to load url.")
-        return
-    soup = HTMLParser(event_data.content)
-    iframe = soup.css_first("iframe")
-    if not iframe or not (iframe_src := iframe.attributes.get("src")):
-        log.warning(f"URL {url_num}) No valid iframe source found.")
-        return
-    return (
-        await process_ts1(iframe_src, url_num)
-        if tag == "TSPRTK1"
-        else await process_ts3(iframe_src, url_num)
-    )


 async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
     events = []
-    if not (html_data := await network.request(BASES["TSPRTK1"], log=log)):
+    if not (html_data := await network.request(BASE_URL, log=log)):
         return events

     soup = HTMLParser(html_data.content)
     sport = "Live Event"
-    for tag, url in BASES.items():
     for node in soup.css("a"):
         if not node.attributes.get("class"):
             continue
@@ -135,15 +106,14 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
         event_name = fix_txt(" vs ".join(teams))

-        if f"[{sport}] {event_name} ({tag})" in cached_keys:
+        if f"[{sport}] {event_name} ({TAG})" in cached_keys:
            continue

         events.append(
             {
                 "sport": sport,
                 "event": event_name,
-                "tag": tag,
-                "link": urljoin(url, href),
+                "link": urljoin(f"{html_data.url}", href),
             }
         )
@@ -161,7 +131,7 @@ async def scrape() -> None:
     log.info(f"Loaded {cached_count} event(s) from cache")

-    log.info('Scraping from "https://live.totalsportek.fyi"')
+    log.info(f'Scraping from "{BASE_URL}"')

     if events := await get_events(cached_urls.keys()):
         log.info(f"Processing {len(events)} new URL(s)")
@@ -173,7 +143,6 @@ async def scrape() -> None:
                 process_event,
                 url=(link := ev["link"]),
                 url_num=i,
-                tag=(tag := ev["tag"]),
             )
             url = await network.safe_process(
@@ -185,7 +154,7 @@ async def scrape() -> None:
             sport, event = ev["sport"], ev["event"]
-            key = f"[{sport}] {event} ({tag})"
+            key = f"[{sport}] {event} ({TAG})"
             tvg_id, logo = leagues.get_tvg_info(sport, event)
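With the per-base dispatcher gone, the surviving extraction path scans the second iframe's body for a `currentStreamUrl = "..."` assignment and round-trips the captured string through `json.loads` to unescape JavaScript sequences such as `\/`. A worked example with a fabricated player snippet (the page body is invented; the regex and unescape trick are the ones in the diff):

```python
import json
import re

valid_m3u8 = re.compile(r'currentStreamUrl\s+=\s+"([^"]*)"', re.I)

# Fabricated player JavaScript; real pages embed something similar.
body = 'var currentStreamUrl = "https:\\/\\/cdn.example\\/live\\/stream.m3u8";'

if match := valid_m3u8.search(body):
    # json.loads('"...\\/..."') turns the escaped string into a plain URL
    print(json.loads(f'"{match[1]}"'))  # https://cdn.example/live/stream.m3u8
```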


@@ -45,8 +45,6 @@ async def get_events() -> list[dict[str, str]]:
     if not (html_data := await network.request(BASE_URL, log=log)):
         return events

-    now = Time.clean(Time.now())
-
     soup = HTMLParser(html_data.content)
     for row in soup.css(".row"):
@@ -57,14 +55,9 @@ async def get_events() -> list[dict[str, str]]:
             continue

         for a in row.css("a.list-group-item[href]"):
-            x, y = a.text(strip=True).split(":", 1)
+            splits = a.text(strip=True).split(":")
-            event_name = x.split("@")[0].strip()
-            event_dt = Time.from_str(y.split(":", 1)[-1], timezone="UTC")
-            if event_dt.date() != now.date():
-                continue
+            event_name = ":".join(splits[:2]).split("@")[0].strip()

             if not (href := a.attributes.get("href")):
                 continue
@@ -74,7 +67,6 @@ async def get_events() -> list[dict[str, str]]:
                     "sport": sport,
                     "event": event_name,
                     "link": urljoin(f"{html_data.url}", href),
-                    "timestamp": now.timestamp(),
                 }
             )
@@ -94,6 +86,8 @@ async def scrape() -> None:
     if events := await get_events():
         log.info(f"Processing {len(events)} URL(s)")
+        now = Time.clean(Time.now())
+
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
@@ -108,11 +102,7 @@ async def scrape() -> None:
                 log=log,
             )
-            sport, event, ts = (
-                ev["sport"],
-                ev["event"],
-                ev["timestamp"],
-            )
+            sport, event = ev["sport"], ev["event"]

             key = f"[{sport}] {event} ({TAG})"
@@ -122,7 +112,7 @@ async def scrape() -> None:
                 "url": url,
                 "logo": logo,
                 "base": BASE_URL,
-                "timestamp": ts,
+                "timestamp": now.timestamp(),
                 "id": tvg_id or "Live.Event.us",
                 "link": link,
             }
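This change drops the per-event date filter and rebuilds the event name from the first two colon-separated chunks, which keeps names that themselves contain a colon intact while still discarding the trailing time portion. A worked example with an invented listing string in the shape the parser expects:

```python
# Invented listing text; the real site lists "<name> @ <extra>: <time>" entries.
text = "UFC 300: Prelims @ Sat: 18:30"

splits = text.split(":")  # ["UFC 300", " Prelims @ Sat", " 18", "30"]
event_name = ":".join(splits[:2]).split("@")[0].strip()
print(event_name)  # UFC 300: Prelims
```

The old `split(":", 1)` approach would have truncated "UFC 300: Prelims" to "UFC 300"; joining the first two chunks back together preserves one embedded colon before stripping everything after the `@`.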


@@ -0,0 +1,179 @@
+import asyncio
+import re
+from functools import partial
+from urllib.parse import urljoin
+
+from selectolax.parser import HTMLParser
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+urls: dict[str, dict[str, str | float]] = {}
+
+TAG = "XSTRMEST"
+CACHE_FILE = Cache(TAG, exp=10_800)
+BASE_URL = "https://xstreameast.com"
+SPORT_URLS = [
+    urljoin(BASE_URL, f"categories/{sport}/")
+    for sport in [
+        # "mlb",
+        "mma",
+        "nba",
+        # "nfl",
+        # "nhl",
+        "soccer",
+        "wwe",
+    ]
+]
+
+
+async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]:
+    nones = None, None
+    if not (html_data := await network.request(url, log=log)):
+        log.warning(f"URL {url_num}) Failed to load url.")
+        return nones
+
+    soup = HTMLParser(html_data.content)
+    iframe = soup.css_first("iframe")
+    if not iframe or not (iframe_src := iframe.attributes.get("src")):
+        log.warning(f"URL {url_num}) No iframe element found.")
+        return nones
+    elif iframe_src == "about:blank":
+        log.warning(f"URL {url_num}) No iframe element found.")
+        return nones
+
+    if not (iframe_src_data := await network.request(iframe_src, log=log)):
+        log.warning(f"URL {url_num}) Failed to load iframe source.")
+        return nones
+
+    valid_m3u8 = re.compile(r'(var|const)\s+(\w+)\s*=\s*"([^"]*)"', re.I)
+    if not (match := valid_m3u8.search(iframe_src_data.text)):
+        log.warning(f"URL {url_num}) No Clappr source found.")
+        return nones
+
+    if len(encoded := match[2]) < 20:
+        encoded = match[3]
+
+    log.info(f"URL {url_num}) Captured M3U8")
+    return bytes.fromhex(encoded).decode("utf-8"), iframe_src
+
+
+async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+    tasks = [network.request(url, log=log) for url in SPORT_URLS]
+    results = await asyncio.gather(*tasks)
+
+    events = []
+    if not (soups := [HTMLParser(html.content) for html in results if html]):
+        return events
+
+    sport = "Live Event"
+    for soup in soups:
+        if sport_header := soup.css_first("h1.text-3xl"):
+            header = sport_header.text(strip=True)
+            sport = header.split("Streams")[0].strip()
+
+        for card in soup.css("article.game-card"):
+            if not (team_elem := card.css_first("h2.text-xl.font-semibold")):
+                continue
+
+            if not (link_elem := card.css_first("a.stream-button")) or not (
+                href := link_elem.attributes.get("href")
+            ):
+                continue
+
+            if (
+                not (live_badge := card.css_first("span.bg-green-600"))
+                or live_badge.text(strip=True) != "LIVE"
+            ):
+                continue
+
+            event_name = team_elem.text(strip=True)
+            if f"[{sport}] {event_name} ({TAG})" in cached_keys:
+                continue
+
+            events.append(
+                {
+                    "sport": sport,
+                    "event": event_name,
+                    "link": href,
+                }
+            )
+
+    return events
+
+
+async def scrape() -> None:
+    cached_urls = CACHE_FILE.load()
+    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
+    valid_count = cached_count = len(valid_urls)
+    urls.update(valid_urls)
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info(f'Scraping from "{BASE_URL}"')
+    if events := await get_events(cached_urls.keys()):
+        log.info(f"Processing {len(events)} new URL(s)")
+        now = Time.clean(Time.now())
+        for i, ev in enumerate(events, start=1):
+            handler = partial(
+                process_event,
+                url=(link := ev["link"]),
+                url_num=i,
+            )
+            url, iframe = await network.safe_process(
+                handler,
+                url_num=i,
+                semaphore=network.HTTP_S,
+                log=log,
+            )
+            sport, event = ev["sport"], ev["event"]
+            key = f"[{sport}] {event} ({TAG})"
+            tvg_id, logo = leagues.get_tvg_info(sport, event)
+            entry = {
+                "url": url,
+                "logo": logo,
+                "base": iframe,
+                "timestamp": now.timestamp(),
+                "id": tvg_id or "Live.Event.us",
+                "link": link,
+            }
+            cached_urls[key] = entry
+            if url:
+                valid_count += 1
+                urls[key] = entry
+        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
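In this new module's regex, group 1 is the `var`/`const` keyword, group 2 the variable name, and group 3 its value; the `len(...) < 20` check falls back from the name to the value, and whichever is long enough is decoded from hex into the M3U8 URL. A worked example with a fabricated embed-page snippet (the variable and hex payload are invented; the payload decodes to a placeholder URL):

```python
import re

# Fabricated embed-page JavaScript; the hex string decodes to a URL.
source = 'const k = "68747470733a2f2f6578616d706c652e636f6d2f6c6976652e6d337538";'
valid_m3u8 = re.compile(r'(var|const)\s+(\w+)\s*=\s*"([^"]*)"', re.I)

match = valid_m3u8.search(source)
encoded = match[2] if len(match[2]) >= 20 else match[3]  # same short-name fallback
print(bytes.fromhex(encoded).decode("utf-8"))  # https://example.com/live.m3u8
```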


@@ -1,16 +1,16 @@
-## Base Log @ 2026-04-18 16:10 UTC
+## Base Log @ 2026-04-17 16:09 UTC
### ✅ Working Streams: 151<br>❌ Dead Streams: 10
| Channel | Error (Code) | Link |
| ------- | ------------ | ---- |
| Discovery Life | HTTP Error (502) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/201208` |
| FDSN SoCal | HTTP Error (404) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/296681` |
| FDSN Southeast | HTTP Error (404) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/82301` |
| FX Movie Channel | HTTP Error (404) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/39873` |
| getTV | HTTP Error (404) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/194187` |
| Grit TV | HTTP Error (502) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/15086` |
| Hallmark Family | HTTP Error (502) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/304609` |
| Hallmark Mystery | HTTP Error (502) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/3388` |
| HBO Family | HTTP Error (404) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/760` |
| INSP | HTTP Error (502) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/30900` |
| TLC | HTTP Error (502) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/2362` |
| TV Land | HTTP Error (502) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/2364` |