Compare commits

..

No commits in common. "111375348d628b82c983acafcf21a5a5c3221d61" and "952a168416aef576fe18ff5b95077b520f95c083" have entirely different histories.

10 changed files with 134142 additions and 131403 deletions

File diff suppressed because it is too large Load diff

261213
M3U8/TV.xml

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -12,6 +12,7 @@ from scrapers import (
istreameast, istreameast,
mainportal, mainportal,
ovogoal, ovogoal,
pawa,
roxie, roxie,
shark, shark,
streamcenter, streamcenter,
@ -22,6 +23,7 @@ from scrapers import (
tvapp, tvapp,
watchfooty, watchfooty,
webcast, webcast,
xstreameast,
) )
from scrapers.utils import get_logger, network from scrapers.utils import get_logger, network
@ -61,7 +63,7 @@ async def main() -> None:
asyncio.create_task(embedhd.scrape(hdl_brwsr)), asyncio.create_task(embedhd.scrape(hdl_brwsr)),
asyncio.create_task(fsports.scrape(xtrnl_brwsr)), asyncio.create_task(fsports.scrape(xtrnl_brwsr)),
asyncio.create_task(roxie.scrape(hdl_brwsr)), asyncio.create_task(roxie.scrape(hdl_brwsr)),
asyncio.create_task(streamhub.scrape(xtrnl_brwsr)), # asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
] ]
httpx_tasks = [ httpx_tasks = [
@ -69,6 +71,7 @@ async def main() -> None:
asyncio.create_task(istreameast.scrape()), asyncio.create_task(istreameast.scrape()),
asyncio.create_task(mainportal.scrape()), asyncio.create_task(mainportal.scrape()),
asyncio.create_task(ovogoal.scrape()), asyncio.create_task(ovogoal.scrape()),
# asyncio.create_task(pawa.scrape()),
asyncio.create_task(shark.scrape()), asyncio.create_task(shark.scrape()),
asyncio.create_task(streamcenter.scrape()), asyncio.create_task(streamcenter.scrape()),
asyncio.create_task(streamsgate.scrape()), asyncio.create_task(streamsgate.scrape()),
@ -76,6 +79,7 @@ async def main() -> None:
asyncio.create_task(totalsportek.scrape()), asyncio.create_task(totalsportek.scrape()),
asyncio.create_task(tvapp.scrape()), asyncio.create_task(tvapp.scrape()),
asyncio.create_task(webcast.scrape()), asyncio.create_task(webcast.scrape()),
# asyncio.create_task(xstreameast.scrape()),
] ]
await asyncio.gather(*(pw_tasks + httpx_tasks)) await asyncio.gather(*(pw_tasks + httpx_tasks))
@ -99,6 +103,7 @@ async def main() -> None:
| istreameast.urls | istreameast.urls
| mainportal.urls | mainportal.urls
| ovogoal.urls | ovogoal.urls
| pawa.urls
| roxie.urls | roxie.urls
| shark.urls | shark.urls
| streamcenter.urls | streamcenter.urls
@ -109,6 +114,7 @@ async def main() -> None:
| tvapp.urls | tvapp.urls
| watchfooty.urls | watchfooty.urls
| webcast.urls | webcast.urls
| xstreameast.urls
) )
live_events: list[str] = [] live_events: list[str] = []

142
M3U8/scrapers/pawa.py Normal file
View file

@ -0,0 +1,142 @@
import base64
import re
from functools import partial
import feedparser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
# Logger for this scraper module.
log = get_logger(__name__)
# Playable events collected by scrape(), keyed by "[sport] event (TAG)".
urls: dict[str, dict[str, str | float]] = {}
# Source tag embedded in every event key and used to name the cache.
TAG = "PAWA"
# NOTE(review): exp is presumably a lifetime in seconds (19_800 s = 5.5 h) — confirm against Cache.
CACHE_FILE = Cache(TAG, exp=19_800)
# Feed URL that get_events() downloads and hands to feedparser.
BASE_URL = "https://pawastreams.net/feed/"
async def process_event(url: str, url_num: int) -> str | None:
    """Resolve one event page to its M3U8 stream URL.

    Loads the event page, follows its first iframe, and searches the iframe
    document for a player source of the form
    ``source: window.atob('<base64>')``. The base64 payload is decoded and
    everything from ``&remote`` onwards is dropped.

    Args:
        url: Event page URL.
        url_num: Number of the event, used only to label log lines.

    Returns:
        The decoded stream URL, or ``None`` when any step fails. Every
        failure is logged as a warning.
    """
    # Local import: this module has no file-level import of urljoin.
    from urllib.parse import urljoin

    if not (event_data := await network.request(url, log=log)):
        log.warning(f"URL {url_num}) Failed to load url.")
        return None

    soup = HTMLParser(event_data.content)

    if not (iframe := soup.css_first("iframe")):
        log.warning(f"URL {url_num}) No iframe element found.")
        return None

    if not (iframe_src := iframe.attributes.get("src")):
        log.warning(f"URL {url_num}) No iframe source found.")
        return None

    # A relative or protocol-relative src is only meaningful against the page
    # that embeds it; an absolute src comes back from urljoin unchanged.
    iframe_src = urljoin(url, iframe_src)

    if not (iframe_src_data := await network.request(iframe_src, log=log)):
        log.warning(f"URL {url_num}) Failed to load iframe source.")
        return None

    pattern = re.compile(r"source:\s*window\.atob\(\s*'([^']+)'\s*\)", re.I)

    if not (match := pattern.search(iframe_src_data.text)):
        log.warning(f"URL {url_num}) No Clappr source found.")
        return None

    try:
        m3u = base64.b64decode(match[1]).decode("utf-8")
    except ValueError:
        # binascii.Error (bad base64) and UnicodeDecodeError (bad UTF-8) are
        # both ValueError subclasses; report them like every other failure.
        log.warning(f"URL {url_num}) Invalid Clappr source encoding.")
        return None

    # Only claim success once the payload has actually been decoded.
    log.info(f"URL {url_num}) Captured M3U8")

    return m3u.split("&remote")[0]
async def get_events() -> list[dict[str, str]]:
    """Download the feed at ``BASE_URL`` and list its entries as events.

    Entries without a link or without a title are skipped. In each title
    " v " is rewritten to " vs ". Every event is filed under the sport
    "Live Event".

    Returns:
        One dict per usable entry with the keys ``sport``, ``event`` and
        ``link``; an empty list when the feed could not be loaded.
    """
    response = await network.request(BASE_URL, log=log)

    if not response:
        return []

    parsed = feedparser.parse(response.content)

    collected = []

    for item in parsed.entries:
        link = item.get("link")
        title = item.get("title")

        if not (link and title):
            continue

        collected.append(
            {
                "sport": "Live Event",
                "event": title.replace(" v ", " vs "),
                "link": link,
            }
        )

    return collected
async def scrape() -> None:
    """Populate ``urls`` from the cache, or by scraping when the cache is empty.

    With a non-empty cache, the cached entries that have a stream URL are
    copied into ``urls`` and nothing is fetched. Otherwise every event from
    ``get_events`` is resolved through ``process_event``. Each result is
    stored in the cache, including those without a URL, but only entries
    with a URL are added to ``urls``. The cache is written back at the end.
    """
    cached_urls = CACHE_FILE.load()

    if cached_urls:
        playable = {name: item for name, item in cached_urls.items() if item["url"]}
        urls.update(playable)

        log.info(f"Loaded {len(urls)} event(s) from cache")

        return

    log.info(f'Scraping from "{BASE_URL}"')

    events = await get_events()

    if not events:
        log.info("No events found")
    else:
        log.info(f"Processing {len(events)} URL(s)")

        now = Time.clean(Time.now())

        for idx, ev in enumerate(events, start=1):
            link = ev["link"]

            url = await network.safe_process(
                partial(process_event, url=link, url_num=idx),
                url_num=idx,
                semaphore=network.HTTP_S,
                log=log,
            )

            sport = ev["sport"]
            event = ev["event"]

            key = f"[{sport}] {event} ({TAG})"

            tvg_id, logo = leagues.get_tvg_info(sport, event)

            entry = {
                "url": url,
                "logo": logo,
                "base": link,
                "timestamp": now.timestamp(),
                "id": tvg_id or "Live.Event.us",
                "link": link,
            }

            # Cache every attempt; expose only the ones that yielded a URL.
            cached_urls[key] = entry

            if url:
                urls[key] = entry

        log.info(f"Collected and cached {len(urls)} event(s)")

    CACHE_FILE.write(cached_urls)

View file

@ -97,9 +97,18 @@ async def process_event(
pass pass
if captured: if captured:
if "smarthard.click" not in (m3u8 := captured[0]).lower():
log.warning(f"URL {url_num}) Invalid M3U8 found.")
return
log.info(f"URL {url_num}) Captured M3U8") log.info(f"URL {url_num}) Captured M3U8")
return captured[0] return m3u8
log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
return
except Exception as e: except Exception as e:
log.warning(f"URL {url_num}) {e}") log.warning(f"URL {url_num}) {e}")
@ -215,7 +224,7 @@ async def scrape(browser: Browser) -> None:
entry = { entry = {
"url": url, "url": url,
"logo": logo, "logo": logo,
"base": "http://streamobs.click/", "base": "https://hardsmart.click",
"timestamp": ts, "timestamp": ts,
"id": tvg_id or "Live.Event.us", "id": tvg_id or "Live.Event.us",
"link": link, "link": link,

View file

@ -15,10 +15,7 @@ TAG = "TSPRTK"
CACHE_FILE = Cache(TAG, exp=19_800) CACHE_FILE = Cache(TAG, exp=19_800)
BASES = { BASE_URL = "https://live3.totalsportek.fyi"
"TSPRTK1": "https://live.totalsportek.fyi",
"TSPRTK3": "https://live3.totalsportek.fyi",
}
def fix_txt(s: str) -> str: def fix_txt(s: str) -> str:
@ -27,43 +24,36 @@ def fix_txt(s: str) -> str:
return s.upper() if s.islower() else s return s.upper() if s.islower() else s
async def process_ts1(ifr_src: str, url_num: int) -> str | None: async def process_event(url: str, url_num: int) -> str | None:
if not (ifr_src_data := await network.request(ifr_src, log=log)): if not (event_data := await network.request(url, log=log)):
log.info(f"URL {url_num}) Failed to load iframe source.") log.warning(f"URL {url_num}) Failed to load url.")
return return
valid_m3u8 = re.compile(r'(var|const)\s+(\w+)\s+=\s+"([^"]*)"', re.I) soup_1 = HTMLParser(event_data.content)
if not (match := valid_m3u8.search(ifr_src_data.text)): iframe_1 = soup_1.css_first("iframe")
log.warning(f"URL {url_num}) No Clappr source found.")
if not iframe_1 or not (iframe_1_src := iframe_1.attributes.get("src")):
log.warning(f"URL {url_num}) No iframe element found. (IFR1)")
return return
if len(encoded := match[2]) < 20: if not (iframe_1_src_data := await network.request(iframe_1_src, log=log)):
encoded = match[3]
log.info(f"URL {url_num}) Captured M3U8")
return bytes.fromhex(encoded).decode("utf-8")
async def process_ts3(ifr_src: str, url_num: int) -> str | None:
if not (ifr_1_src_data := await network.request(ifr_src, log=log)):
log.warning(f"URL {url_num}) Failed to load iframe source. (IFR1)") log.warning(f"URL {url_num}) Failed to load iframe source. (IFR1)")
return return
soup = HTMLParser(ifr_1_src_data.content) soup_2 = HTMLParser(iframe_1_src_data.content)
ifr_2 = soup.css_first("iframe") iframe_2 = soup_2.css_first("iframe")
if not ifr_2 or not (ifr_2_src := ifr_2.attributes.get("src")): if not iframe_2 or not (iframe_2_src := iframe_2.attributes.get("src")):
log.warning(f"URL {url_num}) No iframe element found. (IFR2)") log.warning(f"URL {url_num}) No iframe element found. (IFR2)")
return return
if not ( if not (
ifr_2_src_data := await network.request( iframe_2_src_data := await network.request(
ifr_2_src, iframe_2_src,
headers={"Referer": ifr_src},
log=log, log=log,
headers={"Referer": iframe_1_src},
) )
): ):
log.warning(f"URL {url_num}) Failed to load iframe source. (IFR2)") log.warning(f"URL {url_num}) Failed to load iframe source. (IFR2)")
@ -71,7 +61,7 @@ async def process_ts3(ifr_src: str, url_num: int) -> str | None:
valid_m3u8 = re.compile(r'currentStreamUrl\s+=\s+"([^"]*)"', re.I) valid_m3u8 = re.compile(r'currentStreamUrl\s+=\s+"([^"]*)"', re.I)
if not (match := valid_m3u8.search(ifr_2_src_data.text)): if not (match := valid_m3u8.search(iframe_2_src_data.text)):
log.warning(f"URL {url_num}) No Clappr source found.") log.warning(f"URL {url_num}) No Clappr source found.")
return return
@ -80,86 +70,52 @@ async def process_ts3(ifr_src: str, url_num: int) -> str | None:
return json.loads(f'"{match[1]}"') return json.loads(f'"{match[1]}"')
async def process_event(
url: str,
url_num: int,
tag: str,
) -> tuple[str | None, str | None]:
nones = None, None
if not (event_data := await network.request(url, log=log)):
log.warning(f"URL {url_num}) Failed to load url.")
return nones
soup = HTMLParser(event_data.content)
iframe = soup.css_first("iframe")
if not iframe or not (iframe_src := iframe.attributes.get("src")):
log.warning(f"URL {url_num}) No valid iframe source found.")
return nones
m3u8 = (
await process_ts1(iframe_src, url_num)
if tag == "TSPRTK1"
else await process_ts3(iframe_src, url_num)
)
return (m3u8, iframe_src) if m3u8 else nones
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]: async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
events = [] events = []
if not (html_data := await network.request(BASES["TSPRTK1"], log=log)): if not (html_data := await network.request(BASE_URL, log=log)):
return events return events
soup = HTMLParser(html_data.content) soup = HTMLParser(html_data.content)
sport = "Live Event" sport = "Live Event"
for tag, url in BASES.items(): for node in soup.css("a"):
for node in soup.css("a"): if not node.attributes.get("class"):
if not node.attributes.get("class"): continue
continue
if (parent := node.parent) and "my-1" in parent.attributes.get("class", ""): if (parent := node.parent) and "my-1" in parent.attributes.get("class", ""):
if span := node.css_first("span"): if span := node.css_first("span"):
sport = span.text(strip=True) sport = span.text(strip=True)
sport = fix_txt(sport) sport = fix_txt(sport)
if not (teams := [t.text(strip=True) for t in node.css(".col-7 .col-12")]): if not (teams := [t.text(strip=True) for t in node.css(".col-7 .col-12")]):
continue continue
if not (href := node.attributes.get("href")): if not (href := node.attributes.get("href")):
continue continue
href = urlparse(href).path if href.startswith("http") else href href = urlparse(href).path if href.startswith("http") else href
# if not (time_node := node.css_first(".col-3 span")): # if not (time_node := node.css_first(".col-3 span")):
# continue # continue
# if time_node.text(strip=True).lower() not in [ # if time_node.text(strip=True).lower() != "matchstarted":
# "matchstarted", # continue
# "1minfrom now",
# ]:
# continue
event_name = fix_txt(" vs ".join(teams)) event_name = fix_txt(" vs ".join(teams))
if f"[{sport}] {event_name} ({tag})" in cached_keys: if f"[{sport}] {event_name} ({TAG})" in cached_keys:
continue continue
events.append( events.append(
{ {
"sport": sport, "sport": sport,
"event": event_name, "event": event_name,
"tag": tag, "link": urljoin(f"{html_data.url}", href),
"link": urljoin(url, href), }
} )
)
return events return events
@ -175,7 +131,7 @@ async def scrape() -> None:
log.info(f"Loaded {cached_count} event(s) from cache") log.info(f"Loaded {cached_count} event(s) from cache")
log.info('Scraping from "https://live.totalsportek.fyi"') log.info(f'Scraping from "{BASE_URL}"')
if events := await get_events(cached_urls.keys()): if events := await get_events(cached_urls.keys()):
log.info(f"Processing {len(events)} new URL(s)") log.info(f"Processing {len(events)} new URL(s)")
@ -187,10 +143,9 @@ async def scrape() -> None:
process_event, process_event,
url=(link := ev["link"]), url=(link := ev["link"]),
url_num=i, url_num=i,
tag=(tag := ev["tag"]),
) )
url, iframe = await network.safe_process( url = await network.safe_process(
handler, handler,
url_num=i, url_num=i,
semaphore=network.HTTP_S, semaphore=network.HTTP_S,
@ -199,14 +154,14 @@ async def scrape() -> None:
sport, event = ev["sport"], ev["event"] sport, event = ev["sport"], ev["event"]
key = f"[{sport}] {event} ({tag})" key = f"[{sport}] {event} ({TAG})"
tvg_id, logo = leagues.get_tvg_info(sport, event) tvg_id, logo = leagues.get_tvg_info(sport, event)
entry = { entry = {
"url": url, "url": url,
"logo": logo, "logo": logo,
"base": iframe, "base": link,
"timestamp": now.timestamp(), "timestamp": now.timestamp(),
"id": tvg_id or "Live.Event.us", "id": tvg_id or "Live.Event.us",
"link": link, "link": link,

View file

@ -164,6 +164,9 @@ async def process_event(
log.info(f"URL {url_num}) Captured M3U8") log.info(f"URL {url_num}) Captured M3U8")
return captured[0], iframe_url return captured[0], iframe_url
log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
return nones
except Exception as e: except Exception as e:
log.warning(f"URL {url_num}) {e}") log.warning(f"URL {url_num}) {e}")
return nones return nones

View file

@ -0,0 +1,179 @@
import asyncio
import re
from functools import partial
from urllib.parse import urljoin
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
# Logger for this scraper module.
log = get_logger(__name__)
# Playable events collected by scrape(), keyed by "[sport] event (TAG)".
urls: dict[str, dict[str, str | float]] = {}
# Source tag embedded in every event key and used to name the cache.
TAG = "XSTRMEAST"
# NOTE(review): exp is presumably a lifetime in seconds (10_800 s = 3 h) — confirm against Cache.
CACHE_FILE = Cache(TAG, exp=10_800)
# Site root; the category pages below are built from it.
BASE_URL = "https://xstreameast.com"
# One category page URL per enabled sport; get_events() requests all of them.
# The commented-out slugs are not requested (presumably kept so they can be
# re-enabled easily — confirm with the author).
SPORT_URLS = [
    urljoin(BASE_URL, f"categories/{sport}/")
    for sport in [
        # "mlb",
        # "mma",
        "nba",
        # "nfl",
        # "nhl",
        "soccer",
        # "wwe",
    ]
]
async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]:
    """Resolve one event page to its stream URL and the iframe that serves it.

    Loads the event page, follows its first iframe, and takes the first
    ``var``/``const`` string assignment in the iframe document as a
    hex-encoded stream URL.

    Args:
        url: Event page URL.
        url_num: Number of the event, used only to label log lines.

    Returns:
        ``(stream_url, iframe_url)`` on success, ``(None, None)`` when any
        step fails. Every failure is logged as a warning.
    """
    nones = None, None

    if not (html_data := await network.request(url, log=log)):
        log.warning(f"URL {url_num}) Failed to load url.")
        return nones

    soup = HTMLParser(html_data.content)

    iframe = soup.css_first("iframe")
    iframe_src = iframe.attributes.get("src") if iframe else None

    # An "about:blank" frame is reported the same way as a missing iframe.
    if not iframe_src or iframe_src == "about:blank":
        log.warning(f"URL {url_num}) No iframe element found.")
        return nones

    # A relative or protocol-relative src is only meaningful against the page
    # that embeds it; an absolute src comes back from urljoin unchanged.
    iframe_src = urljoin(url, iframe_src)

    if not (iframe_src_data := await network.request(iframe_src, log=log)):
        log.warning(f"URL {url_num}) Failed to load iframe source.")
        return nones

    valid_m3u8 = re.compile(r'(var|const)\s+(\w+)\s+=\s+"([^"]*)"', re.I)

    if not (match := valid_m3u8.search(iframe_src_data.text)):
        log.warning(f"URL {url_num}) No Clappr source found.")
        return nones

    # Group 2 is the variable name, group 3 the quoted value. The name is
    # used as the payload only when it is at least 20 characters long;
    # otherwise the value is used.
    if len(encoded := match[2]) < 20:
        encoded = match[3]

    try:
        m3u8 = bytes.fromhex(encoded).decode("utf-8")
    except ValueError:
        # The pattern accepts any quoted string, so the first match may not
        # be hex at all; UnicodeDecodeError is a ValueError subclass too.
        log.warning(f"URL {url_num}) Invalid Clappr source encoding.")
        return nones

    # Only claim success once the payload has actually been decoded.
    log.info(f"URL {url_num}) Captured M3U8")

    return m3u8, iframe_src
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
    """Collect the live, not-yet-cached events from every category page.

    All pages in ``SPORT_URLS`` are requested concurrently. On each page the
    sport name is the header text before the word "Streams". A card is kept
    only when it has a team heading, a stream button with an ``href``, and a
    badge whose text is exactly "LIVE".

    Args:
        cached_keys: Event keys (``"[sport] event (TAG)"``) already present
            in the cache; matching events are skipped.

    Returns:
        One dict per new event with the keys ``sport``, ``event`` and
        ``link``. ``link`` is resolved against the category page it came
        from.
    """
    tasks = [network.request(url, log=log) for url in SPORT_URLS]

    # gather() keeps the order of its arguments, so results line up with
    # SPORT_URLS and each page can be paired with its own URL.
    results = await asyncio.gather(*tasks)

    events = []

    for page_url, html in zip(SPORT_URLS, results):
        if not html:
            continue

        soup = HTMLParser(html.content)

        # Reset for every page so a page without a header falls back to the
        # generic label instead of inheriting the previous page's sport.
        sport = "Live Event"

        if sport_header := soup.css_first("h1.text-3xl"):
            header = sport_header.text(strip=True)
            sport = header.split("Streams")[0].strip()

        for card in soup.css("article.game-card"):
            if not (team_elem := card.css_first("h2.text-xl.font-semibold")):
                continue

            if not (link_elem := card.css_first("a.stream-button")) or not (
                href := link_elem.attributes.get("href")
            ):
                continue

            if (
                not (live_badge := card.css_first("span.bg-green-600"))
                or live_badge.text(strip=True) != "LIVE"
            ):
                continue

            event_name = team_elem.text(strip=True)

            if f"[{sport}] {event_name} ({TAG})" in cached_keys:
                continue

            events.append(
                {
                    "sport": sport,
                    "event": event_name,
                    # A relative href is made absolute; an absolute one is
                    # returned by urljoin unchanged.
                    "link": urljoin(page_url, href),
                }
            )

    return events
async def scrape() -> None:
    """Merge cached events into ``urls`` and scrape any that are new.

    Cached entries that have a stream URL are loaded into ``urls`` first.
    Events not yet in the cache are then resolved through
    ``process_event``. Each result is stored in the cache, including those
    without a URL, but only entries with a URL are added to ``urls``. The
    cache is written back at the end.
    """
    cached_urls = CACHE_FILE.load()

    playable = {name: item for name, item in cached_urls.items() if item["url"]}
    cached_count = len(playable)
    valid_count = cached_count

    urls.update(playable)

    log.info(f"Loaded {cached_count} event(s) from cache")

    log.info(f'Scraping from "{BASE_URL}"')

    events = await get_events(cached_urls.keys())

    if not events:
        log.info("No new events found")
    else:
        log.info(f"Processing {len(events)} new URL(s)")

        now = Time.clean(Time.now())

        for idx, ev in enumerate(events, start=1):
            link = ev["link"]

            url, iframe = await network.safe_process(
                partial(process_event, url=link, url_num=idx),
                url_num=idx,
                semaphore=network.HTTP_S,
                log=log,
            )

            sport = ev["sport"]
            event = ev["event"]

            key = f"[{sport}] {event} ({TAG})"

            tvg_id, logo = leagues.get_tvg_info(sport, event)

            entry = {
                "url": url,
                "logo": logo,
                "base": iframe,
                "timestamp": now.timestamp(),
                "id": tvg_id or "Live.Event.us",
                "link": link,
            }

            # Cache every attempt; expose only the ones that yielded a URL.
            cached_urls[key] = entry

            if url:
                valid_count += 1
                urls[key] = entry

        log.info(f"Collected and cached {valid_count - cached_count} new event(s)")

    CACHE_FILE.write(cached_urls)

View file

@ -1,7 +1,13 @@
## Base Log @ 2026-05-08 15:58 UTC ## Base Log @ 2026-05-07 16:36 UTC
### ✅ Working Streams: 160<br>❌ Dead Streams: 0 ### ✅ Working Streams: 157<br>❌ Dead Streams: 4
| Channel | Error (Code) | Link |
| ------- | ------------ | ---- |
| FDSN SoCal | HTTP Error (404) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/296681` |
| FDSN Wisconsin | HTTP Error (404) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/295668` |
| NBC Sports California | HTTP Error (404) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/16116` |
| Ovation | HTTP Error (404) | `http://aflaxtv.xyz:8080/mitrovic/19106b7cb4/194336` |
--- ---
#### Base Channels URL #### Base Channels URL
``` ```