Compare commits

..

No commits in common. "e9438eab81a42224dc023ff07c311ba2d2e1c337" and "dfd3407884468b8aeffb4209d501eaab5c54e575" have entirely different histories.

16 changed files with 170586 additions and 166967 deletions

File diff suppressed because it is too large Load diff

212504
M3U8/TV.xml

File diff suppressed because one or more lines are too long

View file

@ -58,7 +58,7 @@ http://41.205.93.154/BRAVO/index.m3u8
https://buzzrota-web.amagi.tv/playlist.m3u8
#EXTINF:-1 tvg-chno="20" tvg-id="CSPAN.us2" tvg-name="C-SPAN" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s10161_dark_360w_270h.png" group-title="TV",C-SPAN
http://mytvstream.net:8080/live/bn80NG/909467/136589.m3u8
http://mytvstream.net:8080/live/30550113/30550113/136589.m3u8
#EXTINF:-1 tvg-chno="21" tvg-id="Cartoon.Network.HD.us2" tvg-name="Cartoon Network" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s12131_dark_360w_270h.png" group-title="TV",Cartoon Network
http://23.237.104.106:8080/USA_CARTOON_NETWORK/index.m3u8
@ -127,7 +127,7 @@ http://hardcoremedia.xyz/live/rabdsbmz/3731346838/129866.ts
http://hardcoremedia.xyz/live/rabdsbmz/3731346838/129867.ts
#EXTINF:-1 tvg-chno="43" tvg-id="Disney.Channel.HD.us2" tvg-name="Disney" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s10171_dark_360w_270h.png" group-title="TV",Disney Channel
http://hardcoremedia.xyz/live/rabdsbmz/3731346838/257087.ts
http://104.255.88.155/disney/index.m3u8
#EXTINF:-1 tvg-chno="44" tvg-id="Disney.XD.HD.us2" tvg-name="Disney XD" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s18279_dark_360w_270h.png" group-title="TV",Disney XD
http://hardcoremedia.xyz/live/rabdsbmz/3731346838/130092.ts
@ -148,40 +148,40 @@ http://23.237.104.106:8080/USA_ESPNU/index.m3u8
http://snowbank.houseinventer.com/6501/index.m3u8?token=M1lDdWljYkdyZGFhZzVxeGc2Mkt5OGJicWNxd2xheDkzWWVieEt5b3lZVERxNEduaVp1UnBxU2VlWmF0ZnRlRGxaMm1zNStDbnJOOXFZMlhtcStybmc9PQ==
#EXTINF:-1 tvg-chno="50" tvg-id="FanDuel.Sports.Network.Detroit.24/7.HDTV.us" tvg-name="FDSN Detroit" tvg-logo="https://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s54286_dark_360w_270h.png" group-title="TV",FDSN Detroit
http://mytvstream.net:8080/live/bn80NG/909467/20930.m3u8
http://mytvstream.net:8080/live/30550113/30550113/20930.m3u8
#EXTINF:-1 tvg-chno="51" tvg-id="FanDuel.Sports.Network.Florida.HDTV.(Out.of.Market).us" tvg-name="FDSN Florida" tvg-logo="https://i.gyazo.com/fad701fbaaafe161b13b23ed9b50179b.png" group-title="TV",FDSN Florida
http://mytvstream.net:8080/live/bn80NG/909467/46794.m3u8
http://mytvstream.net:8080/live/30550113/30550113/46794.m3u8
#EXTINF:-1 tvg-chno="52" tvg-id="FanDuel.Sports.Network.Midwest.24/7.HDTV.us" tvg-name="FDSN Midwest" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s11058_dark_360w_270h.png" group-title="TV",FDSN Midwest
http://mytvstream.net:8080/live/bn80NG/909467/66795.m3u8
http://mytvstream.net:8080/live/30550113/30550113/66795.m3u8
#EXTINF:-1 tvg-chno="53" tvg-id="FanDuel.Sports.Network.North.HDTV.us" tvg-name="FDSN North" tvg-logo="https://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s10977_dark_360w_270h.png" group-title="TV",FDSN North
http://mytvstream.net:8080/live/bn80NG/909467/58827.m3u8
http://mytvstream.net:8080/live/30550113/30550113/58827.m3u8
#EXTINF:-1 tvg-chno="54" tvg-id="FanDuel.Sports.Network.Ohio.(Cleveland).HDTV.us" tvg-name="FDSN Ohio" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s49691_dark_360w_270h.png" group-title="TV",FDSN Ohio
http://mytvstream.net:8080/live/bn80NG/909467/17752.m3u8
http://mytvstream.net:8080/live/30550113/30550113/17752.m3u8
#EXTINF:-1 tvg-chno="55" tvg-id="FanDuel.Sports.Network.Oklahoma.24/7.HDTV.(Tulsa).us" tvg-name="FDSN Oklahoma" tvg-logo="https://i.gyazo.com/80ad6fd142cd67f06eef58d9ce5aa72b.png" group-title="TV",FDSN Oklahoma
http://mytvstream.net:8080/live/bn80NG/909467/20934.m3u8
http://mytvstream.net:8080/live/30550113/30550113/20934.m3u8
#EXTINF:-1 tvg-chno="56" tvg-id="FanDuel.Sports.Network.SoCal.HDTV.us" tvg-name="FDSN SoCal" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s16743_dark_360w_270h.png" group-title="TV",FDSN SoCal
http://mytvstream.net:8080/live/bn80NG/909467/221151.m3u8
http://mytvstream.net:8080/live/30550113/30550113/221151.m3u8
#EXTINF:-1 tvg-chno="57" tvg-id="FanDuel.Sports.Network.Southeast.HDTV.(Mont./Birm./Dothan/Mobile.AL).us" tvg-name="FDSN Southeast" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s20789_dark_360w_270h.png" group-title="TV",FDSN Southeast
http://mytvstream.net:8080/live/bn80NG/909467/2213.m3u8
http://mytvstream.net:8080/live/30550113/30550113/2213.m3u8
#EXTINF:-1 tvg-chno="58" tvg-id="FanDuel.Sports.Network.Southwest.HDTV.24/7.(Main).us" tvg-name="FDSN Southwest" tvg-logo="https://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s59629_dark_360w_270h.png" group-title="TV",FDSN Southwest
http://mytvstream.net:8080/live/bn80NG/909467/21843.m3u8
http://mytvstream.net:8080/live/30550113/30550113/21843.m3u8
#EXTINF:-1 tvg-chno="59" tvg-id="FanDuel.Sports.Network.Sun.South.24/7.HDTV.(South.Marlins,.Rays,.Heat).us" tvg-name="FDSN Sun" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s61084_dark_360w_270h.png" group-title="TV",FDSN Sun
http://mytvstream.net:8080/live/bn80NG/909467/104917.m3u8
http://mytvstream.net:8080/live/30550113/30550113/104917.m3u8
#EXTINF:-1 tvg-chno="60" tvg-id="FanDuel.Sports.Network.West.HDTV.us" tvg-name="FDSN West" tvg-logo="https://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s59627_dark_360w_270h.png" group-title="TV",FDSN West
http://mytvstream.net:8080/live/bn80NG/909467/20932.m3u8
http://mytvstream.net:8080/live/30550113/30550113/20932.m3u8
#EXTINF:-1 tvg-chno="61" tvg-id="FanDuel.Sports.Network.Wisconsin.24/7.HDTV.us" tvg-name="FDSN Wisconsin" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s16348_dark_360w_270h.png" group-title="TV",FDSN Wisconsin
http://mytvstream.net:8080/live/bn80NG/909467/78599.m3u8
http://mytvstream.net:8080/live/30550113/30550113/78599.m3u8
#EXTINF:-1 tvg-chno="62" tvg-id="plex.tv.FIFA+.plex" tvg-name="FIFA+ TV" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s136235_dark_360w_270h.png" group-title="TV",FIFA+ TV
https://jmp2.uk/stvp-IN270000230
@ -253,7 +253,7 @@ http://23.237.104.106:8080/USA_HBO2/index.m3u8
http://hardcoremedia.xyz/live/rabdsbmz/3731346838/129990.ts
#EXTINF:-1 tvg-chno="85" tvg-id="HBO.Drama.us2" tvg-name="HBO Family" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s34879_dark_360w_270h.png" group-title="TV",HBO Family
http://mytvstream.net:8080/live/bn80NG/909467/17772.m3u8
http://mytvstream.net:8080/live/30550113/30550113/17772.m3u8
#EXTINF:-1 tvg-chno="86" tvg-id="HBO.Zone.HD.us2" tvg-name="HBO Zone" tvg-logo="https://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s18431_dark_360w_270h.png" group-title="TV",HBO Zone
http://hardcoremedia.xyz/live/rabdsbmz/3731346838/129995.ts
@ -307,10 +307,10 @@ http://212.102.60.231/NBA_TV/index.m3u8
http://stream.cammonitorplus.net/1842/index.m3u8?token=MnE3ZWg1YkgxdFdWZlo2c2hLMkltWnJhcFo1OHhxcXVyb2pKazZXaWxZRERxNEduaVp1UnBxU2VlWmF0ZnRlRGxaMm1zNStDbnJOOXFZMlhtcStybmc9PQ==
#EXTINF:-1 tvg-chno="103" tvg-id="NBC.Sports.Bay.Area.HD.us2" tvg-name="NBC Sports Bay Area" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s63138_dark_360w_270h.png" group-title="TV",NBC Sports Bay Area
http://mytvstream.net:8080/live/bn80NG/909467/9900.m3u8
http://mytvstream.net:8080/live/30550113/30550113/9900.m3u8
#EXTINF:-1 tvg-chno="104" tvg-id="NBC.Sports.Boston.HD.us2" tvg-name="NBC Sports Boston" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s49198_dark_360w_270h.png" group-title="TV",NBC Sports Boston
http://mytvstream.net:8080/live/bn80NG/909467/20939.m3u8
http://mytvstream.net:8080/live/30550113/30550113/20939.m3u8
#EXTINF:-1 tvg-chno="105" tvg-id="NBC.Sports.California.SAT.us2" tvg-name="NBC Sports California" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s45540_dark_360w_270h.png" group-title="TV",NBC Sports California
http://hardcoremedia.xyz/live/rabdsbmz/3731346838/136474.ts
@ -334,7 +334,7 @@ https://stream.decentdoubts.net/6128524/index.m3u8?token=Mm9QYnQ1ZkpyYWllaEoydWh
https://starshare.st/live/P4B9TB9xR8/humongous2tonight/23550.ts
#EXTINF:-1 tvg-chno="112" tvg-id="NHL.Network.HD.us2" tvg-name="NHL Network" tvg-logo="http://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s58570_dark_360w_270h.png" group-title="TV",NHL Network
http://hardcoremedia.xyz/live/rabdsbmz/3731346838/167982.ts
https://nhl-firetv.amagi.tv/playlist.m3u8
#EXTINF:-1 tvg-chno="113" tvg-id="Nick.Jr.HD.us2" tvg-name="Nick Jr" tvg-logo="https://schedulesdirect-api20141201-logos.s3.dualstack.us-east-1.amazonaws.com/stationLogos/s19211_dark_360w_270h.png" group-title="TV",Nick Jr
http://hardcoremedia.xyz/live/rabdsbmz/3731346838/137683.ts

File diff suppressed because it is too large Load diff

View file

@ -5,7 +5,6 @@ from pathlib import Path
from playwright.async_api import async_playwright
from scrapers import (
cdnlivetv,
embedhd,
fawa,
istreameast,
@ -62,15 +61,15 @@ async def main() -> None:
xtrnl_brwsr = await network.browser(p, external=True)
pw_tasks = [
asyncio.create_task(cdnlivetv.scrape(hdl_brwsr)),
asyncio.create_task(embedhd.scrape(hdl_brwsr)),
asyncio.create_task(pixel.scrape(hdl_brwsr)),
asyncio.create_task(ppv.scrape(xtrnl_brwsr)),
asyncio.create_task(roxie.scrape(hdl_brwsr)),
asyncio.create_task(sport9.scrape(xtrnl_brwsr)),
asyncio.create_task(streamcenter.scrape(hdl_brwsr)),
# asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
asyncio.create_task(streamsgate.scrape(hdl_brwsr)),
asyncio.create_task(streamcenter.scrape(xtrnl_brwsr)),
asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
asyncio.create_task(streamsgate.scrape(xtrnl_brwsr)),
asyncio.create_task(webcast.scrape(hdl_brwsr)),
]
httpx_tasks = [
@ -83,7 +82,6 @@ async def main() -> None:
asyncio.create_task(tvapp.scrape()),
asyncio.create_task(volokit.scrape()),
asyncio.create_task(xstreameast.scrape()),
asyncio.create_task(webcast.scrape()),
]
await asyncio.gather(*(pw_tasks + httpx_tasks))
@ -100,8 +98,7 @@ async def main() -> None:
await network.client.aclose()
additions = (
cdnlivetv.urls
| embedhd.urls
embedhd.urls
| fawa.urls
| istreameast.urls
| livetvsx.urls

View file

@ -1,151 +0,0 @@
from functools import partial
from urllib.parse import urljoin
from playwright.async_api import Browser
from .utils import Cache, Time, get_logger, leagues, network
# Module-level logger for this scraper.
log = get_logger(__name__)
# Collected events keyed by "[sport] event (TAG)"; filled in by scrape().
urls: dict[str, dict[str, str | float]] = {}
# Short source tag; embedded in every event key and used to name the caches.
TAG = "CDNTV"
# Cache of resolved event entries.
# NOTE(review): exp is presumably an expiry in seconds (10_800 = 3 h) — confirm against Cache.
CACHE_FILE = Cache(TAG, exp=10_800)
# Cache of the raw API payload (presumably 19_800 s = 5.5 h — confirm against Cache).
API_FILE = Cache(f"{TAG}-api", exp=19_800)
# Base URL that the events endpoint path is joined onto.
API_URL = "https://api.cdn-live.tv"
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
# Return the API events that start within 30 minutes of now and are not cached yet.
#
# cached_keys: keys of the form "[league] away vs home (CDNTV)" that are already
#   cached; events whose key is in this collection are skipped.
# Returns: a list of dicts with the keys "sport", "event", "link" and "timestamp".
#
# NOTE(review): indentation is not visible in this view, so the nesting of the
# statements below (in particular API_FILE.write) must be confirmed in the real file.
now = Time.clean(Time.now())
events = []
# Use the cached API payload when load() returns something truthy; otherwise refetch.
if not (api_data := API_FILE.load(per_entry=False)):
log.info("Refreshing API cache")
if r := await network.request(
urljoin(API_URL, "api/v1/events/sports"),
log=log,
params={"user": "cdnlivetv", "plan": "free"},
):
# Keep only the "cdn-live-tv" object; if it is missing, fall back to a dict
# that holds nothing but the current timestamp.
api_data = r.json().get("cdn-live-tv", {"timestamp": now.timestamp()})
API_FILE.write(api_data)
# Accept events that start between 30 minutes ago and 30 minutes from now.
start_dt = now.delta(minutes=-30)
end_dt = now.delta(minutes=30)
# Skip all-lowercase keys (such as the "timestamp" fallback key above); the
# remaining keys are treated as sport names, each mapping to an iterable of events.
sports = [key for key in api_data.keys() if not key.islower()]
for sport in sports:
event_info = api_data[sport]
for event in event_info:
t1, t2 = event["awayTeam"], event["homeTeam"]
name = f"{t1} vs {t2}"
league = event["tournament"]
# Same key format that scrape() uses when it stores an entry.
if f"[{league}] {name} ({TAG})" in cached_keys:
continue
event_dt = Time.from_str(event["start"], timezone="UTC")
if not start_dt <= event_dt <= end_dt:
continue
# Events with no (or an empty) "channels" value cannot be linked; skip them.
if not (channels := event.get("channels")):
continue
event_links: list[str] = [channel["url"] for channel in channels]
# Disabled alternative: when several links exist, choose one via network.get_base.
# if not (
# link := (
# event_links[0]
# if len(event_links) == 1
# else await network.get_base(event_links)
# )
# ):
# continue
# Only the first channel URL is used.
link = event_links[0]
events.append(
{
# The tournament name is stored under "sport", not the API's sport key.
"sport": league,
"event": name,
"link": link,
"timestamp": event_dt.timestamp(),
}
)
return events
async def scrape(browser: Browser) -> None:
# Load cached entries into the module-level `urls`, process every new event
# returned by get_events() in a browser page, then write the cache back.
#
# browser: Playwright Browser handed to network.event_context.
# Returns: None; results are stored in `urls` and written through CACHE_FILE.
#
# NOTE(review): indentation is not visible in this view; whether the final
# new_count check sits inside or after `if events:` must be confirmed in the real file.
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
urls.update(cached_urls)
log.info(f"Loaded {cached_count} event(s) from cache")
log.info(f'Scraping from "{API_URL}"')
# Pass the cached keys so get_events() can skip events that are already stored.
events = await get_events(cached_urls.keys())
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
# A separate page is opened for each event.
async with network.event_page(context) as page:
# Bind the per-event arguments so safe_process can be given a single callable.
handler = partial(
network.process_event,
url=(link := ev["link"]),
url_num=i,
page=page,
log=log,
)
# NOTE(review): presumably returns the captured stream URL, or a falsy
# value on failure — confirm against network.safe_process.
url = await network.safe_process(
handler,
url_num=i,
semaphore=network.PW_S,
log=log,
)
# Only events that produced a truthy URL are stored and cached.
if url:
sport, event, ts = (
ev["sport"],
ev["event"],
ev["timestamp"],
)
key = f"[{sport}] {event} ({TAG})"
tvg_id, logo = leagues.get_tvg_info(sport, event)
entry = {
"url": url,
"logo": logo,
# "base" and "link" both hold the event's original link.
"base": link,
"timestamp": ts,
# Fall back to a generic id when no tvg id is returned.
"id": tvg_id or "Live.Event.us",
"link": link,
}
urls[key] = cached_urls[key] = entry
# New entries are counted as the growth of the cache dict since it was loaded.
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")
else:
log.info("No new events found")
CACHE_FILE.write(cached_urls)

View file

@ -11,7 +11,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "OVOGOAL"
CACHE_FILE = Cache(TAG, exp=19_800)
CACHE_FILE = Cache(TAG, exp=10_800)
HTML_CACHE = Cache(f"{TAG}-html", exp=19_800)
BASE_URL = "https://ovogoal.plus"
@ -46,22 +48,27 @@ async def process_event(url: str, url_num: int) -> tuple[str | None, str | None]
return match[3], iframe_src
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
events = []
async def refresh_html_cache() -> dict[str, dict[str, str | float]]:
log.info("Refreshing HTML cache")
now = Time.clean(Time.now())
events = {}
if not (html_data := await network.request(BASE_URL, log=log)):
return events
soup = HTMLParser(html_data.content)
sport = "Live Event"
for card in soup.css(".stream-row"):
if (not (watch_btn_elem := card.css_first(".watch-btn"))) or (
not (onclick := watch_btn_elem.attributes.get("onclick"))
):
continue
if not (event_time_elem := card.css_first(".stream-time")):
continue
if not (event_name_elem := card.css_first(".stream-info")):
continue
@ -69,20 +76,50 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
event_name = event_name_elem.text(strip=True)
if f"[{sport}] {event_name} ({TAG})" in cached_keys:
continue
event_time = event_time_elem.text(strip=True)
events.append(
{
"sport": sport,
"event": event_name,
"link": href,
}
)
event_dt = Time.from_str(f"{now.date()} {event_time}", timezone="CET")
sport = "Live Event"
key = f"[{sport}] {event_name} ({TAG})"
events[key] = {
"sport": sport,
"event": event_name,
"link": href,
"event_ts": event_dt.timestamp(),
"timestamp": now.timestamp(),
}
return events
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now())
if not (events := HTML_CACHE.load()):
events = await refresh_html_cache()
HTML_CACHE.write(events)
live = []
start_ts = now.delta(minutes=-30).timestamp()
end_ts = now.delta(minutes=30).timestamp()
for k, v in events.items():
if k in cached_keys:
continue
if not start_ts <= v["event_ts"] <= end_ts:
continue
live.append(v)
return live
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
@ -101,8 +138,6 @@ async def scrape() -> None:
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
@ -117,7 +152,11 @@ async def scrape() -> None:
log=log,
)
sport, event = ev["sport"], ev["event"]
sport, event, ts = (
ev["sport"],
ev["event"],
ev["event_ts"],
)
key = f"[{sport}] {event} ({TAG})"
@ -127,7 +166,7 @@ async def scrape() -> None:
"url": url,
"logo": logo,
"base": iframe,
"timestamp": now.timestamp(),
"timestamp": ts,
"id": tvg_id or "Live.Event.us",
"link": link,
}

View file

@ -61,8 +61,6 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
feed = feedparser.parse(html_data.content)
sport = "Live Event"
for entry in feed.entries:
if not (link := entry.get("link")):
continue
@ -70,6 +68,8 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
if not (title := entry.get("title")):
continue
sport = "Live Event"
title = title.replace(" v ", " vs ")
if f"[{sport}] {title} ({TAG})" in cached_keys:

View file

@ -149,7 +149,7 @@ class Leagues:
league: str,
) -> bool:
pattern = re.compile(r"\s+(?:-|vs\.?|at|@)\s+", re.I)
pattern = re.compile(r"\s+(?:-|vs\.?|at|@)\s+", re.IGNORECASE)
if pattern.search(event):
t1, t2 = re.split(pattern, event)

File diff suppressed because it is too large Load diff

View file

@ -215,7 +215,10 @@ class Network:
escaped = [re.escape(i) for i in invalids]
pattern = re.compile(rf"^(?!.*({'|'.join(escaped)})).*\.m3u8", re.I)
pattern = re.compile(
rf"^(?!.*({'|'.join(escaped)})).*\.m3u8",
re.IGNORECASE,
)
if pattern.search(req.url):
captured.append(req.url)

View file

@ -13,7 +13,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "VOLOKIT"
CACHE_FILE = Cache(TAG, exp=19_800)
CACHE_FILE = Cache(TAG, exp=10_800)
HTML_CACHE = Cache(f"{TAG}-html", exp=19_800)
BASE_URL = "http://volokit.xyz"
@ -69,48 +71,98 @@ async def process_event(url: str, url_num: int) -> str | None:
return match[1]
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
sport_urls = {
sport.upper(): urljoin(BASE_URL, f"sport/{sport}/") for sport in SPORT_ENDPOINTS
}
async def refresh_html_cache(
url: str,
sport: str,
now: Time,
) -> dict[str, dict[str, str | float]]:
tasks = [network.request(url, log=log) for url in sport_urls.values()]
events = {}
results = await asyncio.gather(*tasks)
events = []
if not (
soups := [(HTMLParser(html.content), html.url) for html in results if html]
):
if not (html_data := await network.request(url, log=log)):
return events
for soup, url in soups:
for card in soup.css("#events .table .vevent.theevent"):
if not (href := card.css_first("a").attributes.get("href")):
continue
soup = HTMLParser(html_data.content)
if not (name_node := card.css_first(".teamtd.event")):
continue
date = now.date()
name = fix_event(name_node.text(strip=True))
if date_node := soup.css_first("tr.date"):
date = date_node.text(strip=True)
sport = next((k for k, v in sport_urls.items() if v == url), "Live Event")
for card in soup.css("#events .table .vevent.theevent"):
if not (href := card.css_first("a").attributes.get("href")):
continue
if f"[{sport}] {name} ({TAG})" in cached_keys:
continue
name_node = card.css_first(".teamtd.event")
events.append(
{
"sport": sport,
"event": name,
"link": href,
}
)
time_node = card.css_first(".time")
if not (name_node and time_node):
continue
name, time = name_node.text(strip=True), time_node.text(strip=True)
event_sport = SPORT_ENDPOINTS[sport]
event_name = fix_event(name)
event_dt = Time.from_str(f"{date} {time}", timezone="UTC")
key = f"[{event_sport}] {event_name} ({TAG})"
events[key] = {
"sport": event_sport,
"event": event_name,
"link": href,
"event_ts": event_dt.timestamp(),
"timestamp": now.timestamp(),
}
return events
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now())
if not (events := HTML_CACHE.load()):
log.info("Refreshing HTML cache")
sport_urls = {
sport: urljoin(BASE_URL, f"sport/{sport}") for sport in SPORT_ENDPOINTS
}
tasks = [
refresh_html_cache(
url,
sport,
now,
)
for sport, url in sport_urls.items()
]
results = await asyncio.gather(*tasks)
events = {k: v for data in results for k, v in data.items()}
HTML_CACHE.write(events)
live = []
start_ts = now.delta(minutes=-30).timestamp()
end_ts = now.delta(minutes=30).timestamp()
for k, v in events.items():
if k in cached_keys:
continue
if not start_ts <= v["event_ts"] <= end_ts:
continue
live.append(v)
return live
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
@ -129,8 +181,6 @@ async def scrape() -> None:
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
@ -145,7 +195,11 @@ async def scrape() -> None:
log=log,
)
sport, event = ev["sport"], ev["event"]
sport, event, ts = (
ev["sport"],
ev["event"],
ev["event_ts"],
)
key = f"[{sport}] {event} ({TAG})"
@ -155,7 +209,7 @@ async def scrape() -> None:
"url": url,
"logo": logo,
"base": link,
"timestamp": now.timestamp(),
"timestamp": ts,
"id": tvg_id or "Live.Event.us",
"link": link,
}

View file

@ -297,7 +297,7 @@ async def scrape(browser: Browser) -> None:
if url:
valid_count += 1
entry["url"] = url.split("&t")[0]
urls[key] = entry

View file

@ -1,7 +1,7 @@
import asyncio
import re
from functools import partial
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
@ -12,10 +12,11 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "WEBCAST"
CACHE_FILE = Cache(TAG, exp=19_800)
CACHE_FILE = Cache(TAG, exp=10_800)
HTML_CACHE = Cache(f"{TAG}-html", exp=86_400)
BASE_URLS = {
"MLB": "https://mlbwebcast.com",
# "NFL": "https://nflwebcast.com",
"NHL": "https://slapstreams.com",
}
@ -25,100 +26,99 @@ def fix_event(s: str) -> str:
return " vs ".join(s.split("@"))
async def process_event(url: str, url_num: int) -> str | None:
if not (event_data := await network.request(url, log=log)):
log.info(f"URL {url_num}) Failed to load url.")
async def refresh_html_cache(url: str) -> dict[str, dict[str, str | float]]:
events = {}
return
soup = HTMLParser(event_data.content)
if not (iframe := soup.css_first('iframe[name="srcFrame"]')):
log.warning(f"URL {url_num}) No iframe element found.")
return
if not (iframe_src := iframe.attributes.get("src")):
log.warning(f"URL {url_num}) No iframe source found.")
return
if not (
iframe_src_data := await network.request(
iframe_src,
headers={"Referer": url},
log=log,
)
):
log.info(f"URL {url_num}) Failed to load iframe source.")
return
pattern = re.compile(r"source:\s+(\'|\")(.*)(\'|\")", re.I)
if not (match := pattern.search(iframe_src_data.text)):
log.warning(f"URL {url_num}) No Clappr source found.")
return
log.info(f"URL {url_num}) Captured M3U8")
return match[2]
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
tasks = [network.request(url, log=log) for url in BASE_URLS.values()]
results = await asyncio.gather(*tasks)
events = []
if not (
soups := [(HTMLParser(html.content), html.url) for html in results if html]
):
if not (html_data := await network.request(url, log=log)):
return events
for soup, url in soups:
sport = next((k for k, v in BASE_URLS.items() if v == url), "Live Event")
now = Time.clean(Time.now())
for row in soup.css("tr.singele_match_date"):
if not (vs_node := row.css_first("td.teamvs a")):
continue
soup = HTMLParser(html_data.content)
event_name = vs_node.text(strip=True)
title = soup.css_first("title").text(strip=True)
for span in vs_node.css("span.mtdate"):
date = span.text(strip=True)
sport = "NFL" if "NFL" in title else "NHL"
event_name = event_name.replace(date, "").strip()
date_text = now.strftime("%B %d, %Y")
if not (href := vs_node.attributes.get("href")):
continue
if date_row := soup.css_first("tr.mdatetitle"):
if mtdate_span := date_row.css_first("span.mtdate"):
date_text = mtdate_span.text(strip=True)
event = fix_event(event_name)
for row in soup.css("tr.singele_match_date"):
if not (time_node := row.css_first("td.matchtime")):
continue
if f"[{sport}] {event} ({TAG})" in cached_keys:
continue
time = time_node.text(strip=True)
events.append(
{
"sport": sport,
"event": event,
"link": href,
}
)
if not (vs_node := row.css_first("td.teamvs a")):
continue
event_name = vs_node.text(strip=True)
for span in vs_node.css("span.mtdate"):
date = span.text(strip=True)
event_name = event_name.replace(date, "").strip()
if not (href := vs_node.attributes.get("href")):
continue
event_dt = Time.from_str(f"{date_text} {time} PM", timezone="EST")
event = fix_event(event_name)
key = f"[{sport}] {event} ({TAG})"
events[key] = {
"sport": sport,
"event": event,
"link": href,
"event_ts": event_dt.timestamp(),
"timestamp": now.timestamp(),
}
return events
async def scrape() -> None:
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now())
if not (events := HTML_CACHE.load()):
log.info("Refreshing HTML cache")
tasks = [refresh_html_cache(url) for url in BASE_URLS.values()]
results = await asyncio.gather(*tasks)
events = {k: v for data in results for k, v in data.items()}
HTML_CACHE.write(events)
live = []
start_ts = now.delta(minutes=-30).timestamp()
end_ts = now.delta(minutes=30).timestamp()
for k, v in events.items():
if k in cached_keys:
continue
if not start_ts <= v["event_ts"] <= end_ts:
continue
live.append(v)
return live
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
cached_count = len(cached_urls)
valid_count = cached_count = len(valid_urls)
urls.update(valid_urls)
urls.update(cached_urls)
log.info(f"Loaded {cached_count} event(s) from cache")
@ -129,45 +129,47 @@ async def scrape() -> None:
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=(link := ev["link"]),
url_num=i,
page=page,
log=log,
)
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=(link := ev["link"]),
url_num=i,
)
url = await network.safe_process(
handler,
url_num=i,
semaphore=network.PW_S,
log=log,
)
url = await network.safe_process(
handler,
url_num=i,
semaphore=network.PW_S,
log=log,
)
if url:
sport, event, ts = (
ev["sport"],
ev["event"],
ev["event_ts"],
)
sport, event = ev["sport"], ev["event"]
key = f"[{sport}] {event} ({TAG})"
key = f"[{sport}] {event} ({TAG})"
tvg_id, logo = leagues.get_tvg_info(sport, event)
tvg_id, logo = leagues.get_tvg_info(sport, event)
entry = {
"url": url,
"logo": logo,
"base": BASE_URLS[sport],
"timestamp": ts,
"id": tvg_id or "Live.Event.us",
"link": link,
}
entry = {
"url": url,
"logo": logo,
"base": BASE_URLS[sport],
"timestamp": now.timestamp(),
"id": tvg_id or "Live.Event.us",
"link": link,
}
urls[key] = cached_urls[key] = entry
cached_urls[key] = entry
if url:
valid_count += 1
urls[key] = entry
if new_count := valid_count - cached_count:
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")
else:

View file

@ -81,9 +81,10 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
if not (soups := [HTMLParser(html.content) for html in results if html]):
return events
sport = "Live Event"
for soup in soups:
sport = "Live Event"
if sport_header := soup.css_first("h1.text-3xl"):
header = sport_header.text(strip=True)

View file

@ -1,13 +1,13 @@
## Base Log @ 2026-02-28 08:50 UTC
## Base Log @ 2026-02-27 09:06 UTC
### ✅ Working Streams: 157<br>❌ Dead Streams: 4
| Channel | Error (Code) | Link |
| ------- | ------------ | ---- |
| BET | HTTP Error (404) | `http://212.102.60.231/BET/index.m3u8` |
| Disney | HTTP Error (000) | `http://hardcoremedia.xyz/live/rabdsbmz/3731346838/257087.ts` |
| FYI TV | HTTP Error (000) | `http://hardcoremedia.xyz/live/rabdsbmz/3731346838/130105.ts` |
| Golf Channel | HTTP Error (000) | `http://hardcoremedia.xyz/live/rabdsbmz/3731346838/258721.ts` |
| MSNBC | HTTP Error (404) | `http://41.205.93.154/MSNBC/index.m3u8` |
| NBC Sports California | HTTP Error (403) | `http://hardcoremedia.xyz/live/rabdsbmz/3731346838/136474.ts` |
---
#### Base Channels URL
```