iptv/M3U8/scrapers/tvpass.py

83 lines
2.2 KiB
Python
Raw Normal View History

2025-08-19 10:54:50 -04:00
import re
import httpx
2025-10-01 11:57:49 -04:00
from .utils import Cache, Time, get_logger, leagues
2025-08-30 16:45:19 -04:00
2025-10-01 12:37:19 -04:00
# Module-level logger, named after this module.
log = get_logger(__name__)

# Events gathered by this scraper, keyed by display name; populated by scrape().
urls: dict[str, dict[str, str | float]] = {}

# Cache backing store for scraped events.
# NOTE(review): exp=86_400 is presumably a lifetime in seconds (one day) —
# confirm against the Cache helper in .utils.
CACHE_FILE = Cache("tvpass.json", exp=86_400)

# Playlist endpoint downloaded by get_data().
BASE_URL = "https://tvpass.org/playlist/m3u"

# Short source tag appended to every event key built in get_events().
TAG = "TVP"
2025-08-30 16:45:19 -04:00
2025-11-19 18:58:52 -05:00
async def get_data(client: httpx.AsyncClient) -> list[str]:
    """Fetch the playlist at ``BASE_URL`` and return its text split into lines.

    Args:
        client: Async HTTP client used to issue the GET request.

    Returns:
        The response body split on line boundaries, or an empty list when
        the request (or its status check) raised; the failure is logged.
    """
    try:
        response = await client.get(BASE_URL)
        response.raise_for_status()
    except Exception as e:
        # Best-effort scraper: report the failure and hand back "no data"
        # instead of propagating the error to the caller.
        log.error(f'Failed to fetch "{BASE_URL}": {e}')

        return []
    else:
        playlist_text = response.text
        return playlist_text.splitlines()
2025-11-19 18:58:52 -05:00
# Precompiled once so the per-line loop does not repeat the pattern lookup.
_TVG_ID_RE = re.compile(r'tvg-id="([^"]*)"')
_TVG_NAME_RE = re.compile(r'tvg-name="([^"]*)"')
_GROUP_TITLE_RE = re.compile(r'group-title="([^"]*)"')


def _extinf_attr(pattern: re.Pattern[str], line: str) -> str | None:
    """Return the first capture group of *pattern* in *line*, or None if absent."""
    match = pattern.search(line)
    return match[1] if match else None


async def get_events(client: httpx.AsyncClient) -> dict[str, dict[str, str | float]]:
    """Build the event table from the playlist returned by ``get_data``.

    Each ``#EXTINF`` line is paired with the line that follows it (its stream
    URL). An entry is kept only when it has no (or an empty) ``tvg-id``, its
    URL ends in ``/sd``, and it carries a non-empty ``tvg-name`` plus a
    ``group-title`` attribute.

    Args:
        client: Async HTTP client forwarded to ``get_data``.

    Returns:
        Mapping of ``"[SPORT] event (TAG)"`` keys to dicts with ``url``,
        ``logo``, ``id``, ``base`` and ``timestamp`` fields. Empty when the
        playlist could not be fetched or holds no matching entries.
    """
    now = Time.now().timestamp()

    events: dict[str, dict[str, str | float]] = {}

    data = await get_data(client)

    # zip() with the shifted list pairs every line with its successor and
    # simply stops at the end, so a trailing "#EXTINF" line with no URL after
    # it can no longer raise IndexError.
    for line, url in zip(data, data[1:]):
        if not line.startswith("#EXTINF"):
            continue

        # Entries that already carry a tvg-id are skipped; only those without
        # one and whose stream URL ends in "/sd" are processed.
        if _extinf_attr(_TVG_ID_RE, line) or not url.endswith("/sd"):
            continue

        tvg_name = _extinf_attr(_TVG_NAME_RE, line)
        group_title = _extinf_attr(_GROUP_TITLE_RE, line)

        # A missing attribute used to crash with TypeError (subscripting
        # None); such malformed entries are now skipped.
        if not tvg_name or group_title is None:
            continue

        sport = group_title.upper().strip()

        # Drop the trailing "(...)" suffix from the name. When the name has
        # no "(" at all, keep it whole instead of producing an empty event.
        head, sep, _ = tvg_name.rpartition("(")
        event = (head if sep else tvg_name).strip()

        key = f"[{sport}] {event} ({TAG})"

        # The URL ends in ".../<channel>/sd", so the channel is the
        # second-to-last path segment.
        channel = url.split("/")[-2]

        tvg_id, logo = leagues.info(sport)

        events[key] = {
            "url": f"http://origin.thetvapp.to/hls/{channel}/mono.m3u8",
            "logo": logo,
            "id": tvg_id or "Live.Event.us",
            "base": "https://tvpass.org",
            "timestamp": now,
        }

    return events
async def scrape(client: httpx.AsyncClient) -> None:
    """Fill the module-level ``urls`` table, from cache when available.

    When ``CACHE_FILE.load()`` yields a truthy value, it is merged into
    ``urls`` and nothing is fetched. Otherwise the events are scraped via
    ``get_events``, merged into ``urls``, and ``urls`` is written back to
    the cache.

    Args:
        client: Async HTTP client forwarded to ``get_events``.
    """
    cached = CACHE_FILE.load()

    if cached:
        urls.update(cached)

        log.info(f"Loaded {len(urls)} event(s) from cache")
    else:
        log.info(f'Scraping from "{BASE_URL}"')

        urls.update(await get_events(client))

        CACHE_FILE.write(urls)

        log.info(f"Collected and cached {len(urls)} new event(s)")