iptv/M3U8/scrape/tvpass.py

91 lines
2.8 KiB
Python
Raw Normal View History

2025-08-19 10:54:50 -04:00
import json
import re
2025-08-30 16:45:19 -04:00
from datetime import datetime
2025-08-19 10:54:50 -04:00
from pathlib import Path
import httpx
import pytz
2025-08-30 16:45:19 -04:00
from .logger import get_logger
# Module-level logger, obtained from the package's own `get_logger` helper.
log = get_logger(__name__)
2025-08-19 10:54:50 -04:00
# Playlist endpoint requested by `fetch_m3u8`.
base_url = "https://tvpass.org/playlist/m3u"
2025-08-28 12:18:30 -04:00
# JSON cache stored next to this module; read by `load_cache`, written by `main`.
base_file = Path(__file__).parent / "tvpass.json"
2025-08-19 10:54:50 -04:00
2025-08-28 19:43:35 -04:00
# Collected events, filled in by `main`: "[SPORT] name" -> {"url": ..., "logo": ...}.
urls: dict[str, dict[str, str]] = {}
# Logo image per upper-cased sport name; `main` falls back to a generic logo
# for any sport that is not listed here.
logos: dict[str, str] = {
    "MLB": "https://i.gyazo.com/0fe7865ef2f06c9507791b24f04dbca8.png",
    "NBA": "https://i.gyazo.com/773c23570f095a5d549c23b9401d83f4.png",
    # Both college leagues share the same image.
    "NCAAF": "https://i.gyazo.com/ca63b40c86e757436de9d34d369b24f8.png",
    "NCAAB": "https://i.gyazo.com/ca63b40c86e757436de9d34d369b24f8.png",
    "NFL": "https://i.gyazo.com/fb4956d7a2fe54a1bac54cd81e1b3f11.png",
    "NHL": "https://i.gyazo.com/526607d4e886d5ed1fecca4bff3115e2.png",
    "WNBA": "https://i.gyazo.com/02d665a5704118d195dbcd5fa20d5462.png",
}
2025-08-28 12:18:30 -04:00
2025-09-02 18:06:35 -04:00
# Time zone in which `load_cache` evaluates its cache-refresh hour window.
TZ = pytz.timezone("America/New_York")
2025-08-19 10:54:50 -04:00
2025-08-30 16:45:19 -04:00
2025-09-02 18:06:35 -04:00
def load_cache() -> dict[str, dict[str, str]]:
    """Return the events cached in ``base_file``, or ``{}`` when unusable.

    The cache is deliberately ignored while the current hour in ``TZ`` is
    between 8 and 12 inclusive, which makes the caller scrape afresh during
    that window. A missing file, malformed JSON, or a JSON value that is not
    an object also yields an empty dict.

    Returns:
        The cached mapping in the same shape ``main`` writes it, or ``{}``.
    """
    # Inside the refresh window the cache is never used, so skip the file read.
    if 8 <= datetime.now(TZ).hour <= 12:
        return {}

    try:
        data = json.loads(base_file.read_text(encoding="utf-8"))
    except (FileNotFoundError, json.JSONDecodeError):
        return {}

    # Valid JSON that is not an object cannot be merged into `urls` by the
    # caller, so treat it like a missing cache.
    return data if isinstance(data, dict) else {}
2025-08-27 10:26:56 -04:00
async def fetch_m3u8(client: httpx.AsyncClient) -> list[str] | None:
    """Download the playlist at ``base_url`` and return it split into lines.

    Args:
        client: Async HTTP client used to issue the GET request.

    Returns:
        The response body as a list of lines, or ``None`` when the request
        fails or the server answers with an error status. The failure is
        logged, not raised.
    """
    try:
        r = await client.get(base_url)
        r.raise_for_status()
    except Exception as e:
        log.error(f'Failed to fetch "{base_url}"\n{e}')
        # Stop here: `r` is either unbound (the request never completed) or
        # holds an error response, and neither is a playlist.
        return None

    return r.text.splitlines()
2025-08-27 10:26:56 -04:00
# Attribute extractors for "#EXTINF" lines, compiled once instead of per line.
_TVG_ID_RE = re.compile(r'tvg-id="([^"]*)"')
_TVG_NAME_RE = re.compile(r'tvg-name="([^"]*)"')
_GROUP_TITLE_RE = re.compile(r'group-title="([^"]*)"')

# Logo used when the entry's sport has no dedicated image in `logos`.
_DEFAULT_LOGO = "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png"


async def main(client: httpx.AsyncClient) -> None:
    """Populate the module-level ``urls`` mapping from cache or a fresh scrape.

    When ``load_cache`` returns data it is merged into ``urls`` and nothing is
    fetched. Otherwise the playlist is downloaded and every "#EXTINF" entry
    that has an empty ``tvg-id`` and a stream URL ending in "/hd" is recorded
    under the key ``"[SPORT] name"``. A non-empty result is written back to
    ``base_file``.

    Args:
        client: Async HTTP client forwarded to ``fetch_m3u8``.
    """
    if cached := load_cache():
        urls.update(cached)
        log.info(f"Collected {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{base_url}"')

    if not (data := await fetch_m3u8(client)):
        return

    for i, line in enumerate(data):
        if not line.startswith("#EXTINF"):
            continue

        # Only entries whose tvg-id attribute is present and empty are kept.
        tvg_id_match = _TVG_ID_RE.search(line)
        if not tvg_id_match or tvg_id_match[1] != "":
            continue

        # The stream URL is taken from the line after the header; a header on
        # the very last line has nothing to pair with.
        if i + 1 >= len(data):
            continue
        url = data[i + 1].strip()
        if not url.endswith("/hd"):
            continue

        # Without a name the entry would be stored under a "[SPORT] None" key.
        tvg_name_match = _TVG_NAME_RE.search(line)
        tvg_name = tvg_name_match[1] if tvg_name_match else None
        if not tvg_name:
            continue

        # Drop the trailing "(...)" suffix; keep the whole name when there is
        # no parenthesis instead of collapsing it to an empty string.
        head, paren, _ = tvg_name.rpartition("(")
        tvg_name = (head if paren else tvg_name).strip()

        group_title_match = _GROUP_TITLE_RE.search(line)
        sport = group_title_match[1].upper().strip() if group_title_match else None

        # The second-to-last path segment of the playlist URL names the stream.
        channel = url.split("/")[-2]
        urls[f"[{sport}] {tvg_name}"] = {
            "url": f"http://origin.thetvapp.to/hls/{channel}/mono.m3u8",
            "logo": logos.get(sport, _DEFAULT_LOGO),
        }

    if urls:
        base_file.write_text(json.dumps(urls, indent=2), encoding="utf-8")
        log.info(f"Cached {len(urls)} event(s)")