# iptv/M3U8/scrape/tvpass.py

import json
import re
from pathlib import Path

import httpx

from .utils import LOGOS, get_logger, now

# Module-level logger obtained from the project's get_logger helper.
log = get_logger(__name__)

# Collected events. main() fills this with keys of the form
# "[SPORT] event name" and values holding a "url" and a "logo" entry.
urls: dict[str, dict[str, str]] = {}

# Playlist endpoint downloaded by fetch_m3u8().
BASE_URL = "https://tvpass.org/playlist/m3u"

# JSON cache of `urls`, kept in a "caches" directory next to this file.
CACHE_FILE = Path(__file__).parent / "caches" / "tvpass.json"


def load_cache() -> dict[str, dict[str, str]]:
    """Return the events stored in ``CACHE_FILE``, or ``{}`` if unusable.

    The cache is ignored whenever ``now.hour <= 12``. It is also treated as
    empty when the file cannot be read, cannot be decoded, does not contain
    valid JSON, or contains JSON that is not an object.

    Returns:
        The cached mapping of event key to its ``url``/``logo`` entry, or an
        empty dict when no usable cache is available.
    """
    # Checked first so a cache that would be discarded anyway is never read.
    # NOTE(review): `now` comes from .utils; presumably a datetime captured
    # at import time -- confirm against that module.
    if now.hour <= 12:
        return {}

    try:
        data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}

    # main() merges the result with dict.update(), so anything other than a
    # JSON object would fail there; treat it as "no cache" instead.
    return data if isinstance(data, dict) else {}


async def fetch_m3u8(client: httpx.AsyncClient) -> list[str]:
    """Download the playlist at ``BASE_URL`` and return it split into lines.

    Any exception raised while requesting the playlist or checking its
    status is logged, and an empty list is returned in that case.
    """
    playlist_lines: list[str] = []

    try:
        response = await client.get(BASE_URL)
        response.raise_for_status()
    except Exception as exc:
        log.error(f'Failed to fetch "{BASE_URL}"\n{exc}')
    else:
        # Runs outside the guarded section, exactly like the original
        # trailing return: errors while decoding the body still propagate.
        playlist_lines = response.text.splitlines()

    return playlist_lines


def _event_name(raw_name: str) -> str:
    """Return *raw_name* without its last parenthesised suffix, stripped."""
    head, sep, _ = raw_name.rpartition("(")
    # With no "(" there is no suffix to drop; keep the name rather than
    # collapsing it to an empty string.
    return (head if sep else raw_name).strip()


async def main(client: httpx.AsyncClient) -> None:
    """Populate the module-level ``urls`` mapping with playlist events.

    If ``load_cache()`` returns a non-empty mapping, it is merged into
    ``urls`` and nothing is downloaded. Otherwise the playlist is fetched
    and scanned for ``#EXTINF`` lines. An entry is stored only when its
    ``tvg-id`` attribute is present but empty and the line that follows it
    (the stream URL) ends with ``/hd``. The resulting ``urls`` mapping is
    then written to ``CACHE_FILE`` as JSON.

    Args:
        client: HTTP client handed to ``fetch_m3u8`` for the download.
    """
    if cached := load_cache():
        urls.update(cached)
        log.info(f"Collected {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')

    # Compiled once instead of on every playlist line.
    tvg_id_re = re.compile(r'tvg-id="([^"]*)"')
    tvg_name_re = re.compile(r'tvg-name="([^"]*)"')
    group_title_re = re.compile(r'group-title="([^"]*)"')

    lines = await fetch_m3u8(client)

    # Pair every line with its successor: the URL of an entry is the line
    # right after its "#EXTINF" line. zip() stops one short of the end, so a
    # trailing "#EXTINF" without a URL line is skipped instead of raising
    # IndexError.
    for line, url in zip(lines, lines[1:]):
        if not line.startswith("#EXTINF"):
            continue

        tvg_id_match = tvg_id_re.search(line)

        # Only entries whose tvg-id attribute is present and empty are kept.
        if not tvg_id_match or tvg_id_match[1] != "":
            continue

        if not url.endswith("/hd"):
            continue

        tvg_name_match = tvg_name_re.search(line)
        group_title_match = group_title_re.search(line)

        tvg_name = _event_name(tvg_name_match[1]) if tvg_name_match else None
        sport = group_title_match[1].upper().strip() if group_title_match else None

        urls[f"[{sport}] {tvg_name}"] = {
            # The second-to-last path segment of the playlist URL is reused
            # as the channel segment of the stream URL.
            "url": f"http://origin.thetvapp.to/hls/{url.split('/')[-2]}/mono.m3u8",
            "logo": LOGOS.get(
                sport,
                "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png",
            ),
        }

    # The cache directory may not exist yet (e.g. on a fresh checkout).
    CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
    CACHE_FILE.write_text(json.dumps(urls, indent=2), encoding="utf-8")

    log.info(f"Cached {len(urls)} event(s)")