iptv/M3U8/scrape/tvpass.py

import json
import re
from datetime import datetime, timedelta
from pathlib import Path
from urllib.parse import urlparse

import httpx
import pytz

# Playlist endpoint that is downloaded and parsed for live events.
base_url = "https://tvpass.org/playlist/m3u"

# JSON cache file, stored in the same directory as this module.
base_file = Path(__file__).parent / "tvpass.json"

# Time zone used for cache timestamps and for the 11:00 expiry cutoff.
TZ = pytz.timezone("America/New_York")

# Collected events: display name -> {"logo": ..., "url": ...}; filled by main().
urls: dict[str, dict[str, str]] = {}

# Logo image URL per league, keyed by the upper-cased playlist group title.
# NCAAF and NCAAB intentionally point at the same image.
logos: dict[str, str] = dict(
    [
        ("MLB", "https://i.gyazo.com/ff3e375a48039d86d9b6216b213ad327.png"),
        ("NBA", "https://i.gyazo.com/29485b295d32782bbae31a0b35de0970.png"),
        ("NCAAF", "https://i.gyazo.com/ca63b40c86e757436de9d34d369b24f8.png"),
        ("NCAAB", "https://i.gyazo.com/ca63b40c86e757436de9d34d369b24f8.png"),
        ("NFL", "https://i.gyazo.com/8581d3d8cd6d902029e0daf9ca087842.png"),
        ("NHL", "https://i.gyazo.com/b634ca5b0d3f16f9863eca3b27568a10.png"),
        ("WNBA", "https://i.gyazo.com/f356a338044d1dfa9eed11979f8cf13f.png"),
    ]
)


def cache_expired(t: float) -> bool:
    """Return True when timestamp *t* predates the latest 11:00 cutoff.

    The cutoff is 11:00:00 on the current date in ``TZ``; if that moment has
    not been reached yet, the cutoff is moved back by one day.

    Args:
        t: POSIX timestamp (seconds) at which the cache was written.

    Returns:
        True if *t* is strictly earlier than the cutoff, otherwise False.
    """
    current = datetime.now(TZ)
    cutoff = current.replace(hour=11, minute=0, second=0, microsecond=0)

    # Today's 11:00 is still ahead of us, so fall back one day.
    if cutoff > current:
        cutoff = cutoff - timedelta(days=1)

    return cutoff.timestamp() > t


def load_cache() -> dict[str, dict[str, str]]:
    """Return the cached events, or an empty dict when no usable cache exists.

    The cache at ``base_file`` is ignored (``{}`` is returned) when the file
    is missing, is not valid UTF-8 JSON, does not have the expected shape
    (a JSON object with a numeric ``_timestamp`` and an object under
    ``urls``), or is stale according to ``cache_expired``.

    Returns:
        The mapping stored under ``"urls"``, or ``{}``.
    """
    try:
        data = json.loads(base_file.read_text(encoding="utf-8"))
    except (FileNotFoundError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}

    # A syntactically valid file may still hold a list, null, a number, ...
    if not isinstance(data, dict):
        return {}

    ts = data.get("_timestamp", 0)

    # A non-numeric timestamp cannot be compared; treat the cache as unusable.
    if not isinstance(ts, (int, float)) or cache_expired(ts):
        return {}

    cached = data.get("urls", {})

    return cached if isinstance(cached, dict) else {}


def save_cache(urls: dict[str, str]) -> None:
    """Write *urls* to ``base_file`` as JSON, stamped with the current time.

    The file holds an object with two keys: ``_timestamp`` (POSIX seconds
    taken from ``datetime.now(TZ)``) and ``urls`` (the mapping passed in).
    Any existing file is overwritten.

    Args:
        urls: Events to persist; ``main`` passes the module-level mapping of
            display name to ``{"logo": ..., "url": ...}``.
    """
    stamped_at = datetime.now(TZ).timestamp()

    # Serialize fully before opening the file so a failure leaves it untouched.
    serialized = json.dumps({"_timestamp": stamped_at, "urls": urls}, indent=2)

    with base_file.open("w", encoding="utf-8") as fh:
        fh.write(serialized)


async def fetch_m3u8(client: httpx.AsyncClient) -> list[str] | None:
    """Download the playlist at ``base_url`` and return it split into lines.

    Args:
        client: HTTP client used to issue the GET request.

    Returns:
        The response body as a list of lines, or ``None`` when the request
        raised or the server answered with an error status. The failure is
        printed rather than propagated.
    """
    try:
        r = await client.get(base_url)
        r.raise_for_status()
    except Exception as e:
        print(f'Failed to fetch "{base_url}"\n{e}')

        # Without this, control fell through to the return below, where `r`
        # is either unbound or holds an error response.
        return None

    return r.text.splitlines()


# Attribute extractors for "#EXTINF" lines, compiled once instead of per line.
_TVG_ID_RE = re.compile(r'tvg-id="([^"]*)"')
_TVG_NAME_RE = re.compile(r'tvg-name="([^"]*)"')
_GROUP_TITLE_RE = re.compile(r'group-title="([^"]*)"')

# Logo used when the group title has no entry in ``logos``.
_DEFAULT_LOGO = "https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png"


def _parse_playlist(lines: list[str]) -> dict[str, dict[str, str]]:
    """Extract live events from M3U playlist lines.

    An event is an ``#EXTINF`` line whose ``tvg-id`` attribute is present but
    empty, followed by a URL line ending in ``/hd``. Entries without a name,
    or whose URL path has fewer than two segments, are skipped.

    Returns:
        Mapping of ``"[SPORT] name (SD)"`` to ``{"logo": ..., "url": ...}``.
    """
    events: dict[str, dict[str, str]] = {}

    # Pairing each line with its successor means a trailing "#EXTINF" with no
    # URL line after it is simply never visited.
    for line, next_line in zip(lines, lines[1:]):
        if not line.startswith("#EXTINF"):
            continue

        id_match = _TVG_ID_RE.search(line)

        # Only entries with an explicitly empty tvg-id are live events.
        if id_match is None or id_match[1] != "":
            continue

        url = next_line.strip()

        if not url.endswith("/hd"):
            continue

        name_match = _TVG_NAME_RE.search(line)
        raw_name = name_match[1].strip() if name_match else ""

        if not raw_name:
            continue

        # Drop the trailing "(...)" suffix; keep the whole name when there is
        # no parenthesis (or nothing precedes it) instead of blanking it.
        name = raw_name.rpartition("(")[0].strip() or raw_name

        group_match = _GROUP_TITLE_RE.search(line)
        sport = group_match[1].upper().strip() if group_match else None

        parts = urlparse(url).path.strip("/").split("/")

        # The stream id is the second path segment; skip URLs that lack one.
        if len(parts) < 2:
            continue

        events[f"[{sport}] {name} (SD)"] = {
            "logo": logos.get(sport, _DEFAULT_LOGO),
            "url": f"http://origin.thetvapp.to/hls/{parts[1]}/mono.m3u8",
        }

    return events


async def main(client: httpx.AsyncClient) -> None:
    """Populate the module-level ``urls`` mapping with live events.

    A fresh cache is used when available. Otherwise the playlist is fetched
    with *client*, parsed, and the result is written back to the cache.

    Args:
        client: HTTP client passed through to ``fetch_m3u8``.
    """
    if cached := load_cache():
        urls.update(cached)
        print(f"TVPass: Collected {len(urls)} live events from cache")
        return

    print(f'Scraping from "{base_url}"')

    if not (data := await fetch_m3u8(client)):
        return

    urls.update(_parse_playlist(data))

    if urls:
        save_cache(urls)
        print(f"Cached {len(urls)} live events")
        print(f"Cached {len(urls)} live events")
e 2025-08-19 10:54:50 -04:00			`import json`
			`import re`
			`from datetime import datetime, timedelta`
			`from pathlib import Path`
e 2025-08-30 14:29:49 -04:00			`from urllib.parse import urlparse`
e 2025-08-19 10:54:50 -04:00
			`import httpx`
			`import pytz`

			`base_url = "https://tvpass.org/playlist/m3u"`

e 2025-08-28 12:18:30 -04:00			`base_file = Path(__file__).parent / "tvpass.json"`
e 2025-08-19 10:54:50 -04:00
			`TZ = pytz.timezone("America/New_York")`

e 2025-08-28 19:43:35 -04:00			`urls: dict[str, dict[str, str]] = {}`

			`logos = {`
			`"MLB": "https://i.gyazo.com/ff3e375a48039d86d9b6216b213ad327.png",`
			`"NBA": "https://i.gyazo.com/29485b295d32782bbae31a0b35de0970.png",`
			`"NCAAF": "https://i.gyazo.com/ca63b40c86e757436de9d34d369b24f8.png",`
			`"NCAAB": "https://i.gyazo.com/ca63b40c86e757436de9d34d369b24f8.png",`
			`"NFL": "https://i.gyazo.com/8581d3d8cd6d902029e0daf9ca087842.png",`
			`"NHL": "https://i.gyazo.com/b634ca5b0d3f16f9863eca3b27568a10.png",`
			`"WNBA": "https://i.gyazo.com/f356a338044d1dfa9eed11979f8cf13f.png",`
			`}`
e 2025-08-28 12:18:30 -04:00
e 2025-08-19 10:54:50 -04:00
			`def cache_expired(t: float) -> bool:`
			`now = datetime.now(TZ)`

e 2025-08-23 15:15:47 -04:00			`eleven = now.replace(hour=11, minute=0, second=0, microsecond=0)`
e 2025-08-19 10:54:50 -04:00
e 2025-08-23 15:15:47 -04:00			`if now < eleven:`
			`eleven -= timedelta(days=1)`
e 2025-08-19 10:54:50 -04:00
e 2025-08-23 15:15:47 -04:00			`return t < eleven.timestamp()`
e 2025-08-19 10:54:50 -04:00

			`def load_cache() -> dict[str, str]:`
			`try:`
			`data = json.loads(base_file.read_text(encoding="utf-8"))`

			`ts = data.get("_timestamp", 0)`

			`return {} if cache_expired(ts) else data.get("urls", {})`
			`except (FileNotFoundError, json.JSONDecodeError):`
			`return {}`


			`def save_cache(urls: dict[str, str]) -> None:`
			`payload = {"_timestamp": datetime.now(TZ).timestamp(), "urls": urls}`

			`base_file.write_text(json.dumps(payload, indent=2), encoding="utf-8")`


e 2025-08-27 10:26:56 -04:00			`async def fetch_m3u8(client: httpx.AsyncClient) -> list[str] \| None:`
e 2025-08-19 10:54:50 -04:00			`try:`
e 2025-08-27 10:26:56 -04:00			`r = await client.get(base_url)`
e 2025-08-19 10:54:50 -04:00			`r.raise_for_status()`
			`except Exception as e:`
			`print(f'Failed to fetch "{base_url}"\n{e}')`

			`return r.text.splitlines()`


e 2025-08-27 10:26:56 -04:00			`async def main(client: httpx.AsyncClient) -> None:`
e 2025-08-19 10:54:50 -04:00			`if cached := load_cache():`
			`urls.update(cached)`
			`print(f"TVPass: Collected {len(urls)} live events from cache")`
			`return`

			`print(f'Scraping from "{base_url}"')`

e 2025-08-27 10:26:56 -04:00			`if not (data := await fetch_m3u8(client)):`
e 2025-08-19 10:54:50 -04:00			`return`

e 2025-08-28 12:18:30 -04:00			`for i, line in enumerate(data):`
e 2025-08-27 10:26:56 -04:00			`if line.startswith("#EXTINF"):`
			`tvg_id_match = re.search(r'tvg-id="([^"]*)"', line)`
			`tvg_name_match = re.search(r'tvg-name="([^"]*)"', line)`
e 2025-08-28 12:18:30 -04:00			`group_title_match = re.search(r'group-title="([^"]*)"', line)`
e 2025-08-19 10:54:50 -04:00
			`tvg_id = tvg_id_match[1] if tvg_id_match else None`
e 2025-08-27 10:26:56 -04:00			`tvg_name = tvg_name_match[1] if tvg_name_match else None`
e 2025-08-28 19:43:35 -04:00			`sport = group_title_match[1].upper().strip() if group_title_match else None`
e 2025-08-19 10:54:50 -04:00
			`if tvg_id == "":`
			`url = data[i + 1]`

e 2025-08-27 10:26:56 -04:00			`if tvg_name:`
e 2025-08-28 12:18:30 -04:00			`tvg_name = "(".join(tvg_name.split("(")[:-1]).strip()`
e 2025-08-19 10:54:50 -04:00
e 2025-08-28 19:43:35 -04:00			`logo = logos.get(`
			`sport,`
			`"https://i.gyazo.com/ec27417a9644ae517196494afa72d2b9.png",`
			`)`

e 2025-08-30 14:29:49 -04:00			`if url.endswith("/hd"):`
			`parts = urlparse(url).path.strip("/").split("/")`
e 2025-08-19 10:54:50 -04:00
e 2025-08-30 14:29:49 -04:00			`link = f"http://origin.thetvapp.to/hls/{parts[1]}/mono.m3u8"`

			`urls[f"[{sport}] {tvg_name} (SD)"] = {"logo": logo, "url": link}`
e 2025-08-19 10:54:50 -04:00
			`if urls:`
			`save_cache(urls)`
			`print(f"Cached {len(urls)} live events")`