iptv/M3U8/scrape/tvpass.py

import json
import re
from pathlib import Path
from urllib.parse import urlparse

import httpx

base_url = "https://tvpass.org/playlist/m3u"
base_file = Path(__file__).parent / "tvpass.json"

urls: dict[str, str] = {}


def fetch_m3u8(client: httpx.Client) -> list[str] | None:
    """Download the playlist at ``base_url`` and return it split into lines.

    Args:
        client: HTTP client used to issue the GET request.

    Returns:
        The response body split into lines, or ``None`` when the request
        raises or the server answers with an error status.
    """
    try:
        r = client.get(base_url)
        r.raise_for_status()
    except Exception as e:
        print(f'Failed to fetch "{base_url}"\n{e}')
        # Bail out here: falling through would either hit an unbound `r`
        # (the request itself raised) or hand back the body of an error
        # response as if it were playlist content.
        return None

    return r.text.splitlines()


def main(client: httpx.Client) -> None:
    """Scrape the playlist and save the collected live-event URLs as JSON.

    Fetches the playlist via ``fetch_m3u8``, keeps every ``#EXTINF`` entry
    whose ``tvg-id`` attribute is present but empty and whose stream URL
    ends in ``/sd``, and stores it in the module-level ``urls`` mapping under
    the key ``"[SPORT] name"``. If ``urls`` is non-empty afterwards it is
    written to ``base_file`` as indented JSON.

    Entries without a ``tvg-name`` attribute are skipped instead of aborting
    the whole run.

    Args:
        client: HTTP client passed through to ``fetch_m3u8``.
    """
    print(f'Scraping from "{base_url}"')

    if not (data := fetch_m3u8(client)):
        return

    # Compile once; both patterns are applied to every #EXTINF line.
    tvg_id_re = re.compile(r'tvg-id="([^"]*)"')
    tvg_name_re = re.compile(r'tvg-name="([^"]*)"')

    # Pair each line with the one after it: the line following an #EXTINF
    # entry is treated as that entry's stream URL.
    for info, url in zip(data, data[1:]):
        if not info.startswith("#EXTINF"):
            continue

        # Only entries carrying an explicitly empty tvg-id are collected;
        # a missing attribute or a non-empty id is ignored.
        tvg_id_match = tvg_id_re.search(info)
        if tvg_id_match is None or tvg_id_match[1] != "":
            continue

        if not url.endswith("/sd"):
            continue

        # Without a tvg-name there is nothing to build the key from.
        tvg_name_match = tvg_name_re.search(info)
        if tvg_name_match is None:
            continue

        # Drop anything from the first "(" onwards in the display name.
        tvg_name = tvg_name_match[1].split("(")[0].strip()

        path_parts = urlparse(url).path.strip("/").split("/")

        # The URL can end in "/sd" via its query or fragment, so the parsed
        # path is checked again before its second segment is used.
        if len(path_parts) >= 2 and path_parts[-1] == "sd":
            sport = "".join(x for x in path_parts[1] if x.isalpha()).upper()
        else:
            sport = "UNKNWN"

        urls[f"[{sport}] {tvg_name}"] = url

    print(f"Collected {len(urls)} live events")

    if urls:
        base_file.write_text(json.dumps(urls, indent=2), encoding="utf-8")


# if __name__ == "__main__":
#     # create client beforehand
#     main()
init 2025-08-17 10:05:09 -04:00			`import json`
			`import re`
			`from pathlib import Path`
			`from urllib.parse import urlparse`

			`import httpx`

			`base_url = "https://tvpass.org/playlist/m3u"`
			`base_file = Path(__file__).parent / "tvpass.json"`

			`urls: dict[str, str] = {}`


e 2025-08-17 17:01:52 -04:00			`def fetch_m3u8(client: httpx.Client) -> list[str] \| None:`
init 2025-08-17 10:05:09 -04:00			`try:`
e 2025-08-17 17:01:52 -04:00			`r = client.get(base_url)`
init 2025-08-17 10:05:09 -04:00			`r.raise_for_status()`
			`except Exception as e:`
			`print(f'Failed to fetch "{base_url}"\n{e}')`

			`return r.text.splitlines()`


e 2025-08-17 17:01:52 -04:00			`def main(client: httpx.Client) -> None:`
init 2025-08-17 10:05:09 -04:00			`print(f'Scraping from "{base_url}"')`

e 2025-08-17 17:01:52 -04:00			`if not (data := fetch_m3u8(client)):`
init 2025-08-17 10:05:09 -04:00			`return`

			`for i in range(len(data) - 1):`
			`if data[i].startswith("#EXTINF"):`
			`tvg_id_match = re.search(r'tvg-id="([^"]*)"', data[i])`
			`tvg_name_match = re.search(r'tvg-name="([^"]*)"', data[i])`

			`tvg_id = tvg_id_match[1] if tvg_id_match else None`
			`tvg_name = tvg_name_match[1]`

			`if tvg_id == "":`
			`url = data[i + 1]`

			`tvg_name = tvg_name.split("(")[0].strip()`

			`if url.endswith("/sd"):`

			`path_parts = urlparse(url).path.strip("/").split("/")`

			`if len(path_parts) >= 2 and path_parts[-1] == "sd":`
			`sport = "".join(x for x in path_parts[1] if x.isalpha()).upper()`
			`else:`
			`sport = "UNKNWN"`

			`urls[f"[{sport}] {tvg_name}"] = url`

			`print(f"Collected {len(urls)} live events")`

			`if urls:`
			`base_file.write_text(json.dumps(urls, indent=2), encoding="utf-8")`


e 2025-08-17 17:01:52 -04:00			`# if __name__ == "__main__":`
			`# # create client beforehand`
			`# main()`