iptv/M3U8/scrape/tvpass.py

63 lines
1.7 KiB
Python
Raw Normal View History

2025-08-17 10:05:09 -04:00
import json
import re
from pathlib import Path
from urllib.parse import urlparse
import httpx
# Playlist endpoint that is downloaded and scanned for live-event entries.
base_url = "https://tvpass.org/playlist/m3u"
# JSON output file, located in the same directory as this module.
base_file = Path(__file__).parent / "tvpass.json"
# Collected events, keyed by "[SPORT] name" label and mapping to the stream URL.
urls: dict[str, str] = {}
2025-08-17 17:01:52 -04:00
def fetch_m3u8(client: httpx.Client) -> list[str] | None:
    """Download the playlist at ``base_url`` and return it split into lines.

    Args:
        client: HTTP client used to issue the GET request.

    Returns:
        The response body split into lines, or ``None`` when the request
        raised or ``raise_for_status()`` rejected the response.
    """
    try:
        r = client.get(base_url)
        r.raise_for_status()
    except Exception as e:
        print(f'Failed to fetch "{base_url}"\n{e}')
        # Stop here: ``r`` is unbound when the request itself raised, and it
        # holds an error response when the status check raised. Neither is a
        # playlist worth parsing.
        return None

    return r.text.splitlines()
2025-08-17 17:01:52 -04:00
def _parse_live_events(lines: list[str]) -> dict[str, str]:
    """Extract live-event streams from the lines of an M3U playlist.

    An entry is collected when its ``#EXTINF`` line carries an empty
    ``tvg-id`` attribute and a ``tvg-name`` attribute, and the line that
    follows it (the stream URL) ends in ``/sd``.

    Args:
        lines: Playlist content, one element per line.

    Returns:
        Mapping of ``"[SPORT] name"`` labels to stream URLs, in playlist
        order. A label seen more than once keeps the last URL.
    """
    events: dict[str, str] = {}

    # Pair every line with its successor: the URL of an entry is the line
    # directly after its "#EXTINF" line.
    for info, url in zip(lines, lines[1:]):
        if not info.startswith("#EXTINF"):
            continue

        tvg_id_match = re.search(r'tvg-id="([^"]*)"', info)

        # Only entries with an explicitly empty tvg-id are collected; a
        # missing attribute does not count as empty.
        if tvg_id_match is None or tvg_id_match[1] != "":
            continue

        tvg_name_match = re.search(r'tvg-name="([^"]*)"', info)

        # Without a tvg-name there is nothing to label the entry with; skip
        # it rather than subscripting a failed match.
        if tvg_name_match is None:
            continue

        if not url.endswith("/sd"):
            continue

        # Drop any parenthesised suffix from the display name.
        tvg_name = tvg_name_match[1].split("(")[0].strip()

        path_parts = urlparse(url).path.strip("/").split("/")

        if len(path_parts) >= 2 and path_parts[-1] == "sd":
            # The sport tag is the alphabetic part of the second path segment.
            sport = "".join(x for x in path_parts[1] if x.isalpha()).upper()
        else:
            sport = "UNKNWN"

        events[f"[{sport}] {tvg_name}"] = url

    return events


def main(client: httpx.Client) -> None:
    """Scrape the playlist and store the live events it lists.

    Collected events are merged into the module-level ``urls`` mapping and,
    when that mapping is non-empty, written to ``base_file`` as JSON.

    Args:
        client: HTTP client used to download the playlist.
    """
    print(f'Scraping from "{base_url}"')

    if not (data := fetch_m3u8(client)):
        return

    urls.update(_parse_live_events(data))

    print(f"Collected {len(urls)} live events")

    if urls:
        base_file.write_text(json.dumps(urls, indent=2), encoding="utf-8")
2025-08-17 17:01:52 -04:00
# if __name__ == "__main__":
# # create client beforehand
# main()