iptv/EPG/fetch.py

#!/usr/bin/env python3
import asyncio
import gzip
import json
from pathlib import Path
from xml.etree import ElementTree as ET

import httpx

tvg_ids_file = Path(__file__).parent / "TVG-IDs.json"
epg_file = Path(__file__).parent / "TV.xml"
epg_urls = [
    "https://epgshare01.online/epgshare01/epg_ripper_CA1.xml.gz",
    "https://epgshare01.online/epgshare01/epg_ripper_DUMMY_CHANNELS.xml.gz",
    "https://epgshare01.online/epgshare01/epg_ripper_ES1.xml.gz",
    "https://epgshare01.online/epgshare01/epg_ripper_FANDUEL1.xml.gz",
    "https://epgshare01.online/epgshare01/epg_ripper_MY1.xml.gz",
    "https://epgshare01.online/epgshare01/epg_ripper_PLEX1.xml.gz",
    "https://epgshare01.online/epgshare01/epg_ripper_PT1.xml.gz",
    "https://epgshare01.online/epgshare01/epg_ripper_UK1.xml.gz",
    "https://epgshare01.online/epgshare01/epg_ripper_US1.xml.gz",
    "https://epgshare01.online/epgshare01/epg_ripper_US2.xml.gz",
    "https://epgshare01.online/epgshare01/epg_ripper_US_LOCALS2.xml.gz",
    # "https://epgshare01.online/epgshare01/epg_ripper_US_SPORTS1.xml.gz",
]

client = httpx.AsyncClient(
    timeout=5,
    follow_redirects=True,
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
    },
)

live_img = "https://i.gyazo.com/978f2eb4a199ca5b56b447aded0cb9e3.png"

dummies = {
    "Basketball.Dummy.us": live_img,
    "Golf.Dummy.us": live_img,
    "Live.Event.us": live_img,
    "MLB.Baseball.Dummy.us": None,
    "NBA.Basketball.Dummy.us": None,
    "NFL.Dummy.us": None,
    "NHL.Hockey.Dummy.us": None,
    "PPV.EVENTS.Dummy.us": live_img,
    "Racing.Dummy.us": live_img,
    "Soccer.Dummy.us": live_img,
    "Tennis.Dummy.us": live_img,
    "WNBA.dummy.us": None,
}

replace_ids = {
    "NCAA Sports": {"old": "Sports.Dummy.us", "new": "NCAA.Sports.Dummy.us"},
    "UFC": {"old": "UFC.247.Dummy.us", "new": "UFC.Dummy.us"},
}


async def fetch_xml(url: str) -> ET.Element | None:
    try:
        r = await client.get(url)
        r.raise_for_status()
    except Exception as e:
        print(f'Failed to fetch "{url}"\n{e}')
        return

    try:
        decompressed_data = gzip.decompress(r.content)

        return ET.fromstring(decompressed_data)

    except Exception as e:
        print(f'Failed to decompress and parse XML from "{url}"\n{e}')


def hijack_id(
    old: str,
    new: str,
    text: str,
    root: ET.Element,
) -> None:

    og_channel = root.find(f"./channel[@id='{old}']")

    if og_channel is not None:
        new_channel = ET.Element(og_channel.tag, {**og_channel.attrib, "id": new})

        display_name = og_channel.find("display-name")

        if display_name is not None:
            new_channel.append(ET.Element("display-name", display_name.attrib))
            new_channel[-1].text = text

        for child in og_channel:
            if child.tag == "display-name":
                continue

            new_child = ET.Element(child.tag, child.attrib)
            new_child.text = child.text

        root.remove(og_channel)

        root.append(new_channel)

    for program in root.findall(f"./programme[@channel='{old}']"):
        new_program = ET.Element(program.tag, {**program.attrib, "channel": new})

        for child in program:
            new_child = ET.Element(child.tag, child.attrib)
            new_child.text = child.text
            new_program.append(new_child)

        for tag_name in ["title", "desc", "sub-title"]:
            tag = new_program.find(tag_name)

            if tag is not None:
                tag.text = text

        root.remove(program)

        root.append(new_program)


async def main() -> None:
    tvg_ids: dict[str, str] = json.loads(tvg_ids_file.read_text(encoding="utf-8"))

    additions = dummies | {v["old"]: live_img for v in replace_ids.values()}

    tvg_ids |= additions

    root = ET.Element("tv")

    tasks = [fetch_xml(url) for url in epg_urls]

    results = await asyncio.gather(*tasks)

    for epg_data in results:
        if epg_data is None:
            continue

        for channel in epg_data.findall("channel"):
            if (channel_id := channel.get("id")) in tvg_ids:
                for icon_tag in channel.findall("icon"):
                    if logo := tvg_ids.get(channel_id):
                        icon_tag.set("src", logo)

                if (url_tag := channel.find("url")) is not None:
                    channel.remove(url_tag)

                root.append(channel)

        for program in epg_data.findall("programme"):
            tvg_id = program.get("channel")

            if tvg_id in tvg_ids:
                title_text = program.find("title").text
                subtitle = program.find("sub-title")

                if (
                    title_text in ["NHL Hockey", "Live: NFL Football"]
                    and subtitle is not None
                ):
                    program.find("title").text = f"{title_text} {subtitle.text}"

                root.append(program)

        for k, v in replace_ids.items():
            hijack_id(**v, text=k, root=root)

    tree = ET.ElementTree(root)

    tree.write(epg_file, encoding="utf-8", xml_declaration=True)

    print(f"EPG saved to {epg_file.name}")


if __name__ == "__main__":
    asyncio.run(main())
init 2025-08-17 10:05:09 -04:00			`#!/usr/bin/env python3`
e 2025-08-27 10:26:56 -04:00			`import asyncio`
init 2025-08-17 10:05:09 -04:00			`import gzip`
e 2025-09-18 17:31:45 -04:00			`import json`
init 2025-08-17 10:05:09 -04:00			`from pathlib import Path`
			`from xml.etree import ElementTree as ET`

			`import httpx`

e 2025-09-18 17:31:45 -04:00			`tvg_ids_file = Path(__file__).parent / "TVG-IDs.json"`
init 2025-08-17 10:05:09 -04:00			`epg_file = Path(__file__).parent / "TV.xml"`
			`epg_urls = [`
e 2025-09-18 17:31:45 -04:00			`"https://epgshare01.online/epgshare01/epg_ripper_CA1.xml.gz",`
e 2025-09-19 02:05:40 -04:00			`"https://epgshare01.online/epgshare01/epg_ripper_DUMMY_CHANNELS.xml.gz",`
e 2025-09-18 17:31:45 -04:00			`"https://epgshare01.online/epgshare01/epg_ripper_ES1.xml.gz",`
			`"https://epgshare01.online/epgshare01/epg_ripper_FANDUEL1.xml.gz",`
			`"https://epgshare01.online/epgshare01/epg_ripper_MY1.xml.gz",`
			`"https://epgshare01.online/epgshare01/epg_ripper_PLEX1.xml.gz",`
			`"https://epgshare01.online/epgshare01/epg_ripper_PT1.xml.gz",`
			`"https://epgshare01.online/epgshare01/epg_ripper_UK1.xml.gz",`
init 2025-08-17 10:05:09 -04:00			`"https://epgshare01.online/epgshare01/epg_ripper_US1.xml.gz",`
e 2025-09-14 11:10:51 -04:00			`"https://epgshare01.online/epgshare01/epg_ripper_US2.xml.gz",`
init 2025-08-17 10:05:09 -04:00			`"https://epgshare01.online/epgshare01/epg_ripper_US_LOCALS2.xml.gz",`
e 2025-09-23 21:23:31 -04:00			`# "https://epgshare01.online/epgshare01/epg_ripper_US_SPORTS1.xml.gz",`
init 2025-08-17 10:05:09 -04:00			`]`

e 2025-08-27 10:26:56 -04:00			`client = httpx.AsyncClient(`
e 2025-08-17 17:01:52 -04:00			`timeout=5,`
			`follow_redirects=True,`
			`headers={`
			`"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"`
			`},`
			`)`

e 2025-09-19 15:44:02 -04:00			`live_img = "https://i.gyazo.com/978f2eb4a199ca5b56b447aded0cb9e3.png"`

			`dummies = {`
			`"Basketball.Dummy.us": live_img,`
e 2025-09-19 17:35:18 -04:00			`"Golf.Dummy.us": live_img,`
e 2025-09-19 15:44:02 -04:00			`"Live.Event.us": live_img,`
			`"MLB.Baseball.Dummy.us": None,`
			`"NBA.Basketball.Dummy.us": None,`
			`"NFL.Dummy.us": None,`
			`"NHL.Hockey.Dummy.us": None,`
			`"PPV.EVENTS.Dummy.us": live_img,`
e 2025-09-21 10:14:26 -04:00			`"Racing.Dummy.us": live_img,`
e 2025-09-19 15:44:02 -04:00			`"Soccer.Dummy.us": live_img,`
e 2025-09-21 10:14:26 -04:00			`"Tennis.Dummy.us": live_img,`
e 2025-09-19 15:44:02 -04:00			`"WNBA.dummy.us": None,`
			`}`

e 2025-09-20 23:26:18 -04:00			`replace_ids = {`
			`"NCAA Sports": {"old": "Sports.Dummy.us", "new": "NCAA.Sports.Dummy.us"},`
			`"UFC": {"old": "UFC.247.Dummy.us", "new": "UFC.Dummy.us"},`
			`}`

init 2025-08-17 10:05:09 -04:00
e 2025-09-24 01:00:30 -04:00			`async def fetch_xml(url: str) -> ET.Element \| None:`
init 2025-08-17 10:05:09 -04:00			`try:`
e 2025-08-27 10:26:56 -04:00			`r = await client.get(url)`
init 2025-08-17 10:05:09 -04:00			`r.raise_for_status()`
			`except Exception as e:`
e 2025-09-24 01:00:30 -04:00			`print(f'Failed to fetch "{url}"\n{e}')`
			`return`
init 2025-08-17 10:05:09 -04:00
			`try:`
			`decompressed_data = gzip.decompress(r.content)`

			`return ET.fromstring(decompressed_data)`

			`except Exception as e:`
e 2025-09-24 01:00:30 -04:00			`print(f'Failed to decompress and parse XML from "{url}"\n{e}')`
init 2025-08-17 10:05:09 -04:00

e 2025-09-20 23:26:18 -04:00			`def hijack_id(`
			`old: str,`
			`new: str,`
			`text: str,`
			`root: ET.Element,`
			`) -> None:`

			`og_channel = root.find(f"./channel[@id='{old}']")`

			`if og_channel is not None:`
			`new_channel = ET.Element(og_channel.tag, {**og_channel.attrib, "id": new})`

			`display_name = og_channel.find("display-name")`

			`if display_name is not None:`
			`new_channel.append(ET.Element("display-name", display_name.attrib))`
			`new_channel[-1].text = text`

			`for child in og_channel:`
			`if child.tag == "display-name":`
			`continue`

			`new_child = ET.Element(child.tag, child.attrib)`
			`new_child.text = child.text`

			`root.remove(og_channel)`

			`root.append(new_channel)`

			`for program in root.findall(f"./programme[@channel='{old}']"):`
			`new_program = ET.Element(program.tag, {**program.attrib, "channel": new})`

			`for child in program:`
			`new_child = ET.Element(child.tag, child.attrib)`
			`new_child.text = child.text`
			`new_program.append(new_child)`

			`for tag_name in ["title", "desc", "sub-title"]:`
			`tag = new_program.find(tag_name)`

			`if tag is not None:`
			`tag.text = text`

			`root.remove(program)`

			`root.append(new_program)`


e 2025-08-27 10:26:56 -04:00			`async def main() -> None:`
e 2025-09-19 15:44:02 -04:00			`tvg_ids: dict[str, str] = json.loads(tvg_ids_file.read_text(encoding="utf-8"))`
e 2025-09-18 17:31:45 -04:00
e 2025-09-20 23:47:18 -04:00			`additions = dummies \| {v["old"]: live_img for v in replace_ids.values()}`
e 2025-09-20 23:26:18 -04:00
			`tvg_ids \|= additions`
init 2025-08-17 10:05:09 -04:00
			`root = ET.Element("tv")`

e 2025-08-27 10:26:56 -04:00			`tasks = [fetch_xml(url) for url in epg_urls]`
e 2025-09-20 23:47:18 -04:00
e 2025-08-27 10:26:56 -04:00			`results = await asyncio.gather(*tasks)`

			`for epg_data in results:`
			`if epg_data is None:`
			`continue`
init 2025-08-17 10:05:09 -04:00
			`for channel in epg_data.findall("channel"):`
			`if (channel_id := channel.get("id")) in tvg_ids:`
			`for icon_tag in channel.findall("icon"):`
e 2025-09-19 15:44:02 -04:00			`if logo := tvg_ids.get(channel_id):`
			`icon_tag.set("src", logo)`
init 2025-08-17 10:05:09 -04:00
			`if (url_tag := channel.find("url")) is not None:`
			`channel.remove(url_tag)`

			`root.append(channel)`

			`for program in epg_data.findall("programme"):`
			`tvg_id = program.get("channel")`

			`if tvg_id in tvg_ids:`
e 2025-09-20 23:26:18 -04:00			`title_text = program.find("title").text`
			`subtitle = program.find("sub-title")`
init 2025-08-17 10:05:09 -04:00
e 2025-09-20 23:26:18 -04:00			`if (`
			`title_text in ["NHL Hockey", "Live: NFL Football"]`
			`and subtitle is not None`
			`):`
init 2025-08-17 10:05:09 -04:00			`program.find("title").text = f"{title_text} {subtitle.text}"`

			`root.append(program)`

e 2025-09-20 23:26:18 -04:00			`for k, v in replace_ids.items():`
			`hijack_id(**v, text=k, root=root)`

init 2025-08-17 10:05:09 -04:00			`tree = ET.ElementTree(root)`

			`tree.write(epg_file, encoding="utf-8", xml_declaration=True)`

			`print(f"EPG saved to {epg_file.name}")`


			`if __name__ == "__main__":`
e 2025-08-27 10:26:56 -04:00			`asyncio.run(main())`