edit typing
move EPG fetching into M3U8 folder
edit workflows
This commit is contained in:
doms9 2026-01-24 00:48:30 -05:00
parent 00000d9638
commit 00000d98e3
16 changed files with 376740 additions and 377279 deletions

View file

@ -14,14 +14,6 @@ jobs:
with:
fetch-depth: 0
- name: Cache venv
uses: actions/cache@v3
with:
path: .venv
key: shared-venv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
restore-keys: |
shared-venv-${{ runner.os }}-
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
@ -36,13 +28,13 @@ jobs:
python-version-file: "pyproject.toml"
- name: Fetch EPG
run: uv run EPG/fetch.py
run: uv run M3U8/epg-fetch.py
- name: Push changes
uses: stefanzweifel/git-auto-commit-action@v6
with:
commit_message: "update EPG"
file_pattern: "EPG/TV.xml"
file_pattern: "M3U8/TV.xml"
commit_author: "GitHub Actions Bot <actions@github.com>"
commit_user_name: "GitHub Actions Bot"
commit_user_email: "actions@github.com"

View file

@ -22,23 +22,6 @@ jobs:
with:
fetch-depth: 0
- name: Cache venv
if: steps.check_time.outputs.run == 'true'
uses: actions/cache@v3
with:
path: .venv
key: shared-venv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
restore-keys: |
shared-venv-${{ runner.os }}-
- name: Cache cert
uses: actions/cache@v3
with:
path: M3U8/scrape/utils/cached-ca.pem
key: cert-cache-${{ runner.os }}-${{ hashFiles('M3U8/scrape/utils/cached-ca.pem') }}
restore-keys: |
cert-cache-${{ runner.os }}-
- name: Install uv
if: steps.check_time.outputs.run == 'true'
uses: astral-sh/setup-uv@v6

File diff suppressed because one or more lines are too long

View file

@ -5,15 +5,18 @@ import re
from pathlib import Path
from xml.etree import ElementTree as ET
import httpx
from scrapers.utils import get_logger, network
epg_file = Path(__file__).parent / "TV.xml"
log = get_logger(__name__)
epg_urls = [
BASE_M3U8 = Path(__file__).parent / "base.m3u8"
EPG_FILE = Path(__file__).parent / "TV.xml"
EPG_URLS = [
"https://epgshare01.online/epgshare01/epg_ripper_CA2.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_DUMMY_CHANNELS.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_FANDUEL1.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_MY1.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_PLEX1.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_UK1.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_US2.xml.gz",
@ -21,44 +24,31 @@ epg_urls = [
"https://i.mjh.nz/Roku/all.xml.gz",
]
client = httpx.AsyncClient(
timeout=httpx.Timeout(5.0),
follow_redirects=True,
http2=True,
headers={
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
},
)
LIVE_IMG = "https://i.gyazo.com/978f2eb4a199ca5b56b447aded0cb9e3.png"
live_img = "https://i.gyazo.com/978f2eb4a199ca5b56b447aded0cb9e3.png"
dummies = {
"Basketball.Dummy.us": live_img,
"Golf.Dummy.us": live_img,
"Live.Event.us": live_img,
DUMMIES = {
"Basketball.Dummy.us": LIVE_IMG,
"Golf.Dummy.us": LIVE_IMG,
"Live.Event.us": LIVE_IMG,
"MLB.Baseball.Dummy.us": None,
"NBA.Basketball.Dummy.us": None,
"NFL.Dummy.us": None,
"NHL.Hockey.Dummy.us": None,
"PPV.EVENTS.Dummy.us": live_img,
"Racing.Dummy.us": live_img,
"Soccer.Dummy.us": live_img,
"Tennis.Dummy.us": live_img,
"PPV.EVENTS.Dummy.us": LIVE_IMG,
"Racing.Dummy.us": LIVE_IMG,
"Soccer.Dummy.us": LIVE_IMG,
"Tennis.Dummy.us": LIVE_IMG,
"WNBA.dummy.us": None,
}
replace_ids = {
REPLACE_IDs = {
"NCAA Sports": {"old": "Sports.Dummy.us", "new": "NCAA.Sports.Dummy.us"},
"UFC": {"old": "UFC.247.Dummy.us", "new": "UFC.Dummy.us"},
}
def get_tvg_ids() -> dict[str, str]:
base_m3u8 = (
(Path(__file__).parent.parent / "M3U8" / "base.m3u8")
.read_text(encoding="utf-8")
.splitlines()
)
base_m3u8 = BASE_M3U8.read_text(encoding="utf-8").splitlines()
tvg = {}
@ -73,20 +63,17 @@ def get_tvg_ids() -> dict[str, str]:
async def fetch_xml(url: str) -> ET.Element | None:
try:
r = await client.get(url)
r.raise_for_status()
except Exception as e:
print(f'Failed to fetch "{url}": {e}')
if not (html_data := await network.request(url, log=log)):
return
try:
decompressed_data = gzip.decompress(r.content)
decompressed_data = gzip.decompress(html_data.content)
return ET.fromstring(decompressed_data)
except Exception as e:
print(f'Failed to decompress and parse XML from "{url}": {e}')
log.error(f'Failed to decompress and parse XML from "{url}": {e}')
return
def hijack_id(
@ -138,13 +125,15 @@ def hijack_id(
async def main() -> None:
log.info(f"{'=' * 10} Fetching EPG {'=' * 10}")
tvg_ids = get_tvg_ids()
tvg_ids |= dummies | {v["old"]: live_img for v in replace_ids.values()}
tvg_ids |= DUMMIES | {v["old"]: LIVE_IMG for v in REPLACE_IDs.values()}
root = ET.Element("tv")
tasks = [fetch_xml(url) for url in epg_urls]
tasks = [fetch_xml(url) for url in EPG_URLS]
results = await asyncio.gather(*tasks)
@ -176,20 +165,24 @@ async def main() -> None:
root.append(program)
for k, v in replace_ids.items():
for k, v in REPLACE_IDs.items():
hijack_id(**v, text=k, root=root)
tree = ET.ElementTree(root)
tree.write(epg_file, encoding="utf-8", xml_declaration=True)
tree.write(EPG_FILE, encoding="utf-8", xml_declaration=True)
print(f"EPG saved to {epg_file.resolve()}")
log.info(f"EPG saved to {EPG_FILE.resolve()}")
if __name__ == "__main__":
asyncio.run(main())
for hndlr in log.handlers:
hndlr.flush()
hndlr.stream.write("\n")
try:
asyncio.run(client.aclose())
asyncio.run(network.client.aclose())
except Exception:
pass

View file

@ -162,10 +162,10 @@ async def main() -> None:
log.info(f"Events saved to {EVENTS_FILE.resolve()}")
for hndlr in log.handlers:
hndlr.flush()
hndlr.stream.write("\n")
if __name__ == "__main__":
asyncio.run(main())
for hndlr in log.handlers:
hndlr.flush()
hndlr.stream.write("\n")

View file

@ -1,6 +1,6 @@
from functools import partial
from playwright.async_api import BrowserContext
from playwright.async_api import Browser
from .utils import Cache, Time, get_logger, leagues, network
@ -85,7 +85,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)

View file

@ -1,6 +1,6 @@
from functools import partial
from playwright.async_api import BrowserContext
from playwright.async_api import Browser
from .utils import Cache, Time, get_logger, leagues, network
@ -75,7 +75,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)

View file

@ -1,7 +1,7 @@
import json
from functools import partial
from playwright.async_api import BrowserContext, Page
from playwright.async_api import Browser, Page
from .utils import Cache, Time, get_logger, leagues, network
@ -73,7 +73,7 @@ async def get_events(page: Page) -> dict[str, dict[str, str | float]]:
return events
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
if cached := CACHE_FILE.load():
urls.update(cached)

View file

@ -1,6 +1,6 @@
from functools import partial
from playwright.async_api import BrowserContext
from playwright.async_api import Browser
from .utils import Cache, Time, get_logger, leagues, network
@ -78,7 +78,7 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)

View file

@ -2,7 +2,7 @@ import asyncio
from functools import partial
from urllib.parse import urljoin
from playwright.async_api import BrowserContext
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
@ -88,7 +88,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)

View file

@ -1,6 +1,6 @@
from functools import partial
from playwright.async_api import BrowserContext
from playwright.async_api import Browser
from .utils import Cache, Time, get_logger, leagues, network
@ -90,7 +90,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)

View file

@ -2,7 +2,7 @@ import asyncio
from functools import partial
from urllib.parse import urljoin
from playwright.async_api import BrowserContext
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
@ -132,7 +132,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return live
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}

View file

@ -4,7 +4,7 @@ from itertools import chain
from typing import Any
from urllib.parse import urljoin
from playwright.async_api import BrowserContext
from playwright.async_api import Browser
from .utils import Cache, Time, get_logger, leagues, network
@ -120,7 +120,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)

View file

@ -3,7 +3,7 @@ from functools import partial
from urllib.parse import urljoin
import feedparser
from playwright.async_api import BrowserContext, Error, Page, TimeoutError
from playwright.async_api import Browser, Error, Page, TimeoutError
from .utils import Cache, Time, get_logger, leagues, network
@ -161,7 +161,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}

View file

@ -5,7 +5,7 @@ from itertools import chain
from typing import Any
from urllib.parse import urljoin
from playwright.async_api import BrowserContext, Page, TimeoutError
from playwright.async_api import Browser, Page, TimeoutError
from .utils import Cache, Time, get_logger, leagues, network
@ -232,7 +232,7 @@ async def get_events(base_url: str, cached_keys: list[str]) -> list[dict[str, st
return events
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}

View file

@ -1,7 +1,7 @@
import asyncio
from functools import partial
from playwright.async_api import BrowserContext
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
@ -110,7 +110,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return live
async def scrape(browser: BrowserContext) -> None:
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)