Compare commits


No commits in common. "c199ac5ce42376381c3fe6b38d04c17c9b6cd11d" and "dc72afa134f4c3399b9aab6c75f6b046f8f35692" have entirely different histories.

33 changed files with 91,714 additions and 90,061 deletions


@ -14,6 +14,14 @@ jobs:
with:
fetch-depth: 0
- name: Cache venv
uses: actions/cache@v3
with:
path: .venv
key: shared-venv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
restore-keys: |
shared-venv-${{ runner.os }}-
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
@ -28,13 +36,13 @@ jobs:
python-version-file: "pyproject.toml"
- name: Fetch EPG
run: uv run M3U8/epg-fetch.py
run: uv run EPG/fetch.py
- name: Push changes
uses: stefanzweifel/git-auto-commit-action@v6
with:
commit_message: "update EPG"
file_pattern: "M3U8/TV.xml"
file_pattern: "EPG/TV.xml"
commit_author: "GitHub Actions Bot <actions@github.com>"
commit_user_name: "GitHub Actions Bot"
commit_user_email: "actions@github.com"


@ -22,6 +22,23 @@ jobs:
with:
fetch-depth: 0
- name: Cache venv
if: steps.check_time.outputs.run == 'true'
uses: actions/cache@v3
with:
path: .venv
key: shared-venv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
restore-keys: |
shared-venv-${{ runner.os }}-
- name: Cache cert
uses: actions/cache@v3
with:
path: M3U8/scrape/utils/cached-ca.pem
key: cert-cache-${{ runner.os }}-${{ hashFiles('M3U8/scrape/utils/cached-ca.pem') }}
restore-keys: |
cert-cache-${{ runner.os }}-
- name: Install uv
if: steps.check_time.outputs.run == 'true'
uses: astral-sh/setup-uv@v6

File diff suppressed because one or more lines are too long


@ -5,18 +5,15 @@ import re
from pathlib import Path
from xml.etree import ElementTree as ET
from scrapers.utils import get_logger, network
import httpx
log = get_logger(__name__)
epg_file = Path(__file__).parent / "TV.xml"
BASE_M3U8 = Path(__file__).parent / "base.m3u8"
EPG_FILE = Path(__file__).parent / "TV.xml"
EPG_URLS = [
epg_urls = [
"https://epgshare01.online/epgshare01/epg_ripper_CA2.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_DUMMY_CHANNELS.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_FANDUEL1.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_MY1.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_PLEX1.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_UK1.xml.gz",
"https://epgshare01.online/epgshare01/epg_ripper_US2.xml.gz",
@ -24,31 +21,44 @@ EPG_URLS = [
"https://i.mjh.nz/Roku/all.xml.gz",
]
LIVE_IMG = "https://i.gyazo.com/978f2eb4a199ca5b56b447aded0cb9e3.png"
client = httpx.AsyncClient(
timeout=httpx.Timeout(5.0),
follow_redirects=True,
http2=True,
headers={
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
},
)
DUMMIES = {
"Basketball.Dummy.us": LIVE_IMG,
"Golf.Dummy.us": LIVE_IMG,
"Live.Event.us": LIVE_IMG,
live_img = "https://i.gyazo.com/978f2eb4a199ca5b56b447aded0cb9e3.png"
dummies = {
"Basketball.Dummy.us": live_img,
"Golf.Dummy.us": live_img,
"Live.Event.us": live_img,
"MLB.Baseball.Dummy.us": None,
"NBA.Basketball.Dummy.us": None,
"NFL.Dummy.us": None,
"NHL.Hockey.Dummy.us": None,
"PPV.EVENTS.Dummy.us": LIVE_IMG,
"Racing.Dummy.us": LIVE_IMG,
"Soccer.Dummy.us": LIVE_IMG,
"Tennis.Dummy.us": LIVE_IMG,
"PPV.EVENTS.Dummy.us": live_img,
"Racing.Dummy.us": live_img,
"Soccer.Dummy.us": live_img,
"Tennis.Dummy.us": live_img,
"WNBA.dummy.us": None,
}
REPLACE_IDs = {
replace_ids = {
"NCAA Sports": {"old": "Sports.Dummy.us", "new": "NCAA.Sports.Dummy.us"},
"UFC": {"old": "UFC.247.Dummy.us", "new": "UFC.Dummy.us"},
}
def get_tvg_ids() -> dict[str, str]:
base_m3u8 = BASE_M3U8.read_text(encoding="utf-8").splitlines()
base_m3u8 = (
(Path(__file__).parent.parent / "M3U8" / "base.m3u8")
.read_text(encoding="utf-8")
.splitlines()
)
tvg = {}
@ -63,17 +73,20 @@ def get_tvg_ids() -> dict[str, str]:
async def fetch_xml(url: str) -> ET.Element | None:
if not (html_data := await network.request(url, log=log)):
try:
r = await client.get(url)
r.raise_for_status()
except Exception as e:
print(f'Failed to fetch "{url}": {e}')
return
try:
decompressed_data = gzip.decompress(html_data.content)
decompressed_data = gzip.decompress(r.content)
return ET.fromstring(decompressed_data)
except Exception as e:
log.error(f'Failed to decompress and parse XML from "{url}": {e}')
return
except Exception as e:
print(f'Failed to decompress and parse XML from "{url}": {e}')
def hijack_id(
@ -125,15 +138,13 @@ def hijack_id(
async def main() -> None:
log.info(f"{'=' * 10} Fetching EPG {'=' * 10}")
tvg_ids = get_tvg_ids()
tvg_ids |= DUMMIES | {v["old"]: LIVE_IMG for v in REPLACE_IDs.values()}
tvg_ids |= dummies | {v["old"]: live_img for v in replace_ids.values()}
root = ET.Element("tv")
tasks = [fetch_xml(url) for url in EPG_URLS]
tasks = [fetch_xml(url) for url in epg_urls]
results = await asyncio.gather(*tasks)
@ -165,24 +176,20 @@ async def main() -> None:
root.append(program)
for k, v in REPLACE_IDs.items():
for k, v in replace_ids.items():
hijack_id(**v, text=k, root=root)
tree = ET.ElementTree(root)
tree.write(EPG_FILE, encoding="utf-8", xml_declaration=True)
tree.write(epg_file, encoding="utf-8", xml_declaration=True)
log.info(f"EPG saved to {EPG_FILE.resolve()}")
print(f"EPG saved to {epg_file.resolve()}")
if __name__ == "__main__":
asyncio.run(main())
for hndlr in log.handlers:
hndlr.flush()
hndlr.stream.write("\n")
try:
asyncio.run(network.client.aclose())
asyncio.run(client.aclose())
except Exception:
pass

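One side of this diff has the EPG fetch script talk to the feeds through a module-level `httpx.AsyncClient` instead of the shared `scrapers.utils.network` helper, downloading each gzipped XMLTV file and parsing it with `ElementTree`. A minimal, self-contained sketch of that fetch-and-parse step (client options trimmed; the URL is one of the EPG_URLS entries above):

```python
import asyncio
import gzip
from xml.etree import ElementTree as ET

import httpx

# Shared client, mirroring the module-level client in the diff (http2/header options omitted).
client = httpx.AsyncClient(timeout=httpx.Timeout(5.0), follow_redirects=True)


async def fetch_xml(url: str) -> ET.Element | None:
    try:
        r = await client.get(url)
        r.raise_for_status()
    except Exception as e:
        print(f'Failed to fetch "{url}": {e}')
        return None
    try:
        # The EPG mirrors serve gzip-compressed XMLTV documents.
        return ET.fromstring(gzip.decompress(r.content))
    except Exception as e:
        print(f'Failed to decompress and parse XML from "{url}": {e}')
        return None


async def main() -> None:
    root = await fetch_xml("https://epgshare01.online/epgshare01/epg_ripper_US2.xml.gz")
    if root is not None:
        print(root.tag, len(root))
    await client.aclose()


if __name__ == "__main__":
    asyncio.run(main())
```
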
File diff suppressed because it is too large

File diff suppressed because it is too large


@ -3,7 +3,6 @@ import asyncio
import re
from pathlib import Path
from playwright.async_api import async_playwright
from scrapers import (
cdnlivetv,
embedhd,
@ -20,7 +19,7 @@ from scrapers import (
streamfree,
streamhub,
streamsgate,
tflix,
strmd,
totalsportek,
tvpass,
watchfooty,
@ -54,46 +53,31 @@ async def main() -> None:
base_m3u8, tvg_chno = load_base()
async with async_playwright() as p:
try:
hdl_brwsr = await network.browser(p)
tasks = [
asyncio.create_task(cdnlivetv.scrape()),
asyncio.create_task(embedhd.scrape()),
asyncio.create_task(fawa.scrape()),
asyncio.create_task(istreameast.scrape()),
asyncio.create_task(pawa.scrape()),
asyncio.create_task(pixel.scrape()),
asyncio.create_task(ppv.scrape()),
asyncio.create_task(roxie.scrape()),
asyncio.create_task(shark.scrape()),
asyncio.create_task(sport9.scrape()),
asyncio.create_task(streambtw.scrape()),
asyncio.create_task(streamcenter.scrape()),
asyncio.create_task(streamfree.scrape()),
asyncio.create_task(streamhub.scrape()),
asyncio.create_task(streamsgate.scrape()),
# asyncio.create_task(strmd.scrape()),
asyncio.create_task(totalsportek.scrape()),
asyncio.create_task(tvpass.scrape()),
asyncio.create_task(webcast.scrape()),
]
xtrnl_brwsr = await network.browser(p, external=True)
await asyncio.gather(*tasks)
pw_tasks = [
asyncio.create_task(cdnlivetv.scrape(hdl_brwsr)),
asyncio.create_task(embedhd.scrape(hdl_brwsr)),
asyncio.create_task(pixel.scrape(hdl_brwsr)),
asyncio.create_task(ppv.scrape(xtrnl_brwsr)),
asyncio.create_task(sport9.scrape(xtrnl_brwsr)),
asyncio.create_task(streamcenter.scrape(xtrnl_brwsr)),
# asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
asyncio.create_task(streamsgate.scrape(xtrnl_brwsr)),
asyncio.create_task(tflix.scrape(xtrnl_brwsr)),
asyncio.create_task(webcast.scrape(hdl_brwsr)),
asyncio.create_task(watchfooty.scrape(xtrnl_brwsr)),
]
httpx_tasks = [
asyncio.create_task(fawa.scrape()),
asyncio.create_task(istreameast.scrape()),
asyncio.create_task(pawa.scrape()),
asyncio.create_task(roxie.scrape()),
asyncio.create_task(shark.scrape()),
asyncio.create_task(streambtw.scrape()),
asyncio.create_task(streamfree.scrape()),
asyncio.create_task(totalsportek.scrape()),
asyncio.create_task(tvpass.scrape()),
]
await asyncio.gather(*(pw_tasks + httpx_tasks))
finally:
await hdl_brwsr.close()
await xtrnl_brwsr.close()
await network.client.aclose()
await watchfooty.scrape()
additions = (
cdnlivetv.urls
@ -111,7 +95,7 @@ async def main() -> None:
| streamfree.urls
| streamhub.urls
| streamsgate.urls
| tflix.urls
| strmd.urls
| totalsportek.urls
| tvpass.urls
| watchfooty.urls
@ -155,17 +139,22 @@ async def main() -> None:
log.info(f"Base + Events saved to {COMBINED_FILE.resolve()}")
EVENTS_FILE.write_text(
'#EXTM3U url-tvg="https://raw.githubusercontent.com/doms9/iptv/refs/heads/default/M3U8/TV.xml"\n'
'#EXTM3U url-tvg="https://raw.githubusercontent.com/doms9/iptv/refs/heads/default/EPG/TV.xml"\n'
+ "\n".join(live_events),
encoding="utf-8",
)
log.info(f"Events saved to {EVENTS_FILE.resolve()}")
for hndlr in log.handlers:
hndlr.flush()
hndlr.stream.write("\n")
if __name__ == "__main__":
asyncio.run(main())
for hndlr in log.handlers:
hndlr.flush()
hndlr.stream.write("\n")
try:
asyncio.run(network.client.aclose())
except Exception:
pass

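The entrypoint diff above reshapes how the scrapers are driven: on one side the entrypoint just gathers no-argument `scrape()` calls and each scraper manages its own Playwright session, while on the other the entrypoint owns two browser handles (a headless Firefox it launches and an external browser it attaches to over CDP), splits the coroutines into `pw_tasks` and `httpx_tasks`, and releases everything in a `finally` block. A rough, self-contained sketch of that two-pool shape, with stand-in coroutines in place of the repo's scraper modules:

```python
import asyncio

from playwright.async_api import Browser, async_playwright


# Stand-ins for the scraper coroutines; the real ones live in M3U8/scrapers.
async def pw_scrape(name: str, browser: Browser) -> None:
    print(f"{name}: scraping with {browser.browser_type.name}")


async def httpx_scrape(name: str) -> None:
    print(f"{name}: scraping over plain HTTP")


async def main() -> None:
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=True)  # shared handle for browser-backed scrapers
        try:
            pw_tasks = [asyncio.create_task(pw_scrape(n, browser)) for n in ("cdnlivetv", "pixel")]
            httpx_tasks = [asyncio.create_task(httpx_scrape(n)) for n in ("fawa", "tvpass")]
            await asyncio.gather(*(pw_tasks + httpx_tasks))
        finally:
            await browser.close()  # release the browser even if a scraper raises


if __name__ == "__main__":
    asyncio.run(main())
```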

@ -1,6 +1,6 @@
from functools import partial
from playwright.async_api import Browser
from playwright.async_api import async_playwright
from .utils import Cache, Time, get_logger, leagues, network
@ -10,9 +10,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "CDNTV"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
API_FILE = Cache(f"{TAG}-api", exp=19_800)
API_FILE = Cache(f"{TAG.lower()}-api.json", exp=19_800)
API_URL = "https://api.cdn-live.tv/api/v1/events/sports"
@ -85,7 +85,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
@ -101,14 +101,16 @@ async def scrape(browser: Browser) -> None:
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
async with async_playwright() as p:
browser, context = await network.browser(p)
try:
for i, ev in enumerate(events, start=1):
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
context=context,
log=log,
)
@ -142,6 +144,9 @@ async def scrape(browser: Browser) -> None:
urls[key] = cached_urls[key] = entry
finally:
await browser.close()
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")

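The no-argument `scrape()` variant in this cdnlivetv hunk repeats across the compare: open `async_playwright` inside the function, take a `(browser, context)` pair from `network.browser`, build a `functools.partial` over `network.process_event` with the shared context, and close the browser in a `finally` block. A rough, stand-alone approximation of that loop, with simple stand-ins for the repo's `network` helpers:

```python
import asyncio
from functools import partial

from playwright.async_api import BrowserContext, async_playwright


# Stand-in for network.process_event: open a page in the shared context,
# visit the event URL, and return whatever stream URL was captured.
async def process_event(url: str, url_num: int, context: BrowserContext) -> str | None:
    page = await context.new_page()
    try:
        await page.goto(url, wait_until="domcontentloaded", timeout=15_000)
        return page.url  # placeholder for the captured M3U8 link
    finally:
        await page.close()


async def scrape(events: list[dict[str, str]]) -> None:
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=True)
        context = await browser.new_context()  # network.browser() also injects stealth init scripts
        try:
            for i, ev in enumerate(events, start=1):
                handler = partial(process_event, url=ev["link"], url_num=i, context=context)
                if url := await handler():
                    print(f"URL {i}) captured {url}")
        finally:
            await browser.close()  # the shared context is torn down with the browser


if __name__ == "__main__":
    asyncio.run(scrape([{"link": "https://example.com"}]))
```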

@ -1,6 +1,6 @@
from functools import partial
from playwright.async_api import Browser
from playwright.async_api import async_playwright
from .utils import Cache, Time, get_logger, leagues, network
@ -10,9 +10,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "EMBEDHD"
CACHE_FILE = Cache(TAG, exp=5_400)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=5_400)
API_CACHE = Cache(f"{TAG}-api", exp=28_800)
API_CACHE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
BASE_URL = "https://embedhd.org/api-event.php"
@ -75,7 +75,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
@ -91,14 +91,16 @@ async def scrape(browser: Browser) -> None:
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
async with async_playwright() as p:
browser, context = await network.browser(p)
try:
for i, ev in enumerate(events, start=1):
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
context=context,
log=log,
)
@ -132,6 +134,9 @@ async def scrape(browser: Browser) -> None:
urls[key] = cached_urls[key] = entry
finally:
await browser.close()
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")


@ -12,7 +12,7 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "FAWA"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
BASE_URL = "http://www.fawanews.sc/"


@ -12,7 +12,7 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "iSTRMEAST"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
BASE_URL = "https://istreameast.app"


@ -13,7 +13,7 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "PAWA"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
BASE_URL = "https://pawastreams.net/feed"


@ -1,7 +1,7 @@
import json
from functools import partial
from playwright.async_api import Browser, Page
from playwright.async_api import BrowserContext, async_playwright
from .utils import Cache, Time, get_logger, leagues, network
@ -11,13 +11,15 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "PIXEL"
CACHE_FILE = Cache(TAG, exp=19_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=19_800)
BASE_URL = "https://pixelsport.tv/backend/livetv/events"
async def get_api_data(page: Page) -> dict[str, list[dict, str, str]]:
async def get_api_data(context: BrowserContext) -> dict[str, list[dict, str, str]]:
try:
page = await context.new_page()
await page.goto(
BASE_URL,
wait_until="domcontentloaded",
@ -33,10 +35,10 @@ async def get_api_data(page: Page) -> dict[str, list[dict, str, str]]:
return json.loads(raw_json)
async def get_events(page: Page) -> dict[str, dict[str, str | float]]:
async def get_events(context: BrowserContext) -> dict[str, dict[str, str | float]]:
now = Time.clean(Time.now())
api_data = await get_api_data(page)
api_data = await get_api_data(context)
events = {}
@ -73,7 +75,7 @@ async def get_events(page: Page) -> dict[str, dict[str, str | float]]:
return events
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
if cached := CACHE_FILE.load():
urls.update(cached)
@ -83,9 +85,11 @@ async def scrape(browser: Browser) -> None:
log.info(f'Scraping from "{BASE_URL}"')
async with network.event_context(browser) as context:
async with network.event_page(context) as page:
handler = partial(get_events, page=page)
async with async_playwright() as p:
browser, context = await network.browser(p)
try:
handler = partial(get_events, context=context)
events = await network.safe_process(
handler,
@ -94,6 +98,9 @@ async def scrape(browser: Browser) -> None:
log=log,
)
finally:
await browser.close()
urls.update(events or {})
CACHE_FILE.write(urls)


@ -1,6 +1,6 @@
from functools import partial
from playwright.async_api import Browser
from playwright.async_api import async_playwright
from .utils import Cache, Time, get_logger, leagues, network
@ -10,9 +10,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "PPV"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
API_FILE = Cache(f"{TAG}-api", exp=19_800)
API_FILE = Cache(f"{TAG.lower()}-api.json", exp=19_800)
MIRRORS = [
"https://old.ppv.to/api/streams",
@ -78,7 +78,7 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
@ -101,14 +101,16 @@ async def scrape(browser: Browser) -> None:
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
async with async_playwright() as p:
browser, context = await network.browser(p, browser="external")
try:
for i, ev in enumerate(events, start=1):
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
context=context,
timeout=6,
log=log,
)
@ -144,6 +146,9 @@ async def scrape(browser: Browser) -> None:
urls[key] = cached_urls[key] = entry
finally:
await browser.close()
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")


@ -13,9 +13,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "ROXIE"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
HTML_CACHE = Cache(f"{TAG}-html", exp=19_800)
HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=19_800)
BASE_URL = "https://roxiestreams.live"


@ -11,9 +11,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "SHARK"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
HTML_CACHE = Cache(f"{TAG}-html", exp=19_800)
HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=19_800)
BASE_URL = "https://sharkstreams.net"


@ -2,7 +2,7 @@ import asyncio
from functools import partial
from urllib.parse import urljoin
from playwright.async_api import Browser
from playwright.async_api import async_playwright
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
@ -13,7 +13,7 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "SPORT9"
CACHE_FILE = Cache(TAG, exp=5_400)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=5_400)
BASE_URL = "https://sport9.ru/"
@ -88,7 +88,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
@ -106,14 +106,16 @@ async def scrape(browser: Browser) -> None:
if events:
now = Time.clean(Time.now()).timestamp()
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
async with async_playwright() as p:
browser, context = await network.browser(p, browser="external")
try:
for i, ev in enumerate(events, start=1):
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
context=context,
log=log,
)
@ -146,6 +148,9 @@ async def scrape(browser: Browser) -> None:
urls[key] = cached_urls[key] = entry
finally:
await browser.close()
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")


@ -13,7 +13,7 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "STRMBTW"
CACHE_FILE = Cache(TAG, exp=3_600)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
BASE_URLS = ["https://hiteasport.info/", "https://streambtw.com/"]


@ -1,6 +1,6 @@
from functools import partial
from playwright.async_api import Browser
from playwright.async_api import async_playwright
from .utils import Cache, Time, get_logger, leagues, network
@ -10,9 +10,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "STRMCNTR"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
API_FILE = Cache(f"{TAG}-api", exp=28_800)
API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
BASE_URL = "https://backend.streamcenter.live/api/Parties"
@ -90,7 +90,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
@ -106,14 +106,16 @@ async def scrape(browser: Browser) -> None:
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
async with async_playwright() as p:
browser, context = await network.browser(p, browser="external")
try:
for i, ev in enumerate(events, start=1):
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
context=context,
log=log,
)
@ -147,6 +149,9 @@ async def scrape(browser: Browser) -> None:
urls[key] = cached_urls[key] = entry
finally:
await browser.close()
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")


@ -8,7 +8,7 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "STRMFREE"
CACHE_FILE = Cache(TAG, exp=19_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=19_800)
BASE_URL = "https://streamfree.to/"


@ -2,7 +2,7 @@ import asyncio
from functools import partial
from urllib.parse import urljoin
from playwright.async_api import Browser
from playwright.async_api import async_playwright
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
@ -13,9 +13,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "STRMHUB"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
HTML_CACHE = Cache(f"{TAG}-html", exp=28_800)
HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=28_800)
BASE_URL = "https://streamhub.pro/"
@ -132,7 +132,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return live
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
@ -150,14 +150,16 @@ async def scrape(browser: Browser) -> None:
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
async with async_playwright() as p:
browser, context = await network.browser(p, browser="external")
try:
for i, ev in enumerate(events, start=1):
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
context=context,
timeout=5,
log=log,
)
@ -197,6 +199,9 @@ async def scrape(browser: Browser) -> None:
urls[key] = entry
finally:
await browser.close()
if new_count := valid_count - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")


@ -4,7 +4,7 @@ from itertools import chain
from typing import Any
from urllib.parse import urljoin
from playwright.async_api import Browser
from playwright.async_api import async_playwright
from .utils import Cache, Time, get_logger, leagues, network
@ -14,9 +14,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "STRMSGATE"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
API_FILE = Cache(f"{TAG}-api", exp=19_800)
API_FILE = Cache(f"{TAG.lower()}-api.json", exp=19_800)
BASE_URL = "https://streamingon.org"
@ -120,7 +120,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return events
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
@ -136,14 +136,16 @@ async def scrape(browser: Browser) -> None:
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
async with async_playwright() as p:
browser, context = await network.browser(p, browser="external")
try:
for i, ev in enumerate(events, start=1):
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
context=context,
log=log,
)
@ -177,6 +179,9 @@ async def scrape(browser: Browser) -> None:
urls[key] = cached_urls[key] = entry
finally:
await browser.close()
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")

M3U8/scrapers/strmd.py (new file, 196 lines)

@ -0,0 +1,196 @@
import re
from functools import partial
from urllib.parse import urljoin
from playwright.async_api import async_playwright
from .utils import Cache, Time, get_logger, leagues, network
log = get_logger(__name__)
urls: dict[str, dict[str, str | float]] = {}
TAG = "STRMD"
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
API_FILE = Cache(f"{TAG.lower()}-api.json", exp=28_800)
MIRRORS = [
"https://streami.su",
# "https://streamed.st",
"https://streamed.pk",
]
def fix_sport(s: str) -> str:
if "-" in s:
return " ".join(i.capitalize() for i in s.split("-"))
elif s == "fight":
return "Fight (UFC/Boxing)"
return s.capitalize() if len(s) >= 4 else s.upper()
async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now())
if not (api_data := API_FILE.load(per_entry=False, index=-1)):
log.info("Refreshing API cache")
api_data = [{"timestamp": now.timestamp()}]
if r := await network.request(
urljoin(url, "api/matches/all-today"),
log=log,
):
api_data: list[dict] = r.json()
api_data[-1]["timestamp"] = now.timestamp()
API_FILE.write(api_data)
events = []
pattern = re.compile(r"[\n\r]+|\s{2,}")
start_dt = now.delta(minutes=-30)
end_dt = now.delta(minutes=30)
for event in api_data:
if (category := event.get("category")) == "other":
continue
if not (ts := event["date"]):
continue
start_ts = float(f"{ts}"[:-3])
event_dt = Time.from_ts(start_ts)
if not start_dt <= event_dt <= end_dt:
continue
sport = fix_sport(category)
parts = pattern.split(event["title"].strip())
name = " | ".join(p.strip() for p in parts if p.strip())
logo = urljoin(url, poster) if (poster := event.get("poster")) else None
if f"[{sport}] {name} ({TAG})" in cached_keys:
continue
sources: list[dict[str, str]] = event["sources"]
if not sources:
continue
skip_types = ["alpha", "bravo"]
valid_sources = [d for d in sources if d.get("source") not in skip_types]
if not valid_sources:
continue
srce = valid_sources[0]
source_type = srce.get("source")
stream_id = srce.get("id")
if not (source_type and stream_id):
continue
events.append(
{
"sport": sport,
"event": name,
"link": f"https://embedsports.top/embed/{source_type}/{stream_id}/1",
"logo": logo,
"timestamp": event_dt.timestamp(),
}
)
return events
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
urls.update(cached_urls)
log.info(f"Loaded {cached_count} event(s) from cache")
if not (base_url := await network.get_base(MIRRORS)):
log.warning("No working STRMD mirrors")
CACHE_FILE.write(cached_urls)
return
log.info(f'Scraping from "{base_url}"')
events = await get_events(base_url, cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with async_playwright() as p:
browser, context = await network.browser(p, browser="external")
try:
for i, ev in enumerate(events, start=1):
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
context=context,
log=log,
)
url = await network.safe_process(
handler,
url_num=i,
semaphore=network.PW_S,
log=log,
)
if url:
sport, event, logo, ts, link = (
ev["sport"],
ev["event"],
ev["logo"],
ev["timestamp"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})"
tvg_id, pic = leagues.get_tvg_info(sport, event)
entry = {
"url": url,
"logo": logo or pic,
"base": "https://embedsports.top/",
"timestamp": ts,
"id": tvg_id or "Live.Event.us",
"link": link,
}
urls[key] = cached_urls[key] = entry
finally:
await browser.close()
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")
else:
log.info("No new events found")
CACHE_FILE.write(cached_urls)

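strmd.py, new in this compare, keeps only events whose start time falls within half an hour of now, converting the API's millisecond `date` field before comparing. A small sketch of that window check using plain `datetime` in place of the repo's `Time` helper:

```python
from datetime import datetime, timedelta, timezone


def in_window(date_ms: int, pad_minutes: int = 30) -> bool:
    """Return True if an event starting at date_ms (Unix milliseconds) is within ±pad_minutes of now."""
    now = datetime.now(timezone.utc)
    event_dt = datetime.fromtimestamp(date_ms / 1000, tz=timezone.utc)  # API dates are in milliseconds
    return now - timedelta(minutes=pad_minutes) <= event_dt <= now + timedelta(minutes=pad_minutes)


# An event starting right now passes the filter.
print(in_window(int(datetime.now(timezone.utc).timestamp() * 1000)))  # True
```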

@ -1,234 +0,0 @@
import asyncio
from functools import partial
from urllib.parse import urljoin
import feedparser
from playwright.async_api import Browser, Error, Page, TimeoutError
from .utils import Cache, Time, get_logger, leagues, network
log = get_logger(__name__)
urls: dict[str, dict[str, str | float]] = {}
TAG = "TFLIX"
CACHE_FILE = Cache(TAG, exp=28_800)
BASE_URL = "https://tv.tflix.app/"
SPORT_ENDPOINTS = ["football", "nba", "nfl", "nhl"]
async def process_event(
url: str,
url_num: int,
page: Page,
) -> tuple[str | None, str | None]:
try:
await page.goto(
url,
wait_until="domcontentloaded",
timeout=15_000,
)
try:
iframe = await page.wait_for_selector(
"iframe.metaframe.rptss",
timeout=3_500,
)
except TimeoutError:
log.warning(f"URL {url_num}) No iframe element.")
return None, None
if (old_src := await iframe.get_attribute("src")) and old_src.startswith(
"https://kloxmkhs.site/stream"
):
new_src = old_src
else:
try:
option = await page.wait_for_selector(
'li.dooplay_player_option >> span.title:has-text("TFLIX HD - iOS")',
timeout=3_000,
)
await option.scroll_into_view_if_needed()
await option.evaluate("el => el.click()")
await page.wait_for_function(
"""
(oldSrc) => {
const iframe = document.querySelector('iframe.metaframe.rptss');
return iframe && iframe.src && iframe.src !== oldSrc;
};
""",
arg=old_src,
timeout=5_000,
)
iframe_2 = await page.wait_for_selector("iframe.metaframe.rptss")
if not iframe_2 or not (new_src := await iframe_2.get_attribute("src")):
log.warning(f"URL {url_num}) No iframe source.")
return None, None
except TimeoutError:
log.warning(f"URL {url_num}) No valid TFLIX source.")
return None, None
try:
await page.goto(
new_src,
wait_until="domcontentloaded",
timeout=10_000,
referer=url,
)
except Error:
log.warning(
f"URL {url_num}) HTTP 403/404 error while redirecting to iframe source."
)
return None, None
try:
play_btn = await page.wait_for_selector(
'button[data-url][onclick*="startPlcb"]',
timeout=5_000,
)
except TimeoutError:
log.warning(f"URL {url_num}) No play button found.")
return None, None
if not (data_url := await play_btn.get_attribute("data-url")):
log.warning(f"URL {url_num}) No PBID found.")
return None, None
log.info(f"URL {url_num}) Captured M3U8")
return (
f"https://kloxmkhs.site/stream/stream.m3u8?id={data_url}&format=.m3u8",
new_src,
)
except Exception as e:
log.warning(f"URL {url_num}) Exception while processing: {e}")
return None, None
async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
tasks = [
network.request(urljoin(BASE_URL, f"genre/{sport}/feed"), log=log)
for sport in SPORT_ENDPOINTS
]
results = await asyncio.gather(*tasks)
events = []
if not (feeds := [feedparser.parse(html.content) for html in results if html]):
return events
for feed in feeds:
title: str = feed["feed"]["title"]
sport = title.split("Archives")[0].strip()
for entry in feed.entries:
if not (link := entry.get("link")):
continue
if not (title := entry.get("title")):
continue
if f"[{sport}] {title} ({TAG})" in cached_keys:
continue
events.append(
{
"sport": sport,
"event": title,
"link": link,
}
)
return events
async def scrape(browser: Browser) -> None:
cached_urls = CACHE_FILE.load()
valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
valid_count = cached_count = len(cached_urls)
urls.update(valid_urls)
log.info(f"Loaded {cached_count} event(s) from cache")
log.info(f'Scraping from "{BASE_URL}"')
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
now = Time.clean(Time.now()).timestamp()
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
process_event,
url=ev["link"],
url_num=i,
page=page,
)
url, iframe = await network.safe_process(
handler,
url_num=i,
semaphore=network.PW_S,
log=log,
timeout=20,
)
sport, event, link = (
ev["sport"],
ev["event"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})"
tvg_id, logo = leagues.get_tvg_info(sport, event)
entry = {
"url": url,
"logo": logo,
"base": iframe,
"timestamp": now,
"id": tvg_id or "Live.Event.us",
"link": link,
}
cached_urls[key] = entry
if url:
valid_count += 1
urls[key] = entry
if new_count := valid_count - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")
else:
log.info("No new events found")
CACHE_FILE.write(cached_urls)

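The tflix scraper removed in this compare hinged on one Playwright trick in `process_event`: click the "TFLIX HD - iOS" player option, then block with `page.wait_for_function` until the `iframe.metaframe.rptss` element points at a new src before reading it. A stand-alone sketch of just that step (selectors and the option label come from the diff; the target URL is illustrative):

```python
import asyncio

from playwright.async_api import TimeoutError, async_playwright


async def swap_iframe_source(url: str) -> str | None:
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=True)
        page = await browser.new_page()
        try:
            await page.goto(url, wait_until="domcontentloaded", timeout=15_000)

            iframe = await page.wait_for_selector("iframe.metaframe.rptss", timeout=3_500)
            old_src = await iframe.get_attribute("src")

            option = await page.wait_for_selector(
                'li.dooplay_player_option >> span.title:has-text("TFLIX HD - iOS")',
                timeout=3_000,
            )
            await option.evaluate("el => el.click()")

            # Wait until the iframe actually points somewhere new.
            await page.wait_for_function(
                """(oldSrc) => {
                    const f = document.querySelector('iframe.metaframe.rptss');
                    return f && f.src && f.src !== oldSrc;
                }""",
                arg=old_src,
                timeout=5_000,
            )
            refreshed = await page.wait_for_selector("iframe.metaframe.rptss")
            return await refreshed.get_attribute("src")
        except TimeoutError:
            return None
        finally:
            await browser.close()


if __name__ == "__main__":
    print(asyncio.run(swap_iframe_source("https://tv.tflix.app/")))
```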

@ -12,7 +12,7 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "TOTALSPRTK"
CACHE_FILE = Cache(TAG, exp=28_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=28_800)
MIRRORS = [
{


@ -8,7 +8,7 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "TVPASS"
CACHE_FILE = Cache(TAG, exp=86_400)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=86_400)
BASE_URL = "https://tvpass.org/playlist/m3u"


@ -7,8 +7,8 @@ from .config import Time
class Cache:
now_ts: float = Time.now().timestamp()
def __init__(self, filename: str, exp: int | float) -> None:
self.file = Path(__file__).parent.parent / "caches" / f"{filename.lower()}.json"
def __init__(self, file: str, exp: int | float) -> None:
self.file = Path(__file__).parent.parent / "caches" / file
self.exp = exp

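The Cache constructor change above is what drives every scraper's `CACHE_FILE = Cache(...)` edit in this compare: one convention lowercases a bare tag and appends `.json` inside the class, the other expects the caller to pass the finished filename. A tiny sketch of the two conventions side by side (the class names are just labels for the two variants, and the cache directory path is illustrative):

```python
from pathlib import Path

TAG = "CDNTV"
CACHE_DIR = Path("M3U8/scrapers/caches")  # stand-in for Path(__file__).parent.parent / "caches"


class CacheImplicit:
    """Class formats the filename: Cache(TAG, exp=...)."""

    def __init__(self, filename: str, exp: int | float) -> None:
        self.file = CACHE_DIR / f"{filename.lower()}.json"
        self.exp = exp


class CacheExplicit:
    """Caller formats the filename: Cache(f"{TAG.lower()}.json", exp=...)."""

    def __init__(self, file: str, exp: int | float) -> None:
        self.file = CACHE_DIR / file
        self.exp = exp


# Both conventions resolve to the same path; only who builds the name changes.
assert CacheImplicit(TAG, exp=10_800).file == CacheExplicit(f"{TAG.lower()}.json", exp=10_800).file
```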

@ -166,7 +166,7 @@
{
"3. LIGA": {
"logo": "https://i.gyazo.com/9f4f2e8370377b6214b4103003196de7.png",
"names": ["GERMAN 3 LIGA"]
"names": []
}
},
{
@ -198,12 +198,6 @@
"names": ["ADMIRAL BUNDESLIGA", "FEDERAL LEAGUE"]
}
},
{
"BELGIAN PRO LEAGUE": {
"logo": "https://a.espncdn.com/combiner/i?img=/i/leaguelogos/soccer/500/6.png",
"names": ["BELGIUM PRO LEAGUE", "JUPILER PRO LEAGUE"]
}
},
{
"BUNDESLIGA": {
"logo": "https://a.espncdn.com/combiner/i?img=/i/leaguelogos/soccer/500/10.png",
@ -630,18 +624,6 @@
"names": ["COLOMBIA PRIMERA B", "COLOMBIAN PRIMERA B"]
}
},
{
"SAUDI PRO LEAGUE": {
"logo": "https://a.espncdn.com/combiner/i?img=/i/leaguelogos/soccer/500/2488.png",
"names": ["ROSHN SAUDI LEAGUE", "SAUDI PREMIER LEAGUE"]
}
},
{
"SCOTTISH CHAMPIONSHIP": {
"logo": "https://i.gyazo.com/e1a8b0c2a85a16494bcee4a3422ba514.png",
"names": []
}
},
{
"SCOTTISH PREMIERSHIP": {
"logo": "https://a.espncdn.com/combiner/i?img=/i/leaguelogos/soccer/500/45.png",
@ -666,15 +648,6 @@
"names": ["ITALIAN SERIE B", "ITALY SERIE B", "SERIE-B"]
}
},
{
"SINGAPORE PREMIER LEAGUE": {
"logo": "https://a.espncdn.com/combiner/i?img=/i/leaguelogos/soccer/500/2292.png",
"names": [
"AIA SINGAPORE PREMIER LEAGUE",
"SINGAPOREAN PREMIER LEAGUE"
]
}
},
{
"SOCCER": {
"logo": "https://i.gyazo.com/1c4aa937f5ea01b0f29bb27adb59884c.png",
@ -775,7 +748,6 @@
"logo": "https://a.espncdn.com/combiner/i?img=/i/leaguelogos/soccer/500/2314.png",
"names": [
"BARCLAY'S WOMEN'S SUPER LEAGUE",
"ENGLISH WOMEN SUPER LEAGUE",
"ENGLISH WOMEN'S SUPER LEAGUE",
"FA WSL",
"WOMEN'S SUPER LEAGUE"


@ -3,13 +3,12 @@ import logging
import random
import re
from collections.abc import Awaitable, Callable
from contextlib import asynccontextmanager
from functools import partial
from typing import AsyncGenerator, TypeVar
from typing import TypeVar
from urllib.parse import urlencode, urljoin
import httpx
from playwright.async_api import Browser, BrowserContext, Page, Playwright, Request
from playwright.async_api import Browser, BrowserContext, Playwright, Request
from .logger import get_logger
@ -124,112 +123,6 @@ class Network:
return
@staticmethod
@asynccontextmanager
async def event_context(
browser: Browser,
stealth: bool = True,
) -> AsyncGenerator[BrowserContext, None]:
context: BrowserContext | None = None
try:
context = await browser.new_context(
user_agent=Network.UA if stealth else None,
viewport={"width": 1366, "height": 768},
device_scale_factor=1,
locale="en-US",
timezone_id="America/New_York",
color_scheme="dark",
permissions=["geolocation"],
extra_http_headers=(
{
"Accept-Language": "en-US,en;q=0.9",
"Upgrade-Insecure-Requests": "1",
}
if stealth
else None
),
)
if stealth:
await context.add_init_script("""
Object.defineProperty(navigator, "webdriver", { get: () => undefined });
Object.defineProperty(navigator, "languages", {
get: () => ["en-US", "en"],
});
Object.defineProperty(navigator, "plugins", {
get: () => [1, 2, 3, 4],
});
const elementDescriptor = Object.getOwnPropertyDescriptor(
HTMLElement.prototype,
"offsetHeight"
);
Object.defineProperty(HTMLDivElement.prototype, "offsetHeight", {
...elementDescriptor,
get: function () {
if (this.id === "modernizr") {
return 24;
}
return elementDescriptor.get.apply(this);
},
});
Object.defineProperty(window.screen, "width", { get: () => 1366 });
Object.defineProperty(window.screen, "height", { get: () => 768 });
const getParameter = WebGLRenderingContext.prototype.getParameter;
WebGLRenderingContext.prototype.getParameter = function (param) {
if (param === 37445) return "Intel Inc."; // UNMASKED_VENDOR_WEBGL
if (param === 37446) return "Intel Iris OpenGL Engine"; // UNMASKED_RENDERER_WEBGL
return getParameter.apply(this, [param]);
};
const observer = new MutationObserver((mutations) => {
mutations.forEach((mutation) => {
mutation.addedNodes.forEach((node) => {
if (node.tagName === "IFRAME" && node.hasAttribute("sandbox")) {
node.removeAttribute("sandbox");
}
});
});
});
observer.observe(document.documentElement, { childList: true, subtree: true });
""")
else:
context = await browser.new_context()
yield context
finally:
if context:
await context.close()
@staticmethod
@asynccontextmanager
async def event_page(context: BrowserContext) -> AsyncGenerator[Page, None]:
page = await context.new_page()
try:
yield page
finally:
await page.close()
@staticmethod
async def browser(playwright: Playwright, external: bool = False) -> Browser:
return (
await playwright.chromium.connect_over_cdp("http://localhost:9222")
if external
else await playwright.firefox.launch(headless=True)
)
@staticmethod
def capture_req(
req: Request,
@ -254,13 +147,15 @@ class Network:
self,
url: str,
url_num: int,
page: Page,
context: BrowserContext,
timeout: int | float = 10,
log: logging.Logger | None = None,
) -> str | None:
log = log or logger
page = await context.new_page()
captured: list[str] = []
got_one = asyncio.Event()
@ -317,6 +212,86 @@ class Network:
await page.close()
@staticmethod
async def browser(
playwright: Playwright, browser: str = "internal"
) -> tuple[Browser, BrowserContext]:
if browser == "external":
brwsr = await playwright.chromium.connect_over_cdp("http://localhost:9222")
context = brwsr.contexts[0]
else:
brwsr = await playwright.firefox.launch(headless=True)
context = await brwsr.new_context(
user_agent=Network.UA,
ignore_https_errors=False,
viewport={"width": 1366, "height": 768},
device_scale_factor=1,
locale="en-US",
timezone_id="America/New_York",
color_scheme="dark",
permissions=["geolocation"],
extra_http_headers={
"Accept-Language": "en-US,en;q=0.9",
"Upgrade-Insecure-Requests": "1",
},
)
await context.add_init_script("""
Object.defineProperty(navigator, "webdriver", { get: () => undefined });
Object.defineProperty(navigator, "languages", {
get: () => ["en-US", "en"],
});
Object.defineProperty(navigator, "plugins", {
get: () => [1, 2, 3, 4],
});
const elementDescriptor = Object.getOwnPropertyDescriptor(
HTMLElement.prototype,
"offsetHeight"
);
Object.defineProperty(HTMLDivElement.prototype, "offsetHeight", {
...elementDescriptor,
get: function () {
if (this.id === "modernizr") {
return 24;
}
return elementDescriptor.get.apply(this);
},
});
Object.defineProperty(window.screen, "width", { get: () => 1366 });
Object.defineProperty(window.screen, "height", { get: () => 768 });
const getParameter = WebGLRenderingContext.prototype.getParameter;
WebGLRenderingContext.prototype.getParameter = function (param) {
if (param === 37445) return "Intel Inc."; // UNMASKED_VENDOR_WEBGL
if (param === 37446) return "Intel Iris OpenGL Engine"; // UNMASKED_RENDERER_WEBGL
return getParameter.apply(this, [param]);
};
const observer = new MutationObserver((mutations) => {
mutations.forEach((mutation) => {
mutation.addedNodes.forEach((node) => {
if (node.tagName === "IFRAME" && node.hasAttribute("sandbox")) {
node.removeAttribute("sandbox");
}
});
});
});
observer.observe(document.documentElement, { childList: true, subtree: true });
""")
return brwsr, context
network = Network()

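The tuple-returning `Network.browser` helper shown above gives callers a `(Browser, BrowserContext)` pair: `browser="external"` attaches to an already-running Chromium over CDP and reuses its default context, anything else launches headless Firefox and builds a hardened context (the long stealth init script in the hunk). A condensed usage sketch, omitting the stealth script and most context options:

```python
import asyncio

from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright


async def browser(playwright: Playwright, browser: str = "internal") -> tuple[Browser, BrowserContext]:
    if browser == "external":
        brwsr = await playwright.chromium.connect_over_cdp("http://localhost:9222")
        context = brwsr.contexts[0]  # reuse the live browser's default context
    else:
        brwsr = await playwright.firefox.launch(headless=True)
        context = await brwsr.new_context(
            viewport={"width": 1366, "height": 768},
            locale="en-US",
            timezone_id="America/New_York",
        )
    return brwsr, context


async def demo() -> None:
    async with async_playwright() as p:
        brwsr, context = await browser(p)  # "internal": launch Firefox
        try:
            page = await context.new_page()
            await page.goto("https://example.com")
            print(page.url)
        finally:
            await brwsr.close()


if __name__ == "__main__":
    asyncio.run(demo())
```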

@ -5,7 +5,7 @@ from itertools import chain
from typing import Any
from urllib.parse import urljoin
from playwright.async_api import Browser, Page, TimeoutError
from playwright.async_api import BrowserContext, async_playwright
from .utils import Cache, Time, get_logger, leagues, network
@ -15,9 +15,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "WATCHFTY"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
API_FILE = Cache(f"{TAG}-api", exp=19_800)
API_FILE = Cache(f"{TAG.lower()}-api.json", exp=19_800)
API_URL = "https://api.watchfooty.st"
@ -73,7 +73,7 @@ async def refresh_api_cache(now: Time) -> list[dict[str, Any]]:
async def process_event(
url: str,
url_num: int,
page: Page,
context: BrowserContext,
) -> tuple[str | None, str | None]:
pattern = re.compile(r"\((\d+)\)")
@ -82,6 +82,8 @@ async def process_event(
got_one = asyncio.Event()
page = await context.new_page()
handler = partial(
network.capture_req,
captured=captured,
@ -115,8 +117,7 @@ async def process_event(
try:
first_available = await page.wait_for_selector(
'a[href*="/stream/"]',
timeout=3_000,
'a[href*="/stream/"]', timeout=3_000
)
except TimeoutError:
log.warning(f"URL {url_num}) No available stream links.")
@ -175,6 +176,8 @@ async def process_event(
finally:
page.remove_listener("request", handler)
await page.close()
async def get_events(base_url: str, cached_keys: list[str]) -> list[dict[str, str]]:
now = Time.clean(Time.now())
@ -232,7 +235,7 @@ async def get_events(base_url: str, cached_keys: list[str]) -> list[dict[str, st
return events
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
@ -257,14 +260,16 @@ async def scrape(browser: Browser) -> None:
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
async with async_playwright() as p:
browser, context = await network.browser(p, browser="external")
try:
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=ev["link"],
url_num=i,
page=page,
context=context,
)
url, iframe = await network.safe_process(
@ -302,6 +307,9 @@ async def scrape(browser: Browser) -> None:
urls[key] = entry
finally:
await browser.close()
if new_count := valid_count - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")


@ -1,7 +1,7 @@
import asyncio
from functools import partial
from playwright.async_api import Browser
from playwright.async_api import async_playwright
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
@ -12,9 +12,9 @@ urls: dict[str, dict[str, str | float]] = {}
TAG = "WEBCAST"
CACHE_FILE = Cache(TAG, exp=10_800)
CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
HTML_CACHE = Cache(f"{TAG}-html", exp=86_400)
HTML_CACHE = Cache(f"{TAG.lower()}-html.json", exp=86_400)
BASE_URLS = {"NFL": "https://nflwebcast.com", "NHL": "https://slapstreams.com"}
@ -110,7 +110,7 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
return live
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
cached_urls = CACHE_FILE.load()
cached_count = len(cached_urls)
@ -126,14 +126,16 @@ async def scrape(browser: Browser) -> None:
log.info(f"Processing {len(events)} new URL(s)")
if events:
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
async with async_playwright() as p:
browser, context = await network.browser(p)
try:
for i, ev in enumerate(events, start=1):
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
context=context,
log=log,
)
@ -167,6 +169,9 @@ async def scrape(browser: Browser) -> None:
urls[key] = cached_urls[key] = entry
finally:
await browser.close()
if new_count := len(cached_urls) - cached_count:
log.info(f"Collected and cached {new_count} new event(s)")


@ -121,7 +121,7 @@ write_readme() {
echo "#### Combined (Base + Live Events) URL"
echo -e "\`\`\`\nhttps://s.id/d9M3U8\n\`\`\`\n"
echo "#### EPG URL"
echo -e "\`\`\`\nhttps://s.id/d9sEPG\n\`\`\`\n"
echo -e "\`\`\`\nhttps://s.id/d9EPG\n\`\`\`\n"
echo "---"
echo "#### Mirrors"
echo -n "[GitHub](https://github.com/doms9/iptv) | "


@ -1,16 +1,13 @@
## Base Log @ 2026-01-24 20:41 UTC
## Base Log @ 2026-01-23 20:46 UTC
### ✅ Working Streams: 138<br>❌ Dead Streams: 7
### ✅ Working Streams: 141<br>❌ Dead Streams: 4
| Channel | Error (Code) | Link |
| ------- | ------------ | ---- |
| BBC World News | HTTP Error (404) | `http://fl1.moveonjoy.com/BBC_WORLD_NEWS/index.m3u8` |
| Cozi TV | HTTP Error (404) | `https://fl1.moveonjoy.com/COZI_TV/index.m3u8` |
| FXX | HTTP Error (404) | `https://fl1.moveonjoy.com/FXX/index.m3u8` |
| History Channel | HTTP Error (404) | `https://fl1.moveonjoy.com/history_channel/index.m3u8` |
| NFL RedZone | Unknown status (302) | `http://hardcoremedia.xyz:80/NW3Vk7xXwW/8375773282/249239` |
| Premier Sports 2 | Unknown status (302) | `http://hardcoremedia.xyz:80/NW3Vk7xXwW/8375773282/117038` |
| Sportsnet One | HTTP Error (404) | `http://mytvstream.net:8080/live/k4Svp2/645504/57297.m3u8` |
| TSN1 | HTTP Error (403) | `http://hardcoremedia.xyz:80/NW3Vk7xXwW/8375773282/142475` |
| Sportsnet One | HTTP Error (403) | `http://mytvstream.net:8080/live/k4Svp2/645504/57297.m3u8` |
---
#### Base Channels URL
```
@ -29,7 +26,7 @@ https://s.id/d9M3U8
#### EPG URL
```
https://s.id/d9sEPG
https://s.id/d9EPG
```
---