diff --git a/M3U8/fetch.py b/M3U8/fetch.py
index 7fc19b0..565926d 100644
--- a/M3U8/fetch.py
+++ b/M3U8/fetch.py
@@ -8,6 +8,7 @@ from scrapers import (
     embedhd,
     fawa,
     istreameast,
+    pawa,
     pixel,
     ppv,
     roxie,
@@ -55,6 +56,7 @@ async def main() -> None:
         asyncio.create_task(embedhd.scrape()),
         asyncio.create_task(fawa.scrape()),
         asyncio.create_task(istreameast.scrape()),
+        asyncio.create_task(pawa.scrape()),
         asyncio.create_task(pixel.scrape()),
         asyncio.create_task(ppv.scrape()),
         asyncio.create_task(roxie.scrape()),
@@ -80,6 +82,7 @@ async def main() -> None:
         | embedhd.urls
         | fawa.urls
         | istreameast.urls
+        | pawa.urls
         | pixel.urls
         | ppv.urls
         | roxie.urls
diff --git a/M3U8/scrapers/istreameast.py b/M3U8/scrapers/istreameast.py
index 6274ab0..3d5cf51 100644
--- a/M3U8/scrapers/istreameast.py
+++ b/M3U8/scrapers/istreameast.py
@@ -12,7 +12,7 @@ urls: dict[str, dict[str, str | float]] = {}
 
 TAG = "iSTRMEAST"
 
-CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=3_600)
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
 
 BASE_URL = "https://istreameast.app"
 
diff --git a/M3U8/scrapers/pawa.py b/M3U8/scrapers/pawa.py
new file mode 100644
index 0000000..5c6d75b
--- /dev/null
+++ b/M3U8/scrapers/pawa.py
@@ -0,0 +1,149 @@
+import base64
+import re
+from functools import partial
+
+import feedparser
+from selectolax.parser import HTMLParser
+
+from .utils import Cache, Time, get_logger, leagues, network
+
+log = get_logger(__name__)
+
+urls: dict[str, dict[str, str | float]] = {}
+
+TAG = "PAWA"
+
+CACHE_FILE = Cache(f"{TAG.lower()}.json", exp=10_800)
+
+BASE_URL = "https://pawastreams.net/feed"
+
+
+async def process_event(url: str, url_num: int) -> str | None:
+    if not (event_data := await network.request(url, log=log)):
+        log.info(f"URL {url_num}) Failed to load url.")
+
+        return
+
+    soup = HTMLParser(event_data.content)
+
+    if not (iframe := soup.css_first("iframe")):
+        log.warning(f"URL {url_num}) No iframe element found.")
+
+        return
+
+    if not (iframe_src := iframe.attributes.get("src")):
+        log.warning(f"URL {url_num}) No iframe source found.")
+
+        return
+
+    if not (iframe_src_data := await network.request(iframe_src, log=log)):
+        log.info(f"URL {url_num}) Failed to load iframe source.")
+
+        return
+
+    pattern = re.compile(r"source:\s*window\.atob\(\s*'([^']+)'\s*\)", re.IGNORECASE)
+
+    if not (match := pattern.search(iframe_src_data.text)):
+        log.warning(f"URL {url_num}) No Clappr source found.")
+
+        return
+
+    log.info(f"URL {url_num}) Captured M3U8")
+
+    return base64.b64decode(match[1]).decode("utf-8")
+
+
+async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+    events = []
+
+    if not (html_data := await network.request(BASE_URL, log=log)):
+        return events
+
+    feed = feedparser.parse(html_data.content)
+
+    for entry in feed.entries:
+        if not (link := entry.get("link")):
+            continue
+
+        if not (title := entry.get("title")):
+            continue
+
+        sport = "Soccer"
+
+        title = title.replace(" v ", " vs ")
+
+        if f"[{sport}] {title} ({TAG})" in cached_keys:
+            continue
+
+        events.append(
+            {
+                "sport": sport,
+                "event": title,
+                "link": link,
+            }
+        )
+
+    return events
+
+
+async def scrape() -> None:
+    cached_urls = CACHE_FILE.load()
+
+    cached_count = len(cached_urls)
+
+    urls.update(cached_urls)
+
+    log.info(f"Loaded {cached_count} event(s) from cache")
+
+    log.info(f'Scraping from "{BASE_URL}"')
+
+    events = await get_events(cached_urls.keys())
+
+    log.info(f"Processing {len(events)} new URL(s)")
+
+    if events:
+        now = Time.clean(Time.now()).timestamp()
+
+        for i, ev in enumerate(events, start=1):
+            handler = partial(
+                process_event,
+                url=ev["link"],
+                url_num=i,
+            )
+
+            url = await network.safe_process(
+                handler,
+                url_num=i,
+                semaphore=network.HTTP_S,
+                log=log,
+            )
+
+            if url:
+                sport, event, link = (
+                    ev["sport"],
+                    ev["event"],
+                    ev["link"],
+                )
+
+                key = f"[{sport}] {event} ({TAG})"
+
+                tvg_id, logo = leagues.get_tvg_info(sport, event)
+
+                entry = {
+                    "url": url,
+                    "logo": logo,
+                    "base": link,
+                    "timestamp": now,
+                    "id": tvg_id or "Live.Event.us",
+                    "link": link,
+                }
+
+                urls[key] = cached_urls[key] = entry
+
+    if new_count := len(cached_urls) - cached_count:
+        log.info(f"Collected and cached {new_count} new event(s)")
+
+    else:
+        log.info("No new events found")
+
+    CACHE_FILE.write(cached_urls)
diff --git a/pyproject.toml b/pyproject.toml
index 7049670..feb1f64 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,6 +3,7 @@ name = "iptv"
 version = "0.0.3"
 requires-python = ">=3.10"
 dependencies = [
+    "feedparser>=6.0.12",
     "httpx[http2]>=0.28.1",
     "playwright>=1.55.0",
     "pytz>=2025.2",
diff --git a/uv.lock b/uv.lock
index 1ac3d3e..e2fc5bd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.10"
 
 [[package]]
@@ -37,6 +37,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
 ]
 
+[[package]]
+name = "feedparser"
+version = "6.0.12"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "sgmllib3k" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/dc/79/db7edb5e77d6dfbc54d7d9df72828be4318275b2e580549ff45a962f6461/feedparser-6.0.12.tar.gz", hash = "sha256:64f76ce90ae3e8ef5d1ede0f8d3b50ce26bcce71dd8ae5e82b1cd2d4a5f94228", size = 286579, upload-time = "2025-09-10T13:33:59.486Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4e/eb/c96d64137e29ae17d83ad2552470bafe3a7a915e85434d9942077d7fd011/feedparser-6.0.12-py3-none-any.whl", hash = "sha256:6bbff10f5a52662c00a2e3f86a38928c37c48f77b3c511aedcd51de933549324", size = 81480, upload-time = "2025-09-10T13:33:58.022Z" },
+]
+
 [[package]]
 name = "greenlet"
 version = "3.3.0"
@@ -179,6 +191,7 @@ name = "iptv"
 version = "0.0.3"
 source = { virtual = "." }
 dependencies = [
+    { name = "feedparser" },
     { name = "httpx", extra = ["http2"] },
     { name = "playwright" },
     { name = "pytz" },
@@ -187,6 +200,7 @@ dependencies = [
 
 [package.metadata]
 requires-dist = [
+    { name = "feedparser", specifier = ">=6.0.12" },
     { name = "httpx", extras = ["http2"], specifier = ">=0.28.1" },
     { name = "playwright", specifier = ">=1.55.0" },
     { name = "pytz", specifier = ">=2025.2" },
@@ -295,6 +309,12 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/94/7f/f783e2254db082df4f6bc00fe3b32b9dd27c3b7302a44c8c37728bb67fb7/selectolax-0.4.6-cp314-cp314t-win_arm64.whl", hash = "sha256:66558cfb1c7402fed0f47b9a2692eed53e3e2f345526314b493b5093cb951e21", size = 1906079, upload-time = "2025-12-06T12:35:32.951Z" },
 ]
 
+[[package]]
+name = "sgmllib3k"
+version = "1.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9e/bd/3704a8c3e0942d711c1299ebf7b9091930adae6675d7c8f476a7ce48653c/sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9", size = 5750, upload-time = "2010-08-24T14:33:52.445Z" }
+
 [[package]]
 name = "typing-extensions"
 version = "4.15.0"
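
Illustrative sketch of the extraction step in M3U8/scrapers/pawa.py: process_event() finds the Clappr player's source: window.atob('...') expression in the iframe page and base64-decodes the capture. Only the regex and the decode call are taken from the patch; the embed snippet and the decoded URL below are invented.

import base64
import re

# Invented embed-page snippet; the real pages are fetched from the pawastreams iframe source.
sample = "player = new Clappr.Player({ source: window.atob('aHR0cHM6Ly9leGFtcGxlLmNvbS9saXZlL3N0cmVhbS5tM3U4') });"

# Same pattern and decode step as process_event() in the patch.
pattern = re.compile(r"source:\s*window\.atob\(\s*'([^']+)'\s*\)", re.IGNORECASE)

if match := pattern.search(sample):
    print(base64.b64decode(match[1]).decode("utf-8"))  # https://example.com/live/stream.m3u8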
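
Illustrative sketch of the feed handling in get_events(): feedparser entries are dict-like, so items missing a link or title can be skipped with .get(), and ' v ' is normalised to ' vs ' in the event name. The RSS document and URLs below are invented; the real feed is https://pawastreams.net/feed.

import feedparser

# Invented one-item RSS document standing in for the pawastreams feed.
rss = b"""<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
  <channel>
    <title>Example feed</title>
    <item>
      <title>Team A v Team B</title>
      <link>https://example.com/watch/team-a-team-b</link>
    </item>
  </channel>
</rss>"""

feed = feedparser.parse(rss)

for entry in feed.entries:
    # Same access pattern as get_events(): skip incomplete entries, normalise the title.
    if not (link := entry.get("link")) or not (title := entry.get("title")):
        continue

    print(title.replace(" v ", " vs "), link)  # Team A vs Team B https://example.com/watch/team-a-team-b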