init

2026-03-10 11:47:34 +01:00 · 2025-08-17 10:05:09 -04:00 · 2025-08-17 10:05:09 -04:00 · 6e1c25caa4
commit 6e1c25caa4
16 changed files with 80378 additions and 0 deletions
--- a/M3U8/scrape/fstv.py
+++ b/M3U8/scrape/fstv.py
@ -0,0 +1,118 @@
+from urllib.parse import urljoin
+
+import httpx
+from bs4 import BeautifulSoup
+
+# Collected stream links, keyed by labels of the form "[event] match (S<n>)".
+# main() fills this mapping in place.
+urls: dict[str, str] = {}
+
+# Candidate mirror base URLs. This is a set, so the order in which mirrors are
+# tried is not guaranteed.
+mirrors = {"https://fstv.online", "https://fstv.space", "https://fstv.zip"}
+
+
+def check_status(l: str) -> bool:
+    """Report whether a GET request to ``l`` succeeds with HTTP 200.
+
+    Any exception raised while sending the request or by
+    ``raise_for_status`` is swallowed and reported as ``False``.
+    """
+    try:
+        response = httpx.get(l)
+        response.raise_for_status()
+    except Exception:
+        return False
+    else:
+        return response.status_code == 200
+
+
+def get_base() -> str | None:
+    """Return the first mirror that passes ``check_status``.
+
+    Mirrors are probed lazily, so probing stops at the first one that
+    responds. ``mirrors`` is a set, so which live mirror is found first is
+    not guaranteed.
+
+    Returns:
+        The base URL of a reachable mirror, or ``None`` when none responds.
+    """
+    # next() with a default makes the "no mirror available" outcome explicit
+    # instead of silently falling off the end of the function.
+    return next(filter(check_status, mirrors), None)
+
+
+def get_hrefs(base_url: str) -> list[tuple[str, str]] | tuple[None, None]:
+    """Collect the live fixtures listed on a mirror's front page.
+
+    Args:
+        base_url: Base URL of the mirror to scrape.
+
+    Returns:
+        A list of ``(event_name, match_url)`` pairs, one per distinct event
+        name, or ``(None, None)`` when the page could not be fetched. Callers
+        must check for that sentinel before iterating over the result.
+    """
+    print(f'Scraping from "{base_url}"')
+
+    try:
+        r = httpx.get(
+            base_url,
+            timeout=5,
+        )
+
+        r.raise_for_status()
+    except Exception as e:
+        print(f'Failed to fetch "{base_url}"\n{e}')
+
+        return None, None
+
+    soup = BeautifulSoup(r.text, "lxml")
+
+    # Keyed by event name: a later item with the same name replaces an
+    # earlier one.
+    events: dict[str, str] = {}
+
+    for wrapper in soup.find_all("div", class_="fixtures-live-wrapper"):
+        for game in wrapper.select(".match-table-item"):
+            league_name = game.select_one(".league-info a.league-name")
+            league_match = game.select_one(".common-table-row a[href*='/match/']")
+
+            if not (league_name and league_match):
+                continue
+
+            full_text = league_name.get_text(strip=True)
+
+            # Keep only the text after the first "]" when one is present.
+            _, bracket, remainder = full_text.partition("]")
+            event_name = remainder.strip() if bracket else full_text
+
+            events[event_name] = urljoin(base_url, league_match["href"])
+
+    # Materialise the view so the result matches the annotated list type.
+    return list(events.items())
+
+
+def fetch_m3u8(url: str) -> tuple[str, list[str]] | tuple[None, None]:
+    """Fetch a match page and extract its display name and stream links.
+
+    Args:
+        url: URL of the match page.
+
+    Returns:
+        ``(match_name, links)``, where ``links`` holds the ``data-link`` value
+        of every ``button.btn-server`` element that has one (possibly an empty
+        list) and ``match_name`` may be ``None`` when no name was found.
+        ``(None, None)`` when the page could not be fetched.
+    """
+    try:
+        r = httpx.get(
+            url,
+            timeout=5,
+        )
+
+        r.raise_for_status()
+    except Exception as e:
+        print(f'Failed to fetch "{url}"\n{e}')
+
+        return None, None
+
+    soup = BeautifulSoup(r.text, "lxml")
+
+    # Preferred name source: the text of the last category link.
+    category_links = soup.select(".common-list-category .category-item a")
+    match_name = category_links[-1].get_text(strip=True) if category_links else None
+
+    # Fall back to the og:title meta tag when the name is missing or is just
+    # the literal "vs".
+    if not match_name or match_name.lower() == "vs":
+        og_title = soup.find("meta", property="og:title")
+
+        if og_title is not None:
+            # .get() avoids a KeyError when the tag has no "content" attribute.
+            content = og_title.get("content")
+
+            if content is not None:
+                match_name = content.split(" start on")[0].strip()
+
+    btns = soup.select("button.btn-server")
+
+    return match_name, [btn["data-link"] for btn in btns if btn.has_attr("data-link")]
+
+
+def main() -> None:
+    """Scrape every live event from a reachable mirror into ``urls``.
+
+    Each stream link is stored under ``"[event] match (S<n>)"``, or
+    ``"[event] (S<n>)"`` when no match name is available, where ``n`` is the
+    1-based position of the link on the match page.
+    """
+    base_url = get_base()
+
+    if not base_url:
+        print("No reachable mirror found")
+
+        return
+
+    events = get_hrefs(base_url)
+
+    # get_hrefs reports a failed fetch as (None, None); iterating over that
+    # sentinel would fail while unpacking, so stop here instead.
+    if events == (None, None):
+        return
+
+    for event, href in events:
+        # One unusable event must not discard the remaining ones.
+        if not href:
+            continue
+
+        match_name, m3u8_urls = fetch_m3u8(href)
+
+        if not m3u8_urls:
+            continue
+
+        label = f"[{event}] {match_name}" if match_name else f"[{event}]"
+
+        for i, link in enumerate(m3u8_urls, start=1):
+            urls[f"{label} (S{i})"] = link
+
+    print(f"Collected {len(urls)} live events")
+
+
+# Run the scraper only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
--- a/M3U8/scrape/tvpass.json
+++ b/M3U8/scrape/tvpass.json
@ -0,0 +1,24 @@
+{
+  "[MLB] Philadelphia Phillies @ Washington Nationals": "https://tvpass.org/live/mlb-01/sd",
+  "[WNBA] Indiana Fever @ Connecticut Sun": "https://tvpass.org/live/WNBA01/sd",
+  "[NFL] Jacksonville Jaguars @ New Orleans Saints": "https://tvpass.org/live/NFL23/sd",
+  "[MLB] Miami Marlins @ Boston Red Sox": "https://tvpass.org/live/mlb-22/sd",
+  "[MLB] Texas Rangers @ Toronto Blue Jays": "https://tvpass.org/live/mlb-12/sd",
+  "[MLB] Atlanta Braves @ Cleveland Guardians": "https://tvpass.org/live/mlb-30/sd",
+  "[MLB] Milwaukee Brewers @ Cincinnati Reds": "https://tvpass.org/live/mlb-04/sd",
+  "[MLB] Baltimore Orioles @ Houston Astros": "https://tvpass.org/live/mlb-06/sd",
+  "[MLB] Chicago White Sox @ Kansas City Royals": "https://tvpass.org/live/mlb-05/sd",
+  "[MLB] Detroit Tigers @ Minnesota Twins": "https://tvpass.org/live/mlb-20/sd",
+  "[MLB] New York Yankees @ St. Louis Cardinals": "https://tvpass.org/live/mlb-08/sd",
+  "[MLB] Pittsburgh Pirates @ Chicago Cubs": "https://tvpass.org/live/mlb-23/sd",
+  "[WNBA] Los Angeles Sparks @ Washington Mystics": "https://tvpass.org/live/WNBA02/sd",
+  "[MLB] Arizona Diamondbacks @ Colorado Rockies": "https://tvpass.org/live/mlb-25/sd",
+  "[WNBA] Dallas Wings @ Las Vegas Aces": "https://tvpass.org/live/WNBA03/sd",
+  "[MLB] Los Angeles Angels @ Oakland Athletics": "https://tvpass.org/live/mlb-19/sd",
+  "[MLB] Tampa Bay Rays @ San Francisco Giants": "https://tvpass.org/live/mlb-27/sd",
+  "[MLB] San Diego Padres @ Los Angeles Dodgers": "https://tvpass.org/live/mlb-14/sd",
+  "[WNBA] Phoenix Mercury @ Seattle Storm": "https://tvpass.org/live/WNBA04/sd",
+  "[MLB] Seattle Mariners @ New York Mets": "https://tvpass.org/live/mlb-24/sd",
+  "[NFL] Buffalo Bills @ Chicago Bears": "https://tvpass.org/live/NFL11/sd",
+  "[WNBA] Atlanta Dream @ Golden State Valkyries": "https://tvpass.org/live/WNBA05/sd"
+}
--- a/M3U8/scrape/tvpass.py
+++ b/M3U8/scrape/tvpass.py
@ -0,0 +1,69 @@
+import json
+import re
+from pathlib import Path
+from urllib.parse import urlparse
+
+import httpx
+
+# Playlist endpoint that fetch_m3u8() downloads.
+base_url = "https://tvpass.org/playlist/m3u"
+# JSON file next to this module; main() writes the collected entries to it.
+base_file = Path(__file__).parent / "tvpass.json"
+
+# Collected stream URLs keyed by "[SPORT] name"; main() fills this in place.
+urls: dict[str, str] = {}
+
+
+def fetch_m3u8() -> list[str] | None:
+    """Download the playlist at ``base_url`` and return it split into lines.
+
+    Redirects are followed. On any request error or error status the failure
+    is printed and ``None`` is returned.
+    """
+    try:
+        response = httpx.get(base_url, follow_redirects=True, timeout=5)
+        response.raise_for_status()
+    except Exception as e:
+        print(f'Failed to fetch "{base_url}"\n{e}')
+        return None
+    else:
+        return response.text.splitlines()
+
+
+def main() -> None:
+    """Scrape the playlist and store its ``/sd`` event streams.
+
+    Only ``#EXTINF`` entries whose ``tvg-id`` attribute is present but empty
+    are considered. The line that follows such an entry is taken as its URL,
+    and only URLs ending in ``/sd`` are kept. Results go into ``urls`` under
+    ``"[SPORT] name"`` and, when at least one was collected, are written to
+    ``base_file`` as JSON.
+    """
+    print(f'Scraping from "{base_url}"')
+
+    if not (data := fetch_m3u8()):
+        return
+
+    # Compiled once here rather than looked up again for every line.
+    tvg_id_re = re.compile(r'tvg-id="([^"]*)"')
+    tvg_name_re = re.compile(r'tvg-name="([^"]*)"')
+
+    # Walk each line together with the line that follows it.
+    for info, url in zip(data, data[1:]):
+        if not info.startswith("#EXTINF"):
+            continue
+
+        tvg_id_match = tvg_id_re.search(info)
+
+        # Keep only entries whose tvg-id is explicitly empty.
+        if tvg_id_match is None or tvg_id_match[1] != "":
+            continue
+
+        tvg_name_match = tvg_name_re.search(info)
+
+        # An entry without tvg-name cannot be labelled; skip it rather than
+        # fail on indexing a missing match.
+        if tvg_name_match is None:
+            continue
+
+        if not url.endswith("/sd"):
+            continue
+
+        # Keep only the part of the name before the first "(".
+        tvg_name = tvg_name_match[1].split("(")[0].strip()
+
+        path_parts = urlparse(url).path.strip("/").split("/")
+
+        if len(path_parts) >= 2 and path_parts[-1] == "sd":
+            # The sport tag is the letters of the second path segment,
+            # e.g. "mlb-01" -> "MLB".
+            sport = "".join(x for x in path_parts[1] if x.isalpha()).upper()
+        else:
+            sport = "UNKNWN"
+
+        urls[f"[{sport}] {tvg_name}"] = url
+
+    print(f"Collected {len(urls)} live events")
+
+    # An empty result leaves the existing file untouched.
+    if urls:
+        base_file.write_text(json.dumps(urls, indent=2), encoding="utf-8")
+
+
+# Run the scraper only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()