doms9 2025-11-13 12:43:55 -05:00
parent 4f6e1caa5f
commit 00000d9c6d
16 changed files with 106 additions and 89 deletions

View file

@@ -11,8 +11,8 @@ from scrapers import (
     roxie,
     streambtw,
     streameast,
+    streamfree,
     strmd,
-    strmfree,
     tvpass,
     watchfooty,
 )
@@ -50,8 +50,8 @@ async def main() -> None:
         asyncio.create_task(roxie.scrape(network.client)),
         asyncio.create_task(streambtw.scrape(network.client)),
         asyncio.create_task(streameast.scrape(network.client)),
+        asyncio.create_task(streamfree.scrape(network.client)),
         asyncio.create_task(strmd.scrape(network.client)),
-        asyncio.create_task(strmfree.scrape(network.client)),
         asyncio.create_task(tvpass.scrape(network.client)),
         asyncio.create_task(watchfooty.scrape(network.client)),
     ]
@@ -67,7 +67,7 @@ async def main() -> None:
         | streambtw.urls
         | streameast.urls
         | strmd.urls
-        | strmfree.urls
+        | streamfree.urls
         | tvpass.urls
         | watchfooty.urls
     )

View file

@@ -1,6 +1,5 @@
 import re
 from functools import partial
-from pathlib import Path
 from urllib.parse import quote, urljoin
 import httpx
@@ -10,12 +9,12 @@ from .utils import Cache, Time, get_logger, leagues, network
 log = get_logger(__name__)
-urls: dict[str, dict[str, str]] = {}
+urls: dict[str, dict[str, str | float]] = {}
+CACHE_FILE = Cache("fawa.json", exp=10_800)
 BASE_URL = "http://www.fawanews.sc/"
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "fawa.json", exp=10_800)
 async def process_event(
     client: httpx.AsyncClient,

View file

@@ -1,5 +1,4 @@
 from functools import partial
-from pathlib import Path
 import httpx
 from playwright.async_api import async_playwright
@@ -10,9 +9,9 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "lotus.json", exp=3_600)
-API_CACHE = Cache(Path(__file__).parent / "caches" / "lotus_api.json", exp=28_800)
+CACHE_FILE = Cache("lotus.json", exp=3_600)
+API_CACHE = Cache("lotus-api.json", exp=28_800)
 BASE_URL = "https://lotusgamehd.xyz/api-event.php"
@@ -33,6 +32,7 @@ async def refresh_api_cache(
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{url}": {e}')
+        return {}
     data = r.json()
@@ -58,7 +58,7 @@ async def get_events(
     API_CACHE.write(api_data)
-    events: list[dict[str, str]] = []
+    events = []
     for info in api_data["days"]:
         day = Time.from_str(info["day_et"])
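
The `return {}` added after `log.error(...)` matters: if `client.get` itself raised, `r` was never bound, so falling through to `data = r.json()` would raise `UnboundLocalError` on top of the logged failure. A minimal sketch of the guard pattern this commit adds across the scrapers (`fetch_json` and the bare `print` are stand-ins for the real helpers):

import httpx


async def fetch_json(client: httpx.AsyncClient, url: str) -> dict:
    try:
        r = await client.get(url, timeout=10)
        r.raise_for_status()
    except Exception as e:
        # Bail out early: if client.get() raised, `r` is unbound and
        # falling through to r.json() would raise UnboundLocalError.
        print(f'Failed to fetch "{url}": {e}')
        return {}

    return r.json()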

View file

@@ -1,5 +1,4 @@
 from functools import partial
-from pathlib import Path
 from urllib.parse import unquote, urljoin
 import httpx
@@ -9,7 +8,9 @@ from .utils import Cache, Time, get_logger, leagues, network
 log = get_logger(__name__)
-urls: dict[str, dict[str, str]] = {}
+urls: dict[str, dict[str, str | float]] = {}
+CACHE_FILE = Cache("fstv.json", exp=10_800)
 MIRRORS = [
     "https://fstv.online",
@@ -18,8 +19,6 @@ MIRRORS = [
     "https://fstv.us",
 ]
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "fstv.json", exp=10_800)
 async def process_event(
     client: httpx.AsyncClient,
@@ -179,3 +178,6 @@ async def scrape(client: httpx.AsyncClient) -> None:
        log.info("No new events found")
     CACHE_FILE.write(cached_urls)
+
+
+# cloudflare bot check added

View file

@@ -1,5 +1,4 @@
 import re
-from pathlib import Path
 import httpx
@@ -9,9 +8,9 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-API_FILE = Cache(Path(__file__).parent / "caches" / "pixel_api.json", exp=28_800)
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "pixel.json", exp=10_800)
+CACHE_FILE = Cache("pixel.json", exp=10_800)
+API_FILE = Cache("pixel-api.json", exp=28_800)
 BASE_URL = "https://pixelsport.tv/backend/livetv/events"
@@ -28,6 +27,7 @@ async def refresh_api_cache(
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{url}": {e}')
+        return {}
     data = r.json()
@@ -63,7 +63,7 @@ async def get_events(
     end_dt = now.delta(minutes=30)
     for event in api_data["events"]:
-        event_dt = Time.from_str(f'{event["date"]}', timezone="UTC")
+        event_dt = Time.from_str(event["date"], timezone="UTC")
         if now.date() != event_dt.date():
             continue

View file

@@ -1,5 +1,4 @@
 from functools import partial
-from pathlib import Path
 from urllib.parse import urljoin
 import httpx
@@ -11,9 +10,9 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-API_FILE = Cache(Path(__file__).parent / "caches" / "ppv_api.json", exp=28_800)
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "ppv.json", exp=10_800)
+CACHE_FILE = Cache("ppv.json", exp=10_800)
+API_FILE = Cache("ppv-api.json", exp=28_800)
 BASE_URL = "https://ppv.to"
@@ -29,6 +28,7 @@ async def refresh_api_cache(
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{url}": {e}')
+        return {}
     return r.json()
@@ -49,7 +49,7 @@ async def get_events(
     API_FILE.write(api_data)
-    events: list[dict[str, str]] = []
+    events = []
     now = Time.clean(Time.now())
     start_dt = now.delta(minutes=-30)

View file

@@ -1,7 +1,6 @@
 import asyncio
 import re
 from functools import partial
-from pathlib import Path
 from urllib.parse import urljoin
 import httpx
@@ -11,14 +10,14 @@ from .utils import Cache, Time, get_logger, leagues, network
 log = get_logger(__name__)
-urls: dict[str, dict[str, str]] = {}
+urls: dict[str, dict[str, str | float]] = {}
+CACHE_FILE = Cache("roxie.json", exp=10_800)
+HTML_CACHE = Cache("roxie-html.json", exp=28_800)
 MIRRORS = ["https://roxiestreams.live", "https://roxiestreams.cc"]
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "roxie.json", exp=10_800)
-HTML_CACHE = Cache(Path(__file__).parent / "caches" / "roxie_html.json", exp=28_800)
 async def process_event(
     client: httpx.AsyncClient,
@@ -80,7 +79,7 @@ async def refresh_html_cache(
         data_start = span.attributes["data-start"].rsplit(":", 1)[0]
-        event_dt = Time.from_str(f"{data_start}", timezone="PST")
+        event_dt = Time.from_str(data_start, timezone="PST")
         key = f"[{sport}] {event} (ROXIE)"

View file

@@ -1,6 +1,5 @@
 import re
 from functools import partial
-from pathlib import Path
 from urllib.parse import urljoin
 import httpx
@@ -10,12 +9,12 @@ from .utils import Cache, Time, get_logger, leagues, network
 log = get_logger(__name__)
-urls: dict[str, dict[str, str]] = {}
+urls: dict[str, dict[str, str | float]] = {}
+CACHE_FILE = Cache("streambtw.json", exp=3_600)
 BASE_URL = "https://streambtw.com"
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "streambtw.json", exp=3_600)
 async def process_event(
     client: httpx.AsyncClient,

View file

@@ -1,5 +1,4 @@
 from functools import partial
-from pathlib import Path
 from urllib.parse import urljoin
 import httpx
@@ -12,9 +11,9 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "streameast.json", exp=10_800)
+CACHE_FILE = Cache("streameast.json", exp=10_800)
-prefix = {
+prefixes = {
     "ch": None,
     "sg": None,
     "tw": None,
@@ -33,8 +32,8 @@ prefix = {
 }
 MIRRORS = [
-    *[f"https://streameast.{ext}" for ext in prefix if not prefix[ext]],
-    *[f"https://thestreameast.{ext}" for ext in prefix if prefix[ext] == "the"],
+    *[f"https://streameast.{ext}" for ext in prefixes if not prefixes[ext]],
+    *[f"https://thestreameast.{ext}" for ext in prefixes if prefixes[ext] == "the"],
 ]
@@ -52,6 +51,7 @@ async def get_events(
         return []
     soup = HTMLParser(r.text)
+    events = []
     now = Time.clean(Time.now())
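
For reference, the renamed `prefixes` mapping drives the mirror list: a TLD mapped to `None` yields a `streameast.<ext>` mirror, and one mapped to `"the"` yields `thestreameast.<ext>`. A minimal sketch with a hypothetical three-entry mapping (the real dict has more TLDs, and `"xyz"` here is invented for illustration):

prefixes: dict[str, str | None] = {"ch": None, "sg": None, "xyz": "the"}

MIRRORS = [
    *[f"https://streameast.{ext}" for ext in prefixes if not prefixes[ext]],
    *[f"https://thestreameast.{ext}" for ext in prefixes if prefixes[ext] == "the"],
]

# ['https://streameast.ch', 'https://streameast.sg', 'https://thestreameast.xyz']
print(MIRRORS)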

View file

@@ -1,5 +1,4 @@
 from functools import partial
-from pathlib import Path
 from urllib.parse import urljoin
 import httpx
@@ -11,9 +10,9 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-API_FILE = Cache(Path(__file__).parent / "caches" / "strmfree_api.json", exp=28_800)
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "strmfree.json", exp=10_800)
+CACHE_FILE = Cache("streamfree.json", exp=10_800)
+API_FILE = Cache("streamfree-api.json", exp=28_800)
 BASE_URL = "https://streamfree.to"
@@ -29,6 +28,7 @@ async def refresh_api_cache(
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{url}": {e}')
+        return {}
     data = r.json()
@@ -52,7 +52,7 @@ async def get_events(
     API_FILE.write(api_data)
-    events: list[dict[str, str]] = []
+    events = []
     now = Time.clean(Time.now())
     start_dt = now.delta(hours=-1)

View file

@@ -1,7 +1,6 @@
 import asyncio
 import re
 from functools import partial
-from pathlib import Path
 from typing import Any
 from urllib.parse import urljoin
 import httpx
@@ -14,9 +13,9 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-API_FILE = Cache(Path(__file__).parent / "caches" / "strmd_api.json", exp=28_800)
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "strmd.json", exp=10_800)
+CACHE_FILE = Cache("strmd.json", exp=10_800)
+API_FILE = Cache("strmd-api.json", exp=28_800)
 MIRRORS = ["https://streamed.pk", "https://streami.su", "https://streamed.st"]
@@ -41,6 +40,7 @@ async def refresh_api_cache(
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{url}": {e}')
+        return {}
     data = r.json()
@@ -130,7 +130,7 @@ async def get_events(
     API_FILE.write(api_data)
-    events: list[dict[str, str]] = []
+    events = []
     now = Time.clean(Time.now())
     start_dt = now.delta(minutes=-30)

View file

@@ -1,5 +1,4 @@
 import re
-from pathlib import Path
 import httpx
@@ -7,12 +6,12 @@ from .utils import Cache, Time, get_logger, leagues
 log = get_logger(__name__)
-urls: dict[str, dict[str, str]] = {}
+urls: dict[str, dict[str, str | float]] = {}
+CACHE_FILE = Cache("tvpass.json", exp=86_400)
 BASE_URL = "https://tvpass.org/playlist/m3u"
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "tvpass.json", exp=86_400)
 async def fetch_m3u8(client: httpx.AsyncClient) -> list[str]:
     try:
@@ -20,6 +19,7 @@ async def fetch_m3u8(client: httpx.AsyncClient) -> list[str]:
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{BASE_URL}": {e}')
+        return []
     return r.text.splitlines()

View file

@@ -5,8 +5,8 @@ from .config import Time
 class Cache:
-    def __init__(self, file: Path, exp: int | float) -> None:
-        self.file = file
+    def __init__(self, file: str, exp: int | float) -> None:
+        self.file = Path(__file__).parent.parent / "caches" / file
         self.exp = exp
         self.now_ts = Time.now().timestamp()
@@ -17,6 +17,18 @@
         return self.now_ts - dt_ts < self.exp
+
+    def write(self, data: dict) -> None:
+        self.file.parent.mkdir(parents=True, exist_ok=True)
+
+        self.file.write_text(
+            json.dumps(
+                data,
+                indent=2,
+                ensure_ascii=False,
+            ),
+            encoding="utf-8",
+        )
     def load(
         self,
         per_entry: bool = True,
@@ -41,17 +53,5 @@
         return data if self.is_fresh({"timestamp": dt_ts}) else {}
-
-    def write(self, data: dict) -> None:
-        self.file.parent.mkdir(parents=True, exist_ok=True)
-
-        self.file.write_text(
-            json.dumps(
-                data,
-                indent=2,
-                ensure_ascii=False,
-            ),
-            encoding="utf-8",
-        )
 __all__ = ["Cache"]
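
This constructor change is what lets every scraper above shrink `Cache(Path(__file__).parent / "caches" / "x.json", ...)` down to `Cache("x.json", ...)`: the path is now resolved once, inside the class, against a shared `caches/` directory. A condensed, runnable sketch of the new behavior, keeping only the two methods relevant here:

import json
from pathlib import Path


class Cache:
    def __init__(self, file: str, exp: int | float) -> None:
        # Resolve the bare filename against the package-level caches dir.
        self.file = Path(__file__).parent.parent / "caches" / file
        self.exp = exp

    def write(self, data: dict) -> None:
        # Create caches/ on first use, then pretty-print the JSON payload.
        self.file.parent.mkdir(parents=True, exist_ok=True)
        self.file.write_text(
            json.dumps(data, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )


CACHE_FILE = Cache("fawa.json", exp=10_800)
CACHE_FILE.write({"example": "entry"})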

View file

@@ -170,8 +170,6 @@ class Leagues:
         elif self.is_valid(event, "WNBA"):
             return self.info("WNBA")
-        # NCAA
         else:
             return self.info("Basketball")

View file

@@ -2,7 +2,6 @@ import asyncio
 import re
 from functools import partial
 from itertools import chain
-from pathlib import Path
 from typing import Any
 from urllib.parse import urljoin
@@ -15,9 +14,9 @@ log = get_logger(__name__)
 urls: dict[str, dict[str, str | float]] = {}
-API_FILE = Cache(Path(__file__).parent / "caches" / "watchfty_api.json", exp=28_800)
-CACHE_FILE = Cache(Path(__file__).parent / "caches" / "watchfty.json", exp=10_800)
+CACHE_FILE = Cache("watchfty.json", exp=10_800)
+API_FILE = Cache("watchfty-api.json", exp=28_800)
 MIRRORS = [
     "https://www.watchfooty.cc",
@@ -26,23 +25,30 @@ MIRRORS = [
 ]
 SPORT_ENDPOINTS = [
-    "football",
     "american-football",
-    "hockey",
-    "basketball",
-    "racing",
+    "australian-football",
+    "baseball",
+    "basketball",
+    "cricket",
+    "darts",
+    "fighting",
+    "football",
+    "golf",
+    "hockey",
+    "racing",
+    "rugby",
+    "tennis",
+    "volleyball",
 ]
 async def get_api_data(client: httpx.AsyncClient, url: str) -> list[dict[str, Any]]:
     try:
-        r = await client.get(url, timeout=10)
+        r = await client.get(url, timeout=5)
         r.raise_for_status()
     except Exception as e:
         log.error(f'Failed to fetch "{url}": {e}')
+        return []
     return r.json()
@@ -85,7 +91,11 @@ async def process_event(
     got_one = asyncio.Event()
-    handler = partial(network.capture_req, captured=captured, got_one=got_one)
+    handler = partial(
+        network.capture_req,
+        captured=captured,
+        got_one=got_one,
+    )
     page.on("request", handler)
@@ -159,7 +169,7 @@ async def get_events(
     API_FILE.write(api_data)
-    events: list[dict[str, str]] = []
+    events = []
     now = Time.clean(Time.now())
     start_dt = now.delta(hours=-1)
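
The reflowed `handler = partial(...)` is behavior-identical: `functools.partial` pre-binds the keyword arguments so Playwright can invoke the handler with just the request object via `page.on("request", handler)`. A minimal sketch of the capture pattern, with the body of `capture_req` assumed for illustration (the real `network.capture_req` may differ):

import asyncio
from functools import partial


def capture_req(request, *, captured: list, got_one: asyncio.Event) -> None:
    # Assumed logic: remember anything that looks like a stream manifest
    # and wake up whoever is awaiting got_one.wait().
    if ".m3u8" in request.url:
        captured.append(request.url)
        got_one.set()


captured: list[str] = []
got_one = asyncio.Event()

# Pre-bind the keyword arguments; Playwright calls handler(request) per request.
handler = partial(capture_req, captured=captured, got_one=got_one)
# page.on("request", handler)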