doms9 2025-12-18 04:14:54 -05:00
parent 8b2d8cc1fc
commit 00000d939c
20 changed files with 119 additions and 12 deletions

View file

@@ -20,6 +20,7 @@ BASE_URL = "http://www.fawanews.sc/"
async def process_event(url: str, url_num: int) -> str | None:
    if not (html_data := await network.request(url, log=log)):
        log.info(f"URL {url_num}) Failed to load url.")
        return

    valid_m3u8 = re.compile(
@@ -29,9 +30,11 @@ async def process_event(url: str, url_num: int) -> str | None:
    if not (match := valid_m3u8.search(html_data.text)):
        log.info(f"URL {url_num}) No M3U8 found")
        return

    log.info(f"URL {url_num}) Captured M3U8")
    return match[2]
@@ -83,8 +86,11 @@ async def get_events(cached_hrefs: set[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_hrefs = {entry["href"] for entry in cached_urls.values()}
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -137,6 +143,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -21,27 +21,33 @@ async def process_event(url: str, url_num: int) -> str | None:
    if not (event_data := await network.request(url, log=log)):
        log.info(f"URL {url_num}) Failed to load url.")
        return

    soup = HTMLParser(event_data.content)

    if not (iframe := soup.css_first("iframe#wp_player")):
        log.warning(f"URL {url_num}) No iframe element found.")
        return

    if not (iframe_src := iframe.attributes.get("src")):
        log.warning(f"URL {url_num}) No iframe source found.")
        return

    if not (iframe_src_data := await network.request(iframe_src, log=log)):
        log.info(f"URL {url_num}) Failed to load iframe source.")
        return

    if not (match := pattern.search(iframe_src_data.text)):
        log.warning(f"URL {url_num}) No Clappr source found.")
        return

    log.info(f"URL {url_num}) Captured M3U8")
    return base64.b64decode(match[1]).decode("utf-8")
@@ -98,7 +104,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -137,6 +145,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -72,7 +72,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -129,6 +131,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -55,7 +55,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
            continue

        event_name = event["match_name"]

        channel_info: dict[str, str] = event["channel"]
        category: dict[str, str] = channel_info["TVCategory"]
        sport = category["name"]
@@ -82,7 +84,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
async def scrape() -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)
        log.info(f"Loaded {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')

View file

@@ -39,6 +39,7 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str]]:
        API_FILE.write(api_data)

    now = Time.clean(Time.now())
    start_dt = now.delta(minutes=-30)
    end_dt = now.delta(minutes=30)
@@ -50,8 +51,11 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str]]:
        for event in stream_group.get("streams", []):
            name = event.get("name")
            start_ts = event.get("starts_at")
            logo = event.get("poster")
            iframe = event.get("iframe")

            if not (name and start_ts and iframe):
@@ -80,7 +84,9 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -148,6 +154,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -41,9 +41,11 @@ async def process_event(url: str, url_num: int) -> str | None:
    if not (match := valid_m3u8.search(html_data.text)):
        log.info(f"URL {url_num}) No M3U8 found")
        return

    log.info(f"URL {url_num}) Captured M3U8")
    return match[1]
@@ -133,7 +135,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -183,6 +187,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -21,15 +21,18 @@ BASE_URL = "https://sharkstreams.net"
async def process_event(url: str, url_num: int) -> str | None:
    if not (r := await network.request(url, log=log)):
        log.info(f"URL {url_num}) Failed to load url.")
        return

    data: dict[str, list[str]] = r.json()

    if not (urls := data.get("urls")):
        log.info(f"URL {url_num}) No M3U8 found")
        return

    log.info(f"URL {url_num}) Captured M3U8")
    return urls[0]
@@ -47,6 +50,7 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
    for row in soup.css(".row"):
        date_node = row.css_first(".ch-date")
        sport_node = row.css_first(".ch-category")
        name_node = row.css_first(".ch-name")
@@ -54,7 +58,9 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
            continue

        event_dt = Time.from_str(date_node.text(strip=True), timezone="EST")
        sport = sport_node.text(strip=True)
        event_name = name_node.text(strip=True)
        embed_btn = row.css_first("a.hd-link.secondary")
@@ -107,7 +113,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -157,6 +165,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -52,7 +52,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
            continue

        sport = sport_node.text(strip=True)

        team_1_node = card.css_first(".team1 .team-name")
        team_2_node = card.css_first(".team2 .team-name")

        if team_1_node and team_2_node:
@@ -88,7 +90,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -146,6 +150,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -91,7 +91,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -148,6 +150,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -70,7 +70,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
async def scrape() -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)
        log.info(f"Loaded {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')

View file

@@ -135,7 +135,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -194,6 +196,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -125,7 +125,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -182,6 +184,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -117,7 +117,9 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -180,6 +182,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -26,7 +26,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
    for i, line in enumerate(data, start=1):
        if line.startswith("#EXTINF"):
            tvg_id_match = re.search(r'tvg-id="([^"]*)"', line)
            tvg_name_match = re.search(r'tvg-name="([^"]*)"', line)
            group_title_match = re.search(r'group-title="([^"]*)"', line)

            tvg = tvg_id_match[1] if tvg_id_match else None
@@ -57,7 +59,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:
async def scrape() -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)
        log.info(f"Loaded {len(urls)} event(s) from cache")
        return

    log.info(f'Scraping from "{BASE_URL}"')
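A self-contained version of the #EXTINF parsing shown above, run against a small hypothetical playlist; the attribute regexes match the hunk, while the sample channels are invented:

import re

# Hypothetical playlist snippet; the real input is the remote M3U file.
PLAYLIST = """#EXTM3U
#EXTINF:-1 tvg-id="espn.us" tvg-name="ESPN" group-title="Sports",ESPN
https://example.com/espn.m3u8
#EXTINF:-1 tvg-name="Unnamed" group-title="News",Some Channel
https://example.com/news.m3u8
"""

data = PLAYLIST.splitlines()

for i, line in enumerate(data, start=1):
    if line.startswith("#EXTINF"):
        tvg_id_match = re.search(r'tvg-id="([^"]*)"', line)
        tvg_name_match = re.search(r'tvg-name="([^"]*)"', line)
        group_title_match = re.search(r'group-title="([^"]*)"', line)

        tvg = tvg_id_match[1] if tvg_id_match else None
        name = tvg_name_match[1] if tvg_name_match else None
        group = group_title_match[1] if group_title_match else None

        # With a 1-based enumerate, data[i] is the line after the #EXTINF entry,
        # i.e. the stream URL.
        url = data[i] if i < len(data) else None
        print(tvg, name, group, url)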

View file

@@ -7,7 +7,9 @@ from .config import Time
class Cache:
    def __init__(self, file: str, exp: int | float) -> None:
        self.file = Path(__file__).parent.parent / "caches" / file
        self.exp = exp
        self.now_ts = Time.now().timestamp()

    def is_fresh(self, entry: dict) -> bool:
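The constructor now keeps the expiry and a single now_ts snapshot; one way is_fresh and load could build on that, assuming entries carry an epoch "timestamp" field and using time.time() in place of the project's Time helper:

import json
import time
from pathlib import Path


class Cache:
    """Minimal sketch: a JSON-file cache whose entries carry a 'timestamp' field."""

    def __init__(self, file: str, exp: int | float) -> None:
        self.file = Path(__file__).parent / "caches" / file
        self.exp = exp  # maximum entry age in seconds
        self.now_ts = time.time()

    def is_fresh(self, entry: dict) -> bool:
        # Assumed entry shape: {"timestamp": <epoch seconds>, ...}
        return self.now_ts - entry.get("timestamp", 0) < self.exp

    def load(self) -> dict:
        if not self.file.exists():
            return {}
        cached = json.loads(self.file.read_text())
        # Drop anything older than the expiry window.
        return {k: v for k, v in cached.items() if self.is_fresh(v)}


cache = Cache("events.json", exp=3 * 60 * 60)  # three-hour expiry
print(cache.load())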

View file

@@ -45,11 +45,13 @@ class Time(datetime):
    def to_tz(self, tzone: str) -> "Time":
        dt = self.astimezone(self.ZONES[tzone])
        return self.__class__.fromtimestamp(dt.timestamp(), tz=self.ZONES[tzone])

    @classmethod
    def _to_class_tz(cls, dt) -> "Time":
        dt = dt.astimezone(cls.TZ)
        return cls.fromtimestamp(dt.timestamp(), tz=cls.TZ)

    @classmethod

View file

@@ -22,9 +22,13 @@ COLORS = {
class ColorFormatter(logging.Formatter):
    def format(self, record) -> str:
        color = COLORS.get(record.levelname, COLORS["reset"])
        levelname = record.levelname

        record.levelname = f"{color}{levelname:<8}{COLORS['reset']}"
        formatted = super().format(record)
        record.levelname = levelname

        return formatted
@@ -38,10 +42,15 @@ def get_logger(name: str | None = None) -> logging.Logger:
    if not logger.hasHandlers():
        handler = logging.StreamHandler()
        formatter = ColorFormatter(LOG_FMT, datefmt="%Y-%m-%d | %H:%M:%S")

        handler.setFormatter(formatter)
        logger.addHandler(handler)

        logger.setLevel(logging.INFO)
        logger.propagate = False

    return logger
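The formatter swaps in a colourized, padded levelname, renders the record, then restores the original value so other handlers are unaffected; get_logger wires it up once per logger. A runnable sketch with an assumed COLORS table and LOG_FMT string:

import logging

# Assumed ANSI colour table; the project defines its own COLORS mapping.
COLORS = {
    "INFO": "\033[32m",
    "WARNING": "\033[33m",
    "ERROR": "\033[31m",
    "reset": "\033[0m",
}

LOG_FMT = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"


class ColorFormatter(logging.Formatter):
    def format(self, record) -> str:
        color = COLORS.get(record.levelname, COLORS["reset"])
        levelname = record.levelname

        # Swap in the coloured, padded levelname just for this render...
        record.levelname = f"{color}{levelname:<8}{COLORS['reset']}"
        formatted = super().format(record)
        # ...then restore it so other handlers see the plain value.
        record.levelname = levelname

        return formatted


def get_logger(name: str | None = None) -> logging.Logger:
    logger = logging.getLogger(name)

    if not logger.hasHandlers():
        handler = logging.StreamHandler()
        handler.setFormatter(ColorFormatter(LOG_FMT, datefmt="%Y-%m-%d | %H:%M:%S"))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
        logger.propagate = False

    return logger


get_logger("demo").info("colourized output")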

View file

@@ -12,6 +12,8 @@ from playwright.async_api import Browser, BrowserContext, Playwright, Request
from .logger import get_logger

logger = get_logger(__name__)

T = TypeVar("T")
@@ -32,8 +34,6 @@ class Network:
            http2=True,
        )
        self._logger = get_logger("network")

    @staticmethod
    def build_proxy_url(
        tag: str,
@@ -58,16 +58,18 @@ class Network:
        **kwargs,
    ) -> httpx.Response | None:
        log = log or self._logger
        log = log or logger

        try:
            r = await self.client.get(url, **kwargs)
            r.raise_for_status()
        except Exception as e:
            log.error(f'Failed to fetch "{url}": {e}\n{kwargs = }')
            return ""
        return r
            r.raise_for_status()
            return r
        except (httpx.HTTPError, httpx.TimeoutException) as e:
            log.error(f'Failed to fetch "{url}": {e}')
            return ""

    async def get_base(self, mirrors: list[str]) -> str | None:
        random.shuffle(mirrors)
@@ -89,7 +91,7 @@ class Network:
        log: logging.Logger | None = None,
    ) -> T | None:
        log = log or get_logger("network")
        log = log or logger

        task = asyncio.create_task(fn())
@@ -104,13 +106,15 @@ class Network:
                await task
            except asyncio.CancelledError:
                pass
            except Exception as e:
                log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")

            return None
            return
        except Exception as e:
            log.error(f"URL {url_num}) Unexpected error: {e}")
            return None
            return

    @staticmethod
    def capture_req(
@@ -141,7 +145,7 @@ class Network:
        log: logging.Logger | None = None,
    ) -> str | None:
        log = log or self._logger
        log = log or logger

        page = await context.new_page()
@@ -170,6 +174,7 @@ class Network:
                await asyncio.wait_for(wait_task, timeout=timeout)
            except asyncio.TimeoutError:
                log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
                return
            finally:
@@ -183,17 +188,21 @@ class Network:
            if captured:
                log.info(f"URL {url_num}) Captured M3U8")
                return captured[0]

            log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
            return
        except Exception as e:
            log.warning(f"URL {url_num}) Exception while processing: {e}")
            return
        finally:
            page.remove_listener("request", handler)
            await page.close()

    @staticmethod
@@ -205,7 +214,9 @@ class Network:
        if browser == "brave":
            brwsr = await playwright.chromium.connect_over_cdp("http://localhost:9222")
            context = brwsr.contexts[0]
        else:
            brwsr = await playwright.firefox.launch(headless=True)
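The request wrapper now falls back to a module-level logger and narrows its exception handling around the httpx call. A sketch of that shape; note that httpx.TimeoutException already subclasses httpx.HTTPError, so catching HTTPError alone covers timeouts, and this version returns None on failure to match the httpx.Response | None annotation (the diff's new code still returns an empty string):

import asyncio
import logging

import httpx

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class Network:
    def __init__(self) -> None:
        # The real client also enables http2=True, which needs the h2 extra.
        self.client = httpx.AsyncClient(follow_redirects=True, timeout=10)

    async def request(
        self,
        url: str,
        log: logging.Logger | None = None,
        **kwargs,
    ) -> httpx.Response | None:
        log = log or logger  # fall back to the module-level logger

        try:
            r = await self.client.get(url, **kwargs)
            r.raise_for_status()
            return r
        except httpx.HTTPError as e:
            # Transport errors, timeouts and bad status codes all land here.
            log.error(f'Failed to fetch "{url}": {e}')
            return None


async def main() -> None:
    network = Network()
    r = await network.request("https://example.com")
    print(r.status_code if r else "request failed")
    await network.client.aclose()


asyncio.run(main())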

View file

@@ -104,12 +104,14 @@ async def process_event(
            text = await header.inner_text()
        except TimeoutError:
            log.warning(f"URL {url_num}) Can't find stream links header.")
            return

        match = re.search(r"\((\d+)\)", text)

        if not match or int(match[1]) == 0:
            log.warning(f"URL {url_num}) No available stream links.")
            return

        first_available = await page.wait_for_selector(
@@ -124,6 +126,7 @@ async def process_event(
            await asyncio.wait_for(wait_task, timeout=6)
        except asyncio.TimeoutError:
            log.warning(f"URL {url_num}) Timed out waiting for M3U8.")
            return
        finally:
@@ -137,17 +140,21 @@ async def process_event(
        if captured:
            log.info(f"URL {url_num}) Captured M3U8")
            return captured[-1]

        log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
        return
    except Exception as e:
        log.warning(f"URL {url_num}) Exception while processing: {e}")
        return
    finally:
        page.remove_listener("request", handler)
        await page.close()
@@ -213,8 +220,11 @@ async def get_events(
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}
    valid_count = cached_count = len(valid_urls)

    urls.update(valid_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -225,7 +235,9 @@ async def scrape() -> None:
    if not (base_url and api_url):
        log.warning("No working Watch Footy mirrors")
        CACHE_FILE.write(cached_urls)
        return

    log.info(f'Scraping from "{base_url}"')
@@ -287,6 +299,7 @@ async def scrape() -> None:
    if new_count := valid_count - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")

View file

@@ -112,7 +112,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
async def scrape() -> None:
    cached_urls = CACHE_FILE.load()
    cached_count = len(cached_urls)

    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache")
@@ -169,6 +171,7 @@ async def scrape() -> None:
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")