Mirror of https://github.com/doms9/iptv.git, synced 2026-01-21 03:59:03 +01:00

Commit: e

This commit is contained in:
  parent 8b2d8cc1fc
  commit 00000d939c

20 changed files with 119 additions and 12 deletions
@@ -20,6 +20,7 @@ BASE_URL = "http://www.fawanews.sc/"

async def process_event(url: str, url_num: int) -> str | None:
    if not (html_data := await network.request(url, log=log)):
        log.info(f"URL {url_num}) Failed to load url.")

        return

    valid_m3u8 = re.compile(

@@ -29,9 +30,11 @@ async def process_event(url: str, url_num: int) -> str | None:

    if not (match := valid_m3u8.search(html_data.text)):
        log.info(f"URL {url_num}) No M3U8 found")

        return

    log.info(f"URL {url_num}) Captured M3U8")

    return match[2]

@@ -83,8 +86,11 @@ async def get_events(cached_hrefs: set[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_hrefs = {entry["href"] for entry in cached_urls.values()}

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -137,6 +143,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
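Every scrape() touched in this commit follows the same cache bookkeeping: load the cache, remember how many entries it held, merge it into the module-level urls map, and log only the delta at the end. A minimal self-contained sketch of that pattern, with hypothetical stand-ins for the scraper's CACHE_FILE, urls, and log objects:

```python
import asyncio
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("example")

# Stand-ins for the scraper's module-level state and cache helper.
urls: dict[str, dict[str, str]] = {}
_fake_cache = {"event-1": {"href": "/e/1", "url": "http://example.invalid/1.m3u8"}}


def load_cache() -> dict[str, dict[str, str]]:
    """Hypothetical replacement for CACHE_FILE.load()."""
    return dict(_fake_cache)


async def scrape() -> None:
    cached_urls = load_cache()

    # Hrefs already cached, so freshly scraped pages can be skipped.
    cached_hrefs = {entry["href"] for entry in cached_urls.values()}

    cached_count = len(cached_urls)
    urls.update(cached_urls)
    log.info(f"Loaded {cached_count} event(s) from cache ({len(cached_hrefs)} href(s))")

    # ... scraping would add new entries to cached_urls here ...
    cached_urls.setdefault("event-2", {"href": "/e/2", "url": ""})

    # Walrus expression: the delta is both the truth test and the logged value.
    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")
    else:
        log.info("No new events found")


asyncio.run(scrape())
```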
@@ -21,27 +21,33 @@ async def process_event(url: str, url_num: int) -> str | None:

    if not (event_data := await network.request(url, log=log)):
        log.info(f"URL {url_num}) Failed to load url.")

        return

    soup = HTMLParser(event_data.content)

    if not (iframe := soup.css_first("iframe#wp_player")):
        log.warning(f"URL {url_num}) No iframe element found.")

        return

    if not (iframe_src := iframe.attributes.get("src")):
        log.warning(f"URL {url_num}) No iframe source found.")

        return

    if not (iframe_src_data := await network.request(iframe_src, log=log)):
        log.info(f"URL {url_num}) Failed to load iframe source.")

        return

    if not (match := pattern.search(iframe_src_data.text)):
        log.warning(f"URL {url_num}) No Clappr source found.")

        return

    log.info(f"URL {url_num}) Captured M3U8")

    return base64.b64decode(match[1]).decode("utf-8")
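The hunk above drills from the event page into the #wp_player iframe and then decodes a base64-encoded Clappr source. A rough sketch of just the regex-plus-decode step; the pattern and the page text here are invented for illustration (the real module-level `pattern` is defined outside this diff):

```python
import base64
import re

# Hypothetical stand-in for the module-level `pattern` the diff references:
# it captures the base64 argument handed to the Clappr player.
pattern = re.compile(r"atob\('([A-Za-z0-9+/=]+)'\)")

# Fabricated iframe page text for illustration only.
page_text = "player = new Clappr.Player({source: atob('aHR0cHM6Ly9leGFtcGxlLmludmFsaWQvbGl2ZS5tM3U4')});"

if match := pattern.search(page_text):
    m3u8_url = base64.b64decode(match[1]).decode("utf-8")
    print(m3u8_url)  # https://example.invalid/live.m3u8
```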
@@ -98,7 +104,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -137,6 +145,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
@@ -72,7 +72,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -129,6 +131,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
@@ -55,7 +55,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:

            continue

        event_name = event["match_name"]

        channel_info: dict[str, str] = event["channel"]

        category: dict[str, str] = channel_info["TVCategory"]

        sport = category["name"]

@@ -82,7 +84,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:

async def scrape() -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)

        log.info(f"Loaded {len(urls)} event(s) from cache")

        return

    log.info(f'Scraping from "{BASE_URL}"')
@@ -39,6 +39,7 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str

        API_FILE.write(api_data)

    now = Time.clean(Time.now())

    start_dt = now.delta(minutes=-30)
    end_dt = now.delta(minutes=30)

@@ -50,8 +51,11 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str

    for event in stream_group.get("streams", []):
        name = event.get("name")

        start_ts = event.get("starts_at")

        logo = event.get("poster")

        iframe = event.get("iframe")

        if not (name and start_ts and iframe):

@@ -80,7 +84,9 @@ async def get_events(api_url: str, cached_keys: list[str]) -> list[dict[str, str

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -148,6 +154,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
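The start_dt/end_dt pair above presumably defines a ±30 minute window around "now" that events are filtered against via their starts_at timestamp. A standard-library sketch of that idea, using datetime/timedelta rather than the project's Time helper (the stream payload here is fabricated):

```python
from datetime import datetime, timedelta, timezone

now = datetime.now(tz=timezone.utc)
start_dt = now - timedelta(minutes=30)
end_dt = now + timedelta(minutes=30)

# Fabricated API payload: each stream carries a Unix start timestamp.
streams = [
    {"name": "Match A", "starts_at": int((now - timedelta(minutes=10)).timestamp())},
    {"name": "Match B", "starts_at": int((now + timedelta(hours=3)).timestamp())},
]

# Keep only events starting inside the window.
live_soon = [
    s for s in streams
    if start_dt <= datetime.fromtimestamp(s["starts_at"], tz=timezone.utc) <= end_dt
]
print([s["name"] for s in live_soon])  # ['Match A']
```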
@@ -41,9 +41,11 @@ async def process_event(url: str, url_num: int) -> str | None:

    if not (match := valid_m3u8.search(html_data.text)):
        log.info(f"URL {url_num}) No M3U8 found")

        return

    log.info(f"URL {url_num}) Captured M3U8")

    return match[1]

@@ -133,7 +135,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -183,6 +187,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
@@ -21,15 +21,18 @@ BASE_URL = "https://sharkstreams.net"

async def process_event(url: str, url_num: int) -> str | None:
    if not (r := await network.request(url, log=log)):
        log.info(f"URL {url_num}) Failed to load url.")

        return

    data: dict[str, list[str]] = r.json()

    if not (urls := data.get("urls")):
        log.info(f"URL {url_num}) No M3U8 found")

        return

    log.info(f"URL {url_num}) Captured M3U8")

    return urls[0]
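This process_event expects a JSON body shaped like {"urls": [...]} and takes the first entry. A minimal httpx sketch of the same shape check; the endpoint URL and response layout are assumptions taken only from what the hunk shows:

```python
import asyncio

import httpx


async def fetch_first_m3u8(api_url: str) -> str | None:
    async with httpx.AsyncClient(timeout=10) as client:
        try:
            r = await client.get(api_url)
            r.raise_for_status()
        except httpx.HTTPError:
            return None

        data: dict[str, list[str]] = r.json()

    # Same guard as the hunk: a missing or empty "urls" list means no stream.
    if not (urls := data.get("urls")):
        return None

    return urls[0]


if __name__ == "__main__":
    # Placeholder endpoint; the real API route is not shown in this diff.
    print(asyncio.run(fetch_first_m3u8("https://example.invalid/api/stream")))
```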
@@ -47,6 +50,7 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]

    for row in soup.css(".row"):
        date_node = row.css_first(".ch-date")

        sport_node = row.css_first(".ch-category")
        name_node = row.css_first(".ch-name")

@@ -54,7 +58,9 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]

            continue

        event_dt = Time.from_str(date_node.text(strip=True), timezone="EST")

        sport = sport_node.text(strip=True)

        event_name = name_node.text(strip=True)

        embed_btn = row.css_first("a.hd-link.secondary")

@@ -107,7 +113,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -157,6 +165,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
@@ -52,7 +52,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

            continue

        sport = sport_node.text(strip=True)

        team_1_node = card.css_first(".team1 .team-name")

        team_2_node = card.css_first(".team2 .team-name")

        if team_1_node and team_2_node:

@@ -88,7 +90,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -146,6 +150,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
@@ -91,7 +91,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -148,6 +150,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
@@ -70,7 +70,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:

async def scrape() -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)

        log.info(f"Loaded {len(urls)} event(s) from cache")

        return

    log.info(f'Scraping from "{BASE_URL}"')
@@ -135,7 +135,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -194,6 +196,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
@@ -125,7 +125,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -182,6 +184,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
@@ -117,7 +117,9 @@ async def get_events(url: str, cached_keys: list[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -180,6 +182,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
@@ -26,7 +26,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:

    for i, line in enumerate(data, start=1):
        if line.startswith("#EXTINF"):
            tvg_id_match = re.search(r'tvg-id="([^"]*)"', line)

            tvg_name_match = re.search(r'tvg-name="([^"]*)"', line)

            group_title_match = re.search(r'group-title="([^"]*)"', line)

            tvg = tvg_id_match[1] if tvg_id_match else None
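The hunk above pulls tvg-id, tvg-name, and group-title out of each #EXTINF line with separate regexes. An equivalent standalone sketch over a made-up playlist line, using one small helper with the same capture groups:

```python
import re

# Fabricated playlist line for illustration.
extinf = '#EXTINF:-1 tvg-id="demo.id" tvg-name="Demo Channel" group-title="Sports",Demo Channel'


def extinf_attr(line: str, key: str) -> str | None:
    """Return the value of key="..." from an #EXTINF line, if present."""
    m = re.search(rf'{key}="([^"]*)"', line)
    return m[1] if m else None


print(extinf_attr(extinf, "tvg-id"))       # demo.id
print(extinf_attr(extinf, "tvg-name"))     # Demo Channel
print(extinf_attr(extinf, "group-title"))  # Sports
```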
@@ -57,7 +59,9 @@ async def get_events() -> dict[str, dict[str, str | float]]:

async def scrape() -> None:
    if cached := CACHE_FILE.load():
        urls.update(cached)

        log.info(f"Loaded {len(urls)} event(s) from cache")

        return

    log.info(f'Scraping from "{BASE_URL}"')
@@ -7,7 +7,9 @@ from .config import Time

class Cache:
    def __init__(self, file: str, exp: int | float) -> None:
        self.file = Path(__file__).parent.parent / "caches" / file

        self.exp = exp

        self.now_ts = Time.now().timestamp()

    def is_fresh(self, entry: dict) -> bool:
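The Cache constructor records an expiry window and the current timestamp; is_fresh presumably compares an entry's stored timestamp against that window. A small stand-alone sketch of that check, where the entry layout and field name are assumptions rather than something shown in the diff:

```python
import time


class Cache:
    """Simplified stand-in: the real class also manages a cache file on disk."""

    def __init__(self, exp: int | float) -> None:
        self.exp = exp  # lifetime in seconds
        self.now_ts = time.time()

    def is_fresh(self, entry: dict) -> bool:
        # Hypothetical field: assume each entry stores the time it was cached.
        return (self.now_ts - entry.get("cached_at", 0)) < self.exp


cache = Cache(exp=3600)
print(cache.is_fresh({"cached_at": time.time() - 60}))    # True
print(cache.is_fresh({"cached_at": time.time() - 7200}))  # False
```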
@@ -45,11 +45,13 @@ class Time(datetime):

    def to_tz(self, tzone: str) -> "Time":
        dt = self.astimezone(self.ZONES[tzone])

        return self.__class__.fromtimestamp(dt.timestamp(), tz=self.ZONES[tzone])

    @classmethod
    def _to_class_tz(cls, dt) -> "Time":
        dt = dt.astimezone(cls.TZ)

        return cls.fromtimestamp(dt.timestamp(), tz=cls.TZ)

    @classmethod
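to_tz converts an aware Time into another zone by round-tripping through a timestamp, which keeps the result an instance of the subclass. A zoneinfo-only sketch of that round-trip; ZONES here is a hypothetical mapping standing in for whatever the real class defines:

```python
from datetime import datetime
from zoneinfo import ZoneInfo


class Time(datetime):
    # Hypothetical zone table; the real class defines its own ZONES/TZ.
    ZONES = {"EST": ZoneInfo("America/New_York"), "UTC": ZoneInfo("UTC")}

    def to_tz(self, tzone: str) -> "Time":
        dt = self.astimezone(self.ZONES[tzone])
        # fromtimestamp on the subclass keeps the return type as Time.
        return self.__class__.fromtimestamp(dt.timestamp(), tz=self.ZONES[tzone])


now = Time.now(tz=ZoneInfo("UTC"))
est = now.to_tz("EST")
print(type(est).__name__, est.tzinfo)  # Time America/New_York
```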
@@ -22,9 +22,13 @@ COLORS = {

class ColorFormatter(logging.Formatter):
    def format(self, record) -> str:
        color = COLORS.get(record.levelname, COLORS["reset"])

        levelname = record.levelname

        record.levelname = f"{color}{levelname:<8}{COLORS['reset']}"

        formatted = super().format(record)

        record.levelname = levelname

        return formatted

@@ -38,10 +42,15 @@ def get_logger(name: str | None = None) -> logging.Logger:

    if not logger.hasHandlers():
        handler = logging.StreamHandler()

        formatter = ColorFormatter(LOG_FMT, datefmt="%Y-%m-%d | %H:%M:%S")

        handler.setFormatter(formatter)

        logger.addHandler(handler)

        logger.setLevel(logging.INFO)

        logger.propagate = False

    return logger
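ColorFormatter temporarily rewrites record.levelname so the padding and ANSI color only affect the formatted console line, then restores it for any other handlers. A compact, runnable variant wired up the same way get_logger does; LOG_FMT and the color table are placeholders, not the project's actual values:

```python
import logging

COLORS = {"INFO": "\033[32m", "WARNING": "\033[33m", "ERROR": "\033[31m", "reset": "\033[0m"}
LOG_FMT = "%(asctime)s | %(levelname)s | %(message)s"  # placeholder format


class ColorFormatter(logging.Formatter):
    def format(self, record) -> str:
        color = COLORS.get(record.levelname, COLORS["reset"])
        levelname = record.levelname
        record.levelname = f"{color}{levelname:<8}{COLORS['reset']}"
        formatted = super().format(record)
        record.levelname = levelname  # restore so other handlers see the plain name
        return formatted


def get_logger(name: str | None = None) -> logging.Logger:
    logger = logging.getLogger(name)
    if not logger.hasHandlers():
        handler = logging.StreamHandler()
        handler.setFormatter(ColorFormatter(LOG_FMT, datefmt="%Y-%m-%d | %H:%M:%S"))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
        logger.propagate = False
    return logger


get_logger("demo").info("colored, padded level name")
```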
@@ -12,6 +12,8 @@ from playwright.async_api import Browser, BrowserContext, Playwright, Request

from .logger import get_logger

+logger = get_logger(__name__)
+
T = TypeVar("T")

@@ -32,8 +34,6 @@ class Network:

            http2=True,
        )

-        self._logger = get_logger("network")
-
    @staticmethod
    def build_proxy_url(
        tag: str,

@@ -58,16 +58,18 @@ class Network:

        **kwargs,
    ) -> httpx.Response | None:

-        log = log or self._logger
+        log = log or logger

        try:
            r = await self.client.get(url, **kwargs)
            r.raise_for_status()
-        except Exception as e:
-            log.error(f'Failed to fetch "{url}": {e}\n{kwargs = }')
-            return ""

-        return r
+            return r
+
+        except (httpx.HTTPError, httpx.TimeoutException) as e:
+            log.error(f'Failed to fetch "{url}": {e}')
+
+            return ""

    async def get_base(self, mirrors: list[str]) -> str | None:
        random.shuffle(mirrors)
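The request() change above moves the success return inside the try block and narrows the except clause to httpx's own exception types, logging and bailing out early on failure. A self-contained sketch of that shape against a throwaway client; the failure sentinel is None here (the diff itself returns an empty string, which callers also treat as falsy):

```python
import asyncio
import logging

import httpx

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("network")


async def request(client: httpx.AsyncClient, url: str, **kwargs) -> httpx.Response | None:
    try:
        r = await client.get(url, **kwargs)
        r.raise_for_status()
        return r
    except (httpx.HTTPError, httpx.TimeoutException) as e:
        # Same narrowed exception tuple as the diff; callers only check truthiness.
        logger.error(f'Failed to fetch "{url}": {e}')
        return None


async def main() -> None:
    async with httpx.AsyncClient(timeout=5) as client:
        if r := await request(client, "https://example.invalid/"):
            print(r.status_code)


asyncio.run(main())
```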
@@ -89,7 +91,7 @@ class Network:

        log: logging.Logger | None = None,
    ) -> T | None:

-        log = log or get_logger("network")
+        log = log or logger

        task = asyncio.create_task(fn())

@@ -104,13 +106,15 @@ class Network:

                await task
            except asyncio.CancelledError:
                pass

            except Exception as e:
                log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")

-            return None
+            return
        except Exception as e:
            log.error(f"URL {url_num}) Unexpected error: {e}")
-            return None
+
+            return

    @staticmethod
    def capture_req(

@@ -141,7 +145,7 @@ class Network:

        log: logging.Logger | None = None,
    ) -> str | None:

-        log = log or self._logger
+        log = log or logger

        page = await context.new_page()

@@ -170,6 +174,7 @@ class Network:

                await asyncio.wait_for(wait_task, timeout=timeout)
            except asyncio.TimeoutError:
                log.warning(f"URL {url_num}) Timed out waiting for M3U8.")

                return

            finally:

@@ -183,17 +188,21 @@ class Network:

            if captured:
                log.info(f"URL {url_num}) Captured M3U8")

                return captured[0]

            log.warning(f"URL {url_num}) No M3U8 captured after waiting.")

            return

        except Exception as e:
            log.warning(f"URL {url_num}) Exception while processing: {e}")

            return

        finally:
            page.remove_listener("request", handler)

            await page.close()

    @staticmethod

@@ -205,7 +214,9 @@ class Network:

        if browser == "brave":
            brwsr = await playwright.chromium.connect_over_cdp("http://localhost:9222")

            context = brwsr.contexts[0]

        else:
            brwsr = await playwright.firefox.launch(headless=True)
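The browser path in these hunks registers a request listener, waits with a timeout for an .m3u8 request to appear, and always removes the listener and closes the page in finally. A trimmed sketch of that capture pattern using Playwright's async API; the target URL is a placeholder and the timeout handling is condensed:

```python
import asyncio

from playwright.async_api import Request, async_playwright


async def capture_m3u8(url: str, timeout: float = 15) -> str | None:
    captured: list[str] = []
    got_one = asyncio.Event()

    def handler(req: Request) -> None:
        # Same idea as capture_req in the diff: remember any .m3u8 request.
        if ".m3u8" in req.url:
            captured.append(req.url)
            got_one.set()

    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=True)
        page = await browser.new_page()
        page.on("request", handler)
        try:
            await page.goto(url, wait_until="domcontentloaded")
            await asyncio.wait_for(got_one.wait(), timeout=timeout)
        except asyncio.TimeoutError:
            pass  # fall through: captured may still be empty
        except Exception:
            return None
        finally:
            page.remove_listener("request", handler)
            await page.close()
            await browser.close()

    return captured[0] if captured else None


if __name__ == "__main__":
    print(asyncio.run(capture_m3u8("https://example.invalid/player")))
```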
@@ -104,12 +104,14 @@ async def process_event(

            text = await header.inner_text()
        except TimeoutError:
            log.warning(f"URL {url_num}) Can't find stream links header.")

            return

        match = re.search(r"\((\d+)\)", text)

        if not match or int(match[1]) == 0:
            log.warning(f"URL {url_num}) No available stream links.")

            return

        first_available = await page.wait_for_selector(

@@ -124,6 +126,7 @@ async def process_event(

            await asyncio.wait_for(wait_task, timeout=6)
        except asyncio.TimeoutError:
            log.warning(f"URL {url_num}) Timed out waiting for M3U8.")

            return

        finally:

@@ -137,17 +140,21 @@ async def process_event(

        if captured:
            log.info(f"URL {url_num}) Captured M3U8")

            return captured[-1]

        log.warning(f"URL {url_num}) No M3U8 captured after waiting.")

        return

    except Exception as e:
        log.warning(f"URL {url_num}) Exception while processing: {e}")

        return

    finally:
        page.remove_listener("request", handler)

        await page.close()

@@ -213,8 +220,11 @@ async def get_events(

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    valid_urls = {k: v for k, v in cached_urls.items() if v["url"]}

    valid_count = cached_count = len(valid_urls)

    urls.update(valid_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -225,7 +235,9 @@ async def scrape() -> None:

    if not (base_url and api_url):
        log.warning("No working Watch Footy mirrors")

        CACHE_FILE.write(cached_urls)

        return

    log.info(f'Scraping from "{base_url}"')

@@ -287,6 +299,7 @@ async def scrape() -> None:

    if new_count := valid_count - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")
@@ -112,7 +112,9 @@ async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:

async def scrape() -> None:
    cached_urls = CACHE_FILE.load()

    cached_count = len(cached_urls)

    urls.update(cached_urls)

    log.info(f"Loaded {cached_count} event(s) from cache")

@@ -169,6 +171,7 @@ async def scrape() -> None:

    if new_count := len(cached_urls) - cached_count:
        log.info(f"Collected and cached {new_count} new event(s)")

    else:
        log.info("No new events found")