- edit livetvsx.py scraping
This commit is contained in:
doms9 2026-03-02 20:02:26 -05:00
parent b896a22a29
commit 00000d9ab1
5 changed files with 55 additions and 26 deletions

View file

@@ -1,4 +1,5 @@
 import asyncio
+import re
 from functools import partial
 import feedparser
@@ -24,9 +25,22 @@ VALID_SPORTS = [
     "Basketball",
     "Football",
     "Ice Hockey",
+    "Wrestling",
 ]
 
 
+def fix_url(s: str) -> str | None:
+    pattern = re.compile(r"eventinfo\/(\d*)", re.I)
+
+    if not (match := pattern.search(s)):
+        return
+
+    elif not (event_id := match[1]).isalnum():
+        return
+
+    return f"https://cdn.livetv872.me/cache/links/en.{event_id}.html"
+
+
 async def process_event(
     url: str,
     url_num: int,
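
For reference, a minimal sketch of what the new fix_url helper does; the helper is redefined here purely for illustration, and the sample RSS links are hypothetical (only the eventinfo/<id> segment matters):

import re


def fix_url(s: str) -> str | None:
    # Same logic as the helper added above: pull the numeric event id out of
    # an eventinfo/ link and rebuild it as a cached links page URL.
    if not (match := re.search(r"eventinfo\/(\d*)", s, re.I)):
        return None
    if not (event_id := match[1]).isalnum():  # \d* can match an empty string
        return None
    return f"https://cdn.livetv872.me/cache/links/en.{event_id}.html"


# Hypothetical RSS entry links, for illustration only.
print(fix_url("https://livetv.sx/enx/eventinfo/3072815_team_a_vs_team_b/"))
# -> https://cdn.livetv872.me/cache/links/en.3072815.html
print(fix_url("https://livetv.sx/enx/allupcomingsports/"))
# -> None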
@@ -43,43 +57,40 @@ async def process_event(
         got_one=got_one,
     )
 
+    event_id_pattern = re.compile(r"&c=(\d*)", re.I)
+
     page.on("request", handler)
 
     try:
-        await page.goto(
+        resp = await page.goto(
             url,
             wait_until="domcontentloaded",
             timeout=10_000,
         )
-        await page.wait_for_timeout(1_500)
-
-        buttons = await page.query_selector_all(".lnktbj a[href*='webplayer']")
-
-        labels = await page.eval_on_selector_all(
-            ".lnktyt span",
-            "elements => elements.map(el => el.textContent.trim().toLowerCase())",
-        )
-
-        for btn, label in zip(buttons, labels):
-            if label in ["web", "youtube"]:
-                continue
-
-            if not (href := await btn.get_attribute("href")):
-                continue
-
-            break
-        else:
+        if resp.status != 200:
+            log.warning(f"URL {url_num}) status code: {resp.status}")
+            return
+
+        try:
+            event_a = page.locator('a[title*="Aliez"]').first
+            href = await event_a.get_attribute("href", timeout=1_250)
+
+        except TimeoutError:
             log.warning(f"URL {url_num}) No valid sources found.")
             return
 
-        href = href if href.startswith("http") else f"https:{href}"
-
-        href.replace("livetv.sx", "livetv873.me")
+        if match := event_id_pattern.search(href):
+            event_id = match[1]
+            event_url = f"https://emb.apl392.me/player/live.php?id={event_id}"
+        else:
+            event_url = href if href.startswith("http") else f"https:{href}"
 
         await page.goto(
-            href,
+            event_url,
             wait_until="domcontentloaded",
             timeout=5_000,
        )
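
A rough sketch of the new link handling in process_event: the href taken from the Aliez anchor is assumed to carry a &c=<id> query parameter, which gets rewritten into an embed player URL, with a plain scheme fix-up as the fallback. The helper name and sample hrefs below are hypothetical:

import re

EVENT_ID_PATTERN = re.compile(r"&c=(\d*)", re.I)


def to_event_url(href: str) -> str:
    # Mirrors the branch above: prefer the embed player when an id is present,
    # otherwise just make sure the href has a scheme.
    if match := EVENT_ID_PATTERN.search(href):
        return f"https://emb.apl392.me/player/live.php?id={match[1]}"
    return href if href.startswith("http") else f"https:{href}"


# Hypothetical hrefs, for illustration only.
print(to_event_url("//livetv873.me/webplayer.php?t=ifr&c=987654&lang=en"))
# -> https://emb.apl392.me/player/live.php?id=987654
print(to_event_url("//cdn.example.net/some/stream.html"))
# -> https://cdn.example.net/some/stream.html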
@@ -130,7 +141,7 @@ async def refresh_xml_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
         if not (date := entry.get("published")):
             continue
 
-        if not (link := entry.get("link")):
+        if (not (link := entry.get("link"))) or (not (fixed_link := fix_url(link))):
             continue
 
         if not (title := entry.get("title")):
@@ -151,7 +162,7 @@ async def refresh_xml_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
             "sport": sport,
             "league": league,
             "event": title,
-            "link": link.replace("livetv.sx", "livetv873.me"),
+            "link": fixed_link,
             "event_ts": event_dt.timestamp(),
             "timestamp": now_ts,
         }
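
Tying the two changes in this file together, refresh_xml_cache now drops any RSS entry whose link cannot be rewritten and stores the rewritten URL instead of a host-swapped one. A condensed sketch of that filter, with plain dicts standing in for feedparser entries and fix_url reused from the sketch above:

# Stand-in entries; real ones come from feedparser.parse(...).entries.
entries = [
    {"published": "Mon, 02 Mar 2026 20:00:00 GMT",
     "link": "https://livetv.sx/enx/eventinfo/3072815_team_a_vs_team_b/",
     "title": "Team A - Team B"},
    {"published": "Mon, 02 Mar 2026 21:00:00 GMT",
     "link": "https://livetv.sx/enx/allupcomingsports/",  # no eventinfo id, gets skipped
     "title": "Upcoming"},
]

for entry in entries:
    if (not (link := entry.get("link"))) or (not (fixed_link := fix_url(link))):
        continue
    print(entry["title"], "->", fixed_link)
# -> Team A - Team B -> https://cdn.livetv872.me/cache/links/en.3072815.html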

View file

@@ -19,12 +19,17 @@ BASE_URL = "https://pixelsport.tv"
 async def get_api_data(page: Page) -> dict[str, list[dict, str, str]]:
     try:
-        await page.goto(
+        resp = await page.goto(
             url := urljoin(BASE_URL, "backend/livetv/events"),
             wait_until="domcontentloaded",
             timeout=6_000,
         )
+        if resp.status != 200:
+            log.warning(f"{url} status code: {resp.status}")
+            return {}
+
         raw_json = await page.locator("pre").inner_text(timeout=5_000)
 
     except Exception as e:
         log.error(f'Failed to fetch "{url}": {e}')
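
The same early-exit guard recurs in each of the remaining scrapers: Playwright's page.goto returns a Response object whose status can be checked before any selector work happens. A minimal, self-contained sketch of that pattern; the function name and URL are placeholders:

import asyncio
import logging

from playwright.async_api import async_playwright

log = logging.getLogger(__name__)


async def check(url: str) -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        page = await browser.new_page()
        try:
            resp = await page.goto(url, wait_until="domcontentloaded", timeout=6_000)
            # Bail out on non-200 responses instead of waiting on selectors
            # that will never show up.
            if resp is None or resp.status != 200:
                log.warning(f"{url} status code: {resp.status if resp else 'none'}")
                return
            # ... selector / request-sniffing work would go here ...
        finally:
            await browser.close()


asyncio.run(check("https://example.com"))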

View file

@@ -93,12 +93,16 @@ async def process_event(
     page.on("request", handler)
 
     try:
-        await page.goto(
+        resp = await page.goto(
             url,
             wait_until="domcontentloaded",
             timeout=6_000,
         )
+        if resp.status != 200:
+            log.warning(f"URL {url_num}) status code: {resp.status}")
+            return
+
         try:
             if btn := await page.wait_for_selector(
                 "button.streambutton:nth-of-type(1)",

View file

@@ -250,12 +250,17 @@ class Network:
         page.on("request", handler)
 
         try:
-            await page.goto(
+            resp = await page.goto(
                 url,
                 wait_until="domcontentloaded",
                 timeout=6_000,
             )
+            if resp.status != 200:
+                log.warning(f"URL {url_num}) status code: {resp.status}")
+                return
+
             wait_task = asyncio.create_task(got_one.wait())
 
             try:

View file

@@ -89,12 +89,16 @@ async def process_event(
     page.on("request", handler)
 
     try:
-        await page.goto(
+        resp = await page.goto(
             url,
             wait_until="domcontentloaded",
             timeout=8_000,
         )
+        if resp.status != 200:
+            log.warning(f"URL {url_num}) status code: {resp.status}")
+            return
+
         await page.wait_for_timeout(2_000)
 
         try: