mirror of
https://github.com/doms9/iptv.git
synced 2026-03-07 11:18:25 +01:00
e
- edit watchfooty.py scraping
This commit is contained in:
parent
75f1d95b12
commit
00000d9c59
5 changed files with 51 additions and 47 deletions
|
|
@ -68,8 +68,10 @@ async def process_event(
|
|||
timeout=10_000,
|
||||
)
|
||||
|
||||
if resp.status != 200:
|
||||
log.warning(f"URL {url_num}) Status Code: {resp.status}")
|
||||
if not resp or resp.status != 200:
|
||||
log.warning(
|
||||
f"URL {url_num}) Status Code: {resp.status if resp else 'None'}"
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
|
|
@ -83,7 +85,9 @@ async def process_event(
|
|||
if (match := event_id_pattern.search(href)) and (
|
||||
event_id := match[1]
|
||||
).isalnum():
|
||||
|
||||
event_url = f"https://aliez.tv/player/live.php?id={event_id}"
|
||||
|
||||
else:
|
||||
event_url = href if href.startswith("http") else f"https:{href}"
|
||||
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@ async def get_api_data(page: Page) -> dict[str, list[dict, str, str]]:
|
|||
timeout=6_000,
|
||||
)
|
||||
|
||||
if resp.status != 200:
|
||||
log.warning(f"{url} Status Code: {resp.status}")
|
||||
if not resp or resp.status != 200:
|
||||
log.warning(f"{url} Status Code: {resp.status if resp else 'None'}")
|
||||
|
||||
return {}
|
||||
|
||||
|
|
|
|||
|
|
@ -99,8 +99,11 @@ async def process_event(
|
|||
timeout=6_000,
|
||||
)
|
||||
|
||||
if resp.status != 200:
|
||||
log.warning(f"URL {url_num}) Status Code: {resp.status}")
|
||||
if not resp or resp.status != 200:
|
||||
log.warning(
|
||||
f"URL {url_num}) Status Code: {resp.status if resp else 'None'}"
|
||||
)
|
||||
|
||||
return
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -256,8 +256,10 @@ class Network:
|
|||
timeout=6_000,
|
||||
)
|
||||
|
||||
if resp.status != 200:
|
||||
log.warning(f"URL {url_num}) Status Code: {resp.status}")
|
||||
if not resp or resp.status != 200:
|
||||
log.warning(
|
||||
f"URL {url_num}) Status Code: {resp.status if resp else 'None'}"
|
||||
)
|
||||
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from itertools import chain
|
|||
from typing import Any
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from playwright.async_api import Browser, Page, TimeoutError
|
||||
from playwright.async_api import Browser, Page, Response, TimeoutError
|
||||
|
||||
from .utils import Cache, Time, get_logger, leagues, network
|
||||
|
||||
|
|
@ -66,16 +66,25 @@ async def refresh_api_cache(now: Time) -> list[dict[str, Any]]:
|
|||
return data
|
||||
|
||||
|
||||
def sift_xhr(resp: Response, match_id: int) -> bool:
    """Tell whether *resp* is the successful stream response for *match_id*.

    A response qualifies only when all three conditions hold, checked in
    this order:

    1. its URL contains the path segment ``/en/stream/<match_id>/``;
    2. its URL does not contain ``_rsc=``;
    3. its HTTP status is exactly 200.

    Args:
        resp: The response to inspect; only ``url`` and ``status`` are read.
        match_id: Identifier interpolated into the expected URL segment.

    Returns:
        ``True`` if every condition above is met, ``False`` otherwise.
    """
    target = resp.url

    # Wrong match (or not a stream URL at all): reject before looking further.
    if f"/en/stream/{match_id}/" not in target:
        return False

    # URLs carrying an ``_rsc=`` marker are excluded.
    if "_rsc=" in target:
        return False

    return resp.status == 200
|
||||
|
||||
|
||||
async def process_event(
|
||||
url: str,
|
||||
match_id: int,
|
||||
url_num: int,
|
||||
page: Page,
|
||||
) -> tuple[str | None, str | None]:
|
||||
|
||||
nones = None, None
|
||||
|
||||
pattern = re.compile(r"\((\d+)\)")
|
||||
|
||||
captured: list[str] = []
|
||||
|
||||
got_one = asyncio.Event()
|
||||
|
|
@ -86,46 +95,30 @@ async def process_event(
|
|||
got_one=got_one,
|
||||
)
|
||||
|
||||
strm_handler = partial(sift_xhr, match_id=match_id)
|
||||
|
||||
page.on("request", handler)
|
||||
|
||||
try:
|
||||
resp = await page.goto(
|
||||
url,
|
||||
wait_until="domcontentloaded",
|
||||
timeout=8_000,
|
||||
)
|
||||
|
||||
if resp.status != 200:
|
||||
log.warning(f"URL {url_num}) Status Code: {resp.status}")
|
||||
return
|
||||
|
||||
await page.wait_for_timeout(2_000)
|
||||
|
||||
try:
|
||||
header = await page.wait_for_selector("text=/Stream Links/i", timeout=4_000)
|
||||
async with page.expect_response(strm_handler, timeout=2_500) as strm_resp:
|
||||
resp = await page.goto(
|
||||
url,
|
||||
wait_until="domcontentloaded",
|
||||
timeout=6_000,
|
||||
)
|
||||
|
||||
text = await header.inner_text()
|
||||
if not resp or resp.status != 200:
|
||||
log.warning(
|
||||
f"URL {url_num}) Status Code: {resp.status if resp else 'None'}"
|
||||
)
|
||||
|
||||
return nones
|
||||
|
||||
response = await strm_resp.value
|
||||
|
||||
stream_url = response.url
|
||||
except TimeoutError:
|
||||
log.warning(f"URL {url_num}) Can't find stream links header.")
|
||||
|
||||
return nones
|
||||
|
||||
if not (match := pattern.search(text)) or int(match[1]) == 0:
|
||||
log.warning(f"URL {url_num}) No available stream links.")
|
||||
|
||||
return nones
|
||||
|
||||
try:
|
||||
first_available = await page.wait_for_selector(
|
||||
'a[href*="/stream/"]',
|
||||
timeout=3_000,
|
||||
)
|
||||
except TimeoutError:
|
||||
log.warning(f"URL {url_num}) No available stream links.")
|
||||
|
||||
return nones
|
||||
|
||||
if not (href := await first_available.get_attribute("href")):
|
||||
log.warning(f"URL {url_num}) No available stream links.")
|
||||
|
||||
return nones
|
||||
|
|
@ -133,7 +126,7 @@ async def process_event(
|
|||
embed = re.sub(
|
||||
pattern=r"^.*\/stream",
|
||||
repl="https://spiderembed.top/embed",
|
||||
string=href,
|
||||
string=stream_url,
|
||||
)
|
||||
|
||||
await page.goto(
|
||||
|
|
@ -190,7 +183,7 @@ async def get_events(base_url: str, cached_keys: list[str]) -> list[dict[str, st
|
|||
|
||||
pattern = re.compile(r"\-+|\(")
|
||||
|
||||
start_dt = now.delta(minutes=-30)
|
||||
start_dt = now.delta(hours=-1)
|
||||
end_dt = now.delta(minutes=5)
|
||||
|
||||
for event in api_data:
|
||||
|
|
@ -226,6 +219,7 @@ async def get_events(base_url: str, cached_keys: list[str]) -> list[dict[str, st
|
|||
"sport": sport,
|
||||
"event": name,
|
||||
"link": urljoin(base_url, f"stream/{match_id}"),
|
||||
"match-id": match_id,
|
||||
"logo": logo,
|
||||
"timestamp": event_dt.timestamp(),
|
||||
}
|
||||
|
|
@ -263,6 +257,7 @@ async def scrape(browser: Browser) -> None:
|
|||
handler = partial(
|
||||
process_event,
|
||||
url=(link := ev["link"]),
|
||||
match_id=ev["match-id"],
|
||||
url_num=i,
|
||||
page=page,
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue