mirror of
https://github.com/doms9/iptv.git
synced 2026-01-21 03:59:03 +01:00
e
add semaphores to scrapers (maybe) fix hanging on watchfooty misc. edits
This commit is contained in:
parent
6e9729bf8c
commit
00000d920a
20 changed files with 103 additions and 73 deletions
|
|
@ -78,12 +78,12 @@ async def process_event(
|
|||
|
||||
pattern = re.compile(r"\((\d+)\)")
|
||||
|
||||
page = await context.new_page()
|
||||
|
||||
captured: list[str] = []
|
||||
|
||||
got_one = asyncio.Event()
|
||||
|
||||
page = await context.new_page()
|
||||
|
||||
handler = partial(
|
||||
network.capture_req,
|
||||
captured=captured,
|
||||
|
|
@ -102,10 +102,7 @@ async def process_event(
|
|||
await page.wait_for_timeout(2_000)
|
||||
|
||||
try:
|
||||
header = await page.wait_for_selector(
|
||||
"text=/Stream Links/i",
|
||||
timeout=5_000,
|
||||
)
|
||||
header = await page.wait_for_selector("text=/Stream Links/i", timeout=5_000)
|
||||
|
||||
text = await header.inner_text()
|
||||
except TimeoutError:
|
||||
|
|
@ -120,8 +117,7 @@ async def process_event(
|
|||
|
||||
try:
|
||||
first_available = await page.wait_for_selector(
|
||||
'a[href*="/stream/"]',
|
||||
timeout=3_000,
|
||||
'a[href*="/stream/"]', timeout=3_000
|
||||
)
|
||||
except TimeoutError:
|
||||
log.warning(f"URL {url_num}) No available stream links.")
|
||||
|
|
@ -133,22 +129,18 @@ async def process_event(
|
|||
|
||||
return None, None
|
||||
|
||||
embed = re.sub(
|
||||
pattern=r"^.*\/stream",
|
||||
repl="https://spiderembed.top/embed",
|
||||
string=href,
|
||||
)
|
||||
|
||||
await page.goto(
|
||||
href,
|
||||
embed,
|
||||
wait_until="domcontentloaded",
|
||||
timeout=5_000,
|
||||
)
|
||||
|
||||
if not (iframe := await page.query_selector("iframe")):
|
||||
log.warning(f"URL {url_num}) No iframe found.")
|
||||
|
||||
return None, None
|
||||
|
||||
if not (iframe_src := await iframe.get_attribute("src")):
|
||||
log.warning(f"URL {url_num}) No iframe source found.")
|
||||
|
||||
return None, None
|
||||
|
||||
wait_task = asyncio.create_task(got_one.wait())
|
||||
|
||||
try:
|
||||
|
|
@ -170,7 +162,7 @@ async def process_event(
|
|||
if captured:
|
||||
log.info(f"URL {url_num}) Captured M3U8")
|
||||
|
||||
return captured[-1], iframe_src
|
||||
return captured[0], embed
|
||||
|
||||
log.warning(f"URL {url_num}) No M3U8 captured after waiting.")
|
||||
|
||||
|
|
@ -282,6 +274,7 @@ async def scrape() -> None:
|
|||
url, iframe = await network.safe_process(
|
||||
handler,
|
||||
url_num=i,
|
||||
semaphore=network.PW_S,
|
||||
log=log,
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue