Mirror of https://github.com/doms9/iptv.git (synced 2026-03-07 11:18:25 +01:00)
- add adblocking
- edit roxie.py scraping method
- edit tvapp.py scraping method
- modify sports to scrape
- misc edits
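The adblocking added here registers a Playwright route handler on every browser context and aborts requests whose hostname matches an entry in the bundled easylist.txt domain list (see the Network.blocked_domains / Network.to_block / Network._adblock additions further down). A minimal standalone sketch of that idea, following the names used in the hunks below but simplified for illustration, not the exact code:

# Sketch only: assumes an easylist.txt of bare domains next to this script.
from pathlib import Path
from urllib.parse import urlparse

from playwright.async_api import Request, Route

_LIST = Path(__file__).parent / "easylist.txt"
BLOCKED: set[str] = (
    set(_LIST.read_text(encoding="utf-8").splitlines()) if _LIST.exists() else set()
)


def to_block(request: Request) -> bool:
    # Exact or suffix match of the request hostname against the blocklist.
    hostname = (urlparse(request.url).hostname or "").lower()
    return any(hostname == d or hostname.endswith(f".{d}") for d in BLOCKED)


async def adblock(route: Route) -> None:
    request = route.request

    # Only filter resource types that commonly carry ads/trackers.
    if request.resource_type not in ["script", "image", "media", "xhr"]:
        await route.continue_()
        return

    await (route.abort() if to_block(request) else route.continue_())

# Wired up once per context in the diff below: await context.route("**/*", adblock)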
parent 12d6d959f4
commit 00000d90e4
26 changed files with 59290 additions and 264 deletions
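Most of the per-scraper hunks below repeat two mechanical edits: the "Processing N new URL(s)" log call moves inside the `if events:` guard, and the event link is bound with a walrus expression while the handler partial is built, so the later tuple unpack no longer needs to pull `ev["link"]` again. A small self-contained illustration of the walrus pattern (names match the diffs, values are made up; the real process_event helpers are async):

from functools import partial


def process_event(url: str, url_num: int) -> str | None:
    # Stand-in for the real scraper helper.
    return url


ev = {"sport": "NBA", "event": "Team A vs Team B", "link": "https://example.com/stream"}

handler = partial(
    process_event,
    url=(link := ev["link"]),  # binds link as a side effect of building the handler
    url_num=1,
)

# link is still in scope when the cache entry is assembled later on.
entry = {"sport": ev["sport"], "event": ev["event"], "link": link}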
@@ -71,7 +71,6 @@ async def main() -> None:
         asyncio.create_task(streamcenter.scrape(xtrnl_brwsr)),
         asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
         asyncio.create_task(streamsgate.scrape(xtrnl_brwsr)),
-        asyncio.create_task(tvapp.scrape(hdl_brwsr)),
         asyncio.create_task(webcast.scrape(hdl_brwsr)),
     ]
 
@@ -83,6 +82,7 @@ async def main() -> None:
         asyncio.create_task(shark.scrape()),
         asyncio.create_task(streambtw.scrape()),
         asyncio.create_task(totalsportek.scrape()),
+        asyncio.create_task(tvapp.scrape()),
         asyncio.create_task(xstreameast.scrape()),
     ]
 
@@ -99,15 +99,15 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         async with network.event_context(browser) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -121,11 +121,10 @@ async def scrape(browser: Browser) -> None:
                     )
 
                     if url:
-                        sport, event, ts, link = (
+                        sport, event, ts = (
                             ev["sport"],
                             ev["event"],
                             ev["timestamp"],
-                            ev["link"],
                         )
 
                         key = f"[{sport}] {event} ({TAG})"
@@ -89,15 +89,15 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         async with network.event_context(browser) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -111,10 +111,9 @@ async def scrape(browser: Browser) -> None:
                     )
 
                     if url:
-                        sport, event, link, ts = (
+                        sport, event, ts = (
                             ev["sport"],
                             ev["event"],
-                            ev["link"],
                             ev["timestamp"],
                         )
 
@@ -101,15 +101,15 @@ async def scrape() -> None:
 
     events = await get_events(cached_hrefs)
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         now = Time.clean(Time.now())
 
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -120,11 +120,7 @@ async def scrape() -> None:
                 log=log,
             )
 
-            sport, event, link = (
-                ev["sport"],
-                ev["event"],
-                ev["link"],
-            )
+            sport, event = ev["sport"], ev["event"]
 
            key = f"[{sport}] {event} ({TAG})"
@@ -114,15 +114,15 @@ async def scrape() -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         now = Time.clean(Time.now())
 
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -133,11 +133,7 @@ async def scrape() -> None:
                 log=log,
             )
 
-            sport, event, link = (
-                ev["sport"],
-                ev["event"],
-                ev["link"],
-            )
+            sport, event = ev["sport"], ev["event"]
 
            key = f"[{sport}] {event} ({TAG})"
@@ -18,12 +18,14 @@ XML_CACHE = Cache(f"{TAG}-xml", exp=28_000)
 
 BASE_URL = "https://cdn.livetv873.me/rss/upcoming_en.xml"
 
-VALID_SPORTS = {
-    "Football",
+VALID_SPORTS = [
+    "MLB. Preseason",
+    "MLB",
     "Basketball",
+    "Football",
     "Ice Hockey",
     "Olympic Games",
-}
+]
 
 
 async def process_event(
@@ -48,7 +50,7 @@ async def process_event(
         await page.goto(
             url,
             wait_until="domcontentloaded",
-            timeout=15_000,
+            timeout=10_000,
         )
 
         await page.wait_for_timeout(1_500)
@@ -108,7 +110,7 @@ async def process_event(
             return
 
     except Exception as e:
-        log.warning(f"URL {url_num}) Exception while processing: {e}")
+        log.warning(f"URL {url_num}) {e}")
         return
 
     finally:
@@ -205,15 +207,15 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         async with network.event_context(browser, ignore_https=True) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                     )
@@ -226,12 +228,11 @@ async def scrape(browser: Browser) -> None:
                         timeout=20,
                     )
 
-                    sport, league, event, ts, link = (
+                    sport, league, event, ts = (
                         ev["sport"],
                         ev["league"],
                         ev["event"],
                         ev["event_ts"],
-                        ev["link"],
                     )
 
                     key = f"[{sport} - {league}] {event} ({TAG})"
@@ -135,13 +135,13 @@ async def scrape() -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -152,10 +152,9 @@ async def scrape() -> None:
                 log=log,
             )
 
-            sport, event, link, ts = (
+            sport, event, ts = (
                 ev["sport"],
                 ev["event"],
-                ev["link"],
                 ev["event_ts"],
             )
 
@@ -101,15 +101,15 @@ async def scrape() -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         now = Time.clean(Time.now())
 
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -120,11 +120,7 @@ async def scrape() -> None:
                 log=log,
             )
 
-            sport, event, link = (
-                ev["sport"],
-                ev["event"],
-                ev["link"],
-            )
+            sport, event = ev["sport"], ev["event"]
 
            key = f"[{sport}] {event} ({TAG})"
@@ -22,7 +22,7 @@ async def get_api_data(page: Page) -> dict[str, list[dict, str, str]]:
     await page.goto(
         url := urljoin(BASE_URL, "backend/livetv/events"),
         wait_until="domcontentloaded",
-        timeout=10_000,
+        timeout=6_000,
     )
 
     raw_json = await page.locator("pre").inner_text(timeout=5_000)
@@ -100,15 +100,15 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(base_url, cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         timeout=6,
@@ -122,12 +122,11 @@ async def scrape(browser: Browser) -> None:
                         log=log,
                     )
 
-                    sport, event, logo, ts, link = (
+                    sport, event, logo, ts = (
                         ev["sport"],
                         ev["event"],
                         ev["logo"],
                         ev["timestamp"],
-                        ev["link"],
                     )
 
                     key = f"[{sport}] {event} ({TAG})"
@@ -21,7 +21,7 @@ BASE_URL = "https://roxiestreams.info"
 
 SPORT_ENDPOINTS = {
     "fighting": "Fighting",
-    # "mlb": "MLB",
+    "mlb": "MLB",
     "motorsports": "Racing",
     "nba": "NBA",
     # "nfl": "American Football",
@@ -97,12 +97,12 @@ async def process_event(
         await page.goto(
             url,
             wait_until="domcontentloaded",
-            timeout=15_000,
+            timeout=6_000,
         )
 
         try:
             if btn := await page.wait_for_selector(
-                "button:has-text('Stream 1')",
+                "button.streambutton:nth-of-type(1)",
                 timeout=5_000,
             ):
                 await btn.click(force=True, click_count=2)
@@ -140,7 +140,7 @@ async def process_event(
             return
 
     except Exception as e:
-        log.warning(f"URL {url_num}) Exception while processing: {e}")
+        log.warning(f"URL {url_num}) {e}")
         return
 
     finally:
@@ -202,15 +202,15 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         async with network.event_context(browser) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                     )
@@ -222,11 +222,10 @@ async def scrape(browser: Browser) -> None:
                         log=log,
                     )
 
-                    sport, event, ts, link = (
+                    sport, event, ts = (
                         ev["sport"],
                         ev["event"],
                         ev["event_ts"],
-                        ev["link"],
                     )
 
                     tvg_id, logo = leagues.get_tvg_info(sport, event)
@@ -124,13 +124,13 @@ async def scrape() -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -142,11 +142,10 @@ async def scrape() -> None:
             )
 
             if url:
-                sport, event, ts, link = (
+                sport, event, ts = (
                     ev["sport"],
                     ev["event"],
                     ev["event_ts"],
-                    ev["link"],
                 )
 
                 tvg_id, logo = leagues.get_tvg_info(sport, event)
@@ -101,9 +101,9 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         now = Time.clean(Time.now())
 
         async with network.event_context(browser, stealth=False) as context:
@@ -111,7 +111,7 @@ async def scrape(browser: Browser) -> None:
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -125,11 +125,7 @@ async def scrape(browser: Browser) -> None:
                     )
 
                     if url:
-                        sport, event, link = (
-                            ev["sport"],
-                            ev["event"],
-                            ev["link"],
-                        )
+                        sport, event = ev["sport"], ev["event"]
 
                         key = f"[{sport}] {event} ({TAG})"
@@ -105,15 +105,15 @@ async def scrape() -> None:
 
     events = await get_events()
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         now = Time.clean(Time.now())
 
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -125,11 +125,7 @@ async def scrape() -> None:
             )
 
             if url:
-                sport, event, link = (
-                    ev["sport"],
-                    ev["event"],
-                    ev["link"],
-                )
+                sport, event = ev["sport"], ev["event"]
 
                key = f"[{sport}] {event} ({TAG})"
@@ -19,13 +19,12 @@ BASE_URL = "https://backend.streamcenter.live/api/Parties"
 CATEGORIES = {
     4: "Basketball",
     9: "Football",
-    # 13: "Baseball",
+    13: "Baseball",
     # 14: "American Football",
     15: "Motor Sport",
     16: "Hockey",
     17: "Fight MMA",
     18: "Boxing",
-    19: "NCAA Sports",
     20: "WWE",
     21: "Tennis",
 }
@@ -103,15 +102,15 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -125,11 +124,10 @@ async def scrape(browser: Browser) -> None:
                     )
 
                     if url:
-                        sport, event, ts, link = (
+                        sport, event, ts = (
                             ev["sport"],
                             ev["event"],
                             ev["timestamp"],
-                            ev["link"],
                         )
 
                         key = f"[{sport}] {event} ({TAG})"
@@ -20,17 +20,14 @@ HTML_CACHE = Cache(f"{TAG}-html", exp=28_800)
 BASE_URL = "https://livesports4u.net"
 
 CATEGORIES = {
-    "Soccer": "sport_68c02a4464a38",
     # "American Football": "sport_68c02a4465113",
-    # "Baseball": "sport_68c02a446582f",
+    "Baseball": "sport_68c02a446582f",
     "Basketball": "sport_68c02a4466011",
-    "Cricket": "sport_68c02a44669f3",
     "Hockey": "sport_68c02a4466f56",
     "MMA": "sport_68c02a44674e9",
     "Racing": "sport_68c02a4467a48",
-    # "Rugby": "sport_68c02a4467fc1",
+    "Soccer": "sport_68c02a4464a38",
     "Tennis": "sport_68c02a4468cf7",
-    # "Volleyball": "sport_68c02a4469422",
 }
 
 
@@ -148,15 +145,15 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         timeout=5,
@@ -170,11 +167,10 @@ async def scrape(browser: Browser) -> None:
                         log=log,
                     )
 
-                    sport, event, logo, link, ts = (
+                    sport, event, logo, ts = (
                         ev["sport"],
                         ev["event"],
                         ev["logo"],
-                        ev["link"],
                         ev["event_ts"],
                     )
 
@@ -21,15 +21,15 @@ API_FILE = Cache(f"{TAG}-api", exp=19_800)
 BASE_URL = "https://streamingon.org"
 
 SPORT_ENDPOINTS = [
-    "soccer",
-    # "nfl",
-    "nba",
-    "cfb",
-    # "mlb",
-    "nhl",
-    "ufc",
     "boxing",
+    # "cfb",
     "f1",
+    "mlb",
+    "nba",
+    # "nfl",
+    "nhl",
+    "soccer",
+    "ufc",
 ]
 
 
@@ -133,15 +133,15 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -155,11 +155,10 @@ async def scrape(browser: Browser) -> None:
                     )
 
                     if url:
-                        sport, event, ts, link = (
+                        sport, event, ts = (
                             ev["sport"],
                             ev["event"],
                             ev["timestamp"],
-                            ev["link"],
                         )
 
                         key = f"[{sport}] {event} ({TAG})"
@@ -147,15 +147,15 @@ async def scrape() -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         now = Time.clean(Time.now())
 
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -166,11 +166,7 @@ async def scrape() -> None:
                 log=log,
             )
 
-            sport, event, link = (
-                ev["sport"],
-                ev["event"],
-                ev["link"],
-            )
+            sport, event = ev["sport"], ev["event"]
 
            key = f"[{sport}] {event} ({TAG})"
@@ -1,7 +1,6 @@
 from functools import partial
-from urllib.parse import urljoin, urlparse
+from urllib.parse import urljoin
 
-from playwright.async_api import Browser
 from selectolax.parser import HTMLParser
 
 from .utils import Cache, Time, get_logger, leagues, network
@@ -17,12 +16,27 @@ CACHE_FILE = Cache(TAG, exp=86_400)
 BASE_URL = "https://thetvapp.to"
 
 
-def fix_url(s: str) -> str:
-    parsed = urlparse(s)
-
-    base = f"origin.{parsed.netloc.split('.', 1)[-1]}"
-
-    return urljoin(f"http://{base}", parsed.path.replace("tracks-v1a1/", ""))
+async def process_event(url: str, url_num: int) -> str | None:
+    if not (html_data := await network.request(url, log=log)):
+        log.info(f"URL {url_num}) Failed to load url.")
+
+        return
+
+    soup = HTMLParser(html_data.content)
+
+    if not (channel_name_elem := soup.css_first("#stream_name")):
+        log.warning(f"URL {url_num}) No channel found.")
+
+        return
+
+    if not (channel_name := channel_name_elem.attributes.get("name")):
+        log.warning(f"URL {url_num}) No channel found.")
+
+        return
+
+    log.info(f"URL {url_num}) Captured M3U8")
+
+    return f"http://origin.thetvapp.to/hls/{channel_name.strip().upper()}/mono.m3u8"
 
 
 async def get_events() -> list[dict[str, str]]:
@@ -59,7 +73,7 @@ async def get_events() -> list[dict[str, str]]:
     return events
 
 
-async def scrape(browser: Browser) -> None:
+async def scrape() -> None:
     if cached := CACHE_FILE.load():
         urls.update(cached)
 
@@ -71,50 +85,42 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events()
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         now = Time.clean(Time.now())
 
-        async with network.event_context(browser) as context:
-            for i, ev in enumerate(events, start=1):
-                async with network.event_page(context) as page:
-                    handler = partial(
-                        network.process_event,
-                        url=ev["link"],
-                        url_num=i,
-                        page=page,
-                        log=log,
-                    )
+        for i, ev in enumerate(events, start=1):
+            handler = partial(
+                process_event,
+                url=(link := ev["link"]),
+                url_num=i,
+            )
 
-                    url = await network.safe_process(
-                        handler,
-                        url_num=i,
-                        semaphore=network.PW_S,
-                        log=log,
-                    )
+            url = await network.safe_process(
+                handler,
+                url_num=i,
+                semaphore=network.HTTP_S,
+                log=log,
+            )
 
-                    if url:
-                        sport, event, link = (
-                            ev["sport"],
-                            ev["event"],
-                            ev["link"],
-                        )
+            if url:
+                sport, event = ev["sport"], ev["event"]
 
-                        key = f"[{sport}] {event} ({TAG})"
+                key = f"[{sport}] {event} ({TAG})"
 
-                        tvg_id, logo = leagues.get_tvg_info(sport, event)
+                tvg_id, logo = leagues.get_tvg_info(sport, event)
 
-                        entry = {
-                            "url": fix_url(url),
-                            "logo": logo,
-                            "base": BASE_URL,
-                            "timestamp": now.timestamp(),
-                            "id": tvg_id or "Live.Event.us",
-                            "link": link,
-                        }
+                entry = {
+                    "url": url,
+                    "logo": logo,
+                    "base": BASE_URL,
+                    "timestamp": now.timestamp(),
+                    "id": tvg_id or "Live.Event.us",
+                    "link": link,
+                }
 
-                        urls[key] = entry
+                urls[key] = entry
 
         log.info(f"Collected and cached {len(urls)} new event(s)")
M3U8/scrapers/utils/easylist.txt (new file, 59051 lines; diff suppressed because it is too large)
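For reference, `Network.blocked_domains()` (added further down) simply splits this file into lines and matches each entry against request hostnames by exact or suffix comparison, so the expected format is one bare domain per line. Illustrative entries only, not taken from the actual file:

doubleclick.net
googlesyndication.com
adnxs.com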
@@ -15,7 +15,6 @@ LOG_FMT = (
 )
 
 COLORS = {
-    "DEBUG": "\033[36m",
     "INFO": "\033[32m",
     "WARNING": "\033[33m",
     "ERROR": "\033[31m",
M3U8/scrapers/utils/stealth.js (new file, 27 lines)
@@ -0,0 +1,27 @@
+Object.defineProperty(navigator, "webdriver", {
+  get: () => undefined,
+});
+
+Object.defineProperty(navigator, "languages", {
+  get: () => ["en-US", "en"],
+});
+
+Object.defineProperty(navigator, "hardwareConcurrency", {
+  get: () => 8,
+});
+
+Object.defineProperty(navigator, "deviceMemory", {
+  get: () => 8,
+});
+
+Object.defineProperty(navigator, "plugins", {
+  get: () => [],
+});
+
+const getParameter = WebGLRenderingContext.prototype.getParameter;
+WebGLRenderingContext.prototype.getParameter = function (param) {
+  if (param === 37445) return "Google Inc.";
+  if (param === 37446)
+    return "ANGLE (Intel(R) UHD Graphics Direct3D11 vs_5_0 ps_5_0)";
+  return getParameter.apply(this, [param]);
+};
@@ -4,11 +4,20 @@ import random
 import re
 from collections.abc import Awaitable, Callable
 from contextlib import asynccontextmanager
-from functools import partial
+from functools import cache, partial
+from pathlib import Path
 from typing import AsyncGenerator, TypeVar
+from urllib.parse import urlparse
 
 import httpx
-from playwright.async_api import Browser, BrowserContext, Page, Playwright, Request
+from playwright.async_api import (
+    Browser,
+    BrowserContext,
+    Page,
+    Playwright,
+    Request,
+    Route,
+)
 
 from .logger import get_logger
 
@@ -73,7 +82,7 @@ class Network:
         fn: Callable[[], Awaitable[T]],
         url_num: int,
         semaphore: asyncio.Semaphore,
-        timeout: int | float = 10,
+        timeout: int | float = 30,
         log: logging.Logger | None = None,
     ) -> T | None:
 
@@ -98,7 +107,7 @@ class Network:
                     pass
 
                 except Exception as e:
-                    log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")
+                    log.warning(f"URL {url_num}) Ignore exception after timeout: {e}")
 
                 return
             except Exception as e:
@@ -106,6 +115,35 @@ class Network:
 
             return
 
+    @staticmethod
+    @cache
+    def blocked_domains() -> list[str]:
+        return (
+            (Path(__file__).parent / "easylist.txt")
+            .read_text(encoding="utf-8")
+            .splitlines()
+        )
+
+    @staticmethod
+    def to_block(request: Request) -> bool:
+        hostname = (urlparse(request.url).hostname or "").lower()
+
+        return any(
+            hostname == domain or hostname.endswith(f".{domain}")
+            for domain in Network.blocked_domains()
+        )
+
+    @staticmethod
+    async def _adblock(route: Route) -> None:
+        request = route.request
+
+        if request.resource_type not in ["script", "image", "media", "xhr"]:
+            await route.continue_()
+
+            return
+
+        await route.abort() if Network.to_block(request) else await route.continue_()
+
     @staticmethod
     @asynccontextmanager
     async def event_context(
@@ -113,78 +151,30 @@ class Network:
         stealth: bool = True,
         ignore_https: bool = False,
     ) -> AsyncGenerator[BrowserContext, None]:
 
         context: BrowserContext | None = None
 
         try:
-            context = await browser.new_context(
-                user_agent=Network.UA if stealth else None,
-                ignore_https_errors=ignore_https,
-                viewport={"width": 1366, "height": 768},
-                device_scale_factor=1,
-                locale="en-US",
-                timezone_id="America/New_York",
-                color_scheme="dark",
-                permissions=["geolocation"],
-                extra_http_headers=(
-                    {
-                        "Accept-Language": "en-US,en;q=0.9",
-                        "Upgrade-Insecure-Requests": "1",
-                    }
-                    if stealth
-                    else None
-                ),
-            )
-
             if stealth:
-                await context.add_init_script("""
-                    Object.defineProperty(navigator, "webdriver", { get: () => undefined });
-
-                    Object.defineProperty(navigator, "languages", {
-                        get: () => ["en-US", "en"],
-                    });
-
-                    Object.defineProperty(navigator, "plugins", {
-                        get: () => [1, 2, 3, 4],
-                    });
-
-                    const elementDescriptor = Object.getOwnPropertyDescriptor(
-                        HTMLElement.prototype,
-                        "offsetHeight"
-                    );
-
-                    Object.defineProperty(HTMLDivElement.prototype, "offsetHeight", {
-                        ...elementDescriptor,
-                        get: function () {
-                            if (this.id === "modernizr") {
-                                return 24;
-                            }
-                            return elementDescriptor.get.apply(this);
-                        },
-                    });
-
-                    Object.defineProperty(window.screen, "width", { get: () => 1366 });
-                    Object.defineProperty(window.screen, "height", { get: () => 768 });
-
-                    const getParameter = WebGLRenderingContext.prototype.getParameter;
-
-                    WebGLRenderingContext.prototype.getParameter = function (param) {
-                        if (param === 37445) return "Intel Inc."; // UNMASKED_VENDOR_WEBGL
-                        if (param === 37446) return "Intel Iris OpenGL Engine"; // UNMASKED_RENDERER_WEBGL
-                        return getParameter.apply(this, [param]);
-                    };
-
-                    const observer = new MutationObserver((mutations) => {
-                        mutations.forEach((mutation) => {
-                            mutation.addedNodes.forEach((node) => {
-                                if (node.tagName === "IFRAME" && node.hasAttribute("sandbox")) {
-                                    node.removeAttribute("sandbox");
-                                }
-                            });
-                        });
-                    });
-
-                    observer.observe(document.documentElement, { childList: true, subtree: true });
-                """)
+                context = await browser.new_context(
+                    user_agent=Network.UA,
+                    ignore_https_errors=ignore_https,
+                    viewport={"width": 1366, "height": 768},
+                    device_scale_factor=1,
+                    locale="en-US",
+                    timezone_id="America/New_York",
+                    color_scheme="dark",
+                    extra_http_headers=(
+                        {
+                            "Accept-Language": "en-US,en;q=0.9",
+                            "Upgrade-Insecure-Requests": "1",
+                        }
+                    ),
+                )
+
+                await context.add_init_script(path=Path(__file__).parent / "stealth.js")
+
+                await context.route("**/*", Network._adblock)
 
             else:
                 context = await browser.new_context()
@@ -261,7 +251,7 @@ class Network:
         await page.goto(
             url,
             wait_until="domcontentloaded",
-            timeout=15_000,
+            timeout=6_000,
         )
 
         wait_task = asyncio.create_task(got_one.wait())
@@ -292,7 +282,7 @@ class Network:
             return
 
         except Exception as e:
-            log.warning(f"URL {url_num}) Exception while processing: {e}")
+            log.warning(f"URL {url_num}) {e}")
 
            return
 
@@ -29,17 +29,13 @@ BASE_MIRRORS = [
 
 VALID_SPORTS = [
     # "american-football",
-    # "australian-football",
-    # "baseball",
+    "baseball",
     "basketball",
-    "cricket",
-    "darts",
     "fighting",
     "football",
     "golf",
     "hockey",
     "racing",
-    # "rugby",
     "tennis",
     "volleyball",
 ]
@@ -96,7 +92,7 @@ async def process_event(
         await page.goto(
             url,
             wait_until="domcontentloaded",
-            timeout=10_000,
+            timeout=8_000,
         )
 
         await page.wait_for_timeout(2_000)
@@ -170,7 +166,7 @@ async def process_event(
             return nones
 
     except Exception as e:
-        log.warning(f"URL {url_num}) Exception while processing: {e}")
+        log.warning(f"URL {url_num}) {e}")
 
         return nones
 
@@ -256,15 +252,15 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(base_url, cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                     )
@@ -277,12 +273,11 @@ async def scrape(browser: Browser) -> None:
                         timeout=20,
                     )
 
-                    sport, event, logo, ts, link = (
+                    sport, event, logo, ts = (
                         ev["sport"],
                         ev["event"],
                         ev["logo"],
                         ev["timestamp"],
-                        ev["link"],
                     )
 
                     key = f"[{sport}] {event} ({TAG})"
@@ -126,15 +126,15 @@ async def scrape(browser: Browser) -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         async with network.event_context(browser) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -148,11 +148,10 @@ async def scrape(browser: Browser) -> None:
                     )
 
                     if url:
-                        sport, event, ts, link = (
+                        sport, event, ts = (
                             ev["sport"],
                             ev["event"],
                             ev["event_ts"],
-                            ev["link"],
                         )
 
                         key = f"[{sport}] {event} ({TAG})"
@@ -18,8 +18,7 @@ CACHE_FILE = Cache(TAG, exp=10_800)
 BASE_URL = "https://xstreameast.com"
 
 SPORT_ENDPOINTS = [
-    # "f1",
-    # "mlb",
+    "mlb",
     "mma",
     "nba",
     # "nfl",
@@ -137,15 +136,15 @@ async def scrape() -> None:
 
     events = await get_events(cached_urls.keys())
 
-    log.info(f"Processing {len(events)} new URL(s)")
 
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
 
         now = Time.clean(Time.now())
 
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -156,11 +155,7 @@ async def scrape() -> None:
                 log=log,
             )
 
-            sport, event, link = (
-                ev["sport"],
-                ev["event"],
-                ev["link"],
-            )
+            sport, event = ev["sport"], ev["event"]
 
            key = f"[{sport}] {event} ({TAG})"