- add adblocking
- edit roxie.py scraping method
- edit tvapp.py scraping method
- modify sports to scrape
- misc edits
doms9 2026-02-19 18:16:27 -05:00
parent 12d6d959f4
commit 00000d90e4
26 changed files with 59290 additions and 264 deletions

View file

@@ -71,7 +71,6 @@ async def main() -> None:
         asyncio.create_task(streamcenter.scrape(xtrnl_brwsr)),
         asyncio.create_task(streamhub.scrape(xtrnl_brwsr)),
         asyncio.create_task(streamsgate.scrape(xtrnl_brwsr)),
-        asyncio.create_task(tvapp.scrape(hdl_brwsr)),
         asyncio.create_task(webcast.scrape(hdl_brwsr)),
     ]
@@ -83,6 +82,7 @@ async def main() -> None:
         asyncio.create_task(shark.scrape()),
         asyncio.create_task(streambtw.scrape()),
         asyncio.create_task(totalsportek.scrape()),
+        asyncio.create_task(tvapp.scrape()),
         asyncio.create_task(xstreameast.scrape()),
     ]

View file

@@ -99,15 +99,15 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         async with network.event_context(browser) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -121,11 +121,10 @@ async def scrape(browser: Browser) -> None:
                     )
                     if url:
-                        sport, event, ts, link = (
+                        sport, event, ts = (
                             ev["sport"],
                             ev["event"],
                             ev["timestamp"],
-                            ev["link"],
                         )
                         key = f"[{sport}] {event} ({TAG})"
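Note: the url=(link := ev["link"]) change above recurs in nearly every scraper in this commit. The walrus operator binds link as a side effect of building the keyword argument, which is why ev["link"] can be dropped from the tuple unpack a few lines later. A minimal sketch of the pattern, with a hypothetical fetch function and event dict standing in for the real handler:

    from functools import partial

    ev = {"sport": "NBA", "event": "Finals", "link": "https://example.com/g1"}


    def fetch(url: str, url_num: int) -> str:  # hypothetical stand-in for process_event
        return f"URL {url_num}) {url}"


    # the walrus operator binds `link` while the kwarg is built,
    # so ev["link"] never has to be unpacked a second time
    handler = partial(fetch, url=(link := ev["link"]), url_num=1)

    sport, event = ev["sport"], ev["event"]
    print(handler(), sport, event, link)  # `link` is already in scope here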

View file

@@ -89,15 +89,15 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         async with network.event_context(browser) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -111,10 +111,9 @@ async def scrape(browser: Browser) -> None:
                     )
                     if url:
-                        sport, event, link, ts = (
+                        sport, event, ts = (
                             ev["sport"],
                             ev["event"],
-                            ev["link"],
                             ev["timestamp"],
                         )

View file

@@ -101,15 +101,15 @@ async def scrape() -> None:
     events = await get_events(cached_hrefs)
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         now = Time.clean(Time.now())
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -120,11 +120,7 @@ async def scrape() -> None:
                 log=log,
             )
-            sport, event, link = (
-                ev["sport"],
-                ev["event"],
-                ev["link"],
-            )
+            sport, event = ev["sport"], ev["event"]
             key = f"[{sport}] {event} ({TAG})"

View file

@@ -114,15 +114,15 @@ async def scrape() -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         now = Time.clean(Time.now())
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -133,11 +133,7 @@ async def scrape() -> None:
                 log=log,
             )
-            sport, event, link = (
-                ev["sport"],
-                ev["event"],
-                ev["link"],
-            )
+            sport, event = ev["sport"], ev["event"]
             key = f"[{sport}] {event} ({TAG})"

View file

@@ -18,12 +18,14 @@ XML_CACHE = Cache(f"{TAG}-xml", exp=28_000)
 BASE_URL = "https://cdn.livetv873.me/rss/upcoming_en.xml"
 
-VALID_SPORTS = {
-    "Football",
+VALID_SPORTS = [
+    "MLB. Preseason",
+    "MLB",
     "Basketball",
+    "Football",
     "Ice Hockey",
     "Olympic Games",
-}
+]
 
 
 async def process_event(
@@ -48,7 +50,7 @@ async def process_event(
         await page.goto(
             url,
             wait_until="domcontentloaded",
-            timeout=15_000,
+            timeout=10_000,
         )
 
         await page.wait_for_timeout(1_500)
@@ -108,7 +110,7 @@ async def process_event(
            return
 
    except Exception as e:
-        log.warning(f"URL {url_num}) Exception while processing: {e}")
+        log.warning(f"URL {url_num}) {e}")
        return
 
    finally:
@@ -205,15 +207,15 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         async with network.event_context(browser, ignore_https=True) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                     )
@@ -226,12 +228,11 @@ async def scrape(browser: Browser) -> None:
                         timeout=20,
                     )
 
-                    sport, league, event, ts, link = (
+                    sport, league, event, ts = (
                         ev["sport"],
                         ev["league"],
                         ev["event"],
                         ev["event_ts"],
-                        ev["link"],
                     )
 
                     key = f"[{sport} - {league}] {event} ({TAG})"

View file

@@ -135,13 +135,13 @@ async def scrape() -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -152,10 +152,9 @@ async def scrape() -> None:
                 log=log,
             )
-            sport, event, link, ts = (
+            sport, event, ts = (
                 ev["sport"],
                 ev["event"],
-                ev["link"],
                 ev["event_ts"],
             )

View file

@@ -101,15 +101,15 @@ async def scrape() -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         now = Time.clean(Time.now())
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -120,11 +120,7 @@ async def scrape() -> None:
                 log=log,
             )
-            sport, event, link = (
-                ev["sport"],
-                ev["event"],
-                ev["link"],
-            )
+            sport, event = ev["sport"], ev["event"]
             key = f"[{sport}] {event} ({TAG})"

View file

@@ -22,7 +22,7 @@ async def get_api_data(page: Page) -> dict[str, list[dict, str, str]]:
     await page.goto(
         url := urljoin(BASE_URL, "backend/livetv/events"),
         wait_until="domcontentloaded",
-        timeout=10_000,
+        timeout=6_000,
     )
 
     raw_json = await page.locator("pre").inner_text(timeout=5_000)

View file

@@ -100,15 +100,15 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(base_url, cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         timeout=6,
@@ -122,12 +122,11 @@ async def scrape(browser: Browser) -> None:
                         log=log,
                     )
 
-                    sport, event, logo, ts, link = (
+                    sport, event, logo, ts = (
                         ev["sport"],
                         ev["event"],
                         ev["logo"],
                         ev["timestamp"],
-                        ev["link"],
                     )
 
                     key = f"[{sport}] {event} ({TAG})"

View file

@@ -21,7 +21,7 @@ BASE_URL = "https://roxiestreams.info"
 SPORT_ENDPOINTS = {
     "fighting": "Fighting",
-    # "mlb": "MLB",
+    "mlb": "MLB",
     "motorsports": "Racing",
     "nba": "NBA",
     # "nfl": "American Football",
@@ -97,12 +97,12 @@ async def process_event(
        await page.goto(
            url,
            wait_until="domcontentloaded",
-            timeout=15_000,
+            timeout=6_000,
        )
 
        try:
            if btn := await page.wait_for_selector(
-                "button:has-text('Stream 1')",
+                "button.streambutton:nth-of-type(1)",
                timeout=5_000,
            ):
                await btn.click(force=True, click_count=2)
@@ -140,7 +140,7 @@ async def process_event(
            return
 
    except Exception as e:
-        log.warning(f"URL {url_num}) Exception while processing: {e}")
+        log.warning(f"URL {url_num}) {e}")
        return
 
    finally:
@@ -202,15 +202,15 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         async with network.event_context(browser) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                     )
@@ -222,11 +222,10 @@ async def scrape(browser: Browser) -> None:
                         log=log,
                     )
 
-                    sport, event, ts, link = (
+                    sport, event, ts = (
                         ev["sport"],
                         ev["event"],
                         ev["event_ts"],
-                        ev["link"],
                     )
 
                     tvg_id, logo = leagues.get_tvg_info(sport, event)

View file

@@ -124,13 +124,13 @@ async def scrape() -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -142,11 +142,10 @@ async def scrape() -> None:
             )
 
             if url:
-                sport, event, ts, link = (
+                sport, event, ts = (
                     ev["sport"],
                     ev["event"],
                     ev["event_ts"],
-                    ev["link"],
                 )
 
                 tvg_id, logo = leagues.get_tvg_info(sport, event)

View file

@@ -101,9 +101,9 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         now = Time.clean(Time.now())
 
         async with network.event_context(browser, stealth=False) as context:
@@ -111,7 +111,7 @@ async def scrape(browser: Browser) -> None:
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -125,11 +125,7 @@ async def scrape(browser: Browser) -> None:
                     )
                     if url:
-                        sport, event, link = (
-                            ev["sport"],
-                            ev["event"],
-                            ev["link"],
-                        )
+                        sport, event = ev["sport"], ev["event"]
                         key = f"[{sport}] {event} ({TAG})"

View file

@@ -105,15 +105,15 @@ async def scrape() -> None:
     events = await get_events()
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         now = Time.clean(Time.now())
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -125,11 +125,7 @@ async def scrape() -> None:
             )
             if url:
-                sport, event, link = (
-                    ev["sport"],
-                    ev["event"],
-                    ev["link"],
-                )
+                sport, event = ev["sport"], ev["event"]
                 key = f"[{sport}] {event} ({TAG})"

View file

@@ -19,13 +19,12 @@ BASE_URL = "https://backend.streamcenter.live/api/Parties"
 CATEGORIES = {
     4: "Basketball",
     9: "Football",
-    # 13: "Baseball",
+    13: "Baseball",
     # 14: "American Football",
     15: "Motor Sport",
     16: "Hockey",
     17: "Fight MMA",
     18: "Boxing",
-    19: "NCAA Sports",
     20: "WWE",
     21: "Tennis",
 }
@@ -103,15 +102,15 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -125,11 +124,10 @@ async def scrape(browser: Browser) -> None:
                     )
                     if url:
-                        sport, event, ts, link = (
+                        sport, event, ts = (
                             ev["sport"],
                             ev["event"],
                             ev["timestamp"],
-                            ev["link"],
                         )
                         key = f"[{sport}] {event} ({TAG})"

View file

@@ -20,17 +20,14 @@ HTML_CACHE = Cache(f"{TAG}-html", exp=28_800)
 BASE_URL = "https://livesports4u.net"
 
 CATEGORIES = {
-    "Soccer": "sport_68c02a4464a38",
     # "American Football": "sport_68c02a4465113",
-    # "Baseball": "sport_68c02a446582f",
+    "Baseball": "sport_68c02a446582f",
     "Basketball": "sport_68c02a4466011",
-    "Cricket": "sport_68c02a44669f3",
     "Hockey": "sport_68c02a4466f56",
     "MMA": "sport_68c02a44674e9",
     "Racing": "sport_68c02a4467a48",
-    # "Rugby": "sport_68c02a4467fc1",
+    "Soccer": "sport_68c02a4464a38",
     "Tennis": "sport_68c02a4468cf7",
-    # "Volleyball": "sport_68c02a4469422",
 }
@@ -148,15 +145,15 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         timeout=5,
@@ -170,11 +167,10 @@ async def scrape(browser: Browser) -> None:
                         log=log,
                     )
 
-                    sport, event, logo, link, ts = (
+                    sport, event, logo, ts = (
                         ev["sport"],
                         ev["event"],
                         ev["logo"],
-                        ev["link"],
                         ev["event_ts"],
                     )

View file

@@ -21,15 +21,15 @@ API_FILE = Cache(f"{TAG}-api", exp=19_800)
 BASE_URL = "https://streamingon.org"
 
 SPORT_ENDPOINTS = [
-    "soccer",
-    # "nfl",
-    "nba",
-    "cfb",
-    # "mlb",
-    "nhl",
-    "ufc",
     "boxing",
+    # "cfb",
     "f1",
+    "mlb",
+    "nba",
+    # "nfl",
+    "nhl",
+    "soccer",
+    "ufc",
 ]
@@ -133,15 +133,15 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -155,11 +155,10 @@ async def scrape(browser: Browser) -> None:
                     )
                     if url:
-                        sport, event, ts, link = (
+                        sport, event, ts = (
                             ev["sport"],
                             ev["event"],
                             ev["timestamp"],
-                            ev["link"],
                         )
                         key = f"[{sport}] {event} ({TAG})"

View file

@@ -147,15 +147,15 @@ async def scrape() -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         now = Time.clean(Time.now())
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -166,11 +166,7 @@ async def scrape() -> None:
                 log=log,
             )
-            sport, event, link = (
-                ev["sport"],
-                ev["event"],
-                ev["link"],
-            )
+            sport, event = ev["sport"], ev["event"]
             key = f"[{sport}] {event} ({TAG})"

View file

@@ -1,7 +1,6 @@
 from functools import partial
-from urllib.parse import urljoin, urlparse
+from urllib.parse import urljoin
 
-from playwright.async_api import Browser
 from selectolax.parser import HTMLParser
 
 from .utils import Cache, Time, get_logger, leagues, network
@@ -17,12 +16,27 @@ CACHE_FILE = Cache(TAG, exp=86_400)
 BASE_URL = "https://thetvapp.to"
 
 
-def fix_url(s: str) -> str:
-    parsed = urlparse(s)
-
-    base = f"origin.{parsed.netloc.split('.', 1)[-1]}"
-
-    return urljoin(f"http://{base}", parsed.path.replace("tracks-v1a1/", ""))
+async def process_event(url: str, url_num: int) -> str | None:
+    if not (html_data := await network.request(url, log=log)):
+        log.info(f"URL {url_num}) Failed to load url.")
+        return
+
+    soup = HTMLParser(html_data.content)
+
+    if not (channel_name_elem := soup.css_first("#stream_name")):
+        log.warning(f"URL {url_num}) No channel found.")
+        return
+
+    if not (channel_name := channel_name_elem.attributes.get("name")):
+        log.warning(f"URL {url_num}) No channel found.")
+        return
+
+    log.info(f"URL {url_num}) Captured M3U8")
+
+    return f"http://origin.thetvapp.to/hls/{channel_name.strip().upper()}/mono.m3u8"
 
 
 async def get_events() -> list[dict[str, str]]:
@@ -59,7 +73,7 @@ async def get_events() -> list[dict[str, str]]:
     return events
 
 
-async def scrape(browser: Browser) -> None:
+async def scrape() -> None:
     if cached := CACHE_FILE.load():
         urls.update(cached)
@@ -71,50 +85,42 @@ async def scrape(browser: Browser) -> None:
     events = await get_events()
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         now = Time.clean(Time.now())
 
-        async with network.event_context(browser) as context:
-            for i, ev in enumerate(events, start=1):
-                async with network.event_page(context) as page:
-                    handler = partial(
-                        network.process_event,
-                        url=ev["link"],
-                        url_num=i,
-                        page=page,
-                        log=log,
-                    )
+        for i, ev in enumerate(events, start=1):
+            handler = partial(
+                process_event,
+                url=(link := ev["link"]),
+                url_num=i,
+            )
 
             url = await network.safe_process(
                 handler,
                 url_num=i,
-                semaphore=network.PW_S,
+                semaphore=network.HTTP_S,
                 log=log,
             )
 
             if url:
-                sport, event, link = (
-                    ev["sport"],
-                    ev["event"],
-                    ev["link"],
-                )
+                sport, event = ev["sport"], ev["event"]
                 key = f"[{sport}] {event} ({TAG})"
 
                 tvg_id, logo = leagues.get_tvg_info(sport, event)
 
                 entry = {
-                    "url": fix_url(url),
+                    "url": url,
                     "logo": logo,
                     "base": BASE_URL,
                     "timestamp": now.timestamp(),
                     "id": tvg_id or "Live.Event.us",
                     "link": link,
                 }
 
                 urls[key] = entry
 
     log.info(f"Collected and cached {len(urls)} new event(s)")

File diff suppressed because it is too large

View file

@@ -15,7 +15,6 @@ LOG_FMT = (
 )
 
 COLORS = {
-    "DEBUG": "\033[36m",
     "INFO": "\033[32m",
     "WARNING": "\033[33m",
     "ERROR": "\033[31m",

View file

@@ -0,0 +1,27 @@
+Object.defineProperty(navigator, "webdriver", {
+  get: () => undefined,
+});
+
+Object.defineProperty(navigator, "languages", {
+  get: () => ["en-US", "en"],
+});
+
+Object.defineProperty(navigator, "hardwareConcurrency", {
+  get: () => 8,
+});
+
+Object.defineProperty(navigator, "deviceMemory", {
+  get: () => 8,
+});
+
+Object.defineProperty(navigator, "plugins", {
+  get: () => [],
+});
+
+const getParameter = WebGLRenderingContext.prototype.getParameter;
+WebGLRenderingContext.prototype.getParameter = function (param) {
+  if (param === 37445) return "Google Inc.";
+  if (param === 37446)
+    return "ANGLE (Intel(R) UHD Graphics Direct3D11 vs_5_0 ps_5_0)";
+  return getParameter.apply(this, [param]);
+};

View file

@@ -4,11 +4,20 @@ import random
 import re
 from collections.abc import Awaitable, Callable
 from contextlib import asynccontextmanager
-from functools import partial
+from functools import cache, partial
+from pathlib import Path
 from typing import AsyncGenerator, TypeVar
+from urllib.parse import urlparse
 
 import httpx
-from playwright.async_api import Browser, BrowserContext, Page, Playwright, Request
+from playwright.async_api import (
+    Browser,
+    BrowserContext,
+    Page,
+    Playwright,
+    Request,
+    Route,
+)
 
 from .logger import get_logger
@@ -73,7 +82,7 @@ class Network:
         fn: Callable[[], Awaitable[T]],
         url_num: int,
         semaphore: asyncio.Semaphore,
-        timeout: int | float = 10,
+        timeout: int | float = 30,
         log: logging.Logger | None = None,
     ) -> T | None:
@@ -98,7 +107,7 @@ class Network:
                 pass
 
             except Exception as e:
-                log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")
+                log.warning(f"URL {url_num}) Ignore exception after timeout: {e}")
 
             return
 
         except Exception as e:
@@ -106,6 +115,35 @@ class Network:
             return
 
+    @staticmethod
+    @cache
+    def blocked_domains() -> list[str]:
+        return (
+            (Path(__file__).parent / "easylist.txt")
+            .read_text(encoding="utf-8")
+            .splitlines()
+        )
+
+    @staticmethod
+    def to_block(request: Request) -> bool:
+        hostname = (urlparse(request.url).hostname or "").lower()
+
+        return any(
+            hostname == domain or hostname.endswith(f".{domain}")
+            for domain in Network.blocked_domains()
+        )
+
+    @staticmethod
+    async def _adblock(route: Route) -> None:
+        request = route.request
+
+        if request.resource_type not in ["script", "image", "media", "xhr"]:
+            await route.continue_()
+            return
+
+        await route.abort() if Network.to_block(request) else await route.continue_()
+
     @staticmethod
     @asynccontextmanager
     async def event_context(
@@ -113,78 +151,30 @@
         stealth: bool = True,
         ignore_https: bool = False,
     ) -> AsyncGenerator[BrowserContext, None]:
         context: BrowserContext | None = None
 
         try:
-            context = await browser.new_context(
-                user_agent=Network.UA if stealth else None,
-                ignore_https_errors=ignore_https,
-                viewport={"width": 1366, "height": 768},
-                device_scale_factor=1,
-                locale="en-US",
-                timezone_id="America/New_York",
-                color_scheme="dark",
-                permissions=["geolocation"],
-                extra_http_headers=(
-                    {
-                        "Accept-Language": "en-US,en;q=0.9",
-                        "Upgrade-Insecure-Requests": "1",
-                    }
-                    if stealth
-                    else None
-                ),
-            )
-
             if stealth:
-                await context.add_init_script("""
-                    Object.defineProperty(navigator, "webdriver", { get: () => undefined });
-
-                    Object.defineProperty(navigator, "languages", {
-                        get: () => ["en-US", "en"],
-                    });
-
-                    Object.defineProperty(navigator, "plugins", {
-                        get: () => [1, 2, 3, 4],
-                    });
-
-                    const elementDescriptor = Object.getOwnPropertyDescriptor(
-                        HTMLElement.prototype,
-                        "offsetHeight"
-                    );
-
-                    Object.defineProperty(HTMLDivElement.prototype, "offsetHeight", {
-                        ...elementDescriptor,
-                        get: function () {
-                            if (this.id === "modernizr") {
-                                return 24;
-                            }
-                            return elementDescriptor.get.apply(this);
-                        },
-                    });
-
-                    Object.defineProperty(window.screen, "width", { get: () => 1366 });
-                    Object.defineProperty(window.screen, "height", { get: () => 768 });
-
-                    const getParameter = WebGLRenderingContext.prototype.getParameter;
-                    WebGLRenderingContext.prototype.getParameter = function (param) {
-                        if (param === 37445) return "Intel Inc."; // UNMASKED_VENDOR_WEBGL
-                        if (param === 37446) return "Intel Iris OpenGL Engine"; // UNMASKED_RENDERER_WEBGL
-                        return getParameter.apply(this, [param]);
-                    };
-
-                    const observer = new MutationObserver((mutations) => {
-                        mutations.forEach((mutation) => {
-                            mutation.addedNodes.forEach((node) => {
-                                if (node.tagName === "IFRAME" && node.hasAttribute("sandbox")) {
-                                    node.removeAttribute("sandbox");
-                                }
-                            });
-                        });
-                    });
-
-                    observer.observe(document.documentElement, { childList: true, subtree: true });
-                """)
+                context = await browser.new_context(
+                    user_agent=Network.UA,
+                    ignore_https_errors=ignore_https,
+                    viewport={"width": 1366, "height": 768},
+                    device_scale_factor=1,
+                    locale="en-US",
+                    timezone_id="America/New_York",
+                    color_scheme="dark",
+                    extra_http_headers=(
+                        {
+                            "Accept-Language": "en-US,en;q=0.9",
+                            "Upgrade-Insecure-Requests": "1",
+                        }
+                    ),
+                )
+
+                await context.add_init_script(path=Path(__file__).parent / "stealth.js")
+
+                await context.route("**/*", Network._adblock)
 
             else:
                 context = await browser.new_context()
@@ -261,7 +251,7 @@ class Network:
             await page.goto(
                 url,
                 wait_until="domcontentloaded",
-                timeout=15_000,
+                timeout=6_000,
             )
 
             wait_task = asyncio.create_task(got_one.wait())
@@ -292,7 +282,7 @@ class Network:
             return
 
         except Exception as e:
-            log.warning(f"URL {url_num}) Exception while processing: {e}")
+            log.warning(f"URL {url_num}) {e}")
             return
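Note: the adblocking added above intercepts every request in stealth contexts via context.route("**/*", Network._adblock), lets anything other than script/image/media/xhr traffic through, and aborts requests whose hostname (or a parent domain of it) appears in the bundled easylist.txt. A minimal standalone sketch of the same routing idea; the blocklist entries and target URL here are hypothetical, and the real code reads its domains from easylist.txt instead:

    import asyncio
    from urllib.parse import urlparse

    from playwright.async_api import Route, async_playwright

    # hypothetical stand-in for the easylist.txt domain list
    BLOCKED = ["ads.example.com", "tracker.example.net"]


    def is_blocked(url: str) -> bool:
        host = (urlparse(url).hostname or "").lower()
        # match the domain itself and any of its subdomains
        return any(host == d or host.endswith(f".{d}") for d in BLOCKED)


    async def adblock(route: Route) -> None:
        req = route.request
        # only filter resource types that can carry ads or trackers
        if req.resource_type in ("script", "image", "media", "xhr") and is_blocked(req.url):
            await route.abort()
        else:
            await route.continue_()


    async def main() -> None:
        async with async_playwright() as pw:
            browser = await pw.chromium.launch()
            context = await browser.new_context()
            await context.route("**/*", adblock)  # intercept every request
            page = await context.new_page()
            await page.goto("https://example.com")
            await browser.close()


    asyncio.run(main())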

View file

@@ -29,17 +29,13 @@ BASE_MIRRORS = [
 VALID_SPORTS = [
     # "american-football",
-    # "australian-football",
-    # "baseball",
+    "baseball",
     "basketball",
-    "cricket",
-    "darts",
     "fighting",
     "football",
     "golf",
     "hockey",
     "racing",
-    # "rugby",
     "tennis",
     "volleyball",
 ]
@@ -96,7 +92,7 @@ async def process_event(
        await page.goto(
            url,
            wait_until="domcontentloaded",
-            timeout=10_000,
+            timeout=8_000,
        )
 
        await page.wait_for_timeout(2_000)
@@ -170,7 +166,7 @@ async def process_event(
            return nones
 
    except Exception as e:
-        log.warning(f"URL {url_num}) Exception while processing: {e}")
+        log.warning(f"URL {url_num}) {e}")
 
        return nones
@@ -256,15 +252,15 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(base_url, cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                     )
@@ -277,12 +273,11 @@ async def scrape(browser: Browser) -> None:
                         timeout=20,
                    )
 
-                    sport, event, logo, ts, link = (
+                    sport, event, logo, ts = (
                         ev["sport"],
                         ev["event"],
                         ev["logo"],
                         ev["timestamp"],
-                        ev["link"],
                     )
 
                     key = f"[{sport}] {event} ({TAG})"

View file

@@ -126,15 +126,15 @@ async def scrape(browser: Browser) -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         async with network.event_context(browser) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
                     handler = partial(
                         network.process_event,
-                        url=ev["link"],
+                        url=(link := ev["link"]),
                         url_num=i,
                         page=page,
                         log=log,
@@ -148,11 +148,10 @@ async def scrape(browser: Browser) -> None:
                     )
                     if url:
-                        sport, event, ts, link = (
+                        sport, event, ts = (
                             ev["sport"],
                             ev["event"],
                             ev["event_ts"],
-                            ev["link"],
                         )
                         key = f"[{sport}] {event} ({TAG})"

View file

@@ -18,8 +18,7 @@ CACHE_FILE = Cache(TAG, exp=10_800)
 BASE_URL = "https://xstreameast.com"
 
 SPORT_ENDPOINTS = [
-    # "f1",
-    # "mlb",
+    "mlb",
     "mma",
     "nba",
     # "nfl",
@@ -137,15 +136,15 @@ async def scrape() -> None:
     events = await get_events(cached_urls.keys())
-    log.info(f"Processing {len(events)} new URL(s)")
     if events:
+        log.info(f"Processing {len(events)} new URL(s)")
         now = Time.clean(Time.now())
         for i, ev in enumerate(events, start=1):
             handler = partial(
                 process_event,
-                url=ev["link"],
+                url=(link := ev["link"]),
                 url_num=i,
             )
@@ -156,11 +155,7 @@ async def scrape() -> None:
                 log=log,
             )
-            sport, event, link = (
-                ev["sport"],
-                ev["event"],
-                ev["link"],
-            )
+            sport, event = ev["sport"], ev["event"]
             key = f"[{sport}] {event} ({TAG})"