- add adblocking
- edit roxie.py scraping method
- edit tvapp.py scraping method
- modify sports to scrape
- misc edits
This commit is contained in:
doms9 2026-02-19 18:16:27 -05:00
parent 12d6d959f4
commit 00000d90e4
26 changed files with 59290 additions and 264 deletions

View file

@ -99,15 +99,15 @@ async def scrape(browser: Browser) -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
log=log,
@ -121,11 +121,10 @@ async def scrape(browser: Browser) -> None:
)
if url:
sport, event, ts, link = (
sport, event, ts = (
ev["sport"],
ev["event"],
ev["timestamp"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})"

View file

@ -89,15 +89,15 @@ async def scrape(browser: Browser) -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
log=log,
@ -111,10 +111,9 @@ async def scrape(browser: Browser) -> None:
)
if url:
sport, event, link, ts = (
sport, event, ts = (
ev["sport"],
ev["event"],
ev["link"],
ev["timestamp"],
)

View file

@ -101,15 +101,15 @@ async def scrape() -> None:
events = await get_events(cached_hrefs)
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
)
@ -120,11 +120,7 @@ async def scrape() -> None:
log=log,
)
sport, event, link = (
ev["sport"],
ev["event"],
ev["link"],
)
sport, event = ev["sport"], ev["event"]
key = f"[{sport}] {event} ({TAG})"

View file

@ -114,15 +114,15 @@ async def scrape() -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
)
@ -133,11 +133,7 @@ async def scrape() -> None:
log=log,
)
sport, event, link = (
ev["sport"],
ev["event"],
ev["link"],
)
sport, event = ev["sport"], ev["event"]
key = f"[{sport}] {event} ({TAG})"

View file

@ -18,12 +18,14 @@ XML_CACHE = Cache(f"{TAG}-xml", exp=28_000)
BASE_URL = "https://cdn.livetv873.me/rss/upcoming_en.xml"
VALID_SPORTS = {
"Football",
VALID_SPORTS = [
"MLB. Preseason",
"MLB",
"Basketball",
"Football",
"Ice Hockey",
"Olympic Games",
}
]
async def process_event(
@ -48,7 +50,7 @@ async def process_event(
await page.goto(
url,
wait_until="domcontentloaded",
timeout=15_000,
timeout=10_000,
)
await page.wait_for_timeout(1_500)
@ -108,7 +110,7 @@ async def process_event(
return
except Exception as e:
log.warning(f"URL {url_num}) Exception while processing: {e}")
log.warning(f"URL {url_num}) {e}")
return
finally:
@ -205,15 +207,15 @@ async def scrape(browser: Browser) -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser, ignore_https=True) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
)
@ -226,12 +228,11 @@ async def scrape(browser: Browser) -> None:
timeout=20,
)
sport, league, event, ts, link = (
sport, league, event, ts = (
ev["sport"],
ev["league"],
ev["event"],
ev["event_ts"],
ev["link"],
)
key = f"[{sport} - {league}] {event} ({TAG})"

View file

@ -135,13 +135,13 @@ async def scrape() -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
)
@ -152,10 +152,9 @@ async def scrape() -> None:
log=log,
)
sport, event, link, ts = (
sport, event, ts = (
ev["sport"],
ev["event"],
ev["link"],
ev["event_ts"],
)

View file

@ -101,15 +101,15 @@ async def scrape() -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
)
@ -120,11 +120,7 @@ async def scrape() -> None:
log=log,
)
sport, event, link = (
ev["sport"],
ev["event"],
ev["link"],
)
sport, event = ev["sport"], ev["event"]
key = f"[{sport}] {event} ({TAG})"

View file

@ -22,7 +22,7 @@ async def get_api_data(page: Page) -> dict[str, list[dict, str, str]]:
await page.goto(
url := urljoin(BASE_URL, "backend/livetv/events"),
wait_until="domcontentloaded",
timeout=10_000,
timeout=6_000,
)
raw_json = await page.locator("pre").inner_text(timeout=5_000)

View file

@ -100,15 +100,15 @@ async def scrape(browser: Browser) -> None:
events = await get_events(base_url, cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
timeout=6,
@ -122,12 +122,11 @@ async def scrape(browser: Browser) -> None:
log=log,
)
sport, event, logo, ts, link = (
sport, event, logo, ts = (
ev["sport"],
ev["event"],
ev["logo"],
ev["timestamp"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})"

View file

@ -21,7 +21,7 @@ BASE_URL = "https://roxiestreams.info"
SPORT_ENDPOINTS = {
"fighting": "Fighting",
# "mlb": "MLB",
"mlb": "MLB",
"motorsports": "Racing",
"nba": "NBA",
# "nfl": "American Football",
@ -97,12 +97,12 @@ async def process_event(
await page.goto(
url,
wait_until="domcontentloaded",
timeout=15_000,
timeout=6_000,
)
try:
if btn := await page.wait_for_selector(
"button:has-text('Stream 1')",
"button.streambutton:nth-of-type(1)",
timeout=5_000,
):
await btn.click(force=True, click_count=2)
@ -140,7 +140,7 @@ async def process_event(
return
except Exception as e:
log.warning(f"URL {url_num}) Exception while processing: {e}")
log.warning(f"URL {url_num}) {e}")
return
finally:
@ -202,15 +202,15 @@ async def scrape(browser: Browser) -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
)
@ -222,11 +222,10 @@ async def scrape(browser: Browser) -> None:
log=log,
)
sport, event, ts, link = (
sport, event, ts = (
ev["sport"],
ev["event"],
ev["event_ts"],
ev["link"],
)
tvg_id, logo = leagues.get_tvg_info(sport, event)

View file

@ -124,13 +124,13 @@ async def scrape() -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
)
@ -142,11 +142,10 @@ async def scrape() -> None:
)
if url:
sport, event, ts, link = (
sport, event, ts = (
ev["sport"],
ev["event"],
ev["event_ts"],
ev["link"],
)
tvg_id, logo = leagues.get_tvg_info(sport, event)

View file

@ -101,9 +101,9 @@ async def scrape(browser: Browser) -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
async with network.event_context(browser, stealth=False) as context:
@ -111,7 +111,7 @@ async def scrape(browser: Browser) -> None:
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
log=log,
@ -125,11 +125,7 @@ async def scrape(browser: Browser) -> None:
)
if url:
sport, event, link = (
ev["sport"],
ev["event"],
ev["link"],
)
sport, event = ev["sport"], ev["event"]
key = f"[{sport}] {event} ({TAG})"

View file

@ -105,15 +105,15 @@ async def scrape() -> None:
events = await get_events()
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
)
@ -125,11 +125,7 @@ async def scrape() -> None:
)
if url:
sport, event, link = (
ev["sport"],
ev["event"],
ev["link"],
)
sport, event = ev["sport"], ev["event"]
key = f"[{sport}] {event} ({TAG})"

View file

@ -19,13 +19,12 @@ BASE_URL = "https://backend.streamcenter.live/api/Parties"
CATEGORIES = {
4: "Basketball",
9: "Football",
# 13: "Baseball",
13: "Baseball",
# 14: "American Football",
15: "Motor Sport",
16: "Hockey",
17: "Fight MMA",
18: "Boxing",
19: "NCAA Sports",
20: "WWE",
21: "Tennis",
}
@ -103,15 +102,15 @@ async def scrape(browser: Browser) -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
log=log,
@ -125,11 +124,10 @@ async def scrape(browser: Browser) -> None:
)
if url:
sport, event, ts, link = (
sport, event, ts = (
ev["sport"],
ev["event"],
ev["timestamp"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})"

View file

@ -20,17 +20,14 @@ HTML_CACHE = Cache(f"{TAG}-html", exp=28_800)
BASE_URL = "https://livesports4u.net"
CATEGORIES = {
"Soccer": "sport_68c02a4464a38",
# "American Football": "sport_68c02a4465113",
# "Baseball": "sport_68c02a446582f",
"Baseball": "sport_68c02a446582f",
"Basketball": "sport_68c02a4466011",
"Cricket": "sport_68c02a44669f3",
"Hockey": "sport_68c02a4466f56",
"MMA": "sport_68c02a44674e9",
"Racing": "sport_68c02a4467a48",
# "Rugby": "sport_68c02a4467fc1",
"Soccer": "sport_68c02a4464a38",
"Tennis": "sport_68c02a4468cf7",
# "Volleyball": "sport_68c02a4469422",
}
@ -148,15 +145,15 @@ async def scrape(browser: Browser) -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
timeout=5,
@ -170,11 +167,10 @@ async def scrape(browser: Browser) -> None:
log=log,
)
sport, event, logo, link, ts = (
sport, event, logo, ts = (
ev["sport"],
ev["event"],
ev["logo"],
ev["link"],
ev["event_ts"],
)

View file

@ -21,15 +21,15 @@ API_FILE = Cache(f"{TAG}-api", exp=19_800)
BASE_URL = "https://streamingon.org"
SPORT_ENDPOINTS = [
"soccer",
# "nfl",
"nba",
"cfb",
# "mlb",
"nhl",
"ufc",
"boxing",
# "cfb",
"f1",
"mlb",
"nba",
# "nfl",
"nhl",
"soccer",
"ufc",
]
@ -133,15 +133,15 @@ async def scrape(browser: Browser) -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
log=log,
@ -155,11 +155,10 @@ async def scrape(browser: Browser) -> None:
)
if url:
sport, event, ts, link = (
sport, event, ts = (
ev["sport"],
ev["event"],
ev["timestamp"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})"

View file

@ -147,15 +147,15 @@ async def scrape() -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
)
@ -166,11 +166,7 @@ async def scrape() -> None:
log=log,
)
sport, event, link = (
ev["sport"],
ev["event"],
ev["link"],
)
sport, event = ev["sport"], ev["event"]
key = f"[{sport}] {event} ({TAG})"

View file

@ -1,7 +1,6 @@
from functools import partial
from urllib.parse import urljoin, urlparse
from urllib.parse import urljoin
from playwright.async_api import Browser
from selectolax.parser import HTMLParser
from .utils import Cache, Time, get_logger, leagues, network
@ -17,12 +16,27 @@ CACHE_FILE = Cache(TAG, exp=86_400)
BASE_URL = "https://thetvapp.to"
def fix_url(s: str) -> str:
parsed = urlparse(s)
async def process_event(url: str, url_num: int) -> str | None:
if not (html_data := await network.request(url, log=log)):
log.info(f"URL {url_num}) Failed to load url.")
base = f"origin.{parsed.netloc.split('.', 1)[-1]}"
return
return urljoin(f"http://{base}", parsed.path.replace("tracks-v1a1/", ""))
soup = HTMLParser(html_data.content)
if not (channel_name_elem := soup.css_first("#stream_name")):
log.warning(f"URL {url_num}) No channel found.")
return
if not (channel_name := channel_name_elem.attributes.get("name")):
log.warning(f"URL {url_num}) No channel found.")
return
log.info(f"URL {url_num}) Captured M3U8")
return f"http://origin.thetvapp.to/hls/{channel_name.strip().upper()}/mono.m3u8"
async def get_events() -> list[dict[str, str]]:
@ -59,7 +73,7 @@ async def get_events() -> list[dict[str, str]]:
return events
async def scrape(browser: Browser) -> None:
async def scrape() -> None:
if cached := CACHE_FILE.load():
urls.update(cached)
@ -71,50 +85,42 @@ async def scrape(browser: Browser) -> None:
events = await get_events()
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url_num=i,
page=page,
log=log,
)
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=(link := ev["link"]),
url_num=i,
)
url = await network.safe_process(
handler,
url_num=i,
semaphore=network.PW_S,
log=log,
)
url = await network.safe_process(
handler,
url_num=i,
semaphore=network.HTTP_S,
log=log,
)
if url:
sport, event, link = (
ev["sport"],
ev["event"],
ev["link"],
)
if url:
sport, event = ev["sport"], ev["event"]
key = f"[{sport}] {event} ({TAG})"
key = f"[{sport}] {event} ({TAG})"
tvg_id, logo = leagues.get_tvg_info(sport, event)
tvg_id, logo = leagues.get_tvg_info(sport, event)
entry = {
"url": fix_url(url),
"logo": logo,
"base": BASE_URL,
"timestamp": now.timestamp(),
"id": tvg_id or "Live.Event.us",
"link": link,
}
entry = {
"url": url,
"logo": logo,
"base": BASE_URL,
"timestamp": now.timestamp(),
"id": tvg_id or "Live.Event.us",
"link": link,
}
urls[key] = entry
urls[key] = entry
log.info(f"Collected and cached {len(urls)} new event(s)")

File diff suppressed because it is too large Load diff

View file

@ -15,7 +15,6 @@ LOG_FMT = (
)
COLORS = {
"DEBUG": "\033[36m",
"INFO": "\033[32m",
"WARNING": "\033[33m",
"ERROR": "\033[31m",

View file

@ -0,0 +1,27 @@
// Browser-fingerprint overrides for automated pages.
// NOTE(review): presumably the script injected into each browser context via
// Playwright's add_init_script -- confirm against the caller.
//
// Everything lives inside an IIFE so helper bindings (such as the saved
// getParameter reference) never land in the page's global scope, where they
// would be visible to site scripts and could collide with a site's own
// top-level `const`/`let` declarations.
(() => {
  // Install a getter-only property, exactly as a bare
  // Object.defineProperty(target, name, { get }) call would.
  const defineGetter = (target, name, getter) =>
    Object.defineProperty(target, name, { get: getter });

  // Each getter returns a fresh value per access so page code cannot mutate
  // a shared array.
  defineGetter(navigator, "webdriver", () => undefined);
  defineGetter(navigator, "languages", () => ["en-US", "en"]);
  defineGetter(navigator, "hardwareConcurrency", () => 8);
  defineGetter(navigator, "deviceMemory", () => 8);
  defineGetter(navigator, "plugins", () => []);

  // Parameter ids from the WEBGL_debug_renderer_info extension.
  const UNMASKED_VENDOR_WEBGL = 37445;
  const UNMASKED_RENDERER_WEBGL = 37446;

  // Replace getParameter on a WebGL context class so the unmasked vendor and
  // renderer queries return fixed strings; every other parameter is
  // forwarded to the native implementation.
  const spoofGetParameter = (ContextClass) => {
    const nativeGetParameter = ContextClass.prototype.getParameter;

    ContextClass.prototype.getParameter = function (param) {
      if (param === UNMASKED_VENDOR_WEBGL) return "Google Inc.";
      if (param === UNMASKED_RENDERER_WEBGL)
        return "ANGLE (Intel(R) UHD Graphics Direct3D11 vs_5_0 ps_5_0)";
      return nativeGetParameter.apply(this, [param]);
    };
  };

  // Patch both WebGL generations so they report the same GPU; the typeof
  // guards keep the script from throwing where a class is unavailable.
  if (typeof WebGLRenderingContext !== "undefined") {
    spoofGetParameter(WebGLRenderingContext);
  }
  if (typeof WebGL2RenderingContext !== "undefined") {
    spoofGetParameter(WebGL2RenderingContext);
  }
})();

View file

@ -4,11 +4,20 @@ import random
import re
from collections.abc import Awaitable, Callable
from contextlib import asynccontextmanager
from functools import partial
from functools import cache, partial
from pathlib import Path
from typing import AsyncGenerator, TypeVar
from urllib.parse import urlparse
import httpx
from playwright.async_api import Browser, BrowserContext, Page, Playwright, Request
from playwright.async_api import (
Browser,
BrowserContext,
Page,
Playwright,
Request,
Route,
)
from .logger import get_logger
@ -73,7 +82,7 @@ class Network:
fn: Callable[[], Awaitable[T]],
url_num: int,
semaphore: asyncio.Semaphore,
timeout: int | float = 10,
timeout: int | float = 30,
log: logging.Logger | None = None,
) -> T | None:
@ -98,7 +107,7 @@ class Network:
pass
except Exception as e:
log.debug(f"URL {url_num}) Ignore exception after timeout: {e}")
log.warning(f"URL {url_num}) Ignore exception after timeout: {e}")
return
except Exception as e:
@ -106,6 +115,35 @@ class Network:
return
@staticmethod
@cache
def blocked_domains() -> list[str]:
    """Return the block-list entries stored in ``easylist.txt``.

    The file is read from the directory containing this module, one entry
    per line, and the parsed list is cached for the life of the process.

    Entries are stripped of surrounding whitespace and lower-cased so they
    compare equal to the lower-cased hostname that ``to_block`` derives from
    a request URL. Blank lines are dropped: an empty entry would otherwise
    equal the empty hostname ``to_block`` falls back to, blocking every
    request whose URL has no host. Lines starting with ``!`` or ``#`` are
    dropped as well.
    NOTE(review): treating ``!``/``#`` as comment markers is an assumption
    about the file format -- confirm against the bundled list.

    Returns:
        The normalised entries in file order.

    Raises:
        OSError: if ``easylist.txt`` cannot be read.
    """
    raw_text = (Path(__file__).parent / "easylist.txt").read_text(encoding="utf-8")

    return [
        entry
        for line in raw_text.splitlines()
        if (entry := line.strip().lower()) and not entry.startswith(("!", "#"))
    ]
@staticmethod
@cache
def _blocked_domain_set() -> frozenset[str]:
    """Return the block list as a frozenset, built once, for O(1) lookups."""
    return frozenset(Network.blocked_domains())

@staticmethod
def to_block(request: Request) -> bool:
    """Return True when the request's host is on the block list.

    A host matches when it equals a listed entry or is a subdomain of one
    (``ads.example.com`` matches the entry ``example.com``). Instead of
    comparing the hostname against every entry, only the hostname itself and
    each suffix that starts right after a ``.`` are looked up in a cached
    set. That is the same set of strings an
    ``hostname == d or hostname.endswith("." + d)`` scan can match, so the
    result is unchanged while the cost per request drops from the size of
    the list to the number of labels in the hostname.

    Args:
        request: The intercepted request; only its ``url`` is read.

    Returns:
        True if the request should be blocked, otherwise False.
    """
    # URLs without a host yield None from urlparse; treat that as "".
    hostname = (urlparse(request.url).hostname or "").lower()
    blocked = Network._blocked_domain_set()

    candidate = hostname
    while True:
        if candidate in blocked:
            return True

        # Drop the left-most label and retry with the parent domain.
        _, dot, candidate = candidate.partition(".")
        if not dot:
            return False
@staticmethod
async def _adblock(route: Route) -> None:
    """Route handler that aborts block-listed requests and lets the rest through.

    Only requests whose resource type is ``script``, ``image``, ``media`` or
    ``xhr`` are checked against the block list; every other resource type is
    continued without a lookup.

    Args:
        route: The intercepted route; its ``request`` decides the outcome.
    """
    req = route.request
    filterable = req.resource_type in ["script", "image", "media", "xhr"]

    # Short-circuit keeps the block-list lookup off non-filterable requests.
    if filterable and Network.to_block(req):
        await route.abort()
    else:
        await route.continue_()
@staticmethod
@asynccontextmanager
async def event_context(
@ -113,78 +151,30 @@ class Network:
stealth: bool = True,
ignore_https: bool = False,
) -> AsyncGenerator[BrowserContext, None]:
context: BrowserContext | None = None
try:
context = await browser.new_context(
user_agent=Network.UA if stealth else None,
ignore_https_errors=ignore_https,
viewport={"width": 1366, "height": 768},
device_scale_factor=1,
locale="en-US",
timezone_id="America/New_York",
color_scheme="dark",
permissions=["geolocation"],
extra_http_headers=(
{
"Accept-Language": "en-US,en;q=0.9",
"Upgrade-Insecure-Requests": "1",
}
if stealth
else None
),
)
if stealth:
await context.add_init_script("""
Object.defineProperty(navigator, "webdriver", { get: () => undefined });
Object.defineProperty(navigator, "languages", {
get: () => ["en-US", "en"],
});
Object.defineProperty(navigator, "plugins", {
get: () => [1, 2, 3, 4],
});
const elementDescriptor = Object.getOwnPropertyDescriptor(
HTMLElement.prototype,
"offsetHeight"
);
Object.defineProperty(HTMLDivElement.prototype, "offsetHeight", {
...elementDescriptor,
get: function () {
if (this.id === "modernizr") {
return 24;
context = await browser.new_context(
user_agent=Network.UA,
ignore_https_errors=ignore_https,
viewport={"width": 1366, "height": 768},
device_scale_factor=1,
locale="en-US",
timezone_id="America/New_York",
color_scheme="dark",
extra_http_headers=(
{
"Accept-Language": "en-US,en;q=0.9",
"Upgrade-Insecure-Requests": "1",
}
return elementDescriptor.get.apply(this);
},
});
),
)
Object.defineProperty(window.screen, "width", { get: () => 1366 });
Object.defineProperty(window.screen, "height", { get: () => 768 });
await context.add_init_script(path=Path(__file__).parent / "stealth.js")
const getParameter = WebGLRenderingContext.prototype.getParameter;
WebGLRenderingContext.prototype.getParameter = function (param) {
if (param === 37445) return "Intel Inc."; // UNMASKED_VENDOR_WEBGL
if (param === 37446) return "Intel Iris OpenGL Engine"; // UNMASKED_RENDERER_WEBGL
return getParameter.apply(this, [param]);
};
const observer = new MutationObserver((mutations) => {
mutations.forEach((mutation) => {
mutation.addedNodes.forEach((node) => {
if (node.tagName === "IFRAME" && node.hasAttribute("sandbox")) {
node.removeAttribute("sandbox");
}
});
});
});
observer.observe(document.documentElement, { childList: true, subtree: true });
""")
await context.route("**/*", Network._adblock)
else:
context = await browser.new_context()
@ -261,7 +251,7 @@ class Network:
await page.goto(
url,
wait_until="domcontentloaded",
timeout=15_000,
timeout=6_000,
)
wait_task = asyncio.create_task(got_one.wait())
@ -292,7 +282,7 @@ class Network:
return
except Exception as e:
log.warning(f"URL {url_num}) Exception while processing: {e}")
log.warning(f"URL {url_num}) {e}")
return

View file

@ -29,17 +29,13 @@ BASE_MIRRORS = [
VALID_SPORTS = [
# "american-football",
# "australian-football",
# "baseball",
"baseball",
"basketball",
"cricket",
"darts",
"fighting",
"football",
"golf",
"hockey",
"racing",
# "rugby",
"tennis",
"volleyball",
]
@ -96,7 +92,7 @@ async def process_event(
await page.goto(
url,
wait_until="domcontentloaded",
timeout=10_000,
timeout=8_000,
)
await page.wait_for_timeout(2_000)
@ -170,7 +166,7 @@ async def process_event(
return nones
except Exception as e:
log.warning(f"URL {url_num}) Exception while processing: {e}")
log.warning(f"URL {url_num}) {e}")
return nones
@ -256,15 +252,15 @@ async def scrape(browser: Browser) -> None:
events = await get_events(base_url, cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser, stealth=False) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
)
@ -277,12 +273,11 @@ async def scrape(browser: Browser) -> None:
timeout=20,
)
sport, event, logo, ts, link = (
sport, event, logo, ts = (
ev["sport"],
ev["event"],
ev["logo"],
ev["timestamp"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})"

View file

@ -126,15 +126,15 @@ async def scrape(browser: Browser) -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
async with network.event_context(browser) as context:
for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page:
handler = partial(
network.process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
page=page,
log=log,
@ -148,11 +148,10 @@ async def scrape(browser: Browser) -> None:
)
if url:
sport, event, ts, link = (
sport, event, ts = (
ev["sport"],
ev["event"],
ev["event_ts"],
ev["link"],
)
key = f"[{sport}] {event} ({TAG})"

View file

@ -18,8 +18,7 @@ CACHE_FILE = Cache(TAG, exp=10_800)
BASE_URL = "https://xstreameast.com"
SPORT_ENDPOINTS = [
# "f1",
# "mlb",
"mlb",
"mma",
"nba",
# "nfl",
@ -137,15 +136,15 @@ async def scrape() -> None:
events = await get_events(cached_urls.keys())
log.info(f"Processing {len(events)} new URL(s)")
if events:
log.info(f"Processing {len(events)} new URL(s)")
now = Time.clean(Time.now())
for i, ev in enumerate(events, start=1):
handler = partial(
process_event,
url=ev["link"],
url=(link := ev["link"]),
url_num=i,
)
@ -156,11 +155,7 @@ async def scrape() -> None:
log=log,
)
sport, event, link = (
ev["sport"],
ev["event"],
ev["link"],
)
sport, event = ev["sport"], ev["event"]
key = f"[{sport}] {event} ({TAG})"