fix livetvsx.py scraping
edit health check script
This commit is contained in:
doms9 2026-01-27 20:14:27 -05:00
parent 53686dcc16
commit 00000d968d
3 changed files with 15 additions and 10 deletions

View file

@ -10,12 +10,12 @@ log = get_logger(__name__)
urls: dict[str, dict[str, str | float]] = {} urls: dict[str, dict[str, str | float]] = {}
TAG = "LTVSX" TAG = "LIVETVSX"
XML_CACHE = Cache(f"{TAG}-xml", exp=28_000)
CACHE_FILE = Cache(TAG, exp=10_800) CACHE_FILE = Cache(TAG, exp=10_800)
XML_CACHE = Cache(f"{TAG}-xml", exp=28_000)
BASE_URL = "https://cdn.livetv861.me/rss/upcoming_en.xml" BASE_URL = "https://cdn.livetv861.me/rss/upcoming_en.xml"
VALID_SPORTS = {"NBA", "NHL", "NFL", "NCAA", "MLB"} VALID_SPORTS = {"NBA", "NHL", "NFL", "NCAA", "MLB"}
@ -193,7 +193,7 @@ async def scrape(browser: Browser) -> None:
log.info(f"Processing {len(events)} new URL(s)") log.info(f"Processing {len(events)} new URL(s)")
if events: if events:
async with network.event_context(browser) as context: async with network.event_context(browser, ignore_https=True) as context:
for i, ev in enumerate(events, start=1): for i, ev in enumerate(events, start=1):
async with network.event_page(context) as page: async with network.event_page(context) as page:
handler = partial( handler = partial(

View file

@ -129,12 +129,14 @@ class Network:
async def event_context( async def event_context(
browser: Browser, browser: Browser,
stealth: bool = True, stealth: bool = True,
ignore_https: bool = False,
) -> AsyncGenerator[BrowserContext, None]: ) -> AsyncGenerator[BrowserContext, None]:
context: BrowserContext | None = None context: BrowserContext | None = None
try: try:
context = await browser.new_context( context = await browser.new_context(
user_agent=Network.UA if stealth else None, user_agent=Network.UA if stealth else None,
ignore_https_errors=ignore_https,
viewport={"width": 1366, "height": 768}, viewport={"width": 1366, "height": 768},
device_scale_factor=1, device_scale_factor=1,
locale="en-US", locale="en-US",

View file

@ -9,21 +9,23 @@ STATUSLOG=$(mktemp)
get_status() { get_status() {
local url="$1" local url="$1"
local channel="$2" local channel="$2"
local index="$3"
local total="$4"
local attempt response status_code local attempt response status_code
[[ "$url" != http* ]] && return [[ "$url" != http* ]] && return
for attempt in $(seq 1 "$RETRY_COUNT"); do printf '[%d/%d] Checking %s\n' "$((index + 1))" "$total" "$url"
echo "Checking '$url'"
for attempt in $(seq 1 "$RETRY_COUNT"); do
response=$( response=$(
curl -skL \ curl -skL \
-A "$UA" \ -A "$UA" \
-H "Accept: */*" \ -H "Accept: */*" \
-H "Accept-Language: en-US,en;q=0.9" \ -H "Accept-Language: en-US,en;q=0.9" \
-H "Accept-Encoding: gzip, deflate, br" \
-H "Connection: keep-alive" \ -H "Connection: keep-alive" \
-o /dev/null \ -o /dev/null \
--compressed \
--max-time 30 \ --max-time 30 \
-w "%{http_code}" \ -w "%{http_code}" \
"$url" 2>&1 "$url" 2>&1
@ -49,7 +51,7 @@ get_status() {
status_code="$response" status_code="$response"
case "$status_code" in case "$status_code" in
200) 2* | 3*)
echo "PASS" >>"$STATUSLOG" echo "PASS" >>"$STATUSLOG"
;; ;;
@ -73,6 +75,7 @@ get_status() {
check_links() { check_links() {
echo "Checking links from: $base_file" echo "Checking links from: $base_file"
total_urls=$(grep -cE '^https?://' "$base_file")
channel_num=0 channel_num=0
name="" name=""
@ -88,14 +91,14 @@ check_links() {
elif [[ "$line" =~ ^https?:// ]]; then elif [[ "$line" =~ ^https?:// ]]; then
while (($(jobs -r | wc -l) >= MAX_JOBS)); do sleep 0.2; done while (($(jobs -r | wc -l) >= MAX_JOBS)); do sleep 0.2; done
get_status "$line" "$name" & get_status "$line" "$name" "$channel_num" "$total_urls" &
((channel_num++)) ((channel_num++))
fi fi
done < <(cat "$base_file") done < <(cat "$base_file")
wait wait
echo "Done." echo -e "\nDone."
} }
write_readme() { write_readme() {