fix tvapp.py scraping
misc edits
doms9 2026-02-12 00:27:36 -05:00
parent 3d5b650833
commit 00000d947c
5 changed files with 36 additions and 62 deletions

View file

@@ -102,7 +102,7 @@ async def scrape() -> None:
     log.info(f"Processing {len(events)} new URL(s)")

     if events:
-        now = Time.clean(Time.now()).timestamp()
+        now = Time.clean(Time.now())

         for i, ev in enumerate(events, start=1):
             handler = partial(
@@ -133,7 +133,7 @@ async def scrape() -> None:
                 "url": url,
                 "logo": logo,
                 "base": BASE_URL,
-                "timestamp": now,
+                "timestamp": now.timestamp(),
                 "id": tvg_id or "Live.Event.us",
                 "href": ev["href"],
                 "link": link,

View file

@@ -119,7 +119,7 @@ async def scrape() -> None:
     log.info(f"Processing {len(events)} new URL(s)")

     if events:
-        now = Time.clean(Time.now()).timestamp()
+        now = Time.clean(Time.now())

         for i, ev in enumerate(events, start=1):
             handler = partial(
@@ -150,7 +150,7 @@ async def scrape() -> None:
                 "url": url,
                 "logo": logo,
                 "base": "https://gooz.aapmains.net",
-                "timestamp": now,
+                "timestamp": now.timestamp(),
                 "id": tvg_id or "Live.Event.us",
                 "link": link,
             }

View file

@@ -102,7 +102,7 @@ async def scrape() -> None:
     log.info(f"Processing {len(events)} new URL(s)")

     if events:
-        now = Time.clean(Time.now()).timestamp()
+        now = Time.clean(Time.now())

         for i, ev in enumerate(events, start=1):
             handler = partial(
@@ -133,7 +133,7 @@ async def scrape() -> None:
                 "url": url,
                 "logo": logo,
                 "base": link,
-                "timestamp": now,
+                "timestamp": now.timestamp(),
                 "id": tvg_id or "Live.Event.us",
                 "link": link,
             }

View file

@@ -104,7 +104,7 @@ async def scrape(browser: Browser) -> None:
     log.info(f"Processing {len(events)} new URL(s)")

     if events:
-        now = Time.clean(Time.now()).timestamp()
+        now = Time.clean(Time.now())

         async with network.event_context(browser, stealth=False) as context:
             for i, ev in enumerate(events, start=1):
@@ -139,7 +139,7 @@ async def scrape(browser: Browser) -> None:
                     "url": url,
                     "logo": logo,
                     "base": "https://vividmosaica.com/",
-                    "timestamp": now,
+                    "timestamp": now.timestamp(),
                     "id": tvg_id or "Live.Event.us",
                     "link": link,
                 }

View file

@@ -1,5 +1,5 @@
 from functools import partial
-from urllib.parse import urljoin
+from urllib.parse import urljoin, urlparse

 from playwright.async_api import Browser
 from selectolax.parser import HTMLParser
@@ -12,17 +12,21 @@ urls: dict[str, dict[str, str | float]] = {}
 TAG = "TVAPP"
-CACHE_FILE = Cache(TAG, exp=10_800)
-HTML_CACHE = Cache(f"{TAG}-html", exp=19_800)
+CACHE_FILE = Cache(TAG, exp=86_400)
 BASE_URL = "https://thetvapp.to"


-async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
-    log.info("Refreshing HTML cache")
-
-    events = {}
+def fix_url(s: str) -> str:
+    parsed = urlparse(s)
+    base = f"origin.{parsed.netloc.split('.', 1)[-1]}"
+    return urljoin(f"http://{base}", parsed.path.replace("tracks-v1a1/", ""))
+
+
+async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+    events = []

     if not (html_data := await network.request(BASE_URL, log=log)):
         return events
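
The new `fix_url` helper rewrites a captured stream URL onto an `origin.` host and strips the `tracks-v1a1/` path segment; the cache TTL also grows from 10,800 s (3 h) to 86,400 s (24 h), and the separate HTML cache is dropped entirely. A worked example of `fix_url`, using a made-up subdomain and path of the shape the code expects (assumed values, not taken from the commit):

    >>> fix_url("https://v11.thetvapp.to/hls/abc123/tracks-v1a1/mono.m3u8")
    'http://origin.thetvapp.to/hls/abc123/mono.m3u8'

`urlparse` yields netloc `v11.thetvapp.to`; `split('.', 1)[-1]` drops the first label, giving `origin.thetvapp.to`; `urljoin` then reattaches the path with `tracks-v1a1/` removed. Any query string is discarded, since only `parsed.path` is passed to `urljoin`.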
@@ -39,56 +43,25 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]
             continue

         for a in row.css("a.list-group-item[href]"):
+            event_name = a.text(strip=True).split(":", 1)[0]
+
+            if f"[{sport}] {event_name} ({TAG})" in cached_keys:
+                continue
+
             if not (href := a.attributes.get("href")):
                 continue

-            if not (span := a.css_first("span")):
-                continue
-
-            event_time = span.text(strip=True)
-            event_dt = Time.from_str(event_time, timezone="UTC")
-            event_name = a.text(strip=True).split(":")[0]
-            key = f"[{sport}] {event_name} ({TAG})"
-
-            events[key] = {
-                "sport": sport,
-                "event": event_name,
-                "link": urljoin(BASE_URL, href),
-                "event_ts": event_dt.timestamp(),
-                "timestamp": now_ts,
-            }
+            events.append(
+                {
+                    "sport": sport,
+                    "event": event_name,
+                    "link": urljoin(BASE_URL, href),
+                }
+            )

     return events

-
-async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
-    now = Time.clean(Time.now())
-
-    if not (events := HTML_CACHE.load()):
-        events = await refresh_html_cache(now.timestamp())
-        HTML_CACHE.write(events)
-
-    live = []
-    start_ts = now.delta(minutes=-30).timestamp()
-    end_ts = now.delta(minutes=30).timestamp()
-
-    for k, v in events.items():
-        if k in cached_keys:
-            continue
-
-        if not start_ts <= v["event_ts"] <= end_ts:
-            continue
-
-        live.append({**v})
-
-    return live


 async def scrape(browser: Browser) -> None:
     cached_urls = CACHE_FILE.load()
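
Behavior change worth noting: the old pipeline cached parsed events in `HTML_CACHE` and kept only those within a ±30 minute window of now,

    start_ts = now.delta(minutes=-30).timestamp()
    end_ts = now.delta(minutes=30).timestamp()
    # an event was kept iff start_ts <= event_ts <= end_ts

while the new `get_events` re-parses the page on every run and skips only events whose key `[{sport}] {event_name} ({TAG})` already appears in the URL cache. Scheduled start times (`event_ts`) are no longer tracked, which is why the hunks below drop `ts` in favor of `now.timestamp()`.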
@@ -105,6 +78,8 @@ async def scrape(browser: Browser) -> None:
     log.info(f"Processing {len(events)} new URL(s)")

     if events:
+        now = Time.clean(Time.now())
+
         async with network.event_context(browser) as context:
             for i, ev in enumerate(events, start=1):
                 async with network.event_page(context) as page:
@@ -124,10 +99,9 @@ async def scrape(browser: Browser) -> None:
                     )

                     if url:
-                        sport, event, ts, link = (
+                        sport, event, link = (
                             ev["sport"],
                             ev["event"],
-                            ev["event_ts"],
                             ev["link"],
                         )
@@ -136,10 +110,10 @@ async def scrape(browser: Browser) -> None:
                         tvg_id, logo = leagues.get_tvg_info(sport, event)

                         entry = {
-                            "url": url,
+                            "url": fix_url(url),
                             "logo": logo,
                             "base": BASE_URL,
-                            "timestamp": ts,
+                            "timestamp": now.timestamp(),
                             "id": tvg_id or "Live.Event.us",
                             "link": link,
                         }
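
Taken together, a tvapp cache entry now has this shape (illustrative values only; the real logo, id, and link come from `leagues.get_tvg_info` and the scraped event page):

    entry = {
        "url": "http://origin.thetvapp.to/hls/abc123/mono.m3u8",  # via fix_url
        "logo": "https://example.invalid/logo.png",               # illustrative
        "base": "https://thetvapp.to",                            # BASE_URL
        "timestamp": 1770000000.0,                                # now.timestamp()
        "id": "Live.Event.us",                                    # fallback tvg id
        "link": "https://thetvapp.to/some-event",                 # illustrative
    }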