e

fix tvapp.py scraping; misc edits
2026-06-06 02:43:05 +02:00 · 2026-02-12 00:27:36 -05:00 · 2026-02-12 00:27:36 -05:00 · 00000d947c
commit 00000d947c
parent 3d5b650833
5 changed files with 36 additions and 62 deletions
--- a/M3U8/scrapers/fawa.py
+++ b/M3U8/scrapers/fawa.py
@@ -102,7 +102,7 @@ async def scrape() -> None:
    log.info(f"Processing {len(events)} new URL(s)")

    if events:
-        now = Time.clean(Time.now()).timestamp()
+        now = Time.clean(Time.now())

        for i, ev in enumerate(events, start=1):
            handler = partial(
@@ -133,7 +133,7 @@ async def scrape() -> None:
                    "url": url,
                    "logo": logo,
                    "base": BASE_URL,
-                    "timestamp": now,
+                    "timestamp": now.timestamp(),
                    "id": tvg_id or "Live.Event.us",
                    "href": ev["href"],
                    "link": link,
--- a/M3U8/scrapers/istreameast.py
+++ b/M3U8/scrapers/istreameast.py
@@ -119,7 +119,7 @@ async def scrape() -> None:
    log.info(f"Processing {len(events)} new URL(s)")

    if events:
-        now = Time.clean(Time.now()).timestamp()
+        now = Time.clean(Time.now())

        for i, ev in enumerate(events, start=1):
            handler = partial(
@@ -150,7 +150,7 @@ async def scrape() -> None:
                    "url": url,
                    "logo": logo,
                    "base": "https://gooz.aapmains.net",
-                    "timestamp": now,
+                    "timestamp": now.timestamp(),
                    "id": tvg_id or "Live.Event.us",
                    "link": link,
                }
--- a/M3U8/scrapers/pawa.py
+++ b/M3U8/scrapers/pawa.py
@@ -102,7 +102,7 @@ async def scrape() -> None:
    log.info(f"Processing {len(events)} new URL(s)")

    if events:
-        now = Time.clean(Time.now()).timestamp()
+        now = Time.clean(Time.now())

        for i, ev in enumerate(events, start=1):
            handler = partial(
@@ -133,7 +133,7 @@ async def scrape() -> None:
                    "url": url,
                    "logo": logo,
                    "base": link,
-                    "timestamp": now,
+                    "timestamp": now.timestamp(),
                    "id": tvg_id or "Live.Event.us",
                    "link": link,
                }
--- a/M3U8/scrapers/sport9.py
+++ b/M3U8/scrapers/sport9.py
@@ -104,7 +104,7 @@ async def scrape(browser: Browser) -> None:
    log.info(f"Processing {len(events)} new URL(s)")

    if events:
-        now = Time.clean(Time.now()).timestamp()
+        now = Time.clean(Time.now())

        async with network.event_context(browser, stealth=False) as context:
            for i, ev in enumerate(events, start=1):
@@ -139,7 +139,7 @@ async def scrape(browser: Browser) -> None:
                            "url": url,
                            "logo": logo,
                            "base": "https://vividmosaica.com/",
-                            "timestamp": now,
+                            "timestamp": now.timestamp(),
                            "id": tvg_id or "Live.Event.us",
                            "link": link,
                        }
--- a/M3U8/scrapers/tvapp.py
+++ b/M3U8/scrapers/tvapp.py
@@ -1,5 +1,5 @@
 from functools import partial
-from urllib.parse import urljoin
+from urllib.parse import urljoin, urlparse

 from playwright.async_api import Browser
 from selectolax.parser import HTMLParser
@@ -12,17 +12,21 @@ urls: dict[str, dict[str, str | float]] = {}

 TAG = "TVAPP"

-CACHE_FILE = Cache(TAG, exp=10_800)
-
-HTML_CACHE = Cache(f"{TAG}-html", exp=19_800)
+CACHE_FILE = Cache(TAG, exp=86_400)

 BASE_URL = "https://thetvapp.to"


-async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]:
-    log.info("Refreshing HTML cache")
+def fix_url(s: str) -> str:
+    parsed = urlparse(s)

-    events = {}
+    base = f"origin.{parsed.netloc.split('.', 1)[-1]}"
+
+    return urljoin(f"http://{base}", parsed.path.replace("tracks-v1a1/", ""))
+
+
+async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
+    events = []

    if not (html_data := await network.request(BASE_URL, log=log)):
        return events
@@ -39,56 +43,25 @@ async def refresh_html_cache(now_ts: float) -> dict[str, dict[str, str | float]]
            continue

        for a in row.css("a.list-group-item[href]"):
+            event_name = a.text(strip=True).split(":", 1)[0]
+
+            if f"[{sport}] {event_name} ({TAG})" in cached_keys:
+                continue
+
            if not (href := a.attributes.get("href")):
                continue

-            if not (span := a.css_first("span")):
-                continue
-
-            event_time = span.text(strip=True)
-
-            event_dt = Time.from_str(event_time, timezone="UTC")
-
-            event_name = a.text(strip=True).split(":")[0]
-
-            key = f"[{sport}] {event_name} ({TAG})"
-
-            events[key] = {
-                "sport": sport,
-                "event": event_name,
-                "link": urljoin(BASE_URL, href),
-                "event_ts": event_dt.timestamp(),
-                "timestamp": now_ts,
-            }
+            events.append(
+                {
+                    "sport": sport,
+                    "event": event_name,
+                    "link": urljoin(BASE_URL, href),
+                }
+            )

    return events


-async def get_events(cached_keys: list[str]) -> list[dict[str, str]]:
-    now = Time.clean(Time.now())
-
-    if not (events := HTML_CACHE.load()):
-        events = await refresh_html_cache(now.timestamp())
-
-        HTML_CACHE.write(events)
-
-    live = []
-
-    start_ts = now.delta(minutes=-30).timestamp()
-    end_ts = now.delta(minutes=30).timestamp()
-
-    for k, v in events.items():
-        if k in cached_keys:
-            continue
-
-        if not start_ts <= v["event_ts"] <= end_ts:
-            continue
-
-        live.append({**v})
-
-    return live
-
-
 async def scrape(browser: Browser) -> None:
    cached_urls = CACHE_FILE.load()

@@ -105,6 +78,8 @@ async def scrape(browser: Browser) -> None:
    log.info(f"Processing {len(events)} new URL(s)")

    if events:
+        now = Time.clean(Time.now())
+
        async with network.event_context(browser) as context:
            for i, ev in enumerate(events, start=1):
                async with network.event_page(context) as page:
@@ -124,10 +99,9 @@ async def scrape(browser: Browser) -> None:
                    )

                    if url:
-                        sport, event, ts, link = (
+                        sport, event, link = (
                            ev["sport"],
                            ev["event"],
-                            ev["event_ts"],
                            ev["link"],
                        )

@@ -136,10 +110,10 @@ async def scrape(browser: Browser) -> None:
                        tvg_id, logo = leagues.get_tvg_info(sport, event)

                        entry = {
-                            "url": url,
+                            "url": fix_url(url),
                            "logo": logo,
                            "base": BASE_URL,
-                            "timestamp": ts,
+                            "timestamp": now.timestamp(),
                            "id": tvg_id or "Live.Event.us",
                            "link": link,
                        }