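- edit scraping for roxie.py: switch BASE_URL to roxiestreams.info, read each event's start time from the `td.event-start-time` cell (parsed as EST) instead of the `span.countdown-timer` `data-start` attribute (parsed as PST), and drop the now-unused `re` import.
- misc edits: comment out load_base(), every scraper except streamsgate, and the playlist-writing code in main(); import AsyncGenerator from collections.abc instead of typing and drop the explicit `None` send type from its annotations.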
This commit is contained in:
doms9 2026-06-04 15:05:05 -04:00
parent 97d0ca9ed1
commit 00000d9818
3 changed files with 74 additions and 83 deletions

View file

@ -48,7 +48,7 @@ def load_base() -> tuple[list[str], int]:
async def main() -> None:
log.info(f"{'=' * 10} Scraper Started {'=' * 10}")
base_m3u8, tvg_chno = load_base()
# base_m3u8, tvg_chno = load_base()
async with async_playwright() as p:
try:
@ -57,30 +57,30 @@ async def main() -> None:
xtrnl_brwsr = await network.browser(p, external=True)
pw_tasks = [
asyncio.create_task(embedhd.scrape(hdl_brwsr)),
asyncio.create_task(fsports.scrape(xtrnl_brwsr)),
asyncio.create_task(roxie.scrape(hdl_brwsr)),
# asyncio.create_task(embedhd.scrape(hdl_brwsr)),
# asyncio.create_task(fsports.scrape(xtrnl_brwsr)),
# asyncio.create_task(roxie.scrape(hdl_brwsr)),
]
httpx_tasks = [
asyncio.create_task(fawa.scrape()),
asyncio.create_task(istreameast.scrape()),
# asyncio.create_task(ovogoal.scrape()),
asyncio.create_task(shark.scrape()),
asyncio.create_task(streamcenter.scrape()),
# asyncio.create_task(fawa.scrape()),
# asyncio.create_task(istreameast.scrape()),
# # asyncio.create_task(ovogoal.scrape()),
# asyncio.create_task(shark.scrape()),
# asyncio.create_task(streamcenter.scrape()),
asyncio.create_task(streamsgate.scrape()),
asyncio.create_task(streamtpnew.scrape()),
asyncio.create_task(totalsportek.scrape()),
asyncio.create_task(tvapp.scrape()),
asyncio.create_task(webcast.scrape()),
asyncio.create_task(xyzstream.scrape()),
# asyncio.create_task(streamtpnew.scrape()),
# asyncio.create_task(totalsportek.scrape()),
# asyncio.create_task(tvapp.scrape()),
# asyncio.create_task(webcast.scrape()),
# asyncio.create_task(xyzstream.scrape()),
]
await asyncio.gather(*(pw_tasks + httpx_tasks))
# others
await cdnlivetv.scrape(xtrnl_brwsr)
await watchfooty.scrape(xtrnl_brwsr)
# await cdnlivetv.scrape(xtrnl_brwsr)
# await watchfooty.scrape(xtrnl_brwsr)
finally:
await hdl_brwsr.close()
@ -89,68 +89,68 @@ async def main() -> None:
await network.client.aclose()
additions = (
cdnlivetv.urls
| embedhd.urls
| fawa.urls
| fsports.urls
| istreameast.urls
| ovogoal.urls
| roxie.urls
| shark.urls
| streamcenter.urls
| streamsgate.urls
| streamtpnew.urls
| totalsportek.urls
| tvapp.urls
| watchfooty.urls
| webcast.urls
| xyzstream.urls
)
# additions = (
# cdnlivetv.urls
# | embedhd.urls
# | fawa.urls
# | fsports.urls
# | istreameast.urls
# | ovogoal.urls
# | roxie.urls
# | shark.urls
# | streamcenter.urls
# | streamsgate.urls
# | streamtpnew.urls
# | totalsportek.urls
# | tvapp.urls
# | watchfooty.urls
# | webcast.urls
# | xyzstream.urls
# )
live_events: list[str] = []
# live_events: list[str] = []
combined_channels: list[str] = []
# combined_channels: list[str] = []
for i, (event, info) in enumerate(
sorted(additions.items()),
start=1,
):
extinf_all = (
f'#EXTINF:-1 tvg-chno="{tvg_chno + i}" tvg-id="{info["id"]}" '
f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}'
)
# for i, (event, info) in enumerate(
# sorted(additions.items()),
# start=1,
# ):
# extinf_all = (
# f'#EXTINF:-1 tvg-chno="{tvg_chno + i}" tvg-id="{info["id"]}" '
# f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}'
# )
extinf_live = (
f'#EXTINF:-1 tvg-chno="{i}" tvg-id="{info["id"]}" '
f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}'
)
# extinf_live = (
# f'#EXTINF:-1 tvg-chno="{i}" tvg-id="{info["id"]}" '
# f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}'
# )
vlc_block = [
f'#EXTVLCOPT:http-referrer={info["base"]}',
f'#EXTVLCOPT:http-origin={info["base"]}',
f"#EXTVLCOPT:http-user-agent={info.get('UA', network.UA)}",
info["url"],
]
# vlc_block = [
# f'#EXTVLCOPT:http-referrer={info["base"]}',
# f'#EXTVLCOPT:http-origin={info["base"]}',
# f"#EXTVLCOPT:http-user-agent={info.get('UA', network.UA)}",
# info["url"],
# ]
combined_channels.extend(["\n" + extinf_all, *vlc_block])
# combined_channels.extend(["\n" + extinf_all, *vlc_block])
live_events.extend(["\n" + extinf_live, *vlc_block])
# live_events.extend(["\n" + extinf_live, *vlc_block])
COMBINED_FILE.write_text(
"\n".join(base_m3u8 + combined_channels),
encoding="utf-8",
)
# COMBINED_FILE.write_text(
# "\n".join(base_m3u8 + combined_channels),
# encoding="utf-8",
# )
log.info(f"Base + Events saved to {COMBINED_FILE.resolve()}")
# log.info(f"Base + Events saved to {COMBINED_FILE.resolve()}")
EVENTS_FILE.write_text(
'#EXTM3U url-tvg="https://raw.githubusercontent.com/doms9/iptv/refs/heads/default/M3U8/TV.xml"\n'
+ "\n".join(live_events),
encoding="utf-8",
)
# EVENTS_FILE.write_text(
# '#EXTM3U url-tvg="https://raw.githubusercontent.com/doms9/iptv/refs/heads/default/M3U8/TV.xml"\n'
# + "\n".join(live_events),
# encoding="utf-8",
# )
log.info(f"Events saved to {EVENTS_FILE.resolve()}")
# log.info(f"Events saved to {EVENTS_FILE.resolve()}")
if __name__ == "__main__":

View file

@ -1,5 +1,4 @@
import asyncio
import re
from functools import partial
from urllib.parse import urljoin
@ -16,7 +15,7 @@ TAG = "ROXIE"
CACHE_FILE = Cache(TAG, exp=19_800)
BASE_URL = "https://roxiestreams.su"
BASE_URL = "https://roxiestreams.info"
SPORT_URLS = {
# "March Madness": urljoin(BASE_URL, "march-madness"),
@ -102,18 +101,10 @@ async def get_events() -> list[dict[str, str]]:
if not (href := a_tag.attributes.get("href")):
continue
if not (span := row.css_first("span.countdown-timer")) or not (
data_start := span.attributes.get("data-start")
):
if not (event_time_elem := row.css_first("td.event-start-time")):
continue
event_time = (
data_start.rsplit(":", 1)[0]
if (re.search(r"\d+:\d+:\d+", data_start) or "M:00" in data_start)
else data_start
)
event_dt = Time.from_str(event_time, timezone="PST")
event_dt = Time.from_str(event_time_elem.text(strip=True), timezone="EST")
if event_dt.date() != now.date():
continue

View file

@ -2,11 +2,11 @@ import asyncio
import logging
import random
import re
from collections.abc import Awaitable, Callable
from collections.abc import AsyncGenerator, Awaitable, Callable
from contextlib import asynccontextmanager
from functools import cache, partial
from pathlib import Path
from typing import AsyncGenerator, TypeVar
from typing import TypeVar
from urllib.parse import urlparse
import httpx
@ -158,7 +158,7 @@ class Network:
browser: Browser,
stealth: bool = True,
ignore_https: bool = False,
) -> AsyncGenerator[BrowserContext, None]:
) -> AsyncGenerator[BrowserContext]:
context: BrowserContext | None = None
@ -195,7 +195,7 @@ class Network:
@staticmethod
@asynccontextmanager
async def event_page(context: BrowserContext) -> AsyncGenerator[Page, None]:
async def event_page(context: BrowserContext) -> AsyncGenerator[Page]:
page = await context.new_page()
try: