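- edit scraping for roxie.py: switch the base URL to roxiestreams.info and read each event's start time from the `td.event-start-time` cell (parsed as EST) instead of the countdown timer's `data-start` attribute (parsed as PST)
- misc edits: comment out every scraper except streamsgate, along with the playlist-writing code, in the main script; import `AsyncGenerator` from `collections.abc` instead of `typing`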
This commit is contained in:
doms9 2026-06-04 15:05:05 -04:00
parent 97d0ca9ed1
commit 00000d9818
3 changed files with 74 additions and 83 deletions

View file

@ -48,7 +48,7 @@ def load_base() -> tuple[list[str], int]:
async def main() -> None: async def main() -> None:
log.info(f"{'=' * 10} Scraper Started {'=' * 10}") log.info(f"{'=' * 10} Scraper Started {'=' * 10}")
base_m3u8, tvg_chno = load_base() # base_m3u8, tvg_chno = load_base()
async with async_playwright() as p: async with async_playwright() as p:
try: try:
@ -57,30 +57,30 @@ async def main() -> None:
xtrnl_brwsr = await network.browser(p, external=True) xtrnl_brwsr = await network.browser(p, external=True)
pw_tasks = [ pw_tasks = [
asyncio.create_task(embedhd.scrape(hdl_brwsr)), # asyncio.create_task(embedhd.scrape(hdl_brwsr)),
asyncio.create_task(fsports.scrape(xtrnl_brwsr)), # asyncio.create_task(fsports.scrape(xtrnl_brwsr)),
asyncio.create_task(roxie.scrape(hdl_brwsr)), # asyncio.create_task(roxie.scrape(hdl_brwsr)),
] ]
httpx_tasks = [ httpx_tasks = [
asyncio.create_task(fawa.scrape()), # asyncio.create_task(fawa.scrape()),
asyncio.create_task(istreameast.scrape()), # asyncio.create_task(istreameast.scrape()),
# asyncio.create_task(ovogoal.scrape()), # # asyncio.create_task(ovogoal.scrape()),
asyncio.create_task(shark.scrape()), # asyncio.create_task(shark.scrape()),
asyncio.create_task(streamcenter.scrape()), # asyncio.create_task(streamcenter.scrape()),
asyncio.create_task(streamsgate.scrape()), asyncio.create_task(streamsgate.scrape()),
asyncio.create_task(streamtpnew.scrape()), # asyncio.create_task(streamtpnew.scrape()),
asyncio.create_task(totalsportek.scrape()), # asyncio.create_task(totalsportek.scrape()),
asyncio.create_task(tvapp.scrape()), # asyncio.create_task(tvapp.scrape()),
asyncio.create_task(webcast.scrape()), # asyncio.create_task(webcast.scrape()),
asyncio.create_task(xyzstream.scrape()), # asyncio.create_task(xyzstream.scrape()),
] ]
await asyncio.gather(*(pw_tasks + httpx_tasks)) await asyncio.gather(*(pw_tasks + httpx_tasks))
# others # others
await cdnlivetv.scrape(xtrnl_brwsr) # await cdnlivetv.scrape(xtrnl_brwsr)
await watchfooty.scrape(xtrnl_brwsr) # await watchfooty.scrape(xtrnl_brwsr)
finally: finally:
await hdl_brwsr.close() await hdl_brwsr.close()
@ -89,68 +89,68 @@ async def main() -> None:
await network.client.aclose() await network.client.aclose()
additions = ( # additions = (
cdnlivetv.urls # cdnlivetv.urls
| embedhd.urls # | embedhd.urls
| fawa.urls # | fawa.urls
| fsports.urls # | fsports.urls
| istreameast.urls # | istreameast.urls
| ovogoal.urls # | ovogoal.urls
| roxie.urls # | roxie.urls
| shark.urls # | shark.urls
| streamcenter.urls # | streamcenter.urls
| streamsgate.urls # | streamsgate.urls
| streamtpnew.urls # | streamtpnew.urls
| totalsportek.urls # | totalsportek.urls
| tvapp.urls # | tvapp.urls
| watchfooty.urls # | watchfooty.urls
| webcast.urls # | webcast.urls
| xyzstream.urls # | xyzstream.urls
) # )
live_events: list[str] = [] # live_events: list[str] = []
combined_channels: list[str] = [] # combined_channels: list[str] = []
for i, (event, info) in enumerate( # for i, (event, info) in enumerate(
sorted(additions.items()), # sorted(additions.items()),
start=1, # start=1,
): # ):
extinf_all = ( # extinf_all = (
f'#EXTINF:-1 tvg-chno="{tvg_chno + i}" tvg-id="{info["id"]}" ' # f'#EXTINF:-1 tvg-chno="{tvg_chno + i}" tvg-id="{info["id"]}" '
f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}' # f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}'
) # )
extinf_live = ( # extinf_live = (
f'#EXTINF:-1 tvg-chno="{i}" tvg-id="{info["id"]}" ' # f'#EXTINF:-1 tvg-chno="{i}" tvg-id="{info["id"]}" '
f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}' # f'tvg-name="{event}" tvg-logo="{info["logo"]}" group-title="Live Events",{event}'
) # )
vlc_block = [ # vlc_block = [
f'#EXTVLCOPT:http-referrer={info["base"]}', # f'#EXTVLCOPT:http-referrer={info["base"]}',
f'#EXTVLCOPT:http-origin={info["base"]}', # f'#EXTVLCOPT:http-origin={info["base"]}',
f"#EXTVLCOPT:http-user-agent={info.get('UA', network.UA)}", # f"#EXTVLCOPT:http-user-agent={info.get('UA', network.UA)}",
info["url"], # info["url"],
] # ]
combined_channels.extend(["\n" + extinf_all, *vlc_block]) # combined_channels.extend(["\n" + extinf_all, *vlc_block])
live_events.extend(["\n" + extinf_live, *vlc_block]) # live_events.extend(["\n" + extinf_live, *vlc_block])
COMBINED_FILE.write_text( # COMBINED_FILE.write_text(
"\n".join(base_m3u8 + combined_channels), # "\n".join(base_m3u8 + combined_channels),
encoding="utf-8", # encoding="utf-8",
) # )
log.info(f"Base + Events saved to {COMBINED_FILE.resolve()}") # log.info(f"Base + Events saved to {COMBINED_FILE.resolve()}")
EVENTS_FILE.write_text( # EVENTS_FILE.write_text(
'#EXTM3U url-tvg="https://raw.githubusercontent.com/doms9/iptv/refs/heads/default/M3U8/TV.xml"\n' # '#EXTM3U url-tvg="https://raw.githubusercontent.com/doms9/iptv/refs/heads/default/M3U8/TV.xml"\n'
+ "\n".join(live_events), # + "\n".join(live_events),
encoding="utf-8", # encoding="utf-8",
) # )
log.info(f"Events saved to {EVENTS_FILE.resolve()}") # log.info(f"Events saved to {EVENTS_FILE.resolve()}")
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,5 +1,4 @@
import asyncio import asyncio
import re
from functools import partial from functools import partial
from urllib.parse import urljoin from urllib.parse import urljoin
@ -16,7 +15,7 @@ TAG = "ROXIE"
CACHE_FILE = Cache(TAG, exp=19_800) CACHE_FILE = Cache(TAG, exp=19_800)
BASE_URL = "https://roxiestreams.su" BASE_URL = "https://roxiestreams.info"
SPORT_URLS = { SPORT_URLS = {
# "March Madness": urljoin(BASE_URL, "march-madness"), # "March Madness": urljoin(BASE_URL, "march-madness"),
@ -102,18 +101,10 @@ async def get_events() -> list[dict[str, str]]:
if not (href := a_tag.attributes.get("href")): if not (href := a_tag.attributes.get("href")):
continue continue
if not (span := row.css_first("span.countdown-timer")) or not ( if not (event_time_elem := row.css_first("td.event-start-time")):
data_start := span.attributes.get("data-start")
):
continue continue
event_time = ( event_dt = Time.from_str(event_time_elem.text(strip=True), timezone="EST")
data_start.rsplit(":", 1)[0]
if (re.search(r"\d+:\d+:\d+", data_start) or "M:00" in data_start)
else data_start
)
event_dt = Time.from_str(event_time, timezone="PST")
if event_dt.date() != now.date(): if event_dt.date() != now.date():
continue continue

View file

@ -2,11 +2,11 @@ import asyncio
import logging import logging
import random import random
import re import re
from collections.abc import Awaitable, Callable from collections.abc import AsyncGenerator, Awaitable, Callable
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from functools import cache, partial from functools import cache, partial
from pathlib import Path from pathlib import Path
from typing import AsyncGenerator, TypeVar from typing import TypeVar
from urllib.parse import urlparse from urllib.parse import urlparse
import httpx import httpx
@ -158,7 +158,7 @@ class Network:
browser: Browser, browser: Browser,
stealth: bool = True, stealth: bool = True,
ignore_https: bool = False, ignore_https: bool = False,
) -> AsyncGenerator[BrowserContext, None]: ) -> AsyncGenerator[BrowserContext]:
context: BrowserContext | None = None context: BrowserContext | None = None
@ -195,7 +195,7 @@ class Network:
@staticmethod @staticmethod
@asynccontextmanager @asynccontextmanager
async def event_page(context: BrowserContext) -> AsyncGenerator[Page, None]: async def event_page(context: BrowserContext) -> AsyncGenerator[Page]:
page = await context.new_page() page = await context.new_page()
try: try: