mirror of
https://github.com/doms9/iptv.git
synced 2026-01-21 03:59:03 +01:00
e
fix streamhub scraping; misc edits
This commit is contained in:
parent
86a88e206e
commit
00000d9ebc
7 changed files with 33 additions and 29 deletions
|
|
@@ -62,7 +62,7 @@ async def main() -> None:
|
||||||
asyncio.create_task(streamhub.scrape(network.client)),
|
asyncio.create_task(streamhub.scrape(network.client)),
|
||||||
asyncio.create_task(streamsgate.scrape(network.client)),
|
asyncio.create_task(streamsgate.scrape(network.client)),
|
||||||
asyncio.create_task(strmd.scrape(network.client)),
|
asyncio.create_task(strmd.scrape(network.client)),
|
||||||
# asyncio.create_task(timstreams.scrape(network.client)),
|
asyncio.create_task(timstreams.scrape(network.client)),
|
||||||
asyncio.create_task(tvpass.scrape(network.client)),
|
asyncio.create_task(tvpass.scrape(network.client)),
|
||||||
asyncio.create_task(watchfooty.scrape(network.client)),
|
asyncio.create_task(watchfooty.scrape(network.client)),
|
||||||
asyncio.create_task(webcast.scrape(network.client)),
|
asyncio.create_task(webcast.scrape(network.client)),
|
||||||
|
|
|
||||||
|
|
@@ -1,5 +1,4 @@
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
|
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
|
|
@@ -49,8 +48,6 @@ async def get_events() -> dict[str, dict[str, str | float]]:
|
||||||
|
|
||||||
events = {}
|
events = {}
|
||||||
|
|
||||||
pattern = re.compile(r"https?://[^\s'\"]+?\.m3u8(?:\?[^\s'\"]*)?", re.IGNORECASE)
|
|
||||||
|
|
||||||
for event in api_data.get("events", []):
|
for event in api_data.get("events", []):
|
||||||
event_dt = Time.from_str(event["date"], timezone="UTC")
|
event_dt = Time.from_str(event["date"], timezone="UTC")
|
||||||
|
|
||||||
|
|
@@ -66,19 +63,18 @@ async def get_events() -> dict[str, dict[str, str | float]]:
|
||||||
stream_urls = [(i, f"server{i}URL") for i in range(1, 4)]
|
stream_urls = [(i, f"server{i}URL") for i in range(1, 4)]
|
||||||
|
|
||||||
for z, stream_url in stream_urls:
|
for z, stream_url in stream_urls:
|
||||||
if stream_link := channel_info.get(stream_url):
|
if (stream_link := channel_info.get(stream_url)) and stream_link != "null":
|
||||||
if pattern.search(stream_link):
|
key = f"[{sport}] {event_name} {z} ({TAG})"
|
||||||
key = f"[{sport}] {event_name} {z} ({TAG})"
|
|
||||||
|
|
||||||
tvg_id, logo = leagues.get_tvg_info(sport, event_name)
|
tvg_id, logo = leagues.get_tvg_info(sport, event_name)
|
||||||
|
|
||||||
events[key] = {
|
events[key] = {
|
||||||
"url": stream_link,
|
"url": stream_link,
|
||||||
"logo": logo,
|
"logo": logo,
|
||||||
"base": "https://pixelsport.tv",
|
"base": "https://pixelsport.tv",
|
||||||
"timestamp": now.timestamp(),
|
"timestamp": now.timestamp(),
|
||||||
"id": tvg_id or "Live.Event.us",
|
"id": tvg_id or "Live.Event.us",
|
||||||
}
|
}
|
||||||
|
|
||||||
return events
|
return events
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -47,7 +47,6 @@ async def process_event(
|
||||||
async def refresh_html_cache(
|
async def refresh_html_cache(
|
||||||
client: httpx.AsyncClient, now_ts: float
|
client: httpx.AsyncClient, now_ts: float
|
||||||
) -> dict[str, dict[str, str | float]]:
|
) -> dict[str, dict[str, str | float]]:
|
||||||
|
|
||||||
log.info("Refreshing HTML cache")
|
log.info("Refreshing HTML cache")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@@ -29,7 +29,7 @@ async def get_html_data(
|
||||||
r = await client.get(url, params={"date": date})
|
r = await client.get(url, params={"date": date})
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(f'Failed to fetch "{url}": {e}')
|
log.error(f'Failed to fetch "{r.url}": {e}')
|
||||||
|
|
||||||
return b""
|
return b""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -42,7 +42,7 @@ async def refresh_api_cache(
|
||||||
r = await client.get(BASE_URL, params={"pageNumber": 1, "pageSize": 500})
|
r = await client.get(BASE_URL, params={"pageNumber": 1, "pageSize": 500})
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(f'Failed to fetch "{BASE_URL}": {e}')
|
log.error(f'Failed to fetch "{r.url}": {e}')
|
||||||
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -17,12 +17,10 @@ BASE_URL = "https://streamfree.to/"
|
||||||
|
|
||||||
async def refresh_api_cache(client: httpx.AsyncClient) -> dict[str, dict[str, list]]:
|
async def refresh_api_cache(client: httpx.AsyncClient) -> dict[str, dict[str, list]]:
|
||||||
try:
|
try:
|
||||||
url = urljoin(BASE_URL, "streams")
|
r = await client.get(urljoin(BASE_URL, "streams"))
|
||||||
|
|
||||||
r = await client.get(url)
|
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(f'Failed to fetch "{url}": {e}')
|
log.error(f'Failed to fetch "{r.url}": {e}')
|
||||||
|
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -36,15 +36,21 @@ CATEGORIES = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def get_html_data(client: httpx.AsyncClient, sport_id: str) -> bytes:
|
async def get_html_data(
|
||||||
try:
|
client: httpx.AsyncClient,
|
||||||
url = urljoin(BASE_URL, f"events/{Time.now().date()}")
|
date: str,
|
||||||
|
sport_id: str,
|
||||||
|
) -> bytes:
|
||||||
|
|
||||||
r = await client.get(url, params={"sport_id": sport_id})
|
try:
|
||||||
|
r = await client.get(
|
||||||
|
urljoin(BASE_URL, f"events/{date}"),
|
||||||
|
params={"sport_id": sport_id},
|
||||||
|
)
|
||||||
|
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(f'Failed to fetch "{url}": {e}')
|
log.error(f'Failed to fetch "{r.url}": {e}')
|
||||||
|
|
||||||
return b""
|
return b""
|
||||||
|
|
||||||
|
|
@@ -53,11 +59,12 @@ async def get_html_data(client: httpx.AsyncClient, sport_id: str) -> bytes:
|
||||||
|
|
||||||
async def refresh_html_cache(
|
async def refresh_html_cache(
|
||||||
client: httpx.AsyncClient,
|
client: httpx.AsyncClient,
|
||||||
|
date: str,
|
||||||
sport_id: str,
|
sport_id: str,
|
||||||
ts: float,
|
ts: float,
|
||||||
) -> dict[str, dict[str, str | float]]:
|
) -> dict[str, dict[str, str | float]]:
|
||||||
|
|
||||||
html_data = await get_html_data(client, sport_id)
|
html_data = await get_html_data(client, date, sport_id)
|
||||||
|
|
||||||
soup = HTMLParser(html_data)
|
soup = HTMLParser(html_data)
|
||||||
|
|
||||||
|
|
@@ -113,12 +120,16 @@ async def get_events(
|
||||||
if not (events := HTML_CACHE.load()):
|
if not (events := HTML_CACHE.load()):
|
||||||
log.info("Refreshing HTML cache")
|
log.info("Refreshing HTML cache")
|
||||||
|
|
||||||
|
dates = [now.date(), now.delta(days=1).date()]
|
||||||
|
|
||||||
tasks = [
|
tasks = [
|
||||||
refresh_html_cache(
|
refresh_html_cache(
|
||||||
client,
|
client,
|
||||||
|
date,
|
||||||
sport_id,
|
sport_id,
|
||||||
now.timestamp(),
|
now.timestamp(),
|
||||||
)
|
)
|
||||||
|
for date in dates
|
||||||
for sport_id in CATEGORIES.values()
|
for sport_id in CATEGORIES.values()
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@@ -184,7 +195,7 @@ async def scrape(client: httpx.AsyncClient) -> None:
|
||||||
ev["event"],
|
ev["event"],
|
||||||
ev["logo"],
|
ev["logo"],
|
||||||
ev["link"],
|
ev["link"],
|
||||||
ev["timestamp"],
|
ev["event_ts"],
|
||||||
)
|
)
|
||||||
|
|
||||||
key = f"[{sport}] {event} ({TAG})"
|
key = f"[{sport}] {event} ({TAG})"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue