mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-06-13 01:05:43 +02:00
Merge a4d4809298
into 351dc0bc33
This commit is contained in:
commit
b602120a87
|
@ -503,6 +503,7 @@ # Supported sites
|
|||
- **gem.cbc.ca**: [*cbcgem*](## "netrc machine")
|
||||
- **gem.cbc.ca:live**
|
||||
- **gem.cbc.ca:playlist**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
- **Genius**
|
||||
- **GeniusLyrics**
|
||||
- **GetCourseRu**: [*getcourseru*](## "netrc machine")
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
from ..utils import int_or_none, ExtractorError
|
||||
|
||||
|
||||
class BeatportIE(InfoExtractor):
|
||||
|
@ -43,55 +43,47 @@ def _real_extract(self, url):
|
|||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playables = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.Playables\s*=\s*({.+?});', webpage,
|
||||
'playables info', flags=re.DOTALL),
|
||||
track_id)
|
||||
try:
|
||||
playables_json = self._search_regex(
|
||||
r'window\.Playables\s*=\s*({.+?})\s*;', webpage,
|
||||
'playables info', default='{}', flags=re.DOTALL)
|
||||
playables = self._parse_json(playables_json, track_id)
|
||||
except re.error:
|
||||
raise ExtractorError('Failed to extract playables information. The page structure may have changed.')
|
||||
|
||||
track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
|
||||
if not playables or 'tracks' not in playables:
|
||||
raise ExtractorError('No playable tracks found in the extracted information.')
|
||||
|
||||
title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
|
||||
if track['mix']:
|
||||
track = next((t for t in playables['tracks'] if t['id'] == int(track_id)), None)
|
||||
if not track:
|
||||
raise ExtractorError(f'No track with ID {track_id} found.')
|
||||
|
||||
title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name']
|
||||
if track.get('mix'):
|
||||
title += ' (' + track['mix'] + ')'
|
||||
|
||||
formats = []
|
||||
for ext, info in track['preview'].items():
|
||||
if not info['url']:
|
||||
continue
|
||||
fmt = {
|
||||
'url': info['url'],
|
||||
'ext': ext,
|
||||
'format_id': ext,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
if ext == 'mp3':
|
||||
fmt['acodec'] = 'mp3'
|
||||
fmt['abr'] = 96
|
||||
fmt['asr'] = 44100
|
||||
elif ext == 'mp4':
|
||||
fmt['acodec'] = 'aac'
|
||||
fmt['abr'] = 96
|
||||
fmt['asr'] = 44100
|
||||
formats.append(fmt)
|
||||
for ext, info in track.get('preview', {}).items():
|
||||
url = info.get('url')
|
||||
if url:
|
||||
fmt = {
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'format_id': ext,
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp3' if ext == 'mp3' else 'aac',
|
||||
'abr': 96,
|
||||
'asr': 44100
|
||||
}
|
||||
formats.append(fmt)
|
||||
|
||||
images = []
|
||||
for name, info in track['images'].items():
|
||||
image_url = info.get('url')
|
||||
if name == 'dynamic' or not image_url:
|
||||
continue
|
||||
image = {
|
||||
'id': name,
|
||||
'url': image_url,
|
||||
'height': int_or_none(info.get('height')),
|
||||
'width': int_or_none(info.get('width')),
|
||||
}
|
||||
images.append(image)
|
||||
images = [{'id': name, 'url': info['url'], 'height': int_or_none(info.get('height')), 'width': int_or_none(info.get('width'))}
|
||||
for name, info in track.get('images', {}).items() if name != 'dynamic' and info.get('url')]
|
||||
|
||||
return {
|
||||
'id': compat_str(track.get('id')) or track_id,
|
||||
'display_id': track.get('slug') or display_id,
|
||||
'id': compat_str(track.get('id', track_id)),
|
||||
'display_id': track.get('slug', display_id),
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': images,
|
||||
}
|
||||
'thumbnails': images
|
||||
}
|
|
@ -1,17 +1,68 @@
|
|||
import re
|
||||
|
||||
import requests
|
||||
|
||||
from .dplay import DPlayIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
def _generate_video_specific_cache_url(slug, parent_slug):
    """
    Build the loma-cms page url for one specific video.

    The environment is fixed to "tele5" and the API version to 2; only the
    two slugs vary.

    :param slug: The part of the url that identifies the video by title.
    :param parent_slug: The part of the url that identifies the parent directory.
    :return: The page url with both slugs substituted in.
    """
    page_url_template = 'https://de-api.loma-cms.com/feloma/page/{0}/?environment=tele5&parent_slug={1}&v=2'
    page_url = page_url_template.format(slug, parent_slug)
    return page_url
|
||||
def _do_cached_post(s: 'requests.Session',
                    referer: str,
                    url: str,
                    timeout: float = 20.0) -> dict:
    """
    Do the API call to the CACHED json endpoint of tele5.de.

    It is likely connected to the new "loma-cms" API.

    :param s: The session we use.
    :param referer: The referer url.
    :param url: The url to retrieve the cached data for; it is sent as the
                ``path`` field of the JSON request body.
    :param timeout: Seconds to wait for the server. Without a timeout the
                    request could block forever on a stalled connection.
    :return: The json dict from the response.
    :raises requests.HTTPError: If the response carries an error status
                                (raised by ``raise_for_status``).
    """
    headers = {
        'Origin': 'https://tele5.de',
        # Referer is a mandatory key.
        'Referer': referer,
        # User-Agent is a mandatory key, it can be anything!
        'User-Agent': 'Youtube-DL',
    }
    r = s.post(url='https://tele5.de/cached',
               headers=headers,
               json={'path': url},
               timeout=timeout)
    r.raise_for_status()
    return r.json()
|
||||
|
||||
class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_TESTS = [{
|
||||
'url': 'https://tele5.de/mediathek/sorority-babes-in-the-slimeball-bowl-o-rama',
|
||||
'info_dict': {
|
||||
'id': '5582852',
|
||||
'title': 'Sorority Babes in the Slimeball Bowl-O-Rama',
|
||||
'ext': 'mp4',
|
||||
'series': 'Sorority Babes in the Slimeball Bowl-O-Rama',
|
||||
'duration': 4779.88,
|
||||
'description': 'md5:1d8d30ed3d221613861aaefa8d7e887e',
|
||||
'timestamp': 1697839800,
|
||||
'upload_date': '20231020',
|
||||
'creator': 'Tele5',
|
||||
'tags': [],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/10/02/501fa839-d3ac-3c04-aa61-57f98802c532.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': '1549416',
|
||||
'ext': 'mp4',
|
||||
|
@ -26,6 +77,7 @@ class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
|
|||
}, {
|
||||
# jwplatform, nexx unavailable
|
||||
'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': 'WJuiOlUp',
|
||||
'ext': 'mp4',
|
||||
|
@ -40,6 +92,7 @@ class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
|
|||
'skip': 'No longer available, redirects to Filme page',
|
||||
}, {
|
||||
'url': 'https://tele5.de/mediathek/angel-of-mine/',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': '1252360',
|
||||
'ext': 'mp4',
|
||||
|
@ -72,18 +125,43 @@ class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
|
|||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player')
|
||||
player_info = extract_attributes(player_element)
|
||||
asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', ))
|
||||
endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname
|
||||
source_type = player_info.get('sourcetype')
|
||||
if source_type:
|
||||
endpoint = '%s-%s' % (source_type, endpoint)
|
||||
try:
|
||||
return self._get_disco_api_info(url, asset_id, endpoint, realm, country)
|
||||
except ExtractorError as e:
|
||||
if getattr(e, 'message', '') == 'Missing deviceId in context':
|
||||
self.report_drm(video_id)
|
||||
raise
|
||||
content_regex = re.compile(r'https?://(?:www\.)?(?P<environment>[^.]+)\.de/(?P<parent_slug>[^/]+)/(?P<slug>[^/?#&]+)')
|
||||
m = content_regex.search(url)
|
||||
if m is not None:
|
||||
environment, parent_slug, slug = m.groups()
|
||||
s = requests.session()
|
||||
headers_for_origin = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0'}
|
||||
r = s.get(url=url,
|
||||
headers=headers_for_origin)
|
||||
r.raise_for_status()
|
||||
|
||||
cached_base = _do_cached_post(s=s,
|
||||
referer=url,
|
||||
url='https://de-api.loma-cms.com/feloma/configurations/?environment={0}'.format(environment))
|
||||
|
||||
site_info = cached_base.get('data').get('settings').get('site')
|
||||
player_info = site_info.get('player')
|
||||
|
||||
sonic_realm = player_info['sonicRealm']
|
||||
sonic_endpoint = compat_urlparse.urlparse(player_info['sonicEndpoint']).hostname
|
||||
country = site_info['info']['country']
|
||||
|
||||
cached_video_specific = _do_cached_post(s=s, referer=url,
|
||||
url=_generate_video_specific_cache_url(
|
||||
slug=slug,
|
||||
parent_slug=parent_slug))
|
||||
|
||||
video_id = cached_video_specific['data']['blocks'][1]['videoId']
|
||||
|
||||
try:
|
||||
return self._get_disco_api_info(url=url,
|
||||
display_id=video_id,
|
||||
disco_host=sonic_endpoint,
|
||||
realm=sonic_realm,
|
||||
country=country,
|
||||
api_version=3,
|
||||
)
|
||||
except ExtractorError as e:
|
||||
if getattr(e, 'message', '') == 'Missing deviceId in context':
|
||||
self.report_drm(video_id)
|
||||
raise
|
||||
|
|
Loading…
Reference in New Issue
Block a user