mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 16:42:57 +01:00
parent
7275535116
commit
10db0d2f57
@ -1794,14 +1794,18 @@ def sanitize_numeric_fields(info):
|
|||||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||||
info_dict['display_id'] = info_dict['id']
|
info_dict['display_id'] = info_dict['id']
|
||||||
|
|
||||||
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
for ts_key, date_key in (
|
||||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
('timestamp', 'upload_date'),
|
||||||
# see http://bugs.python.org/issue1646728)
|
('release_timestamp', 'release_date'),
|
||||||
try:
|
):
|
||||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
|
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
|
||||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||||
except (ValueError, OverflowError, OSError):
|
# see http://bugs.python.org/issue1646728)
|
||||||
pass
|
try:
|
||||||
|
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||||
|
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||||
|
except (ValueError, OverflowError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
# Auto generate title fields corresponding to the *_number fields when missing
|
# Auto generate title fields corresponding to the *_number fields when missing
|
||||||
# in order to always have clean titles. This is very common for TV series.
|
# in order to always have clean titles. This is very common for TV series.
|
||||||
|
@ -42,6 +42,7 @@ def _real_extract(self, url):
|
|||||||
ember_data = self._parse_json(self._search_regex(
|
ember_data = self._parse_json(self._search_regex(
|
||||||
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||||
webpage, 'ember data'), episode_id)
|
webpage, 'ember data'), episode_id)
|
||||||
|
ember_data = ember_data.get(episode_id) or ember_data
|
||||||
episode = ember_data['data']['attributes']
|
episode = ember_data['data']['attributes']
|
||||||
description = episode.get('description') or {}
|
description = episode.get('description') or {}
|
||||||
|
|
||||||
|
@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
'uploader': 'Ben Prunty',
|
'uploader': 'Ben Prunty',
|
||||||
'timestamp': 1396508491,
|
'timestamp': 1396508491,
|
||||||
'upload_date': '20140403',
|
'upload_date': '20140403',
|
||||||
|
'release_timestamp': 1396483200,
|
||||||
'release_date': '20140403',
|
'release_date': '20140403',
|
||||||
'duration': 260.877,
|
'duration': 260.877,
|
||||||
'track': 'Lanius (Battle)',
|
'track': 'Lanius (Battle)',
|
||||||
@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
'uploader': 'Mastodon',
|
'uploader': 'Mastodon',
|
||||||
'timestamp': 1322005399,
|
'timestamp': 1322005399,
|
||||||
'upload_date': '20111122',
|
'upload_date': '20111122',
|
||||||
|
'release_timestamp': 1076112000,
|
||||||
'release_date': '20040207',
|
'release_date': '20040207',
|
||||||
'duration': 120.79,
|
'duration': 120.79,
|
||||||
'track': 'Hail to Fire',
|
'track': 'Hail to Fire',
|
||||||
@ -197,7 +199,7 @@ def _real_extract(self, url):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': artist,
|
'uploader': artist,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'release_date': unified_strdate(tralbum.get('album_release_date')),
|
'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'track': track,
|
'track': track,
|
||||||
'track_number': track_number,
|
'track_number': track_number,
|
||||||
|
@ -138,11 +138,6 @@ def _real_extract(self, url):
|
|||||||
anime_id = mobj.group('anime_id')
|
anime_id = mobj.group('anime_id')
|
||||||
page_id = mobj.group('page')
|
page_id = mobj.group('page')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
headers = {
|
|
||||||
'Referer': url,
|
|
||||||
'Accept': '*/*'
|
|
||||||
}
|
|
||||||
headers.update(self.geo_verification_headers())
|
|
||||||
|
|
||||||
if 'anime/' not in url:
|
if 'anime/' not in url:
|
||||||
cid = self._search_regex(
|
cid = self._search_regex(
|
||||||
@ -160,8 +155,12 @@ def _real_extract(self, url):
|
|||||||
if 'no_bangumi_tip' not in smuggled_data:
|
if 'no_bangumi_tip' not in smuggled_data:
|
||||||
self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run yt-dlp with %s' % (
|
self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run yt-dlp with %s' % (
|
||||||
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
|
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
|
||||||
|
headers = {
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||||
|
'Referer': url
|
||||||
|
}
|
||||||
|
headers.update(self.geo_verification_headers())
|
||||||
|
|
||||||
headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
|
|
||||||
js = self._download_json(
|
js = self._download_json(
|
||||||
'http://bangumi.bilibili.com/web_api/get_source', video_id,
|
'http://bangumi.bilibili.com/web_api/get_source', video_id,
|
||||||
data=urlencode_postdata({'episode_id': video_id}),
|
data=urlencode_postdata({'episode_id': video_id}),
|
||||||
@ -170,6 +169,12 @@ def _real_extract(self, url):
|
|||||||
self._report_error(js)
|
self._report_error(js)
|
||||||
cid = js['result']['cid']
|
cid = js['result']['cid']
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'Accept': 'application/json',
|
||||||
|
'Referer': url
|
||||||
|
}
|
||||||
|
headers.update(self.geo_verification_headers())
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
|
RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
|
||||||
|
@ -27,7 +27,7 @@ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
|||||||
|
|
||||||
|
|
||||||
class CBSIE(CBSBaseIE):
|
class CBSIE(CBSBaseIE):
|
||||||
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs\.com|paramountplus\.com)/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||||
@ -53,7 +53,7 @@ class CBSIE(CBSBaseIE):
|
|||||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.paramountplus.com/shows/star-trek-discovery/video/l5ANMH9wM7kxwV1qr4u1xn88XOhYMlZX/star-trek-discovery-the-vulcan-hello/',
|
'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -231,8 +231,9 @@ class InfoExtractor(object):
|
|||||||
uploader: Full name of the video uploader.
|
uploader: Full name of the video uploader.
|
||||||
license: License name the video is licensed under.
|
license: License name the video is licensed under.
|
||||||
creator: The creator of the video.
|
creator: The creator of the video.
|
||||||
|
release_timestamp: UNIX timestamp of the moment the video was released.
|
||||||
release_date: The date (YYYYMMDD) when the video was released.
|
release_date: The date (YYYYMMDD) when the video was released.
|
||||||
timestamp: UNIX timestamp of the moment the video became available.
|
timestamp: UNIX timestamp of the moment the video was uploaded
|
||||||
upload_date: Video upload date (YYYYMMDD).
|
upload_date: Video upload date (YYYYMMDD).
|
||||||
If not explicitly set, calculated from timestamp.
|
If not explicitly set, calculated from timestamp.
|
||||||
uploader_id: Nickname or id of the video uploader.
|
uploader_id: Nickname or id of the video uploader.
|
||||||
|
@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
|
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4')
|
||||||
for f in formats:
|
for f in formats:
|
||||||
wh = self._BITRATE_MAP.get(f.get('tbr'))
|
wh = self._BITRATE_MAP.get(f.get('tbr'))
|
||||||
if wh:
|
if wh:
|
||||||
|
@ -6,8 +6,10 @@
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@ -62,6 +64,7 @@ def _parse_stream(self, stream, url):
|
|||||||
'description': stream_value.get('description'),
|
'description': stream_value.get('description'),
|
||||||
'license': stream_value.get('license'),
|
'license': stream_value.get('license'),
|
||||||
'timestamp': int_or_none(stream.get('timestamp')),
|
'timestamp': int_or_none(stream.get('timestamp')),
|
||||||
|
'release_timestamp': int_or_none(stream_value.get('release_time')),
|
||||||
'tags': stream_value.get('tags'),
|
'tags': stream_value.get('tags'),
|
||||||
'duration': int_or_none(media.get('duration')),
|
'duration': int_or_none(media.get('duration')),
|
||||||
'channel': try_get(signing_channel, lambda x: x['value']['title']),
|
'channel': try_get(signing_channel, lambda x: x['value']['title']),
|
||||||
@ -94,6 +97,8 @@ class LBRYIE(LBRYBaseIE):
|
|||||||
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
|
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
|
||||||
'timestamp': 1595694354,
|
'timestamp': 1595694354,
|
||||||
'upload_date': '20200725',
|
'upload_date': '20200725',
|
||||||
|
'release_timestamp': 1595340697,
|
||||||
|
'release_date': '20200721',
|
||||||
'width': 1280,
|
'width': 1280,
|
||||||
'height': 720,
|
'height': 720,
|
||||||
}
|
}
|
||||||
@ -108,6 +113,8 @@ class LBRYIE(LBRYBaseIE):
|
|||||||
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
|
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
|
||||||
'timestamp': 1591312601,
|
'timestamp': 1591312601,
|
||||||
'upload_date': '20200604',
|
'upload_date': '20200604',
|
||||||
|
'release_timestamp': 1591312421,
|
||||||
|
'release_date': '20200604',
|
||||||
'tags': list,
|
'tags': list,
|
||||||
'duration': 2570,
|
'duration': 2570,
|
||||||
'channel': 'The LBRY Foundation',
|
'channel': 'The LBRY Foundation',
|
||||||
@ -189,17 +196,18 @@ class LBRYChannelIE(LBRYBaseIE):
|
|||||||
}]
|
}]
|
||||||
_PAGE_SIZE = 50
|
_PAGE_SIZE = 50
|
||||||
|
|
||||||
def _fetch_page(self, claim_id, url, page):
|
def _fetch_page(self, claim_id, url, params, page):
|
||||||
page += 1
|
page += 1
|
||||||
|
page_params = {
|
||||||
|
'channel_ids': [claim_id],
|
||||||
|
'claim_type': 'stream',
|
||||||
|
'no_totals': True,
|
||||||
|
'page': page,
|
||||||
|
'page_size': self._PAGE_SIZE,
|
||||||
|
}
|
||||||
|
page_params.update(params)
|
||||||
result = self._call_api_proxy(
|
result = self._call_api_proxy(
|
||||||
'claim_search', claim_id, {
|
'claim_search', claim_id, page_params, 'page %d' % page)
|
||||||
'channel_ids': [claim_id],
|
|
||||||
'claim_type': 'stream',
|
|
||||||
'no_totals': True,
|
|
||||||
'page': page,
|
|
||||||
'page_size': self._PAGE_SIZE,
|
|
||||||
'stream_types': self._SUPPORTED_STREAM_TYPES,
|
|
||||||
}, 'page %d' % page)
|
|
||||||
for item in (result.get('items') or []):
|
for item in (result.get('items') or []):
|
||||||
stream_claim_name = item.get('name')
|
stream_claim_name = item.get('name')
|
||||||
stream_claim_id = item.get('claim_id')
|
stream_claim_id = item.get('claim_id')
|
||||||
@ -220,8 +228,31 @@ def _real_extract(self, url):
|
|||||||
result = self._resolve_url(
|
result = self._resolve_url(
|
||||||
'lbry://' + display_id, display_id, 'channel')
|
'lbry://' + display_id, display_id, 'channel')
|
||||||
claim_id = result['claim_id']
|
claim_id = result['claim_id']
|
||||||
|
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
|
content = qs.get('content', [None])[0]
|
||||||
|
params = {
|
||||||
|
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
|
||||||
|
'order_by': {
|
||||||
|
'new': ['release_time'],
|
||||||
|
'top': ['effective_amount'],
|
||||||
|
'trending': ['trending_group', 'trending_mixed'],
|
||||||
|
}[qs.get('order', ['new'])[0]],
|
||||||
|
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
|
||||||
|
}
|
||||||
|
duration = qs.get('duration', [None])[0]
|
||||||
|
if duration:
|
||||||
|
params['duration'] = {
|
||||||
|
'long': '>=1200',
|
||||||
|
'short': '<=240',
|
||||||
|
}[duration]
|
||||||
|
language = qs.get('language', ['all'])[0]
|
||||||
|
if language != 'all':
|
||||||
|
languages = [language]
|
||||||
|
if language == 'en':
|
||||||
|
languages.append('none')
|
||||||
|
params['any_languages'] = languages
|
||||||
entries = OnDemandPagedList(
|
entries = OnDemandPagedList(
|
||||||
functools.partial(self._fetch_page, claim_id, url),
|
functools.partial(self._fetch_page, claim_id, url, params),
|
||||||
self._PAGE_SIZE)
|
self._PAGE_SIZE)
|
||||||
result_value = result.get('value') or {}
|
result_value = result.get('value') or {}
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
|
@ -599,11 +599,13 @@ def channel_data(field, type_):
|
|||||||
else:
|
else:
|
||||||
age_limit = None
|
age_limit = None
|
||||||
|
|
||||||
|
webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': urljoin(url, video.get('thumbnailPath')),
|
'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
|
||||||
'timestamp': unified_timestamp(video.get('publishedAt')),
|
'timestamp': unified_timestamp(video.get('publishedAt')),
|
||||||
'uploader': account_data('displayName', compat_str),
|
'uploader': account_data('displayName', compat_str),
|
||||||
'uploader_id': str_or_none(account_data('id', int)),
|
'uploader_id': str_or_none(account_data('id', int)),
|
||||||
@ -621,5 +623,6 @@ def channel_data(field, type_):
|
|||||||
'tags': try_get(video, lambda x: x['tags'], list),
|
'tags': try_get(video, lambda x: x['tags'], list),
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles
|
'subtitles': subtitles,
|
||||||
|
'webpage_url': webpage_url,
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,7 @@ def _extract_video(self, data, extract_formats=True):
|
|||||||
|
|
||||||
title = (data.get('title') or data.get('grid_title') or video_id).strip()
|
title = (data.get('title') or data.get('grid_title') or video_id).strip()
|
||||||
|
|
||||||
|
urls = []
|
||||||
formats = []
|
formats = []
|
||||||
duration = None
|
duration = None
|
||||||
if extract_formats:
|
if extract_formats:
|
||||||
@ -38,8 +39,9 @@ def _extract_video(self, data, extract_formats=True):
|
|||||||
if not isinstance(format_dict, dict):
|
if not isinstance(format_dict, dict):
|
||||||
continue
|
continue
|
||||||
format_url = url_or_none(format_dict.get('url'))
|
format_url = url_or_none(format_dict.get('url'))
|
||||||
if not format_url:
|
if not format_url or format_url in urls:
|
||||||
continue
|
continue
|
||||||
|
urls.append(format_url)
|
||||||
duration = float_or_none(format_dict.get('duration'), scale=1000)
|
duration = float_or_none(format_dict.get('duration'), scale=1000)
|
||||||
ext = determine_ext(format_url)
|
ext = determine_ext(format_url)
|
||||||
if 'hls' in format_id.lower() or ext == 'm3u8':
|
if 'hls' in format_id.lower() or ext == 'm3u8':
|
||||||
|
@ -167,6 +167,7 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
|
||||||
}, {
|
}, {
|
||||||
# subtitles
|
# subtitles
|
||||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
|
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
|
||||||
@ -265,7 +266,8 @@ def dl_webpage(platform):
|
|||||||
webpage = dl_webpage('pc')
|
webpage = dl_webpage('pc')
|
||||||
|
|
||||||
error_msg = self._html_search_regex(
|
error_msg = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
(r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||||
|
r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
|
||||||
webpage, 'error message', default=None, group='error')
|
webpage, 'error message', default=None, group='error')
|
||||||
if error_msg:
|
if error_msg:
|
||||||
error_msg = re.sub(r'\s+', ' ', error_msg)
|
error_msg = re.sub(r'\s+', ' ', error_msg)
|
||||||
@ -394,6 +396,21 @@ def parse_quality_items(quality_items):
|
|||||||
|
|
||||||
upload_date = None
|
upload_date = None
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
|
def add_format(format_url, height=None):
|
||||||
|
tbr = None
|
||||||
|
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
|
||||||
|
if mobj:
|
||||||
|
if not height:
|
||||||
|
height = int(mobj.group('height'))
|
||||||
|
tbr = int(mobj.group('tbr'))
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': '%dp' % height if height else None,
|
||||||
|
'height': height,
|
||||||
|
'tbr': tbr,
|
||||||
|
})
|
||||||
|
|
||||||
for video_url, height in video_urls:
|
for video_url, height in video_urls:
|
||||||
if not upload_date:
|
if not upload_date:
|
||||||
upload_date = self._search_regex(
|
upload_date = self._search_regex(
|
||||||
@ -410,18 +427,19 @@ def parse_quality_items(quality_items):
|
|||||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
continue
|
continue
|
||||||
tbr = None
|
if '/video/get_media' in video_url:
|
||||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
|
medias = self._download_json(video_url, video_id, fatal=False)
|
||||||
if mobj:
|
if isinstance(medias, list):
|
||||||
if not height:
|
for media in medias:
|
||||||
height = int(mobj.group('height'))
|
if not isinstance(media, dict):
|
||||||
tbr = int(mobj.group('tbr'))
|
continue
|
||||||
formats.append({
|
video_url = url_or_none(media.get('videoUrl'))
|
||||||
'url': video_url,
|
if not video_url:
|
||||||
'format_id': '%dp' % height if height else None,
|
continue
|
||||||
'height': height,
|
height = int_or_none(media.get('quality'))
|
||||||
'tbr': tbr,
|
add_format(video_url, height)
|
||||||
})
|
continue
|
||||||
|
add_format(video_url)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
|
@ -2,8 +2,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import io
|
||||||
import re
|
import re
|
||||||
import time
|
import sys
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@ -14,56 +15,13 @@
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
qualities,
|
||||||
remove_end,
|
remove_end,
|
||||||
remove_start,
|
remove_start,
|
||||||
sanitized_Request,
|
|
||||||
std_headers,
|
std_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
|
||||||
def _decrypt_url(png):
|
|
||||||
encrypted_data = compat_b64decode(png)
|
|
||||||
text_index = encrypted_data.find(b'tEXt')
|
|
||||||
text_chunk = encrypted_data[text_index - 4:]
|
|
||||||
length = compat_struct_unpack('!I', text_chunk[:4])[0]
|
|
||||||
# Use bytearray to get integers when iterating in both python 2.x and 3.x
|
|
||||||
data = bytearray(text_chunk[8:8 + length])
|
|
||||||
data = [chr(b) for b in data if b != 0]
|
|
||||||
hash_index = data.index('#')
|
|
||||||
alphabet_data = data[:hash_index]
|
|
||||||
url_data = data[hash_index + 1:]
|
|
||||||
if url_data[0] == 'H' and url_data[3] == '%':
|
|
||||||
# remove useless HQ%% at the start
|
|
||||||
url_data = url_data[4:]
|
|
||||||
|
|
||||||
alphabet = []
|
|
||||||
e = 0
|
|
||||||
d = 0
|
|
||||||
for l in alphabet_data:
|
|
||||||
if d == 0:
|
|
||||||
alphabet.append(l)
|
|
||||||
d = e = (e + 1) % 4
|
|
||||||
else:
|
|
||||||
d -= 1
|
|
||||||
url = ''
|
|
||||||
f = 0
|
|
||||||
e = 3
|
|
||||||
b = 1
|
|
||||||
for letter in url_data:
|
|
||||||
if f == 0:
|
|
||||||
l = int(letter) * 10
|
|
||||||
f = 1
|
|
||||||
else:
|
|
||||||
if e == 0:
|
|
||||||
l += int(letter)
|
|
||||||
url += alphabet[l]
|
|
||||||
e = (b + 3) % 4
|
|
||||||
f = 0
|
|
||||||
b += 1
|
|
||||||
else:
|
|
||||||
e -= 1
|
|
||||||
|
|
||||||
return url
|
|
||||||
|
|
||||||
|
|
||||||
class RTVEALaCartaIE(InfoExtractor):
|
class RTVEALaCartaIE(InfoExtractor):
|
||||||
@ -79,28 +37,31 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||||
'duration': 5024.566,
|
'duration': 5024.566,
|
||||||
|
'series': 'Balonmano',
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||||
}, {
|
}, {
|
||||||
'note': 'Live stream',
|
'note': 'Live stream',
|
||||||
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1694255',
|
'id': '1694255',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'TODO',
|
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'live stream',
|
||||||
},
|
},
|
||||||
'skip': 'The f4m manifest can\'t be used yet',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
||||||
'md5': 'e55e162379ad587e9640eda4f7353c0f',
|
'md5': 'd850f3c8731ea53952ebab489cf81cbf',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4236788',
|
'id': '4236788',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Servir y proteger - Capítulo 104 ',
|
'title': 'Servir y proteger - Capítulo 104',
|
||||||
'duration': 3222.0,
|
'duration': 3222.0,
|
||||||
},
|
},
|
||||||
'params': {
|
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||||
'skip_download': True, # requires ffmpeg
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -111,58 +72,102 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
|
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
|
||||||
manager_info = self._download_json(
|
self._manager = self._download_json(
|
||||||
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
||||||
None, 'Fetching manager info')
|
None, 'Fetching manager info')['manager']
|
||||||
self._manager = manager_info['manager']
|
|
||||||
|
@staticmethod
|
||||||
|
def _decrypt_url(png):
|
||||||
|
encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
|
||||||
|
while True:
|
||||||
|
length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
|
||||||
|
chunk_type = encrypted_data.read(4)
|
||||||
|
if chunk_type == b'IEND':
|
||||||
|
break
|
||||||
|
data = encrypted_data.read(length)
|
||||||
|
if chunk_type == b'tEXt':
|
||||||
|
alphabet_data, text = data.split(b'\0')
|
||||||
|
quality, url_data = text.split(b'%%')
|
||||||
|
alphabet = []
|
||||||
|
e = 0
|
||||||
|
d = 0
|
||||||
|
for l in _bytes_to_chr(alphabet_data):
|
||||||
|
if d == 0:
|
||||||
|
alphabet.append(l)
|
||||||
|
d = e = (e + 1) % 4
|
||||||
|
else:
|
||||||
|
d -= 1
|
||||||
|
url = ''
|
||||||
|
f = 0
|
||||||
|
e = 3
|
||||||
|
b = 1
|
||||||
|
for letter in _bytes_to_chr(url_data):
|
||||||
|
if f == 0:
|
||||||
|
l = int(letter) * 10
|
||||||
|
f = 1
|
||||||
|
else:
|
||||||
|
if e == 0:
|
||||||
|
l += int(letter)
|
||||||
|
url += alphabet[l]
|
||||||
|
e = (b + 3) % 4
|
||||||
|
f = 0
|
||||||
|
b += 1
|
||||||
|
else:
|
||||||
|
e -= 1
|
||||||
|
|
||||||
|
yield quality.decode(), url
|
||||||
|
encrypted_data.read(4) # CRC
|
||||||
|
|
||||||
|
def _extract_png_formats(self, video_id):
|
||||||
|
png = self._download_webpage(
|
||||||
|
'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
|
||||||
|
video_id, 'Downloading url information', query={'q': 'v2'})
|
||||||
|
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
|
||||||
|
formats = []
|
||||||
|
for quality, video_url in self._decrypt_url(png):
|
||||||
|
ext = determine_ext(video_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
video_url, video_id, 'dash', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': quality,
|
||||||
|
'quality': q(quality),
|
||||||
|
'url': video_url,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
||||||
video_id)['page']['items'][0]
|
video_id)['page']['items'][0]
|
||||||
if info['state'] == 'DESPU':
|
if info['state'] == 'DESPU':
|
||||||
raise ExtractorError('The video is no longer available', expected=True)
|
raise ExtractorError('The video is no longer available', expected=True)
|
||||||
title = info['title']
|
title = info['title'].strip()
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
formats = self._extract_png_formats(video_id)
|
||||||
png_request = sanitized_Request(png_url)
|
|
||||||
png_request.add_header('Referer', url)
|
|
||||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
|
||||||
video_url = _decrypt_url(png)
|
|
||||||
ext = determine_ext(video_url)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
if not video_url.endswith('.f4m') and ext != 'm3u8':
|
|
||||||
if '?' not in video_url:
|
|
||||||
video_url = video_url.replace('resources/', 'auth/resources/')
|
|
||||||
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
|
|
||||||
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
elif ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
video_url, video_id, f4m_id='hds', fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
subtitles = None
|
subtitles = None
|
||||||
if info.get('sbtFile') is not None:
|
sbt_file = info.get('sbtFile')
|
||||||
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
|
if sbt_file:
|
||||||
|
subtitles = self.extract_subtitles(video_id, sbt_file)
|
||||||
|
|
||||||
|
is_live = info.get('live') is True
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': self._live_title(title) if is_live else title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': info.get('image'),
|
'thumbnail': info.get('image'),
|
||||||
'page_url': url,
|
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'duration': float_or_none(info.get('duration'), scale=1000),
|
'duration': float_or_none(info.get('duration'), 1000),
|
||||||
|
'is_live': is_live,
|
||||||
|
'series': info.get('programTitle'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, sub_file):
|
def _get_subtitles(self, video_id, sub_file):
|
||||||
@ -174,48 +179,26 @@ def _get_subtitles(self, video_id, sub_file):
|
|||||||
for s in subs)
|
for s in subs)
|
||||||
|
|
||||||
|
|
||||||
class RTVEInfantilIE(InfoExtractor):
|
class RTVEInfantilIE(RTVEALaCartaIE):
|
||||||
IE_NAME = 'rtve.es:infantil'
|
IE_NAME = 'rtve.es:infantil'
|
||||||
IE_DESC = 'RTVE infantil'
|
IE_DESC = 'RTVE infantil'
|
||||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
|
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
|
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
|
||||||
'md5': '915319587b33720b8e0357caaa6617e6',
|
'md5': '5747454717aedf9f9fdf212d1bcfc48d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3040283',
|
'id': '3040283',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Maneras de vivir',
|
'title': 'Maneras de vivir',
|
||||||
'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
|
'thumbnail': r're:https?://.+/1426182947956\.JPG',
|
||||||
'duration': 357.958,
|
'duration': 357.958,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
info = self._download_json(
|
|
||||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
|
||||||
video_id)['page']['items'][0]
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
class RTVELiveIE(RTVEALaCartaIE):
|
||||||
vidplayer_id = self._search_regex(
|
|
||||||
r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
|
|
||||||
|
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
|
|
||||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
|
||||||
video_url = _decrypt_url(png)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': info['title'],
|
|
||||||
'url': video_url,
|
|
||||||
'thumbnail': info.get('image'),
|
|
||||||
'duration': float_or_none(info.get('duration'), scale=1000),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RTVELiveIE(InfoExtractor):
|
|
||||||
IE_NAME = 'rtve.es:live'
|
IE_NAME = 'rtve.es:live'
|
||||||
IE_DESC = 'RTVE.es live streams'
|
IE_DESC = 'RTVE.es live streams'
|
||||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
|
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
|
||||||
@ -225,7 +208,7 @@ class RTVELiveIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'la-1',
|
'id': 'la-1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
|
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'live stream',
|
'skip_download': 'live stream',
|
||||||
@ -234,29 +217,22 @@ class RTVELiveIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
start_time = time.gmtime()
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
|
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
|
||||||
title = remove_start(title, 'Estoy viendo ')
|
title = remove_start(title, 'Estoy viendo ')
|
||||||
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
|
|
||||||
|
|
||||||
vidplayer_id = self._search_regex(
|
vidplayer_id = self._search_regex(
|
||||||
(r'playerId=player([0-9]+)',
|
(r'playerId=player([0-9]+)',
|
||||||
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
|
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
|
||||||
r'data-id=["\'](\d+)'),
|
r'data-id=["\'](\d+)'),
|
||||||
webpage, 'internal video ID')
|
webpage, 'internal video ID')
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
|
|
||||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
|
||||||
m3u8_url = _decrypt_url(png)
|
|
||||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': self._live_title(title),
|
||||||
'formats': formats,
|
'formats': self._extract_png_formats(vidplayer_id),
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,13 +51,16 @@ class ShahidIE(ShahidBaseIE):
|
|||||||
_NETRC_MACHINE = 'shahid'
|
_NETRC_MACHINE = 'shahid'
|
||||||
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
|
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
|
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '275286',
|
'id': '816924',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'مجلس الشباب الموسم 1 كليب 1',
|
'title': 'متحف الدحيح الموسم 1 كليب 1',
|
||||||
'timestamp': 1506988800,
|
'timestamp': 1602806400,
|
||||||
'upload_date': '20171003',
|
'upload_date': '20201016',
|
||||||
|
'description': 'برومو',
|
||||||
|
'duration': 22,
|
||||||
|
'categories': ['كوميديا'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@ -109,12 +112,15 @@ def _real_extract(self, url):
|
|||||||
page_type = 'episode'
|
page_type = 'episode'
|
||||||
|
|
||||||
playout = self._call_api(
|
playout = self._call_api(
|
||||||
'playout/url/' + video_id, video_id)['playout']
|
'playout/new/url/' + video_id, video_id)['playout']
|
||||||
|
|
||||||
if not self._downloader.params.get('allow_unplayable_formats') and playout.get('drm'):
|
if not self._downloader.params.get('allow_unplayable_formats') and playout.get('drm'):
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
|
formats = self._extract_m3u8_formats(re.sub(
|
||||||
|
# https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
|
||||||
|
r'aws\.manifestfilter=[\w:;,-]+&?',
|
||||||
|
'', playout['url']), video_id, 'mp4')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
# video = self._call_api(
|
# video = self._call_api(
|
||||||
|
@ -6,9 +6,9 @@
|
|||||||
|
|
||||||
class SouthParkIE(MTVServicesInfoExtractor):
|
class SouthParkIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = 'southpark.cc.com'
|
IE_NAME = 'southpark.cc.com'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||||
|
|
||||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
|
'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
|
||||||
@ -23,8 +23,20 @@ class SouthParkIE(MTVServicesInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
|
'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.southparkstudios.com/episodes/h4o269/south-park-stunning-and-brave-season-19-ep-1',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _get_feed_query(self, uri):
|
||||||
|
return {
|
||||||
|
'accountOverride': 'intl.mtvi.com',
|
||||||
|
'arcEp': 'shared.southpark.global',
|
||||||
|
'ep': '90877963',
|
||||||
|
'imageEp': 'shared.southpark.global',
|
||||||
|
'mgid': uri,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class SouthParkEsIE(SouthParkIE):
|
class SouthParkEsIE(SouthParkIE):
|
||||||
IE_NAME = 'southpark.cc.com:español'
|
IE_NAME = 'southpark.cc.com:español'
|
||||||
|
@ -1,82 +1,105 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
sanitized_Request,
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SportDeutschlandIE(InfoExtractor):
|
class SportDeutschlandIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
|
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
|
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
|
'id': '5318cac0275701382770543d7edaf0a0',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
|
'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
|
||||||
'categories': ['Badminton-Deutschland'],
|
'duration': 16106.36,
|
||||||
'view_count': int,
|
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
|
||||||
'timestamp': int,
|
|
||||||
'upload_date': '20200201',
|
|
||||||
'description': 're:.*', # meaningless description for THIS video
|
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': True,
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'c6e2fdd01f63013854c47054d2ab776f',
|
||||||
|
'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
|
||||||
|
'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
|
||||||
|
'duration': 31397,
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
data = self._download_json(
|
||||||
sport_id = mobj.group('sport')
|
'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
|
||||||
|
display_id, query={'access_token': 'true'})
|
||||||
api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
|
|
||||||
sport_id, video_id)
|
|
||||||
req = sanitized_Request(api_url, headers={
|
|
||||||
'Accept': 'application/vnd.vidibus.v2.html+json',
|
|
||||||
'Referer': url,
|
|
||||||
})
|
|
||||||
data = self._download_json(req, video_id)
|
|
||||||
|
|
||||||
asset = data['asset']
|
asset = data['asset']
|
||||||
categories = [data['section']['title']]
|
title = (asset.get('title') or asset['label']).strip()
|
||||||
|
asset_id = asset.get('id') or asset.get('uuid')
|
||||||
formats = []
|
info = {
|
||||||
smil_url = asset['video']
|
'id': asset_id,
|
||||||
if '.smil' in smil_url:
|
'title': title,
|
||||||
m3u8_url = smil_url.replace('.smil', '.m3u8')
|
'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
|
||||||
formats.extend(
|
'duration': int_or_none(asset.get('seconds')),
|
||||||
self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
|
|
||||||
|
|
||||||
smil_doc = self._download_xml(
|
|
||||||
smil_url, video_id, note='Downloading SMIL metadata')
|
|
||||||
base_url_el = smil_doc.find('./head/meta')
|
|
||||||
if base_url_el:
|
|
||||||
base_url = base_url_el.attrib['base']
|
|
||||||
formats.extend([{
|
|
||||||
'format_id': 'rmtp',
|
|
||||||
'url': base_url if base_url_el else n.attrib['src'],
|
|
||||||
'play_path': n.attrib['src'],
|
|
||||||
'ext': 'flv',
|
|
||||||
'preference': -100,
|
|
||||||
'format_note': 'Seems to fail at example stream',
|
|
||||||
} for n in smil_doc.findall('./body/video')])
|
|
||||||
else:
|
|
||||||
formats.append({'url': smil_url})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'title': asset['title'],
|
|
||||||
'thumbnail': asset.get('image'),
|
|
||||||
'description': asset.get('teaser'),
|
|
||||||
'duration': asset.get('duration'),
|
|
||||||
'categories': categories,
|
|
||||||
'view_count': asset.get('views'),
|
|
||||||
'rtmp_live': asset.get('live'),
|
|
||||||
'timestamp': parse_iso8601(asset.get('date')),
|
|
||||||
}
|
}
|
||||||
|
videos = asset.get('videos') or []
|
||||||
|
if len(videos) > 1:
|
||||||
|
playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0]
|
||||||
|
if playlist_id:
|
||||||
|
if self._downloader.params.get('noplaylist'):
|
||||||
|
videos = [videos[int(playlist_id)]]
|
||||||
|
self.to_screen('Downloading just a single video because of --no-playlist')
|
||||||
|
else:
|
||||||
|
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
for i, video in enumerate(videos, 1):
|
||||||
|
video_id = video.get('uuid')
|
||||||
|
video_url = video.get('url')
|
||||||
|
if not (video_id and video_url):
|
||||||
|
continue
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
|
||||||
|
if not formats:
|
||||||
|
continue
|
||||||
|
yield {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
|
||||||
|
'duration': float_or_none(video.get('duration')),
|
||||||
|
}
|
||||||
|
info.update({
|
||||||
|
'_type': 'multi_video',
|
||||||
|
'entries': entries(),
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
|
||||||
|
section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
|
||||||
|
info.update({
|
||||||
|
'formats': formats,
|
||||||
|
'display_id': asset.get('permalink'),
|
||||||
|
'thumbnail': try_get(asset, lambda x: x['images'][0]),
|
||||||
|
'categories': [section_title] if section_title else None,
|
||||||
|
'view_count': int_or_none(asset.get('views')),
|
||||||
|
'is_live': asset.get('is_live') is True,
|
||||||
|
'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
remove_start,
|
remove_start,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -25,6 +26,10 @@ class TVerIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://tver.jp/episode/79622438',
|
'url': 'https://tver.jp/episode/79622438',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# subtitle = ' '
|
||||||
|
'url': 'https://tver.jp/corner/f0068870',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_TOKEN = None
|
_TOKEN = None
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||||
@ -47,8 +52,12 @@ def _real_extract(self, url):
|
|||||||
}
|
}
|
||||||
|
|
||||||
if service == 'cx':
|
if service == 'cx':
|
||||||
|
title = main['title']
|
||||||
|
subtitle = strip_or_none(main.get('subtitle'))
|
||||||
|
if subtitle:
|
||||||
|
title += ' - ' + subtitle
|
||||||
info.update({
|
info.update({
|
||||||
'title': main.get('subtitle') or main['title'],
|
'title': title,
|
||||||
'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
|
'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
|
||||||
'ie_key': 'FujiTVFODPlus7',
|
'ie_key': 'FujiTVFODPlus7',
|
||||||
})
|
})
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -19,14 +21,17 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
setup = self._parse_json(self._search_regex(
|
setup = self._parse_json(self._search_regex(
|
||||||
r'setup\s*=\s*({.+});', webpage, 'setup'), video_id)
|
r'setup\s*=\s*({.+});', webpage, 'setup'), video_id)
|
||||||
video_data = setup.get('video') or {}
|
player_setup = setup.get('player_setup') or setup
|
||||||
|
video_data = player_setup.get('video') or {}
|
||||||
|
formatted_metadata = video_data.get('formatted_metadata') or {}
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_data.get('title_short'),
|
'title': player_setup.get('title') or video_data.get('title_short'),
|
||||||
'description': video_data.get('description_long') or video_data.get('description_short'),
|
'description': video_data.get('description_long') or video_data.get('description_short'),
|
||||||
'thumbnail': video_data.get('brightcove_thumbnail')
|
'thumbnail': formatted_metadata.get('thumbnail') or video_data.get('brightcove_thumbnail'),
|
||||||
|
'timestamp': unified_timestamp(formatted_metadata.get('video_publish_date')),
|
||||||
}
|
}
|
||||||
asset = setup.get('asset') or setup.get('params') or {}
|
asset = try_get(setup, lambda x: x['embed_assets']['chorus'], dict) or {}
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
hls_url = asset.get('hls_url')
|
hls_url = asset.get('hls_url')
|
||||||
@ -47,6 +52,7 @@ def _real_extract(self, url):
|
|||||||
if formats:
|
if formats:
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
info['formats'] = formats
|
info['formats'] = formats
|
||||||
|
info['duration'] = int_or_none(asset.get('duration'))
|
||||||
return info
|
return info
|
||||||
|
|
||||||
for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
|
for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
|
||||||
@ -84,7 +90,7 @@ class VoxMediaIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
# Volume embed, Youtube
|
# Volume embed, Youtube
|
||||||
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
|
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
|
||||||
'md5': '4c8f4a0937752b437c3ebc0ed24802b5',
|
'md5': 'fd19aa0cf3a0eea515d4fd5c8c0e9d68',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Gy8Md3Eky38',
|
'id': 'Gy8Md3Eky38',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -93,6 +99,7 @@ class VoxMediaIE(InfoExtractor):
|
|||||||
'uploader_id': 'TheVerge',
|
'uploader_id': 'TheVerge',
|
||||||
'upload_date': '20141021',
|
'upload_date': '20141021',
|
||||||
'uploader': 'The Verge',
|
'uploader': 'The Verge',
|
||||||
|
'timestamp': 1413907200,
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
'add_ie': ['Youtube'],
|
||||||
'skip': 'similar to the previous test',
|
'skip': 'similar to the previous test',
|
||||||
@ -100,13 +107,13 @@ class VoxMediaIE(InfoExtractor):
|
|||||||
# Volume embed, Youtube
|
# Volume embed, Youtube
|
||||||
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
|
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'YCjDnX-Xzhg',
|
'id': '22986359b',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination",
|
'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination",
|
||||||
'description': 'md5:fc1317922057de31cd74bce91eb1c66c',
|
'description': 'md5:fc1317922057de31cd74bce91eb1c66c',
|
||||||
'uploader_id': 'voxdotcom',
|
|
||||||
'upload_date': '20150915',
|
'upload_date': '20150915',
|
||||||
'uploader': 'Vox',
|
'timestamp': 1442332800,
|
||||||
|
'duration': 285,
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
'add_ie': ['Youtube'],
|
||||||
'skip': 'similar to the previous test',
|
'skip': 'similar to the previous test',
|
||||||
@ -160,6 +167,9 @@ class VoxMediaIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
|
'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
|
||||||
'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
|
'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
|
||||||
|
'timestamp': 1402938000,
|
||||||
|
'upload_date': '20140616',
|
||||||
|
'duration': 4114,
|
||||||
},
|
},
|
||||||
'add_ie': ['VoxMediaVolume'],
|
'add_ie': ['VoxMediaVolume'],
|
||||||
}]
|
}]
|
||||||
|
Loading…
Reference in New Issue
Block a user