mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-26 04:07:36 +01:00
Update to ytdl-2021.01.16
This commit is contained in:
parent
7bc877a20d
commit
30a074c2b6
@ -369,6 +369,8 @@ class YoutubeDL(object):
|
|||||||
_pps = []
|
_pps = []
|
||||||
_download_retcode = None
|
_download_retcode = None
|
||||||
_num_downloads = None
|
_num_downloads = None
|
||||||
|
_playlist_level = 0
|
||||||
|
_playlist_urls = set()
|
||||||
_screen_file = None
|
_screen_file = None
|
||||||
|
|
||||||
def __init__(self, params=None, auto_init=True):
|
def __init__(self, params=None, auto_init=True):
|
||||||
@ -1012,6 +1014,48 @@ class YoutubeDL(object):
|
|||||||
return self.process_ie_result(
|
return self.process_ie_result(
|
||||||
new_result, download=download, extra_info=extra_info)
|
new_result, download=download, extra_info=extra_info)
|
||||||
elif result_type in ('playlist', 'multi_video'):
|
elif result_type in ('playlist', 'multi_video'):
|
||||||
|
# Protect from infinite recursion due to recursively nested playlists
|
||||||
|
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
|
||||||
|
webpage_url = ie_result['webpage_url']
|
||||||
|
if webpage_url in self._playlist_urls:
|
||||||
|
self.to_screen(
|
||||||
|
'[download] Skipping already downloaded playlist: %s'
|
||||||
|
% ie_result.get('title') or ie_result.get('id'))
|
||||||
|
return
|
||||||
|
|
||||||
|
self._playlist_level += 1
|
||||||
|
self._playlist_urls.add(webpage_url)
|
||||||
|
try:
|
||||||
|
return self.__process_playlist(ie_result, download)
|
||||||
|
finally:
|
||||||
|
self._playlist_level -= 1
|
||||||
|
if not self._playlist_level:
|
||||||
|
self._playlist_urls.clear()
|
||||||
|
elif result_type == 'compat_list':
|
||||||
|
self.report_warning(
|
||||||
|
'Extractor %s returned a compat_list result. '
|
||||||
|
'It needs to be updated.' % ie_result.get('extractor'))
|
||||||
|
|
||||||
|
def _fixup(r):
|
||||||
|
self.add_extra_info(
|
||||||
|
r,
|
||||||
|
{
|
||||||
|
'extractor': ie_result['extractor'],
|
||||||
|
'webpage_url': ie_result['webpage_url'],
|
||||||
|
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||||
|
'extractor_key': ie_result['extractor_key'],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return r
|
||||||
|
ie_result['entries'] = [
|
||||||
|
self.process_ie_result(_fixup(r), download, extra_info)
|
||||||
|
for r in ie_result['entries']
|
||||||
|
]
|
||||||
|
return ie_result
|
||||||
|
else:
|
||||||
|
raise Exception('Invalid result type: %s' % result_type)
|
||||||
|
|
||||||
|
def __process_playlist(self, ie_result, download):
|
||||||
# We process each entry in the playlist
|
# We process each entry in the playlist
|
||||||
playlist = ie_result.get('title') or ie_result.get('id')
|
playlist = ie_result.get('title') or ie_result.get('id')
|
||||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||||
@ -1119,29 +1163,6 @@ class YoutubeDL(object):
|
|||||||
ie_result['entries'] = playlist_results
|
ie_result['entries'] = playlist_results
|
||||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||||
return ie_result
|
return ie_result
|
||||||
elif result_type == 'compat_list':
|
|
||||||
self.report_warning(
|
|
||||||
'Extractor %s returned a compat_list result. '
|
|
||||||
'It needs to be updated.' % ie_result.get('extractor'))
|
|
||||||
|
|
||||||
def _fixup(r):
|
|
||||||
self.add_extra_info(
|
|
||||||
r,
|
|
||||||
{
|
|
||||||
'extractor': ie_result['extractor'],
|
|
||||||
'webpage_url': ie_result['webpage_url'],
|
|
||||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
|
||||||
'extractor_key': ie_result['extractor_key'],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return r
|
|
||||||
ie_result['entries'] = [
|
|
||||||
self.process_ie_result(_fixup(r), download, extra_info)
|
|
||||||
for r in ie_result['entries']
|
|
||||||
]
|
|
||||||
return ie_result
|
|
||||||
else:
|
|
||||||
raise Exception('Invalid result type: %s' % result_type)
|
|
||||||
|
|
||||||
@__handle_extraction_exceptions
|
@__handle_extraction_exceptions
|
||||||
def __process_iterable_entry(self, entry, download, extra_info):
|
def __process_iterable_entry(self, entry, download, extra_info):
|
||||||
|
@ -10,6 +10,7 @@ import random
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..aes import aes_cbc_decrypt
|
from ..aes import aes_cbc_decrypt
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
compat_b64decode,
|
compat_b64decode,
|
||||||
compat_ord,
|
compat_ord,
|
||||||
)
|
)
|
||||||
@ -18,11 +19,13 @@ from ..utils import (
|
|||||||
bytes_to_long,
|
bytes_to_long,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
long_to_bytes,
|
long_to_bytes,
|
||||||
pkcs1pad,
|
pkcs1pad,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
urljoin,
|
try_get,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -31,16 +34,27 @@ class ADNIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||||
'md5': 'e497370d847fd79d9d4c74be55575c7a',
|
'md5': '0319c99885ff5547565cacb4f3f9348d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '7778',
|
'id': '7778',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
|
'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
|
||||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||||
|
'series': 'Blue Exorcist - Kyôto Saga',
|
||||||
|
'duration': 1467,
|
||||||
|
'release_date': '20170106',
|
||||||
|
'comment_count': int,
|
||||||
|
'average_rating': float,
|
||||||
|
'season_number': 2,
|
||||||
|
'episode': 'Début des hostilités',
|
||||||
|
'episode_number': 1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||||
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
|
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
||||||
|
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||||
|
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||||
_POS_ALIGN_MAP = {
|
_POS_ALIGN_MAP = {
|
||||||
'start': 1,
|
'start': 1,
|
||||||
'end': 3,
|
'end': 3,
|
||||||
@ -54,26 +68,24 @@ class ADNIE(InfoExtractor):
|
|||||||
def _ass_subtitles_timecode(seconds):
|
def _ass_subtitles_timecode(seconds):
|
||||||
return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
|
return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
|
||||||
|
|
||||||
def _get_subtitles(self, sub_path, video_id):
|
def _get_subtitles(self, sub_url, video_id):
|
||||||
if not sub_path:
|
if not sub_url:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
enc_subtitles = self._download_webpage(
|
enc_subtitles = self._download_webpage(
|
||||||
urljoin(self._BASE_URL, sub_path),
|
sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
||||||
video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
|
||||||
subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
|
subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
|
||||||
if subtitle_location:
|
if subtitle_location:
|
||||||
enc_subtitles = self._download_webpage(
|
enc_subtitles = self._download_webpage(
|
||||||
urljoin(self._BASE_URL, subtitle_location),
|
subtitle_location, video_id, 'Downloading subtitles data',
|
||||||
video_id, 'Downloading subtitles data', fatal=False,
|
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||||
headers={'Origin': 'https://animedigitalnetwork.fr'})
|
|
||||||
if not enc_subtitles:
|
if not enc_subtitles:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||||
bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
|
bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
|
||||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||||
))
|
))
|
||||||
subtitles_json = self._parse_json(
|
subtitles_json = self._parse_json(
|
||||||
@ -119,59 +131,76 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||||
player_config = self._parse_json(self._search_regex(
|
player = self._download_json(
|
||||||
r'playerConfig\s*=\s*({.+});', webpage,
|
video_base_url + 'configuration', video_id,
|
||||||
'player config', default='{}'), video_id, fatal=False)
|
|
||||||
if not player_config:
|
|
||||||
config_url = urljoin(self._BASE_URL, self._search_regex(
|
|
||||||
r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
|
|
||||||
webpage, 'config url'))
|
|
||||||
player_config = self._download_json(
|
|
||||||
config_url, video_id,
|
|
||||||
'Downloading player config JSON metadata')['player']
|
'Downloading player config JSON metadata')['player']
|
||||||
|
options = player['options']
|
||||||
|
|
||||||
video_info = {}
|
user = options['user']
|
||||||
video_info_str = self._search_regex(
|
if not user.get('hasAccess'):
|
||||||
r'videoInfo\s*=\s*({.+});', webpage,
|
raise ExtractorError(
|
||||||
'video info', fatal=False)
|
'This video is only available for paying users', expected=True)
|
||||||
if video_info_str:
|
# self.raise_login_required() # FIXME: Login is not implemented
|
||||||
video_info = self._parse_json(
|
|
||||||
video_info_str, video_id, fatal=False) or {}
|
|
||||||
|
|
||||||
options = player_config.get('options') or {}
|
token = self._download_json(
|
||||||
metas = options.get('metas') or {}
|
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||||
links = player_config.get('links') or {}
|
video_id, 'Downloading access token', headers={
|
||||||
sub_path = player_config.get('subtitles')
|
'x-player-refresh-token': user['refreshToken']
|
||||||
error = None
|
}, data=b'')['token']
|
||||||
if not links:
|
|
||||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||||
token = options['token']
|
|
||||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||||
message = bytes_to_intlist(json.dumps({
|
message = bytes_to_intlist(json.dumps({
|
||||||
'k': self._K,
|
'k': self._K,
|
||||||
'e': 60,
|
|
||||||
't': token,
|
't': token,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
|
# Sometimes authentication fails for no good reason, retry with
|
||||||
|
# a different random padding
|
||||||
|
links_data = None
|
||||||
|
for _ in range(3):
|
||||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||||
n, e = self._RSA_KEY
|
n, e = self._RSA_KEY
|
||||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||||
authorization = base64.b64encode(encrypted_message).decode()
|
authorization = base64.b64encode(encrypted_message).decode()
|
||||||
|
|
||||||
|
try:
|
||||||
links_data = self._download_json(
|
links_data = self._download_json(
|
||||||
urljoin(self._BASE_URL, links_url), video_id,
|
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||||
'Downloading links JSON metadata', headers={
|
'X-Player-Token': authorization
|
||||||
'Authorization': 'Bearer ' + authorization,
|
}, query={
|
||||||
|
'freeWithAds': 'true',
|
||||||
|
'adaptive': 'false',
|
||||||
|
'withMetadata': 'true',
|
||||||
|
'source': 'Web'
|
||||||
})
|
})
|
||||||
|
break
|
||||||
|
except ExtractorError as e:
|
||||||
|
if not isinstance(e.cause, compat_HTTPError):
|
||||||
|
raise e
|
||||||
|
|
||||||
|
if e.cause.code == 401:
|
||||||
|
# This usually goes away with a different random pkcs1pad, so retry
|
||||||
|
continue
|
||||||
|
|
||||||
|
error = self._parse_json(e.cause.read(), video_id)
|
||||||
|
message = error.get('message')
|
||||||
|
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||||
|
self.raise_geo_restricted(msg=message)
|
||||||
|
else:
|
||||||
|
raise ExtractorError(message)
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Giving up retrying')
|
||||||
|
|
||||||
links = links_data.get('links') or {}
|
links = links_data.get('links') or {}
|
||||||
metas = metas or links_data.get('meta') or {}
|
metas = links_data.get('metadata') or {}
|
||||||
sub_path = sub_path or links_data.get('subtitles') or \
|
sub_url = (links.get('subtitles') or {}).get('all')
|
||||||
'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
|
video_info = links_data.get('video') or {}
|
||||||
sub_path += '&token=' + token
|
title = metas['title']
|
||||||
error = links_data.get('error')
|
|
||||||
title = metas.get('title') or video_info['title']
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, qualities in links.items():
|
for format_id, qualities in (links.get('streaming') or {}).items():
|
||||||
if not isinstance(qualities, dict):
|
if not isinstance(qualities, dict):
|
||||||
continue
|
continue
|
||||||
for quality, load_balancer_url in qualities.items():
|
for quality, load_balancer_url in qualities.items():
|
||||||
@ -189,19 +218,26 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
|||||||
for f in m3u8_formats:
|
for f in m3u8_formats:
|
||||||
f['language'] = 'fr'
|
f['language'] = 'fr'
|
||||||
formats.extend(m3u8_formats)
|
formats.extend(m3u8_formats)
|
||||||
if not error:
|
|
||||||
error = options.get('error')
|
|
||||||
if not formats and error:
|
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
video = (self._download_json(
|
||||||
|
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||||
|
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||||
|
show = video.get('show') or {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
|
'description': strip_or_none(metas.get('summary') or video.get('summary')),
|
||||||
'thumbnail': video_info.get('image'),
|
'thumbnail': video_info.get('image') or player.get('image'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self.extract_subtitles(sub_path, video_id),
|
'subtitles': self.extract_subtitles(sub_url, video_id),
|
||||||
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
|
'episode': metas.get('subtitle') or video.get('name'),
|
||||||
'series': video_info.get('playlistTitle'),
|
'episode_number': int_or_none(video.get('shortNumber')),
|
||||||
|
'series': show.get('title'),
|
||||||
|
'season_number': int_or_none(video.get('season')),
|
||||||
|
'duration': int_or_none(video_info.get('duration') or video.get('duration')),
|
||||||
|
'release_date': unified_strdate(video.get('releaseDate')),
|
||||||
|
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||||
|
'comment_count': int_or_none(video.get('commentsCount')),
|
||||||
}
|
}
|
||||||
|
@ -116,8 +116,6 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
|
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
|
||||||
webpage, 'anime description', default=None)
|
webpage, 'anime description', default=None)
|
||||||
|
|
||||||
entries = []
|
|
||||||
|
|
||||||
def extract_info(html, video_id, num=None):
|
def extract_info(html, video_id, num=None):
|
||||||
title, description = [None] * 2
|
title, description = [None] * 2
|
||||||
formats = []
|
formats = []
|
||||||
@ -233,7 +231,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
self._sort_formats(info['formats'])
|
self._sort_formats(info['formats'])
|
||||||
f = common_info.copy()
|
f = common_info.copy()
|
||||||
f.update(info)
|
f.update(info)
|
||||||
entries.append(f)
|
yield f
|
||||||
|
|
||||||
# Extract teaser/trailer only when full episode is not available
|
# Extract teaser/trailer only when full episode is not available
|
||||||
if not info['formats']:
|
if not info['formats']:
|
||||||
@ -247,7 +245,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
'title': m.group('title'),
|
'title': m.group('title'),
|
||||||
'url': urljoin(url, m.group('href')),
|
'url': urljoin(url, m.group('href')),
|
||||||
})
|
})
|
||||||
entries.append(f)
|
yield f
|
||||||
|
|
||||||
def extract_episodes(html):
|
def extract_episodes(html):
|
||||||
for num, episode_html in enumerate(re.findall(
|
for num, episode_html in enumerate(re.findall(
|
||||||
@ -275,7 +273,8 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
'episode_number': episode_number,
|
'episode_number': episode_number,
|
||||||
}
|
}
|
||||||
|
|
||||||
extract_entries(episode_html, video_id, common_info)
|
for e in extract_entries(episode_html, video_id, common_info):
|
||||||
|
yield e
|
||||||
|
|
||||||
def extract_film(html, video_id):
|
def extract_film(html, video_id):
|
||||||
common_info = {
|
common_info = {
|
||||||
@ -283,11 +282,18 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
'title': anime_title,
|
'title': anime_title,
|
||||||
'description': anime_description,
|
'description': anime_description,
|
||||||
}
|
}
|
||||||
extract_entries(html, video_id, common_info)
|
for e in extract_entries(html, video_id, common_info):
|
||||||
|
yield e
|
||||||
|
|
||||||
extract_episodes(webpage)
|
def entries():
|
||||||
|
has_episodes = False
|
||||||
|
for e in extract_episodes(webpage):
|
||||||
|
has_episodes = True
|
||||||
|
yield e
|
||||||
|
|
||||||
if not entries:
|
if not has_episodes:
|
||||||
extract_film(webpage, anime_id)
|
for e in extract_film(webpage, anime_id):
|
||||||
|
yield e
|
||||||
|
|
||||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
return self.playlist_result(
|
||||||
|
entries(), anime_id, anime_title, anime_description)
|
||||||
|
@ -8,11 +8,14 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
|
get_element_by_attribute,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
str_to_int,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
from .senateisvp import SenateISVPIE
|
from .senateisvp import SenateISVPIE
|
||||||
@ -116,8 +119,30 @@ class CSpanIE(InfoExtractor):
|
|||||||
jwsetup, video_id, require_title=False, m3u8_id='hls',
|
jwsetup, video_id, require_title=False, m3u8_id='hls',
|
||||||
base_url=url)
|
base_url=url)
|
||||||
add_referer(info['formats'])
|
add_referer(info['formats'])
|
||||||
|
for subtitles in info['subtitles'].values():
|
||||||
|
for subtitle in subtitles:
|
||||||
|
ext = determine_ext(subtitle['url'])
|
||||||
|
if ext == 'php':
|
||||||
|
ext = 'vtt'
|
||||||
|
subtitle['ext'] = ext
|
||||||
ld_info = self._search_json_ld(webpage, video_id, default={})
|
ld_info = self._search_json_ld(webpage, video_id, default={})
|
||||||
return merge_dicts(info, ld_info)
|
title = get_element_by_class('video-page-title', webpage) or \
|
||||||
|
self._og_search_title(webpage)
|
||||||
|
description = get_element_by_attribute('itemprop', 'description', webpage) or \
|
||||||
|
self._html_search_meta(['og:description', 'description'], webpage)
|
||||||
|
return merge_dicts(info, ld_info, {
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage),
|
||||||
|
'description': description,
|
||||||
|
'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)),
|
||||||
|
'location': get_element_by_attribute('itemprop', 'contentLocation', webpage),
|
||||||
|
'duration': int_or_none(self._search_regex(
|
||||||
|
r'jwsetup\.seclength\s*=\s*(\d+);',
|
||||||
|
webpage, 'duration', fatal=False)),
|
||||||
|
'view_count': str_to_int(self._search_regex(
|
||||||
|
r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
|
||||||
|
webpage, 'views', fatal=False)),
|
||||||
|
})
|
||||||
|
|
||||||
# Obsolete
|
# Obsolete
|
||||||
# We first look for clipid, because clipprog always appears before
|
# We first look for clipid, because clipprog always appears before
|
||||||
|
@ -551,7 +551,10 @@ from .karaoketv import KaraoketvIE
|
|||||||
from .karrierevideos import KarriereVideosIE
|
from .karrierevideos import KarriereVideosIE
|
||||||
from .keezmovies import KeezMoviesIE
|
from .keezmovies import KeezMoviesIE
|
||||||
from .ketnet import KetnetIE
|
from .ketnet import KetnetIE
|
||||||
from .khanacademy import KhanAcademyIE
|
from .khanacademy import (
|
||||||
|
KhanAcademyIE,
|
||||||
|
KhanAcademyUnitIE,
|
||||||
|
)
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .kinja import KinjaEmbedIE
|
from .kinja import KinjaEmbedIE
|
||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
|
@ -1,82 +1,107 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class KhanAcademyIE(InfoExtractor):
|
class KhanAcademyBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||||
IE_NAME = 'KhanAcademy'
|
|
||||||
|
|
||||||
_TESTS = [{
|
def _parse_video(self, video):
|
||||||
'url': 'http://www.khanacademy.org/video/one-time-pad',
|
return {
|
||||||
'md5': '7b391cce85e758fb94f763ddc1bbb979',
|
'_type': 'url_transparent',
|
||||||
|
'url': video['youtubeId'],
|
||||||
|
'id': video.get('slug'),
|
||||||
|
'title': video.get('title'),
|
||||||
|
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
'description': video.get('description'),
|
||||||
|
'ie_key': 'Youtube',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
component_props = self._parse_json(self._download_json(
|
||||||
|
'https://www.khanacademy.org/api/internal/graphql',
|
||||||
|
display_id, query={
|
||||||
|
'hash': 1604303425,
|
||||||
|
'variables': json.dumps({
|
||||||
|
'path': display_id,
|
||||||
|
'queryParams': '',
|
||||||
|
}),
|
||||||
|
})['data']['contentJson'], display_id)['componentProps']
|
||||||
|
return self._parse_component_props(component_props)
|
||||||
|
|
||||||
|
|
||||||
|
class KhanAcademyIE(KhanAcademyBaseIE):
|
||||||
|
IE_NAME = 'khanacademy'
|
||||||
|
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
|
||||||
|
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'one-time-pad',
|
'id': 'FlIG3TvQCBQ',
|
||||||
'ext': 'webm',
|
'ext': 'mp4',
|
||||||
'title': 'The one-time pad',
|
'title': 'The one-time pad',
|
||||||
'description': 'The perfect cipher',
|
'description': 'The perfect cipher',
|
||||||
'duration': 176,
|
'duration': 176,
|
||||||
'uploader': 'Brit Cruise',
|
'uploader': 'Brit Cruise',
|
||||||
'uploader_id': 'khanacademy',
|
'uploader_id': 'khanacademy',
|
||||||
'upload_date': '20120411',
|
'upload_date': '20120411',
|
||||||
|
'timestamp': 1334170113,
|
||||||
|
'license': 'cc-by-nc-sa',
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
'add_ie': ['Youtube'],
|
||||||
}, {
|
}
|
||||||
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
|
|
||||||
|
def _parse_component_props(self, component_props):
|
||||||
|
video = component_props['tutorialPageData']['contentModel']
|
||||||
|
info = self._parse_video(video)
|
||||||
|
author_names = video.get('authorNames')
|
||||||
|
info.update({
|
||||||
|
'uploader': ', '.join(author_names) if author_names else None,
|
||||||
|
'timestamp': parse_iso8601(video.get('dateAdded')),
|
||||||
|
'license': video.get('kaUserLicense'),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class KhanAcademyUnitIE(KhanAcademyBaseIE):
|
||||||
|
IE_NAME = 'khanacademy:unit'
|
||||||
|
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cryptography',
|
'id': 'cryptography',
|
||||||
'title': 'Journey into cryptography',
|
'title': 'Cryptography',
|
||||||
'description': 'How have humans protected their secret messages through history? What has changed today?',
|
'description': 'How have humans protected their secret messages through history? What has changed today?',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 31,
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
m = re.match(self._VALID_URL, url)
|
|
||||||
video_id = m.group('id')
|
|
||||||
|
|
||||||
if m.group('key') == 'video':
|
|
||||||
data = self._download_json(
|
|
||||||
'http://api.khanacademy.org/api/v1/videos/' + video_id,
|
|
||||||
video_id, 'Downloading video info')
|
|
||||||
|
|
||||||
upload_date = unified_strdate(data['date_added'])
|
|
||||||
uploader = ', '.join(data['author_names'])
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': data['url'],
|
|
||||||
'id': video_id,
|
|
||||||
'title': data['title'],
|
|
||||||
'thumbnail': data['image_url'],
|
|
||||||
'duration': data['duration'],
|
|
||||||
'description': data['description'],
|
|
||||||
'uploader': uploader,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
}
|
}
|
||||||
else:
|
|
||||||
# topic
|
|
||||||
data = self._download_json(
|
|
||||||
'http://api.khanacademy.org/api/v1/topic/' + video_id,
|
|
||||||
video_id, 'Downloading topic info')
|
|
||||||
|
|
||||||
entries = [
|
def _parse_component_props(self, component_props):
|
||||||
{
|
curation = component_props['curation']
|
||||||
'_type': 'url',
|
|
||||||
'url': c['url'],
|
|
||||||
'id': c['id'],
|
|
||||||
'title': c['title'],
|
|
||||||
}
|
|
||||||
for c in data['children'] if c['kind'] in ('Video', 'Topic')]
|
|
||||||
|
|
||||||
return {
|
entries = []
|
||||||
'_type': 'playlist',
|
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
|
||||||
'id': video_id,
|
for tutorial_number, tutorial in enumerate(tutorials, 1):
|
||||||
'title': data['title'],
|
chapter_info = {
|
||||||
'description': data['description'],
|
'chapter': tutorial.get('title'),
|
||||||
'entries': entries,
|
'chapter_number': tutorial_number,
|
||||||
|
'chapter_id': tutorial.get('id'),
|
||||||
}
|
}
|
||||||
|
for content_item in (tutorial.get('contentItems') or []):
|
||||||
|
if content_item.get('kind') == 'Video':
|
||||||
|
info = self._parse_video(content_item)
|
||||||
|
info.update(chapter_info)
|
||||||
|
entries.append(info)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, curation.get('unit'), curation.get('title'),
|
||||||
|
curation.get('description'))
|
||||||
|
@ -251,8 +251,11 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE):
|
|||||||
cloudcast_url = cloudcast.get('url')
|
cloudcast_url = cloudcast.get('url')
|
||||||
if not cloudcast_url:
|
if not cloudcast_url:
|
||||||
continue
|
continue
|
||||||
|
slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
|
||||||
|
owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
|
||||||
|
video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
|
||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
|
cloudcast_url, MixcloudIE.ie_key(), video_id))
|
||||||
|
|
||||||
page_info = items['pageInfo']
|
page_info = items['pageInfo']
|
||||||
has_next_page = page_info['hasNextPage']
|
has_next_page = page_info['hasNextPage']
|
||||||
@ -321,7 +324,8 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
|||||||
_DESCRIPTION_KEY = 'biog'
|
_DESCRIPTION_KEY = 'biog'
|
||||||
_ROOT_TYPE = 'user'
|
_ROOT_TYPE = 'user'
|
||||||
_NODE_TEMPLATE = '''slug
|
_NODE_TEMPLATE = '''slug
|
||||||
url'''
|
url
|
||||||
|
owner { username }'''
|
||||||
|
|
||||||
def _get_playlist_title(self, title, slug):
|
def _get_playlist_title(self, title, slug):
|
||||||
return '%s (%s)' % (title, slug)
|
return '%s (%s)' % (title, slug)
|
||||||
@ -345,6 +349,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
|||||||
_NODE_TEMPLATE = '''cloudcast {
|
_NODE_TEMPLATE = '''cloudcast {
|
||||||
slug
|
slug
|
||||||
url
|
url
|
||||||
|
owner { username }
|
||||||
}'''
|
}'''
|
||||||
|
|
||||||
def _get_cloudcast(self, node):
|
def _get_cloudcast(self, node):
|
||||||
|
@ -450,6 +450,18 @@ class PeerTubeIE(InfoExtractor):
|
|||||||
'tags': ['framasoft', 'peertube'],
|
'tags': ['framasoft', 'peertube'],
|
||||||
'categories': ['Science & Technology'],
|
'categories': ['Science & Technology'],
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# Issue #26002
|
||||||
|
'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Dot matrix printer shell demo',
|
||||||
|
'uploader_id': '3',
|
||||||
|
'timestamp': 1587401293,
|
||||||
|
'upload_date': '20200420',
|
||||||
|
'uploader': 'Drew DeVault',
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -526,7 +538,15 @@ class PeerTubeIE(InfoExtractor):
|
|||||||
title = video['name']
|
title = video['name']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for file_ in video['files']:
|
files = video.get('files') or []
|
||||||
|
for playlist in (video.get('streamingPlaylists') or []):
|
||||||
|
if not isinstance(playlist, dict):
|
||||||
|
continue
|
||||||
|
playlist_files = playlist.get('files')
|
||||||
|
if not (playlist_files and isinstance(playlist_files, list)):
|
||||||
|
continue
|
||||||
|
files.extend(playlist_files)
|
||||||
|
for file_ in files:
|
||||||
if not isinstance(file_, dict):
|
if not isinstance(file_, dict):
|
||||||
continue
|
continue
|
||||||
file_url = url_or_none(file_.get('fileUrl'))
|
file_url = url_or_none(file_.get('fileUrl'))
|
||||||
|
@ -50,9 +50,15 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
|
|||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
|
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||||
_GEO_COUNTRIES = ['US']
|
_GEO_COUNTRIES = ['US']
|
||||||
|
|
||||||
|
def _get_feed_query(self, uri):
|
||||||
|
return {
|
||||||
|
'arcEp': 'paramountnetwork.com',
|
||||||
|
'mgid': uri,
|
||||||
|
}
|
||||||
|
|
||||||
def _extract_mgid(self, webpage, url):
|
def _extract_mgid(self, webpage, url):
|
||||||
root_data = self._parse_json(self._search_regex(
|
root_data = self._parse_json(self._search_regex(
|
||||||
r'window\.__DATA__\s*=\s*({.+})',
|
r'window\.__DATA__\s*=\s*({.+})',
|
||||||
|
@ -3,10 +3,13 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
js_to_json,
|
ExtractorError,
|
||||||
mimetype2ext,
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -15,29 +18,35 @@ class ThreeQSDNIE(InfoExtractor):
|
|||||||
IE_DESC = '3Q SDN'
|
IE_DESC = '3Q SDN'
|
||||||
_VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
_VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# ondemand from http://www.philharmonie.tv/veranstaltung/26/
|
# https://player.3qsdn.com/demo.html
|
||||||
'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
|
'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be',
|
||||||
'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd',
|
'md5': '64a57396b16fa011b15e0ea60edce918',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0280d6b9-1215-11e6-b427-0cc47a188158',
|
'id': '7201c779-6b3c-11e7-a40e-002590c750be',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
|
'title': 'Video Ads',
|
||||||
'is_live': False,
|
'is_live': False,
|
||||||
|
'description': 'Video Ads Demo',
|
||||||
|
'timestamp': 1500334803,
|
||||||
|
'upload_date': '20170717',
|
||||||
|
'duration': 888.032,
|
||||||
|
'subtitles': {
|
||||||
|
'eng': 'count:1',
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'],
|
},
|
||||||
|
'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
|
||||||
}, {
|
}, {
|
||||||
# live video stream
|
# live video stream
|
||||||
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
|
'url': 'https://playout.3qsdn.com/66e68995-11ca-11e8-9273-002590c750be',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
|
'id': '66e68995-11ca-11e8-9273-002590c750be',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^66e68995-11ca-11e8-9273-002590c750be [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # m3u8 downloads
|
'skip_download': True, # m3u8 downloads
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Failed to download MPD manifest'],
|
|
||||||
}, {
|
}, {
|
||||||
# live audio stream
|
# live audio stream
|
||||||
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
|
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
|
||||||
@ -58,6 +67,14 @@ class ThreeQSDNIE(InfoExtractor):
|
|||||||
# live video with rtmp link
|
# live video with rtmp link
|
||||||
'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
|
'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# ondemand from http://www.philharmonie.tv/veranstaltung/26/
|
||||||
|
'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# live video stream
|
||||||
|
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -70,73 +87,78 @@ class ThreeQSDNIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
js = self._download_webpage(
|
try:
|
||||||
'http://playout.3qsdn.com/%s' % video_id, video_id,
|
config = self._download_json(
|
||||||
query={'js': 'true'})
|
url.replace('://playout.3qsdn.com/', '://playout.3qsdn.com/config/'), video_id)
|
||||||
|
except ExtractorError as e:
|
||||||
if any(p in js for p in (
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||||
'>This content is not available in your country',
|
|
||||||
'playout.3qsdn.com/forbidden')):
|
|
||||||
self.raise_geo_restricted()
|
self.raise_geo_restricted()
|
||||||
|
raise
|
||||||
|
|
||||||
stream_content = self._search_regex(
|
live = config.get('streamContent') == 'live'
|
||||||
r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js,
|
aspect = float_or_none(config.get('aspect'))
|
||||||
'stream content', default='demand', group='content')
|
|
||||||
|
|
||||||
live = stream_content == 'live'
|
|
||||||
|
|
||||||
stream_type = self._search_regex(
|
|
||||||
r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js,
|
|
||||||
'stream type', default='video', group='type')
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
urls = set()
|
for source_type, source in (config.get('sources') or {}).items():
|
||||||
|
if not source:
|
||||||
def extract_formats(item_url, item={}):
|
continue
|
||||||
if not item_url or item_url in urls:
|
if source_type == 'dash':
|
||||||
return
|
|
||||||
urls.add(item_url)
|
|
||||||
ext = mimetype2ext(item.get('type')) or determine_ext(item_url, default_ext=None)
|
|
||||||
if ext == 'mpd':
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
item_url, video_id, mpd_id='mpd', fatal=False))
|
source, video_id, mpd_id='mpd', fatal=False))
|
||||||
elif ext == 'm3u8':
|
elif source_type == 'hls':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
item_url, video_id, 'mp4',
|
source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
|
||||||
entry_protocol='m3u8' if live else 'm3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
elif ext == 'f4m':
|
elif source_type == 'progressive':
|
||||||
formats.extend(self._extract_f4m_formats(
|
for s in source:
|
||||||
item_url, video_id, f4m_id='hds', fatal=False))
|
src = s.get('src')
|
||||||
else:
|
if not (src and self._is_valid_url(src, video_id)):
|
||||||
if not self._is_valid_url(item_url, video_id):
|
continue
|
||||||
return
|
width = None
|
||||||
|
format_id = ['http']
|
||||||
|
ext = determine_ext(src)
|
||||||
|
if ext:
|
||||||
|
format_id.append(ext)
|
||||||
|
height = int_or_none(s.get('height'))
|
||||||
|
if height:
|
||||||
|
format_id.append('%dp' % height)
|
||||||
|
if aspect:
|
||||||
|
width = int(height * aspect)
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': item_url,
|
'ext': ext,
|
||||||
'format_id': item.get('quality'),
|
'format_id': '-'.join(format_id),
|
||||||
'ext': 'mp4' if item_url.startswith('rtsp') else ext,
|
'height': height,
|
||||||
'vcodec': 'none' if stream_type == 'audio' else None,
|
'source_preference': 0,
|
||||||
|
'url': src,
|
||||||
|
'vcodec': 'none' if height == 0 else None,
|
||||||
|
'width': width,
|
||||||
|
})
|
||||||
|
for f in formats:
|
||||||
|
if f.get('acodec') == 'none':
|
||||||
|
f['preference'] = -40
|
||||||
|
elif f.get('vcodec') == 'none':
|
||||||
|
f['preference'] = -50
|
||||||
|
self._sort_formats(formats, ('preference', 'width', 'height', 'source_preference', 'tbr', 'vbr', 'abr', 'ext', 'format_id'))
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for subtitle in (config.get('subtitles') or []):
|
||||||
|
src = subtitle.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(subtitle.get('label') or 'eng', []).append({
|
||||||
|
'url': src,
|
||||||
})
|
})
|
||||||
|
|
||||||
for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js):
|
title = config.get('title') or video_id
|
||||||
f = self._parse_json(
|
|
||||||
item_js, video_id, transform_source=js_to_json, fatal=False)
|
|
||||||
if not f:
|
|
||||||
continue
|
|
||||||
extract_formats(f.get('src'), f)
|
|
||||||
|
|
||||||
# More relaxed version to collect additional URLs and acting
|
|
||||||
# as a future-proof fallback
|
|
||||||
for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js):
|
|
||||||
extract_formats(src)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
title = self._live_title(video_id) if live else video_id
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': self._live_title(title) if live else title,
|
||||||
|
'thumbnail': config.get('poster') or None,
|
||||||
|
'description': config.get('description') or None,
|
||||||
|
'timestamp': parse_iso8601(config.get('upload_date')),
|
||||||
|
'duration': float_or_none(config.get('vlength')) or None,
|
||||||
'is_live': live,
|
'is_live': live,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,7 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -76,14 +77,14 @@ class TwitchBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'Referer': page_url,
|
'Referer': page_url,
|
||||||
'Origin': page_url,
|
'Origin': 'https://www.twitch.tv',
|
||||||
'Content-Type': 'text/plain;charset=UTF-8',
|
'Content-Type': 'text/plain;charset=UTF-8',
|
||||||
}
|
}
|
||||||
|
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
post_url, None, note, data=json.dumps(form).encode(),
|
post_url, None, note, data=json.dumps(form).encode(),
|
||||||
headers=headers, expected_status=400)
|
headers=headers, expected_status=400)
|
||||||
error = response.get('error_description') or response.get('error_code')
|
error = dict_get(response, ('error', 'error_description', 'error_code'))
|
||||||
if error:
|
if error:
|
||||||
fail(error)
|
fail(error)
|
||||||
|
|
||||||
@ -137,13 +138,17 @@ class TwitchBaseIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
def _download_base_gql(self, video_id, ops, note, fatal=True):
|
def _download_base_gql(self, video_id, ops, note, fatal=True):
|
||||||
|
headers = {
|
||||||
|
'Content-Type': 'text/plain;charset=UTF-8',
|
||||||
|
'Client-ID': self._CLIENT_ID,
|
||||||
|
}
|
||||||
|
gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
|
||||||
|
if gql_auth:
|
||||||
|
headers['Authorization'] = 'OAuth ' + gql_auth.value
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://gql.twitch.tv/gql', video_id, note,
|
'https://gql.twitch.tv/gql', video_id, note,
|
||||||
data=json.dumps(ops).encode(),
|
data=json.dumps(ops).encode(),
|
||||||
headers={
|
headers=headers, fatal=fatal)
|
||||||
'Content-Type': 'text/plain;charset=UTF-8',
|
|
||||||
'Client-ID': self._CLIENT_ID,
|
|
||||||
}, fatal=fatal)
|
|
||||||
|
|
||||||
def _download_gql(self, video_id, ops, note, fatal=True):
|
def _download_gql(self, video_id, ops, note, fatal=True):
|
||||||
for op in ops:
|
for op in ops:
|
||||||
|
@ -373,6 +373,24 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'uploader_id': '1eVjYOLGkGrQL',
|
'uploader_id': '1eVjYOLGkGrQL',
|
||||||
},
|
},
|
||||||
'add_ie': ['TwitterBroadcast'],
|
'add_ie': ['TwitterBroadcast'],
|
||||||
|
}, {
|
||||||
|
# unified card
|
||||||
|
'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1349794411333394432',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
|
||||||
|
'uploader': 'Brooklyn Nets',
|
||||||
|
'uploader_id': 'BrooklynNets',
|
||||||
|
'duration': 324.484,
|
||||||
|
'timestamp': 1610651040,
|
||||||
|
'upload_date': '20210114',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# Twitch Clip Embed
|
# Twitch Clip Embed
|
||||||
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
|
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
|
||||||
@ -389,6 +407,22 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
# appplayer card
|
# appplayer card
|
||||||
'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
|
'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# video_direct_message card
|
||||||
|
'url': 'https://twitter.com/qarev001/status/1348948114569269251',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# poll2choice_video card
|
||||||
|
'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# poll3choice_video card
|
||||||
|
'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# poll4choice_video card
|
||||||
|
'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -433,8 +467,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'tags': tags,
|
'tags': tags,
|
||||||
}
|
}
|
||||||
|
|
||||||
media = try_get(status, lambda x: x['extended_entities']['media'][0])
|
def extract_from_video_info(media):
|
||||||
if media and media.get('type') != 'photo':
|
|
||||||
video_info = media.get('video_info') or {}
|
video_info = media.get('video_info') or {}
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
@ -461,6 +494,10 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'duration': float_or_none(video_info.get('duration_millis'), 1000),
|
'duration': float_or_none(video_info.get('duration_millis'), 1000),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
media = try_get(status, lambda x: x['extended_entities']['media'][0])
|
||||||
|
if media and media.get('type') != 'photo':
|
||||||
|
extract_from_video_info(media)
|
||||||
else:
|
else:
|
||||||
card = status.get('card')
|
card = status.get('card')
|
||||||
if card:
|
if card:
|
||||||
@ -493,7 +530,12 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': get_binding_value('card_url'),
|
'url': get_binding_value('card_url'),
|
||||||
})
|
})
|
||||||
# amplify, promo_video_website, promo_video_convo, appplayer, ...
|
elif card_name == 'unified_card':
|
||||||
|
media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities']
|
||||||
|
extract_from_video_info(next(iter(media_entities.values())))
|
||||||
|
# amplify, promo_video_website, promo_video_convo, appplayer,
|
||||||
|
# video_direct_message, poll2choice_video, poll3choice_video,
|
||||||
|
# poll4choice_video, ...
|
||||||
else:
|
else:
|
||||||
is_amplify = card_name == 'amplify'
|
is_amplify = card_name == 'amplify'
|
||||||
vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
|
vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
|
||||||
|
@ -60,6 +60,9 @@ class YouPornIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.youporn.com/watch/505835',
|
'url': 'http://www.youporn.com/watch/505835',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -88,7 +91,7 @@ class YouPornIE(InfoExtractor):
|
|||||||
# Main source
|
# Main source
|
||||||
definitions = self._parse_json(
|
definitions = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
|
r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
|
||||||
'media definitions', default='[]'),
|
'media definitions', default='[]'),
|
||||||
video_id, fatal=False)
|
video_id, fatal=False)
|
||||||
if definitions:
|
if definitions:
|
||||||
@ -100,7 +103,7 @@ class YouPornIE(InfoExtractor):
|
|||||||
links.append(video_url)
|
links.append(video_url)
|
||||||
|
|
||||||
# Fallback #1, this also contains extra low quality 180p format
|
# Fallback #1, this also contains extra low quality 180p format
|
||||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||||
links.append(link)
|
links.append(link)
|
||||||
|
|
||||||
# Fallback #2 (unavailable as at 22.06.2017)
|
# Fallback #2 (unavailable as at 22.06.2017)
|
||||||
@ -128,8 +131,9 @@ class YouPornIE(InfoExtractor):
|
|||||||
# Video URL's path looks like this:
|
# Video URL's path looks like this:
|
||||||
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||||
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||||
|
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
||||||
# We will benefit from it by extracting some metadata
|
# We will benefit from it by extracting some metadata
|
||||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
|
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||||
if mobj:
|
if mobj:
|
||||||
height = int(mobj.group('height'))
|
height = int(mobj.group('height'))
|
||||||
bitrate = int(mobj.group('bitrate'))
|
bitrate = int(mobj.group('bitrate'))
|
||||||
|
@ -332,6 +332,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||||
default='{}'), video_id, fatal=False)
|
default='{}'), video_id, fatal=False)
|
||||||
|
|
||||||
|
def _extract_video(self, renderer):
|
||||||
|
video_id = renderer.get('videoId')
|
||||||
|
title = try_get(
|
||||||
|
renderer,
|
||||||
|
(lambda x: x['title']['runs'][0]['text'],
|
||||||
|
lambda x: x['title']['simpleText']), compat_str)
|
||||||
|
description = try_get(
|
||||||
|
renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
|
||||||
|
compat_str)
|
||||||
|
duration = parse_duration(try_get(
|
||||||
|
renderer, lambda x: x['lengthText']['simpleText'], compat_str))
|
||||||
|
view_count_text = try_get(
|
||||||
|
renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
||||||
|
view_count = str_to_int(self._search_regex(
|
||||||
|
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
||||||
|
'view count', default=None))
|
||||||
|
uploader = try_get(
|
||||||
|
renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': YoutubeIE.ie_key(),
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'uploader': uploader,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com'
|
IE_DESC = 'YouTube.com'
|
||||||
@ -2871,36 +2901,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
if renderer:
|
if renderer:
|
||||||
return renderer
|
return renderer
|
||||||
|
|
||||||
def _extract_video(self, renderer):
|
|
||||||
video_id = renderer.get('videoId')
|
|
||||||
title = try_get(
|
|
||||||
renderer,
|
|
||||||
(lambda x: x['title']['runs'][0]['text'],
|
|
||||||
lambda x: x['title']['simpleText']), compat_str)
|
|
||||||
description = try_get(
|
|
||||||
renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
|
|
||||||
compat_str)
|
|
||||||
duration = parse_duration(try_get(
|
|
||||||
renderer, lambda x: x['lengthText']['simpleText'], compat_str))
|
|
||||||
view_count_text = try_get(
|
|
||||||
renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
|
||||||
view_count = str_to_int(self._search_regex(
|
|
||||||
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
|
||||||
'view count', default=None))
|
|
||||||
uploader = try_get(
|
|
||||||
renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': YoutubeIE.ie_key(),
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'duration': duration,
|
|
||||||
'view_count': view_count,
|
|
||||||
'uploader': uploader,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _grid_entries(self, grid_renderer):
|
def _grid_entries(self, grid_renderer):
|
||||||
for item in grid_renderer['items']:
|
for item in grid_renderer['items']:
|
||||||
if not isinstance(item, dict):
|
if not isinstance(item, dict):
|
||||||
@ -3583,34 +3583,17 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
|
|||||||
if not slr_contents:
|
if not slr_contents:
|
||||||
break
|
break
|
||||||
|
|
||||||
isr_contents = []
|
|
||||||
continuation_token = None
|
|
||||||
# Youtube sometimes adds promoted content to searches,
|
# Youtube sometimes adds promoted content to searches,
|
||||||
# changing the index location of videos and token.
|
# changing the index location of videos and token.
|
||||||
# So we search through all entries till we find them.
|
# So we search through all entries till we find them.
|
||||||
for index, isr in enumerate(slr_contents):
|
continuation_token = None
|
||||||
if not isr_contents:
|
for slr_content in slr_contents:
|
||||||
isr_contents = try_get(
|
isr_contents = try_get(
|
||||||
slr_contents,
|
slr_content,
|
||||||
(lambda x: x[index]['itemSectionRenderer']['contents']),
|
lambda x: x['itemSectionRenderer']['contents'],
|
||||||
list)
|
list)
|
||||||
for content in isr_contents:
|
|
||||||
if content.get('videoRenderer') is not None:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
isr_contents = []
|
|
||||||
|
|
||||||
if continuation_token is None:
|
|
||||||
continuation_token = try_get(
|
|
||||||
slr_contents,
|
|
||||||
lambda x: x[index]['continuationItemRenderer']['continuationEndpoint']['continuationCommand'][
|
|
||||||
'token'],
|
|
||||||
compat_str)
|
|
||||||
if continuation_token is not None and isr_contents:
|
|
||||||
break
|
|
||||||
|
|
||||||
if not isr_contents:
|
if not isr_contents:
|
||||||
break
|
continue
|
||||||
for content in isr_contents:
|
for content in isr_contents:
|
||||||
if not isinstance(content, dict):
|
if not isinstance(content, dict):
|
||||||
continue
|
continue
|
||||||
@ -3620,28 +3603,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
|
|||||||
video_id = video.get('videoId')
|
video_id = video.get('videoId')
|
||||||
if not video_id:
|
if not video_id:
|
||||||
continue
|
continue
|
||||||
title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
|
|
||||||
description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
|
yield self._extract_video(video)
|
||||||
duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
|
|
||||||
view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
|
||||||
view_count = str_to_int(self._search_regex(
|
|
||||||
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
|
||||||
'view count', default=None))
|
|
||||||
uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
|
||||||
total += 1
|
total += 1
|
||||||
yield {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': YoutubeIE.ie_key(),
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'duration': duration,
|
|
||||||
'view_count': view_count,
|
|
||||||
'uploader': uploader,
|
|
||||||
}
|
|
||||||
if total == n:
|
if total == n:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if continuation_token is None:
|
||||||
|
continuation_token = try_get(
|
||||||
|
slr_content,
|
||||||
|
lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
|
||||||
|
compat_str)
|
||||||
|
|
||||||
if not continuation_token:
|
if not continuation_token:
|
||||||
break
|
break
|
||||||
data['continuation'] = continuation_token
|
data['continuation'] = continuation_token
|
||||||
|
Loading…
x
Reference in New Issue
Block a user