mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 15:16:52 +01:00
Updated to release 2020.11.21.1
This commit is contained in:
parent
3462ffa892
commit
a0566bbf5c
@ -37,7 +37,7 @@
|
||||
"writeinfojson": true,
|
||||
"writesubtitles": false,
|
||||
"allsubtitles": false,
|
||||
"listssubtitles": false,
|
||||
"listsubtitles": false,
|
||||
"socket_timeout": 20,
|
||||
"fixup": "never"
|
||||
}
|
||||
|
@ -919,6 +919,76 @@ def _real_extract(self, url):
|
||||
self.assertEqual(downloaded['extractor'], 'testex')
|
||||
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
||||
|
||||
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
|
||||
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
    """Exercise ``ignoreerrors`` on a playlist whose entries come from a generator.

    A three-entry playlist of ``url_transparent`` results is extracted with
    the requested format set to ``'extra'``:

    * video 0 raises ``ExtractorError`` while being extracted,
    * video 1 only offers the ``'default'`` format,
    * video 2 additionally offers the ``'extra'`` format.

    The test expects the first two entries to come back as ``None`` and
    only the third one to be recorded as downloaded, carrying the title
    supplied by the transparent playlist entry.
    """

    class _YDL(YDL):
        def __init__(self, *args, **kwargs):
            super(_YDL, self).__init__(*args, **kwargs)

        def trouble(self, s, tb=None):
            # Deliberately a no-op override.
            # NOTE(review): presumably this is the hook errors are reported
            # through, silenced here so the run can continue - confirm.
            pass

    class VideoIE(InfoExtractor):
        _VALID_URL = r'video:(?P<id>\d+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)
            # Video 0 always fails during extraction.
            if video_id == '0':
                raise ExtractorError('foo')
            formats = [{
                'format_id': 'default',
                'url': 'url:',
            }]
            # Only video 2 offers the format requested by the test.
            if video_id == '2':
                formats.append({
                    'format_id': 'extra',
                    'url': TEST_URL,
                })
            return {
                'id': video_id,
                'title': 'Video %s' % video_id,
                'formats': formats,
            }

    class PlaylistIE(InfoExtractor):
        _VALID_URL = r'playlist:'

        def _entries(self):
            # Generator on purpose: entries are produced lazily.
            for video_id in map(compat_str, range(3)):
                yield {
                    '_type': 'url_transparent',
                    'ie_key': VideoIE.ie_key(),
                    'id': video_id,
                    'url': 'video:%s' % video_id,
                    'title': 'Video Transparent %s' % video_id,
                }

        def _real_extract(self, url):
            return self.playlist_result(self._entries())

    ydl = _YDL({
        'format': 'extra',
        'ignoreerrors': True,
    })
    for extractor_class in (VideoIE, PlaylistIE):
        ydl.add_info_extractor(extractor_class(ydl))

    info = ydl.extract_info('playlist:')
    entries = info['entries']
    self.assertEqual(len(entries), 3)
    for failed_entry in entries[:2]:
        self.assertTrue(failed_entry is None)

    self.assertEqual(len(ydl.downloaded_info_dicts), 1)
    downloaded = ydl.downloaded_info_dicts[0]
    self.assertEqual(entries[2], downloaded)
    self.assertEqual(downloaded['url'], TEST_URL)
    self.assertEqual(downloaded['title'], 'Video Transparent 2')
    self.assertEqual(downloaded['id'], '2')
    self.assertEqual(downloaded['extractor'], 'Video')
    self.assertEqual(downloaded['extractor_key'], 'Video')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -829,7 +829,6 @@ def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_in
|
||||
self.report_warning('The program functionality for this site has been marked as broken, '
|
||||
'and will probably not work.')
|
||||
|
||||
try:
|
||||
try:
|
||||
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
|
||||
except (AssertionError, IndexError, AttributeError):
|
||||
@ -839,9 +838,38 @@ def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_in
|
||||
ie_key, temp_id))
|
||||
break
|
||||
|
||||
return self.__extract_info(url, ie, download, extra_info, process, info_dict)
|
||||
|
||||
else:
|
||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||
|
||||
def __handle_extraction_exceptions(func):
    """Decorate an extraction method with the shared error handling.

    The returned wrapper calls ``func(self, *args, **kwargs)`` and returns
    its result. If the call raises:

    * ``GeoRestrictedError``: the message is extended with the list of
      countries (when ``e.countries`` is set) and a VPN/proxy hint, then
      passed to ``self.report_error``.
    * ``ExtractorError``: reported through ``self.report_error`` together
      with the formatted traceback of the error.
    * ``MaxDownloadsReached``: always re-raised.
    * any other ``Exception``: reported when the ``ignoreerrors`` param is
      truthy, re-raised otherwise.

    In every branch that only reports, the wrapper falls through and
    therefore returns ``None``.

    ``functools.wraps`` keeps the decorated method's ``__name__`` and
    ``__doc__`` so tracebacks and introspection show the real method
    instead of ``wrapper``.
    """
    # Function-local import keeps this block self-contained.
    import functools

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        # Handler order is significant and kept as-is: the geo restriction
        # case must be tried before the generic extractor error case.
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            else:
                raise
    return wrapper
|
||||
|
||||
@__handle_extraction_exceptions
|
||||
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
|
||||
ie_result = ie.extract(url)
|
||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||
break
|
||||
return
|
||||
if isinstance(ie_result, list):
|
||||
# Backwards compatibility: old IE result format
|
||||
ie_result = {
|
||||
@ -858,27 +886,6 @@ def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_in
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
else:
|
||||
return ie_result
|
||||
except GeoRestrictedError as e:
|
||||
msg = e.msg
|
||||
if e.countries:
|
||||
msg += '\nThis video is available in %s.' % ', '.join(
|
||||
map(ISO3166Utils.short2full, e.countries))
|
||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||
self.report_error(msg)
|
||||
break
|
||||
except ExtractorError as e: # An error we somewhat expected
|
||||
self.report_error(compat_str(e), e.format_traceback())
|
||||
break
|
||||
except MaxDownloadsReached:
|
||||
raise
|
||||
except Exception as e:
|
||||
if self.params.get('ignoreerrors', False):
|
||||
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
||||
break
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||
|
||||
def add_default_extra_info(self, ie_result, ie, url):
|
||||
self.add_extra_info(ie_result, {
|
||||
@ -1057,9 +1064,8 @@ def report_download(num_entries):
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.process_ie_result(entry,
|
||||
download=download,
|
||||
extra_info=extra)
|
||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||
# TODO: skip failed (empty) entries?
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||
@ -1088,6 +1094,11 @@ def _fixup(r):
|
||||
else:
|
||||
raise Exception('Invalid result type: %s' % result_type)
|
||||
|
||||
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry under the shared error handling.

    Forwards ``entry`` to ``self.process_ie_result`` with the given
    ``download`` flag and ``extra_info`` and returns its result. The
    decorator decides what happens when that call raises.
    """
    entry_result = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return entry_result
|
||||
|
||||
def _build_format_filter(self, filter_spec):
|
||||
" Returns a function to filter the formats according to the filter_spec "
|
||||
|
||||
|
@ -2345,7 +2345,7 @@ def __init__(self, version, name, value, *args, **kwargs):
|
||||
|
||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||
# and uniform cross-version exceptiong handling
|
||||
# and uniform cross-version exception handling
|
||||
class compat_HTMLParseError(Exception):
|
||||
pass
|
||||
|
||||
|
@ -109,7 +109,9 @@ def establish_connection():
|
||||
try:
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
except (compat_urllib_error.URLError, ) as err:
|
||||
if isinstance(err.reason, socket.timeout):
|
||||
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
|
||||
reason = getattr(err, 'reason', None)
|
||||
if isinstance(reason, socket.timeout):
|
||||
raise RetryDownload(err)
|
||||
raise err
|
||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||
|
103
youtube_dlc/extractor/amara.py
Normal file
103
youtube_dlc/extractor/amara.py
Normal file
@ -0,0 +1,103 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class AmaraIE(InfoExtractor):
    """Extractor for video pages on amara.org.

    Metadata and subtitle links are read from the site's JSON API. When
    the first media URL is one that ``YoutubeIE`` or ``VimeoIE`` accepts,
    the result is returned as ``url_transparent`` for that extractor;
    otherwise the media URL is returned as-is.
    """

    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Youtube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        }
    }, {
        # Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill'
        }
    }, {
        # Direct Link
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the amara.org video addressed by ``url``."""
        video_id = self._match_id(url)
        meta = self._download_json(
            'https://amara.org/api/videos/%s/' % video_id,
            video_id, query={'format': 'json'})
        title = meta['title']
        # Only the first listed media URL is used.
        video_url = meta['all_urls'][0]

        subtitles = {}
        for language in (meta.get('languages') or []):
            subtitles_uri = language.get('subtitles_uri')
            # Skip languages without a subtitle endpoint or not yet published.
            if not subtitles_uri or not language.get('published'):
                continue
            lang_code = language.get('code') or 'en'
            # One entry per subtitle format, all served by the same endpoint.
            subtitles.setdefault(lang_code, []).extend(
                {
                    'ext': sub_format,
                    'url': update_url_query(subtitles_uri, {'format': sub_format}),
                }
                for sub_format in ('json', 'srt', 'vtt'))

        info = {
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }

        # Hand over to the first hosting-site extractor that accepts the URL,
        # keeping the metadata collected above.
        hosting_ie = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)),
            None)
        if hosting_ie is not None:
            info.update({
                '_type': 'url_transparent',
                'ie_key': hosting_ie.ie_key(),
            })

        return info
|
@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url(cls, object_str):
|
||||
def _build_brightcove_url(cls, object_str):
|
||||
"""
|
||||
Build a Brightcove url from a xml string containing
|
||||
<object class="BrightcoveExperience">{params}</object>
|
||||
@ -217,7 +217,7 @@ def find_param(name):
|
||||
return cls._make_brightcove_url(params)
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url_from_js(cls, object_js):
|
||||
def _build_brightcove_url_from_js(cls, object_js):
|
||||
# The layout of JS is as follows:
|
||||
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
||||
# // build Brightcove <object /> XML
|
||||
@ -272,12 +272,12 @@ def _extract_brightcove_urls(cls, webpage):
|
||||
).+?>\s*</object>''',
|
||||
webpage)
|
||||
if matches:
|
||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
||||
return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
|
||||
|
||||
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
||||
if matches:
|
||||
return list(filter(None, [
|
||||
cls._build_brighcove_url_from_js(custom_bc)
|
||||
cls._build_brightcove_url_from_js(custom_bc)
|
||||
for custom_bc in matches]))
|
||||
return [src for _, src in re.findall(
|
||||
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
||||
|
@ -1664,7 +1664,7 @@ def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
|
||||
# just the media without qualities renditions.
|
||||
# Fortunately, master playlist can be easily distinguished from media
|
||||
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
||||
# master playlist tags MUST NOT appear in a media playist and vice versa.
|
||||
# master playlist tags MUST NOT appear in a media playlist and vice versa.
|
||||
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
||||
# media playlist and MUST NOT appear in master playlist thus we can
|
||||
# clearly detect media playlist with this criterion.
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
|
||||
class DiscoveryNetworksDeIE(DPlayIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
|
||||
}, {
|
||||
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -60,7 +60,7 @@ def get_item(type_, preference):
|
||||
|
||||
title = get_item('title', preferred_langs) or video_id
|
||||
description = get_item('description', preferred_langs)
|
||||
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
||||
thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
||||
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
|
||||
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
|
||||
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
|
||||
@ -85,7 +85,7 @@ def get_item(type_, preference):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnmail,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
|
@ -36,6 +36,7 @@
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amara import AmaraIE
|
||||
from .alura import (
|
||||
AluraIE,
|
||||
AluraCourseIE
|
||||
@ -1507,7 +1508,6 @@
|
||||
YoutubeIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeHistoryIE,
|
||||
YoutubeLiveIE,
|
||||
YoutubeTabIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeRecommendedIE,
|
||||
|
@ -211,7 +211,7 @@ def sign(manifest_url, manifest_id):
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': clean_html(info.get('synopsis')),
|
||||
'thumbnail': urljoin('http://pluzz.francetv.fr', info.get('image')),
|
||||
'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
|
||||
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
|
||||
'is_live': is_live,
|
||||
|
@ -842,7 +842,7 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# MTVSercices embed
|
||||
# MTVServices embed
|
||||
{
|
||||
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
||||
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
||||
|
@ -3,11 +3,13 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
|
||||
# video can't be watched anonymously due to view count limit reached,
|
||||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
||||
'info_dict': {
|
||||
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
|
||||
}
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# video id is longer than 28 characters
|
||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||
'info_dict': {
|
||||
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
|
||||
'duration': 189,
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||
@ -171,23 +162,21 @@ def _get_automatic_captions(self, video_id, subtitles_id, hl):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||
video_info = compat_parse_qs(self._download_webpage(
|
||||
'https://drive.google.com/get_video_info',
|
||||
video_id, query={'docid': video_id}))
|
||||
|
||||
title = self._search_regex(
|
||||
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
||||
default=None) or self._og_search_title(webpage)
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
||||
default=None))
|
||||
def get_value(key):
|
||||
return try_get(video_info, lambda x: x[key][0])
|
||||
|
||||
reason = get_value('reason')
|
||||
title = get_value('title')
|
||||
if not title and reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
||||
formats = []
|
||||
fmt_stream_map = self._search_regex(
|
||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt stream map', default='').split(',')
|
||||
fmt_list = self._search_regex(
|
||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt_list', default='').split(',')
|
||||
fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
|
||||
fmt_list = (get_value('fmt_list') or '').split(',')
|
||||
if fmt_stream_map and fmt_list:
|
||||
resolutions = {}
|
||||
for fmt in fmt_list:
|
||||
@ -257,19 +246,14 @@ def add_source_format(urlh):
|
||||
if urlh and urlh.headers.get('Content-Disposition'):
|
||||
add_source_format(urlh)
|
||||
|
||||
if not formats:
|
||||
reason = self._search_regex(
|
||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||
if reason:
|
||||
if not formats and reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
hl = self._search_regex(
|
||||
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
|
||||
hl = get_value('hl')
|
||||
subtitles_id = None
|
||||
ttsurl = self._search_regex(
|
||||
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
|
||||
ttsurl = get_value('ttsurl')
|
||||
if ttsurl:
|
||||
# the video Id for subtitles will be the last value in the ttsurl
|
||||
# query string
|
||||
@ -281,8 +265,8 @@ def add_source_format(urlh):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'duration': duration,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
|
||||
'duration': int_or_none(get_value('length_seconds')),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
||||
'automatic_captions': self.extract_automatic_captions(
|
||||
|
@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
|
||||
|
||||
def _extract_rtmp_video(self, webpage):
|
||||
# The server URL is hardcoded
|
||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
||||
video_url = 'rtmpe://videof.infoq.com/cfx/st/'
|
||||
|
||||
# Extract video URL
|
||||
encoded_id = self._search_regex(
|
||||
@ -86,17 +86,18 @@ def _extract_http_video(self, webpage):
|
||||
return [{
|
||||
'format_id': 'http_video',
|
||||
'url': http_video_url,
|
||||
'http_headers': {'Referer': 'https://www.infoq.com/'},
|
||||
}]
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
fields = self._hidden_inputs(webpage)
|
||||
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||
http_audio_url = fields.get('filename')
|
||||
if not http_audio_url:
|
||||
return []
|
||||
|
||||
# base URL is found in the Location header in the response returned by
|
||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||
http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
||||
|
||||
# audio file seem to be missing some times even if there is a download link
|
||||
|
@ -64,7 +64,7 @@ def _real_extract(self, url):
|
||||
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
||||
description = xpath_text(doc, 'ABSTRACT')
|
||||
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
||||
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||
creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||
|
||||
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
||||
formats = []
|
||||
@ -84,5 +84,5 @@ def _real_extract(self, url):
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': createtion_time,
|
||||
'timestamp': creation_time,
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ class NprIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
# mutlimedia, not media title
|
||||
# multimedia, not media title
|
||||
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
|
||||
'info_dict': {
|
||||
'id': '533198237',
|
||||
|
@ -477,7 +477,7 @@ def _extract_webpage(self, url):
|
||||
if media_id:
|
||||
return media_id, presumptive_id, upload_date, description
|
||||
|
||||
# Fronline video embedded via flp
|
||||
# Frontline video embedded via flp
|
||||
video_id = self._search_regex(
|
||||
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
|
||||
if video_id:
|
||||
|
@ -16,8 +16,9 @@
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
@ -67,7 +68,7 @@ def _extract_relinker_info(self, relinker_url, video_id):
|
||||
|
||||
# This does not imply geo restriction (e.g.
|
||||
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
|
||||
if media_url == 'http://download.rai.it/video_no_available.mp4':
|
||||
if '/video_no_available.mp4' in media_url:
|
||||
continue
|
||||
|
||||
ext = determine_ext(media_url)
|
||||
@ -122,27 +123,8 @@ def _extract_subtitles(url, subtitle_url):
|
||||
|
||||
|
||||
class RaiPlayIE(RaiBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.(?:html|json))' % RaiBaseIE._UUID_RE
|
||||
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
|
||||
_TESTS = [{
|
||||
'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
|
||||
'md5': '340aa3b7afb54bfd14a8c11786450d76',
|
||||
'info_dict': {
|
||||
'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
|
||||
'ext': 'mp4',
|
||||
'title': 'La Casa Bianca',
|
||||
'alt_title': 'S2016 - Puntata del 23/10/2016',
|
||||
'description': 'md5:a09d45890850458077d1f68bb036e0a5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Rai 3',
|
||||
'creator': 'Rai 3',
|
||||
'duration': 3278,
|
||||
'timestamp': 1477764300,
|
||||
'upload_date': '20161029',
|
||||
'series': 'La Casa Bianca',
|
||||
'season': '2016',
|
||||
},
|
||||
'skip': 'This content is not available',
|
||||
}, {
|
||||
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
||||
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
|
||||
'info_dict': {
|
||||
@ -166,10 +148,10 @@ class RaiPlayIE(RaiBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, video_id = re.match(self._VALID_URL, url).groups()
|
||||
base, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
media = self._download_json(
|
||||
url.replace('.html', '.json'), video_id, 'Downloading video JSON')
|
||||
base + '.json', video_id, 'Downloading video JSON')
|
||||
|
||||
title = media['name']
|
||||
video = media['video']
|
||||
@ -195,7 +177,8 @@ def _real_extract(self, url):
|
||||
season = media.get('season')
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
|
||||
'display_id': video_id,
|
||||
'title': self._live_title(title) if relinker_info.get(
|
||||
'is_live') else title,
|
||||
'alt_title': strip_or_none(media.get('subtitle')),
|
||||
@ -217,16 +200,16 @@ def _real_extract(self, url):
|
||||
return info
|
||||
|
||||
|
||||
class RaiPlayLiveIE(RaiBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
class RaiPlayLiveIE(RaiPlayIE):
|
||||
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.raiplay.it/dirette/rainews24',
|
||||
'info_dict': {
|
||||
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
||||
'display_id': 'rainews24',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:6eca31500550f9376819f174e5644754',
|
||||
'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
|
||||
'uploader': 'Rai News 24',
|
||||
'creator': 'Rai News 24',
|
||||
'is_live': True,
|
||||
@ -234,58 +217,50 @@ class RaiPlayLiveIE(RaiBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
|
||||
webpage, 'content id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': RaiPlayIE.ie_key(),
|
||||
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class RaiPlayPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
|
||||
'info_dict': {
|
||||
'id': 'nondirloalmiocapo',
|
||||
'title': 'Non dirlo al mio capo',
|
||||
'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
|
||||
'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
base, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
title = self._html_search_meta(
|
||||
('programma', 'nomeProgramma'), webpage, 'title')
|
||||
description = unescapeHTML(self._html_search_meta(
|
||||
('description', 'og:description'), webpage, 'description'))
|
||||
program = self._download_json(
|
||||
base + '.json', playlist_id, 'Downloading program JSON')
|
||||
|
||||
entries = []
|
||||
for mobj in re.finditer(
|
||||
r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
|
||||
webpage):
|
||||
video_url = urljoin(url, mobj.group('path'))
|
||||
for b in (program.get('blocks') or []):
|
||||
for s in (b.get('sets') or []):
|
||||
s_id = s.get('id')
|
||||
if not s_id:
|
||||
continue
|
||||
medias = self._download_json(
|
||||
'%s/%s.json' % (base, s_id), s_id,
|
||||
'Downloading content set JSON', fatal=False)
|
||||
if not medias:
|
||||
continue
|
||||
for m in (medias.get('items') or []):
|
||||
path_id = m.get('path_id')
|
||||
if not path_id:
|
||||
continue
|
||||
video_url = urljoin(url, path_id)
|
||||
entries.append(self.url_result(
|
||||
video_url, ie=RaiPlayIE.ie_key(),
|
||||
video_id=RaiPlayIE._match_id(video_url)))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, program.get('name'),
|
||||
try_get(program, lambda x: x['program_info']['description']))
|
||||
|
||||
|
||||
class RaiIE(RaiBaseIE):
|
||||
@ -328,19 +303,6 @@ class RaiIE(RaiBaseIE):
|
||||
'duration': 2214,
|
||||
'upload_date': '20161103',
|
||||
}
|
||||
}, {
|
||||
# drawMediaRaiTV(...)
|
||||
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
|
||||
'md5': '2dd727e61114e1ee9c47f0da6914e178',
|
||||
'info_dict': {
|
||||
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
|
||||
'ext': 'mp4',
|
||||
'title': 'Il pacco',
|
||||
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'skip': 'This content is not available',
|
||||
}, {
|
||||
# initEdizione('ContentItem-...'
|
||||
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
|
||||
@ -352,18 +314,6 @@ class RaiIE(RaiBaseIE):
|
||||
'upload_date': '20170401',
|
||||
},
|
||||
'skip': 'Changes daily',
|
||||
}, {
|
||||
# HDS live stream with only relinker URL
|
||||
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
|
||||
'info_dict': {
|
||||
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
|
||||
'ext': 'flv',
|
||||
'title': 'EuroNews',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This content is available only in Italy',
|
||||
}, {
|
||||
# HLS live stream with ContentItem in og:url
|
||||
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
|
||||
@ -473,7 +423,7 @@ def _real_extract(self, url):
|
||||
except ExtractorError:
|
||||
pass
|
||||
|
||||
relinker_url = self._search_regex(
|
||||
relinker_url = self._proto_relative_url(self._search_regex(
|
||||
r'''(?x)
|
||||
(?:
|
||||
var\s+videoURL|
|
||||
@ -485,7 +435,7 @@ def _real_extract(self, url):
|
||||
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
|
||||
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
|
||||
''',
|
||||
webpage, 'relinker URL', group='url')
|
||||
webpage, 'relinker URL', group='url'))
|
||||
|
||||
relinker_info = self._extract_relinker_info(
|
||||
urljoin(url, relinker_url), video_id)
|
||||
|
@ -649,7 +649,7 @@ def _real_extract(self, url):
|
||||
|
||||
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
|
||||
def _extract_playlist(self, base_url, playlist_id, playlist_title):
|
||||
# Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200.
|
||||
# Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
|
||||
# https://developers.soundcloud.com/blog/offset-pagination-deprecated
|
||||
COMMON_QUERY = {
|
||||
'limit': 200,
|
||||
|
@ -9,6 +9,7 @@
|
||||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
@ -44,7 +45,8 @@ def _extract_video(self, video_info, video_id):
|
||||
'format_id': player_type,
|
||||
'url': vurl,
|
||||
})
|
||||
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
|
||||
rights = try_get(video_info, lambda x: x['rights'], dict) or {}
|
||||
if not formats and rights.get('geoBlockedSweden'):
|
||||
self.raise_geo_restricted(
|
||||
'This video is only available in Sweden',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
@ -70,6 +72,7 @@ def _extract_video(self, video_info, video_id):
|
||||
episode = video_info.get('episodeTitle')
|
||||
episode_number = int_or_none(video_info.get('episodeNumber'))
|
||||
|
||||
timestamp = unified_timestamp(rights.get('validFrom'))
|
||||
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
|
||||
age_limit = None
|
||||
adult = dict_get(
|
||||
@ -84,6 +87,7 @@ def _extract_video(self, video_info, video_id):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'age_limit': age_limit,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
@ -136,26 +140,39 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
svt:(?P<svt_id>[^/?#&]+)|
|
||||
(?:
|
||||
svt:|
|
||||
https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
|
||||
)
|
||||
(?P<svt_id>[^/?#&]+)|
|
||||
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
|
||||
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
|
||||
'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
|
||||
'md5': '2382036fd6f8c994856c323fe51c426e',
|
||||
'info_dict': {
|
||||
'id': '5996901',
|
||||
'id': 'jNwpV9P',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flygplan till Haile Selassie',
|
||||
'duration': 3527,
|
||||
'thumbnail': r're:^https?://.*[\.-]jpg$',
|
||||
'title': 'Det här är himlen',
|
||||
'timestamp': 1586044800,
|
||||
'upload_date': '20200405',
|
||||
'duration': 3515,
|
||||
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
|
||||
'age_limit': 0,
|
||||
'subtitles': {
|
||||
'sv': [{
|
||||
'ext': 'wsrt',
|
||||
'ext': 'vtt',
|
||||
}]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
# skip for now due to download test asserts that segment is > 10000 bytes and svt uses
|
||||
# init segments that are smaller
|
||||
# AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# geo restricted to Sweden
|
||||
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
||||
@ -172,6 +189,12 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
}, {
|
||||
'url': 'svt:14278044',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'svt:eWv5MLX',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _adjust_title(self, info):
|
||||
@ -236,7 +259,10 @@ def _real_extract(self, url):
|
||||
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
|
||||
webpage, 'video id')
|
||||
|
||||
return self._extract_by_video_id(svt_id, webpage)
|
||||
info_dict = self._extract_by_video_id(svt_id, webpage)
|
||||
info_dict['thumbnail'] = thumbnail
|
||||
|
||||
return info_dict
|
||||
|
||||
|
||||
class SVTSeriesIE(SVTPlayBaseIE):
|
||||
@ -360,7 +386,7 @@ class SVTPageIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
||||
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
@ -86,7 +86,7 @@ def _real_extract(self, url):
|
||||
# return self._extract_via_api(kind, video_id)
|
||||
|
||||
# JSON api does not provide some audio formats (e.g. ogg) thus
|
||||
# extractiong audio via webpage
|
||||
# extracting audio via webpage
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
|
@ -208,7 +208,7 @@ def _extract_urls(cls, webpage):
|
||||
if m:
|
||||
return [m.group('url')]
|
||||
|
||||
# Are whitesapces ignored in URLs?
|
||||
# Are whitespaces ignored in URLs?
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/12044
|
||||
matches = re.findall(
|
||||
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
|
||||
|
@ -56,9 +56,9 @@ def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
|
||||
content_id = xpath_text(video_data, 'contentId') or video_id
|
||||
# rtmp_src = xpath_text(video_data, 'akamai/src')
|
||||
# if rtmp_src:
|
||||
# splited_rtmp_src = rtmp_src.split(',')
|
||||
# if len(splited_rtmp_src) == 2:
|
||||
# rtmp_src = splited_rtmp_src[1]
|
||||
# split_rtmp_src = rtmp_src.split(',')
|
||||
# if len(split_rtmp_src) == 2:
|
||||
# rtmp_src = split_rtmp_src[1]
|
||||
# aifp = xpath_text(video_data, 'akamai/aifp', default='')
|
||||
|
||||
urls = []
|
||||
|
@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import itertools
|
||||
@ -9,6 +10,10 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@ -166,19 +171,20 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# episode
|
||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||
'md5': '5fa476a902e902783ac7a4d615cdbc7a',
|
||||
'md5': '94e0e34fd58f169f40c184f232356cfe',
|
||||
'info_dict': {
|
||||
'id': '44699v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Boys Over Flowers - Episode 1',
|
||||
'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
|
||||
'duration': 4204,
|
||||
'duration': 4172,
|
||||
'timestamp': 1270496524,
|
||||
'upload_date': '20100405',
|
||||
'uploader': 'group8',
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||
}, {
|
||||
# youtube external
|
||||
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
|
||||
@ -195,14 +201,15 @@ class VikiIE(VikiBaseIE):
|
||||
'uploader_id': 'ad14065n',
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
}
|
||||
},
|
||||
'skip': 'Page not found!',
|
||||
}, {
|
||||
'url': 'http://www.viki.com/player/44699v',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# non-English description
|
||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||
'md5': '1713ae35df5a521b31f6dc40730e7c9c',
|
||||
'md5': 'adf9e321a0ae5d0aace349efaaff7691',
|
||||
'info_dict': {
|
||||
'id': '158036v',
|
||||
'ext': 'mp4',
|
||||
@ -218,71 +225,11 @@ class VikiIE(VikiBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._call_api(
|
||||
'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
streams = self._call_api(
|
||||
'videos/%s/streams.json' % video_id, video_id,
|
||||
'Downloading video streams JSON')
|
||||
|
||||
formats = []
|
||||
for format_id, stream_dict in streams.items():
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
for protocol, format_dict in stream_dict.items():
|
||||
# rtmps URLs does not seem to work
|
||||
if protocol == 'rtmps':
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
format_drms = format_dict.get('drms')
|
||||
format_stream_id = format_dict.get('id')
|
||||
if format_id == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native',
|
||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
||||
# Despite CODECS metadata in m3u8 all video-only formats
|
||||
# are actually video+audio
|
||||
for f in m3u8_formats:
|
||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
||||
f['acodec'] = None
|
||||
formats.extend(m3u8_formats)
|
||||
elif format_id == 'mpd':
|
||||
mpd_formats = self._extract_mpd_formats(
|
||||
format_url, video_id,
|
||||
mpd_id='mpd-%s' % protocol, fatal=False)
|
||||
formats.extend(mpd_formats)
|
||||
elif format_id == 'mpd':
|
||||
|
||||
formats.extend(mpd_formats)
|
||||
elif format_url.startswith('rtmp'):
|
||||
mobj = re.search(
|
||||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
||||
format_url)
|
||||
if not mobj:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'ext': 'flv',
|
||||
'url': mobj.group('url'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'app': mobj.group('app'),
|
||||
'page_url': url,
|
||||
'drms': format_drms,
|
||||
'stream_id': format_stream_id,
|
||||
})
|
||||
else:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': '%s-%s' % (format_id, protocol),
|
||||
'height': height,
|
||||
'drms': format_drms,
|
||||
'stream_id': format_stream_id,
|
||||
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
resp = self._download_json(
|
||||
'https://www.viki.com/api/videos/' + video_id,
|
||||
video_id, 'Downloading video JSON',
|
||||
headers={'x-viki-app-ver': '4.0.57'})
|
||||
video = resp['video']
|
||||
|
||||
self._check_errors(video)
|
||||
|
||||
@ -342,6 +289,73 @@ def _real_extract(self, url):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
formats = []
|
||||
|
||||
def add_format(format_id, format_dict, protocol='http'):
|
||||
# rtmps URLs does not seem to work
|
||||
if protocol == 'rtmps':
|
||||
return
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
return
|
||||
format_drms = format_dict.get('drms')
|
||||
format_stream_id = format_dict.get('id')
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
|
||||
stream = qs.get('stream', [None])[0]
|
||||
if stream:
|
||||
format_url = base64.b64decode(stream).decode()
|
||||
if format_id in ('m3u8', 'hls'):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native',
|
||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
||||
# Despite CODECS metadata in m3u8 all video-only formats
|
||||
# are actually video+audio
|
||||
for f in m3u8_formats:
|
||||
if '_drm/index_' in f['url']:
|
||||
continue
|
||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
||||
f['acodec'] = None
|
||||
formats.append(f)
|
||||
elif format_id in ('mpd', 'dash'):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, 'mpd-%s' % protocol, fatal=False))
|
||||
elif format_url.startswith('rtmp'):
|
||||
mobj = re.search(
|
||||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
||||
format_url)
|
||||
if not mobj:
|
||||
return
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'ext': 'flv',
|
||||
'url': mobj.group('url'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'app': mobj.group('app'),
|
||||
'page_url': url,
|
||||
'drms': format_drms,
|
||||
'stream_id': format_stream_id,
|
||||
})
|
||||
else:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': '%s-%s' % (format_id, protocol),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)),
|
||||
'drms': format_drms,
|
||||
'stream_id': format_stream_id,
|
||||
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
||||
})
|
||||
|
||||
for format_id, format_dict in (resp.get('streams') or {}).items():
|
||||
add_format(format_id, format_dict)
|
||||
if not formats:
|
||||
streams = self._call_api(
|
||||
'videos/%s/streams.json' % video_id, video_id,
|
||||
'Downloading video streams JSON')
|
||||
|
||||
if 'external' in streams:
|
||||
result.update({
|
||||
'_type': 'url_transparent',
|
||||
@ -349,6 +363,11 @@ def _real_extract(self, url):
|
||||
})
|
||||
return result
|
||||
|
||||
for format_id, stream_dict in streams.items():
|
||||
for protocol, format_dict in stream_dict.items():
|
||||
add_format(format_id, format_dict, protocol)
|
||||
self._sort_formats(formats)
|
||||
|
||||
result['formats'] = formats
|
||||
return result
|
||||
|
||||
|
@ -922,7 +922,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
|
||||
def _fetch_page(self, album_id, authorization, hashed_pass, page):
|
||||
api_page = page + 1
|
||||
query = {
|
||||
'fields': 'link,uri',
|
||||
@ -934,7 +934,7 @@ def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
|
||||
videos = self._download_json(
|
||||
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
||||
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
||||
'Authorization': 'jwt ' + authorizaion,
|
||||
'Authorization': 'jwt ' + authorization,
|
||||
})['data']
|
||||
for video in videos:
|
||||
link = video.get('link')
|
||||
|
@ -54,17 +54,17 @@ def _extract_tracks(self, item_id, referer, typ=None):
|
||||
def _decrypt(origin):
|
||||
n = int(origin[0])
|
||||
origin = origin[1:]
|
||||
short_lenth = len(origin) // n
|
||||
long_num = len(origin) - short_lenth * n
|
||||
short_length = len(origin) // n
|
||||
long_num = len(origin) - short_length * n
|
||||
l = tuple()
|
||||
for i in range(0, n):
|
||||
length = short_lenth
|
||||
length = short_length
|
||||
if i < long_num:
|
||||
length += 1
|
||||
l += (origin[0:length], )
|
||||
origin = origin[length:]
|
||||
ans = ''
|
||||
for i in range(0, short_lenth + 1):
|
||||
for i in range(0, short_length + 1):
|
||||
for j in range(0, n):
|
||||
if len(l[j]) > i:
|
||||
ans += l[j][i]
|
||||
|
@ -306,6 +306,8 @@ def _real_initialize(self):
|
||||
},
|
||||
}
|
||||
|
||||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
||||
|
||||
def _call_api(self, ep, query, video_id):
|
||||
data = self._DEFAULT_API_DATA.copy()
|
||||
data.update(query)
|
||||
@ -322,8 +324,8 @@ def _call_api(self, ep, query, video_id):
|
||||
def _extract_yt_initial_data(self, video_id, webpage):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;',
|
||||
webpage, 'yt initial data'),
|
||||
(r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
|
||||
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
|
||||
video_id)
|
||||
|
||||
|
||||
@ -1089,6 +1091,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
|
||||
'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
|
||||
'info_dict': {
|
||||
'id': 'CHqg6qOn4no',
|
||||
'ext': 'mp4',
|
||||
'title': 'Part 77 Sort a list of simple types in c#',
|
||||
'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
|
||||
'upload_date': '20130831',
|
||||
'uploader_id': 'kudvenkat',
|
||||
'uploader': 'kudvenkat',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -2138,6 +2156,21 @@ def _extract_filesize(media_url):
|
||||
formats.append(a_format)
|
||||
else:
|
||||
error_message = extract_unavailable_message()
|
||||
if not error_message:
|
||||
reason_list = try_get(
|
||||
player_response,
|
||||
lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
|
||||
list) or []
|
||||
for reason in reason_list:
|
||||
if not isinstance(reason, dict):
|
||||
continue
|
||||
reason_text = try_get(reason, lambda x: x['text'], compat_str)
|
||||
if reason_text:
|
||||
if not error_message:
|
||||
error_message = ''
|
||||
error_message += reason_text
|
||||
if error_message:
|
||||
error_message = clean_html(error_message)
|
||||
if not error_message:
|
||||
error_message = clean_html(try_get(
|
||||
player_response, lambda x: x['playabilityStatus']['reason'],
|
||||
@ -2319,8 +2352,8 @@ def extract_meta(field):
|
||||
|
||||
def _extract_count(count_name):
|
||||
return str_to_int(self._search_regex(
|
||||
r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
|
||||
% re.escape(count_name),
|
||||
(r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
|
||||
r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
|
||||
video_webpage, count_name, default=None))
|
||||
|
||||
like_count = _extract_count('like')
|
||||
@ -2613,13 +2646,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 138,
|
||||
}, {
|
||||
'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
|
||||
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
|
||||
'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://music.youtube.com/channel/UCT-K0qO8z6NzWrywqefBPBQ',
|
||||
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
||||
@ -2666,7 +2699,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
|
||||
'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Playlist URL that does not actually serve a playlist
|
||||
@ -2698,14 +2731,59 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
IGNORE = (YoutubeLiveIE,)
|
||||
return (
|
||||
False if any(ie.suitable(url) for ie in IGNORE)
|
||||
else super(YoutubeTabIE, cls).suitable(url))
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
|
||||
'info_dict': {
|
||||
'id': '9Auq9mYxFEE',
|
||||
'ext': 'mp4',
|
||||
'title': 'Watch Sky News live',
|
||||
'uploader': 'Sky News',
|
||||
'uploader_id': 'skynews',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
|
||||
'upload_date': '20191102',
|
||||
'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
|
||||
'categories': ['News & Politics'],
|
||||
'tags': list,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/user/TheYoungTurks/live',
|
||||
'info_dict': {
|
||||
'id': 'a48o2S1cPoo',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Young Turks - Live Main Show',
|
||||
'uploader': 'The Young Turks',
|
||||
'uploader_id': 'TheYoungTurks',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
|
||||
'upload_date': '20150715',
|
||||
'license': 'Standard YouTube License',
|
||||
'description': 'md5:438179573adcdff3c97ebb1ee632b891',
|
||||
'categories': ['News & Politics'],
|
||||
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
|
||||
'only_matching': True,
|
||||
},
|
||||
# TODO
|
||||
# {
|
||||
# 'url': 'https://www.youtube.com/TheYoungTurks/live',
|
||||
# 'only_matching': True,
|
||||
# }
|
||||
]
|
||||
|
||||
def _extract_channel_id(self, webpage):
|
||||
channel_id = self._html_search_meta(
|
||||
@ -3147,7 +3225,7 @@ def _real_extract(self, url):
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
webpage = self._download_webpage(url, item_id)
|
||||
identity_token = self._search_regex(
|
||||
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
r'\bID_TOKEN["\']\s*:\s/l*["\'](.+?)["\']', webpage,
|
||||
'identity token', default=None)
|
||||
data = self._extract_yt_initial_data(item_id, webpage)
|
||||
tabs = try_get(
|
||||
@ -3158,7 +3236,11 @@ def _real_extract(self, url):
|
||||
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
|
||||
if playlist:
|
||||
return self._extract_from_playlist(item_id, data, playlist)
|
||||
# Fallback to video extraction if no playlist alike page is recognized
|
||||
# Fallback to video extraction if no playlist alike page is recognized.
|
||||
# First check for the current video then try the v attribute of URL query.
|
||||
video_id = try_get(
|
||||
data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
|
||||
compat_str) or video_id
|
||||
if video_id:
|
||||
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
|
||||
# Failed to recognize
|
||||
@ -3279,58 +3361,6 @@ def _real_extract(self, url):
|
||||
ie=YoutubeTabIE.ie_key(), video_id=user_id)
|
||||
|
||||
|
||||
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com live streams'
|
||||
_VALID_URL = r'(?P<base_url>%s)/live' % YoutubeTabIE._VALID_URL
|
||||
IE_NAME = 'youtube:live'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/user/TheYoungTurks/live',
|
||||
'info_dict': {
|
||||
'id': 'a48o2S1cPoo',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Young Turks - Live Main Show',
|
||||
'uploader': 'The Young Turks',
|
||||
'uploader_id': 'TheYoungTurks',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
|
||||
'upload_date': '20150715',
|
||||
'license': 'Standard YouTube License',
|
||||
'description': 'md5:438179573adcdff3c97ebb1ee632b891',
|
||||
'categories': ['News & Politics'],
|
||||
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/TheYoungTurks/live',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
base_url = mobj.group('base_url')
|
||||
webpage = self._download_webpage(url, channel_id, fatal=False)
|
||||
if webpage:
|
||||
page_type = self._og_search_property(
|
||||
'type', webpage, 'page type', default='')
|
||||
video_id = self._html_search_meta(
|
||||
'videoId', webpage, 'video id', default=None)
|
||||
if page_type.startswith('video') and video_id and re.match(
|
||||
r'^[0-9A-Za-z_-]{11}$', video_id):
|
||||
return self.url_result(video_id, YoutubeIE.ie_key())
|
||||
return self.url_result(base_url)
|
||||
|
||||
|
||||
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com searches'
|
||||
# there doesn't appear to be a real limit, for example if you search for
|
||||
|
@ -2460,7 +2460,7 @@ def __init__(self, code=None, msg='Unknown error'):
|
||||
|
||||
# Parsing code and msg
|
||||
if (self.code in (errno.ENOSPC, errno.EDQUOT)
|
||||
or 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
|
||||
or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
|
||||
self.reason = 'NO_SPACE'
|
||||
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
|
||||
self.reason = 'VALUE_TOO_LONG'
|
||||
@ -4215,10 +4215,10 @@ def parse_codecs(codecs_str):
|
||||
# http://tools.ietf.org/html/rfc6381
|
||||
if not codecs_str:
|
||||
return {}
|
||||
splited_codecs = list(filter(None, map(
|
||||
split_codecs = list(filter(None, map(
|
||||
lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
|
||||
vcodec, acodec = None, None
|
||||
for full_codec in splited_codecs:
|
||||
for full_codec in split_codecs:
|
||||
codec = full_codec.split('.')[0]
|
||||
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
|
||||
if not vcodec:
|
||||
@ -4229,10 +4229,10 @@ def parse_codecs(codecs_str):
|
||||
else:
|
||||
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
|
||||
if not vcodec and not acodec:
|
||||
if len(splited_codecs) == 2:
|
||||
if len(split_codecs) == 2:
|
||||
return {
|
||||
'vcodec': splited_codecs[0],
|
||||
'acodec': splited_codecs[1],
|
||||
'vcodec': split_codecs[0],
|
||||
'acodec': split_codecs[1],
|
||||
}
|
||||
else:
|
||||
return {
|
||||
@ -5471,7 +5471,7 @@ def encode_base_n(num, n, table=None):
|
||||
|
||||
def decode_packed_codes(code):
|
||||
mobj = re.search(PACKED_CODES_RE, code)
|
||||
obfucasted_code, base, count, symbols = mobj.groups()
|
||||
obfuscated_code, base, count, symbols = mobj.groups()
|
||||
base = int(base)
|
||||
count = int(count)
|
||||
symbols = symbols.split('|')
|
||||
@ -5484,7 +5484,7 @@ def decode_packed_codes(code):
|
||||
|
||||
return re.sub(
|
||||
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
|
||||
obfucasted_code)
|
||||
obfuscated_code)
|
||||
|
||||
|
||||
def caesar(s, alphabet, shift):
|
||||
|
Loading…
Reference in New Issue
Block a user