Merge remote-tracking branch 'rzhxeo/youtube'

This commit is contained in:
Philipp Hagemeister 2013-12-23 04:37:40 +01:00
commit f82b18efc1
2 changed files with 99 additions and 147 deletions

View File

@ -643,7 +643,7 @@ def process_video_result(self, info_dict, download=True):
info_dict['playlist_index'] = None info_dict['playlist_index'] = None
# These extractors handle format selection themselves # These extractors handle format selection themselves
if info_dict['extractor'] in [u'youtube', u'Youku']: if info_dict['extractor'] in [u'Youku']:
if download: if download:
self.process_info(info_dict) self.process_info(info_dict)
return info_dict return info_dict
@ -669,10 +669,6 @@ def process_video_result(self, info_dict, download=True):
if 'ext' not in format: if 'ext' not in format:
format['ext'] = determine_ext(format['url']) format['ext'] = determine_ext(format['url'])
if self.params.get('listformats', None):
self.list_formats(info_dict)
return
format_limit = self.params.get('format_limit', None) format_limit = self.params.get('format_limit', None)
if format_limit: if format_limit:
formats = list(takewhile_inclusive( formats = list(takewhile_inclusive(
@ -685,9 +681,16 @@ def _free_formats_key(f):
except ValueError: except ValueError:
ext_ord = -1 ext_ord = -1
# We only compare the extension if they have the same height and width # We only compare the extension if they have the same height and width
return (f.get('height'), f.get('width'), ext_ord) return (f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1,
ext_ord)
formats = sorted(formats, key=_free_formats_key) formats = sorted(formats, key=_free_formats_key)
info_dict['formats'] = formats
if self.params.get('listformats', None):
self.list_formats(info_dict)
return
req_format = self.params.get('format', 'best') req_format = self.params.get('format', 'best')
if req_format is None: if req_format is None:
req_format = 'best' req_format = 'best'

View File

@ -162,23 +162,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Dash audio # Dash audio
'141', '172', '140', '171', '139', '141', '172', '140', '171', '139',
] ]
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
# Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '102', '84', '101', '83', '100', '82',
# Dash video
'138', '248', '137', '247', '136', '246', '245',
'244', '135', '243', '134', '242', '133', '160',
# Dash audio
'172', '141', '171', '140', '139',
]
_video_formats_map = {
'flv': ['35', '34', '6', '5'],
'3gp': ['36', '17', '13'],
'mp4': ['38', '37', '22', '18'],
'webm': ['46', '45', '44', '43'],
}
_video_extensions = { _video_extensions = {
'13': '3gp', '13': '3gp',
'17': '3gp', '17': '3gp',
@ -236,54 +219,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'248': 'webm', '248': 'webm',
} }
_video_dimensions = { _video_dimensions = {
'5': '400x240', '5': {'width': 400, 'height': 240},
'6': '???', '6': {},
'13': '???', '13': {},
'17': '176x144', '17': {'width': 176, 'height': 144},
'18': '640x360', '18': {'width': 640, 'height': 360},
'22': '1280x720', '22': {'width': 1280, 'height': 720},
'34': '640x360', '34': {'width': 640, 'height': 360},
'35': '854x480', '35': {'width': 854, 'height': 480},
'36': '320x240', '36': {'width': 320, 'height': 240},
'37': '1920x1080', '37': {'width': 1920, 'height': 1080},
'38': '4096x3072', '38': {'width': 4096, 'height': 3072},
'43': '640x360', '43': {'width': 640, 'height': 360},
'44': '854x480', '44': {'width': 854, 'height': 480},
'45': '1280x720', '45': {'width': 1280, 'height': 720},
'46': '1920x1080', '46': {'width': 1920, 'height': 1080},
'82': '360p', '82': {'height': 360, 'display': '360p'},
'83': '480p', '83': {'height': 480, 'display': '480p'},
'84': '720p', '84': {'height': 720, 'display': '720p'},
'85': '1080p', '85': {'height': 1080, 'display': '1080p'},
'92': '240p', '92': {'height': 240, 'display': '240p'},
'93': '360p', '93': {'height': 360, 'display': '360p'},
'94': '480p', '94': {'height': 480, 'display': '480p'},
'95': '720p', '95': {'height': 720, 'display': '720p'},
'96': '1080p', '96': {'height': 1080, 'display': '1080p'},
'100': '360p', '100': {'height': 360, 'display': '360p'},
'101': '480p', '101': {'height': 480, 'display': '480p'},
'102': '720p', '102': {'height': 720, 'display': '720p'},
'132': '240p', '132': {'height': 240, 'display': '240p'},
'151': '72p', '151': {'height': 72, 'display': '72p'},
'133': '240p', '133': {'height': 240, 'display': '240p'},
'134': '360p', '134': {'height': 360, 'display': '360p'},
'135': '480p', '135': {'height': 480, 'display': '480p'},
'136': '720p', '136': {'height': 720, 'display': '720p'},
'137': '1080p', '137': {'height': 1080, 'display': '1080p'},
'138': '>1080p', '138': {'height': 1081, 'display': '>1080p'},
'139': '48k', '139': {'display': '48k'},
'140': '128k', '140': {'display': '128k'},
'141': '256k', '141': {'display': '256k'},
'160': '192p', '160': {'height': 192, 'display': '192p'},
'171': '128k', '171': {'display': '128k'},
'172': '256k', '172': {'display': '256k'},
'242': '240p', '242': {'height': 240, 'display': '240p'},
'243': '360p', '243': {'height': 360, 'display': '360p'},
'244': '480p', '244': {'height': 480, 'display': '480p'},
'245': '480p', '245': {'height': 480, 'display': '480p'},
'246': '480p', '246': {'height': 480, 'display': '480p'},
'247': '720p', '247': {'height': 720, 'display': '720p'},
'248': '1080p', '248': {'height': 1080, 'display': '1080p'},
} }
_special_itags = { _special_itags = {
'82': '3D', '82': '3D',
@ -1153,13 +1136,6 @@ def _get_available_automatic_caption(self, video_id, webpage):
self._downloader.report_warning(err_msg) self._downloader.report_warning(err_msg)
return {} return {}
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
self._video_dimensions.get(x, '???'),
' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
def _extract_id(self, url): def _extract_id(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE) mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None: if mobj is None:
@ -1172,48 +1148,11 @@ def _get_video_url_list(self, url_map):
Transform a dictionary in the format {itag:url} to a list of (itag, url) Transform a dictionary in the format {itag:url} to a list of (itag, url)
with the requested formats. with the requested formats.
""" """
req_format = self._downloader.params.get('format', None) existing_formats = [x for x in self._available_formats if x in url_map]
format_limit = self._downloader.params.get('format_limit', None)
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
if format_limit is not None and format_limit in available_formats:
format_list = available_formats[available_formats.index(format_limit):]
else:
format_list = available_formats
existing_formats = [x for x in format_list if x in url_map]
if len(existing_formats) == 0: if len(existing_formats) == 0:
raise ExtractorError(u'no known formats available for video') raise ExtractorError(u'no known formats available for video')
if self._downloader.params.get('listformats', None): video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
self._print_formats(existing_formats) video_url_list.reverse() # order worst to best
return
if req_format is None or req_format == 'best':
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
elif req_format in ('-1', 'all'):
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific formats. We pick the first in a slash-delimited sequence.
# Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
# available in the specified format. For example,
# if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
# if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
# if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
req_formats = req_format.split('/')
video_url_list = None
for rf in req_formats:
if rf in url_map:
video_url_list = [(rf, url_map[rf])]
break
if rf in self._video_formats_map:
for srf in self._video_formats_map[rf]:
if srf in url_map:
video_url_list = [(srf, url_map[srf])]
break
else:
continue
break
if video_url_list is None:
raise ExtractorError(u'requested format not available')
return video_url_list return video_url_list
def _extract_from_m3u8(self, manifest_url, video_id): def _extract_from_m3u8(self, manifest_url, video_id):
@ -1462,50 +1401,60 @@ def _extract_count(klass):
url += '&ratebypass=yes' url += '&ratebypass=yes'
url_map[url_data['itag'][0]] = url url_map[url_data['itag'][0]] = url
video_url_list = self._get_video_url_list(url_map) video_url_list = self._get_video_url_list(url_map)
if not video_url_list:
return
elif video_info.get('hlsvp'): elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0] manifest_url = video_info['hlsvp'][0]
url_map = self._extract_from_m3u8(manifest_url, video_id) url_map = self._extract_from_m3u8(manifest_url, video_id)
video_url_list = self._get_video_url_list(url_map) video_url_list = self._get_video_url_list(url_map)
if not video_url_list:
return
else: else:
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
results = [] formats = []
for itag, video_real_url in video_url_list: for itag, video_real_url in video_url_list:
# Extension # Extension
video_extension = self._video_extensions.get(itag, 'flv') video_extension = self._video_extensions.get(itag, 'flv')
resolution = self._video_dimensions.get(itag, {}).get('display')
width = self._video_dimensions.get(itag, {}).get('width')
height = self._video_dimensions.get(itag, {}).get('height')
note = self._special_itags.get(itag)
video_format = '{0} - {1}{2}'.format(itag if itag else video_extension, video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
self._video_dimensions.get(itag, '???'), '%dx%d' % (width, height) if width is not None and height is not None else (resolution if resolution is not None else '???'),
' ('+self._special_itags[itag]+')' if itag in self._special_itags else '') ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
results.append({ formats.append({
'id': video_id, 'url': video_real_url,
'url': video_real_url, 'ext': video_extension,
'uploader': video_uploader, 'format': video_format,
'uploader_id': video_uploader_id, 'format_id': itag,
'upload_date': upload_date, 'player_url': player_url,
'title': video_title, '_resolution': resolution,
'ext': video_extension, 'width': width,
'format': video_format, 'height': height,
'format_id': itag, 'format_note': note,
'thumbnail': video_thumbnail,
'description': video_description,
'player_url': player_url,
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
}) })
return results def _formats_key(f):
return (f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1)
formats = sorted(formats, key=_formats_key)
return {
'id': video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': upload_date,
'title': video_title,
'thumbnail': video_thumbnail,
'description': video_description,
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'formats': formats,
}
class YoutubePlaylistIE(YoutubeBaseInfoExtractor): class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
IE_DESC = u'YouTube.com playlists' IE_DESC = u'YouTube.com playlists'