Default info_dict['format'] to info_dict['ext'] and make the YouTube format string more verbose

This commit is contained in:
Filippo Valsorda 2012-11-27 17:20:25 +01:00
parent c9128b353d
commit 717b1f72ed
2 changed files with 29 additions and 38 deletions

View File

@ -355,6 +355,9 @@ def process_info(self, info_dict):
# Keep for backwards compatibility # Keep for backwards compatibility
info_dict['stitle'] = info_dict['title'] info_dict['stitle'] = info_dict['title']
if not 'format' in info_dict:
info_dict['format'] = info_dict['ext']
reason = self._match_entry(info_dict) reason = self._match_entry(info_dict)
if reason is not None: if reason is not None:
self.to_screen(u'[download] ' + reason) self.to_screen(u'[download] ' + reason)

View File

@ -29,33 +29,34 @@ class InfoExtractor(object):
"""Information Extractor class. """Information Extractor class.
Information extractors are the classes that, given a URL, extract Information extractors are the classes that, given a URL, extract
information from the video (or videos) the URL refers to. This information about the video (or videos) the URL refers to. This
information includes the real video URL, the video title and simplified information includes the real video URL, the video title, author and
title, author and others. The information is stored in a dictionary others. The information is stored in a dictionary which is then
which is then passed to the FileDownloader. The FileDownloader passed to the FileDownloader. The FileDownloader processes this
processes this information possibly downloading the video to the file information possibly downloading the video to the file system, among
system, among other possible outcomes. The dictionaries must include other possible outcomes.
the following fields:
id: Video identifier. The dictionaries must include the following fields:
url: Final video URL.
uploader: Nickname of the video uploader.
title: Literal title.
ext: Video filename extension.
format: Video format.
player_url: SWF Player URL (may be None).
The following fields are optional. Their primary purpose is to allow id: Video identifier.
youtube-dl to serve as the backend for a video search function, such url: Final video URL.
as the one in youtube2mp3. They are only used when their respective uploader: Nickname of the video uploader.
forced printing functions are called: title: Video title, unescaped.
ext: Video filename extension.
player_url: SWF Player URL (may be None).
thumbnail: Full URL to a video thumbnail image. The following fields are optional:
description: One-line video description.
format: The video format, defaults to ext. Used by --get-format
thumbnail: Full URL to a video thumbnail image.
description: One-line video description.
Subclasses of this one should re-define the _real_initialize() and Subclasses of this one should re-define the _real_initialize() and
_real_extract() methods and define a _VALID_URL regexp. _real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors. Probably, they should also be added to the list of extractors.
_real_extract() must return a *list* of information dictionaries as
described above.
""" """
_ready = False _ready = False
@ -475,6 +476,9 @@ def _real_extract(self, url):
# Extension # Extension
video_extension = self._video_extensions.get(format_param, 'flv') video_extension = self._video_extensions.get(format_param, 'flv')
video_format = '{} - {}'.format(format_param.decode('utf-8') if format_param else video_extension.decode('utf-8'),
self._video_dimensions.get(format_param, '???'))
results.append({ results.append({
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_real_url.decode('utf-8'), 'url': video_real_url.decode('utf-8'),
@ -482,7 +486,7 @@ def _real_extract(self, url):
'upload_date': upload_date, 'upload_date': upload_date,
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
'format': (format_param is None and u'NA' or format_param.decode('utf-8')), 'format': video_format,
'thumbnail': video_thumbnail.decode('utf-8'), 'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description, 'description': video_description,
'player_url': player_url, 'player_url': player_url,
@ -616,7 +620,6 @@ def _real_extract(self, url):
'upload_date': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
'format': u'NA',
'player_url': None, 'player_url': None,
}] }]
@ -715,7 +718,6 @@ def _real_extract(self, url):
'upload_date': video_upload_date, 'upload_date': video_upload_date,
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
'format': u'NA',
'player_url': None, 'player_url': None,
}] }]
@ -810,7 +812,6 @@ def _real_extract(self, url):
'upload_date': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
'format': u'NA',
'player_url': None, 'player_url': None,
}] }]
@ -877,7 +878,6 @@ def _real_extract(self, url):
'upload_date': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
'format': u'NA',
'player_url': None, 'player_url': None,
}] }]
@ -1282,7 +1282,6 @@ def _real_extract(self, url):
'upload_date': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
'format': u'NA',
'player_url': None, 'player_url': None,
}] }]
@ -1888,7 +1887,6 @@ def _real_extract(self, url):
'upload_date': u'NA', 'upload_date': u'NA',
'title': file_title, 'title': file_title,
'ext': file_extension.decode('utf-8'), 'ext': file_extension.decode('utf-8'),
'format': u'NA',
'player_url': None, 'player_url': None,
}] }]
@ -2243,7 +2241,6 @@ def _real_extract(self,url):
'upload_date': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'ext': u'flv', 'ext': u'flv',
'format': u'NA',
'player_url': None, 'player_url': None,
}] }]
@ -2501,7 +2498,6 @@ def _real_extract(self, url):
'upload_date': None, 'upload_date': None,
'title': showName, 'title': showName,
'ext': 'flv', 'ext': 'flv',
'format': 'flv',
'thumbnail': imgUrl, 'thumbnail': imgUrl,
'description': description, 'description': description,
'player_url': playerUrl, 'player_url': playerUrl,
@ -2566,7 +2562,6 @@ def _real_extract(self, url):
info['url'] = videoNode.findall('./file')[0].text info['url'] = videoNode.findall('./file')[0].text
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
info['ext'] = info['url'].rpartition('.')[2] info['ext'] = info['url'].rpartition('.')[2]
info['format'] = info['ext']
except IndexError: except IndexError:
self._downloader.trouble(u'\nERROR: Invalid metadata XML file') self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
return return
@ -2637,7 +2632,6 @@ def _real_extract(self, url):
'upload_date': None, 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': 'flv', 'ext': 'flv',
'format': 'flv',
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
'description': None, 'description': None,
'player_url': None, 'player_url': None,
@ -2734,7 +2728,6 @@ def _real_extract(self, url):
'upload_date': upload_date, 'upload_date': upload_date,
'title': title, 'title': title,
'ext': u'mp3', 'ext': u'mp3',
'format': u'NA',
'player_url': None, 'player_url': None,
'description': description.decode('utf-8') 'description': description.decode('utf-8')
}] }]
@ -2802,8 +2795,7 @@ def _real_extract(self, url):
'uploader': None, 'uploader': None,
'upload_date': None, 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': extension, 'ext': extension, # Extension is always(?) mp4, but seems to be flv
'format': extension, # Extension is always(?) mp4, but seems to be flv
'thumbnail': None, 'thumbnail': None,
'description': video_description, 'description': video_description,
'player_url': None, 'player_url': None,
@ -2967,7 +2959,6 @@ def _real_extract(self, url):
self._downloader.trouble(u'\nERROR: Invalid metadata XML file') self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
return return
info['ext'] = info['url'].rpartition('.')[2] info['ext'] = info['url'].rpartition('.')[2]
info['format'] = info['ext']
return [info] return [info]
elif mobj.group('course'): # A course page elif mobj.group('course'): # A course page
course = mobj.group('course') course = mobj.group('course')
@ -3241,7 +3232,6 @@ def _real_extract(self, url):
'uploader': None, 'uploader': None,
'title': video_title, 'title': video_title,
'ext': ext, 'ext': ext,
'format': u'NA'
} }
files_info.append(info) files_info.append(info)
@ -3305,7 +3295,6 @@ def _real_extract(self, url):
'upload_date': None, 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': 'flv', 'ext': 'flv',
'format': 'flv',
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
'description': None, 'description': None,
'player_url': None} 'player_url': None}
@ -3432,6 +3421,5 @@ def _real_extract(self, url):
'upload_date': upload_date.decode('utf-8'), 'upload_date': upload_date.decode('utf-8'),
'title': video_title.decode('utf-8'), 'title': video_title.decode('utf-8'),
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
'format': u'NA',
'player_url': None, 'player_url': None,
}] }]