Allow specifying which IE should be used to extract info for a result of type url

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-04-20 12:50:14 +02:00
parent 9341212642
commit 6de8f1afb7
2 changed files with 21 additions and 12 deletions

View File

@ -17,6 +17,7 @@
import ctypes import ctypes
from .utils import * from .utils import *
from .InfoExtractors import get_info_extractor
class FileDownloader(object): class FileDownloader(object):
@ -425,13 +426,23 @@ def _match_entry(self, info_dict):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
return None return None
def extract_info(self, url, download = True): def extract_info(self, url, download = True, ie_name = None):
''' '''
Returns a list with a dictionary for each video we find. Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos. If 'download', also downloads the videos.
''' '''
suitable_found = False suitable_found = False
for ie in self._ies:
#We copy the original list
ies = list(self._ies)
if ie_name is not None:
#We put in the first place the given info extractor
first_ie = get_info_extractor(ie_name)()
first_ie.set_downloader(self)
ies.insert(0, first_ie)
for ie in ies:
# Go to next InfoExtractor if not suitable # Go to next InfoExtractor if not suitable
if not ie.suitable(url): if not ie.suitable(url):
continue continue
@ -486,7 +497,7 @@ def process_ie_result(self, ie_result, download = True):
return ie_result return ie_result
elif result_type == 'url': elif result_type == 'url':
#We get the video pointed by the url #We get the video pointed by the url
result = self.extract_info(ie_result['url'], download)[0] result = self.extract_info(ie_result['url'], download, ie_name = ie_result['ie_key'])[0]
return result return result
elif result_type == 'playlist': elif result_type == 'playlist':
#We process each entry in the playlist #We process each entry in the playlist

View File

@ -154,7 +154,8 @@ def url_result(self, url, ie=None):
"""Returns a url that points to a page that should be processed""" """Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info #TODO: ie should be the class used for getting the info
video_info = {'_type': 'url', video_info = {'_type': 'url',
'url': url} 'url': url,
'ie_key': ie}
return video_info return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None): def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist""" """Returns a playlist"""
@ -728,7 +729,7 @@ def _real_extract(self, url):
# Check if video comes from YouTube # Check if video comes from YouTube
mobj2 = re.match(r'^yt-(.*)$', video_id) mobj2 = re.match(r'^yt-(.*)$', video_id)
if mobj2 is not None: if mobj2 is not None:
return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1))] return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
webpage = self._download_webpage('http://www.metacafe.com/watch/%s/' % video_id, video_id) webpage = self._download_webpage('http://www.metacafe.com/watch/%s/' % video_id, video_id)
@ -1810,7 +1811,7 @@ def _real_extract(self, url):
videos = [v[1] for v in sorted(videos)] videos = [v[1] for v in sorted(videos)]
url_results = [self.url_result(url) for url in videos] url_results = [self.url_result(url, 'Youtube') for url in videos]
return [self.playlist_result(url_results, playlist_id)] return [self.playlist_result(url_results, playlist_id)]
@ -1884,7 +1885,7 @@ def _real_extract(self, url):
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
url_entries = [self.url_result(url) for url in urls] url_entries = [self.url_result(url, 'Youtube') for url in urls]
return [self.playlist_result(url_entries, channel_id)] return [self.playlist_result(url_entries, channel_id)]
@ -1956,7 +1957,7 @@ def _real_extract(self, url):
pagenum += 1 pagenum += 1
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
url_results = [self.url_result(url) for url in urls] url_results = [self.url_result(url, 'Youtube') for url in urls]
return [self.playlist_result(url_results, playlist_title = username)] return [self.playlist_result(url_results, playlist_title = username)]
@ -2035,11 +2036,8 @@ def _real_extract(self, url):
pagenum += 1 pagenum += 1
self._downloader.to_screen(u"[%s] user %s: Collected %d video ids (downloading %d of them)" %
(self.IE_NAME, username, all_ids_count, len(video_ids)))
urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
url_entries = [self.url_result(url) for url in urls] url_entries = [self.url_result(url, 'BlipTV') for url in urls]
return [self.playlist_result(url_entries, playlist_title = username)] return [self.playlist_result(url_entries, playlist_title = username)]