diff --git a/youtube-dl b/youtube-dl index 3ac27a8570..a6d0ce4344 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,6 +15,7 @@ import email.utils import gzip import htmlentitydefs import httplib +import json # TODO: json for 2.5 import locale import math import netrc @@ -2563,6 +2564,80 @@ class FacebookIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'\nERROR: unable to download video') +class BlipTVIE(InfoExtractor): + """Information extractor for blip.tv""" + + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv/(.+)$' + _URL_EXT = r'^.*\.([a-z0-9]+)$' + + @staticmethod + def suitable(url): + return (re.match(BlipTVIE._VALID_URL, url) is not None) + + def report_download_webpage(self, file_id): + """Report webpage download.""" + self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.service_name, file_id)) + + def report_extraction(self, file_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.service_name, file_id)) + + @property + def service_name(self): + return u'blip.tv' + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + json_url = url + ('&' if '?' in url else '?') + 'skin=json&version=2&no_wrap=1' + request = urllib2.Request(json_url) + try: + json_code = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + return + try: + json_data = json.loads(json_code) + data = json_data['Post'] + + upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') + video_url = data['media']['url'] + umobj = re.match(self._URL_EXT, video_url) + if umobj is None: + raise ValueError('Can not determine filename extension') + ext = umobj.group(1) + + info = { + 'id': data['item_id'], + 'url': video_url, + 'uploader': data['display_name'], + 'upload_date': upload_date, + 'title': data['title'], + 'stitle': self._simplify_title(data['title']), + 'ext': ext, + 'format': data['media']['mimeType'], + 'thumbnail': data['thumbnailUrl'], + 'description': data['description'], + 'player_url': data['embedUrl'] + } + except (ValueError,KeyError), err: + self._downloader.trouble(u'ERROR: unable to parse video information: %s' % str(err)) + return + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download video') + + class PostProcessor(object): """Post Processor class. @@ -2911,6 +2986,7 @@ if __name__ == '__main__': yahoo_search_ie = YahooSearchIE(yahoo_ie) deposit_files_ie = DepositFilesIE() facebook_ie = FacebookIE() + bliptv_ie = BlipTVIE() generic_ie = GenericIE() # File downloader @@ -2963,6 +3039,7 @@ if __name__ == '__main__': fd.add_info_extractor(yahoo_search_ie) fd.add_info_extractor(deposit_files_ie) fd.add_info_extractor(facebook_ie) + fd.add_info_extractor(bliptv_ie) # This must come last since it's the # fallback if none of the others work