diff --git a/youtube_dl/extractor/academicearth.py b/youtube_dl/extractor/academicearth.py index ac05f8246..72f81d01a 100644 --- a/youtube_dl/extractor/academicearth.py +++ b/youtube_dl/extractor/academicearth.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import re from .common import InfoExtractor @@ -5,7 +6,7 @@ class AcademicEarthCourseIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P[^?#/]+)' - IE_NAME = u'AcademicEarth:Course' + IE_NAME = 'AcademicEarth:Course' def _real_extract(self, url): m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index e7361ae06..922cede05 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json @@ -11,46 +13,46 @@ class AppleTrailersIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P[^/]+)/(?P[^/]+)' _TEST = { - u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/", - u"playlist": [ + "url": "http://trailers.apple.com/trailers/wb/manofsteel/", + "playlist": [ { - u"file": u"manofsteel-trailer4.mov", - u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8", - u"info_dict": { - u"duration": 111, - u"title": u"Trailer 4", - u"upload_date": u"20130523", - u"uploader_id": u"wb", + "file": "manofsteel-trailer4.mov", + "md5": "d97a8e575432dbcb81b7c3acb741f8a8", + "info_dict": { + "duration": 111, + "title": "Trailer 4", + "upload_date": "20130523", + "uploader_id": "wb", }, }, { - u"file": u"manofsteel-trailer3.mov", - u"md5": u"b8017b7131b721fb4e8d6f49e1df908c", - u"info_dict": { - u"duration": 182, - u"title": u"Trailer 3", - u"upload_date": u"20130417", - u"uploader_id": u"wb", + "file": "manofsteel-trailer3.mov", + "md5": "b8017b7131b721fb4e8d6f49e1df908c", + "info_dict": { + "duration": 182, + "title": "Trailer 3", + "upload_date": "20130417", + "uploader_id": "wb", }, }, { - u"file": u"manofsteel-trailer.mov", - u"md5": u"d0f1e1150989b9924679b441f3404d48", - u"info_dict": { - u"duration": 148, - u"title": u"Trailer", - u"upload_date": u"20121212", - u"uploader_id": u"wb", + "file": "manofsteel-trailer.mov", + "md5": "d0f1e1150989b9924679b441f3404d48", + "info_dict": { + "duration": 148, + "title": "Trailer", + "upload_date": "20121212", + "uploader_id": "wb", }, }, { - u"file": u"manofsteel-teaser.mov", - u"md5": u"5fe08795b943eb2e757fa95cb6def1cb", - u"info_dict": { - u"duration": 93, - u"title": u"Teaser", - u"upload_date": u"20120721", - u"uploader_id": u"wb", + "file": "manofsteel-teaser.mov", + "md5": "5fe08795b943eb2e757fa95cb6def1cb", + "info_dict": { + "duration": 93, + "title": "Teaser", + "upload_date": "20120721", + "uploader_id": "wb", }, } ] diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 8bb546410..497ce97ac 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import json import re @@ -13,14 +15,14 @@ class ArchiveOrgIE(InfoExtractor): IE_DESC = 'archive.org videos' _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P[^?/]+)(?:[?].*)?$' _TEST = { - u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", - u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', - u'md5': u'8af1d4cf447933ed3c7f4871162602db', - u'info_dict': { - u"title": u"1968 Demo - FJCC Conference Presentation Reel #1", - u"description": u"Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also Doug's 1968 Demo page for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | Reel 2 | Reel 3", - u"upload_date": u"19681210", - u"uploader": u"SRI International" + "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", + 'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', + 'md5': '8af1d4cf447933ed3c7f4871162602db', + 'info_dict': { + "title": "1968 Demo - FJCC Conference Presentation Reel #1", + "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also Doug's 1968 Demo page for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | Reel 2 | Reel 3", + "upload_date": "19681210", + "uploader": "SRI International" } } @@ -29,7 +31,7 @@ def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - json_url = url + (u'?' if u'?' in url else '&') + u'output=json' + json_url = url + ('?' if '?' in url else '&') + 'output=json' json_data = self._download_webpage(json_url, video_id) data = json.loads(json_data) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 9254fbfe0..7cf3785ac 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,4 +1,6 @@ # encoding: utf-8 +from __future__ import unicode_literals + import re import json @@ -22,7 +24,7 @@ class ArteTvIE(InfoExtractor): _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?Pfr|de)/(?P.+?)/(?P.+)' _LIVE_URL = r'index-[0-9]+\.html$' - IE_NAME = u'arte.tv' + IE_NAME = 'arte.tv' @classmethod def suitable(cls, url): @@ -37,7 +39,7 @@ def suitable(cls, url): # r'src="(.*?/videothek_js.*?\.js)', # 0, # [ - # (1, 'url', u'Invalid URL: %s' % url) + # (1, 'url', 'Invalid URL: %s' % url) # ] # ) # http_host = url.split('/')[2] @@ -49,12 +51,12 @@ def suitable(cls, url): # '(rtmp://.*?)\'', # re.DOTALL, # [ - # (1, 'path', u'could not extract video path: %s' % url), - # (2, 'player', u'could not extract video player: %s' % url), - # (3, 'url', u'could not extract video url: %s' % url) + # (1, 'path', 'could not extract video path: %s' % url), + # (2, 'player', 'could not extract video player: %s' % url), + # (3, 'url', 'could not extract video url: %s' % url) # ] # ) - # video_url = u'%s/%s' % (info.get('url'), info.get('path')) + # video_url = '%s/%s' % (info.get('url'), info.get('path')) def _real_extract(self, url): mobj = re.match(self._VIDEOS_URL, url) @@ -107,9 +109,9 @@ def _key(m): def _extract_liveweb(self, url, name, lang): """Extract form http://liveweb.arte.tv/""" webpage = self._download_webpage(url, name) - video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') + video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id') config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, - video_id, u'Downloading information') + video_id, 'Downloading information') event_doc = config_doc.find('event') url_node = event_doc.find('video').find('urlHd') if url_node is None: @@ -124,7 +126,7 @@ def _extract_liveweb(self, url, name, lang): class ArteTVPlus7IE(InfoExtractor): - IE_NAME = u'arte.tv:+7' + IE_NAME = 'arte.tv:+7' _VALID_URL = r'https?://www\.arte.tv/guide/(?Pfr|de)/(?:(?:sendungen|emissions)/)?(?P.*?)/(?P.*?)(\?.*)?' @classmethod @@ -207,7 +209,7 @@ def _format(format_info): if bitrate is not None: quality += '-%d' % bitrate if format_info.get('versionCode') is not None: - format_id = u'%s-%s' % (quality, format_info['versionCode']) + format_id = '%s-%s' % (quality, format_info['versionCode']) else: format_id = quality info = { @@ -216,7 +218,7 @@ def _format(format_info): 'width': format_info.get('width'), 'height': height, } - if format_info['mediaType'] == u'rtmp': + if format_info['mediaType'] == 'rtmp': info['url'] = format_info['streamer'] info['play_path'] = 'mp4:' + format_info['url'] info['ext'] = 'flv' @@ -231,27 +233,27 @@ def _format(format_info): # It also uses the arte_vp_url url from the webpage to extract the information class ArteTVCreativeIE(ArteTVPlus7IE): - IE_NAME = u'arte.tv:creative' + IE_NAME = 'arte.tv:creative' _VALID_URL = r'https?://creative\.arte\.tv/(?Pfr|de)/magazine?/(?P.+)' _TEST = { - u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', - u'file': u'050489-002.mp4', - u'info_dict': { - u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design', + 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', + 'file': '050489-002.mp4', + 'info_dict': { + 'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design', }, } class ArteTVFutureIE(ArteTVPlus7IE): - IE_NAME = u'arte.tv:future' + IE_NAME = 'arte.tv:future' _VALID_URL = r'https?://future\.arte\.tv/(?Pfr|de)/(thema|sujet)/.*?#article-anchor-(?P\d+)' _TEST = { - u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', - u'file': u'050940-003.mp4', - u'info_dict': { - u'title': u'Les champignons au secours de la planète', + 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', + 'file': '050940-003.mp4', + 'info_dict': { + 'title': 'Les champignons au secours de la planète', }, } @@ -263,7 +265,7 @@ def _real_extract(self, url): class ArteTVDDCIE(ArteTVPlus7IE): - IE_NAME = u'arte.tv:ddc' + IE_NAME = 'arte.tv:ddc' _VALID_URL = r'http?://ddc\.arte\.tv/(?Pemission|folge)/(?P.+)' def _real_extract(self, url): diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index bcccc0b7a..c6f30e626 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -7,13 +9,14 @@ ExtractorError, ) + class AUEngineIE(InfoExtractor): _TEST = { - u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', - u'file': u'lfvlytY6.mp4', - u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f', - u'info_dict': { - u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]" + 'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', + 'file': 'lfvlytY6.mp4', + 'md5': '48972bdbcf1a3a2f5533e62425b41d4f', + 'info_dict': { + 'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]' } } _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?' @@ -23,7 +26,7 @@ def _real_extract(self, url): video_id = mobj.group(1) webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'(?P<title>.+?)', - webpage, u'title') + webpage, 'title') title = title.strip() links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) links = map(compat_urllib_parse.unquote, links) @@ -37,7 +40,7 @@ def _real_extract(self, url): video_url = link if not video_url: raise ExtractorError(u'Could not find video URL') - ext = u'.' + determine_ext(video_url) + ext = '.' + determine_ext(video_url) if ext == title[-len(ext):]: title = title[:-len(ext)] diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index d48c0c38d..ccd31c4c7 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json import itertools @@ -9,26 +11,26 @@ class BambuserIE(InfoExtractor): - IE_NAME = u'bambuser' + IE_NAME = 'bambuser' _VALID_URL = r'https?://bambuser\.com/v/(?P\d+)' _API_KEY = '005f64509e19a868399060af746a00aa' _TEST = { - u'url': u'http://bambuser.com/v/4050584', + 'url': 'http://bambuser.com/v/4050584', # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388 - #u'md5': u'fba8f7693e48fd4e8641b3fd5539a641', - u'info_dict': { - u'id': u'4050584', - u'ext': u'flv', - u'title': u'Education engineering days - lightning talks', - u'duration': 3741, - u'uploader': u'pixelversity', - u'uploader_id': u'344706', + #u'md5': 'fba8f7693e48fd4e8641b3fd5539a641', + 'info_dict': { + 'id': '4050584', + 'ext': 'flv', + 'title': 'Education engineering days - lightning talks', + 'duration': 3741, + 'uploader': 'pixelversity', + 'uploader_id': '344706', }, - u'params': { + 'params': { # It doesn't respect the 'Range' header, it would download the whole video # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59 - u'skip_download': True, + 'skip_download': True, }, } @@ -53,7 +55,7 @@ def _real_extract(self, url): class BambuserChannelIE(InfoExtractor): - IE_NAME = u'bambuser:channel' + IE_NAME = 'bambuser:channel' _VALID_URL = r'https?://bambuser\.com/channel/(?P.*?)(?:/|#|\?|$)' # The maximum number we can get with each request _STEP = 50 @@ -72,7 +74,7 @@ def _real_extract(self, url): # Without setting this header, we wouldn't get any result req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) info_json = self._download_webpage(req, user, - u'Downloading page %d' % i) + 'Downloading page %d' % i) results = json.loads(info_json)['result'] if len(results) == 0: break diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 15aee2786..87932ba1c 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import json import re @@ -12,14 +14,14 @@ class BandcampIE(InfoExtractor): _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P.*)' _TESTS = [{ - u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', - u'file': u'1812978515.mp3', - u'md5': u'c557841d5e50261777a6585648adf439', - u'info_dict': { - u"title": u"youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", - u"duration": 10, + 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', + 'file': '1812978515.mp3', + 'md5': 'c557841d5e50261777a6585648adf439', + 'info_dict': { + "title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", + "duration": 10, }, - u'skip': u'There is a limit of 200 free downloads / month for the test song' + '_skip': 'There is a limit of 200 free downloads / month for the test song' }] def _real_extract(self, url): @@ -58,7 +60,7 @@ def _real_extract(self, url): 'duration': duration, } else: - raise ExtractorError(u'No free songs found') + raise ExtractorError('No free songs found') download_link = m_download.group(1) video_id = re.search( @@ -72,9 +74,9 @@ def _real_extract(self, url): download_webpage, re.MULTILINE).group(1) info = json.loads(info)[0] # We pick mp3-320 for now, until format selection can be easily implemented. - mp3_info = info[u'downloads'][u'mp3-320'] + mp3_info = info['downloads']['mp3-320'] # If we try to use this url it says the link has expired - initial_url = mp3_info[u'url'] + initial_url = mp3_info['url'] re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' m_url = re.match(re_url, initial_url) #We build the url we will use to get the final track url @@ -87,41 +89,41 @@ def _real_extract(self, url): return { 'id': video_id, - 'title': info[u'title'], + 'title': info['title'], 'ext': 'mp3', 'vcodec': 'none', 'url': final_url, - 'thumbnail': info[u'thumb_url'], - 'uploader': info[u'artist'], + 'thumbnail': info['thumb_url'], + 'uploader': info['artist'], } class BandcampAlbumIE(InfoExtractor): - IE_NAME = u'Bandcamp:album' + IE_NAME = 'Bandcamp:album' _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)' _TEST = { - u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', - u'playlist': [ + 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', + 'playlist': [ { - u'file': u'1353101989.mp3', - u'md5': u'39bc1eded3476e927c724321ddf116cf', - u'info_dict': { - u'title': u'Intro', + 'file': '1353101989.mp3', + 'md5': '39bc1eded3476e927c724321ddf116cf', + 'info_dict': { + 'title': 'Intro', } }, { - u'file': u'38097443.mp3', - u'md5': u'1a2c32e2691474643e912cc6cd4bffaa', - u'info_dict': { - u'title': u'Kero One - Keep It Alive (Blazo remix)', + 'file': '38097443.mp3', + 'md5': '1a2c32e2691474643e912cc6cd4bffaa', + 'info_dict': { + 'title': 'Kero One - Keep It Alive (Blazo remix)', } }, ], - u'params': { - u'playlistend': 2 + 'params': { + 'playlistend': 2 }, - u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' + 'skip': 'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' } def _real_extract(self, url): @@ -130,11 +132,11 @@ def _real_extract(self, url): webpage = self._download_webpage(url, title) tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) if not tracks_paths: - raise ExtractorError(u'The page doesn\'t contain any tracks') + raise ExtractorError('The page doesn\'t contain any tracks') entries = [ self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) for t_path in tracks_paths] - title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title') + title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title') return { '_type': 'playlist', 'title': title, diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index ecac5e0e9..c9e7cc561 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -12,14 +14,14 @@ class CNNIE(InfoExtractor): (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))''' _TESTS = [{ - u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', - u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4', - u'md5': u'3e6121ea48df7e2259fe73a0628605c4', - u'info_dict': { - u'title': u'Nadal wins 8th French Open title', - u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', - u'duration': 135, - u'upload_date': u'20130609', + 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', + 'file': 'sports_2013_06_09_nadal-1-on-1.cnn.mp4', + 'md5': '3e6121ea48df7e2259fe73a0628605c4', + 'info_dict': { + 'title': 'Nadal wins 8th French Open title', + 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', + 'duration': 135, + 'upload_date': '20130609', }, }, { @@ -36,7 +38,7 @@ def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) path = mobj.group('path') page_title = mobj.group('title') - info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path + info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path info = self._download_xml(info_url, page_title) formats = []