From bcd6e4bd07fb924e3de37ed46f21c46debdebaa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 1 Jul 2013 16:51:18 +0200
Subject: [PATCH 1/3] YoutubeIE: extract the correct video id for movie URLs (closes #597)

---
 youtube_dl/extractor/youtube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 109c8a93f..656f46f21 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -34,7 +34,7 @@ class YoutubeIE(InfoExtractor):
                          (?:                                                  # the various things that can precede the ID:
                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                              |(?:                                             # or the v= param in all its forms
-                                 (?:watch(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch or movie (optionally _popup/.php) or nothing (like /?v=xxxx)
                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                  v=

From d828f3a5500b29f30c702e6aa34add6e29370b2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 1 Jul 2013 17:19:33 +0200
Subject: [PATCH 2/3] YoutubeIE: use a negative index when accessing the last element of the format list

---
 youtube_dl/extractor/youtube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 656f46f21..6782bbff6 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -586,7 +586,7 @@ def _real_extract(self, url):
             if req_format is None or req_format == 'best':
                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
             elif req_format == 'worst':
-                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
+                video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
             elif req_format in ('-1', 'all'):
                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
             else:

From 75dff0eef753f7bbef2947449441f4f6b2e13547 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 1 Jul 2013 17:59:28 +0200
Subject: [PATCH 3/3] [youtube]: add YoutubeShowIE (closes #14)

It just extracts the playlists urls for each season
---
 test/test_youtube_lists.py       |  8 +++++++-
 youtube_dl/extractor/__init__.py |  2 +-
 youtube_dl/extractor/youtube.py  | 14 ++++++++++++++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 4486b7eb0..dd9e292b0 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -8,7 +8,7 @@
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE
+from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE
 from youtube_dl.utils import *
 
 from helper import FakeYDL
@@ -88,5 +88,11 @@ def test_youtube_safe_search(self):
         result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
         self.assertEqual(len(result['entries']), 2)
 
+    def test_youtube_show(self):
+        dl = FakeYDL()
+        ie = YoutubeShowIE(dl)
+        result = ie.extract('http://www.youtube.com/show/airdisasters')
+        self.assertTrue(len(result) >= 4)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index ba0e86713..ac2e5f0e7 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -67,7 +67,7 @@
 from .youjizz import YouJizzIE
 from .youku import YoukuIE
 from .youporn import YouPornIE
-from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
+from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE
 from .zdf import ZDFIE
 
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6782bbff6..76b297ea5 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -853,3 +853,17 @@ def _get_n_results(self, query, n):
             video_ids = video_ids[:n]
         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
         return self.playlist_result(videos, query)
+
+
+class YoutubeShowIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
+    IE_NAME = u'youtube:show'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_name = mobj.group(1)
+        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
+        # There's one playlist for each season of the show
+        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
+        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
+        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]