From eb0f3e7ec080549c1df6a104fc59400efd9a992a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 21 Apr 2015 22:36:41 +0600 Subject: [PATCH] [youtube:user] Extract in terms of `load_more_widget_html` --- youtube_dl/extractor/youtube.py | 49 +++------------------------------ 1 file changed, 4 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4ec39c589..0b4038038 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1358,6 +1358,7 @@ def _real_extract(self, url): class YoutubeChannelIE(InfoExtractor): IE_DESC = 'YouTube.com channels' _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' + _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos' IE_NAME = 'youtube:channel' _TESTS = [{ 'note': 'paginated channel', @@ -1386,7 +1387,7 @@ def extract_videos_from_page(self, page): def _real_extract(self, url): channel_id = self._match_id(url) - url = 'https://www.youtube.com/channel/%s/videos' % channel_id + url = self._TEMPLATE_URL % channel_id channel_page = self._download_webpage(url, channel_id) autogenerated = re.search(r'''(?x) class="[^"]*?(?: @@ -1429,12 +1430,10 @@ def _entries(): return self.playlist_result(_entries(), channel_id) -class YoutubeUserIE(InfoExtractor): +class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' - _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' - _GDATA_PAGE_SIZE = 50 - _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' + _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos' IE_NAME = 'youtube:user' _TESTS = [{ @@ -1458,46 +1457,6 @@ def suitable(cls, url): else: return 
super(YoutubeUserIE, cls).suitable(url) - def _real_extract(self, url): - username = self._match_id(url) - - # Download video ids using YouTube Data API. Result size per - # query is limited (currently to 50 videos) so we need to query - # page by page until there are no video ids - it means we got - # all of them. - - def download_page(pagenum): - start_index = pagenum * self._GDATA_PAGE_SIZE + 1 - - gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index) - page = self._download_webpage( - gdata_url, username, - 'Downloading video ids from %d to %d' % ( - start_index, start_index + self._GDATA_PAGE_SIZE)) - - try: - response = json.loads(page) - except ValueError as err: - raise ExtractorError('Invalid JSON in API response: ' + compat_str(err)) - if 'entry' not in response['feed']: - return - - # Extract video identifiers - entries = response['feed']['entry'] - for entry in entries: - title = entry['title']['$t'] - video_id = entry['id']['$t'].split('/')[-1] - yield { - '_type': 'url', - 'url': video_id, - 'ie_key': 'Youtube', - 'id': video_id, - 'title': title, - } - url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE) - - return self.playlist_result(url_results, playlist_title=username) - class YoutubeSearchIE(SearchInfoExtractor): IE_DESC = 'YouTube.com searches'