mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 15:16:52 +01:00
[youtube] Move more tests to extractors
This commit is contained in:
parent
69ea8ca42c
commit
cdc628a498
@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
YoutubeUserIE,
|
|
||||||
YoutubePlaylistIE,
|
YoutubePlaylistIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
@ -43,28 +42,6 @@ def test_youtube_course(self):
|
|||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||||
|
|
||||||
def test_youtube_channel(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeChannelIE(dl)
|
|
||||||
#test paginated channel
|
|
||||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
|
||||||
self.assertTrue(len(result['entries']) > 90)
|
|
||||||
#test autogenerated channel
|
|
||||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
|
||||||
self.assertTrue(len(result['entries']) >= 18)
|
|
||||||
|
|
||||||
def test_youtube_user(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeUserIE(dl)
|
|
||||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
|
||||||
self.assertTrue(len(result['entries']) >= 320)
|
|
||||||
|
|
||||||
def test_youtube_show(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeShowIE(dl)
|
|
||||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
|
||||||
self.assertTrue(len(result) >= 3)
|
|
||||||
|
|
||||||
def test_youtube_mix(self):
|
def test_youtube_mix(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
@ -83,21 +60,5 @@ def test_youtube_toptracks(self):
|
|||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertEqual(len(entries), 100)
|
self.assertEqual(len(entries), 100)
|
||||||
|
|
||||||
def test_youtube_toplist(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeTopListIE(dl)
|
|
||||||
result = ie.extract('yttoplist:music:Trending')
|
|
||||||
entries = result['entries']
|
|
||||||
self.assertTrue(len(entries) >= 5)
|
|
||||||
|
|
||||||
def test_youtube_search_url(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeSearchURLIE(dl)
|
|
||||||
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
|
|
||||||
entries = result['entries']
|
|
||||||
self.assertIsPlaylist(result)
|
|
||||||
self.assertEqual(result['title'], 'youtube-dl test video')
|
|
||||||
self.assertTrue(len(entries) >= 5)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -1160,16 +1160,25 @@ class YoutubeTopListIE(YoutubePlaylistIE):
|
|||||||
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
||||||
' (Example: "yttoplist:music:Top Tracks")')
|
' (Example: "yttoplist:music:Top Tracks")')
|
||||||
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
|
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
|
||||||
_TESTS = []
|
_TESTS = [{
|
||||||
|
'url': 'yttoplist:music:Trending',
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
'skip': 'Only works for logged-in users',
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
channel = mobj.group('chann')
|
channel = mobj.group('chann')
|
||||||
title = mobj.group('title')
|
title = mobj.group('title')
|
||||||
query = compat_urllib_parse.urlencode({'title': title})
|
query = compat_urllib_parse.urlencode({'title': title})
|
||||||
playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
|
channel_page = self._download_webpage(
|
||||||
channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
|
'https://www.youtube.com/%s' % channel, title)
|
||||||
link = self._html_search_regex(playlist_re, channel_page, 'list')
|
link = self._html_search_regex(
|
||||||
|
r'''(?x)
|
||||||
|
<a\s+href="([^"]+)".*?>\s*
|
||||||
|
<span\s+class="branded-page-module-title-text">\s*
|
||||||
|
<span[^>]*>.*?%s.*?</span>''' % re.escape(query),
|
||||||
|
channel_page, 'list')
|
||||||
url = compat_urlparse.urljoin('https://www.youtube.com/', link)
|
url = compat_urlparse.urljoin('https://www.youtube.com/', link)
|
||||||
|
|
||||||
video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
|
video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
|
||||||
@ -1195,6 +1204,11 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
|
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
|
||||||
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
|
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
|
||||||
IE_NAME = 'youtube:channel'
|
IE_NAME = 'youtube:channel'
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'paginated channel',
|
||||||
|
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||||
|
'playlist_mincount': 91,
|
||||||
|
}]
|
||||||
|
|
||||||
def extract_videos_from_page(self, page):
|
def extract_videos_from_page(self, page):
|
||||||
ids_in_page = []
|
ids_in_page = []
|
||||||
@ -1253,6 +1267,17 @@ class YoutubeUserIE(InfoExtractor):
|
|||||||
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
||||||
IE_NAME = 'youtube:user'
|
IE_NAME = 'youtube:user'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.youtube.com/user/TheLinuxFoundation',
|
||||||
|
'playlist_mincount': 320,
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'TheLinuxFoundation',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'ytuser:phihag',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
# Don't return True if the url can be extracted with other youtube
|
# Don't return True if the url can be extracted with other youtube
|
||||||
@ -1361,6 +1386,13 @@ class YoutubeSearchURLIE(InfoExtractor):
|
|||||||
IE_DESC = 'YouTube.com search URLs'
|
IE_DESC = 'YouTube.com search URLs'
|
||||||
IE_NAME = 'youtube:search_url'
|
IE_NAME = 'youtube:search_url'
|
||||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
|
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'youtube-dl test video',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -1395,17 +1427,38 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
class YoutubeShowIE(InfoExtractor):
|
class YoutubeShowIE(InfoExtractor):
|
||||||
IE_DESC = 'YouTube.com (multi-season) shows'
|
IE_DESC = 'YouTube.com (multi-season) shows'
|
||||||
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
|
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
|
||||||
IE_NAME = 'youtube:show'
|
IE_NAME = 'youtube:show'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.youtube.com/show/airdisasters',
|
||||||
|
'playlist_mincount': 3,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'airdisasters',
|
||||||
|
'title': 'Air Disasters',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
show_name = mobj.group(1)
|
playlist_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, show_name, 'Downloading show webpage')
|
webpage = self._download_webpage(
|
||||||
|
url, playlist_id, 'Downloading show webpage')
|
||||||
# There's one playlist for each season of the show
|
# There's one playlist for each season of the show
|
||||||
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
||||||
self.to_screen('%s: Found %s seasons' % (show_name, len(m_seasons)))
|
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
|
||||||
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
entries = [
|
||||||
|
self.url_result(
|
||||||
|
'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
|
||||||
|
for season in m_seasons
|
||||||
|
]
|
||||||
|
title = self._og_search_title(webpage, fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': playlist_id,
|
||||||
|
'title': title,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
|
Loading…
Reference in New Issue
Block a user