1
1
mirror of https://github.com/ytdl-org/youtube-dl synced 2025-01-26 12:27:33 +01:00

Merge pull request #2041 from dstftw/imdb-list

[imdb] Add support for IMDb list (#2033)
This commit is contained in:
Jaime Marquínez Ferrándiz 2014-01-01 12:42:59 +01:00
commit 4fb757d1e0
3 changed files with 44 additions and 2 deletions

View File

@ -28,7 +28,8 @@ from youtube_dl.extractor import (
BandcampAlbumIE, BandcampAlbumIE,
SmotriCommunityIE, SmotriCommunityIE,
SmotriUserIE, SmotriUserIE,
IviCompilationIE IviCompilationIE,
ImdbListIE,
) )
@ -188,6 +189,15 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон') self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
self.assertTrue(len(result['entries']) >= 20) self.assertTrue(len(result['entries']) >= 20)
def test_imdb_list(self):
dl = FakeYDL()
ie = ImdbListIE(dl)
result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'sMjedvGDd8U')
self.assertEqual(result['title'], u'Animated and Family Films')
self.assertTrue(len(result['entries']) >= 48)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -80,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .hypem import HypemIE from .hypem import HypemIE
from .ign import IGNIE, OneUPIE from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE from .imdb import (
ImdbIE,
ImdbListIE
)
from .ina import InaIE from .ina import InaIE
from .infoq import InfoQIE from .infoq import InfoQIE
from .instagram import InstagramIE from .instagram import InstagramIE

View File

@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):
'description': descr, 'description': descr,
'thumbnail': format_info['slate'], 'thumbnail': format_info['slate'],
} }
class ImdbListIE(InfoExtractor):
IE_NAME = u'imdb:list'
IE_DESC = u'Internet Movie Database lists'
_VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
list_id = mobj.group('id')
# RSS XML is sometimes malformed
rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS')
list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, u'list title')
# Export is independent of actual author_id, but returns 404 if no author_id is provided.
# However, passing dummy author_id seems to be enough.
csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id,
list_id, u'Downloading list CSV')
entries = []
for item in csv.split('\n')[1:]:
cols = item.split(',')
if len(cols) < 2:
continue
item_id = cols[1][1:-1]
if item_id.startswith('vi'):
entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb'))
return self.playlist_result(entries, list_id, list_title)