From 40e146aa1e1a8fd57d3f84b0a541174f56fa5dba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Feb 2016 22:29:17 +0600 Subject: [PATCH] [pornhub:user:videos] Add extractor (Closes #8548) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/pornhub.py | 55 ++++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4049141d9..1edbfbd28 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -555,6 +555,7 @@ from .pornhub import ( PornHubIE, PornHubPlaylistIE, + PornHubUserVideosIE, ) from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 91e574dc2..405dbf006 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -129,7 +129,31 @@ def _real_extract(self, url): } -class PornHubPlaylistIE(InfoExtractor): +class PornHubPlaylistBaseIE(InfoExtractor): + def _extract_entries(self, webpage): + return [ + self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub') + for video_url in set(re.findall( + r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage)) + ] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = self._extract_entries(webpage) + + playlist = self._parse_json( + self._search_regex( + r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'), + playlist_id) + + return self.playlist_result( + entries, playlist_id, playlist.get('title'), playlist.get('description')) + + +class PornHubPlaylistIE(PornHubPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.pornhub.com/playlist/6201671', @@ -140,21 +164,20 @@ class PornHubPlaylistIE(InfoExtractor): 'playlist_mincount': 35, }] + +class 
PornHubUserVideosIE(PornHubPlaylistBaseIE): + _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos' + _TESTS = [{ + 'url': 'http://www.pornhub.com/users/rushandlia/videos', + 'info_dict': { + 'id': 'rushandlia', + }, + 'playlist_mincount': 13, + }] + def _real_extract(self, url): - playlist_id = self._match_id(url) + user_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) + webpage = self._download_webpage(url, user_id) - entries = [ - self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub') - for video_url in set(re.findall( - r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage)) - ] - - playlist = self._parse_json( - self._search_regex( - r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'), - playlist_id) - - return self.playlist_result( - entries, playlist_id, playlist.get('title'), playlist.get('description')) + return self.playlist_result(self._extract_entries(webpage), user_id)