[xtube] user playlist extractor

This commit is contained in:
fiocfun 2014-03-23 00:16:35 +06:00
parent 0320ddc192
commit 9f5809b3e8
3 changed files with 47 additions and 2 deletions

View File

@ -38,6 +38,7 @@
GenericIE, GenericIE,
TEDIE, TEDIE,
ToypicsUserIE, ToypicsUserIE,
XTubeUserIE,
) )
@ -278,5 +279,13 @@ def test_toypics_user(self):
self.assertEqual(result['id'], 'Mikey') self.assertEqual(result['id'], 'Mikey')
self.assertTrue(len(result['entries']) >= 17) self.assertTrue(len(result['entries']) >= 17)
def test_xtube_user(self):
    # Extract the 'greenshowers' XTube profile and check that the result
    # is a playlist identified by the user name.
    profile_url = 'http://www.xtube.com/community/profile.php?user=greenshowers'
    extractor = XTubeUserIE(FakeYDL())
    playlist = extractor.extract(profile_url)
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], 'greenshowers')
    # Only a lower bound is asserted on the number of entries.
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 155)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -288,7 +288,7 @@
from .xhamster import XHamsterIE from .xhamster import XHamsterIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
from .xvideos import XVideosIE from .xvideos import XVideosIE
from .xtube import XTubeIE from .xtube import XTubeUserIE, XTubeIE
from .yahoo import ( from .yahoo import (
YahooIE, YahooIE,
YahooNewsIE, YahooNewsIE,

View File

@ -76,3 +76,39 @@ def _real_extract(self, url):
'formats': formats, 'formats': formats,
'age_limit': 18, 'age_limit': 18,
} }
class XTubeUserIE(InfoExtractor):
    """Extract all videos listed on an XTube user profile as a playlist."""

    IE_DESC = 'XTube user profile'
    _VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'

    # Page size assumed for user_videos.php when computing how many
    # listing pages to request.
    _PAGE_SIZE = 25

    def _real_extract(self, url):
        """Return a playlist dict with one 'url' entry per video of the user.

        The profile page is downloaded to read the total video count, then
        every listing page is downloaded and the video URLs found in
        ``addthis:url="..."`` attributes are collected.
        """
        mobj = re.match(self._VALID_URL, url)
        username = mobj.group('username')

        profile_page = self._download_webpage(
            url, username, note='Retrieving profile page')

        # The username comes straight from the URL and may contain regex
        # metacharacters ('.', '+', ...), so escape it before embedding it
        # in the pattern.
        video_count = int(self._search_regex(
            r'<strong>%s\'s Videos \(([0-9]+)\)</strong>' % re.escape(username),
            profile_page, 'video count'))

        # Ceiling division: the number of pages needed to hold video_count
        # items. (Using "+ 1" instead of "- 1" here requests a superfluous
        # page whenever video_count % page size is 0 or page size - 1.)
        page_size = self._PAGE_SIZE
        page_count = (video_count + page_size - 1) // page_size

        urls = []
        for n in range(1, page_count + 1):
            lpage_url = 'http://www.xtube.com/user_videos.php?page=%d&u=%s' % (n, username)
            lpage = self._download_webpage(
                lpage_url, username,
                note='Downloading page %d/%d' % (n, page_count))
            urls.extend(
                re.findall(r'addthis:url="([^"]+)"', lpage))

        return {
            '_type': 'playlist',
            'id': username,
            'entries': [{
                '_type': 'url',
                'url': eurl,
                'ie_key': 'XTube',
            } for eurl in urls]
        }