From 1d45a23b745cdbb361dd5cef8f848f7ebcfa8f5a Mon Sep 17 00:00:00 2001
From: rzhxeo
Date: Sat, 26 Oct 2013 23:27:30 +0200
Subject: [PATCH 1/3] Add support for http://www.tube8.com

---
Note: _VALID_URL must keep its two named groups, `url` and `videoid`:
_real_extract reads both of them through mobj.group(...).

 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/tube8.py    | 63 ++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 youtube_dl/extractor/tube8.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index db69af3619..84fc2e4fac 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -121,6 +121,7 @@ from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
+from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py
new file mode 100644
index 0000000000..b7e7d984d0
--- /dev/null
+++ b/youtube_dl/extractor/tube8.py
@@ -0,0 +1,63 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+    unescapeHTML,
+)
+from ..aes import (
+    aes_decrypt_text
+)
+
+class Tube8IE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
+    _TEST = {
+        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
+        u'file': u'229795.mp4',
+        u'md5': u'e9e0b0c86734e5e3766e653509475db0',
+        u'info_dict': {
+            u"description": u"hot teen Kasia grinding",
+            u"uploader": u"unknown",
+            u"title": u"Kasia music video",
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'videotitle ="([^"]+)', webpage, u'title')
+        video_description = self._html_search_regex(r'>Description:(.+?)<', webpage, u'description', fatal=False)
+        video_uploader = self._html_search_regex(r'>Submitted by:(?:\w|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
+        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
+        if thumbnail:
+            thumbnail = thumbnail.replace('\\/', '/')
+
+        video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url')
+        if webpage.find('"encrypted":true')!=-1:
+            password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
+            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+        path = compat_urllib_parse_urlparse( video_url ).path
+        extension = os.path.splitext( path )[1][1:]
+        format = path.split('/')[4].split('_')[:2]
+        format = "-".join( format )
+
+        return {
+            'id': video_id,
+            'uploader': video_uploader,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'description': video_description,
+            'url': video_url,
+            'ext': extension,
+            'format': format,
+            'format_id': format,
+        }

From 8cb57d9b91cce72b522d89b5e3e469c433956a07 Mon Sep 17 00:00:00 2001
From: rzhxeo
Date: Sun, 27 Oct 2013 00:21:27 +0200
Subject: [PATCH 2/3] [Tube8IE] Escape dot in regex

---
 youtube_dl/extractor/tube8.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py
index b7e7d984d0..ef8d216421 100644
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -13,7 +13,7 @@ from ..aes import (
 )
 
 class Tube8IE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
     _TEST = {
         u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
         u'file': u'229795.mp4',

From 71865091abbb0166edeffff14da019542260557f Mon Sep 17 00:00:00 2001
From: rzhxeo
Date: Sun, 27 Oct 2013 01:08:03 +0200
Subject: [PATCH 3/3] [Tube8IE] Fix regex for uploader extraction

---
 youtube_dl/extractor/tube8.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py
index ef8d216421..ebc8c1f4f1 100644
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -36,7 +36,7 @@ class Tube8IE(InfoExtractor):
 
         video_title = self._html_search_regex(r'videotitle ="([^"]+)', webpage, u'title')
         video_description = self._html_search_regex(r'>Description:(.+?)<', webpage, u'description', fatal=False)
-        video_uploader = self._html_search_regex(r'>Submitted by:(?:\w|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
+        video_uploader = self._html_search_regex(r'>Submitted by:(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
         if thumbnail:
             thumbnail = thumbnail.replace('\\/', '/')