From 5f432ac8f552b06e715c5e17165328dc76d9c1b5 Mon Sep 17 00:00:00 2001 From: Founder Fang Date: Sun, 20 Dec 2015 19:09:45 +0800 Subject: [PATCH 01/11] [Weiqitv] Add new extractor --- youtube_dl/extractor/__init__.py | 4 ++- youtube_dl/extractor/letv.py | 57 ++++++++++++++++++++++++++++++++ youtube_dl/extractor/weiqitv.py | 54 ++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/weiqitv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e46d73ed7..9dcd252f8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -331,7 +331,8 @@ from .letv import ( LetvIE, LetvTvIE, - LetvPlaylistIE + LetvPlaylistIE, + LetvCloudIE, ) from .libsyn import LibsynIE from .lifenews import ( @@ -834,6 +835,7 @@ WebOfStoriesPlaylistIE, ) from .weibo import WeiboIE +from .weiqitv import WeiqitvIE from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index be648000e..c096cb1ab 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -4,6 +4,7 @@ import datetime import re import time +import base64 from .common import InfoExtractor from ..compat import ( @@ -16,6 +17,7 @@ parse_iso8601, sanitized_Request, int_or_none, + str_or_none, encode_data_uri, ) @@ -239,3 +241,58 @@ class LetvPlaylistIE(LetvTvIE): }, 'playlist_mincount': 7 }] + + +class LetvCloudIE(InfoExtractor): + IE_DESC = '乐视云' + _VALID_URL = r'http://yuntv\.letv\.com/bcloud.html\?.*$' + + _TESTS = [{ + 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf', + 'md5': '26450599afd64c513bc77030ad15db44', + 'info_dict': { + 'id': 'p7jnfw5hw9_467623dedf', + 'ext': 'mp4', + 'title': 'p7jnfw5hw9_467623dedf', + }, + }, { + 'url': 
'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360', + 'info_dict': { + 'id': 'p7jnfw5hw9_ec93197892', + 'ext': 'mp4', + 'title': 'p7jnfw5hw9_ec93197892', + }, + }, { + 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd', + 'info_dict': { + 'id': 'p7jnfw5hw9_187060b6fd', + 'ext': 'mp4', + 'title': 'p7jnfw5hw9_187060b6fd', + }, + }] + + def _real_extract(self, url): + uu = re.search('uu=([\w]+)', url).group(1) + vu = re.search('vu=([\w]+)', url).group(1) + media_id = uu + '_' + vu + + play_json_req = sanitized_Request( + 'http://api.letvcloud.com/gpc.php?cf=html5&sign=signxxxxx&ver=2.2&format=json&' + + "uu=" + uu + "&vu=" + vu) + play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data') + + formats = [{ + 'url': base64.b64decode(media['play_url']['main_url'].encode('utf-8')).decode("utf-8"), + 'ext': 'mp4', + 'format_id': int_or_none(media.get('play_url', {}).get('vtype')), + 'format_note': str_or_none(media.get('play_url', {}).get('definition')), + 'width': int_or_none(media.get('play_url', {}).get('vwidth')), + 'height': int_or_none(media.get('play_url', {}).get('vheight')), + } for media in play_json['data']['video_info']['media'].values()] + self._sort_formats(formats) + + return { + 'id': media_id, + 'title': media_id, + 'formats': formats, + } diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py new file mode 100644 index 000000000..da3b3d145 --- /dev/null +++ b/youtube_dl/extractor/weiqitv.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class WeiqitvIE(InfoExtractor): + IE_DESC = 'WQTV' + _VALID_URL = r'http://www\.weiqitv\.com/index/video_play\?videoId=(?P[A-Za-z0-9]+)' + + _TESTS = [{ + 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3', + 'md5': '26450599afd64c513bc77030ad15db44', + 'info_dict': { + 'id': 
'53c744f09874f0e76a8b46f3', + 'ext': 'mp4', + 'title': '2013年度盘点', + }, + }, { + 'url': 'http://www.weiqitv.com/index/video_play?videoId=567379a2d4c36cca518b4569', + 'info_dict': { + 'id': '567379a2d4c36cca518b4569', + 'ext': 'mp4', + 'title': '民国围棋史', + }, + }, { + 'url': 'http://www.weiqitv.com/index/video_play?videoId=5430220a9874f088658b4567', + 'info_dict': { + 'id': '5430220a9874f088658b4567', + 'ext': 'mp4', + 'title': '二路托过的手段和运用', + }, + }] + + def _real_extract(self, url): + media_id = self._match_id(url) + page = self._download_webpage(url, media_id) + + info_json_str = self._search_regex( + 'var\s+video\s*=\s*(.+});', + page, 'info_json_str') + info_json = self._parse_json(info_json_str, media_id) + + letvcloud_url = self._search_regex( + 'var\s+letvurl\s*=\s*"([^"]+)', + page, 'letvcloud_url') + + return { + '_type': 'url_transparent', + "ie_key": 'LetvCloud', + 'url': letvcloud_url, + 'title': info_json['name'], + 'id': media_id, + } From 10defdd06a1ea878087f5bad6ee99da3b2f9d3f6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 Jan 2016 03:17:35 +0800 Subject: [PATCH 02/11] [letv] Reduce duplicated codes --- youtube_dl/extractor/letv.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index c096cb1ab..d43b4e808 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -281,14 +281,17 @@ def _real_extract(self, url): "uu=" + uu + "&vu=" + vu) play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data') - formats = [{ - 'url': base64.b64decode(media['play_url']['main_url'].encode('utf-8')).decode("utf-8"), - 'ext': 'mp4', - 'format_id': int_or_none(media.get('play_url', {}).get('vtype')), - 'format_note': str_or_none(media.get('play_url', {}).get('definition')), - 'width': int_or_none(media.get('play_url', {}).get('vwidth')), - 'height': int_or_none(media.get('play_url', {}).get('vheight')), - } for 
media in play_json['data']['video_info']['media'].values()] + formats = [] + for media in play_json['data']['video_info']['media'].values(): + play_url = media['play_url'] + formats.append({ + 'url': base64.b64decode(play_url['main_url'].encode('utf-8')).decode("utf-8"), + 'ext': 'mp4', + 'format_id': int_or_none(play_url.get('vtype')), + 'format_note': str_or_none(play_url.get('definition')), + 'width': int_or_none(play_url.get('vwidth')), + 'height': int_or_none(play_url.get('vheight')), + }) self._sort_formats(formats) return { From 8fff4f61e5741e56890ccb108f42b0c7dc6607e8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 Jan 2016 03:18:54 +0800 Subject: [PATCH 03/11] [letv] Use single quotes --- youtube_dl/extractor/letv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index d43b4e808..be6c75967 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -278,14 +278,14 @@ def _real_extract(self, url): play_json_req = sanitized_Request( 'http://api.letvcloud.com/gpc.php?cf=html5&sign=signxxxxx&ver=2.2&format=json&' + - "uu=" + uu + "&vu=" + vu) + 'uu=' + uu + '&vu=' + vu) play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data') formats = [] for media in play_json['data']['video_info']['media'].values(): play_url = media['play_url'] formats.append({ - 'url': base64.b64decode(play_url['main_url'].encode('utf-8')).decode("utf-8"), + 'url': base64.b64decode(play_url['main_url'].encode('utf-8')).decode('utf-8'), 'ext': 'mp4', 'format_id': int_or_none(play_url.get('vtype')), 'format_note': str_or_none(play_url.get('definition')), From e0690782b8531f9962950693fe33a5d4a4f494f6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 Jan 2016 03:25:12 +0800 Subject: [PATCH 04/11] [letv] LetvCloud: guard against invalid URLs --- youtube_dl/extractor/letv.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff 
--git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index be6c75967..b8d4f5bb8 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -272,8 +272,14 @@ class LetvCloudIE(InfoExtractor): }] def _real_extract(self, url): - uu = re.search('uu=([\w]+)', url).group(1) - vu = re.search('vu=([\w]+)', url).group(1) + uu_mobj = re.search('uu=([\w]+)', url) + vu_mobj = re.search('vu=([\w]+)', url) + + if not uu_mobj or not vu_mobj: + raise ExtractorError('Invalid URL: %s' % url, expected=True) + + uu = uu_mobj.group(1) + vu = vu_mobj.group(1) media_id = uu + '_' + vu play_json_req = sanitized_Request( From 26de1bba83b3269653b571682cf962899b6231b7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 Jan 2016 03:31:34 +0800 Subject: [PATCH 05/11] [letv] LetvCloud: check error messages from server --- youtube_dl/extractor/letv.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index b8d4f5bb8..177d53bbb 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -287,6 +287,14 @@ def _real_extract(self, url): 'uu=' + uu + '&vu=' + vu) play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data') + if not play_json.get('data'): + if play_json.get('message'): + raise ExtractorError('Letv cloud said: %s' % play_json['message'], expected=True) + elif play_json.get('code'): + raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True) + else: + raise ExtractorError('Letv cloud returned an unknwon error') + formats = [] for media in play_json['data']['video_info']['media'].values(): play_url = media['play_url'] From 73e74424561b3e277c2f071fd88b17c845a1162f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 Jan 2016 03:39:45 +0800 Subject: [PATCH 06/11] [letv] LetvCloud: simplify and improve _VALID_URL --- youtube_dl/extractor/letv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index 177d53bbb..ff74b5141 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -245,7 +245,7 @@ class LetvPlaylistIE(LetvTvIE): class LetvCloudIE(InfoExtractor): IE_DESC = '乐视云' - _VALID_URL = r'http://yuntv\.letv\.com/bcloud.html\?.*$' + _VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+' _TESTS = [{ 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf', From 0428106da33419302ac8ad31306f9357549f086e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 Jan 2016 03:53:17 +0800 Subject: [PATCH 07/11] [letv] LetvCloud: make title looks like a title --- youtube_dl/extractor/letv.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index ff74b5141..b15ac163e 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -253,21 +253,21 @@ class LetvCloudIE(InfoExtractor): 'info_dict': { 'id': 'p7jnfw5hw9_467623dedf', 'ext': 'mp4', - 'title': 'p7jnfw5hw9_467623dedf', + 'title': 'Video p7jnfw5hw9_467623dedf', }, }, { 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360', 'info_dict': { 'id': 'p7jnfw5hw9_ec93197892', 'ext': 'mp4', - 'title': 'p7jnfw5hw9_ec93197892', + 'title': 'Video p7jnfw5hw9_ec93197892', }, }, { 'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd', 'info_dict': { 'id': 'p7jnfw5hw9_187060b6fd', 'ext': 'mp4', - 'title': 'p7jnfw5hw9_187060b6fd', + 'title': 'Video p7jnfw5hw9_187060b6fd', }, }] @@ -310,6 +310,6 @@ def _real_extract(self, url): return { 'id': media_id, - 'title': media_id, + 'title': 'Video %s' % media_id, 'formats': formats, } From bec30224ff86d11c3f61808dbe683f97940ba23e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 Jan 2016 04:00:37 +0800 Subject: [PATCH 08/11] [letv] LetvCloud: Detect ext instead of the hardcoded 
one --- youtube_dl/extractor/letv.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index b15ac163e..08bdae8a2 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -19,6 +19,7 @@ int_or_none, str_or_none, encode_data_uri, + url_basename, ) @@ -295,12 +296,17 @@ def _real_extract(self, url): else: raise ExtractorError('Letv cloud returned an unknwon error') + def b64decode(s): + return base64.b64decode(s.encode('utf-8')).decode('utf-8') + formats = [] for media in play_json['data']['video_info']['media'].values(): play_url = media['play_url'] + url = b64decode(play_url['main_url']) + decoded_url = b64decode(url_basename(url)) formats.append({ - 'url': base64.b64decode(play_url['main_url'].encode('utf-8')).decode('utf-8'), - 'ext': 'mp4', + 'url': url, + 'ext': determine_ext(decoded_url), 'format_id': int_or_none(play_url.get('vtype')), 'format_note': str_or_none(play_url.get('definition')), 'width': int_or_none(play_url.get('vwidth')), From 65ced034b851f890f894ce79d8069de852208dfc Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 Jan 2016 04:02:30 +0800 Subject: [PATCH 09/11] [weiqitv] Make codes shorter --- youtube_dl/extractor/weiqitv.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py index da3b3d145..b72df1e8d 100644 --- a/youtube_dl/extractor/weiqitv.py +++ b/youtube_dl/extractor/weiqitv.py @@ -37,13 +37,11 @@ def _real_extract(self, url): page = self._download_webpage(url, media_id) info_json_str = self._search_regex( - 'var\s+video\s*=\s*(.+});', - page, 'info_json_str') + 'var\s+video\s*=\s*(.+});', page, 'info json str') info_json = self._parse_json(info_json_str, media_id) letvcloud_url = self._search_regex( - 'var\s+letvurl\s*=\s*"([^"]+)', - page, 'letvcloud_url') + 'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url') return { '_type': 
'url_transparent', From f15a9ca301799c44300d16ac7e61e5b36d67fd31 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 Jan 2016 04:03:57 +0800 Subject: [PATCH 10/11] [weiqitv] Rename the extractor - capitalize 'TV' --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/weiqitv.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index aeb67354e..5ec017d5d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -857,7 +857,7 @@ WebOfStoriesPlaylistIE, ) from .weibo import WeiboIE -from .weiqitv import WeiqitvIE +from .weiqitv import WeiqiTVIE from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py index b72df1e8d..24389d4bb 100644 --- a/youtube_dl/extractor/weiqitv.py +++ b/youtube_dl/extractor/weiqitv.py @@ -4,7 +4,7 @@ from .common import InfoExtractor -class WeiqitvIE(InfoExtractor): +class WeiqiTVIE(InfoExtractor): IE_DESC = 'WQTV' _VALID_URL = r'http://www\.weiqitv\.com/index/video_play\?videoId=(?P[A-Za-z0-9]+)' From 8652bd22f1ee955940793874b5c4060ef8e273b3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 Jan 2016 04:04:39 +0800 Subject: [PATCH 11/11] [weiqitv] Use single quotes --- youtube_dl/extractor/weiqitv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py index 24389d4bb..e333ae345 100644 --- a/youtube_dl/extractor/weiqitv.py +++ b/youtube_dl/extractor/weiqitv.py @@ -45,7 +45,7 @@ def _real_extract(self, url): return { '_type': 'url_transparent', - "ie_key": 'LetvCloud', + 'ie_key': 'LetvCloud', 'url': letvcloud_url, 'title': info_json['name'], 'id': media_id,