From 37c1e4025c6df834e93a64c1c13eebac23e90942 Mon Sep 17 00:00:00 2001
From: ping
Date: Mon, 6 Jul 2015 15:26:49 +0800
Subject: [PATCH 1/4] [yinyuetai] New extractor for yinyuetai.com

---
 youtube_dl/extractor/__init__.py  |  1 +
 youtube_dl/extractor/yinyuetai.py | 47 +++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 youtube_dl/extractor/yinyuetai.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index aba62db53..8665855eb 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -733,6 +733,7 @@
     YandexMusicPlaylistIE,
 )
 from .yesjapan import YesJapanIE
+from .yinyuetai import YinYueTaiIE
 from .ynet import YnetIE
 from .youjizz import YouJizzIE
 from .youku import YoukuIE
diff --git a/youtube_dl/extractor/yinyuetai.py b/youtube_dl/extractor/yinyuetai.py
new file mode 100644
index 000000000..661c34602
--- /dev/null
+++ b/youtube_dl/extractor/yinyuetai.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class YinYueTaiIE(InfoExtractor):
+    IE_NAME = 'yinyuetai:video'
+    _VALID_URL = r'https?://v\.yinyuetai\.com/video(/h5)?/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://v.yinyuetai.com/video/2322376',
+        'md5': '6e3abe28d38e3a54b591f9f040595ce0',
+        'info_dict': {
+            'id': '2322376',
+            'ext': 'mp4',
+            'title': '少女时代_PARTY_Music Video Teaser',
+            'creator': '少女时代',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        info = self._download_json(
+            'http://ext.yinyuetai.com/main/get-h-mv-info?json=true&videoId=%s' % video_id, video_id,
+            'Downloading mv info')['videoInfo']['coreVideoInfo']
+
+        if info['error']:
+            raise ExtractorError(info['errorMsg'], expected=True)
+
+        formats = [
+            {'url': format_info['videoUrl'], 'format_id': format_info['qualityLevel'],
+             'format': format_info['qualityLevelName'], 'filesize': format_info['fileSize'],
+             'ext': 'mp4', 'preference': format_info['bitrate']}
+            for format_info in info['videoUrlModels']
+        ]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info['videoName'],
+            'thumbnail': info['bigHeadImage'],
+            'creator': info['artistNames'],
+            'duration': info['duration'],
+            'formats': formats,
+        }

From e2082ea9422aadf7ae2580d9333008279cda51f0 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Thu, 9 Jul 2015 00:50:32 +0800
Subject: [PATCH 2/4] [yinyuetai] Add test for h5/ part in _VALID_URL

---
 youtube_dl/extractor/yinyuetai.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/yinyuetai.py b/youtube_dl/extractor/yinyuetai.py
index 661c34602..41ee89da4 100644
--- a/youtube_dl/extractor/yinyuetai.py
+++ b/youtube_dl/extractor/yinyuetai.py
@@ -8,7 +8,7 @@
 class YinYueTaiIE(InfoExtractor):
     IE_NAME = 'yinyuetai:video'
     _VALID_URL = r'https?://v\.yinyuetai\.com/video(/h5)?/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://v.yinyuetai.com/video/2322376',
         'md5': '6e3abe28d38e3a54b591f9f040595ce0',
         'info_dict': {
@@ -17,7 +17,10 @@ class YinYueTaiIE(InfoExtractor):
             'title': '少女时代_PARTY_Music Video Teaser',
             'creator': '少女时代',
         },
-    }
+    }, {
+        'url': 'http://v.yinyuetai.com/video/h5/2322376',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)

From af0f9b0e95233862e758140b282497d04edfb885 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Thu, 9 Jul 2015 00:54:37 +0800
Subject: [PATCH 3/4] [yinyuetai] Style

---
 youtube_dl/extractor/yinyuetai.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/yinyuetai.py b/youtube_dl/extractor/yinyuetai.py
index 41ee89da4..a4ada4872 100644
--- a/youtube_dl/extractor/yinyuetai.py
+++ b/youtube_dl/extractor/yinyuetai.py
@@ -32,12 +32,14 @@ def _real_extract(self, url):
         if info['error']:
             raise ExtractorError(info['errorMsg'], expected=True)
 
-        formats = [
-            {'url': format_info['videoUrl'], 'format_id': format_info['qualityLevel'],
-             'format': format_info['qualityLevelName'], 'filesize': format_info['fileSize'],
-             'ext': 'mp4', 'preference': format_info['bitrate']}
-            for format_info in info['videoUrlModels']
-        ]
+        formats = [{
+            'url': format_info['videoUrl'],
+            'format_id': format_info['qualityLevel'],
+            'format': format_info['qualityLevelName'],
+            'filesize': format_info['fileSize'],
+            'ext': 'mp4',
+            'preference': format_info['bitrate'],
+        } for format_info in info['videoUrlModels']]
         self._sort_formats(formats)
 
         return {

From d76dea001b5365b7646986cba12f9908cd321f6a Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Thu, 9 Jul 2015 01:07:45 +0800
Subject: [PATCH 4/4] [yinyuetai] Miscellaneous improvements

1. Include all fields in _TEST
2. Use .get() for optional fields
3. Clarify the intention of 'ext' in formats
---
 youtube_dl/extractor/yinyuetai.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/yinyuetai.py b/youtube_dl/extractor/yinyuetai.py
index a4ada4872..003df9233 100644
--- a/youtube_dl/extractor/yinyuetai.py
+++ b/youtube_dl/extractor/yinyuetai.py
@@ -16,6 +16,8 @@ class YinYueTaiIE(InfoExtractor):
             'ext': 'mp4',
             'title': '少女时代_PARTY_Music Video Teaser',
             'creator': '少女时代',
+            'duration': 25,
+            'thumbnail': 're:^https?://.*\.jpg$',
         },
     }, {
         'url': 'http://v.yinyuetai.com/video/h5/2322376',
@@ -35,18 +37,19 @@ def _real_extract(self, url):
         formats = [{
             'url': format_info['videoUrl'],
             'format_id': format_info['qualityLevel'],
-            'format': format_info['qualityLevelName'],
-            'filesize': format_info['fileSize'],
+            'format': format_info.get('qualityLevelName'),
+            'filesize': format_info.get('fileSize'),
+            # though URLs ends with .flv, the downloaded files are in fact mp4
             'ext': 'mp4',
-            'preference': format_info['bitrate'],
+            'tbr': format_info.get('bitrate'),
         } for format_info in info['videoUrlModels']]
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': info['videoName'],
-            'thumbnail': info['bigHeadImage'],
-            'creator': info['artistNames'],
-            'duration': info['duration'],
+            'thumbnail': info.get('bigHeadImage'),
+            'creator': info.get('artistNames'),
+            'duration': info.get('duration'),
             'formats': formats,
         }