[crunchyroll] Improve series and season metadata extraction (closes #11832)

This commit is contained in:
Sergey M․ 2017-01-27 23:55:55 +07:00
parent 3a194cb4ec
commit e0b6e50ccd
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -166,6 +166,25 @@ class CrunchyrollIE(CrunchyrollBaseIE):
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
'info_dict': {
'id': '727589',
'ext': 'mp4',
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 Give Me Deliverance from this Judicial Injustice!",
'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Kadokawa Pictures Inc.',
'upload_date': '20170118',
'series': "KONOSUBA -God's blessing on this wonderful world!",
'season_number': 2,
'episode': 'Give Me Deliverance from this Judicial Injustice!',
'episode_number': 1,
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
'only_matching': True,
@ -439,6 +458,18 @@ def _real_extract(self, url):
subtitles = self.extract_subtitles(video_id, webpage)
# webpage provide more accurate data than series_title from XML
series = self._html_search_regex(
r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
webpage, 'series', default=xpath_text(metadata, 'series_title'))
episode = xpath_text(metadata, 'episode_title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
season_number = int_or_none(self._search_regex(
r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)',
webpage, 'season number', default=None))
return {
'id': video_id,
'title': video_title,
@ -446,9 +477,10 @@ def _real_extract(self, url):
'thumbnail': xpath_text(metadata, 'episode_image_url'),
'uploader': video_uploader,
'upload_date': video_upload_date,
'series': xpath_text(metadata, 'series_title'),
'episode': xpath_text(metadata, 'episode_title'),
'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
'series': series,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'subtitles': subtitles,
'formats': formats,
}