[pokemon] Relax _VALID_URL and extend title extraction (closes #15518)

This commit is contained in:
Sergey M․ 2018-02-08 03:58:35 +07:00
parent 9f4ec3de25
commit 237d07f114
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -11,19 +11,34 @@
class PokemonIE(InfoExtractor): class PokemonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))' _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
_TESTS = [{ _TESTS = [{
'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true', 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
'md5': '9fb209ae3a569aac25de0f5afc4ee08f', 'md5': '2fe8eaec69768b25ef898cda9c43062e',
'info_dict': { 'info_dict': {
'id': 'd0436c00c3ce4071ac6cee8130ac54a1', 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
'ext': 'mp4', 'ext': 'mp4',
'title': 'From A to Z!', 'title': 'The Ol Raise and Switch!',
'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!', 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
'timestamp': 1460478136, 'timestamp': 1511824728,
'upload_date': '20160412', 'upload_date': '20171127',
},
'add_id': ['LimelightMedia'],
}, {
# no data-video-title
'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008',
'info_dict': {
'id': '99f3bae270bf4e5097274817239ce9c8',
'ext': 'mp4',
'title': 'Pokémon: The Rise of Darkrai',
'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d',
'timestamp': 1417778347,
'upload_date': '20141205',
},
'add_id': ['LimelightMedia'],
'params': {
'skip_download': True,
}, },
'add_id': ['LimelightMedia']
}, { }, {
'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
'only_matching': True, 'only_matching': True,
@@ -42,7 +57,9 @@ def _real_extract(self, url):
r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'), r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
webpage, 'video data element')) webpage, 'video data element'))
video_id = video_data['data-video-id'] video_id = video_data['data-video-id']
title = video_data['data-video-title'] title = video_data.get('data-video-title') or self._html_search_meta(
'pkm-title', webpage,' title', default=None) or self._search_regex(
r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'id': video_id, 'id': video_id,