[bbc] Extract _ID_REGEX and ad one more video id pattern (Closes #7724)

This commit is contained in:
Sergey M․ 2015-12-02 02:34:31 +06:00
parent 24121bc703
commit 22d7368dfb

View File

@ -22,7 +22,8 @@
class BBCCoUkIE(InfoExtractor): class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk' IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer' IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>[\da-z]{8})' _ID_REGEX = r'[pb][\da-z]{7}'
_VALID_URL = r'https?://(?:(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])|)(?P<id>%s)' % _ID_REGEX
_MEDIASELECTOR_URLS = [ _MEDIASELECTOR_URLS = [
# Provides HQ HLS streams with even better quality that pc mediaset but fails # Provides HQ HLS streams with even better quality that pc mediaset but fails
@ -465,7 +466,7 @@ def _real_extract(self, url):
if not programme_id: if not programme_id:
programme_id = self._search_regex( programme_id = self._search_regex(
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None) r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
if programme_id: if programme_id:
formats, subtitles = self._download_media_selector(programme_id) formats, subtitles = self._download_media_selector(programme_id)
@ -780,8 +781,9 @@ def _real_extract(self, url):
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
programme_id = self._search_regex( programme_id = self._search_regex(
[r'data-video-player-vpid="([\da-z]{8})"', [r'data-video-player-vpid="(%s)"' % self._ID_REGEX,
r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'], r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
webpage, 'vpid', default=None) webpage, 'vpid', default=None)
if programme_id: if programme_id:
@ -816,7 +818,7 @@ def extract_all(pattern):
# Multiple video article (e.g. # Multiple video article (e.g.
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460) # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+[\da-z]{8}(?:\b[^"]+)?' EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
entries = [] entries = []
for match in extract_all(r'new\s+SMP\(({.+?})\)'): for match in extract_all(r'new\s+SMP\(({.+?})\)'):
embed_url = match.get('playerSettings', {}).get('externalEmbedUrl') embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')