[bbccouk] Improve extraction (Closes #5530)

2024-11-28 07:36:52 +01:00 · 2015-05-01 03:59:13 +06:00 · 2015-05-01 03:59:13 +06:00 · 8683b4d8d9
commit 8683b4d8d9
parent 1dbd717eb4
1 changed file with 26 additions and 9 deletions
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -3,7 +3,10 @@
 import xml.etree.ElementTree
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
 from ..compat import compat_HTTPError
@@ -326,16 +329,29 @@ def _real_extract(self, url):
        webpage = self._download_webpage(url, group_id, 'Downloading video page')
-        programme_id = self._search_regex(
-            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        programme_id = None
+        tviplayer = self._search_regex(
+            r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
+            webpage, 'player', default=None)
+        if tviplayer:
+            player = self._parse_json(tviplayer, group_id).get('player', {})
+            duration = int_or_none(player.get('duration'))
+            programme_id = player.get('vpid')
+        if not programme_id:
+            programme_id = self._search_regex(
+                r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
        if programme_id:
-            player = self._download_json(
-                'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
-                group_id)['jsConf']['player']
-            title = player['title']
-            description = player['subtitle']
-            duration = player['duration']
            formats, subtitles = self._download_media_selector(programme_id)
+            title = self._og_search_title(webpage)
+            description = self._search_regex(
+                r'<p class="medium-description">([^<]+)</p>',
+                webpage, 'description', fatal=False)
        else:
            programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
@@ -345,6 +361,7 @@ def _real_extract(self, url):
            'id': programme_id,
            'title': title,
            'description': description,
+            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,