[vice] update tests and add support for Ooyala embeds in article pages

2024-11-30 16:52:57 +01:00 · 2017-05-05 16:12:40 +01:00 · 2017-05-05 16:12:40 +01:00 · 1d9e0a4f40
commit 1d9e0a4f40
parent 7ad53cb7ff
1 changed file with 48 additions and 35 deletions
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -32,7 +32,8 @@ class ViceBaseIE(AdobePassIE):
            resource = self._get_mvpd_resource(
                'VICELAND', title, video_id,
                watch_hub_data.get('video-rating'))
-            query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
+            query['tvetoken'] = self._extract_mvpd_auth(
                url, video_id, 'VICELAND', resource)
        # signature generation algorithm is reverse engineered from signatureGenerator in
        # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
@@ -45,11 +46,14 @@ class ViceBaseIE(AdobePassIE):
        try:
            host = 'www.viceland' if is_locked else self._PREPLAY_HOST
-            preplay = self._download_json('https://%s.com/%s/preplay/%s' % (host, locale, video_id), video_id, query=query)
+            preplay = self._download_json(
                'https://%s.com/%s/preplay/%s' % (host, locale, video_id),
                video_id, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                error = json.loads(e.cause.read().decode())
-                raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
+                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, error['details']), expected=True)
            raise
        video_data = preplay['video']
@@ -88,16 +92,17 @@ class ViceBaseIE(AdobePassIE):
 class ViceIE(ViceBaseIE):
-    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?P<locale>[^/]+)/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
+    IE_NAME = 'vice'
    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
    _TESTS = [{
-        'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
+        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
-        'md5': 'e9d77741f9e42ba583e683cd170660f7',
+        'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2',
        'info_dict': {
-            'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
+            'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
            'ext': 'flv',
-            'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+            'title': 'Monkey Labs of Holland',
-            'duration': 725.983,
+            'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149',
        },
        'add_ie': ['Ooyala'],
    }, {
@@ -136,22 +141,13 @@ class ViceIE(ViceBaseIE):
        },
        'add_ie': ['UplynkPreplay'],
    }, {
-        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
+        'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
        'only_matching': True,
    }, {
        'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229',
        'only_matching': True,
    }, {
        'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
        'only_matching': True,
    }]
    _PREPLAY_HOST = 'video.vice'
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        locale, video_id = re.match(self._VALID_URL, url).groups()
        video_id = mobj.group('id')
        locale = mobj.group('locale')
        video_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, video_id)
        embed_code = self._search_regex(
            r'embedCode=([^&\'"]+)', webpage,
@@ -166,6 +162,7 @@ class ViceIE(ViceBaseIE):
 class ViceShowIE(InfoExtractor):
    IE_NAME = 'vice:show'
    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
    _TEST = {
@@ -192,12 +189,14 @@ class ViceShowIE(InfoExtractor):
            r'<title>(.+?)</title>', webpage, 'title', default=None)
        if title:
            title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
-        description = self._html_search_meta('description', webpage, 'description')
+        description = self._html_search_meta(
            'description', webpage, 'description')
        return self.playlist_result(entries, show_id, title, description)
 class ViceArticleIE(InfoExtractor):
    IE_NAME = 'vice:article'
    _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
    _TESTS = [{
@@ -216,8 +215,9 @@ class ViceArticleIE(InfoExtractor):
            # AES-encrypted m3u8
            'skip_download': True,
        },
        'add_ie': ['UplynkPreplay'],
    }, {
-        'url': 'http://www.vice.com/video/how-to-hack-a-car',
+        'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
        'info_dict': {
            'id': '3jstaBeXgAs',
@@ -229,6 +229,12 @@ class ViceArticleIE(InfoExtractor):
            'upload_date': '20140529',
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
        'only_matching': True,
    }, {
        'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
        'only_matching': True,
    }]
    def _real_extract(self, url):
@@ -240,22 +246,29 @@ class ViceArticleIE(InfoExtractor):
            r'window\.__PREFETCH_DATA\s*=\s*({.*});',
            webpage, 'prefetch data'), display_id)
        body = prefetch_data['body']
-        youtube_url = self._html_search_regex(
+
-            r'<iframe[^>]+src="(.*youtube\.com/.*)"', body, 'YouTube URL', default=None)
+        def _url_res(video_url, ie_key):
        if youtube_url:
            return {
                '_type': 'url_transparent',
-                'url': youtube_url,
+                'url': video_url,
                'display_id': display_id,
-                'ie_key': 'Youtube',
+                'ie_key': ie_key,
            }
-        video_url = self._html_search_regex(
+        embed_code = self._search_regex(
-            r'data-video-url="([^"]+)"', prefetch_data['embed_code'], 'video URL')
+            r'embedCode=([^&\'"]+)', body,
            'ooyala embed code', default=None)
        if embed_code:
            return _url_res('ooyala:%s' % embed_code, 'Ooyala')
-        return {
+        youtube_url = self._html_search_regex(
-            '_type': 'url_transparent',
+            r'<iframe[^>]+src="(.*youtube\.com/.*)"',
-            'url': video_url,
+            body, 'YouTube URL', default=None)
-            'display_id': display_id,
+        if youtube_url:
-            'ie_key': ViceIE.ie_key(),
+            return _url_res(youtube_url, 'Youtube')
-        }
+
        video_url = self._html_search_regex(
            r'data-video-url="([^"]+)"',
            prefetch_data['embed_code'], 'video URL')
        return _url_res(video_url, ViceIE.ie_key())