[voot] Improve extraction (#10255, closes #11814)

2024-11-27 17:26:51 +01:00 · 2017-08-06 08:04:51 +07:00 · 2017-08-06 08:04:51 +07:00 · e2b4808fd8
commit e2b4808fd8
parent daaaf5f594
2 changed files with 78 additions and 35 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1222,6 +1222,7 @@ from .vodlocker import VodlockerIE
 from .vodpl import VODPlIE
 from .vodplatform import VODPlatformIE
 from .voicerepublic import VoiceRepublicIE
+from .voot import VootIE
 from .voxmedia import VoxMediaIE
 from .vporn import VpornIE
 from .vrt import VRTIE
@@ -1333,4 +1334,3 @@ from .zapiks import ZapiksIE
 from .zaq1 import Zaq1IE
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import ZingMp3IE
-from .voot import VootIE
--- a/youtube_dl/extractor/voot.py
+++ b/youtube_dl/extractor/voot.py
@@ -2,54 +2,97 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from .kaltura import KalturaIE
 from ..utils import (
    ExtractorError,
    int_or_none,
    try_get,
    unified_timestamp,
 )
 class VootIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/(?:.+?[/-]?)/1/(?:.+?[0-9]?)/(?:.+?[/-]?)/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?voot\.com/(?:[^/]+/)+(?P<id>\d+)'
-    _TEST = {
+    _GEO_COUNTRIES = ['IN']
    _TESTS = [{
        'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
        'info_dict': {
-            'id': '441353',
+            'id': '0_8ledb18o',
            'ext': 'mp4',
            'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
-        }
+            'uploader_id': 'batchUser',
-    }
+            'timestamp': 1472162937,
-
+            'upload_date': '20160825',
-    _GET_CONTENT_TEMPLATE = 'https://wapi.voot.com/ws/ott/getMediaInfo.json?platform=Web&pId=3&mediaId=%s'
+            'duration': 1146,
-
+            'series': 'Ishq Ka Rang Safed',
-    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True):
+            'season_number': 1,
-        json_data = super(VootIE, self)._download_json(url_or_request, video_id, note, fatal=fatal)
+            'episode': 'Is this the end of Kamini?',
-        if json_data['status']['code'] != 0:
+            'episode_number': 340,
-            if fatal:
+            'view_count': int,
-                raise ExtractorError(json_data['status']['message'])
+            'like_count': int,
-            return None
+        },
-        return json_data['assets']
+        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Failed to download m3u8 information'],
    }, {
        'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
        'only_matching': True,
    }, {
        'url': 'https://www.voot.com/movies/pandavas-5/424627',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        video_data = self._download_json(
            self._GET_CONTENT_TEMPLATE % video_id,
            video_id)
-        thumbnail = ''
+        media_info = self._download_json(
-        formats = []
+            'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id,
            query={
                'platform': 'Web',
                'pId': 2,
                'mediaId': video_id,
            })
-        if video_data:
+        status_code = try_get(media_info, lambda x: x['status']['code'], int)
-            format_url = video_data.get('URL')
+        if status_code != 0:
-            formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            raise ExtractorError(media_info['status']['message'], expected=True)
-        if video_data['Pictures']:
+        media = media_info['assets']
            for picture in video_data['Pictures']:
                #Get only first available thumbnail
                thumbnail = picture.get('URL')
                break
-        self._sort_formats(formats)
+        entry_id = media['EntryId']
        title = media['MediaName']
        description, series, season_number, episode, episode_number = [None] * 5
        for meta in try_get(media, lambda x: x['Metas'], list) or []:
            key, value = meta.get('Key'), meta.get('Value')
            if not key or not value:
                continue
            if key == 'ContentSynopsis':
                description = value
            elif key == 'RefSeriesTitle':
                series = value
            elif key == 'RefSeriesSeason':
                season_number = int_or_none(value)
            elif key == 'EpisodeMainTitle':
                episode = value
            elif key == 'EpisodeNo':
                episode_number = int_or_none(value)
        return {
-            'id': video_id,
+            '_type': 'url_transparent',
-            'title': video_data.get('MediaName'),
+            'url': 'kaltura:1982551:%s' % entry_id,
-            'thumbnail': thumbnail,
+            'ie_key': KalturaIE.ie_key(),
-            'formats':formats,
+            'title': title,
            'description': description,
            'series': series,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'timestamp': unified_timestamp(media.get('CreationDate')),
            'duration': int_or_none(media.get('Duration')),
            'view_count': int_or_none(media.get('ViewCounter')),
            'like_count': int_or_none(media.get('like_counter')),
        }