[hotstar] Add new extractor

2024-11-27 19:36:50 +01:00 · 2015-12-25 01:59:56 +01:00 · 2015-12-25 01:59:56 +01:00 · fb8e402ad2
commit fb8e402ad2
parent c24044635b
2 changed files with 80 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -253,6 +253,7 @@ from .history import HistoryIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
 from .hotnewhiphop import HotNewHipHopIE
 from .hotstar import HotStarIE
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
 from .huffpost import HuffPostIE
--- a/youtube_dl/extractor/hotstar.py
+++ b/youtube_dl/extractor/hotstar.py
@ -0,0 +1,79 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    determine_ext,
    int_or_none,
 )
 class HotStarIE(InfoExtractor):
    """Extractor for videos hosted on hotstar.com.

    The ten-digit content id at the end of the page URL is used to query
    two JSON endpoints: one for the content metadata and one for the CDN
    stream location.  Both endpoints wrap their payload in an envelope
    with ``resultCode`` and ``resultObj`` (or ``errorDescription``) keys,
    which is unwrapped by the ``_download_json`` override below.
    """

    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})'
    _TEST = {
        'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
        'info_dict': {
            'id': '1000076273',
            'ext': 'mp4',
            'title': 'On Air With AIB - English',
            'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
            'timestamp': 1447227000,
            'upload_date': '20151111',
            'duration': 381,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    # Metadata endpoint; the only placeholder is the content id.
    _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
    # CDN endpoint; placeholders are (channel, content id, type).
    _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'

    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True):
        """Download a Hotstar API response and return its ``resultObj``.

        The request itself is delegated to the parent class.  The response
        is accepted only if it is a JSON object whose ``resultCode`` is
        ``'OK'``.

        Returns the value stored under ``resultObj``, or ``None`` when the
        request or the API call failed and ``fatal`` is false.

        Raises ExtractorError when the API reports a failure (or returns
        something that is not a JSON object) and ``fatal`` is true.
        """
        json_data = super(HotStarIE, self)._download_json(url_or_request, video_id, note, fatal=fatal)
        # NOTE(review): in non-fatal mode the parent implementation is
        # expected to hand back a non-dict value (None/False) when the
        # download fails -- confirm against InfoExtractor.  Subscripting
        # that value would raise TypeError, so treat it as a failed call.
        if not isinstance(json_data, dict):
            if fatal:
                raise ExtractorError('Unexpected API response for %s' % video_id)
            return None
        if json_data.get('resultCode') != 'OK':
            if fatal:
                # Fall back to a generic message if the API gave no description.
                raise ExtractorError(
                    json_data.get('errorDescription') or 'API request failed for %s' % video_id)
            return None
        return json_data['resultObj']

    def _real_extract(self, url):
        """Build the info dict (metadata and formats) for the video at ``url``."""
        video_id = self._match_id(url)

        # Metadata is mandatory: this call is fatal and the first entry of
        # ``contentInfo`` describes the requested video.
        video_data = self._download_json(
            self._GET_CONTENT_TEMPLATE % video_id,
            video_id)['contentInfo'][0]

        formats = []
        # Only the TABLET channel is queried.  PCTV is the channel for
        # extracting the f4m manifest, which is not requested here (f4m
        # results are skipped below because they produce broken files).
        for f in ('TABLET',):
            format_data = self._download_json(
                self._GET_CDN_TEMPLATE % (f, video_id, 'VOD'),
                video_id, 'Downloading %s JSON metadata' % f, fatal=False)
            if not format_data:
                continue
            # This lookup is best-effort, so a reply without a stream URL
            # is skipped instead of aborting with KeyError.
            format_url = format_data.get('src')
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            elif ext == 'f4m':
                # produce broken files
                continue
            else:
                formats.append({
                    'url': format_url,
                    'width': int_or_none(format_data.get('width')),
                    'height': int_or_none(format_data.get('height')),
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_data['episodeTitle'],
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': int_or_none(video_data.get('broadcastDate')),
            'formats': formats,
        }