From 169bd46bdc54ae169ffcbb79b44ac1b3866d2ee3 Mon Sep 17 00:00:00 2001 From: Unknown Date: Wed, 9 Sep 2020 21:20:13 +0200 Subject: [PATCH 1/2] [viki] subtitles, formats --- youtube_dlc/extractor/viki.py | 118 ++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 47 deletions(-) diff --git a/youtube_dlc/extractor/viki.py b/youtube_dlc/extractor/viki.py index 9e4171237..fc150d5a4 100644 --- a/youtube_dlc/extractor/viki.py +++ b/youtube_dlc/extractor/viki.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, int_or_none, + HEADRequest, parse_age_limit, parse_iso8601, sanitized_Request, @@ -220,6 +221,69 @@ def _real_extract(self, url): video = self._call_api( 'videos/%s.json' % video_id, video_id, 'Downloading video JSON') + streams = self._call_api( + 'videos/%s/streams.json' % video_id, video_id, + 'Downloading video streams JSON') + + formats = [] + for format_id, stream_dict in streams.items(): + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None)) + for protocol, format_dict in stream_dict.items(): + # rtmps URLs does not seem to work + if protocol == 'rtmps': + continue + format_url = format_dict.get('url') + format_drms = format_dict.get('drms') + format_stream_id = format_dict.get('id') + if format_id == 'm3u8': + m3u8_formats = self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', + m3u8_id='m3u8-%s' % protocol, fatal=False) + # Despite CODECS metadata in m3u8 all video-only formats + # are actually video+audio + for f in m3u8_formats: + if f.get('acodec') == 'none' and f.get('vcodec') != 'none': + f['acodec'] = None + formats.extend(m3u8_formats) + elif format_id == 'mpd': + mpd_formats = self._extract_mpd_formats( + format_url, video_id, + mpd_id='mpd-%s' % protocol, fatal=False) + formats.extend(mpd_formats) + elif format_id == 'mpd': + + formats.extend(mpd_formats) + elif format_url.startswith('rtmp'): + mobj = re.search( + 
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$', + format_url) + if not mobj: + continue + formats.append({ + 'format_id': 'rtmp-%s' % format_id, + 'ext': 'flv', + 'url': mobj.group('url'), + 'play_path': mobj.group('playpath'), + 'app': mobj.group('app'), + 'page_url': url, + 'drms': format_drms, + 'stream_id': format_stream_id, + }) + else: + urlh = self._request_webpage( + HEADRequest(format_url), video_id, 'Checking file size', fatal=False) + formats.append({ + 'url': format_url, + 'format_id': '%s-%s' % (format_id, protocol), + 'height': height, + 'drms': format_drms, + 'stream_id': format_stream_id, + 'filesize': int_or_none(urlh.headers.get('Content-Length')), + }) + self._sort_formats(formats) + self._check_errors(video) title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False) @@ -244,12 +308,18 @@ def _real_extract(self, url): 'url': thumbnail.get('url'), }) + stream_ids = [] + for f in formats: + s_id = f.get('stream_id') + if s_id != None: + stream_ids.append(s_id) + subtitles = {} for subtitle_lang, _ in video.get('subtitle_completions', {}).items(): subtitles[subtitle_lang] = [{ 'ext': subtitles_format, 'url': self._prepare_call( - 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)), + 'videos/%s/subtitles/%s.%s?stream_id=%s' % (video_id, subtitle_lang, subtitles_format, stream_ids[0])), } for subtitles_format in ('srt', 'vtt')] result = { @@ -265,10 +335,6 @@ def _real_extract(self, url): 'subtitles': subtitles, } - streams = self._call_api( - 'videos/%s/streams.json' % video_id, video_id, - 'Downloading video streams JSON') - if 'external' in streams: result.update({ '_type': 'url_transparent', @@ -276,48 +342,6 @@ def _real_extract(self, url): }) return result - formats = [] - for format_id, stream_dict in streams.items(): - height = int_or_none(self._search_regex( - r'^(\d+)[pP]$', format_id, 'height', default=None)) - for protocol, format_dict in stream_dict.items(): - # rtmps URLs does not seem to work - if 
protocol == 'rtmps': - continue - format_url = format_dict['url'] - if format_id == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - format_url, video_id, 'mp4', - entry_protocol='m3u8_native', - m3u8_id='m3u8-%s' % protocol, fatal=False) - # Despite CODECS metadata in m3u8 all video-only formats - # are actually video+audio - for f in m3u8_formats: - if f.get('acodec') == 'none' and f.get('vcodec') != 'none': - f['acodec'] = None - formats.extend(m3u8_formats) - elif format_url.startswith('rtmp'): - mobj = re.search( - r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$', - format_url) - if not mobj: - continue - formats.append({ - 'format_id': 'rtmp-%s' % format_id, - 'ext': 'flv', - 'url': mobj.group('url'), - 'play_path': mobj.group('playpath'), - 'app': mobj.group('app'), - 'page_url': url, - }) - else: - formats.append({ - 'url': format_url, - 'format_id': '%s-%s' % (format_id, protocol), - 'height': height, - }) - self._sort_formats(formats) - result['formats'] = formats return result From 4ae68091bbd8f40dc977934d70359ddda601a5b9 Mon Sep 17 00:00:00 2001 From: Unknown Date: Sat, 12 Sep 2020 01:17:16 +0200 Subject: [PATCH 2/2] [soundcloud] flake8 --- youtube_dlc/extractor/viki.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dlc/extractor/viki.py b/youtube_dlc/extractor/viki.py index fc150d5a4..f8e360338 100644 --- a/youtube_dlc/extractor/viki.py +++ b/youtube_dlc/extractor/viki.py @@ -273,7 +273,7 @@ def _real_extract(self, url): }) else: urlh = self._request_webpage( - HEADRequest(format_url), video_id, 'Checking file size', fatal=False) + HEADRequest(format_url), video_id, 'Checking file size', fatal=False) formats.append({ 'url': format_url, 'format_id': '%s-%s' % (format_id, protocol), @@ -311,9 +311,9 @@ def _real_extract(self, url): stream_ids = [] for f in formats: s_id = f.get('stream_id') - if s_id != None: + if s_id is not None: stream_ids.append(s_id) - + subtitles = {} for subtitle_lang, _ in 
video.get('subtitle_completions', {}).items(): subtitles[subtitle_lang] = [{