[roosterteeth.com] Add subtitle support (https://github.com/ytdl-org/youtube-dl/pull/23985)

Closes #15. Authored by samiksome.
2024-11-24 05:36:56 +01:00 · 2020-02-07 18:27:32 +05:30 · 2020-02-07 18:27:32 +05:30 · dfd14aadfa
commit dfd14aadfa
parent 0c3d0f5177
1 changed file with 32 additions and 2 deletions
--- a/youtube_dlc/extractor/roosterteeth.py
+++ b/youtube_dlc/extractor/roosterteeth.py
@@ -86,9 +86,11 @@ def _real_extract(self, url):
        api_episode_url = self._EPISODE_BASE_URL + display_id
        try:
-            m3u8_url = self._download_json(
+            video_data = self._download_json(
                api_episode_url + '/videos', display_id,
-                'Downloading video JSON metadata')['data'][0]['attributes']['url']
+                'Downloading video JSON metadata')['data'][0]
            m3u8_url = video_data['attributes']['url']
            subtitle_m3u8_url = video_data['links']['download']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
@@ -119,6 +121,33 @@ def _real_extract(self, url):
                            'url': img_url,
                        })
        subtitles = {}
        res = self._download_webpage_handle(
            subtitle_m3u8_url, display_id,
            'Downloading m3u8 information',
            'Failed to download m3u8 information',
            fatal=True, data=None, headers={}, query={})
        if res is not False:
            subtitle_m3u8_doc, _ = res
            for line in subtitle_m3u8_doc.split('\n'):
                if 'EXT-X-MEDIA:TYPE=SUBTITLES' in line:
                    parts = line.split(',')
                    for part in parts:
                        if 'LANGUAGE' in part:
                            lang = part[part.index('=') + 2:-1]
                        elif 'URI' in part:
                            uri = part[part.index('=') + 2:-1]
                    res = self._download_webpage_handle(
                        uri, display_id,
                        'Downloading m3u8 information',
                        'Failed to download m3u8 information',
                        fatal=True, data=None, headers={}, query={})
                    doc, _ = res
                    for l in doc.split('\n'):
                        if not l.startswith('#'):
                            subtitles[lang] = [{'url': uri[:-uri[::-1].index('/')] + l}]
                            break
        return {
            'id': video_id,
            'display_id': display_id,
@@ -134,4 +163,5 @@ def _real_extract(self, url):
            'formats': formats,
            'channel_id': attributes.get('channel_id'),
            'duration': int_or_none(attributes.get('length')),
            'subtitles': subtitles
        }