1
1
mirror of https://github.com/ytdl-org/youtube-dl synced 2024-12-13 04:07:55 +01:00

[RoosterTeeth] Add subtitle support to extractor

This commit is contained in:
wtpckl 2021-01-28 12:06:52 +01:00
parent 7b8fa658f8
commit 759e8ce15b
No known key found for this signature in database
GPG Key ID: 7EEB886BD742D45A

View File

@ -1,16 +1,20 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_str, compat_str,
compat_urlparse,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
str_or_none, str_or_none,
urlencode_postdata, urlencode_postdata,
parse_m3u8_attributes,
) )
@ -86,9 +90,11 @@ class RoosterTeethIE(InfoExtractor):
api_episode_url = self._EPISODE_BASE_URL + display_id api_episode_url = self._EPISODE_BASE_URL + display_id
try: try:
m3u8_url = self._download_json( video_json = self._download_json(
api_episode_url + '/videos', display_id, api_episode_url + '/videos', display_id)['data'][0]
'Downloading video JSON metadata')['data'][0]['attributes']['url'] m3u8_url = \
video_json['attributes'].get('url') or \
video_json['links'].get('master')
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if self._parse_json(e.cause.read().decode(), display_id).get('access') is False: if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
@ -100,6 +106,9 @@ class RoosterTeethIE(InfoExtractor):
m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
self._sort_formats(formats) self._sort_formats(formats)
subtitles = self._extract_m3u8_subtitles(
m3u8_url, display_id)
episode = self._download_json( episode = self._download_json(
api_episode_url, display_id, api_episode_url, display_id,
'Downloading episode JSON metadata')['data'][0] 'Downloading episode JSON metadata')['data'][0]
@ -133,5 +142,59 @@ class RoosterTeethIE(InfoExtractor):
'episode_id': str_or_none(episode.get('uuid')), 'episode_id': str_or_none(episode.get('uuid')),
'formats': formats, 'formats': formats,
'channel_id': attributes.get('channel_id'), 'channel_id': attributes.get('channel_id'),
'subtitles': subtitles,
'duration': int_or_none(attributes.get('length')), 'duration': int_or_none(attributes.get('length')),
} }
def _extract_m3u8_subtitles(self, m3u8_url, video_id):
    """Collect subtitle tracks advertised by an HLS master playlist.

    Downloads the playlist at ``m3u8_url``, looks for ``#EXT-X-MEDIA``
    entries whose TYPE is ``SUBTITLES`` and which carry both a URI and a
    LANGUAGE, downloads each referenced subtitle playlist and takes the
    first URI line found in it as the subtitle file for that language.

    Only the first URI line of each subtitle playlist is used.
    NOTE(review): this presumes the subtitle playlist references a single
    subtitle file rather than several segments - confirm against the site.

    :param m3u8_url: URL of the master playlist.
    :param video_id: identifier passed through to the download helper.
    :return: dict mapping language to ``[{'url': subtitle_url}]``, or
        ``None`` when the master playlist could not be downloaded or no
        usable subtitle track was found.
    """
    # Both downloads are non-fatal: a failed request is checked for via a
    # False result and simply means "no subtitles" instead of an error.
    res = self._download_webpage_handle(
        m3u8_url, video_id,
        note='Downloading subtitle information',
        errnote='Failed to download subtitle information',
        fatal=False, data=None, headers={}, query={})
    if res is False:
        return None
    m3u8_doc, urlh = res
    # Relative URIs are resolved against the URL reported by the response
    # handle rather than the one that was requested.
    m3u8_url = urlh.geturl()

    def format_url(url, base_url):
        # Absolute http(s) URLs are used verbatim, everything else is
        # resolved relative to the playlist it was found in.
        if re.match(r'^https?://', url):
            return url
        return compat_urlparse.urljoin(base_url, url)

    def first_uri(playlist_doc):
        # Return the first line that is neither blank nor a tag/comment.
        # Blank lines must be skipped explicitly: joining an empty string
        # onto the base URL would yield the playlist URL itself.
        for playlist_line in playlist_doc.splitlines():
            playlist_line = playlist_line.strip()
            if playlist_line and not playlist_line.startswith('#'):
                return playlist_line
        return None

    subtitles = {}
    for line in m3u8_doc.splitlines():
        if not line.startswith('#EXT-X-MEDIA:'):
            continue
        media = parse_m3u8_attributes(line)
        media_url_raw = media.get('URI')
        media_lang = media.get('LANGUAGE')
        if media.get('TYPE') != 'SUBTITLES' or not media_url_raw or not media_lang:
            continue
        media_url = format_url(media_url_raw, base_url=m3u8_url)
        # %-formatting instead of '{}'.format(): auto-numbered fields are
        # not available on Python 2.6.
        res = self._download_webpage_handle(
            media_url, video_id,
            note='Downloading subtitle information (%s)' % media_lang,
            errnote='Failed to download subtitle information (%s)' % media_lang,
            fatal=False, data=None, headers={}, query={})
        if res is False:
            continue
        m3u8_subtitle_doc, subtitle_urlh = res
        subtitle_uri = first_uri(m3u8_subtitle_doc)
        if not subtitle_uri:
            # A playlist without any URI line offers nothing to download;
            # do not report the playlist itself as a subtitle file.
            continue
        # Same base-URL handling as for the master playlist above.
        subtitles[media_lang] = [{
            'url': format_url(subtitle_uri, base_url=subtitle_urlh.geturl()),
        }]
    return subtitles or None