From 76bfaf6daf73ac74f7a9068c51d14ab0a5ec5e52 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 10 Jan 2015 17:46:01 +0100 Subject: [PATCH] [nrk] Improve subtitle support (#3092) --- youtube_dl/extractor/nrk.py | 42 ++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 321ce5ce70..f6de260222 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -7,8 +7,10 @@ from ..utils import ( ExtractorError, float_or_none, + parse_duration, unified_strdate, ) +from .subtitles import SubtitlesInfoExtractor class NRKIE(InfoExtractor): @@ -71,8 +73,8 @@ def _real_extract(self, url): } -class NRKTVIE(InfoExtractor): - _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' +class NRKTVIE(SubtitlesInfoExtractor): + _VALID_URL = r'(?Phttp://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' _TESTS = [ { @@ -147,6 +149,29 @@ class NRKTVIE(InfoExtractor): } ] + def _seconds2str(self, s): + return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000) + + def _debug_print(self, txt): + if self._downloader.params.get('verbose', False): + self.to_screen('[debug] %s' % txt) + + def _extract_captions(self, subtitlesurl, video_id, baseurl): + url = "%s%s" % (baseurl, subtitlesurl) + self._debug_print('%s: Subtitle url: %s' % (video_id, url)) + captions = self._download_xml(url, video_id, 'Downloading subtitles') + lang = captions.get('lang', 'no') + ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}')) + srt = '' + for pos, p in enumerate(ps): + begin = parse_duration(p.get('begin')) + duration = parse_duration(p.get('dur')) + starttime = self._seconds2str(begin) + endtime = self._seconds2str(begin + duration) + text = '\n'.join(p.itertext()) + srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text) + return {lang: srt} + def _extract_f4m(self, manifest_url, video_id): return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id) @@ -154,6 +179,7 @@ def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') part_id = mobj.group('part_id') + baseurl = mobj.group('baseurl') webpage = self._download_webpage(url, video_id) @@ -210,9 +236,18 @@ def _real_extract(self, url): m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage) if m3u8_url: formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4')) - self._sort_formats(formats) + subtitles_url = self._html_search_regex( + r'data-subtitlesurl[ ]*=[ ]*"([^"]+)"', + webpage, 'subtitle URL', default=None) + subtitles = None + if subtitles_url: + subtitles = self._extract_captions(subtitles_url, video_id, baseurl) + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, subtitles) + return + return { 'id': video_id, 'title': title, @@ -221,4 +256,5 @@ def _real_extract(self, url): 'upload_date': upload_date, 'duration': duration, 'formats': formats, + 'subtitles': subtitles, }