From 1534aba8658294913d58accbc6688574c9911585 Mon Sep 17 00:00:00 2001 From: Simon Sawicki <37424085+Grub4K@users.noreply.github.com> Date: Mon, 26 Sep 2022 18:43:54 +0200 Subject: [PATCH] [extractor/artetv] Remove duplicate stream urls (#5047) Closes #4510 Authored by: Grub4K --- yt_dlp/extractor/arte.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 25ecb4230..d3ec4a66c 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -135,6 +135,7 @@ def _real_extract(self, url): 'Video is not available in this language edition of Arte or broadcast rights expired', expected=True) formats, subtitles = [], {} + secondary_formats = [] for stream in config['data']['attributes']['streams']: # official player contains code like `e.get("versions")[0].eStat.ml5` stream_version = stream['versions'][0] @@ -152,22 +153,26 @@ def _real_extract(self, url): not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles ))) + short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?') if stream['protocol'].startswith('HLS'): fmts, subs = self._extract_m3u8_formats_and_subtitles( stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False) for fmt in fmts: fmt.update({ - 'format_note': f'{stream_version.get("label", "unknown")} [{stream_version.get("shortLabel", "?")}]', + 'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]', 'language_preference': lang_pref, }) - formats.extend(fmts) + if any(map(short_label.startswith, ('cc', 'OGsub'))): + secondary_formats.extend(fmts) + else: + formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) elif stream['protocol'] in ('HTTPS', 'RTMP'): formats.append({ 'format_id': f'{stream["protocol"]}-{stream_version_code}', 'url': stream['url'], - 'format_note': f'{stream_version.get("label", "unknown")} [{stream_version.get("shortLabel", "?")}]', + 'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]', 'language_preference': lang_pref, # 'ext': 'mp4', # XXX: may or may not be necessary, at least for HTTPS }) @@ -179,6 +184,8 @@ def _real_extract(self, url): # The JS also looks for chapters in config['data']['attributes']['chapters'], # but I am yet to find a video having those + formats.extend(secondary_formats) + self._remove_duplicate_formats(formats) self._sort_formats(formats) metadata = config['data']['attributes']['metadata']