From e4cf7741f9302b3faa092962f2895b55cb3d89bb Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 21 Mar 2023 17:48:22 -0500 Subject: [PATCH] [extractor/rozhlas] Extract manifest formats (#6590) Closes #6584 Authored by: bashonly --- yt_dlp/extractor/rozhlas.py | 80 ++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/rozhlas.py b/yt_dlp/extractor/rozhlas.py index 08ebb93e3..5cc664e00 100644 --- a/yt_dlp/extractor/rozhlas.py +++ b/yt_dlp/extractor/rozhlas.py @@ -1,5 +1,12 @@ from .common import InfoExtractor -from ..utils import extract_attributes, int_or_none, remove_start, traverse_obj +from ..utils import ( + extract_attributes, + int_or_none, + remove_start, + str_or_none, + traverse_obj, + url_or_none, +) class RozhlasIE(InfoExtractor): @@ -50,7 +57,7 @@ class RozhlasVltavaIE(InfoExtractor): 'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337', 'md5': 'ba2fdbc1242fc16771c7695d271ec355', 'info_dict': { - 'id': 8891337, + 'id': '8891337', 'title': 'md5:21f99739d04ab49d8c189ec711eef4ec', }, 'playlist_count': 1, @@ -69,7 +76,7 @@ class RozhlasVltavaIE(InfoExtractor): }, { 'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744', 'info_dict': { - 'id': 8554744, + 'id': '8554744', 'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko', }, 'playlist_count': 5, @@ -139,27 +146,62 @@ class RozhlasVltavaIE(InfoExtractor): 'chapter_number': 5, }, }] + }, { + 'url': 'https://dvojka.rozhlas.cz/karel-siktanc-cerny-jezdec-bily-kun-napinava-pohadka-o-tajemnem-prizraku-8946969', + 'info_dict': { + 'id': '8946969', + 'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '10631121', + 'ext': 'm4a', + 'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku', + 'description': 'Karel Šiktanc: Černý jezdec, bílý kůň', + 'duration': 2656, + 'artist': 'Tvůrčí skupina Drama a literatura', + 'channel_id': 'dvojka', + }, + }], + 'params': {'skip_download': 'dash'}, }] def _extract_video(self, entry): - chapter_number = int_or_none(traverse_obj(entry, ('meta', 'ga', 'contentSerialPart'))) + formats = [] + audio_id = entry['meta']['ga']['contentId'] + for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))): + ext = audio.get('variant') + if ext == 'dash': + formats.extend(self._extract_mpd_formats( + audio['url'], audio_id, mpd_id=ext, fatal=False)) + elif ext == 'hls': + formats.extend(self._extract_m3u8_formats( + audio['url'], audio_id, 'm4a', m3u8_id=ext, fatal=False)) + else: + formats.append({ + 'url': audio['url'], + 'ext': ext, + 'format_id': ext, + 'abr': int_or_none(audio.get('bitrate')), + 'acodec': ext, + 'vcodec': 'none', + }) + + chapter_number = traverse_obj(entry, ('meta', 'ga', 'contentSerialPart', {int_or_none})) + return { - 'id': entry['meta']['ga']['contentId'], - 'title': traverse_obj(entry, ('meta', 'ga', 'contentName')), - 'description': entry.get('title'), - 'duration': entry.get('duration'), - 'artist': traverse_obj(entry, ('meta', 'ga', 'contentAuthor')), - 'channel_id': traverse_obj(entry, ('meta', 'ga', 'contentCreator')), + 'id': audio_id, 'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None, 'chapter_number': chapter_number, - 'formats': [{ - 'url': audio_link['url'], - 'ext': audio_link.get('variant'), - 'format_id': audio_link.get('variant'), - 'abr': audio_link.get('bitrate'), - 'acodec': audio_link.get('variant'), - 'vcodec': 'none', - } for audio_link in entry['audioLinks']], + 'formats': formats, + **traverse_obj(entry, { + 'title': ('meta', 'ga', 'contentName'), + 'description': 'title', + 'duration': ('duration', {int_or_none}), + 'artist': ('meta', 'ga', 'contentAuthor'), + 'channel_id': ('meta', 'ga', 'contentCreator'), + }) } def _real_extract(self, url): @@ -173,7 +215,7 @@ def _real_extract(self, url): return { '_type': 'playlist', - 'id': data.get('embedId'), + 'id': str_or_none(data.get('embedId')) or video_id, 'title': traverse_obj(data, ('series', 'title')), 'entries': map(self._extract_video, data['playlist']), }