[extractor/rozhlas] Extract manifest formats (#6590)

Closes #6584
Authored by: bashonly
This commit is contained in:
bashonly 2023-03-21 17:48:22 -05:00 committed by GitHub
parent c14af7a741
commit e4cf7741f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,5 +1,12 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import extract_attributes, int_or_none, remove_start, traverse_obj from ..utils import (
extract_attributes,
int_or_none,
remove_start,
str_or_none,
traverse_obj,
url_or_none,
)
class RozhlasIE(InfoExtractor): class RozhlasIE(InfoExtractor):
@ -50,7 +57,7 @@ class RozhlasVltavaIE(InfoExtractor):
'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337', 'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
'md5': 'ba2fdbc1242fc16771c7695d271ec355', 'md5': 'ba2fdbc1242fc16771c7695d271ec355',
'info_dict': { 'info_dict': {
'id': 8891337, 'id': '8891337',
'title': 'md5:21f99739d04ab49d8c189ec711eef4ec', 'title': 'md5:21f99739d04ab49d8c189ec711eef4ec',
}, },
'playlist_count': 1, 'playlist_count': 1,
@ -69,7 +76,7 @@ class RozhlasVltavaIE(InfoExtractor):
}, { }, {
'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744', 'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744',
'info_dict': { 'info_dict': {
'id': 8554744, 'id': '8554744',
'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko', 'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko',
}, },
'playlist_count': 5, 'playlist_count': 5,
@ -139,27 +146,62 @@ class RozhlasVltavaIE(InfoExtractor):
'chapter_number': 5, 'chapter_number': 5,
}, },
}] }]
}, {
'url': 'https://dvojka.rozhlas.cz/karel-siktanc-cerny-jezdec-bily-kun-napinava-pohadka-o-tajemnem-prizraku-8946969',
'info_dict': {
'id': '8946969',
'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
},
'playlist_count': 1,
'playlist': [{
'info_dict': {
'id': '10631121',
'ext': 'm4a',
'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
'description': 'Karel Šiktanc: Černý jezdec, bílý kůň',
'duration': 2656,
'artist': 'Tvůrčí skupina Drama a literatura',
'channel_id': 'dvojka',
},
}],
'params': {'skip_download': 'dash'},
}] }]
def _extract_video(self, entry): def _extract_video(self, entry):
chapter_number = int_or_none(traverse_obj(entry, ('meta', 'ga', 'contentSerialPart'))) formats = []
audio_id = entry['meta']['ga']['contentId']
for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))):
ext = audio.get('variant')
if ext == 'dash':
formats.extend(self._extract_mpd_formats(
audio['url'], audio_id, mpd_id=ext, fatal=False))
elif ext == 'hls':
formats.extend(self._extract_m3u8_formats(
audio['url'], audio_id, 'm4a', m3u8_id=ext, fatal=False))
else:
formats.append({
'url': audio['url'],
'ext': ext,
'format_id': ext,
'abr': int_or_none(audio.get('bitrate')),
'acodec': ext,
'vcodec': 'none',
})
chapter_number = traverse_obj(entry, ('meta', 'ga', 'contentSerialPart', {int_or_none}))
return { return {
'id': entry['meta']['ga']['contentId'], 'id': audio_id,
'title': traverse_obj(entry, ('meta', 'ga', 'contentName')),
'description': entry.get('title'),
'duration': entry.get('duration'),
'artist': traverse_obj(entry, ('meta', 'ga', 'contentAuthor')),
'channel_id': traverse_obj(entry, ('meta', 'ga', 'contentCreator')),
'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None, 'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None,
'chapter_number': chapter_number, 'chapter_number': chapter_number,
'formats': [{ 'formats': formats,
'url': audio_link['url'], **traverse_obj(entry, {
'ext': audio_link.get('variant'), 'title': ('meta', 'ga', 'contentName'),
'format_id': audio_link.get('variant'), 'description': 'title',
'abr': audio_link.get('bitrate'), 'duration': ('duration', {int_or_none}),
'acodec': audio_link.get('variant'), 'artist': ('meta', 'ga', 'contentAuthor'),
'vcodec': 'none', 'channel_id': ('meta', 'ga', 'contentCreator'),
} for audio_link in entry['audioLinks']], })
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -173,7 +215,7 @@ def _real_extract(self, url):
return { return {
'_type': 'playlist', '_type': 'playlist',
'id': data.get('embedId'), 'id': str_or_none(data.get('embedId')) or video_id,
'title': traverse_obj(data, ('series', 'title')), 'title': traverse_obj(data, ('series', 'title')),
'entries': map(self._extract_video, data['playlist']), 'entries': map(self._extract_video, data['playlist']),
} }