mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-06-01 19:08:14 +02:00
Compare commits
5 Commits
b6b1b217aa
...
277bf53576
Author | SHA1 | Date | |
---|---|---|---|
|
277bf53576 | ||
|
7975ddf245 | ||
|
4fa4bccabc | ||
|
104e991b28 | ||
|
8d9fd25060 |
|
@ -448,6 +448,7 @@
|
|||
from .dailywire import (
|
||||
DailyWireIE,
|
||||
DailyWirePodcastIE,
|
||||
DailyWireShowIE,
|
||||
)
|
||||
from .damtomo import (
|
||||
DamtomoRecordIE,
|
||||
|
|
|
@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||
'info_dict': {
|
||||
'id': 'world-europe-32668511',
|
||||
'title': 'Russia stages massive WW2 parade',
|
||||
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
|
@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||
'title': 'BUGGER',
|
||||
'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}, {
|
||||
|
@ -631,14 +632,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': 'p02mprgb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
||||
'description': 'md5:2868290467291b37feda7863f7a83f54',
|
||||
'title': 'Germanwings crash site aerial video',
|
||||
'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
|
||||
'duration': 47,
|
||||
'timestamp': 1427219242,
|
||||
'upload_date': '20150324',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
|
@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'now SIMORGH_DATA with no video',
|
||||
}, {
|
||||
# single video embedded with data-playable containing XML playlists (regional section)
|
||||
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'info_dict': {
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'id': '39275083',
|
||||
'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||
'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# single video from video playlist embedded with vxp-playlist-data JSON
|
||||
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
||||
|
@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# single video story with digitalData
|
||||
# single video story with __PWA_PRELOADED_STATE__
|
||||
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
|
||||
'info_dict': {
|
||||
'id': 'p02q6gc4',
|
||||
'ext': 'flv',
|
||||
'title': 'Sri Lanka’s spicy secret',
|
||||
'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
|
||||
'timestamp': 1437674293,
|
||||
'upload_date': '20150723',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tasting the spice of life in Jaffna',
|
||||
'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
|
||||
'timestamp': 1646058397,
|
||||
'upload_date': '20220228',
|
||||
'duration': 255,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video story without digitalData
|
||||
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
||||
|
@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'timestamp': 1415867444,
|
||||
'upload_date': '20141113',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
'skip': 'redirects to TopGear home page',
|
||||
}, {
|
||||
# single video embedded with Morph
|
||||
# TODO: replacement test page
|
||||
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||
'info_dict': {
|
||||
'id': 'p041vhd0',
|
||||
|
@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'uploader': 'BBC Sport',
|
||||
'uploader_id': 'bbc_sport',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Georestricted to UK',
|
||||
'skip': 'Video no longer in page',
|
||||
}, {
|
||||
# single video with playlist.sxml URL in playlist param
|
||||
# single video in __INITIAL_DATA__
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
'info_dict': {
|
||||
'id': 'p02xycnp',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
|
||||
'title': 'Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
|
||||
'timestamp': 1437750175,
|
||||
'upload_date': '20150724',
|
||||
'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
|
||||
'duration': 140,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# article with multiple videos embedded with playlist.sxml in playlist param
|
||||
# article with multiple videos embedded with Morph.setPayload
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': '34475836',
|
||||
|
@ -754,6 +750,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# Testing noplaylist
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': 'p034ppnv',
|
||||
'ext': 'mp4',
|
||||
'title': 'All you need to know about Jurgen Klopp',
|
||||
'timestamp': 1444335081,
|
||||
'upload_date': '20151008',
|
||||
'duration': 122.0,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
},
|
||||
}, {
|
||||
# school report article with single video
|
||||
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
|
||||
|
@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'title': 'School which breaks down barriers in Jerusalem',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
|
||||
}, {
|
||||
# single video with playlist URL from weather section
|
||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||
|
@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
'duration': 105,
|
||||
},
|
||||
}, {
|
||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||
'info_dict': {
|
||||
'id': 'p0b71qth',
|
||||
'id': 'p0b779gc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why France is making this woman a national hero',
|
||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||
'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1638230731,
|
||||
'upload_date': '20211130',
|
||||
'timestamp': 1638215626,
|
||||
'upload_date': '20211129',
|
||||
'duration': 125,
|
||||
},
|
||||
}, {
|
||||
# video with script id __NEXT_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/uk-68546268',
|
||||
'info_dict': {
|
||||
'id': 'p0hj0lq7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nasser Hospital doctor describes his treatment by IDF',
|
||||
'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1710188248,
|
||||
'upload_date': '20240311',
|
||||
'duration': 104,
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
|
@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||
'info_dict': {
|
||||
|
@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'ext': 'mp4',
|
||||
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||
'description': 'Learn English words and phrases from this story',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
|
||||
},
|
||||
'add_ie': [BBCCoUkIE.ie_key()],
|
||||
}, {
|
||||
|
@ -832,28 +861,30 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': 'p07c6sb9',
|
||||
'ext': 'mp4',
|
||||
'title': 'How positive thinking is harming your happiness',
|
||||
'alt_title': 'The downsides of positive thinking',
|
||||
'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
|
||||
'title': 'The downsides of positive thinking',
|
||||
'description': 'The downsides of positive thinking',
|
||||
'duration': 235,
|
||||
'thumbnail': r're:https?://.+/p07c9dsr.jpg',
|
||||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
|
||||
'upload_date': '20220223',
|
||||
'timestamp': 1645632746,
|
||||
},
|
||||
}, {
|
||||
# BBC Sounds
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
|
||||
'info_dict': {
|
||||
'id': 'm001q789',
|
||||
'id': 'p0hrw4nr',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
||||
'chapters': 'count:8',
|
||||
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
||||
'uploader': 'Radio 3',
|
||||
'duration': 1800,
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'title': 'Are our coastlines being washed away?',
|
||||
'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
|
||||
'timestamp': 1713556800,
|
||||
'upload_date': '20240419',
|
||||
'duration': 1588,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
|
||||
'uploader': 'World Service',
|
||||
'uploader_id': 'bbc_world_service',
|
||||
'series': 'CrowdScience',
|
||||
'chapters': [],
|
||||
}
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
'only_matching': True,
|
||||
|
@ -1008,8 +1039,7 @@ def _real_extract(self, url):
|
|||
webpage, 'group id', default=None)
|
||||
if group_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
|
@ -1069,83 +1099,133 @@ def _real_extract(self, url):
|
|||
}
|
||||
|
||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||
# There are several setPayload calls may be present but the video
|
||||
# seems to be always related to the first one
|
||||
morph_payload = self._parse_json(
|
||||
self._search_regex(
|
||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||
webpage, 'morph payload', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
# Several setPayload calls may be present but the video(s)
|
||||
# should be in one that mentions leadMedia or videoData
|
||||
morph_payload = self._search_json(
|
||||
r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
|
||||
contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
|
||||
default={})
|
||||
if morph_payload:
|
||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||
for component in components:
|
||||
if not isinstance(component, dict):
|
||||
continue
|
||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||
if not lead_media:
|
||||
continue
|
||||
identifiers = lead_media.get('identifiers')
|
||||
if not identifiers or not isinstance(identifiers, dict):
|
||||
continue
|
||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||
for lead_media in traverse_obj(morph_payload, (
|
||||
'body', 'components', ..., 'props', 'leadMedia', {dict})):
|
||||
programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
|
||||
if not programme_id:
|
||||
continue
|
||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
description = lead_media.get('summary')
|
||||
uploader = lead_media.get('masterBrand')
|
||||
uploader_id = lead_media.get('mid')
|
||||
duration = None
|
||||
duration_d = lead_media.get('duration')
|
||||
if isinstance(duration_d, dict):
|
||||
duration = parse_duration(dict_get(
|
||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'title': lead_media.get('title') or self._og_search_title(webpage),
|
||||
**traverse_obj(lead_media, {
|
||||
'description': ('summary', {str}),
|
||||
'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
|
||||
'uploader': ('masterBrand', {str}),
|
||||
'uploader_id': ('mid', {str}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
body = self._parse_json(traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'body')), playlist_id, fatal=False)
|
||||
for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
|
||||
if video_data.get('vpid'):
|
||||
video_id = video_data['vpid']
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
entry = {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
else:
|
||||
video_id = video_data['pid']
|
||||
entry = self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
|
||||
video_id, url_transparent=True)
|
||||
entry.update({
|
||||
'timestamp': traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
|
||||
),
|
||||
**traverse_obj(video_data, {
|
||||
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
|
||||
'title': (('title', 'caption'), {str}, any),
|
||||
'duration': ('duration', {parse_duration}),
|
||||
}),
|
||||
})
|
||||
if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
|
||||
return entry
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
playlist_title = traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'headline', {str})) or playlist_title
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', 'raw')
|
||||
# various PRELOADED_STATE JSON
|
||||
preload_state = self._search_json(
|
||||
r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
|
||||
'preload state', playlist_id, transform_source=js_to_json, default={})
|
||||
# PRELOADED_STATE with current programme
|
||||
current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
|
||||
programme_id = traverse_obj(current_programme, ('id', {str}))
|
||||
if programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
**traverse_obj(current_programme, {
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
|
||||
'duration': ('duration', 'value', {int_or_none}),
|
||||
'uploader': ('network', 'short_title', {str}),
|
||||
'uploader_id': ('network', 'id', {str}),
|
||||
'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
|
||||
'series': ('titles', 'primary', {str}),
|
||||
}),
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})
|
||||
),
|
||||
}
|
||||
|
||||
# PWA_PRELOADED_STATE with article video asset
|
||||
asset_id = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
|
||||
'assetVideo', 0, {str}, any))
|
||||
if asset_id:
|
||||
video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
|
||||
if video_id:
|
||||
article = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
|
||||
|
||||
def image_url(image_id):
|
||||
return traverse_obj(preload_state, (
|
||||
'entities', 'images', image_id, 'url',
|
||||
{lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
|
||||
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'id': video_id,
|
||||
**traverse_obj(preload_state, ('entities', 'videos', asset_id, {
|
||||
'title': ('title', {str}),
|
||||
'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
|
||||
'thumbnail': (0, {image_url}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
})),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})) or None,
|
||||
'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
|
||||
}
|
||||
else:
|
||||
return self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
|
||||
asset_id, playlist_title, display_id=playlist_id,
|
||||
description=playlist_description)
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
|
@ -1191,6 +1271,28 @@ def _real_extract(self, url):
|
|||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_model(model):
    """Build the info dict for the single video described by a model structure.

    Returns None when the model carries no string `versionId` for its
    first version, since that id is what the media selector is queried with.
    """
    version_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
    if not version_id:
        return None

    formats, subtitles = self._download_media_selector(version_id)
    info = {
        'id': version_id,
        'formats': formats,
        'subtitles': subtitles,
    }
    # Optional metadata; `availableFrom` is divided by 1000 before use
    metadata = traverse_obj(model, {
        'title': ('title', {str}),
        'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
        'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
        'duration': ('versions', 0, 'duration', {int}),
        'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
    })
    info.update(metadata)
    return info
|
||||
|
||||
def is_type(*types):
    """Return a `(key, value)` predicate that is true when `value['type']` is one of `types`."""
    def has_wanted_type(_, v):
        return v['type'] in types

    return has_wanted_type
|
||||
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||
'quoted preload state', default=None)
|
||||
|
@ -1202,6 +1304,19 @@ def _real_extract(self, url):
|
|||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
for video_data in traverse_obj(initial_data, (
|
||||
'stores', 'article', 'articleBodyContent', is_type('video'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
entry = parse_model(model)
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
return
|
||||
|
@ -1234,27 +1349,90 @@ def parse_media(media):
|
|||
'subtitles': subtitles,
|
||||
'timestamp': item_time,
|
||||
'description': strip_or_none(item_desc),
|
||||
'duration': int_or_none(item.get('duration')),
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
|
||||
for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
|
||||
name = resp['name']
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp,
|
||||
(lambda x: x['data']['blocks'],
|
||||
lambda x: x['data']['content']['model']['blocks'],),
|
||||
list) or []):
|
||||
if block.get('type') not in ['media', 'video']:
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
for block in traverse_obj(resp, (
|
||||
'data', (None, ('content', 'model')), 'blocks',
|
||||
is_type('media', 'video'), 'model', {dict})):
|
||||
parse_media(block)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# extract from SIMORGH_DATA hydration JSON
|
||||
simorgh_data = self._search_json(
|
||||
r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
|
||||
'simorgh data', playlist_id, default={})
|
||||
if simorgh_data:
|
||||
done = False
|
||||
for video_data in traverse_obj(simorgh_data, (
|
||||
'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if video_data['type'] == 'video':
|
||||
entry = parse_model(model)
|
||||
else: # legacyMedia: no duration, subtitles
|
||||
block_id, entry = traverse_obj(model, ('blockId', {str})), None
|
||||
media_data = traverse_obj(simorgh_data, (
|
||||
'pageData', 'promo', 'media',
|
||||
{lambda x: x if x['id'] == block_id else None}))
|
||||
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'ext': ('format', {str}),
|
||||
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
}))
|
||||
if formats:
|
||||
entry = {
|
||||
'id': block_id,
|
||||
'display_id': playlist_id,
|
||||
'formats': formats,
|
||||
'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||
}),
|
||||
}
|
||||
done = True
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if done:
|
||||
break
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def extract_all(pattern):
    """Parse every match of `pattern` in the webpage as JSON, dropping falsy results."""
    parsed_matches = (
        self._parse_json(raw, playlist_id, fatal=False)
        for raw in re.findall(pattern, webpage))
    return [parsed for parsed in parsed_matches if parsed]
|
||||
|
||||
# US accessed article with single embedded video (e.g.
|
||||
# https://www.bbc.com/news/uk-68546268)
|
||||
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
|
||||
('props', 'pageProps', 'page'))
|
||||
model = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('video'),
|
||||
'model', 'blocks', is_type('media'),
|
||||
'model', 'blocks', is_type('mediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if model and (entry := parse_model(model)):
|
||||
if not entry.get('timestamp'):
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('timestamp'), 'model',
|
||||
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# Multiple video article (e.g.
|
||||
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
||||
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
|
||||
|
|
|
@ -1,28 +1,72 @@
|
|||
import itertools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DailyWireBaseIE(InfoExtractor):
|
||||
_JSON_PATH = {
|
||||
'episode': ('props', 'pageProps', 'episodeData', 'episode'),
|
||||
'videos': ('props', 'pageProps', 'videoData', 'video'),
|
||||
'podcasts': ('props', 'pageProps', 'episode'),
|
||||
_GRAPHQL_API = 'https://v2server.dailywire.com/app/graphql'
|
||||
_GRAPHQL_QUERIES = {
|
||||
'getClipBySlug': 'query getClipBySlug($slug:String!){clip(where:{slug:$slug}){id,name,slug,description,image,show{id,name,slug},thumbnail,duration,createdBy{firstName,lastName},createdAt,videoURL}}',
|
||||
'getEpisodeBySlug': 'query getEpisodeBySlug($slug:String!){episode(where:{slug:$slug}){id,title,slug,description,createdAt,image,show{id,name,slug},segments{audio,video,duration,},createdBy{firstName,lastName}}}',
|
||||
'getPodcastEpisodes': 'query getPodcastEpisodes($where: PodcastEpisodeWhereInput, $orderBy: PodcastEpisodeOrderBy, $skip: Int, $first: Int) {listPodcastEpisode(where: $where, orderBy: $orderBy, skip: $skip, first: $first) {...ResPodcastEpisode}}, fragment ResPodcastEpisode on getPodcastEpisodeRes {id,title,description,slug,thumbnail,createdAt,audio,duration,podcast {id,name,slug,author {firstName,lastName}},season {id,name,slug}}',
|
||||
'getSeasonEpisodes': 'query getSeasonEpisodes($where:getSeasonEpisodesInput!,$first:Int,$skip:Int){getSeasonEpisodes(where:$where,first:$first,skip:$skip){episode{slug}}}',
|
||||
'getShowBySlug': 'query getShowBySlug($slug:String!){show(where:{slug:$slug}){id,name,description,image,seasons(orderBy:weight_DESC){id,name,slug}}}',
|
||||
'getVideoBySlug': 'query getVideoBySlug($slug:String!){video(where:{slug:$slug}){id,name,slug,description,image,thumbnail,videoURL,duration,createdBy{firstName,lastName},createdAt}}',
|
||||
}
|
||||
_GRAPHQL_VIDEO_QUERIES = {
|
||||
'clips': 'getClipBySlug',
|
||||
'episode': 'getEpisodeBySlug',
|
||||
'videos': 'getVideoBySlug',
|
||||
}
|
||||
_GRAPHQL_JSON_PATH = {
|
||||
'getClipBySlug': ('data', 'clip'),
|
||||
'getEpisodeBySlug': ('data', 'episode'),
|
||||
'getPodcastEpisodes': ('data', 'listPodcastEpisode'),
|
||||
'getSeasonEpisodes': ('data', 'getSeasonEpisodes', ..., 'episode', 'slug'),
|
||||
'getShowBySlug': ('data', 'show'),
|
||||
'getVideoBySlug': ('data', 'video'),
|
||||
}
|
||||
_API_HEADERS = {
|
||||
'Apollographql-Client-Name': 'DW_WEBSITE',
|
||||
'Content-Type': 'application/json',
|
||||
'Origin': 'https://www.dailywire.com',
|
||||
'Referer': 'https://www.dailywire.com/',
|
||||
}
|
||||
|
||||
def _get_json(self, url):
    """Download the page at `url` and return `(slug, data)` taken from its Next.js payload.

    The part of the payload that is returned is selected by the
    `_JSON_PATH` entry for the URL's `sites_type` group.
    """
    url_match = self._match_valid_url(url)
    sites_type, slug = url_match.group('sites_type', 'id')
    webpage = self._download_webpage(url, slug)
    nextjs_data = self._search_nextjs_data(webpage, slug)
    return slug, traverse_obj(nextjs_data, self._JSON_PATH[sites_type])
|
||||
def _real_initialize(self):
    """Add a bearer `Authorization` header when an `accessToken` cookie is present."""
    access_token = self._get_cookies('https://www.dailywire.com').get('accessToken')
    if access_token:
        # `_API_HEADERS` is a class-level dict shared by every instance and
        # subclass; rebind a per-instance copy instead of mutating it in place
        # so the token cannot leak into (or linger for) other extractors.
        self._API_HEADERS = {
            **self._API_HEADERS,
            'Authorization': f'Bearer {access_token.value}',
        }
|
||||
|
||||
def _call_api(self, slug, query, variables, message='Downloading JSON from GraphQL API'):
    """POST the named GraphQL query and return the selected part of the JSON response.

    `query` must be a key of `_GRAPHQL_QUERIES`. The response is narrowed
    with the matching `_GRAPHQL_JSON_PATH` entry, falling back to an empty
    path when none is registered for that query.
    """
    payload = {
        'query': self._GRAPHQL_QUERIES[query],
        'variables': variables,
    }
    response = self._download_json(
        self._GRAPHQL_API, slug, message,
        data=json.dumps(payload).encode(), headers=self._API_HEADERS)

    result_path = self._GRAPHQL_JSON_PATH.get(query, ())
    return traverse_obj(response, result_path)
|
||||
|
||||
def _paginate(self, slug, query, where):
|
||||
for i in itertools.count(0):
|
||||
page = self._call_api(
|
||||
slug, query, {'where': where, 'first': 10, 'skip': i * 10},
|
||||
message=f'Downloading page {i + 1}')
|
||||
if not page:
|
||||
break
|
||||
yield page
|
||||
|
||||
|
||||
class DailyWireIE(DailyWireBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>episode|videos)/(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>episode|videos|clips)/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailywire.com/episode/1-fauci',
|
||||
'info_dict': {
|
||||
|
@ -32,32 +76,79 @@ class DailyWireIE(DailyWireBaseIE):
|
|||
'title': '1. Fauci',
|
||||
'description': 'md5:9df630347ef85081b7e97dd30bc22853',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/episodes/ckzsl50xnqpy30850in3v4bu7/ckzsl50xnqpy30850in3v4bu7-1648237399554.jpg',
|
||||
'creator': 'Caroline Roberts',
|
||||
'series_id': 'ckzplm0a097fn0826r2vc3j7h',
|
||||
'series': 'China: The Enemy Within',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.dailywire.com/episode/ep-124-bill-maher',
|
||||
'info_dict': {
|
||||
'id': 'cl0ngbaalplc80894sfdo9edf',
|
||||
'ext': 'mp3',
|
||||
'display_id': 'ep-124-bill-maher',
|
||||
'title': 'Ep. 124 - Bill Maher',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/episodes/cl0ngbaalplc80894sfdo9edf/cl0ngbaalplc80894sfdo9edf-1647065568518.jpg',
|
||||
'creator': 'Caroline Roberts',
|
||||
'description': 'md5:adb0de584bcfa9c41374999d9e324e98',
|
||||
'series_id': 'cjzvep7270hp00786l9hwccob',
|
||||
'series': 'The Sunday Special',
|
||||
'upload_date': '20220218',
|
||||
'creators': ['Caroline Roberts'],
|
||||
'timestamp': 1645182003,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.dailywire.com/videos/the-hyperions',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'skip': 'premium only',
|
||||
'url': 'https://www.dailywire.com/episode/ep-3-avery-s-niece-new',
|
||||
'info_dict': {
|
||||
'id': 'clm8geguv3qku0870ewvcu0ed',
|
||||
'display_id': 'ep-3-avery-s-niece-new',
|
||||
'title': 'Ep 3 - Avery’s Niece',
|
||||
'description': 'md5:861ab336bd2bab2abebc25a1479a42e0',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/episodes/clm8geguv3qku0870ewvcu0ed/clm8geguv3qku0870ewvcu0ed-1694047935734.png',
|
||||
'series_id': 'clim20ue5f8160838ecz7ba8q',
|
||||
'ext': 'mp4',
|
||||
'subtitles': {'en-US': [{'ext': 'vtt'}]},
|
||||
'timestamp': 1694062826,
|
||||
'series': 'Convicting a Murderer',
|
||||
'creators': ['Scott Bowler '],
|
||||
'upload_date': '20230907',
|
||||
},
|
||||
}, {
|
||||
'skip': 'premium only',
|
||||
'url': 'https://www.dailywire.com/clips/the-making-of-run-hide-fight',
|
||||
'info_dict': {
|
||||
'id': 'ckjutyd6810dd0806ivcq2526',
|
||||
'display_id': 'the-making-of-run-hide-fight',
|
||||
'title': 'The Making of Run Hide Fight',
|
||||
'description': 'md5:085297d753b73ad87bdd8b050cc10d2c',
|
||||
'thumbnail': 'https://image.media.dailywire.com/K7OqsPwWH5c9hpWT68CHeZ4vRUtoz5Le/thumbnail.png',
|
||||
'duration': 916.790889,
|
||||
'creators': ['Paul Snyder'],
|
||||
'upload_date': '20210113',
|
||||
'timestamp': 1610506443,
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'skip': 'premium only',
|
||||
'url': 'https://www.dailywire.com/videos/choosing-death-the-legacy-of-roe',
|
||||
'info_dict': {
|
||||
'id': 'cl3260dva6pjr097819zw506s',
|
||||
'display_id': 'choosing-death-the-legacy-of-roe',
|
||||
'title': 'Choosing Death [The Legacy of Roe]',
|
||||
'description': 'md5:b07597f0ef32130365427a05fd1ccd25',
|
||||
'duration': 2618.0738,
|
||||
'timestamp': 1652308821,
|
||||
'upload_date': '20220511',
|
||||
'thumbnail': 'https://image.media.dailywire.com/FBgIBgmq635VuqTgWKjcGviEjJ2vJ02Zz/thumbnail.png',
|
||||
'subtitles': {'en-US': [{'ext': 'vtt'}]},
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug, episode_info = self._get_json(url)
|
||||
urls = traverse_obj(
|
||||
episode_info, (('segments', 'videoUrl'), ..., ('video', 'audio')), expected_type=url_or_none)
|
||||
sites_type, slug = self._match_valid_url(url).group('sites_type', 'id')
|
||||
episode_data = self._call_api(slug, self._GRAPHQL_VIDEO_QUERIES[sites_type], {'slug': slug})
|
||||
|
||||
if not episode_data:
|
||||
raise ExtractorError('video not found')
|
||||
|
||||
urls = traverse_obj(episode_data,
|
||||
(('segments', 'clips'), ..., ('video', 'audio'))
|
||||
) or [episode_data.get('videoURL')]
|
||||
|
||||
if 'Access Denied' in urls:
|
||||
self.report_warning(f'It looks like {slug} requires a login. Try passing cookies and try again.')
|
||||
|
||||
urls = [url_or_none(u) for u in urls if url_or_none(u)]
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for url in urls:
|
||||
|
@ -68,24 +159,27 @@ def _real_extract(self, url):
|
|||
formats.extend(format_)
|
||||
self._merge_subtitles(subs_, target=subtitles)
|
||||
return {
|
||||
'id': episode_info['id'],
|
||||
'id': episode_data.get('id'),
|
||||
'display_id': slug,
|
||||
'title': traverse_obj(episode_info, 'title', 'name'),
|
||||
'description': episode_info.get('description'),
|
||||
'creator': join_nonempty(('createdBy', 'firstName'), ('createdBy', 'lastName'), from_dict=episode_info, delim=' '),
|
||||
'duration': float_or_none(episode_info.get('duration')),
|
||||
'is_live': episode_info.get('isLive'),
|
||||
'thumbnail': traverse_obj(episode_info, 'thumbnail', 'image', expected_type=url_or_none),
|
||||
'title': traverse_obj(episode_data, 'title', 'name'),
|
||||
'description': episode_data.get('description'),
|
||||
'creator': join_nonempty(('createdBy', 'firstName'), ('createdBy', 'lastName'),
|
||||
from_dict=episode_data, delim=' '),
|
||||
'duration': float_or_none(episode_data.get('duration')),
|
||||
'timestamp': parse_iso8601(episode_data.get('createdAt')),
|
||||
'is_live': episode_data.get('isLive'),
|
||||
'thumbnail': traverse_obj(episode_data, 'thumbnail', 'image', expected_type=url_or_none),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'series_id': traverse_obj(episode_info, ('show', 'id')),
|
||||
'series': traverse_obj(episode_info, ('show', 'name')),
|
||||
'series_id': traverse_obj(episode_data, ('show', 'id')),
|
||||
'series': traverse_obj(episode_data, ('show', 'name')),
|
||||
}
|
||||
|
||||
|
||||
class DailyWirePodcastIE(DailyWireBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>podcasts)/(?P<podcaster>[\w-]+/(?P<id>[\w-]+))'
|
||||
_VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>podcasts)/(?P<podcaster>[\w-]+)/?(?P<id>[\w-]+)?'
|
||||
_TESTS = [{
|
||||
'note': 'serves shorter ad-free stream with paid cookies',
|
||||
'url': 'https://www.dailywire.com/podcasts/morning-wire/get-ready-for-recession-6-15-22',
|
||||
'info_dict': {
|
||||
'id': 'cl4f01d0w8pbe0a98ydd0cfn1',
|
||||
|
@ -93,21 +187,120 @@ class DailyWirePodcastIE(DailyWireBaseIE):
|
|||
'display_id': 'get-ready-for-recession-6-15-22',
|
||||
'title': 'Get Ready for Recession | 6.15.22',
|
||||
'description': 'md5:c4afbadda4e1c38a4496f6d62be55634',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/podcasts/ckx4otgd71jm508699tzb6hf4-1639506575562.jpg',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/podcasts/ckx4otgd71jm508699tzb6hf4-1667859984424.jpg',
|
||||
'duration': 900.117667,
|
||||
'timestamp': 1655261631,
|
||||
'season_id': 'morning-wire-morning-wire-podcast-season',
|
||||
'series_id': 'morning-wire',
|
||||
'creators': ['Georgia Howe'],
|
||||
'season': '2022',
|
||||
'series': 'Morning Wire',
|
||||
'upload_date': '20220615',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.dailywire.com/podcasts/enough',
|
||||
'info_dict': {
|
||||
'id': 'ckx4kvm8710i80869lvuu1b8z',
|
||||
'title': 'Enough',
|
||||
'display_id': 'enough',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug, episode_info = self._get_json(url)
|
||||
audio_id = traverse_obj(episode_info, 'audioMuxPlaybackId', 'VUsAipTrBVSgzw73SpC2DAJD401TYYwEp')
|
||||
podcaster, slug = self._match_valid_url(url).group('podcaster', 'id')
|
||||
|
||||
return {
|
||||
'id': episode_info['id'],
|
||||
'url': f'https://stream.media.dailywire.com/{audio_id}/audio.m4a',
|
||||
'display_id': slug,
|
||||
'title': episode_info.get('title'),
|
||||
'duration': float_or_none(episode_info.get('duration')),
|
||||
'thumbnail': episode_info.get('thumbnail'),
|
||||
'description': episode_info.get('description'),
|
||||
}
|
||||
def _extract_pod_ep_info(episode_data):
    """Build the info dict for one podcast episode from its GraphQL record.

    Every field is read defensively (``.get`` / ``traverse_obj``), so keys
    missing from ``episode_data`` come out as ``None`` instead of raising.
    """
    # NOTE: a leftover debug ``print(episode_data)`` was removed here; it
    # dumped the raw API payload to stdout for every extracted episode.
    return {
        'id': episode_data.get('id'),
        'url': episode_data.get('audio'),
        'display_id': episode_data.get('slug'),
        'title': episode_data.get('title'),
        'duration': float_or_none(episode_data.get('duration')),
        'timestamp': parse_iso8601(episode_data.get('createdAt')),
        'thumbnail': episode_data.get('thumbnail'),
        'description': episode_data.get('description'),
        'creator': join_nonempty(('podcast', 'author', 'firstName'),
                                 ('podcast', 'author', 'lastName'),
                                 from_dict=episode_data, delim=' '),
        'season': traverse_obj(episode_data, ('season', 'name')),
        'season_id': traverse_obj(episode_data, ('season', 'slug')),
        'series': traverse_obj(episode_data, ('podcast', 'name')),
        'series_id': traverse_obj(episode_data, ('podcast', 'slug')),
    }
|
||||
|
||||
if slug:
|
||||
episodes = self._call_api(slug, 'getPodcastEpisodes', {'where': {'slug': slug}})
|
||||
if episode_data := traverse_obj(episodes, ..., get_all=False):
|
||||
return _extract_pod_ep_info(episode_data)
|
||||
else:
|
||||
episodes = [
|
||||
episode for page in
|
||||
self._paginate(podcaster, 'getPodcastEpisodes', {'podcast': {'slug': podcaster}})
|
||||
for episode in page
|
||||
]
|
||||
|
||||
if episodes:
|
||||
podcast_data = traverse_obj(episodes, (..., 'podcast'), {}, get_all=False)
|
||||
return self.playlist_result(
|
||||
[_extract_pod_ep_info(e) for e in episodes],
|
||||
podcast_data.get('id'), podcast_data.get('name'), podcast_data.get('description'),
|
||||
display_id=podcast_data.get('slug'), thumbnail=podcast_data.get('coverImage'))
|
||||
|
||||
raise ExtractorError('Podcast not found')
|
||||
|
||||
|
||||
class DailyWireShowIE(DailyWireBaseIE):
    """Playlist extractor for ``dailywire.com/show/<slug>`` pages.

    Looks the show up through the GraphQL API, pages through the episodes of
    each of its seasons and returns one url-transparent entry per episode
    pointing at the ``/episode/<slug>`` URL.
    """
    _VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>show)/(?P<id>[\w-]+)'
    _TESTS = [{
        'skip': 'premium only',
        'url': 'https://www.dailywire.com/show/apollo-11-what-we-saw',
        'playlist_mincount': 28,
        'info_dict': {
            'id': 'ckixsvamonvl40862ysxve50i',
            'thumbnail': 'https://daily-wire-production.imgix.net/shows/ckixsvamonvl40862ysxve50i-1679082975554.jpg',
            'title': 'What We Saw',
            'description': 'md5:98d2a7d5cc8175494a4ca611058ed440',
        },
        'params': {
            'skip_download': True,
        },
        'playlist': [{
            'info_dict': {
                'id': 'cltf80tk79fxi0942c7h394b5',
                'season_id': 'what-we-saw-season-3-an-empire-of-terror-season',
                'ext': 'mp4',
                'display_id': 'season-3-an-empire-of-terror',
                'series_id': 'ckixsvamonvl40862ysxve50i',
                'title': 'Season 3: An Empire of Terror',
                'description': 'What We Saw: An Empire of Terror premieres on March 6, 2024.',
                'creators': ['Scott Bowler '],
                'upload_date': '20240306',
                'timestamp': 1709704832,
                'thumbnail': 'https://daily-wire-production.imgix.net/episodes/cltf80tk79fxi0942c7h394b5/cltf80tk79fxi0942c7h394b5-1709694601671.png',
                'series': 'What We Saw',
            },
        }],
    }]

    def _real_extract(self, url):
        """Return a playlist with one entry per episode of the show.

        Raises ExtractorError when the API returns no data for the slug.
        """
        slug = self._match_valid_url(url).group('id')

        show_data = self._call_api(slug, 'getShowBySlug', {'slug': slug})
        if not show_data:
            raise ExtractorError('Show not found')

        entries = []
        # ``or []`` also covers an explicit ``"seasons": null`` in the response
        for season_data in show_data.get('seasons') or []:
            season_slug = season_data.get('slug')
            # getShowBySlug only selects id, name and slug for each season,
            # so the season label has to come from 'name'
            season_name = season_data.get('name')
            season_filter = {'season': {'id': season_data.get('id')}}
            for page in self._paginate(season_slug, 'getSeasonEpisodes', season_filter):
                # each page is a list of episode slugs
                entries.extend(
                    self.url_result(
                        f'https://www.dailywire.com/episode/{episode_slug}',
                        season_id=season_slug, season=season_name, url_transparent=True)
                    for episode_slug in page)

        return self.playlist_result(
            entries, show_data.get('id'), show_data.get('name'), show_data.get('description'),
            thumbnail=show_data.get('image'))
|
||||
|
|
Loading…
Reference in New Issue
Block a user