[^/?#]+)'
_TESTS = [{
- # movie
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
- 'md5': 'c86f60bf8b75436455b1b205f9745955',
+ # documentary
+ 'url': 'https://www.tv5monde.com/tv/video/65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
+ 'md5': 'd2a708902d3df230a357c99701aece05',
'info_dict': {
- 'id': 'ZX0ipMyFQq_6D4BA7b',
- 'display_id': 'les-novices',
+ 'id': '3FPa7JMu21_6D4BA7b',
+ 'display_id': '65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
'ext': 'mp4',
- 'title': 'Les novices',
- 'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
- 'upload_date': '20230821',
- 'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
- 'duration': 5177,
- 'episode': 'Les novices',
+ 'title': "Baudouin, l'héritage d'un roi",
+ 'thumbnail': 'https://psi.tv5monde.com/upsilon-images/960x540/6f/baudouin-f49c6b0e.jpg',
+ 'duration': 4842,
+ 'upload_date': '20240130',
+ 'timestamp': 1706641242,
+ 'episode': "BAUDOUIN, L'HERITAGE D'UN ROI",
+ 'description': 'md5:78125c74a5cac06d7743a2d09126edad',
+ 'series': "Baudouin, l'héritage d'un roi",
},
}, {
# series episode
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
+ 'url': 'https://www.tv5monde.com/tv/video/52952-toute-la-vie-mardi-23-mars-2021',
+ 'md5': 'f5e09637cadd55639c05874e22eb56bf',
'info_dict': {
- 'id': 'wJ0eeEPozr_6D4BA7b',
- 'display_id': 'opj-les-dents-de-la-terre-2',
+ 'id': 'obRRZ8m6g9_6D4BA7b',
+ 'display_id': '52952-toute-la-vie-mardi-23-mars-2021',
'ext': 'mp4',
- 'title': "OPJ - Les dents de la Terre (2)",
- 'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
- 'upload_date': '20230823',
- 'series': 'OPJ',
- 'episode': 'Les dents de la Terre (2)',
- 'duration': 2877,
- 'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
+ 'title': 'Toute la vie',
+ 'description': 'md5:a824a2e1dfd94cf45fa379a1fb43ce65',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5880553.jpg',
+ 'duration': 2526,
+ 'upload_date': '20230721',
+ 'timestamp': 1689971646,
+ 'series': 'Toute la vie',
+ 'episode': 'Mardi 23 mars 2021',
},
}, {
# movie
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
- 'md5': '32fa0cde16a4480d1251502a66856d5f',
+ 'url': 'https://www.tv5monde.com/tv/video/8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
+ 'md5': '87cefc34e10a6bf4f7823cccd7b36eb2',
'info_dict': {
- 'id': 'dc57a011-ec4b-4648-2a9a-4f03f8352ed3',
- 'display_id': 'ceux-qui-travaillent',
+ 'id': 'DOcfvdLKXL_6D4BA7b',
+ 'display_id': '8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
'ext': 'mp4',
- 'title': 'Ceux qui travaillent',
- 'description': 'md5:570e8bb688036ace873b2d50d24c026d',
- 'upload_date': '20210819',
+ 'title': 'Ce fleuve qui nous charrie',
+ 'description': 'md5:62ba3f875343c7fc4082bdfbbc1be992',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5476617.jpg',
+ 'duration': 5300,
+ 'upload_date': '20210822',
+ 'timestamp': 1629594105,
+ 'episode': 'CE FLEUVE QUI NOUS CHARRIE-P001-CE FLEUVE QUI NOUS CHARRIE',
+ 'series': 'Ce fleuve qui nous charrie',
},
- 'skip': 'no longer available',
}, {
- # series episode
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
+ # news
+ 'url': 'https://www.tv5monde.com/tv/video/70402-tv5monde-le-journal-edition-du-08-05-24-11h',
+ 'md5': 'c62977d6d10754a2ecebba70ad370479',
'info_dict': {
- 'id': '9e9d599e-23af-6915-843e-ecbf62e97925',
- 'display_id': 'vestiaires-caro-actrice',
+ 'id': 'LgQFrOCNsc_6D4BA7b',
+ 'display_id': '70402-tv5monde-le-journal-edition-du-08-05-24-11h',
'ext': 'mp4',
- 'title': "Vestiaires - Caro actrice",
- 'description': 'md5:db15d2e1976641e08377f942778058ea',
- 'upload_date': '20210819',
- 'series': "Vestiaires",
- 'episode': 'Caro actrice',
+ 'title': 'TV5MONDE, le journal',
+ 'description': 'md5:777dc209eaa4423b678477c36b0b04a8',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/6184105.jpg',
+ 'duration': 854,
+ 'upload_date': '20240508',
+ 'timestamp': 1715159640,
+ 'series': 'TV5MONDE, le journal',
+ 'episode': 'EDITION DU 08/05/24 - 11H',
},
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'no longer available',
- }, {
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
- 'only_matching': True,
- }, {
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
- 'only_matching': True,
}]
_GEO_BYPASS = False
@@ -98,7 +101,6 @@ def _real_extract(self, url):
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
self.raise_geo_restricted(countries=['FR'])
- title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
vpl_data = extract_attributes(self._search_regex(
r'(<[^>]+class="video_player_loader"[^>]+>)',
webpage, 'video player loader'))
@@ -147,26 +149,7 @@ def process_video_files(v):
process_video_files(video_files)
metadata = self._parse_json(
- vpl_data['data-metadata'], display_id)
- duration = (int_or_none(try_get(metadata, lambda x: x['content']['duration']))
- or parse_duration(self._html_search_meta('duration', webpage)))
-
- description = self._html_search_regex(
- r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
- 'description', fatal=False)
-
- series = self._html_search_regex(
- r'<div[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
- 'series', default=None)
-
- if series and series != title:
- title = '%s - %s' % (series, title)
-
- upload_date = self._search_regex(
- r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
- webpage, 'upload date', default=None)
- if upload_date:
- upload_date = upload_date.replace('_', '')
+ vpl_data.get('data-metadata') or '{}', display_id, fatal=False)
if not video_id:
video_id = self._search_regex(
@@ -175,16 +158,20 @@ def process_video_files(v):
default=display_id)
return {
+ **traverse_obj(metadata, ('content', {
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'episode': ('title', {str}),
+ 'series': ('series', {str}),
+ 'timestamp': ('publishDate_ts', {int_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ })),
'id': video_id,
'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': vpl_data.get('data-image'),
- 'duration': duration,
- 'upload_date': upload_date,
+ 'title': clean_html(get_element_by_class('main-title', webpage)),
+ 'description': clean_html(get_element_by_class('text', get_element_html_by_class('ep-summary', webpage) or '')),
+ 'thumbnail': url_or_none(vpl_data.get('data-image')),
'formats': formats,
'subtitles': self._extract_subtitles(self._parse_json(
traverse_obj(vpl_data, ('data-captions', {str}), default='{}'), display_id, fatal=False)),
- 'series': series,
- 'episode': episode,
}
diff --git a/yt_dlp/extractor/tva.py b/yt_dlp/extractor/tva.py
index 9afe233284..e3e10557c2 100644
--- a/yt_dlp/extractor/tva.py
+++ b/yt_dlp/extractor/tva.py
@@ -1,10 +1,9 @@
+import functools
+import re
+
from .common import InfoExtractor
-from ..utils import (
- float_or_none,
- int_or_none,
- smuggle_url,
- strip_or_none,
-)
+from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
+from ..utils.traversal import traverse_obj
class TVAIE(InfoExtractor):
@@ -49,11 +48,20 @@ class QubIE(InfoExtractor):
'info_dict': {
'id': '6084352463001',
'ext': 'mp4',
- 'title': 'Épisode 01',
+ 'title': 'Ép 01. Mon dernier jour',
'uploader_id': '5481942443001',
'upload_date': '20190907',
'timestamp': 1567899756,
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'episode': 'Ép 01. Mon dernier jour',
+ 'episode_number': 1,
+ 'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
+ 'duration': 2625.963,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'Alerte Amber',
+ 'channel': 'TVA',
},
}, {
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
@@ -64,22 +72,24 @@ class QubIE(InfoExtractor):
def _real_extract(self, url):
entity_id = self._match_id(url)
- entity = self._download_json(
- 'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
- entity_id, query={'id': entity_id})
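+ # entity metadata is embedded in the page as Next.js fallback data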
+ webpage = self._download_webpage(url, entity_id)
+ entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData']
video_id = entity['videoId']
episode = strip_or_none(entity.get('name'))
return {
'_type': 'url_transparent',
+ 'url': f'https://videos.tva.ca/details/_{video_id}',
+ 'ie_key': TVAIE.ie_key(),
'id': video_id,
'title': episode,
- # 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
- 'url': 'https://videos.tva.ca/details/_' + video_id,
- 'description': entity.get('longDescription'),
- 'duration': float_or_none(entity.get('durationMillis'), 1000),
'episode': episode,
- 'episode_number': int_or_none(entity.get('episodeNumber')),
- # 'ie_key': 'BrightcoveNew',
- 'ie_key': TVAIE.ie_key(),
+ **traverse_obj(entity, {
+ 'description': ('longDescription', {str}),
+ 'duration': ('durationMillis', {functools.partial(float_or_none, scale=1000)}),
+ 'channel': ('knownEntities', 'channel', 'name', {str}),
+ 'series': ('knownEntities', 'videoShow', 'name', {str}),
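+ # the season number is parsed from the slug, which uses either the French 'saison-<n>' or English 'season-<n>' segment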
+ 'season_number': ('slug', {lambda x: re.search(r'/s(?:ai|ea)son-(\d+)/', x)}, 1, {int_or_none}),
+ 'episode_number': ('episodeNumber', {int_or_none}),
+ }),
}
diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py
index c55786a0dc..80cba09155 100644
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -191,17 +191,25 @@ def _get_thumbnails(self, thumbnail):
}] if thumbnail else None
def _extract_twitch_m3u8_formats(self, path, video_id, token, signature):
- return self._extract_m3u8_formats(
+ formats = self._extract_m3u8_formats(
f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
'allow_source': 'true',
'allow_audio_only': 'true',
'allow_spectre': 'true',
'p': random.randint(1000000, 10000000),
+ 'platform': 'web',
'player': 'twitchweb',
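+ # advertise codec support so the usher playlist can include AV1/HEVC variants when available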
+ 'supported_codecs': 'av1,h265,h264',
'playlist_include_framerate': 'true',
'sig': signature,
'token': token,
})
+ for fmt in formats:
+ if fmt.get('vcodec') and fmt['vcodec'].startswith('av01'):
+ # mpegts does not yet have proper support for av1
+ fmt['downloader_options'] = {'ffmpeg_args_out': ['-f', 'mp4']}
+
+ return formats
class TwitchVodIE(TwitchBaseIE):
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index ecc865655d..fc80dade8f 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -34,9 +34,9 @@
class TwitterBaseIE(InfoExtractor):
_NETRC_MACHINE = 'twitter'
- _API_BASE = 'https://api.twitter.com/1.1/'
- _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
- _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
+ _API_BASE = 'https://api.x.com/1.1/'
+ _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
+ _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
_flow_token = None
@@ -153,6 +153,14 @@ def _search_dimensions_in_video_url(a_format, video_url):
def is_logged_in(self):
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
+ # XXX: Temporary workaround until twitter.com => x.com migration is completed
+ def _real_initialize(self):
+ if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
+ return
+ # User has not yet been migrated to x.com and has passed twitter.com cookies
+ TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
+ TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
+
@functools.cached_property
def _selected_api(self):
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
@@ -196,17 +204,15 @@ def _perform_login(self, username, password):
if self.is_logged_in:
return
- webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
- guest_token = self._search_regex(
- r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
+ guest_token = self._fetch_guest_token(None)
headers = {
**self._set_base_headers(),
'content-type': 'application/json',
'x-guest-token': guest_token,
'x-twitter-client-language': 'en',
'x-twitter-active-user': 'yes',
- 'Referer': 'https://twitter.com/',
- 'Origin': 'https://twitter.com',
+ 'Referer': 'https://x.com/',
+ 'Origin': 'https://x.com',
}
def build_login_json(*subtask_inputs):
@@ -1191,6 +1197,31 @@ class TwitterIE(TwitterBaseIE):
'age_limit': 0,
'_old_archive_ids': ['twitter 1724884212803834154'],
},
+ }, {
+ # x.com
+ 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
+ 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
+ 'info_dict': {
+ 'id': '1790637589910654976',
+ 'ext': 'mp4',
+ 'title': 'Historic Vids - One of the most intense moments in history',
+ 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
+ 'display_id': '1790637656616943991',
+ 'uploader': 'Historic Vids',
+ 'uploader_id': 'historyinmemes',
+ 'uploader_url': 'https://twitter.com/historyinmemes',
+ 'channel_id': '855481986290524160',
+ 'upload_date': '20240515',
+ 'timestamp': 1715756260.0,
+ 'duration': 15.488,
+ 'tags': [],
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 1790637656616943991'],
+ }
}, {
# onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py
index 4316c31d2b..1e2d118aa6 100644
--- a/yt_dlp/extractor/unsupported.py
+++ b/yt_dlp/extractor/unsupported.py
@@ -173,6 +173,20 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
r'filemoon\.sx',
r'hentai\.animestigma\.com',
r'thisav\.com',
+ r'gounlimited\.to',
+ r'highstream\.tv',
+ r'uqload\.com',
+ r'vedbam\.xyz',
+ r'vadbam\.net',
+ r'vidlo\.us',
+ r'wolfstream\.tv',
+ r'xvideosharing\.com',
+ r'(?:\w+\.)?viidshar\.com',
+ r'sxyprn\.com',
+ r'jable\.tv',
+ r'91porn\.com',
+ r'einthusan\.(?:tv|com|ca)',
+ r'yourupload\.com',
)
_TESTS = [{
diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py
index e4a78c2977..28d5026850 100644
--- a/yt_dlp/extractor/vk.py
+++ b/yt_dlp/extractor/vk.py
@@ -451,6 +451,7 @@ def _real_extract(self, url):
info_page, 'view count', default=None))
formats = []
+ subtitles = {}
for format_id, format_url in data.items():
format_url = url_or_none(format_url)
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
@@ -462,12 +463,21 @@ def _real_extract(self, url):
formats.append({
'format_id': format_id,
'url': format_url,
+ 'ext': 'mp4',
+ 'source_preference': 1,
'height': height,
})
elif format_id == 'hls':
- formats.extend(self._extract_m3u8_formats(
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id, fatal=False, live=is_live))
+ m3u8_id=format_id, fatal=False, live=is_live)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif format_id.startswith('dash_'):
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id=format_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
elif format_id == 'rtmp':
formats.append({
'format_id': format_id,
@@ -475,7 +485,6 @@ def _real_extract(self, url):
'ext': 'flv',
})
- subtitles = {}
for sub in data.get('subs') or {}:
subtitles.setdefault(sub.get('lang', 'en'), []).append({
'ext': sub.get('title', '.srt').split('.')[-1],
@@ -496,6 +505,7 @@ def _real_extract(self, url):
'comment_count': int_or_none(mv_data.get('commcount')),
'is_live': is_live,
'subtitles': subtitles,
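+ # the progressive mp4 URLs above carry source_preference=1, so sorting by ('res', 'source') prefers them over HLS/DASH at equal resolution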
+ '_format_sort_fields': ('res', 'source'),
}
@@ -707,6 +717,7 @@ def _real_extract(self, url):
class VKPlayBaseIE(InfoExtractor):
+ _BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vkplay\.ru)/'
_RESOLUTIONS = {
'tiny': '256x144',
'lowest': '426x240',
@@ -765,7 +776,7 @@ def _extract_common_meta(self, stream_info):
class VKPlayIE(VKPlayBaseIE):
- _VALID_URL = r'https?://vkplay\.live/(?P<username>[^/#?]+)/record/(?P<id>[a-f0-9-]+)'
+ _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<username>[^/#?]+)/record/(?P<id>[\da-f-]+)'
_TESTS = [{
'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da',
'info_dict': {
@@ -776,13 +787,16 @@ class VKPlayIE(VKPlayBaseIE):
'uploader_id': '13159830',
'release_timestamp': 1683461378,
'release_date': '20230507',
- 'thumbnail': r're:https://images.vkplay.live/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview\?change_time=\d+',
+ 'thumbnail': r're:https://[^/]+/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview',
'duration': 10608,
'view_count': int,
'like_count': int,
'categories': ['Atomic Heart'],
},
'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -802,7 +816,7 @@ def _real_extract(self, url):
class VKPlayLiveIE(VKPlayBaseIE):
- _VALID_URL = r'https?://vkplay\.live/(?P<id>[^/#?]+)/?(?:[#?]|$)'
+ _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<id>[^/#?]+)/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://vkplay.live/bayda',
'info_dict': {
@@ -813,7 +827,7 @@ class VKPlayLiveIE(VKPlayBaseIE):
'uploader_id': '12279401',
'release_timestamp': 1687209962,
'release_date': '20230619',
- 'thumbnail': r're:https://images.vkplay.live/public_video_stream/12279401/preview\?change_time=\d+',
+ 'thumbnail': r're:https://[^/]+/public_video_stream/12279401/preview',
'view_count': int,
'concurrent_view_count': int,
'like_count': int,
@@ -822,6 +836,9 @@ class VKPlayLiveIE(VKPlayBaseIE):
},
'skip': 'livestream',
'params': {'skip_download': True},
+ }, {
+ 'url': 'https://live.vkplay.ru/lebwa',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py
deleted file mode 100644
index ef77bedd27..0000000000
--- a/yt_dlp/extractor/voot.py
+++ /dev/null
@@ -1,212 +0,0 @@
-import json
-import time
-import uuid
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..networking.exceptions import HTTPError
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- jwt_decode_hs256,
- parse_age_limit,
- traverse_obj,
- try_call,
- try_get,
- unified_strdate,
-)
-
-
-class VootBaseIE(InfoExtractor):
- _NETRC_MACHINE = 'voot'
- _GEO_BYPASS = False
- _LOGIN_HINT = 'Log in with "-u <email_address> -p <password>", or use "-u token -p <auth_token>" to login with auth token.'
- _TOKEN = None
- _EXPIRY = 0
- _API_HEADERS = {'Origin': 'https://www.voot.com', 'Referer': 'https://www.voot.com/'}
-
- def _perform_login(self, username, password):
- if self._TOKEN and self._EXPIRY:
- return
-
- if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
- VootBaseIE._TOKEN = password
- VootBaseIE._EXPIRY = jwt_decode_hs256(password)['exp']
- self.report_login()
-
- # Mobile number as username is not supported
- elif not username.isdigit():
- check_username = self._download_json(
- 'https://userauth.voot.com/usersV3/v3/checkUser', None, data=json.dumps({
- 'type': 'email',
- 'email': username
- }, separators=(',', ':')).encode(), headers={
- **self._API_HEADERS,
- 'Content-Type': 'application/json;charset=utf-8',
- }, note='Checking username', expected_status=403)
- if not traverse_obj(check_username, ('isExist', {bool})):
- if traverse_obj(check_username, ('status', 'code', {int})) == 9999:
- self.raise_geo_restricted(countries=['IN'])
- raise ExtractorError('Incorrect username', expected=True)
- auth_token = traverse_obj(self._download_json(
- 'https://userauth.voot.com/usersV3/v3/login', None, data=json.dumps({
- 'type': 'traditional',
- 'deviceId': str(uuid.uuid4()),
- 'deviceBrand': 'PC/MAC',
- 'data': {
- 'email': username,
- 'password': password
- }
- }, separators=(',', ':')).encode(), headers={
- **self._API_HEADERS,
- 'Content-Type': 'application/json;charset=utf-8',
- }, note='Logging in', expected_status=400), ('data', 'authToken', {dict}))
- if not auth_token:
- raise ExtractorError('Incorrect password', expected=True)
- VootBaseIE._TOKEN = auth_token['accessToken']
- VootBaseIE._EXPIRY = auth_token['expirationTime']
-
- else:
- raise ExtractorError(self._LOGIN_HINT, expected=True)
-
- def _check_token_expiry(self):
- if int(time.time()) >= self._EXPIRY:
- raise ExtractorError('Access token has expired', expected=True)
-
- def _real_initialize(self):
- if not self._TOKEN:
- self.raise_login_required(self._LOGIN_HINT, method=None)
- self._check_token_expiry()
-
-
-class VootIE(VootBaseIE):
- _WORKING = False
- _VALID_URL = r'''(?x)
- (?:
- voot:|
- https?://(?:www\.)?voot\.com/?
- (?:
- movies?/[^/]+/|
- (?:shows|kids)/(?:[^/]+/){4}
- )
- )
- (?P<id>\d{3,})
- '''
- _TESTS = [{
- 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
- 'info_dict': {
- 'id': '441353',
- 'ext': 'mp4',
- 'title': 'Is this the end of Kamini?',
- 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
- 'timestamp': 1472103000,
- 'upload_date': '20160825',
- 'series': 'Ishq Ka Rang Safed',
- 'season_number': 1,
- 'episode': 'Is this the end of Kamini?',
- 'episode_number': 340,
- 'release_date': '20160825',
- 'season': 'Season 1',
- 'age_limit': 13,
- 'duration': 1146.0,
- },
- 'params': {'skip_download': 'm3u8'},
- }, {
- 'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
- 'only_matching': True,
- }, {
- 'url': 'https://www.voot.com/movies/pandavas-5/424627',
- 'only_matching': True,
- }, {
- 'url': 'https://www.voot.com/movie/fight-club/621842',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- media_info = self._download_json(
- 'https://psapi.voot.com/jio/voot/v1/voot-web/content/query/asset-details', video_id,
- query={'ids': f'include:{video_id}', 'responseType': 'common'}, headers={'accesstoken': self._TOKEN})
-
- try:
- m3u8_url = self._download_json(
- 'https://vootapi.media.jio.com/playback/v1/playbackrights', video_id,
- 'Downloading playback JSON', data=b'{}', headers={
- **self.geo_verification_headers(),
- **self._API_HEADERS,
- 'Content-Type': 'application/json;charset=utf-8',
- 'platform': 'androidwebdesktop',
- 'vootid': video_id,
- 'voottoken': self._TOKEN,
- })['m3u8']
- except ExtractorError as e:
- if isinstance(e.cause, HTTPError) and e.cause.status == 400:
- self._check_token_expiry()
- raise
-
- formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
- self._remove_duplicate_formats(formats)
-
- return {
- 'id': video_id,
- # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
- 'formats': traverse_obj(formats, (
- lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
- 'http_headers': self._API_HEADERS,
- **traverse_obj(media_info, ('result', 0, {
- 'title': ('fullTitle', {str}),
- 'description': ('fullSynopsis', {str}),
- 'series': ('showName', {str}),
- 'season_number': ('season', {int_or_none}),
- 'episode': ('fullTitle', {str}),
- 'episode_number': ('episode', {int_or_none}),
- 'timestamp': ('uploadTime', {int_or_none}),
- 'release_date': ('telecastDate', {unified_strdate}),
- 'age_limit': ('ageNemonic', {parse_age_limit}),
- 'duration': ('duration', {float_or_none}),
- })),
- }
-
-
-class VootSeriesIE(VootBaseIE):
- _WORKING = False
- _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})'
- _TESTS = [{
- 'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002',
- 'playlist_mincount': 442,
- 'info_dict': {
- 'id': '100002',
- },
- }, {
- 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/100003',
- 'playlist_mincount': 341,
- 'info_dict': {
- 'id': '100003',
- },
- }]
- _SHOW_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/season-by-show?sort=season%3Aasc&id={}&responseType=common'
- _SEASON_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/series-wise-episode?sort=episode%3Aasc&id={}&responseType=common&page={:d}'
-
- def _entries(self, show_id):
- show_json = self._download_json(self._SHOW_API.format(show_id), video_id=show_id)
- for season in show_json.get('result', []):
- page_num = 1
- season_id = try_get(season, lambda x: x['id'], compat_str)
- season_json = self._download_json(self._SEASON_API.format(season_id, page_num),
- video_id=season_id,
- note='Downloading JSON metadata page %d' % page_num)
- episodes_json = season_json.get('result', [])
- while episodes_json:
- page_num += 1
- for episode in episodes_json:
- video_id = episode.get('id')
- yield self.url_result(
- 'voot:%s' % video_id, ie=VootIE.ie_key(), video_id=video_id)
- episodes_json = self._download_json(self._SEASON_API.format(season_id, page_num),
- video_id=season_id,
- note='Downloading JSON metadata page %d' % page_num)['result']
-
- def _real_extract(self, url):
- show_id = self._match_id(url)
- return self.playlist_result(self._entries(show_id), playlist_id=show_id)
diff --git a/yt_dlp/extractor/vrt.py b/yt_dlp/extractor/vrt.py
index 497233d95f..3d26549a40 100644
--- a/yt_dlp/extractor/vrt.py
+++ b/yt_dlp/extractor/vrt.py
@@ -16,6 +16,7 @@
join_nonempty,
jwt_encode_hs256,
make_archive_id,
+ merge_dicts,
parse_age_limit,
parse_iso8601,
str_or_none,
@@ -425,3 +426,64 @@ def _real_extract(self, url):
['description', 'twitter:description', 'og:description'], webpage),
'_old_archive_ids': [make_archive_id('Canvas', video_id)],
}
+
+
+class Radio1BeIE(VRTBaseIE):
+ _VALID_URL = r'https?://radio1\.be/(?:lees|luister/select)/(?P<id>[\w/-]+)'
+ _TESTS = [{
+ 'url': 'https://radio1.be/luister/select/de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie',
+ 'info_dict': {
+ 'id': 'eb6c22e9-544f-44f4-af39-cf8cccd29e22',
+ 'title': 'Komt N-VA volgend jaar op in Wallonië?',
+ 'display_id': 'de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie',
+ 'description': 'md5:b374ea1c9302f38362df9dea1931468e',
+ 'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+'
+ },
+ 'playlist_mincount': 1
+ }, {
+ 'url': 'https://radio1.be/lees/europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza?view=web',
+ 'info_dict': {
+ 'id': '5d47f102-dbdb-4fa0-832b-26c1870311f2',
+ 'title': 'Europese Unie wil "onmiddellijke humanitaire pauze" en "duurzaam staakt-het-vuren" in Gaza',
+ 'description': 'md5:1aad1fae7d39edeffde5d3e67d276b64',
+ 'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+',
+ 'display_id': 'europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza'
+ },
+ 'playlist_mincount': 1
+ }]
+
+ def _extract_video_entries(self, next_js_data, display_id):
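+ # the page item itself and each of its paragraphs may carry a mediaReference; only those entries yield videos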
+ video_data = traverse_obj(
+ next_js_data, ((None, ('paragraphs', ...)), {lambda x: x if x['mediaReference'] else None}))
+ for data in video_data:
+ media_reference = data['mediaReference']
+ formats, subtitles = self._extract_formats_and_subtitles(
+ self._call_api(media_reference), display_id)
+
+ yield {
+ 'id': media_reference,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('body', {clean_html})
+ }),
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['item']
+
+ return self.playlist_result(
+ self._extract_video_entries(next_js_data, display_id), **merge_dicts(traverse_obj(
+ next_js_data, ({
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'description': (('description', 'content'), {clean_html}),
+ }), get_all=False), {
+ 'display_id': display_id,
+ 'title': self._html_search_meta(['name', 'og:title', 'twitter:title'], webpage),
+ 'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
+ 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
+ }))
diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py
index bce5e8326b..f2256fdc60 100644
--- a/yt_dlp/extractor/wistia.py
+++ b/yt_dlp/extractor/wistia.py
@@ -1,6 +1,6 @@
+import base64
import re
import urllib.parse
-from base64 import b64decode
from .common import InfoExtractor
from ..networking import HEADRequest
@@ -371,7 +371,7 @@ def _real_extract(self, url):
webpage = self._download_webpage(f'https://fast.wistia.net/embed/channel/{channel_id}', channel_id)
data = self._parse_json(
self._search_regex(r'wchanneljsonp-%s\'\]\s*=[^\"]*\"([A-Za-z0-9=/]*)' % channel_id, webpage, 'jsonp', channel_id),
- channel_id, transform_source=lambda x: urllib.parse.unquote_plus(b64decode(x).decode('utf-8')))
+ channel_id, transform_source=lambda x: urllib.parse.unquote_plus(base64.b64decode(x).decode('utf-8')))
# XXX: can there be more than one series?
series = traverse_obj(data, ('series', 0), default={})
diff --git a/yt_dlp/extractor/wrestleuniverse.py b/yt_dlp/extractor/wrestleuniverse.py
index 145246a148..d401d6d39d 100644
--- a/yt_dlp/extractor/wrestleuniverse.py
+++ b/yt_dlp/extractor/wrestleuniverse.py
@@ -12,6 +12,7 @@
jwt_decode_hs256,
traverse_obj,
try_call,
+ url_basename,
url_or_none,
urlencode_postdata,
variadic,
@@ -147,7 +148,7 @@ def _download_metadata(self, url, video_id, lang, props_keys):
metadata = self._call_api(video_id, msg='metadata', query={'al': lang or 'ja'}, auth=False, fatal=False)
if not metadata:
webpage = self._download_webpage(url, video_id)
- nextjs_data = self._search_nextjs_data(webpage, video_id)
+ nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
metadata = traverse_obj(nextjs_data, (
'props', 'pageProps', *variadic(props_keys, (str, bytes, dict, set)), {dict})) or {}
return metadata
@@ -194,8 +195,7 @@ def _real_extract(self, url):
return {
'id': video_id,
- 'formats': self._get_formats(video_data, (
- (('protocolHls', 'url'), ('chromecastUrls', ...)), {url_or_none}), video_id),
+ 'formats': self._get_formats(video_data, ('protocolHls', 'url', {url_or_none}), video_id),
**traverse_obj(metadata, {
'title': ('displayName', {str}),
'description': ('description', {str}),
@@ -259,6 +259,10 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
'params': {
'skip_download': 'm3u8',
},
+ }, {
+ 'note': 'manifest provides live-a (partial) and live-b (full) streams',
+ 'url': 'https://www.wrestle-universe.com/en/lives/umc99R9XsexXrxr9VjTo9g',
+ 'only_matching': True,
}]
_API_PATH = 'events'
@@ -285,12 +289,16 @@ def _real_extract(self, url):
video_data, decrypt = self._call_encrypted_api(
video_id, ':watchArchive', 'watch archive', data={'method': 1})
- info['formats'] = self._get_formats(video_data, (
- ('hls', None), ('urls', 'chromecastUrls'), ..., {url_or_none}), video_id)
+ # 'chromecastUrls' can be only partial videos, so avoid them
+ info['formats'] = self._get_formats(video_data, ('hls', (('urls', ...), 'url'), {url_or_none}), video_id)
for f in info['formats']:
# bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
if f.get('tbr'):
f['tbr'] = int(f['tbr'] / 2.5)
+ # prefer variants with the same basename as the master playlist to avoid partial streams
+ f['format_id'] = url_basename(f['url']).partition('.')[0]
+ if not f['format_id'].startswith(url_basename(f['manifest_url']).partition('.')[0]):
+ f['preference'] = -10
hls_aes_key = traverse_obj(video_data, ('hls', 'key', {decrypt}))
if hls_aes_key:
diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py
deleted file mode 100644
index 08c6d6c7c0..0000000000
--- a/yt_dlp/extractor/xfileshare.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- decode_packed_codes,
- determine_ext,
- int_or_none,
- js_to_json,
- urlencode_postdata,
-)
-
-
-# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
-def aa_decode(aa_code):
- symbol_table = [
- ('7', '((゚ー゚) + (o^_^o))'),
- ('6', '((o^_^o) +(o^_^o))'),
- ('5', '((゚ー゚) + (゚Θ゚))'),
- ('2', '((o^_^o) - (゚Θ゚))'),
- ('4', '(゚ー゚)'),
- ('3', '(o^_^o)'),
- ('1', '(゚Θ゚)'),
- ('0', '(c^_^o)'),
- ]
- delim = '(゚Д゚)[゚ε゚]+'
- ret = ''
- for aa_char in aa_code.split(delim):
- for val, pat in symbol_table:
- aa_char = aa_char.replace(pat, val)
- aa_char = aa_char.replace('+ ', '')
- m = re.match(r'^\d+', aa_char)
- if m:
- ret += chr(int(m.group(0), 8))
- else:
- m = re.match(r'^u([\da-f]+)', aa_char)
- if m:
- ret += chr(int(m.group(1), 16))
- return ret
-
-
-class XFileShareIE(InfoExtractor):
- _SITES = (
- (r'aparat\.cam', 'Aparat'),
- (r'clipwatching\.com', 'ClipWatching'),
- (r'gounlimited\.to', 'GoUnlimited'),
- (r'govid\.me', 'GoVid'),
- (r'holavid\.com', 'HolaVid'),
- (r'streamty\.com', 'Streamty'),
- (r'thevideobee\.to', 'TheVideoBee'),
- (r'uqload\.com', 'Uqload'),
- (r'vidbom\.com', 'VidBom'),
- (r'vidlo\.us', 'vidlo'),
- (r'vidlocker\.xyz', 'VidLocker'),
- (r'vidshare\.tv', 'VidShare'),
- (r'vup\.to', 'VUp'),
- (r'wolfstream\.tv', 'WolfStream'),
- (r'xvideosharing\.com', 'XVideoSharing'),
- )
-
- IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
- _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
- % '|'.join(site for site in list(zip(*_SITES))[0]))
- _EMBED_REGEX = [r'