1
1
mirror of https://github.com/ytdl-org/youtube-dl synced 2024-07-27 18:33:31 +02:00

Compare commits

...

5 Commits

Author SHA1 Message Date
dirkf
d19eb0932f
Merge 666a963046 into e1b3fa242c 2024-07-28 01:29:19 +09:00
dirkf
e1b3fa242c [Youtube] Find n function name in player 3400486c
Fixes #32877
2024-07-25 00:16:00 +01:00
dirkf
451046d62a [Youtube] Make n-sig throttling diagnostic up-to-date 2024-07-24 14:33:34 +01:00
df
666a963046 Improve metadata extraction 2021-09-13 15:04:14 +01:00
df
91557e752c Use player_token in JSON retrieval 2021-09-13 13:06:03 +01:00
3 changed files with 51 additions and 11 deletions

View File

@ -166,6 +166,14 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
),
(
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
),
(
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
),
]

View File

@ -3,9 +3,13 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
bool_or_none,
extract_attributes,
int_or_none,
smuggle_url,
try_get,
unified_timestamp,
url_or_none,
)
@ -23,17 +27,20 @@ class SlidesLiveIE(InfoExtractor):
'description': 'Watch full version of this video at https://slideslive.com/38902413.',
'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'timestamp': 1597615266,
'timestamp': 1618809663,
'upload_date': '20170925',
}
}, {
# video_service_name = yoda
'url': 'https://slideslive.com/38935785',
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a', # d735b130beb40013a839de1c58a74689
'info_dict': {
'id': 'RMraDYN5ozA_',
'id': 'F31OTzeGyDK_',
'display_id': '38935785',
'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
'upload_date': '20210220',
'timestamp': 1613785940,
},
'params': {
'format': 'bestvideo',
@ -54,8 +61,17 @@ class SlidesLiveIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player = self._search_regex(
r'<div\s[^>]*?id\s*=\s*(?P<q>\'|"|\b)player(?P=q)(?:\s[^>]*)?>.*?</div>',
webpage, 'player div', fatal=False, group=0)
player = (player and extract_attributes(player)) or {}
token = player.get('data-player-token')
if not token:
raise ExtractorError('Unable to get player token', expected=True)
video_data = self._download_json(
'https://ben.slideslive.com/player/' + video_id, video_id)
'https://ben.slideslive.com/player/' + video_id, video_id,
query={'player_token': token, })
service_name = video_data['video_service_name'].lower()
assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
service_id = video_data['video_service_id']
@ -72,12 +88,23 @@ class SlidesLiveIE(InfoExtractor):
})
info = {
'id': video_id,
'thumbnail': video_data.get('thumbnail'),
'thumbnail': video_data.get(
'thumbnail',
self._html_search_meta(('thumbnailUrl', 'thumbnailURL'), webpage)),
'is_live': bool_or_none(video_data.get('is_live')),
'subtitles': subtitles,
'timestamp': (
int_or_none(video_data.get('updated_at'))
or unified_timestamp(
self._html_search_meta('uploadDate', webpage))),
'creator': self._og_search_property('author', webpage, fatal=False),
}
title = (
video_data.get('title')
or self._html_search_meta('name', webpage, display_name='meta title')
or self._og_search_title(webpage, fatal=False))
if service_name in ('url', 'yoda'):
info['title'] = video_data['title']
info['title'] = title or video_data['title']
if service_name == 'url':
info['url'] = service_id
else:
@ -93,6 +120,7 @@ class SlidesLiveIE(InfoExtractor):
self._sort_formats(formats)
info.update({
'id': service_id,
'display_id': video_id,
'formats': formats,
})
else:
@ -100,7 +128,7 @@ class SlidesLiveIE(InfoExtractor):
'_type': 'url_transparent',
'url': service_id,
'ie_key': service_name.capitalize(),
'title': video_data.get('title'),
'title': title,
})
if service_name == 'vimeo':
info['url'] = smuggle_url(

View File

@ -1647,7 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
except JSInterpreter.Exception as e:
self.report_warning(
'%s (%s %s)' % (
'Unable to decode n-parameter: download likely to be throttled',
'Unable to decode n-parameter: expect download to be blocked or throttled',
error_to_compat_str(e),
traceback.format_exc()),
video_id=video_id)
@ -1659,18 +1659,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode):
    """Find the name of the "n" parameter transform function in player JS.

    The player code is searched for the statement that reads the "n"
    query parameter and passes it through a function, either called
    directly (``nfunc(b)``) or through an array element (``nfunc[idx](b)``).

    jscode -- text of the player JavaScript to search.

    Returns the matched function name when no array index is present.
    Otherwise the array literal assigned by ``var <name> = [...]`` is
    located in jscode, converted with js_to_json, parsed, and the element
    at position idx is returned.

    NOTE(review): failure behaviour when a pattern is not found is decided
    by self._search_regex, which is not shown here -- presumably it raises.
    """
    func_name, idx = self._search_regex(
        # new: (b=String.fromCharCode(110),c=a.get(b))&&(c=nfunc[idx](c)
        # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
        # old: .get("n"))&&(b=nfunc[idx](b)
        # older: .get("n"))&&(b=nfunc(b)
        r'''(?x)
            (?:\(\s*(?P<b>[a-z])\s*=\s*(?:
                String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
                "n+"\[\s*\+?\s*[\w$.]+\s*]
            )\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
            \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
            (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
        ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        # Direct call form: the matched identifier is the function itself.
        return func_name

    # Indexed form: the identifier names an array; resolve element idx.
    return self._parse_json(self._search_regex(
        r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
        'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
        func_name, transform_source=js_to_json)[int(idx)]