Merge 666a963046 into e1b3fa242c

[Youtube] Find n function name in player 3400486c
Fixes #32877
2024-07-27 18:33:31 +02:00 · 2024-07-28 01:29:19 +09:00 · 2024-07-25 00:16:00 +01:00 · 2024-07-24 14:33:34 +01:00 · 2021-09-13 15:04:14 +01:00 · 2021-09-13 13:06:03 +01:00
3 changed files with 51 additions and 11 deletions
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -166,6 +166,14 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
        'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
    ),
+    (
+        'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
+        'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
+    ),
+    (
+        'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
+        '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
+    ),
 ]


--- a/youtube_dl/extractor/slideslive.py
+++ b/youtube_dl/extractor/slideslive.py
@@ -3,9 +3,13 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
    bool_or_none,
+    extract_attributes,
+    int_or_none,
    smuggle_url,
    try_get,
+    unified_timestamp,
    url_or_none,
 )

@@ -23,17 +27,20 @@ class SlidesLiveIE(InfoExtractor):
            'description': 'Watch full version of this video at https://slideslive.com/38902413.',
            'uploader': 'SlidesLive Videos - A',
            'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
-            'timestamp': 1597615266,
+            'timestamp': 1618809663,
            'upload_date': '20170925',
        }
    }, {
        # video_service_name = yoda
        'url': 'https://slideslive.com/38935785',
-        'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
+        'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',  # d735b130beb40013a839de1c58a74689
        'info_dict': {
-            'id': 'RMraDYN5ozA_',
+            'id': 'F31OTzeGyDK_',
+            'display_id': '38935785',
            'ext': 'mp4',
            'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
+            'upload_date': '20210220',
+            'timestamp': 1613785940,
        },
        'params': {
            'format': 'bestvideo',
@@ -54,8 +61,17 @@ class SlidesLiveIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        player = self._search_regex(
+            r'<div\s[^>]*?id\s*=\s*(?P<q>\'|"|\b)player(?P=q)(?:\s[^>]*)?>.*?</div>',
+            webpage, 'player div', fatal=False, group=0)
+        player = (player and extract_attributes(player)) or {}
+        token = player.get('data-player-token')
+        if not token:
+            raise ExtractorError('Unable to get player token', expected=True)
        video_data = self._download_json(
-            'https://ben.slideslive.com/player/' + video_id, video_id)
+            'https://ben.slideslive.com/player/' + video_id, video_id,
+            query={'player_token': token, })
        service_name = video_data['video_service_name'].lower()
        assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
        service_id = video_data['video_service_id']
@@ -72,12 +88,23 @@ class SlidesLiveIE(InfoExtractor):
            })
        info = {
            'id': video_id,
-            'thumbnail': video_data.get('thumbnail'),
+            'thumbnail': video_data.get(
+                'thumbnail',
+                self._html_search_meta(('thumbnailUrl', 'thumbnailURL'), webpage)),
            'is_live': bool_or_none(video_data.get('is_live')),
            'subtitles': subtitles,
+            'timestamp': (
+                int_or_none(video_data.get('updated_at'))
+                or unified_timestamp(
+                    self._html_search_meta('uploadDate', webpage))),
+            'creator': self._og_search_property('author', webpage, fatal=False),
        }
+        title = (
+            video_data.get('title')
+            or self._html_search_meta('name', webpage, display_name='meta title')
+            or self._og_search_title(webpage, fatal=False))
        if service_name in ('url', 'yoda'):
-            info['title'] = video_data['title']
+            info['title'] = title or video_data['title']
            if service_name == 'url':
                info['url'] = service_id
            else:
@@ -93,6 +120,7 @@ class SlidesLiveIE(InfoExtractor):
                self._sort_formats(formats)
                info.update({
                    'id': service_id,
+                    'display_id': video_id,
                    'formats': formats,
                })
        else:
@@ -100,7 +128,7 @@ class SlidesLiveIE(InfoExtractor):
                '_type': 'url_transparent',
                'url': service_id,
                'ie_key': service_name.capitalize(),
-                'title': video_data.get('title'),
+                'title': title,
            })
            if service_name == 'vimeo':
                info['url'] = smuggle_url(
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1647,7 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        except JSInterpreter.Exception as e:
            self.report_warning(
                '%s (%s %s)' % (
-                    'Unable to decode n-parameter: download likely to be throttled',
+                    'Unable to decode n-parameter: expect download to be blocked or throttled',
                    error_to_compat_str(e),
                    traceback.format_exc()),
                video_id=video_id)
@@ -1659,18 +1659,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
    def _extract_n_function_name(self, jscode):
        func_name, idx = self._search_regex(
            # new: (b=String.fromCharCode(110),c=a.get(b))&&(c=nfunc[idx](c)
+            # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
            # old: .get("n"))&&(b=nfunc[idx](b)
            # older: .get("n"))&&(b=nfunc(b)
            r'''(?x)
-                (?:\(\s*(?P<b>[a-z])\s*=\s*String\s*\.\s*fromCharCode\s*\(\s*110\s*\)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
-                \.\s*get\s*\(\s*(?(b)(?P=b)|"n")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
+                (?:\(\s*(?P<b>[a-z])\s*=\s*(?:
+                    String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
+                    "n+"\[\s*\+?s*[\w$.]+\s*]
+                )\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
+                \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
                (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
        if not idx:
            return func_name

        return self._parse_json(self._search_regex(
-            r'var {0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
+            r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
            'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
            func_name, transform_source=js_to_json)[int(idx)]
Author	SHA1	Message	Date
dirkf	d19eb0932f	Merge `666a963046` into `e1b3fa242c`	2024-07-28 01:29:19 +09:00
dirkf	e1b3fa242c	[Youtube] Find `n` function name in player `3400486c` Fixes #32877	2024-07-25 00:16:00 +01:00
dirkf	451046d62a	[Youtube] Make n-sig throttling diagnostic up-to-date	2024-07-24 14:33:34 +01:00
df	666a963046	Improve metadata extraction	2021-09-13 15:04:14 +01:00
df	91557e752c	Use player_token in JSON retrieval	2021-09-13 13:06:03 +01:00