Mirror of https://github.com/yt-dlp/yt-dlp.git, synced 2024-06-17 11:10:21 +02:00

Compare commits: 26 commits (5517a7b68f...d791073612)
SHA1:
d791073612
0efd83b31a
12d8ea8246
8e15177b41
dd9ad97b1f
61b17437dc
db14294b5c
51e99b0759
0d520bc008
f964b72450
8ea52ec344
b41348b988
833862cfbc
eecdc5870c
a40e0f6c5f
01fe8e8fa6
3999a510f7
fddf9e0577
6c3140a8c1
41add1d7af
bff727c043
39a45d48f9
a14bb53ab5
14505063ec
e565e45a6f
b44e0f8b98
@@ -666,7 +666,7 @@ ## Filesystem Options:
 The name of the browser to load cookies
 from. Currently supported browsers are:
 brave, chrome, chromium, edge, firefox,
-opera, safari, vivaldi. Optionally, the
+opera, safari, vivaldi, whale. Optionally, the
 KEYRING used for decrypting Chromium cookies
 on Linux, the name/path of the PROFILE to
 load cookies from, and the CONTAINER name
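The help text above documents `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`. A minimal sketch of the embedded-API equivalent, assuming the documented `cookiesfrombrowser` tuple order of (browser, profile, keyring, container); the URL is a placeholder:

    import yt_dlp

    # CLI equivalent (illustrative): yt-dlp --cookies-from-browser whale URL
    with yt_dlp.YoutubeDL({'cookiesfrombrowser': ('whale', None, None, None)}) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=...'])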
@@ -1760,7 +1760,7 @@ # EXTRACTOR ARGUMENTS
 #### youtube
 * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
 * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen`, `mediaconnect` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
+* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
 * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
 * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
 * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
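A hedged example of how these keys are passed at runtime; arguments for one extractor are separated with `;`, list values with `,`. The URL is a placeholder:

    import yt_dlp

    # CLI equivalent (illustrative):
    #   yt-dlp --extractor-args "youtube:player_client=ios,web;player_skip=webpage" URL
    opts = {'extractor_args': {'youtube': {'player_client': ['ios', 'web'], 'player_skip': ['webpage']}}}
    with yt_dlp.YoutubeDL(opts) as ydl:
        info = ydl.extract_info('https://www.youtube.com/watch?v=...', download=False)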
@@ -8,7 +8,7 @@
 import ssl
 import threading
 from http.server import BaseHTTPRequestHandler
-from socketserver import ThreadingTCPServer
+from socketserver import BaseRequestHandler, ThreadingTCPServer

 import pytest
@@ -118,6 +118,13 @@ def _io_refs(self, value):

         def shutdown(self, *args, **kwargs):
             self.socket.shutdown(*args, **kwargs)
+
+        def _wrap_ssl_read(self, *args, **kwargs):
+            res = super()._wrap_ssl_read(*args, **kwargs)
+            if res == 0:
+                # Websockets does not treat 0 as an EOF, rather only b''
+                return b''
+            return res
 else:
     SSLTransport = None
@@ -134,6 +141,34 @@ def __init__(self, request, *args, **kwargs):
         super().__init__(request, *args, **kwargs)


+class WebSocketProxyHandler(BaseRequestHandler):
+    def __init__(self, *args, proxy_info=None, **kwargs):
+        self.proxy_info = proxy_info
+        super().__init__(*args, **kwargs)
+
+    def handle(self):
+        import websockets.sync.server
+        protocol = websockets.ServerProtocol()
+        connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, close_timeout=0)
+        connection.handshake()
+        for message in connection:
+            if message == 'proxy_info':
+                connection.send(json.dumps(self.proxy_info))
+        connection.close()
+
+
+class WebSocketSecureProxyHandler(WebSocketProxyHandler):
+    def __init__(self, request, *args, **kwargs):
+        certfn = os.path.join(TEST_DIR, 'testcert.pem')
+        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+        sslctx.load_cert_chain(certfn, None)
+        if SSLTransport:
+            request = SSLTransport(request, ssl_context=sslctx, server_side=True)
+        else:
+            request = sslctx.wrap_socket(request, server_side=True)
+        super().__init__(request, *args, **kwargs)
+
+
 class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
     protocol_version = 'HTTP/1.1'
     default_request_version = 'HTTP/1.1'
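For context, a minimal sketch of how a client could exercise this handler directly with the websockets sync API; the host and port are illustrative (the tests below default to port 40000):

    import json
    import websockets.sync.client

    # the handler replies to the literal message 'proxy_info' with its proxy_info as JSON
    with websockets.sync.client.connect('ws://127.0.0.1:40000') as ws:
        ws.send('proxy_info')
        proxy_info = json.loads(ws.recv())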
@@ -233,9 +268,30 @@ def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
         return json.loads(handler.send(request).read().decode())


+class HTTPProxyWebSocketTestContext(HTTPProxyTestContext):
+    REQUEST_HANDLER_CLASS = WebSocketProxyHandler
+    REQUEST_PROTO = 'ws'
+
+    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
+        request = Request(f'{self.REQUEST_PROTO}://{target_domain or "127.0.0.1"}:{target_port or "40000"}', **req_kwargs)
+        handler.validate(request)
+        ws = handler.send(request)
+        ws.send('proxy_info')
+        proxy_info = ws.recv()
+        ws.close()
+        return json.loads(proxy_info)
+
+
+class HTTPProxyWebSocketSecureTestContext(HTTPProxyWebSocketTestContext):
+    REQUEST_HANDLER_CLASS = WebSocketSecureProxyHandler
+    REQUEST_PROTO = 'wss'
+
+
 CTX_MAP = {
     'http': HTTPProxyHTTPTestContext,
     'https': HTTPProxyHTTPSTestContext,
+    'ws': HTTPProxyWebSocketTestContext,
+    'wss': HTTPProxyWebSocketSecureTestContext,
 }
@@ -313,6 +369,8 @@ def test_http_with_idn(self, handler, ctx):
     'handler,ctx', [
         ('Requests', 'https'),
         ('CurlCFFI', 'https'),
+        ('Websockets', 'ws'),
+        ('Websockets', 'wss')
     ], indirect=True)
 class TestHTTPConnectProxy:
     def test_http_connect_no_auth(self, handler, ctx):
@@ -1159,8 +1159,8 @@ class HTTPSupportedRH(ValidationRH):
             ('socks5h', False),
         ]),
         ('Websockets', 'ws', [
-            ('http', UnsupportedRequest),
-            ('https', UnsupportedRequest),
+            ('http', False),
+            ('https', False),
             ('socks4', False),
             ('socks4a', False),
             ('socks5', False),
@@ -1241,8 +1241,8 @@ class HTTPSupportedRH(ValidationRH):
         ('Websockets', False, 'ws')
     ], indirect=['handler'])
     def test_no_proxy(self, handler, fail, scheme):
-        run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'}))
+        run_validation(handler, fail, Request(f'{scheme}://'), proxies={'no': '127.0.0.1,github.com'})
-        run_validation(handler, fail, Request(f'{scheme}://example.com', proxies={'no': '127.0.0.1,github.com'}))
+        run_validation(handler, fail, Request(f'{scheme}://example.com'), proxies={'no': '127.0.0.1,github.com'})

     @pytest.mark.parametrize('handler,scheme', [
         ('Urllib', 'http'),
@@ -216,7 +216,9 @@ def handle(self):
         protocol = websockets.ServerProtocol()
         connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, close_timeout=0)
         connection.handshake()
-        connection.send(json.dumps(self.socks_info))
+        for message in connection:
+            if message == 'socks_info':
+                connection.send(json.dumps(self.socks_info))
         connection.close()
@@ -4151,15 +4151,15 @@ def urlopen(self, req):
                     'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
                 if (
                     'unsupported proxy type: "https"' in ue.msg.lower()
-                    and 'requests' not in self._request_director.handlers
-                    and 'curl_cffi' not in self._request_director.handlers
+                    and 'Requests' not in self._request_director.handlers
+                    and 'CurlCFFI' not in self._request_director.handlers
                 ):
                     raise RequestError(
                         'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi')

                 elif (
                     re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
-                    and 'websockets' not in self._request_director.handlers
+                    and 'Websockets' not in self._request_director.handlers
                 ):
                     raise RequestError(
                         'This request requires WebSocket support. '
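A plausible reading of this fix: the director's `handlers` mapping is keyed by each handler's `RH_NAME` (e.g. `'Requests'`, `'CurlCFFI'`, `'Websockets'`), so the old lowercase membership tests could never match and the "missing dependency" error fired even when the dependency was installed. Illustrative only:

    handlers = {'Urllib': object(), 'Requests': object()}  # hypothetical director state
    'requests' in handlers  # False -- the old check, masking the installed handler
    'Requests' in handlers  # True  -- the corrected check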
@@ -46,7 +46,7 @@
 from .utils._utils import _YDLLogger
 from .utils.networking import normalize_url

-CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
+CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
 SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
@@ -219,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
             'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
             'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
             'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
+            'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
         }[browser_name]

     elif sys.platform == 'darwin':
@@ -230,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
             'edge': os.path.join(appdata, 'Microsoft Edge'),
             'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
             'vivaldi': os.path.join(appdata, 'Vivaldi'),
+            'whale': os.path.join(appdata, 'Naver/Whale'),
         }[browser_name]

     else:
@@ -241,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
             'edge': os.path.join(config, 'microsoft-edge'),
             'opera': os.path.join(config, 'opera'),
             'vivaldi': os.path.join(config, 'vivaldi'),
+            'whale': os.path.join(config, 'naver-whale'),
         }[browser_name]

     # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
@@ -252,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
         'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
         'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
         'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
+        'whale': 'Whale',
     }[browser_name]

     browsers_without_profiles = {'opera'}
@@ -957,7 +957,8 @@ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=
         if urlh is False:
             assert not fatal
             return False
-        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
+                                             encoding=encoding, data=data)
         return (content, urlh)

     @staticmethod
@@ -1005,8 +1006,10 @@ def __check_blocked(self, content):
             'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
             expected=True)

-    def _request_dump_filename(self, url, video_id):
-        basen = f'{video_id}_{url}'
+    def _request_dump_filename(self, url, video_id, data=None):
+        if data is not None:
+            data = hashlib.md5(data).hexdigest()
+        basen = join_nonempty(video_id, data, url, delim='_')
         trim_length = self.get_param('trim_file_name') or 240
         if len(basen) > trim_length:
             h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
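A rough illustration of the filename scheme after this change, assuming `join_nonempty` simply joins the non-empty parts with `_`: hashing the request body keeps dumps of the same URL with different POST payloads from overwriting each other.

    import hashlib

    video_id, url, data = 'abc123', 'https://example.com/api', b'{"page": 2}'
    parts = [video_id, hashlib.md5(data).hexdigest() if data is not None else None, url]
    basen = '_'.join(p for p in parts if p)
    # -> 'abc123_<md5-of-body>_https://example.com/api'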
@@ -1028,16 +1031,18 @@ def __decode_webpage(self, webpage_bytes, encoding, headers):
         except LookupError:
             return webpage_bytes.decode('utf-8', 'replace')

-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
+                              prefix=None, encoding=None, data=None):
         webpage_bytes = urlh.read()
         if prefix is not None:
             webpage_bytes = prefix + webpage_bytes
+        url_or_request = self._create_request(url_or_request, data)
         if self.get_param('dump_intermediate_pages', False):
             self.to_screen('Dumping request to ' + urlh.url)
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
         if self.get_param('write_pages'):
-            filename = self._request_dump_filename(urlh.url, video_id)
+            filename = self._request_dump_filename(urlh.url, video_id, url_or_request.data)
             self.to_screen(f'Saving request to {filename}')
             with open(filename, 'wb') as outf:
                 outf.write(webpage_bytes)
@@ -1098,7 +1103,7 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
                      impersonate=None, require_impersonation=False):
         if self.get_param('load_pages'):
             url_or_request = self._create_request(url_or_request, data, headers, query)
-            filename = self._request_dump_filename(url_or_request.url, video_id)
+            filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
             self.to_screen(f'Loading request from {filename}')
             try:
                 with open(filename, 'rb') as dumpf:
@@ -2353,6 +2353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'format': '17',  # 3gp format available on android
             'extractor_args': {'youtube': {'player_client': ['android']}},
         },
+        'skip': 'android client broken',
     },
     {
         # Skip download of additional client configs (remix client config in this case)
@@ -2730,7 +2731,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'heatmap': 'count:100',
         },
         'params': {
-            'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
+            'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
         },
     },
 ]
@@ -3317,7 +3318,36 @@ def _extract_heatmap(self, data):
             'value': ('intensityScoreNormalized', {float_or_none}),
         })) or None

-    def _extract_comment(self, comment_renderer, parent=None):
+    def _extract_comment(self, entities, parent=None):
+        comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
+        if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))):
+            return
+
+        toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
+        time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''
+
+        return {
+            'id': comment_id,
+            'parent': parent or 'root',
+            **traverse_obj(comment_entity_payload, {
+                'text': ('properties', 'content', 'content', {str}),
+                'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
+                'author_id': ('author', 'channelId', {self.ucid_or_none}),
+                'author': ('author', 'displayName', {str}),
+                'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
+                'author_is_uploader': ('author', 'isCreator', {bool}),
+                'author_is_verified': ('author', 'isVerified', {bool}),
+                'author_url': ('author', 'channelCommand', 'innertubeCommand', (
+                    ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url')
+                ), {lambda x: urljoin('https://www.youtube.com', x)}),
+            }, get_all=False),
+            'is_favorited': (None if toolbar_entity_payload is None else
+                             toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
+            '_time_text': time_text,  # FIXME: non-standard, but we need a way of showing that it is an estimate.
+            'timestamp': self._parse_time_text(time_text),
+        }
+
+    def _extract_comment_old(self, comment_renderer, parent=None):
         comment_id = comment_renderer.get('commentId')
         if not comment_id:
             return
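A hedged sketch of the entity shape this new parser walks, with key names taken from the traversal above and all values illustrative:

    entities = [{
        'entityKey': '...',
        'payload': {
            'commentEntityPayload': {
                'properties': {'commentId': 'Ugx...', 'content': {'content': 'Nice video!'}, 'publishedTime': '2 days ago'},
                'author': {'channelId': 'UC...', 'displayName': '@someuser', 'isCreator': False, 'isVerified': False},
                'toolbar': {'likeCountA11y': '1.2K likes'},
            },
        },
    }]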
@@ -3398,21 +3428,39 @@ def extract_header(contents):
                     break
             return _continuation

-        def extract_thread(contents):
+        def extract_thread(contents, entity_payloads):
             if not parent:
                 tracker['current_page_thread'] = 0
             for content in contents:
                 if not parent and tracker['total_parent_comments'] >= max_parents:
                     yield
                 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
-                comment_renderer = get_first(
-                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
-                    expected_type=dict, default={})
-
-                comment = self._extract_comment(comment_renderer, parent)
+                # old comment format
+                if not entity_payloads:
+                    comment_renderer = get_first(
+                        (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
+                        expected_type=dict, default={})
+
+                    comment = self._extract_comment_old(comment_renderer, parent)
+
+                # new comment format
+                else:
+                    view_model = (
+                        traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
+                        or traverse_obj(content, ('commentViewModel', {dict})))
+                    comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
+                    if not comment_keys:
+                        continue
+                    entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
+                    comment = self._extract_comment(entities, parent)
+                    if comment:
+                        comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
+
                 if not comment:
                     continue
                 comment_id = comment['id']

                 if comment.get('is_pinned'):
                     tracker['pinned_comment_ids'].add(comment_id)
                 # Sometimes YouTube may break and give us infinite looping comments.
@@ -3505,7 +3553,7 @@ def extract_thread(contents):
             check_get_keys = None
             if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
                 check_get_keys = [[*continuation_items_path, ..., (
-                    'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
+                    'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
             try:
                 response = self._extract_response(
                     item_id=None, query=continuation,
@@ -3529,6 +3577,7 @@ def extract_thread(contents):
                     raise
                 is_forced_continuation = False
                 continuation = None
+                mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
                 for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
                     if is_first_continuation:
                         continuation = extract_header(continuation_items)
@@ -3537,7 +3586,7 @@ def extract_thread(contents):
                         break
                     continue

-                for entry in extract_thread(continuation_items):
+                for entry in extract_thread(continuation_items, mutations):
                     if not entry:
                         return
                     yield entry
@@ -3614,8 +3663,6 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
         yt_query = {
             'videoId': video_id,
         }
-        if _split_innertube_client(client)[0] in ('android', 'android_embedscreen'):
-            yt_query['params'] = 'CgIIAQ=='

         pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
         if pp_arg:
@@ -3631,19 +3678,24 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,

     def _get_requested_clients(self, url, smuggled_data):
         requested_clients = []
-        default = ['ios', 'android', 'web']
+        android_clients = []
+        default = ['ios', 'web']
         allowed_clients = sorted(
             (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
             key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
         for client in self._configuration_arg('player_client'):
-            if client in allowed_clients:
-                requested_clients.append(client)
-            elif client == 'default':
+            if client == 'default':
                 requested_clients.extend(default)
             elif client == 'all':
                 requested_clients.extend(allowed_clients)
-            else:
+            elif client not in allowed_clients:
                 self.report_warning(f'Skipping unsupported client {client}')
+            elif client.startswith('android'):
+                android_clients.append(client)
+            else:
+                requested_clients.append(client)
+        # Force deprioritization of broken Android clients for format de-duplication
+        requested_clients.extend(android_clients)
         if not requested_clients:
             requested_clients = default
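The net effect, as a sketch: android clients are collected separately and appended after everything else, so their broken formats sort last during de-duplication.

    # hypothetical input: --extractor-args "youtube:player_client=android,web,ios"
    # old order: ['android', 'web', 'ios']
    # new order: ['web', 'ios', 'android']  -- android forced to the back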
@@ -3862,6 +3914,14 @@ def build_fragments(f):
                     f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

+            client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
+            # Android client formats are broken due to integrity check enforcement
+            # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
+            is_broken = client_name and client_name.startswith(short_client_name('android'))
+            if is_broken:
+                self.report_warning(
+                    f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
+                    'They will be deprioritized', only_once=True)
+
             name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
             fps = int_or_none(fmt.get('fps')) or 0
             dct = {
@@ -3874,7 +3934,7 @@ def build_fragments(f):
                     name, fmt.get('isDrc') and 'DRC',
                     try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                     try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
-                    throttled and 'THROTTLED', is_damaged and 'DAMAGED',
+                    throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
                     (self.get_param('verbose') or all_formats) and client_name,
                     delim=', '),
                 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
@@ -3892,8 +3952,8 @@ def build_fragments(f):
                 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                           'desc' if language_preference < -1 else '') or None,
                 'language_preference': language_preference,
-                # Strictly de-prioritize damaged and 3gp formats
-                'preference': -10 if is_damaged else -2 if itag == '17' else None,
+                # Strictly de-prioritize broken, damaged and 3gp formats
+                'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
             }
             mime_mobj = re.match(
                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
@@ -1,10 +1,13 @@
 from __future__ import annotations

+import base64
 import contextlib
+import io
 import logging
 import ssl
 import sys
 import urllib.parse
+from http.client import HTTPConnection, HTTPResponse

 from ._helper import (
     create_connection,
@@ -20,12 +23,14 @@
     RequestError,
     SSLError,
     TransportError,
+    UnsupportedRequest,
 )
 from .websocket import WebSocketRequestHandler, WebSocketResponse
 from ..compat import functools
-from ..dependencies import websockets
+from ..dependencies import urllib3, websockets
 from ..socks import ProxyError as SocksProxyError
+from ..utils import int_or_none
 from ..utils.networking import HTTPHeaderDict

 if not websockets:
     raise ImportError('websockets is not installed')
@@ -36,6 +41,11 @@
 if websockets_version < (12, 0):
     raise ImportError('Only websockets>=12.0 is supported')

+urllib3_supported = False
+urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.')) if urllib3 else None
+if urllib3_version and urllib3_version >= (1, 26, 17):
+    urllib3_supported = True
+
 import websockets.sync.client
 from websockets.uri import parse_uri
@@ -98,7 +108,7 @@ class WebsocketsRH(WebSocketRequestHandler):
     https://github.com/python-websockets/websockets
     """
     _SUPPORTED_URL_SCHEMES = ('wss', 'ws')
-    _SUPPORTED_PROXY_SCHEMES = ('socks4', 'socks4a', 'socks5', 'socks5h')
+    _SUPPORTED_PROXY_SCHEMES = ('socks4', 'socks4a', 'socks5', 'socks5h', 'http', 'https')
     _SUPPORTED_FEATURES = (Features.ALL_PROXY, Features.NO_PROXY)
     RH_NAME = 'websockets'
@@ -108,12 +118,23 @@ def __init__(self, *args, **kwargs):
         for name in ('websockets.client', 'websockets.server'):
             logger = logging.getLogger(name)
             handler = logging.StreamHandler(stream=sys.stdout)
-            handler.setFormatter(logging.Formatter(f'{self.RH_NAME}: %(message)s'))
+            handler.setFormatter(logging.Formatter(f'{self.RH_NAME}: [{name}] %(message)s'))
             self.__logging_handlers[name] = handler
             logger.addHandler(handler)
             if self.verbose:
                 logger.setLevel(logging.DEBUG)

+    def _validate(self, request):
+        super()._validate(request)
+        proxy = select_proxy(request.url, self._get_proxies(request))
+        if (
+            proxy
+            and urllib.parse.urlparse(proxy).scheme.lower() == 'https'
+            and urllib.parse.urlparse(request.url).scheme.lower() == 'wss'
+            and not urllib3_supported
+        ):
+            raise UnsupportedRequest('WSS over HTTPS proxies requires a supported version of urllib3')
+
     def _check_extensions(self, extensions):
         super()._check_extensions(extensions)
         extensions.pop('timeout', None)
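A hedged example of the request shape this guard rejects when urllib3 is missing or older than 1.26.17: a `wss://` URL tunneled through an `https://` proxy needs TLS-in-TLS, which plain `ssl` sockets cannot layer on their own. Assuming `Request` is importable as in the tests above, with placeholder hosts:

    from yt_dlp.networking import Request

    # illustrative; validation raises UnsupportedRequest without a supported urllib3
    req = Request('wss://example.com/socket', proxies={'all': 'https://127.0.0.1:9443'})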
@@ -125,6 +146,38 @@ def close(self):
         for name, handler in self.__logging_handlers.items():
             logging.getLogger(name).removeHandler(handler)

+    def _make_sock(self, proxy, url, timeout):
+        create_conn_kwargs = {
+            'source_address': (self.source_address, 0) if self.source_address else None,
+            'timeout': timeout
+        }
+        parsed_url = parse_uri(url)
+        parsed_proxy_url = urllib.parse.urlparse(proxy)
+        if proxy:
+            if parsed_proxy_url.scheme.startswith('socks'):
+                socks_proxy_options = make_socks_proxy_opts(proxy)
+                return create_connection(
+                    address=(socks_proxy_options['addr'], socks_proxy_options['port']),
+                    _create_socket_func=functools.partial(
+                        create_socks_proxy_socket, (parsed_url.host, parsed_url.port), socks_proxy_options),
+                    **create_conn_kwargs
+                )
+
+            elif parsed_proxy_url.scheme in ('http', 'https'):
+                return create_http_connect_conn(
+                    proxy_url=proxy,
+                    url=url,
+                    timeout=timeout,
+                    ssl_context=self._make_sslcontext() if parsed_proxy_url.scheme == 'https' else None,
+                    source_address=self.source_address,
+                    username=parsed_proxy_url.username,
+                    password=parsed_proxy_url.password,
+                )
+        return create_connection(
+            address=(parsed_url.host, parsed_url.port),
+            **create_conn_kwargs
+        )
+
     def _send(self, request):
         timeout = self._calculate_timeout(request)
         headers = self._merge_headers(request.headers)
@@ -134,33 +187,22 @@ def _send(self, request):
         if cookie_header:
             headers['cookie'] = cookie_header

-        wsuri = parse_uri(request.url)
-        create_conn_kwargs = {
-            'source_address': (self.source_address, 0) if self.source_address else None,
-            'timeout': timeout
-        }
         proxy = select_proxy(request.url, self._get_proxies(request))
+
+        ssl_context = None
+        if parse_uri(request.url).secure:
+            if WebsocketsSSLContext is not None:
+                ssl_context = WebsocketsSSLContext(self._make_sslcontext())
+            else:
+                ssl_context = self._make_sslcontext()
         try:
-            if proxy:
-                socks_proxy_options = make_socks_proxy_opts(proxy)
-                sock = create_connection(
-                    address=(socks_proxy_options['addr'], socks_proxy_options['port']),
-                    _create_socket_func=functools.partial(
-                        create_socks_proxy_socket, (wsuri.host, wsuri.port), socks_proxy_options),
-                    **create_conn_kwargs
-                )
-            else:
-                sock = create_connection(
-                    address=(wsuri.host, wsuri.port),
-                    **create_conn_kwargs
-                )
             conn = websockets.sync.client.connect(
-                sock=sock,
+                sock=self._make_sock(proxy, request.url, timeout),
                 uri=request.url,
                 additional_headers=headers,
                 open_timeout=timeout,
                 user_agent_header=None,
-                ssl_context=self._make_sslcontext() if wsuri.secure else None,
+                ssl_context=ssl_context,
                 close_timeout=0,  # not ideal, but prevents yt-dlp hanging
             )
             return WebsocketsResponseAdapter(conn, url=request.url)
@@ -185,3 +227,105 @@ def _send(self, request):
             ) from e
         except (OSError, TimeoutError, websockets.exceptions.WebSocketException) as e:
             raise TransportError(cause=e) from e
+
+
+class NoCloseHTTPResponse(HTTPResponse):
+    def begin(self):
+        super().begin()
+        # Revert the default behavior of closing the connection after reading the response
+        if not self._check_close() and not self.chunked and self.length is None:
+            self.will_close = False
+
+
+if urllib3_supported:
+    from urllib3.util.ssltransport import SSLTransport
+
+    class WebsocketsSSLTransport(SSLTransport):
+        """
+        Modified version of urllib3 SSLTransport to support additional operations used by websockets
+        """
+        def setsockopt(self, *args, **kwargs):
+            self.socket.setsockopt(*args, **kwargs)
+
+        def shutdown(self, *args, **kwargs):
+            self.unwrap()
+            self.socket.shutdown(*args, **kwargs)
+
+        def _wrap_ssl_read(self, *args, **kwargs):
+            res = super()._wrap_ssl_read(*args, **kwargs)
+            if res == 0:
+                # Websockets does not treat 0 as an EOF, rather only b''
+                return b''
+            return res
+else:
+    WebsocketsSSLTransport = None
+
+
+class WebsocketsSSLContext:
+    """
+    Dummy SSL Context for websockets which returns a WebsocketsSSLTransport instance
+    for wrap socket when using TLS-in-TLS.
+    """
+    def __init__(self, ssl_context: ssl.SSLContext):
+        self.ssl_context = ssl_context
+
+    def wrap_socket(self, sock, server_hostname=None):
+        if isinstance(sock, ssl.SSLSocket):
+            return WebsocketsSSLTransport(sock, self.ssl_context, server_hostname=server_hostname)
+        return self.ssl_context.wrap_socket(sock, server_hostname=server_hostname)
+
+
+def create_http_connect_conn(
+        proxy_url,
+        url,
+        timeout=None,
+        ssl_context=None,
+        source_address=None,
+        username=None,
+        password=None,
+):
+    proxy_headers = HTTPHeaderDict()
+
+    if username is not None or password is not None:
+        proxy_headers['Proxy-Authorization'] = 'Basic ' + base64.b64encode(
+            f'{username or ""}:{password or ""}'.encode('utf-8')).decode('utf-8')
+
+    proxy_url_parsed = urllib.parse.urlparse(proxy_url)
+    request_url_parsed = parse_uri(url)
+
+    conn = HTTPConnection(proxy_url_parsed.hostname, port=proxy_url_parsed.port, timeout=timeout)
+    conn.response_class = NoCloseHTTPResponse
+
+    if hasattr(conn, '_create_connection'):
+        conn._create_connection = create_connection
+
+    if source_address is not None:
+        conn.source_address = (source_address, 0)
+
+    try:
+        conn.connect()
+        if ssl_context:
+            conn.sock = ssl_context.wrap_socket(conn.sock, server_hostname=proxy_url_parsed.hostname)
+        conn.request(
+            method='CONNECT',
+            url=f'{request_url_parsed.host}:{request_url_parsed.port}',
+            headers=proxy_headers)
+        response = conn.getresponse()
+    except OSError as e:
+        conn.close()
+        raise ProxyError('Unable to connect to proxy', cause=e) from e
+
+    if response.status == 200:
+        return conn.sock
+    elif response.status == 407:
+        conn.close()
+        raise ProxyError('Got HTTP Error 407 with CONNECT: Proxy Authentication Required')
+    else:
+        conn.close()
+        res_adapter = Response(
+            fp=io.BytesIO(b''),
+            url=proxy_url, headers=response.headers,
+            status=response.status,
+            reason=response.reason)
+        raise HTTPError(response=res_adapter)
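For orientation, the wire exchange this helper drives, shown as comments since it is raw HTTP rather than Python; credentials and hosts are placeholders:

    # client -> proxy:
    #   CONNECT example.com:443 HTTP/1.1
    #   Proxy-Authorization: Basic dXNlcjpwYXNz
    #
    # proxy -> client on success (status 200; the socket is then a raw tunnel):
    #   HTTP/1.1 200 Connection established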
@@ -1,8 +1,9 @@
 from __future__ import annotations

 import abc
+import urllib.parse

-from .common import RequestHandler, Response
+from .common import RequestHandler, Response, register_preference


 class WebSocketResponse(Response):
@@ -21,3 +22,10 @@ def recv(self):

 class WebSocketRequestHandler(RequestHandler, abc.ABC):
     pass
+
+
+@register_preference(WebSocketRequestHandler)
+def websocket_preference(_, request):
+    if urllib.parse.urlparse(request.url).scheme in ('ws', 'wss'):
+        return 200
+    return 0
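A small sketch of how this preference plays out, assuming registered preferences are added to a handler's score when the director ranks candidates for a request; the values below are illustrative:

    # any WebSocketRequestHandler gets a +200 boost for ws/wss URLs
    websocket_preference(handler, Request('wss://example.com'))    # -> 200
    websocket_preference(handler, Request('https://example.com'))  # -> 0, no boost for non-WS schemes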