mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-09 06:27:06 +01:00
Migrate
This commit is contained in:
parent
4bf912282a
commit
ba17d88d5e
@ -399,6 +399,7 @@ class YoutubeDL:
|
||||
- "detect_or_warn": check whether we can do anything
|
||||
about it, warn otherwise (default)
|
||||
source_address: Client-side IP address to bind to.
|
||||
impersonate: curl-impersonate target name to impersonate for requests.
|
||||
sleep_interval_requests: Number of seconds to sleep between requests
|
||||
during extraction
|
||||
sleep_interval: Number of seconds to sleep before each download when
|
||||
@ -3977,7 +3978,7 @@ def get_encoding(stream):
|
||||
})) or 'none'))
|
||||
|
||||
write_debug(f'Proxy map: {self.proxies}')
|
||||
# write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
|
||||
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||||
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
||||
display_list = ['%s%s' % (
|
||||
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
||||
@ -4105,6 +4106,7 @@ def build_request_director(self, handlers):
|
||||
'client_certificate_key': 'client_certificate_key',
|
||||
'client_certificate_password': 'client_certificate_password',
|
||||
},
|
||||
'impersonate': 'impersonate',
|
||||
}),
|
||||
))
|
||||
return director
|
||||
|
@ -910,6 +910,7 @@ def parse_options(argv=None):
|
||||
'postprocessors': postprocessors,
|
||||
'fixup': opts.fixup,
|
||||
'source_address': opts.source_address,
|
||||
'impersonate': opts.impersonate,
|
||||
'call_home': opts.call_home,
|
||||
'sleep_interval_requests': opts.sleep_interval_requests,
|
||||
'sleep_interval': opts.sleep_interval,
|
||||
|
@ -56,6 +56,10 @@
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/2633
|
||||
websockets = None
|
||||
|
||||
try:
|
||||
import curl_cffi
|
||||
except ImportError:
|
||||
curl_cffi = None
|
||||
|
||||
try:
|
||||
import xattr # xattr or pyxattr
|
||||
|
@ -11,3 +11,7 @@
|
||||
# isort: split
|
||||
# TODO: all request handlers should be safely imported
|
||||
from . import _urllib
|
||||
try:
|
||||
from . import _curlcffi # noqa: F401
|
||||
except ImportError:
|
||||
pass
|
||||
|
308
yt_dlp/networking/_curlcffi.py
Normal file
308
yt_dlp/networking/_curlcffi.py
Normal file
@ -0,0 +1,308 @@
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
from enum import IntEnum
|
||||
|
||||
from .common import Features, Request, RequestHandler, Response, register_rh
|
||||
from .exceptions import (
|
||||
CertificateVerifyError,
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
SSLError,
|
||||
TransportError,
|
||||
)
|
||||
from .impersonate import ImpersonateRequestHandler
|
||||
from ._helper import InstanceStoreMixin, select_proxy
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..dependencies import curl_cffi
|
||||
from ..utils import int_or_none, traverse_obj
|
||||
|
||||
if curl_cffi is None:
|
||||
raise ImportError('curl_cffi is not installed')
|
||||
|
||||
import curl_cffi.requests
|
||||
from curl_cffi import ffi
|
||||
from curl_cffi.const import CurlInfo, CurlOpt
|
||||
|
||||
|
||||
class CurlCFFISession(curl_cffi.requests.Session):
    """
    curl_cffi session with an optional verbose mode and adjusted
    request-method options for the underlying curl handle.
    """

    def __init__(self, verbose=False, **kwargs):
        super().__init__(**kwargs)
        self.verbose = verbose

    @property
    def curl(self):
        # Wrap the parent's handle accessor so the verbose flag is applied to
        # whichever handle it hands back.
        # NOTE(review): the original comment ties this to how curl_cffi handles
        # threading - presumably the handle can differ between threads; confirm
        handle = super().curl
        if self.verbose:
            handle.setopt(CurlOpt.VERBOSE, 1)
        return handle

    def _set_curl_options(self, curl, method: str, url: str, *args, **kwargs):
        """Apply the parent's curl options, then override the request-method related ones"""
        result = super()._set_curl_options(curl, method, url, *args, **kwargs)

        # The request body is looked up as the `data` keyword first,
        # then as the second extra positional argument
        body = traverse_obj(kwargs, 'data')
        if not body:
            body = traverse_obj(args, 1)

        # Attempt to align curl redirect handling with ours
        curl.setopt(CurlOpt.CUSTOMREQUEST, ffi.NULL)

        has_non_post_body = bool(body) and method != 'POST'
        if has_non_post_body:
            # Don't strip data on 301,302,303 redirects for PUT etc.
            curl.setopt(CurlOpt.POSTREDIR, 1 | 2 | 4)  # CURL_REDIR_POST_ALL

        if method != 'GET' and method != 'POST':
            curl.setopt(CurlOpt.CUSTOMREQUEST, method.encode())

        return result
|
||||
|
||||
|
||||
def get_error_code(error: 'curl_cffi.curl.CurlError'):
    """
    Extract the numeric curl error code from a curl_cffi exception.

    The code is parsed from the string form of the exception, which is
    expected to contain a fragment such as "ErrCode: 47".

    Returns the code as an int, or None when the message does not contain
    an "ErrCode: <number>" fragment.
    """
    mobj = re.search(r'ErrCode:\s+(\d+)', str(error))
    # A message without an error code must not turn into an AttributeError
    # here, as that would hide the original exception from the caller.
    return int(mobj.group(1)) if mobj else None
|
||||
|
||||
|
||||
@register_rh
class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
    """
    Request handler that sends http/https requests through a curl_cffi session,
    optionally impersonating one of curl_cffi's browser targets.
    """
    RH_NAME = 'curl_cffi'
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_IMPERSONATE_TARGETS = curl_cffi.requests.BrowserType._member_names_

    def _create_instance(self):
        """Create the curl_cffi session, enabling curl's verbose output if the handler is verbose"""
        session_opts = {}

        if self.verbose:
            session_opts['verbose'] = True

        session = CurlCFFISession(**session_opts)
        return session

    def _check_extensions(self, extensions):
        super()._check_extensions(extensions)
        # The parent class has validated the target; mark the extension as handled
        extensions.pop('impersonate', None)

    def _generate_set_cookie(self, cookiejar):
        """
        Yield one Set-Cookie style string ("name=value; Domain=...; ...") for
        each cookie in `cookiejar`.
        """
        from email.utils import formatdate

        # One encoder serves every cookie
        # NOTE(review): presumably value_encode() keeps no per-cookie state - confirm
        encoder = LenientSimpleCookie()
        for cookie in cookiejar:
            _, value = encoder.value_encode(cookie.value)
            values = [f'{cookie.name}={value}']
            if cookie.domain:
                values.append(f'Domain={cookie.domain}')
            if cookie.path:
                values.append(f'Path={cookie.path}')
            if cookie.secure:
                values.append('Secure')
            if cookie.expires:
                # The Expires attribute is an HTTP date, not a raw timestamp.
                # NOTE(review): assumes `expires` is seconds since the epoch,
                # as in http.cookiejar.Cookie - confirm
                try:
                    values.append(f'Expires={formatdate(cookie.expires, usegmt=True)}')
                except (OverflowError, OSError, TypeError, ValueError):
                    # Timestamp cannot be converted to a date; emit the cookie without an expiry
                    pass
            if cookie.version:
                values.append(f'Version={cookie.version}')
            yield '; '.join(values)

    def _send(self, request: Request):
        """
        Perform `request` with curl_cffi and return a Response.

        Raises CertificateVerifyError, SSLError, IncompleteRead or TransportError
        for the corresponding curl failures, and HTTPError for any response whose
        status is outside the 2xx range (including when the redirect limit is hit).
        """
        # XXX: curl_cffi reads the whole response at once into memory
        # Streaming is not yet supported.
        # See: https://github.com/yifeikong/curl_cffi/issues/26
        max_redirects_exceeded = False
        session: CurlCFFISession = self._get_instance()
        cookiejar = request.extensions.get('cookiejar') or self.cookiejar

        # Reset the internal curl cookie store to ensure consistency with our cookiejar
        # See: https://curl.se/libcurl/c/CURLOPT_COOKIELIST.html
        # XXX: does this actually work?
        session.curl.setopt(CurlOpt.COOKIELIST, b'ALL')
        session.cookies.clear()
        for cookie_str in self._generate_set_cookie(cookiejar):
            session.curl.setopt(CurlOpt.COOKIELIST, ('set-cookie: ' + cookie_str).encode())

        # XXX: if we need to change http version
        # session.curl.setopt(CurlOpt.HTTP_VERSION, 2)
        if self.source_address is not None:
            session.curl.setopt(CurlOpt.INTERFACE, self.source_address.encode())

        proxies = (request.proxies or self.proxies).copy()
        # A proxy entry can be present but unset; calling .encode() on None would crash
        # NOTE(review): presumably None marks an explicitly disabled proxy - confirm against the caller
        no_proxy = proxies.pop('no', None)
        if no_proxy is not None:
            session.curl.setopt(CurlOpt.NOPROXY, no_proxy.encode())

        if 'all' in proxies:
            if proxies['all'] is not None:
                session.curl.setopt(CurlOpt.PROXY, proxies['all'].encode())
        else:
            # curl doesn't support per protocol proxies, so we select the one that matches the request protocol
            proxy = select_proxy(request.url, proxies=proxies)
            if proxy:
                session.curl.setopt(CurlOpt.PROXY, proxy.encode())

        headers = self._get_impersonate_headers(request)

        try:
            curl_response = session.request(
                method=request.method,
                url=request.url,
                headers=headers,
                data=request.data,
                verify=self.verify,
                max_redirects=5,
                timeout=request.extensions.get('timeout') or self.timeout,
                impersonate=self._get_impersonate_target(request),
            )
        except curl_cffi.requests.errors.RequestsError as e:
            error_code = get_error_code(e)
            if error_code in (CurlECode.PEER_FAILED_VERIFICATION, CurlECode.OBSOLETE51):
                # Error code 51 used to be this in curl <7.62.0
                # See: https://curl.se/libcurl/c/libcurl-errors.html
                raise CertificateVerifyError(cause=e) from e

            elif error_code == CurlECode.SSL_CONNECT_ERROR:
                raise SSLError(cause=e) from e

            elif error_code == CurlECode.TOO_MANY_REDIRECTS:
                # The response isn't exposed on too many redirects.
                # We are creating a dummy response here, but it's
                # not ideal since it only contains initial request data
                max_redirects_exceeded = True
                curl_response = curl_cffi.requests.cookies.Response(
                    curl=session.curl,
                    request=curl_cffi.requests.cookies.Request(
                        url=request.url,
                        headers=curl_cffi.requests.headers.Headers(request.headers),
                        method=request.method,
                    ))

                # We can try extract *some* data from curl
                curl_response.url = session.curl.getinfo(CurlInfo.EFFECTIVE_URL).decode()
                curl_response.status_code = session.curl.getinfo(CurlInfo.RESPONSE_CODE)

            elif error_code == CurlECode.PARTIAL_FILE:
                raise IncompleteRead(
                    # XXX: do we need partial to have the content?
                    partial=[''] * int(session.curl.getinfo(CurlInfo.SIZE_DOWNLOAD)),
                    expected=session.curl.getinfo(CurlInfo.CONTENT_LENGTH_DOWNLOAD),
                    cause=e) from e
            else:
                raise TransportError(cause=e) from e

        response = Response(
            io.BytesIO(curl_response.content),
            headers=curl_response.headers,
            url=curl_response.url,
            status=curl_response.status_code)

        # XXX: this won't apply cookies from intermediate responses in a redirect chain
        # curl_cffi doesn't support CurlInfo.COOKIELIST yet which we need to reliably read cookies
        # See: https://github.com/yifeikong/curl_cffi/issues/4
        for cookie in session.cookies.jar:
            cookiejar.set_cookie(cookie)

        if not 200 <= response.status < 300:
            raise HTTPError(response, redirect_loop=max_redirects_exceeded)

        return response
|
||||
|
||||
|
||||
# https://curl.se/libcurl/c/libcurl-errors.html
|
||||
class CurlECode(IntEnum):
    """Numeric curl error codes, as listed at https://curl.se/libcurl/c/libcurl-errors.html"""

    # 0-9
    OK = 0
    UNSUPPORTED_PROTOCOL = 1
    FAILED_INIT = 2
    URL_MALFORMAT = 3
    NOT_BUILT_IN = 4
    COULDNT_RESOLVE_PROXY = 5
    COULDNT_RESOLVE_HOST = 6
    COULDNT_CONNECT = 7
    WEIRD_SERVER_REPLY = 8
    REMOTE_ACCESS_DENIED = 9

    # 10-19
    FTP_ACCEPT_FAILED = 10
    FTP_WEIRD_PASS_REPLY = 11
    FTP_ACCEPT_TIMEOUT = 12
    FTP_WEIRD_PASV_REPLY = 13
    FTP_WEIRD_227_FORMAT = 14
    FTP_CANT_GET_HOST = 15
    HTTP2 = 16
    FTP_COULDNT_SET_TYPE = 17
    PARTIAL_FILE = 18
    FTP_COULDNT_RETR_FILE = 19

    # 20-29
    OBSOLETE20 = 20
    QUOTE_ERROR = 21
    HTTP_RETURNED_ERROR = 22
    WRITE_ERROR = 23
    OBSOLETE24 = 24
    UPLOAD_FAILED = 25
    READ_ERROR = 26
    OUT_OF_MEMORY = 27
    OPERATION_TIMEDOUT = 28
    OBSOLETE29 = 29

    # 30-39
    FTP_PORT_FAILED = 30
    FTP_COULDNT_USE_REST = 31
    OBSOLETE32 = 32
    RANGE_ERROR = 33
    HTTP_POST_ERROR = 34
    SSL_CONNECT_ERROR = 35
    BAD_DOWNLOAD_RESUME = 36
    FILE_COULDNT_READ_FILE = 37
    LDAP_CANNOT_BIND = 38
    LDAP_SEARCH_FAILED = 39

    # 40-49
    OBSOLETE40 = 40
    FUNCTION_NOT_FOUND = 41
    ABORTED_BY_CALLBACK = 42
    BAD_FUNCTION_ARGUMENT = 43
    OBSOLETE44 = 44
    INTERFACE_FAILED = 45
    OBSOLETE46 = 46
    TOO_MANY_REDIRECTS = 47
    UNKNOWN_OPTION = 48
    SETOPT_OPTION_SYNTAX = 49

    # 50-59
    OBSOLETE50 = 50
    OBSOLETE51 = 51
    GOT_NOTHING = 52
    SSL_ENGINE_NOTFOUND = 53
    SSL_ENGINE_SETFAILED = 54
    SEND_ERROR = 55
    RECV_ERROR = 56
    OBSOLETE57 = 57
    SSL_CERTPROBLEM = 58
    SSL_CIPHER = 59

    # 60-69
    PEER_FAILED_VERIFICATION = 60
    BAD_CONTENT_ENCODING = 61
    OBSOLETE62 = 62
    FILESIZE_EXCEEDED = 63
    USE_SSL_FAILED = 64
    SEND_FAIL_REWIND = 65
    SSL_ENGINE_INITFAILED = 66
    LOGIN_DENIED = 67
    TFTP_NOTFOUND = 68
    TFTP_PERM = 69

    # 70-79
    REMOTE_DISK_FULL = 70
    TFTP_ILLEGAL = 71
    TFTP_UNKNOWNID = 72
    REMOTE_FILE_EXISTS = 73
    TFTP_NOSUCHUSER = 74
    OBSOLETE75 = 75
    OBSOLETE76 = 76
    SSL_CACERT_BADFILE = 77
    REMOTE_FILE_NOT_FOUND = 78
    SSH = 79

    # 80-89
    SSL_SHUTDOWN_FAILED = 80
    AGAIN = 81
    SSL_CRL_BADFILE = 82
    SSL_ISSUER_ERROR = 83
    FTP_PRET_FAILED = 84
    RTSP_CSEQ_ERROR = 85
    RTSP_SESSION_ERROR = 86
    FTP_BAD_FILE_LIST = 87
    CHUNK_FAILED = 88
    NO_CONNECTION_AVAILABLE = 89

    # 90-99
    SSL_PINNEDPUBKEYNOTMATCH = 90
    SSL_INVALIDCERTSTATUS = 91
    HTTP2_STREAM = 92
    RECURSIVE_API_CALL = 93
    AUTH_ERROR = 94
    HTTP3 = 95
    QUIC_CONNECT_ERROR = 96
    PROXY = 97
    SSL_CLIENTCERT = 98
    UNRECOVERABLE_POLL = 99
|
@ -373,7 +373,7 @@ def handle_response_read_exceptions(e):
|
||||
raise TransportError(cause=e) from e
|
||||
|
||||
|
||||
@register_rh
|
||||
#@register_rh
|
||||
class UrllibRH(RequestHandler, InstanceStoreMixin):
|
||||
_SUPPORTED_URL_SCHEMES = ('http', 'https', 'data', 'ftp')
|
||||
_SUPPORTED_PROXY_SCHEMES = ('http', 'socks4', 'socks4a', 'socks5', 'socks5h')
|
||||
|
56
yt_dlp/networking/impersonate.py
Normal file
56
yt_dlp/networking/impersonate.py
Normal file
@ -0,0 +1,56 @@
|
||||
from abc import ABC
|
||||
|
||||
from .exceptions import UnsupportedRequest
|
||||
from ..utils.networking import std_headers
|
||||
from .common import RequestHandler
|
||||
from ..compat.types import NoneType
|
||||
|
||||
|
||||
class ImpersonateRequestHandler(RequestHandler, ABC):
    """
    Base class for request handlers that support browser impersonation.

    The impersonate target can be given when constructing the handler or per
    request through the `impersonate` extension; the per-request value wins.
    The target is validated in _check_extensions (extension value) and in
    _validate (handler-wide value).

    The following may be defined:
     `_SUPPORTED_IMPERSONATE_TARGETS`: a tuple of supported targets to impersonate,
        in curl-impersonate target name format. Any Request with an impersonate
        target not in this list will raise an UnsupportedRequest.
        Set to None to disable this check.
    """
    _SUPPORTED_IMPERSONATE_TARGETS: tuple = ()

    def __init__(self, *, impersonate=None, **kwargs):
        super().__init__(**kwargs)
        self.impersonate = impersonate

    def _get_impersonate_target(self, request):
        """Return the target for `request`: its `impersonate` extension if set, else the handler-wide one"""
        per_request_target = request.extensions.get('impersonate')
        return per_request_target or self.impersonate

    def _check_extensions(self, extensions):
        super()._check_extensions(extensions)
        self._check_impersonate_target(extensions.get('impersonate'))

    def _check_impersonate_target(self, target):
        """Raise UnsupportedRequest if `target` is set and not among the supported targets"""
        assert isinstance(target, (str, NoneType))
        supported_targets = self._SUPPORTED_IMPERSONATE_TARGETS
        # Nothing to check when the check is disabled or no target was requested
        if supported_targets is None or target is None:
            return
        # XXX: this will raise even if the handler doesn't support the impersonate extension
        if target in supported_targets:
            return
        raise UnsupportedRequest(f'Unsupported impersonate target: {target}')

    def _validate(self, request):
        super()._validate(request)
        self._check_impersonate_target(self.impersonate)

    def _get_impersonate_headers(self, request):
        """
        Return the merged headers for `request`. When a target is being
        impersonated, User-Agent is always dropped and any other header still
        equal to its std_headers value is dropped as well.
        """
        headers = self._merge_headers(request.headers)
        if not self._get_impersonate_target(request):
            return headers

        headers.pop('User-Agent', None)
        for name in std_headers:
            if name in headers and std_headers[name] == headers[name]:
                headers.pop(name, None)
        return headers
|
@ -510,6 +510,11 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
||||
metavar='IP', dest='source_address', default=None,
|
||||
help='Client-side IP address to bind to',
|
||||
)
|
||||
network.add_option(
|
||||
'--impersonate',
|
||||
metavar='TARGET', dest='impersonate', default=None,
|
||||
help='curl-impersonate target name to impersonate for requests.',
|
||||
)
|
||||
network.add_option(
|
||||
'-4', '--force-ipv4',
|
||||
action='store_const', const='0.0.0.0', dest='source_address',
|
||||
|
Loading…
Reference in New Issue
Block a user