mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-23 19:56:12 +01:00
[rh:requests] Add handler for requests
HTTP library (#3668)
Adds support for HTTPS proxies and persistent connections (keep-alive)
Closes https://github.com/yt-dlp/yt-dlp/issues/1890
Resolves https://github.com/yt-dlp/yt-dlp/issues/4070
Resolves https://github.com/ytdl-org/youtube-dl/issues/32549
Resolves https://github.com/ytdl-org/youtube-dl/issues/14523
Resolves https://github.com/ytdl-org/youtube-dl/issues/13734
Authored by: coletdjnz, Grub4K, bashonly
This commit is contained in:
parent
700444c23d
commit
8a8b54523a
2
.github/workflows/core.yml
vendored
2
.github/workflows/core.yml
vendored
@ -32,7 +32,7 @@ jobs:
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install pytest
|
||||
- name: Install dependencies
|
||||
run: pip install pytest -r requirements.txt
|
||||
- name: Run tests
|
||||
continue-on-error: False
|
||||
|
@ -157,6 +157,7 @@ ### Differences in default behavior
|
||||
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
|
||||
* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [~~aria2c~~](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is
|
||||
* yt-dlp versions between 2021.09.01 and 2023.01.02 apply `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
|
||||
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) for standard http requests.
|
||||
|
||||
For ease of use, a few more compat options are available:
|
||||
|
||||
@ -164,7 +165,7 @@ ### Differences in default behavior
|
||||
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter`
|
||||
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter`
|
||||
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
|
||||
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress`. Use this to enable all future compat options
|
||||
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler`. Use this to enable all future compat options
|
||||
|
||||
|
||||
# INSTALLATION
|
||||
@ -274,6 +275,7 @@ ### Networking
|
||||
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
|
||||
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
|
||||
* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE)
|
||||
* [**requests**](https://github.com/psf/requests)\* - HTTP library. For HTTPS proxy and persistent connections support. Licensed under [Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE)
|
||||
|
||||
### Metadata
|
||||
|
||||
|
@ -4,3 +4,5 @@ websockets
|
||||
brotli; platform_python_implementation=='CPython'
|
||||
brotlicffi; platform_python_implementation!='CPython'
|
||||
certifi
|
||||
requests>=2.31.0,<3
|
||||
urllib3>=1.26.17,<3
|
9
setup.py
9
setup.py
@ -62,7 +62,14 @@ def py2exe_params():
|
||||
'compressed': 1,
|
||||
'optimize': 2,
|
||||
'dist_dir': './dist',
|
||||
'excludes': ['Crypto', 'Cryptodome'], # py2exe cannot import Crypto
|
||||
'excludes': [
|
||||
# py2exe cannot import Crypto
|
||||
'Crypto',
|
||||
'Cryptodome',
|
||||
# py2exe appears to confuse this with our socks library.
|
||||
# We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
|
||||
'urllib3.contrib.socks'
|
||||
],
|
||||
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
|
||||
# Modules that are only imported dynamically must be added here
|
||||
'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
|
||||
|
@ -28,7 +28,7 @@
|
||||
|
||||
from test.helper import FakeYDL, http_server_port
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import brotli
|
||||
from yt_dlp.dependencies import brotli, requests, urllib3
|
||||
from yt_dlp.networking import (
|
||||
HEADRequest,
|
||||
PUTRequest,
|
||||
@ -43,6 +43,7 @@
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
NoSupportingHandlers,
|
||||
ProxyError,
|
||||
RequestError,
|
||||
SSLError,
|
||||
TransportError,
|
||||
@ -305,7 +306,7 @@ def setup_class(cls):
|
||||
|
||||
|
||||
class TestHTTPRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_verify_cert(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(CertificateVerifyError):
|
||||
@ -316,7 +317,7 @@ def test_verify_cert(self, handler):
|
||||
assert r.status == 200
|
||||
r.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_ssl_error(self, handler):
|
||||
# HTTPS server with too old TLS version
|
||||
# XXX: is there a better way to test this than to create a new server?
|
||||
@ -334,7 +335,7 @@ def test_ssl_error(self, handler):
|
||||
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_percent_encode(self, handler):
|
||||
with handler() as rh:
|
||||
# Unicode characters should be encoded with uppercase percent-encoding
|
||||
@ -346,7 +347,7 @@ def test_percent_encode(self, handler):
|
||||
assert res.status == 200
|
||||
res.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_remove_dot_segments(self, handler):
|
||||
with handler() as rh:
|
||||
# This isn't a comprehensive test,
|
||||
@ -361,14 +362,14 @@ def test_remove_dot_segments(self, handler):
|
||||
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
|
||||
res.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_unicode_path_redirection(self, handler):
|
||||
with handler() as rh:
|
||||
r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
|
||||
assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
|
||||
r.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_raise_http_error(self, handler):
|
||||
with handler() as rh:
|
||||
for bad_status in (400, 500, 599, 302):
|
||||
@ -378,7 +379,7 @@ def test_raise_http_error(self, handler):
|
||||
# Should not raise an error
|
||||
validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_response_url(self, handler):
|
||||
with handler() as rh:
|
||||
# Response url should be that of the last url in redirect chain
|
||||
@ -389,7 +390,7 @@ def test_response_url(self, handler):
|
||||
assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
|
||||
res2.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_redirect(self, handler):
|
||||
with handler() as rh:
|
||||
def do_req(redirect_status, method, assert_no_content=False):
|
||||
@ -444,7 +445,7 @@ def do_req(redirect_status, method, assert_no_content=False):
|
||||
with pytest.raises(HTTPError):
|
||||
do_req(code, 'GET')
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_request_cookie_header(self, handler):
|
||||
# We should accept a Cookie header being passed as in normal headers and handle it appropriately.
|
||||
with handler() as rh:
|
||||
@ -476,19 +477,19 @@ def test_request_cookie_header(self, handler):
|
||||
assert b'Cookie: test=ytdlp' not in data
|
||||
assert b'Cookie: test=test' in data
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_redirect_loop(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(HTTPError, match='redirect loop'):
|
||||
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_incompleteread(self, handler):
|
||||
with handler(timeout=2) as rh:
|
||||
with pytest.raises(IncompleteRead):
|
||||
validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_cookies(self, handler):
|
||||
cookiejar = YoutubeDLCookieJar()
|
||||
cookiejar.set_cookie(http.cookiejar.Cookie(
|
||||
@ -505,7 +506,7 @@ def test_cookies(self, handler):
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
|
||||
assert b'Cookie: test=ytdlp' in data
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_headers(self, handler):
|
||||
|
||||
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
||||
@ -521,7 +522,7 @@ def test_headers(self, handler):
|
||||
assert b'Test2: test2' not in data
|
||||
assert b'Test3: test3' in data
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_timeout(self, handler):
|
||||
with handler() as rh:
|
||||
# Default timeout is 20 seconds, so this should go through
|
||||
@ -537,7 +538,7 @@ def test_timeout(self, handler):
|
||||
validate_and_send(
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_source_address(self, handler):
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
with handler(source_address=source_address) as rh:
|
||||
@ -545,13 +546,13 @@ def test_source_address(self, handler):
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
|
||||
assert source_address == data
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_gzip_trailing_garbage(self, handler):
|
||||
with handler() as rh:
|
||||
data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
|
||||
assert data == '<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
@pytest.mark.skipif(not brotli, reason='brotli support is not installed')
|
||||
def test_brotli(self, handler):
|
||||
with handler() as rh:
|
||||
@ -562,7 +563,7 @@ def test_brotli(self, handler):
|
||||
assert res.headers.get('Content-Encoding') == 'br'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_deflate(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
@ -572,7 +573,7 @@ def test_deflate(self, handler):
|
||||
assert res.headers.get('Content-Encoding') == 'deflate'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_gzip(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
@ -582,7 +583,7 @@ def test_gzip(self, handler):
|
||||
assert res.headers.get('Content-Encoding') == 'gzip'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_multiple_encodings(self, handler):
|
||||
with handler() as rh:
|
||||
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
|
||||
@ -593,7 +594,7 @@ def test_multiple_encodings(self, handler):
|
||||
assert res.headers.get('Content-Encoding') == pair
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_unsupported_encoding(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
@ -603,7 +604,7 @@ def test_unsupported_encoding(self, handler):
|
||||
assert res.headers.get('Content-Encoding') == 'unsupported'
|
||||
assert res.read() == b'raw'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_read(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
@ -633,7 +634,7 @@ def setup_class(cls):
|
||||
cls.geo_proxy_thread.daemon = True
|
||||
cls.geo_proxy_thread.start()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_http_proxy(self, handler):
|
||||
http_proxy = f'http://127.0.0.1:{self.proxy_port}'
|
||||
geo_proxy = f'http://127.0.0.1:{self.geo_port}'
|
||||
@ -659,7 +660,7 @@ def test_http_proxy(self, handler):
|
||||
assert res != f'normal: {real_url}'
|
||||
assert 'Accept' in res
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_noproxy(self, handler):
|
||||
with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
|
||||
# NO_PROXY
|
||||
@ -669,7 +670,7 @@ def test_noproxy(self, handler):
|
||||
'utf-8')
|
||||
assert 'Accept' in nop_response
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_allproxy(self, handler):
|
||||
url = 'http://foo.com/bar'
|
||||
with handler() as rh:
|
||||
@ -677,7 +678,7 @@ def test_allproxy(self, handler):
|
||||
'utf-8')
|
||||
assert response == f'normal: {url}'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_http_proxy_with_idn(self, handler):
|
||||
with handler(proxies={
|
||||
'http': f'http://127.0.0.1:{self.proxy_port}',
|
||||
@ -715,27 +716,27 @@ def _run_test(self, handler, **handler_kwargs):
|
||||
) as rh:
|
||||
validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_certificate_combined_nopass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_certificate_nocombined_nopass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'client.crt'),
|
||||
'client_certificate_key': os.path.join(self.certdir, 'client.key'),
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_certificate_combined_pass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
|
||||
'client_certificate_password': 'foobar',
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_certificate_nocombined_pass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'client.crt'),
|
||||
@ -819,6 +820,75 @@ def test_httplib_validation_errors(self, handler, req, match, version_check):
|
||||
assert not isinstance(exc_info.value, TransportError)
|
||||
|
||||
|
||||
class TestRequestsRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('raised,expected', [
|
||||
(lambda: requests.exceptions.ConnectTimeout(), TransportError),
|
||||
(lambda: requests.exceptions.ReadTimeout(), TransportError),
|
||||
(lambda: requests.exceptions.Timeout(), TransportError),
|
||||
(lambda: requests.exceptions.ConnectionError(), TransportError),
|
||||
(lambda: requests.exceptions.ProxyError(), ProxyError),
|
||||
(lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
|
||||
(lambda: requests.exceptions.SSLError(), SSLError),
|
||||
(lambda: requests.exceptions.InvalidURL(), RequestError),
|
||||
(lambda: requests.exceptions.InvalidHeader(), RequestError),
|
||||
# catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
|
||||
(lambda: urllib3.exceptions.HTTPError(), TransportError),
|
||||
(lambda: requests.exceptions.RequestException(), RequestError)
|
||||
# (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
|
||||
])
|
||||
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
|
||||
def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
|
||||
with handler() as rh:
|
||||
def mock_get_instance(*args, **kwargs):
|
||||
class MockSession:
|
||||
def request(self, *args, **kwargs):
|
||||
raise raised()
|
||||
return MockSession()
|
||||
|
||||
monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
|
||||
|
||||
with pytest.raises(expected) as exc_info:
|
||||
rh.send(Request('http://fake'))
|
||||
|
||||
assert exc_info.type is expected
|
||||
|
||||
@pytest.mark.parametrize('raised,expected,match', [
|
||||
(lambda: urllib3.exceptions.SSLError(), SSLError, None),
|
||||
(lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
|
||||
(lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
|
||||
(lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
|
||||
(lambda: urllib3.exceptions.DecodeError(), TransportError, None),
|
||||
(lambda: urllib3.exceptions.HTTPError(), TransportError, None), # catch-all
|
||||
(
|
||||
lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
|
||||
IncompleteRead,
|
||||
'3 bytes read, 4 more expected'
|
||||
),
|
||||
(
|
||||
lambda: urllib3.exceptions.IncompleteRead(partial=3, expected=5),
|
||||
IncompleteRead,
|
||||
'3 bytes read, 5 more expected'
|
||||
),
|
||||
])
|
||||
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
|
||||
def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
|
||||
from urllib3.response import HTTPResponse as Urllib3Response
|
||||
from requests.models import Response as RequestsResponse
|
||||
from yt_dlp.networking._requests import RequestsResponseAdapter
|
||||
requests_res = RequestsResponse()
|
||||
requests_res.raw = Urllib3Response(body=b'', status=200)
|
||||
res = RequestsResponseAdapter(requests_res)
|
||||
|
||||
def mock_read(*args, **kwargs):
|
||||
raise raised()
|
||||
monkeypatch.setattr(res.fp, 'read', mock_read)
|
||||
|
||||
with pytest.raises(expected, match=match) as exc_info:
|
||||
res.read()
|
||||
|
||||
assert exc_info.type is expected
|
||||
|
||||
|
||||
def run_validation(handler, error, req, **handler_kwargs):
|
||||
with handler(**handler_kwargs) as rh:
|
||||
if error:
|
||||
@ -855,6 +925,10 @@ class HTTPSupportedRH(ValidationRH):
|
||||
('file', UnsupportedRequest, {}),
|
||||
('file', False, {'enable_file_urls': True}),
|
||||
]),
|
||||
('Requests', [
|
||||
('http', False, {}),
|
||||
('https', False, {}),
|
||||
]),
|
||||
(NoCheckRH, [('http', False, {})]),
|
||||
(ValidationRH, [('http', UnsupportedRequest, {})])
|
||||
]
|
||||
@ -870,6 +944,14 @@ class HTTPSupportedRH(ValidationRH):
|
||||
('socks5h', False),
|
||||
('socks', UnsupportedRequest),
|
||||
]),
|
||||
('Requests', [
|
||||
('http', False),
|
||||
('https', False),
|
||||
('socks4', False),
|
||||
('socks4a', False),
|
||||
('socks5', False),
|
||||
('socks5h', False),
|
||||
]),
|
||||
(NoCheckRH, [('http', False)]),
|
||||
(HTTPSupportedRH, [('http', UnsupportedRequest)]),
|
||||
]
|
||||
@ -880,6 +962,10 @@ class HTTPSupportedRH(ValidationRH):
|
||||
('all', False),
|
||||
('unrelated', False),
|
||||
]),
|
||||
('Requests', [
|
||||
('all', False),
|
||||
('unrelated', False),
|
||||
]),
|
||||
(NoCheckRH, [('all', False)]),
|
||||
(HTTPSupportedRH, [('all', UnsupportedRequest)]),
|
||||
(HTTPSupportedRH, [('no', UnsupportedRequest)]),
|
||||
@ -894,6 +980,13 @@ class HTTPSupportedRH(ValidationRH):
|
||||
({'timeout': 'notatimeout'}, AssertionError),
|
||||
({'unsupported': 'value'}, UnsupportedRequest),
|
||||
]),
|
||||
('Requests', [
|
||||
({'cookiejar': 'notacookiejar'}, AssertionError),
|
||||
({'cookiejar': YoutubeDLCookieJar()}, False),
|
||||
({'timeout': 1}, False),
|
||||
({'timeout': 'notatimeout'}, AssertionError),
|
||||
({'unsupported': 'value'}, UnsupportedRequest),
|
||||
]),
|
||||
(NoCheckRH, [
|
||||
({'cookiejar': 'notacookiejar'}, False),
|
||||
({'somerandom': 'test'}, False), # but any extension is allowed through
|
||||
@ -909,7 +1002,7 @@ class HTTPSupportedRH(ValidationRH):
|
||||
def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
|
||||
run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
|
||||
|
||||
@pytest.mark.parametrize('handler,fail', [('Urllib', False)], indirect=['handler'])
|
||||
@pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler'])
|
||||
def test_no_proxy(self, handler, fail):
|
||||
run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
|
||||
run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
|
||||
@ -932,13 +1025,13 @@ def test_proxy_scheme(self, handler, scheme, fail):
|
||||
run_validation(handler, fail, Request('http://', proxies={'http': f'{scheme}://example.com'}))
|
||||
run_validation(handler, fail, Request('http://'), proxies={'http': f'{scheme}://example.com'})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
|
||||
def test_empty_proxy(self, handler):
|
||||
run_validation(handler, False, Request('http://', proxies={'http': None}))
|
||||
run_validation(handler, False, Request('http://'), proxies={'http': None})
|
||||
|
||||
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_invalid_proxy_url(self, handler, proxy_url):
|
||||
run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
|
||||
|
||||
@ -1242,6 +1335,13 @@ def test_urllib_file_urls(self):
|
||||
rh = self.build_handler(ydl, UrllibRH)
|
||||
assert rh.enable_file_urls is True
|
||||
|
||||
def test_compat_opt_prefer_urllib(self):
|
||||
# This assumes urllib only has a preference when this compat opt is given
|
||||
with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
|
||||
director = ydl.build_request_director([UrllibRH])
|
||||
assert len(director.preferences) == 1
|
||||
assert director.preferences.pop()(UrllibRH, None)
|
||||
|
||||
|
||||
class TestRequest:
|
||||
|
||||
|
@ -263,7 +263,7 @@ def ctx(request):
|
||||
|
||||
|
||||
class TestSocks4Proxy:
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks4_no_auth(self, handler, ctx):
|
||||
with handler() as rh:
|
||||
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
||||
@ -271,7 +271,7 @@ def test_socks4_no_auth(self, handler, ctx):
|
||||
rh, proxies={'all': f'socks4://{server_address}'})
|
||||
assert response['version'] == 4
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks4_auth(self, handler, ctx):
|
||||
with handler() as rh:
|
||||
with ctx.socks_server(Socks4ProxyHandler, user_id='user') as server_address:
|
||||
@ -281,7 +281,7 @@ def test_socks4_auth(self, handler, ctx):
|
||||
rh, proxies={'all': f'socks4://user:@{server_address}'})
|
||||
assert response['version'] == 4
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks4a_ipv4_target(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
||||
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
|
||||
@ -289,7 +289,7 @@ def test_socks4a_ipv4_target(self, handler, ctx):
|
||||
assert response['version'] == 4
|
||||
assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1')
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks4a_domain_target(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
||||
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
|
||||
@ -298,7 +298,7 @@ def test_socks4a_domain_target(self, handler, ctx):
|
||||
assert response['ipv4_address'] is None
|
||||
assert response['domain_address'] == 'localhost'
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_ipv4_client_source_address(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
@ -308,7 +308,7 @@ def test_ipv4_client_source_address(self, handler, ctx):
|
||||
assert response['client_address'][0] == source_address
|
||||
assert response['version'] == 4
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('reply_code', [
|
||||
Socks4CD.REQUEST_REJECTED_OR_FAILED,
|
||||
Socks4CD.REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD,
|
||||
@ -320,7 +320,7 @@ def test_socks4_errors(self, handler, ctx, reply_code):
|
||||
with pytest.raises(ProxyError):
|
||||
ctx.socks_info_request(rh)
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_ipv6_socks4_proxy(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
|
||||
with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
|
||||
@ -329,7 +329,7 @@ def test_ipv6_socks4_proxy(self, handler, ctx):
|
||||
assert response['ipv4_address'] == '127.0.0.1'
|
||||
assert response['version'] == 4
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_timeout(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
|
||||
with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
|
||||
@ -339,7 +339,7 @@ def test_timeout(self, handler, ctx):
|
||||
|
||||
class TestSocks5Proxy:
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks5_no_auth(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
||||
@ -347,7 +347,7 @@ def test_socks5_no_auth(self, handler, ctx):
|
||||
assert response['auth_methods'] == [0x0]
|
||||
assert response['version'] == 5
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks5_user_pass(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler, auth=('test', 'testpass')) as server_address:
|
||||
with handler() as rh:
|
||||
@ -360,7 +360,7 @@ def test_socks5_user_pass(self, handler, ctx):
|
||||
assert response['auth_methods'] == [Socks5Auth.AUTH_NONE, Socks5Auth.AUTH_USER_PASS]
|
||||
assert response['version'] == 5
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks5_ipv4_target(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
||||
@ -368,7 +368,7 @@ def test_socks5_ipv4_target(self, handler, ctx):
|
||||
assert response['ipv4_address'] == '127.0.0.1'
|
||||
assert response['version'] == 5
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks5_domain_target(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
||||
@ -376,7 +376,7 @@ def test_socks5_domain_target(self, handler, ctx):
|
||||
assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1')
|
||||
assert response['version'] == 5
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks5h_domain_target(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||
with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
|
||||
@ -385,7 +385,7 @@ def test_socks5h_domain_target(self, handler, ctx):
|
||||
assert response['domain_address'] == 'localhost'
|
||||
assert response['version'] == 5
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks5h_ip_target(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||
with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
|
||||
@ -394,7 +394,7 @@ def test_socks5h_ip_target(self, handler, ctx):
|
||||
assert response['domain_address'] is None
|
||||
assert response['version'] == 5
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_socks5_ipv6_destination(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
||||
@ -402,7 +402,7 @@ def test_socks5_ipv6_destination(self, handler, ctx):
|
||||
assert response['ipv6_address'] == '::1'
|
||||
assert response['version'] == 5
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_ipv6_socks5_proxy(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
|
||||
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
||||
@ -413,7 +413,7 @@ def test_ipv6_socks5_proxy(self, handler, ctx):
|
||||
|
||||
# XXX: is there any feasible way of testing IPv6 source addresses?
|
||||
# Same would go for non-proxy source_address test...
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
def test_ipv4_client_source_address(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
@ -422,7 +422,7 @@ def test_ipv4_client_source_address(self, handler, ctx):
|
||||
assert response['client_address'][0] == source_address
|
||||
assert response['version'] == 5
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('reply_code', [
|
||||
Socks5Reply.GENERAL_FAILURE,
|
||||
Socks5Reply.CONNECTION_NOT_ALLOWED,
|
||||
|
@ -3968,7 +3968,7 @@ def get_encoding(stream):
|
||||
})) or 'none'))
|
||||
|
||||
write_debug(f'Proxy map: {self.proxies}')
|
||||
# write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||||
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||||
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
||||
display_list = ['%s%s' % (
|
||||
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
||||
@ -4057,6 +4057,9 @@ def urlopen(self, req):
|
||||
raise RequestError(
|
||||
'file:// URLs are disabled by default in yt-dlp for security reasons. '
|
||||
'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
|
||||
if 'unsupported proxy type: "https"' in ue.msg.lower():
|
||||
raise RequestError(
|
||||
'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
|
||||
raise
|
||||
except SSLError as e:
|
||||
if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
|
||||
@ -4099,6 +4102,8 @@ def build_request_director(self, handlers, preferences=None):
|
||||
}),
|
||||
))
|
||||
director.preferences.update(preferences or [])
|
||||
if 'prefer-legacy-http-handler' in self.params['compat_opts']:
|
||||
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
|
||||
return director
|
||||
|
||||
def encode(self, s):
|
||||
|
@ -21,7 +21,9 @@ def get_hidden_imports():
|
||||
yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
|
||||
yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
|
||||
yield pycryptodome_module()
|
||||
yield from collect_submodules('websockets')
|
||||
# Only `websockets` is required, others are collected just in case
|
||||
for module in ('websockets', 'requests', 'urllib3'):
|
||||
yield from collect_submodules(module)
|
||||
# These are auto-detected, but explicitly add them just in case
|
||||
yield from ('mutagen', 'brotli', 'certifi')
|
||||
|
||||
|
@ -58,6 +58,15 @@
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/2633
|
||||
websockets = None
|
||||
|
||||
try:
|
||||
import urllib3
|
||||
except ImportError:
|
||||
urllib3 = None
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
requests = None
|
||||
|
||||
try:
|
||||
import xattr # xattr or pyxattr
|
||||
|
@ -1,4 +1,6 @@
|
||||
# flake8: noqa: F401
|
||||
import warnings
|
||||
|
||||
from .common import (
|
||||
HEADRequest,
|
||||
PUTRequest,
|
||||
@ -11,3 +13,11 @@
|
||||
# isort: split
|
||||
# TODO: all request handlers should be safely imported
|
||||
from . import _urllib
|
||||
from ..utils import bug_reports_message
|
||||
|
||||
try:
|
||||
from . import _requests
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as e:
|
||||
warnings.warn(f'Failed to import "requests" request handler: {e}' + bug_reports_message())
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
from .exceptions import RequestError, UnsupportedRequest
|
||||
from ..dependencies import certifi
|
||||
from ..socks import ProxyType
|
||||
from ..socks import ProxyType, sockssocket
|
||||
from ..utils import format_field, traverse_obj
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
@ -224,6 +224,24 @@ def _socket_connect(ip_addr, timeout, source_address):
|
||||
raise
|
||||
|
||||
|
||||
def create_socks_proxy_socket(dest_addr, proxy_args, proxy_ip_addr, timeout, source_address):
    """Connect to `dest_addr` through a SOCKS proxy and return the socket.

    @param dest_addr        Address handed to `sockssocket.connect()`
    @param proxy_args       Keyword arguments for `sockssocket.setproxy()`; its
                            'addr' and 'port' entries are replaced by the address
                            found in `proxy_ip_addr` (the mapping itself is not mutated)
    @param proxy_ip_addr    5-tuple (family, type, proto, canonname, sockaddr) for the
                            proxy endpoint - presumably one `getaddrinfo()` result
    @param timeout          Socket timeout; not applied when it is the
                            `socket._GLOBAL_DEFAULT_TIMEOUT` sentinel
    @param source_address   Local address to bind to before connecting, if truthy
    @returns                The connected `sockssocket`; on a socket error the
                            socket is closed and the error re-raised
    """
    family, kind, protocol, _canonname, sockaddr = proxy_ip_addr
    proxy_sock = sockssocket(family, kind, protocol)
    try:
        # Work on a copy so the caller's proxy options stay untouched
        resolved_proxy_args = proxy_args.copy()
        resolved_proxy_args.update(addr=sockaddr[0], port=sockaddr[1])
        proxy_sock.setproxy(**resolved_proxy_args)

        if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:  # noqa: E721
            proxy_sock.settimeout(timeout)
        if source_address:
            proxy_sock.bind(source_address)

        proxy_sock.connect(dest_addr)
    except OSError:
        # Do not leak the half-open socket
        proxy_sock.close()
        raise
    return proxy_sock
|
||||
|
||||
|
||||
def create_connection(
|
||||
address,
|
||||
timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
|
||||
|
398
yt_dlp/networking/_requests.py
Normal file
398
yt_dlp/networking/_requests.py
Normal file
@ -0,0 +1,398 @@
|
||||
import contextlib
|
||||
import functools
|
||||
import http.client
|
||||
import logging
|
||||
import re
|
||||
import socket
|
||||
import warnings
|
||||
|
||||
from ..dependencies import brotli, requests, urllib3
|
||||
from ..utils import bug_reports_message, int_or_none, variadic
|
||||
|
||||
# Refuse to load this module unless both optional dependencies are available
# (they are None here when they could not be imported).
if requests is None:
    raise ImportError('requests module is not installed')

if urllib3 is None:
    raise ImportError('urllib3 module is not installed')

# Version as a tuple of ints for comparisons; components that are not plain
# integers presumably become 0 via int_or_none(default=0)
urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))

if urllib3_version < (1, 26, 17):
    raise ImportError('Only urllib3 >= 1.26.17 is supported')

# requests.__build__ is the version encoded as a hex number (0x023100 for 2.31.0)
if requests.__build__ < 0x023100:
    raise ImportError('Only requests >= 2.31.0 is supported')
|
||||
|
||||
import requests.adapters
|
||||
import requests.utils
|
||||
import urllib3.connection
|
||||
import urllib3.exceptions
|
||||
|
||||
from ._helper import (
|
||||
InstanceStoreMixin,
|
||||
add_accept_encoding_header,
|
||||
create_connection,
|
||||
create_socks_proxy_socket,
|
||||
get_redirect_method,
|
||||
make_socks_proxy_opts,
|
||||
select_proxy,
|
||||
)
|
||||
from .common import (
|
||||
Features,
|
||||
RequestHandler,
|
||||
Response,
|
||||
register_preference,
|
||||
register_rh,
|
||||
)
|
||||
from .exceptions import (
|
||||
CertificateVerifyError,
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
ProxyError,
|
||||
RequestError,
|
||||
SSLError,
|
||||
TransportError,
|
||||
)
|
||||
from ..socks import ProxyError as SocksProxyError
|
||||
|
||||
# Content encodings offered to servers by this handler
SUPPORTED_ENCODINGS = ['gzip', 'deflate']

if brotli is not None:
    # Only offer 'br' when the optional brotli dependency is present
    SUPPORTED_ENCODINGS += ['br']
|
||||
|
||||
"""
|
||||
Override urllib3's behavior to not convert lower-case percent-encoded characters
|
||||
to upper-case during url normalization process.
|
||||
|
||||
RFC3986 defines that the lower or upper case percent-encoded hexadecimal characters are equivalent
|
||||
and normalizers should convert them to uppercase for consistency [1].
|
||||
|
||||
However, some sites may have an incorrect implementation where they provide
|
||||
a percent-encoded url that is then compared case-sensitively.[2]
|
||||
|
||||
While this is a very rare case, since urllib does not do this normalization step, it
|
||||
is best to avoid it in requests too for compatibility reasons.
|
||||
|
||||
1: https://tools.ietf.org/html/rfc3986#section-2.1
|
||||
2: https://github.com/streamlink/streamlink/pull/4003
|
||||
"""
|
||||
|
||||
|
||||
class Urllib3PercentREOverride:
    """Proxy for a compiled pattern whose `subn()` never modifies the input.

    All other attributes are looked up on the wrapped pattern.
    """

    def __init__(self, r: re.Pattern):
        self.re = r

    def __getattr__(self, item):
        # Reached only for names missing on the wrapper: delegate to the real pattern
        return getattr(self.re, item)

    def subn(self, repl, string, *args, **kwargs):
        """Return `(string, count)`: the untouched input plus the real match count"""
        _, substitutions = self.re.subn(repl, string, *args, **kwargs)
        return string, substitutions
|
||||
|
||||
|
||||
# urllib3 >= 1.25.8 uses subn:
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
import urllib3.util.url  # noqa: E305

# Replace whichever percent-encoding pattern this urllib3 version exposes with
# a Urllib3PercentREOverride wrapper, whose subn() returns the string unchanged.
if hasattr(urllib3.util.url, 'PERCENT_RE'):
    urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
elif hasattr(urllib3.util.url, '_PERCENT_RE'):  # urllib3 >= 2.0.0
    urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
else:
    # Neither attribute exists: nothing was patched, so tell the user
    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
|
||||
|
||||
"""
|
||||
Workaround for issue in urllib3.util.ssl_: ssl_wrap_socket does not pass
|
||||
server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
|
||||
however this is an issue because we set check_hostname to True in our SSLContext.
|
||||
|
||||
Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_socket to pass server_hostname regardless.
|
||||
|
||||
This has been fixed in urllib3 2.0+.
|
||||
See: https://github.com/urllib3/urllib3/issues/517
|
||||
"""
|
||||
|
||||
if urllib3_version < (2, 0, 0):
    # Best-effort monkey-patch: a failure here must not prevent the module from loading.
    # contextlib.suppress() without arguments suppresses nothing, so the
    # exception class has to be named explicitly for the guard to work.
    with contextlib.suppress(Exception):
        urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True
|
||||
|
||||
|
||||
# Requests will not automatically handle no_proxy by default
# due to buggy no_proxy handling with proxy dict [1].
# Swap in our own select_proxy (from ._helper) for the one requests.adapters uses.
# 1. https://github.com/psf/requests/issues/5000
requests.adapters.select_proxy = select_proxy
|
||||
|
||||
|
||||
class RequestsResponseAdapter(Response):
    """Expose a `requests` response through our `Response` interface.

    The body is read straight from the underlying urllib3 response (`res.raw`),
    and urllib3 read errors are translated into our networking exceptions.
    """

    def __init__(self, res: requests.models.Response):
        super().__init__(
            fp=res.raw,
            headers=res.headers,
            url=res.url,
            status=res.status_code,
            reason=res.reason,
        )
        # Keep the original response object around
        self._requests_response = res

    def read(self, amt: int = None):
        """Read up to `amt` bytes of content-decoded body.

        @raises SSLError, IncompleteRead, TransportError
        """
        try:
            # Interact with urllib3 response directly.
            return self.fp.read(amt, decode_content=True)

        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
        except urllib3.exceptions.SSLError as e:
            raise SSLError(cause=e) from e

        except urllib3.exceptions.IncompleteRead as e:
            # urllib3 IncompleteRead.partial is always an integer
            raise IncompleteRead(partial=e.partial, expected=e.expected) from e

        except urllib3.exceptions.ProtocolError as e:
            # http.client.IncompleteRead may be contained within ProtocolError
            # See urllib3.response.HTTPResponse._error_catcher()
            for candidate in (e.__context__, e.__cause__, *variadic(e.args)):
                if isinstance(candidate, http.client.IncompleteRead):
                    # Here `partial` holds the data received, so report its length
                    raise IncompleteRead(partial=len(candidate.partial), expected=candidate.expected) from e
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # catch-all for any other urllib3 response exceptions
            raise TransportError(cause=e) from e
|
||||
|
||||
|
||||
class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
    """HTTPAdapter that passes our SSL context and source address to its pool managers"""

    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
        # Extra keyword arguments for every (proxy) pool manager this adapter creates.
        # Set before super().__init__() so they exist whenever init_poolmanager() runs.
        pool_manager_args = {}
        if ssl_context:
            pool_manager_args['ssl_context'] = ssl_context
        if source_address:
            # Port 0: only the local address is fixed
            pool_manager_args['source_address'] = (source_address, 0)
        self._pm_args = pool_manager_args

        # Fall back to the regular context for TLS connections to the proxy itself
        self._proxy_ssl_context = proxy_ssl_context or ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, *args, **kwargs):
        return super().init_poolmanager(*args, **kwargs, **self._pm_args)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        is_socks_proxy = proxy.lower().startswith('socks')
        extra_kwargs = {}
        # proxy_ssl_context is only passed on for non-socks proxies
        if self._proxy_ssl_context and not is_socks_proxy:
            extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
        return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)

    def cert_verify(*args, **kwargs):
        """Intentionally a no-op: lean on SSLContext for cert verification"""
|
||||
|
||||
|
||||
class RequestsSession(requests.sessions.Session):
    """
    Ensure unified redirect method handling with our urllib redirect handler.
    """

    def rebuild_method(self, prepared_request, response):
        """Pick the method for the redirected request using `get_redirect_method`"""
        current_method = prepared_request.method
        redirect_method = get_redirect_method(current_method, response.status_code)

        # HACK: requests removes headers/body on redirect unless code was a 307/308.
        # When the method is kept, pretend the response was a 308 and remember
        # the real code so that rebuild_auth() can restore it.
        if redirect_method == current_method:
            response._real_status_code = response.status_code
            response.status_code = 308

        prepared_request.method = redirect_method

    def rebuild_auth(self, prepared_request, response):
        """Restore the status code faked by `rebuild_method`, then defer to requests"""
        # HACK: undo status code change from rebuild_method, if applicable.
        # rebuild_auth runs after requests would remove headers/body based on status code
        if hasattr(response, '_real_status_code'):
            response.status_code = response._real_status_code
            del response._real_status_code

        return super().rebuild_auth(prepared_request, response)
|
||||
|
||||
|
||||
class Urllib3LoggingFilter(logging.Filter):
    """Drop the per-request log line, identified by its message template"""

    def filter(self, record):
        # Ignore HTTP request messages since HTTPConnection prints those
        return record.msg != '%s://%s:%s "%s %s %s" %s %s'
|
||||
|
||||
|
||||
class Urllib3LoggingHandler(logging.Handler):
    """Redirect urllib3 logs to our logger"""

    def __init__(self, logger, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Target object; must provide error() and stdout()
        self._logger = logger

    def emit(self, record):
        """Send ERROR-and-above records to `error()`, everything else to `stdout()`"""
        try:
            message = self.format(record)
            is_error = record.levelno >= logging.ERROR
            write = self._logger.error if is_error else self._logger.stdout
            write(message)
        except Exception:
            # Standard logging.Handler convention for failures while emitting
            self.handleError(record)
|
||||
|
||||
|
||||
@register_rh
class RequestsRH(RequestHandler, InstanceStoreMixin):

    """Requests RequestHandler
    https://github.com/psf/requests
    """
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
    RH_NAME = 'requests'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Forward urllib3 debug messages to our logger
        logger = logging.getLogger('urllib3')
        # The 'urllib3' logger is process-global, so keep a reference to our handler:
        # close() must be able to detach it again. Otherwise every handler instance
        # leaks one logging handler (and the logger it references) and messages
        # get emitted once per RequestsRH ever created.
        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
        self.__logging_handler.addFilter(Urllib3LoggingFilter())
        logger.addHandler(self.__logging_handler)
        logger.setLevel(logging.WARNING)

        if self.verbose:
            # Setting this globally is not ideal, but is easier than hacking with urllib3.
            # It could technically be problematic for scripts embedding yt-dlp.
            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
            urllib3.connection.HTTPConnection.debuglevel = 1
            logger.setLevel(logging.DEBUG)
        # this is expected if we are using --no-check-certificate
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def close(self):
        """Drop all stored sessions and detach our handler from the urllib3 logger"""
        self._clear_instances()
        # removeHandler() is a no-op if the handler is not attached (e.g. repeated close())
        logging.getLogger('urllib3').removeHandler(self.__logging_handler)

    def _check_extensions(self, extensions):
        """Run the base checks, then remove the extensions this handler deals with itself"""
        super()._check_extensions(extensions)
        extensions.pop('cookiejar', None)
        extensions.pop('timeout', None)

    def _create_instance(self, cookiejar):
        """Build a session using our adapter (SSL context, source address, no retries)
        for both http:// and https://, with `cookiejar` as its cookie store"""
        session = RequestsSession()
        http_adapter = RequestsHTTPAdapter(
            ssl_context=self._make_sslcontext(),
            source_address=self.source_address,
            max_retries=urllib3.util.retry.Retry(False),
        )
        # Replace the default adapters and default headers of the session
        session.adapters.clear()
        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
        session.mount('https://', http_adapter)
        session.mount('http://', http_adapter)
        session.cookies = cookiejar
        session.trust_env = False  # no need, we already load proxies from env
        return session

    def _send(self, request):
        """Perform `request` and return a `RequestsResponseAdapter`.

        @raises CertificateVerifyError, SSLError, ProxyError, TransportError, RequestError
                for failures reported by requests/urllib3
        @raises HTTPError   if the final status is not 2xx; `redirect_loop` is set
                            when requests gave up due to too many redirects
        """
        headers = self._merge_headers(request.headers)
        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)

        max_redirects_exceeded = False

        # Session for the cookiejar in use (per-request one takes precedence)
        session = self._get_instance(
            cookiejar=request.extensions.get('cookiejar') or self.cookiejar)

        try:
            requests_res = session.request(
                method=request.method,
                url=request.url,
                data=request.data,
                headers=headers,
                timeout=float(request.extensions.get('timeout') or self.timeout),
                proxies=request.proxies or self.proxies,
                allow_redirects=True,
                stream=True
            )

        except requests.exceptions.TooManyRedirects as e:
            # Not raised directly: the last response is reported as an HTTPError below
            max_redirects_exceeded = True
            requests_res = e.response

        except requests.exceptions.SSLError as e:
            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
                raise CertificateVerifyError(cause=e) from e
            raise SSLError(cause=e) from e

        except requests.exceptions.ProxyError as e:
            raise ProxyError(cause=e) from e

        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # Catch any urllib3 exceptions that may leak through
            raise TransportError(cause=e) from e

        except requests.exceptions.RequestException as e:
            # Miscellaneous Requests exceptions. May not necessarily be network related e.g. InvalidURL
            raise RequestError(cause=e) from e

        res = RequestsResponseAdapter(requests_res)

        if not 200 <= res.status < 300:
            raise HTTPError(res, redirect_loop=max_redirects_exceeded)

        return res
|
||||
|
||||
|
||||
@register_preference(RequestsRH)
def requests_preference(rh, request):
    """Preference registered for RequestsRH: a constant 100, whatever the handler or request"""
    return 100
|
||||
|
||||
|
||||
# Use our socks proxy implementation with requests to avoid an extra dependency.
|
||||
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
    """urllib3 HTTP connection that is established through a SOCKS proxy"""

    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
        self._proxy_args = _socks_options
        super().__init__(*args, **kwargs)

    def _new_conn(self):
        """Connect to the proxy and tunnel to (host, port), mapping failures to urllib3 exceptions"""
        proxy_endpoint = (self._proxy_args['addr'], self._proxy_args['port'])
        # Socket factory handed to create_connection(); it receives the remaining
        # (proxy_ip_addr, timeout, source_address) arguments from there
        open_tunnel = functools.partial(
            create_socks_proxy_socket, (self.host, self.port), self._proxy_args)
        try:
            return create_connection(
                address=proxy_endpoint,
                timeout=self.timeout,
                source_address=self.source_address,
                _create_socket_func=open_tunnel)
        except (socket.timeout, TimeoutError) as e:
            raise urllib3.exceptions.ConnectTimeoutError(
                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
        except SocksProxyError as e:
            raise urllib3.exceptions.ProxyError(str(e), e) from e
        except OSError as e:  # socket.error is an alias of OSError
            raise urllib3.exceptions.NewConnectionError(
                self, f'Failed to establish a new connection: {e}') from e
|
||||
|
||||
|
||||
# HTTPS variant: SocksHTTPConnection is listed first, so its _new_conn() is the one used
class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
    pass


# Connection pool for http:// that creates SocksHTTPConnection objects
class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
    ConnectionCls = SocksHTTPConnection


# Connection pool for https:// that creates SocksHTTPSConnection objects
class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
    ConnectionCls = SocksHTTPSConnection
|
||||
|
||||
|
||||
class SocksProxyManager(urllib3.PoolManager):
    """PoolManager whose http/https pools connect through the given SOCKS proxy"""

    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
        # `username` and `password` are accepted but not used here.
        # The parsed proxy options reach each connection as `_socks_options`.
        socks_options = make_socks_proxy_opts(socks_proxy)
        connection_pool_kw['_socks_options'] = socks_options
        super().__init__(num_pools, headers, **connection_pool_kw)
        self.pool_classes_by_scheme = dict(
            http=SocksHTTPConnectionPool,
            https=SocksHTTPSConnectionPool,
        )
|
||||
|
||||
|
||||
requests.adapters.SOCKSProxyManager = SocksProxyManager
|
@ -3,7 +3,6 @@
|
||||
import functools
|
||||
import http.client
|
||||
import io
|
||||
import socket
|
||||
import ssl
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
@ -24,6 +23,7 @@
|
||||
InstanceStoreMixin,
|
||||
add_accept_encoding_header,
|
||||
create_connection,
|
||||
create_socks_proxy_socket,
|
||||
get_redirect_method,
|
||||
make_socks_proxy_opts,
|
||||
select_proxy,
|
||||
@ -40,7 +40,6 @@
|
||||
)
|
||||
from ..dependencies import brotli
|
||||
from ..socks import ProxyError as SocksProxyError
|
||||
from ..socks import sockssocket
|
||||
from ..utils import update_url_query
|
||||
from ..utils.networking import normalize_url
|
||||
|
||||
@ -190,25 +189,12 @@ class SocksConnection(base_class):
|
||||
_create_connection = create_connection
|
||||
|
||||
def connect(self):
|
||||
def sock_socket_connect(ip_addr, timeout, source_address):
|
||||
af, socktype, proto, canonname, sa = ip_addr
|
||||
sock = sockssocket(af, socktype, proto)
|
||||
try:
|
||||
connect_proxy_args = proxy_args.copy()
|
||||
connect_proxy_args.update({'addr': sa[0], 'port': sa[1]})
|
||||
sock.setproxy(**connect_proxy_args)
|
||||
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: # noqa: E721
|
||||
sock.settimeout(timeout)
|
||||
if source_address:
|
||||
sock.bind(source_address)
|
||||
sock.connect((self.host, self.port))
|
||||
return sock
|
||||
except socket.error:
|
||||
sock.close()
|
||||
raise
|
||||
self.sock = create_connection(
|
||||
(proxy_args['addr'], proxy_args['port']), timeout=self.timeout,
|
||||
source_address=self.source_address, _create_socket_func=sock_socket_connect)
|
||||
(proxy_args['addr'], proxy_args['port']),
|
||||
timeout=self.timeout,
|
||||
source_address=self.source_address,
|
||||
_create_socket_func=functools.partial(
|
||||
create_socks_proxy_socket, (self.host, self.port), proxy_args))
|
||||
if isinstance(self, http.client.HTTPSConnection):
|
||||
self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host)
|
||||
|
||||
|
@ -471,11 +471,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
||||
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
|
||||
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
|
||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
|
||||
'prefer-legacy-http-handler'
|
||||
}, 'aliases': {
|
||||
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter'],
|
||||
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter'],
|
||||
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
|
||||
'2022': ['no-external-downloader-progress', 'playlist-match-filter'],
|
||||
'2022': ['no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler'],
|
||||
}
|
||||
}, help=(
|
||||
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
|
||||
|
Loading…
Reference in New Issue
Block a user