From a2e77303e3385da640a0904cd6cb76235fa9691b Mon Sep 17 00:00:00 2001 From: coletdev Date: Sat, 19 Mar 2022 11:10:20 +1300 Subject: [PATCH] [downloader/http] Retry on more errors (#3065) Closes #3056, #2071 Related: #3034, #2969 Authored-by: coletdjnz --- yt_dlp/downloader/http.py | 40 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 10ba61024..c2a57726e 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals -import errno import os -import socket +import ssl import time import random @@ -10,6 +9,7 @@ from ..compat import ( compat_str, compat_urllib_error, + compat_http_client ) from ..utils import ( ContentTooShortError, @@ -23,6 +23,8 @@ XAttrUnavailableError, ) +RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException) + class HttpFD(FileDownloader): def real_download(self, filename, info_dict): @@ -125,14 +127,7 @@ def establish_connection(): set_range(request, range_start, range_end) # Establish connection try: - try: - ctx.data = self.ydl.urlopen(request) - except (compat_urllib_error.URLError, ) as err: - # reason may not be available, e.g. for urllib2.HTTPError on python 2.6 - reason = getattr(err, 'reason', None) - if isinstance(reason, socket.timeout): - raise RetryDownload(err) - raise err + ctx.data = self.ydl.urlopen(request) # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range @@ -202,13 +197,14 @@ def establish_connection(): # Unexpected HTTP error raise raise RetryDownload(err) - except socket.timeout as err: + except compat_urllib_error.URLError as err: + if isinstance(err.reason, ssl.CertificateError): + raise + raise RetryDownload(err) + # In urllib.request.AbstractHTTPHandler, the response is partially read on request. + # Any errors that occur during this will not be wrapped by URLError + except RESPONSE_READ_EXCEPTIONS as err: raise RetryDownload(err) - except socket.error as err: - if err.errno in (errno.ECONNRESET, errno.ETIMEDOUT): - # Connection reset is no problem, just retry - raise RetryDownload(err) - raise def download(): nonlocal throttle_start @@ -254,16 +250,8 @@ def retry(e): try: # Download and write data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) - # socket.timeout is a subclass of socket.error but may not have - # errno set - except socket.timeout as e: - retry(e) - except socket.error as e: - # SSLError on python 2 (inherits socket.error) may have - # no errno set but this error message - if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out': - retry(e) - raise + except RESPONSE_READ_EXCEPTIONS as err: + retry(err) byte_counter += len(data_block)