From 1d485a1a799bbeeb2faea0595676ca7d4c0f3716 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 29 Apr 2022 07:18:36 +0530 Subject: [PATCH] [cleanup] Misc fixes Closes #3565, https://github.com/yt-dlp/yt-dlp/issues/3514#issuecomment-1105944364 --- devscripts/lazy_load_template.py | 2 +- yt_dlp/YoutubeDL.py | 18 ++++++++++-------- yt_dlp/compat/__init__.py | 4 ---- yt_dlp/compat/_deprecated.py | 5 +++++ yt_dlp/compat/asyncio.py | 1 - yt_dlp/compat/compat_utils.py | 22 +++++++++++++++++++--- yt_dlp/compat/re.py | 1 - yt_dlp/dependencies.py | 11 +++++++++++ yt_dlp/downloader/common.py | 6 ++++-- yt_dlp/downloader/external.py | 2 +- yt_dlp/downloader/fragment.py | 12 ++++-------- yt_dlp/downloader/mhtml.py | 2 +- yt_dlp/extractor/common.py | 3 +-- yt_dlp/extractor/fujitv.py | 6 +++--- yt_dlp/extractor/funimation.py | 3 +++ yt_dlp/extractor/youtube.py | 2 +- yt_dlp/postprocessor/embedthumbnail.py | 4 ++-- yt_dlp/postprocessor/xattrpp.py | 9 ++++++--- yt_dlp/utils.py | 4 +++- 19 files changed, 75 insertions(+), 42 deletions(-) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index 0058915ae..e4b4f5825 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -7,7 +7,7 @@ class LazyLoadMetaClass(type): def __getattr__(cls, name): if '_real_class' not in cls.__dict__: write_string( - f'WARNING: Falling back to normal extractor since lazy extractor ' + 'WARNING: Falling back to normal extractor since lazy extractor ' f'{cls.__name__} does not have attribute {name}{bug_reports_message()}') return getattr(cls._get_real_class(), name) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2857e9106..1e61be733 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -62,6 +62,7 @@ DEFAULT_OUTTMPL, LINK_TEMPLATES, NO_DEFAULT, + NUMBER_RE, OUTTMPL_TYPES, POSTPROCESS_WHEN, STR_FORMAT_RE_TMPL, @@ -1049,7 +1050,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): formatSeconds(info_dict['duration'], '-' if sanitize else ':') if info_dict.get('duration', None) is not None else None) - info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads + info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads) info_dict['video_autonumber'] = self._num_videos if info_dict.get('resolution') is None: info_dict['resolution'] = self.format_resolution(info_dict, default=None) @@ -1071,18 +1072,18 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): # Field is of the form key1.key2... # where keys (except first) can be string, int or slice FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)') - MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?') + MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})' MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) - INTERNAL_FORMAT_RE = re.compile(r'''(?x) + INTERNAL_FORMAT_RE = re.compile(rf'''(?x) (?P-)? - (?P{field}) - (?P(?:{math_op}{math_field})*) + (?P{FIELD_RE}) + (?P(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*) (?:>(?P.+?))? (?P (?P(?.*?))? (?:\|(?P.*?))? - )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE)) + )$''') def _traverse_infodict(k): k = k.split('.') @@ -2336,7 +2337,7 @@ def _fill_common_fields(self, info_dict, is_video=True): video_id=info_dict['id'], ie=info_dict['extractor']) elif not info_dict.get('title'): self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') - info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}' + info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}' if info_dict.get('duration') is not None: info_dict['duration_string'] = formatSeconds(info_dict['duration']) @@ -3669,10 +3670,11 @@ def python_implementation(): ) or 'none' write_debug('exe versions: %s' % exe_str) + from .compat.compat_utils import get_package_info from .dependencies import available_dependencies write_debug('Optional libraries: %s' % (', '.join(sorted({ - module.__name__.split('.')[0] for module in available_dependencies.values() + join_nonempty(*get_package_info(m)) for m in available_dependencies.values() })) or 'none')) self._setup_opener() diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index 3c395f6d9..a0cd62110 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -46,10 +46,6 @@ def compat_ord(c): return c if isinstance(c, int) else ord(c) -def compat_setenv(key, value, env=os.environ): - env[key] = value - - if compat_os_name == 'nt' and sys.version_info < (3, 8): # os.path.realpath on Windows does not follow symbolic links # prior to Python 3.8 (see https://bugs.python.org/issue9949) diff --git a/yt_dlp/compat/_deprecated.py b/yt_dlp/compat/_deprecated.py index f84439825..390f76577 100644 --- a/yt_dlp/compat/_deprecated.py +++ b/yt_dlp/compat/_deprecated.py @@ -44,4 +44,9 @@ compat_urllib_request = urllib.request compat_urlparse = compat_urllib_parse = urllib.parse + +def compat_setenv(key, value, env=os.environ): + env[key] = value + + __all__ = [x for x in globals() if x.startswith('compat_')] diff --git a/yt_dlp/compat/asyncio.py b/yt_dlp/compat/asyncio.py index f80dc192d..c61e5c8fd 100644 --- a/yt_dlp/compat/asyncio.py +++ b/yt_dlp/compat/asyncio.py @@ -1,5 +1,4 @@ # flake8: noqa: F405 - from asyncio import * # noqa: F403 from .compat_utils import passthrough_module diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py index 938daf926..b1d58f5b9 100644 --- a/yt_dlp/compat/compat_utils.py +++ b/yt_dlp/compat/compat_utils.py @@ -1,9 +1,28 @@ +import collections import contextlib import importlib import sys import types +_NO_ATTRIBUTE = object() + +_Package = collections.namedtuple('Package', ('name', 'version')) + + +def get_package_info(module): + parent = module.__name__.split('.')[0] + parent_module = None + with contextlib.suppress(ImportError): + parent_module = importlib.import_module(parent) + + for attr in ('__version__', 'version_string', 'version'): + version = getattr(parent_module, attr, None) + if version is not None: + break + return _Package(getattr(module, '_yt_dlp__identifier', parent), str(version)) + + def _is_package(module): try: module.__getattribute__('__path__') @@ -12,9 +31,6 @@ def _is_package(module): return True -_NO_ATTRIBUTE = object() - - def passthrough_module(parent, child, *, callback=lambda _: None): parent_module = importlib.import_module(parent) child_module = importlib.import_module(child, parent) diff --git a/yt_dlp/compat/re.py b/yt_dlp/compat/re.py index d4532950a..e1d3a2645 100644 --- a/yt_dlp/compat/re.py +++ b/yt_dlp/compat/re.py @@ -1,5 +1,4 @@ # flake8: noqa: F405 - from re import * # F403 from .compat_utils import passthrough_module diff --git a/yt_dlp/dependencies.py b/yt_dlp/dependencies.py index 99cc6e29c..a4c2e5f06 100644 --- a/yt_dlp/dependencies.py +++ b/yt_dlp/dependencies.py @@ -1,4 +1,6 @@ # flake8: noqa: F401 +"""Imports all optional dependencies for the project. +An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambigious namespace""" try: import brotlicffi as brotli @@ -28,6 +30,15 @@ from Crypto.Cipher import AES as Cryptodome_AES except ImportError: Cryptodome_AES = None + else: + try: + # In pycrypto, mode defaults to ECB. See: + # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode + Cryptodome_AES.new(b'abcdefghijklmnop') + except TypeError: + pass + else: + Cryptodome_AES._yt_dlp__identifier = 'pycrypto' try: diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 022a9cd17..d79863300 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -12,6 +12,7 @@ QuietMultilinePrinter, ) from ..utils import ( + NUMBER_RE, LockingUnsupportedError, Namespace, decodeArgument, @@ -91,7 +92,8 @@ def _set_ydl(self, ydl): 'trouble', 'write_debug', ): - setattr(self, func, getattr(ydl, func)) + if not hasattr(self, func): + setattr(self, func, getattr(ydl, func)) def to_screen(self, *args, **kargs): self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) @@ -170,7 +172,7 @@ def best_block_size(elapsed_time, bytes): @staticmethod def parse_bytes(bytestr): """Parse a string indicating a byte quantity into an integer.""" - matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) + matchobj = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr) if matchobj is None: return None number = float(matchobj.group(1)) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 4fe56bb95..4f9f8f6e5 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -368,7 +368,7 @@ def _call_downloader(self, tmpfilename, info_dict): # These exists only for compatibility. Extractors should use # info_dict['downloader_options']['ffmpeg_args'] instead - args += info_dict.get('_ffmpeg_args') + args += info_dict.get('_ffmpeg_args') or [] seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 390c840bb..451e3cc2f 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -1,3 +1,4 @@ +import concurrent.futures import contextlib import http.client import json @@ -5,12 +6,6 @@ import os import time -try: - import concurrent.futures - can_threaded_download = True -except ImportError: - can_threaded_download = False - from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 @@ -28,6 +23,8 @@ class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass + console_title = to_screen + def report_retry(self, err, count, retries): super().to_screen( f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...') @@ -501,8 +498,7 @@ def append_fragment(frag_content, frag_index, ctx): max_workers = math.ceil( self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) - if can_threaded_download and max_workers > 1: - + if max_workers > 1: def _download_fragment(fragment): ctx_copy = ctx.copy() download_fragment(fragment, ctx_copy) diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index 7bc3ab049..8a6619960 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -173,7 +173,7 @@ def real_download(self, filename, info_dict): mime_type = b'image/png' if frag_content.startswith((b'GIF87a', b'GIF89a')): mime_type = b'image/gif' - if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP': + if frag_content.startswith(b'RIFF') and frag_content[8:12] == b'WEBP': mime_type = b'image/webp' frag_header = io.BytesIO() diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 63f7b5d4a..441d8a136 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1922,8 +1922,7 @@ def calculate_preference(self, format): def _sort_formats(self, formats, field_preference=[]): if not formats: return - format_sort = self.FormatSort(self, field_preference) - formats.sort(key=lambda f: format_sort.calculate_preference(f)) + formats.sort(key=self.FormatSort(self, field_preference).calculate_preference) def _check_formats(self, formats, video_id): if formats: diff --git a/yt_dlp/extractor/fujitv.py b/yt_dlp/extractor/fujitv.py index 15d75a972..f66149d2c 100644 --- a/yt_dlp/extractor/fujitv.py +++ b/yt_dlp/extractor/fujitv.py @@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor): 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076', 'info_dict': { 'id': '5d40110076', - 'ext': 'mp4', + 'ext': 'ts', 'title': '#1318 『まる子、まぼろしの洋館を見る』の巻', 'series': 'ちびまる子ちゃん', 'series_id': '5d40', @@ -28,7 +28,7 @@ class FujiTVFODPlus7IE(InfoExtractor): 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083', 'info_dict': { 'id': '5d40810083', - 'ext': 'mp4', + 'ext': 'ts', 'title': '#1324 『まる子とオニの子』の巻/『結成!2月をムダにしない会』の巻', 'description': 'md5:3972d900b896adc8ab1849e310507efa', 'series': 'ちびまる子ちゃん', @@ -51,7 +51,7 @@ def _real_extract(self, url): for src in src_json['video_selector']: if not src.get('url'): continue - fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'mp4') + fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts') for f in fmt: f.update(dict(zip(('height', 'width'), self._BITRATE_MAP.get(f.get('tbr'), ())))) diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 1e3309605..12cacd3b4 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -242,6 +242,9 @@ def _real_extract(self, url): 'language_preference': language_preference(lang.lower()), }) formats.extend(current_formats) + if not formats and (requested_languages or requested_versions): + self.raise_no_formats( + 'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id) self._remove_duplicate_formats(formats) self._sort_formats(formats, ('lang', 'source')) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 210e5b36c..078f49696 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3107,7 +3107,7 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live, durati 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)}) except ExtractorError as e: self.report_warning( - f'nsig extraction failed: You may experience throttling for some formats\n' + 'nsig extraction failed: You may experience throttling for some formats\n' f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True) throttled = True diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index caa841b2e..207be776e 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -79,9 +79,9 @@ def run(self, info): original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath'] - thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:] # Convert unsupported thumbnail formats (see #25687, #25717) # PNG is preferred since JPEG is lossy + thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:] if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'): thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png') thumbnail_ext = 'png' @@ -100,7 +100,7 @@ def run(self, info): elif info['ext'] in ['mkv', 'mka']: options = list(self.stream_copy_opts()) - mimetype = 'image/%s' % ('jpeg' if thumbnail_ext in ('jpg', 'jpeg') else thumbnail_ext) + mimetype = f'image/{thumbnail_ext.replace("jpg", "jpeg")}' old_stream, new_stream = self.get_stream_number( filename, ('tags', 'mimetype'), mimetype) if old_stream is not None: diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index 3c431941b..d6ac9b876 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -1,3 +1,5 @@ +import os + from .common import PostProcessor from ..compat import compat_os_name from ..utils import ( @@ -28,6 +30,7 @@ def run(self, info): self.to_screen('Writing metadata to file\'s xattrs') filename = info['filepath'] + mtime = os.stat(filename).st_mtime try: xattr_mapping = { @@ -53,8 +56,6 @@ def run(self, info): write_xattr(filename, xattrname, byte_value) num_written += 1 - return [], info - except XAttrUnavailableError as e: raise PostProcessingError(str(e)) @@ -73,4 +74,6 @@ def run(self, info): else: msg += '(You may have to enable them in your /etc/fstab)' raise PostProcessingError(str(e)) - return [], info + + self.try_utime(filename, mtime, mtime) + return [], info diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 0612139e0..35426568b 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -245,6 +245,8 @@ def random_user_agent(): PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" JSON_LD_RE = r'(?is)]+type=(["\']?)application/ld\+json\1[^>]*>(?P.+?)' +NUMBER_RE = r'\d+(?:\.\d+)?' + def preferredencoding(): """Get preferred encoding. @@ -3427,7 +3429,7 @@ def parse_dfxp_time_expr(time_expr): if not time_expr: return - mobj = re.match(r'^(?P\d+(?:\.\d+)?)s?$', time_expr) + mobj = re.match(rf'^(?P{NUMBER_RE})s?$', time_expr) if mobj: return float(mobj.group('time_offset'))