From 8c01f4a92d7b0e101c72506d9a30c127f28dccad Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Tue, 7 Jan 2025 13:44:12 -0500 Subject: [PATCH] Complete implementation --- yt_dlp/YoutubeDL.py | 53 ++++++++++++++++++++++++++++++++++++------ yt_dlp/__init__.py | 2 ++ yt_dlp/options.py | 8 +++++++ yt_dlp/utils/_utils.py | 6 +++++ 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5856a9cbd..eb2e5211a 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -70,6 +70,7 @@ from .update import ( ) from .utils import ( DEFAULT_OUTTMPL, + DEFAULT_MAX_FILE_NAME, IDENTITY, LINK_TEMPLATES, MEDIA_EXTENSIONS, @@ -266,6 +267,8 @@ class YoutubeDL: outtmpl_na_placeholder: Placeholder for unavailable meta fields. restrictfilenames: Do not allow "&" and spaces in file names trim_file_name: Limit length of filename (extension excluded) + max_file_name: Limit length of filename (extension included). A number followed by 'b' (bytes) or 'c' (characters), e.g. '255b' + filesystem_encoding: Encoding to use when calculating filename length in bytes (default: sys.getfilesystemencoding()) windowsfilenames: True: Force filenames to be Windows compatible False: Sanitize filenames only minimally This option has no effect when running on Windows @@ -1424,7 +1427,10 @@ class YoutubeDL: return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs): - print(outtmpl) + outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) + return self.escape_outtmpl(outtmpl) % info_dict + + def evaluate_outtmpl_for_filename(self, outtmpl, info_dict, *args, **kwargs): outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) ext_suffix = '.%(ext\x00s)s' # not sure why this has null char suffix = '' @@ -1433,10 +1439,44 @@ class YoutubeDL: suffix = ext_suffix % info_dict outtmpl = self.escape_outtmpl(outtmpl) filename = outtmpl % info_dict - encoding = sys.getfilesystemencoding() # make option to override - filename = filename.encode(encoding) - filename 
= filename[:255 - len('.flac.part')] # make option to override - filename = filename.decode(encoding, 'ignore') + + def parse_max_file_name(max_file_name: str): + try: + max_length = int(max_file_name[:-1]) + except ValueError: + raise ValueError('Invalid --max-filename-length specified') + + if max_file_name[-1].lower() == 'c': + return 'c', max_length + elif max_file_name[-1].lower() == 'b': + return 'b', max_length + else: + raise ValueError("--max-filename-length must end with 'b' or 'c'") + + max_file_name = self.params.get('max_file_name') or DEFAULT_MAX_FILE_NAME # params holds an explicit None when --max-filename-length is not given + mode, max_file_name = parse_max_file_name(max_file_name) + encoding = self.params.get('filesystem_encoding') or sys.getfilesystemencoding() # same None fallback for --filesystem-encoding + + # extension may be replaced later + if mode == 'b': + max_suffix_len = len('.annotations.xml'.encode(encoding)) + else: + max_suffix_len = len('.annotations.xml') + + def trim_filename(name: str, length: int): + if length < 1: + raise ValueError('Cannot trim filename to such short length') + if mode == 'b': + name = name.encode(encoding) + name = name[:length] + return name.decode(encoding, 'ignore') + else: + return name[:length] + + # only trim last component of path - assume the directories are valid names + head, tail = os.path.split(filename) + tail = trim_filename(tail, max_file_name - max_suffix_len) + filename = os.path.join(head, tail) return filename + suffix @_catch_unsafe_extension_error @@ -1446,8 +1486,7 @@ class YoutubeDL: outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default']) try: outtmpl = self._outtmpl_expandpath(outtmpl) - filename = self.evaluate_outtmpl(outtmpl, info_dict, True) - print(filename) + filename = self.evaluate_outtmpl_for_filename(outtmpl, info_dict, True) if not filename: return None diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 20111175b..00817b0e5 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -886,6 +886,8 @@ def parse_options(argv=None): 'max_downloads': 
opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'trim_file_name': opts.trim_file_name, + 'max_file_name': opts.max_file_name, + 'filesystem_encoding': opts.filesystem_encoding, 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, 'write_pages': opts.write_pages, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 06b65e0ea..7025928a0 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1380,6 +1380,14 @@ def create_parser(): '--trim-filenames', '--trim-file-names', metavar='LENGTH', dest='trim_file_name', default=0, type=int, help='Limit the filename length (excluding extension) to the specified number of characters') + filesystem.add_option( + '--max-filename-length', metavar='LENGTH', + dest='max_file_name', + help='Limit the filename length (including extension) to the specified number of characters or bytes. LENGTH must be a number followed by "c" (characters) or "b" (bytes), e.g. "255b"') + filesystem.add_option( + '--filesystem-encoding', metavar='ENCODING', + dest='filesystem_encoding', + help='Override filesystem encoding used when calculating filename length in bytes') filesystem.add_option( '-w', '--no-overwrites', action='store_false', dest='overwrites', default=None, diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 699bf1e7f..90172125c 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2852,6 +2852,12 @@ OUTTMPL_TYPES = { 'pl_infojson': 'info.json', } +# Default filename length limit ('c' = characters, 'b' = bytes), per https://en.m.wikipedia.org/wiki/Comparison_of_file_systems#Limits +if platform.system() in ('Darwin', 'Windows'): + DEFAULT_MAX_FILE_NAME = '255c' +else: + DEFAULT_MAX_FILE_NAME = '255b' + # As of [1] format syntax is: # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting