From e625be0d10d96a20702d630dcc88e3269554e172 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 3 May 2021 22:36:03 +0530 Subject: [PATCH] Improve output template internal formatting * Allow slicing lists/strings using `field.start:end:step` * A field can also be used as offset like `field1+num+field2` * A default value can be given using `field|default` * Capture all format strings and set it to `None` if invalid. This prevents invalid fields from causing errors --- README.md | 9 +-- yt_dlp/YoutubeDL.py | 84 ++++++++++++++++------- yt_dlp/postprocessor/execafterdownload.py | 2 +- yt_dlp/utils.py | 8 +-- 4 files changed, 71 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index cef6eaf36..5ae596da7 100644 --- a/README.md +++ b/README.md @@ -842,13 +842,14 @@ # OUTPUT TEMPLATE It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: -1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s` or `%(upload_date>%Y-%m-%d)s` -2. **Offset numbers**: Numeric fields can have an initial offset specified by using a `+` separator. Eg: `%(playlist_index+10)03d`. This can also be used in conjunction with the date-time formatting. Eg: `%(epoch+-3600>%H-%M-%S)s` -3. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. 
Eg: `%(tags.0)s` or `%(subtitles.en.-1.ext)`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields +1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)`, `%(id.3:7:-1)s`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields +1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` +1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` +1. **Default**: A default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` To summarize, the general syntax for a field is: ``` -%(name[.keys][+offset][>strf])[flags][width][.precision][length]type +%(name[.keys][addition][>strf][|default])[flags][width][.precision][length]type ``` Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. 
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 0af036458..79ba3ef93 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -843,29 +843,67 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): if sanitize is None: sanitize = lambda k, v: v - # Internal Formatting = name.key1.key2+number>strf - INTERNAL_FORMAT_RE = FORMAT_RE.format( - r'''(?P - (?P\w+(?:\.[-\w]+)*) - (?:\+(?P-?\d+(?:\.\d+)?))? - (?:>(?P.+?))? - )''') - for mobj in re.finditer(INTERNAL_FORMAT_RE, outtmpl): - mobj = mobj.groupdict() - # Object traversal - fields = mobj['fields'].split('.') - final_key = mobj['final_key'] - value = traverse_dict(template_dict, fields) - # Offset the value - if mobj['add']: - value = float_or_none(value) - if value is not None: - value = value + float(mobj['add']) - # Datetime formatting - if mobj['strf_format']: - value = strftime_or_none(value, mobj['strf_format']) - if mobj['type'] in 'crs' and value is not None: # string - value = sanitize('%{}'.format(mobj['type']) % fields[-1], value) + EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P[^)]*)') + # Field is of the form key1.key2... + # where keys (except first) can be string, int or slice + FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*' + INTERNAL_FORMAT_RE = re.compile(r'''(?x) + (?P-)? + (?P{0}) + (?P(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*) + (?:>(?P.+?))? + (?:\|(?P.*?))? 
+ $'''.format(FIELD_RE)) + MATH_OPERATORS_RE = re.compile(r'(?.+]+'), cmd): + if not re.search(FORMAT_RE.format(r'[^)]*'), cmd): if '{}' not in cmd: cmd += ' {}' return cmd.replace('{}', compat_shlex_quote(info['filepath'])) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 08e2d19d2..baa2a415e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6112,11 +6112,11 @@ def traverse_dict(dictn, keys, casesense=True): key = key.lower() dictn = dictn.get(key) elif isinstance(dictn, (list, tuple, compat_str)): - key, n = int_or_none(key), len(dictn) - if key is not None and -n <= key < n: - dictn = dictn[key] + if ':' in key: + key = slice(*map(int_or_none, key.split(':'))) else: - dictn = None + key = int_or_none(key) + dictn = try_get(dictn, lambda x: x[key]) else: return None return dictn