From 6db9c4d57d033fb22c94a2e6f1ecf0207e700b4c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 25 Mar 2022 14:06:46 +0530 Subject: [PATCH] Ignore format-specific fields in initial pass of `--match-filter` Closes #3074 --- test/helper.py | 10 +--------- test/test_YoutubeDL.py | 2 +- yt_dlp/YoutubeDL.py | 12 +++++++++++- yt_dlp/utils.py | 16 ++++++++++++---- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/test/helper.py b/test/helper.py index 1070e0668..28c21b2eb 100644 --- a/test/helper.py +++ b/test/helper.py @@ -196,15 +196,7 @@ def expect_dict(self, got_dict, expected_dict): def sanitize_got_info_dict(got_dict): IGNORED_FIELDS = ( - # Format keys - 'url', 'manifest_url', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution', - 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container', 'filesize', - 'filesize_approx', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'preference', - 'language', 'language_preference', 'quality', 'source_preference', 'http_headers', - 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options', - - # RTMP formats - 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', + *YoutubeDL._format_fields, # Lists 'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries', diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 7637297be..f9b40501d 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -931,7 +931,7 @@ def get_videos(filter_=None): res = get_videos() self.assertEqual(res, ['1', '2']) - def f(v): + def f(v, incomplete): if v['id'] == '1': return None else: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 740f9a7bd..5771fbcf7 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -513,6 +513,16 @@ class YoutubeDL(object): 'track_number', 'disc_number', 'release_year', )) + _format_fields = { + # NB: Keep in sync with the 
docstring of extractor/common.py + 'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note', + 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', + 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', + 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', + 'preference', 'language', 'language_preference', 'quality', 'source_preference', + 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options', + 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' + } _format_selection_exts = { 'audio': {'m4a', 'mp3', 'ogg', 'aac'}, 'video': {'mp4', 'flv', 'webm', '3gp'}, @@ -2541,7 +2551,7 @@ def is_wellformed(f): info_dict, _ = self.pre_process(info_dict) - if self._match_entry(info_dict) is not None: + if self._match_entry(info_dict, incomplete=self._format_fields) is not None: return info_dict self.post_extract(info_dict) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index e359c6bba..e9eaf7b4e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3545,6 +3545,11 @@ def _match_one(filter_part, dct, incomplete): '=': operator.eq, } + if isinstance(incomplete, bool): + is_incomplete = lambda _: incomplete + else: + is_incomplete = lambda k: k in incomplete + operator_rex = re.compile(r'''(?x)\s* (?P<key>[a-z_]+) \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* @@ -3583,7 +3588,7 @@ def _match_one(filter_part, dct, incomplete): if numeric_comparison is not None and m['op'] in STRING_OPERATORS: raise ValueError('Operator %s only supports string values!' 
% m['op']) if actual_value is None: - return incomplete or m['none_inclusive'] + return is_incomplete(m['key']) or m['none_inclusive'] return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison) UNARY_OPERATORS = { @@ -3598,7 +3603,7 @@ def _match_one(filter_part, dct, incomplete): if m: op = UNARY_OPERATORS[m.group('op')] actual_value = dct.get(m.group('key')) - if incomplete and actual_value is None: + if is_incomplete(m.group('key')) and actual_value is None: return True return op(actual_value) @@ -3606,8 +3611,11 @@ def _match_one(filter_part, dct, incomplete): def match_str(filter_str, dct, incomplete=False): - """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false - When incomplete, all conditions passes on missing fields + """ Filter a dictionary with a simple string syntax. + @returns Whether the filter passes + @param incomplete Set of keys that is expected to be missing from dct. + Can be True/False to indicate all/none of the keys may be missing. + All conditions on incomplete keys pass if the key is missing """ return all( _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)