Format selector `mergeall` to download and merge all formats

2024-12-24 12:15:50 +01:00 · 2021-04-10 20:10:30 +05:30 · 2021-04-10 20:10:30 +05:30 · f8d4ad9ab0
commit f8d4ad9ab0
parent 3ffc7c89b0
2 changed files with 81 additions and 65 deletions
--- a/README.md
+++ b/README.md
@@ -979,8 +979,9 @@ # FORMAT SELECTION
 You can also use special names to select particular edge case formats:

 - `all`: Select all formats
- - `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio.
- - `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio.
+ - `mergeall`: Select and merge all formats (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
+ - `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio
+ - `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio
 - `b`, `best`: Select the best quality format that contains both video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
 - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
 - `bv`, `bestvideo`: Select the best quality video-only format. Equivalent to `best*[acodec=none]`
@@ -1094,10 +1095,17 @@ # For this case, an output template should be used since
 # by default, bestvideo and bestaudio will have the same file name.
 $ yt-dlp -f 'bv,ba' -o '%(title)s.f%(format_id)s.%(ext)s'

+# Download and merge the best format that has a video stream,
+# and all audio-only formats into one file
+$ yt-dlp -f 'bv*+mergeall[vcodec=none]' --audio-multistreams
+
+# Download and merge the best format that has a video stream,
+# and the best 2 audio-only formats into one file
+$ yt-dlp -f 'bv*+ba+ba.2' --audio-multistreams


 # The following examples show the old method (without -S) of format selection
-# and how to use -S to achieve a similar but better result
+# and how to use -S to achieve a similar but (generally) better result

 # Download the worst video available (old method)
 $ yt-dlp -f 'wv*+wa/w'
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1541,83 +1541,6 @@ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, ins
                selectors.append(current_selector)
            return selectors

-        def _build_selector_function(selector):
-            if isinstance(selector, list):  # ,
-                fs = [_build_selector_function(s) for s in selector]
-
-                def selector_function(ctx):
-                    for f in fs:
-                        for format in f(ctx):
-                            yield format
-                return selector_function
-
-            elif selector.type == GROUP:  # ()
-                selector_function = _build_selector_function(selector.selector)
-
-            elif selector.type == PICKFIRST:  # /
-                fs = [_build_selector_function(s) for s in selector.selector]
-
-                def selector_function(ctx):
-                    for f in fs:
-                        picked_formats = list(f(ctx))
-                        if picked_formats:
-                            return picked_formats
-                    return []
-
-            elif selector.type == SINGLE:  # atom
-                format_spec = selector.selector if selector.selector is not None else 'best'
-
-                if format_spec == 'all':
-                    def selector_function(ctx):
-                        formats = list(ctx['formats'])
-                        if formats:
-                            for f in formats:
-                                yield f
-
-                else:
-                    format_fallback = False
-                    mobj = re.match(
-                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
-                        format_spec)
-                    if mobj is not None:
-                        format_idx = int_or_none(mobj.group('n'), default=1)
-                        format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
-                        format_type = (mobj.group('type') or [None])[0]
-                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
-                        format_modified = mobj.group('mod') is not None
-
-                        format_fallback = not format_type and not format_modified  # for b, w
-                        filter_f = (
-                            (lambda f: f.get('%scodec' % format_type) != 'none')
-                            if format_type and format_modified  # bv*, ba*, wv*, wa*
-                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
-                            if format_type  # bv, ba, wv, wa
-                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
-                            if not format_modified  # b, w
-                            else None)  # b*, w*
-                    else:
-                        format_idx = -1
-                        filter_f = ((lambda f: f.get('ext') == format_spec)
-                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
-                                    else (lambda f: f.get('format_id') == format_spec))  # id
-
-                    def selector_function(ctx):
-                        formats = list(ctx['formats'])
-                        if not formats:
-                            return
-                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
-                        n = len(matches)
-                        if -n <= format_idx < n:
-                            yield matches[format_idx]
-                        elif format_fallback and ctx['incomplete_formats']:
-                            # for extractors with incomplete formats (audio only (soundcloud)
-                            # or video only (imgur)) best/worst will fallback to
-                            # best/worst {video,audio}-only format
-                            n = len(formats)
-                            if -n <= format_idx < n:
-                                yield formats[format_idx]
-
-            elif selector.type == MERGE:        # +
        def _merge(formats_pair):
            format_1, format_2 = formats_pair

@@ -1678,6 +1601,91 @@ def _merge(formats_pair):

            return new_dict

+        def _build_selector_function(selector):
+            if isinstance(selector, list):  # ,
+                fs = [_build_selector_function(s) for s in selector]
+
+                def selector_function(ctx):
+                    for f in fs:
+                        for format in f(ctx):
+                            yield format
+                return selector_function
+
+            elif selector.type == GROUP:  # ()
+                selector_function = _build_selector_function(selector.selector)
+
+            elif selector.type == PICKFIRST:  # /
+                fs = [_build_selector_function(s) for s in selector.selector]
+
+                def selector_function(ctx):
+                    for f in fs:
+                        picked_formats = list(f(ctx))
+                        if picked_formats:
+                            return picked_formats
+                    return []
+
+            elif selector.type == SINGLE:  # atom
+                format_spec = (selector.selector if selector.selector is not None else 'best').lower()
+
+                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
+                if format_spec == 'all':
+                    def selector_function(ctx):
+                        formats = list(ctx['formats'])
+                        if formats:
+                            for f in formats:
+                                yield f
+                elif format_spec == 'mergeall':
+                    def selector_function(ctx):
+                        formats = list(ctx['formats'])
+                        merged_format = formats[0]
+                        for f in formats[1:]:
+                            merged_format = _merge((merged_format, f))
+                        yield merged_format
+
+                else:
+                    format_fallback = False
+                    mobj = re.match(
+                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
+                        format_spec)
+                    if mobj is not None:
+                        format_idx = int_or_none(mobj.group('n'), default=1)
+                        format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
+                        format_type = (mobj.group('type') or [None])[0]
+                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
+                        format_modified = mobj.group('mod') is not None
+
+                        format_fallback = not format_type and not format_modified  # for b, w
+                        filter_f = (
+                            (lambda f: f.get('%scodec' % format_type) != 'none')
+                            if format_type and format_modified  # bv*, ba*, wv*, wa*
+                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
+                            if format_type  # bv, ba, wv, wa
+                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+                            if not format_modified  # b, w
+                            else None)  # b*, w*
+                    else:
+                        format_idx = -1
+                        filter_f = ((lambda f: f.get('ext') == format_spec)
+                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
+                                    else (lambda f: f.get('format_id') == format_spec))  # id
+
+                    def selector_function(ctx):
+                        formats = list(ctx['formats'])
+                        if not formats:
+                            return
+                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
+                        n = len(matches)
+                        if -n <= format_idx < n:
+                            yield matches[format_idx]
+                        elif format_fallback and ctx['incomplete_formats']:
+                            # for extractors with incomplete formats (audio only (soundcloud)
+                            # or video only (imgur)) best/worst will fallback to
+                            # best/worst {video,audio}-only format
+                            n = len(formats)
+                            if -n <= format_idx < n:
+                                yield formats[format_idx]
+
+            elif selector.type == MERGE:        # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):