[downloader/aria2c] Native progress for aria2c via RPC (#3724)

Authored by: Lesmiscore, pukkandan Closes #2038
2024-11-30 08:32:58 +01:00 · 2023-01-02 02:16:25 +09:00 · 2023-01-02 02:16:25 +09:00 · 8c53322cda
commit 8c53322cda
parent 193fb150b7
4 changed files with 119 additions and 8 deletions
--- a/README.md
+++ b/README.md
@ -153,6 +153,7 @@ ### Differences in default behavior
 * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
 * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
 * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
 * yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: `aria2c`). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is
 For ease of use, a few more compat options are available:
@ -160,7 +161,7 @@ ### Differences in default behavior
 * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams`
 * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect`
 * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
-* `--compat-options 2022`: Currently does nothing. Use this to enable all future compat options
+* `--compat-options 2022`: Same as `--compat-options no-external-downloader-progress`. Use this to enable all future compat options
 # INSTALLATION
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@ -1,9 +1,11 @@
 import enum
 import json
 import os.path
 import re
 import subprocess
 import sys
 import time
 import uuid
 from .fragment import FragmentFD
 from ..compat import functools
@ -20,8 +22,10 @@
    determine_ext,
    encodeArgument,
    encodeFilename,
    find_available_port,
    handle_youtubedl_headers,
    remove_end,
    sanitized_Request,
    traverse_obj,
 )
@ -60,7 +64,6 @@ def real_download(self, filename, info_dict):
            }
            if filename != '-':
                fsize = os.path.getsize(encodeFilename(tmpfilename))
                self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes')
                self.try_rename(tmpfilename, filename)
                status.update({
                    'downloaded_bytes': fsize,
@ -129,8 +132,7 @@ def _call_downloader(self, tmpfilename, info_dict):
        self._debug_cmd(cmd)
        if 'fragments' not in info_dict:
-            _, stderr, returncode = Popen.run(
+            _, stderr, returncode = self._call_process(cmd, info_dict)
                cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
            if returncode and stderr:
                self.to_stderr(stderr)
            return returncode
@ -140,7 +142,7 @@ def _call_downloader(self, tmpfilename, info_dict):
        retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
                                     frag_index=None, fatal=not skip_unavailable_fragments)
        for retry in retry_manager:
-            _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE)
+            _, stderr, returncode = self._call_process(cmd, info_dict)
            if not returncode:
                break
            # TODO: Decide whether to retry based on error code
@ -172,6 +174,9 @@ def _call_downloader(self, tmpfilename, info_dict):
        self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
        return 0
    def _call_process(self, cmd, info_dict):
        return Popen.run(cmd, text=True, stderr=subprocess.PIPE)
 class CurlFD(ExternalFD):
    AVAILABLE_OPT = '-V'
@ -256,6 +261,14 @@ def supports_manifest(manifest):
    def _aria2c_filename(fn):
        return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
    def _call_downloader(self, tmpfilename, info_dict):
        if 'no-external-downloader-progress' not in self.params.get('compat_opts', []):
            info_dict['__rpc'] = {
                'port': find_available_port() or 19190,
                'secret': str(uuid.uuid4()),
            }
        return super()._call_downloader(tmpfilename, info_dict)
    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [self.exe, '-c',
               '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
@ -276,6 +289,12 @@ def _make_cmd(self, tmpfilename, info_dict):
        cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
        cmd += self._configuration_args()
        if '__rpc' in info_dict:
            cmd += [
                '--enable-rpc',
                f'--rpc-listen-port={info_dict["__rpc"]["port"]}',
                f'--rpc-secret={info_dict["__rpc"]["secret"]}']
        # aria2c strips out spaces from the beginning/end of filenames and paths.
        # We work around this issue by adding a "./" to the beginning of the
        # filename and relative path, and adding a "/" at the end of the path.
@ -304,6 +323,88 @@ def _make_cmd(self, tmpfilename, info_dict):
            cmd += ['--', info_dict['url']]
        return cmd
    def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()):
        # Does not actually need to be UUID, just unique
        sanitycheck = str(uuid.uuid4())
        d = json.dumps({
            'jsonrpc': '2.0',
            'id': sanitycheck,
            'method': method,
            'params': [f'token:{rpc_secret}', *params],
        }).encode('utf-8')
        request = sanitized_Request(
            f'http://localhost:{rpc_port}/jsonrpc',
            data=d, headers={
                'Content-Type': 'application/json',
                'Content-Length': f'{len(d)}',
                'Ytdl-request-proxy': '__noproxy__',
            })
        with self.ydl.urlopen(request) as r:
            resp = json.load(r)
        assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
        return resp['result']
    def _call_process(self, cmd, info_dict):
        if '__rpc' not in info_dict:
            return super()._call_process(cmd, info_dict)
        send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret'])
        started = time.time()
        fragmented = 'fragments' in info_dict
        frag_count = len(info_dict['fragments']) if fragmented else 1
        status = {
            'filename': info_dict.get('_filename'),
            'status': 'downloading',
            'elapsed': 0,
            'downloaded_bytes': 0,
            'fragment_count': frag_count if fragmented else None,
            'fragment_index': 0 if fragmented else None,
        }
        self._hook_progress(status, info_dict)
        def get_stat(key, *obj, average=False):
            val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0]
            return sum(val) / (len(val) if average else 1)
        with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p:
            # Add a small sleep so that RPC client can receive response,
            # or the connection stalls infinitely
            time.sleep(0.2)
            retval = p.poll()
            while retval is None:
                # We don't use tellStatus as we won't know the GID without reading stdout
                # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive
                active = send_rpc('aria2.tellActive')
                completed = send_rpc('aria2.tellStopped', [0, frag_count])
                downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active)
                speed = get_stat('downloadSpeed', active)
                total = frag_count * get_stat('totalLength', active, completed, average=True)
                if total < downloaded:
                    total = None
                status.update({
                    'downloaded_bytes': int(downloaded),
                    'speed': speed,
                    'total_bytes': None if fragmented else total,
                    'total_bytes_estimate': total,
                    'eta': (total - downloaded) / (speed or 1),
                    'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
                    'elapsed': time.time() - started
                })
                self._hook_progress(status, info_dict)
                if not active and len(completed) >= frag_count:
                    send_rpc('aria2.shutdown')
                    retval = p.wait()
                    break
                time.sleep(0.1)
                retval = p.poll()
            return '', p.stderr.read(), retval
 class HttpieFD(ExternalFD):
    AVAILABLE_OPT = '--version'
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@ -464,14 +464,14 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
            'allowed_values': {
                'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
                'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
-                'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley',
+                'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
-                'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
+                'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
                'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
            }, 'aliases': {
                'youtube-dl': ['all', '-multistreams'],
                'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'],
                '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
-                '2022': [],
+                '2022': ['no-external-downloader-progress'],
            }
        }, help=(
            'Options that can help keep compatibility with youtube-dl or youtube-dlc '
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@ -5243,6 +5243,15 @@ def random_birthday(year_field, month_field, day_field):
    }
 def find_available_port(interface=''):
    try:
        with socket.socket() as sock:
            sock.bind((interface, 0))
            return sock.getsockname()[1]
    except OSError:
        return None
 # Templates for internet shortcut files, which are plain text files.
 DOT_URL_LINK_TEMPLATE = '''\
 [InternetShortcut]