From 03b4de722a6cf86dbcc6d17a63145ec59a573bf6 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 16 Oct 2021 18:31:00 +0530 Subject: [PATCH] [downloader] Fix slow progress hooks Closes #1301 --- yt_dlp/YoutubeDL.py | 16 +++++++++++----- yt_dlp/downloader/common.py | 5 +---- yt_dlp/downloader/dash.py | 5 ++--- yt_dlp/downloader/hls.py | 5 ++--- yt_dlp/postprocessor/common.py | 13 +++++++------ 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index aff7d6ddb..fd8ad0f98 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -950,13 +950,18 @@ def validate_outtmpl(cls, outtmpl): except ValueError as err: return err + @staticmethod + def _copy_infodict(info_dict): + info_dict = dict(info_dict) + for key in ('__original_infodict', '__postprocessors'): + info_dict.pop(key, None) + return info_dict + def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set - info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList - for key in ('__original_infodict', '__postprocessors'): - info_dict.pop(key, None) + info_dict = self._copy_infodict(info_dict) info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs formatSeconds(info_dict['duration'], '-' if sanitize else ':') if info_dict.get('duration', None) is not None @@ -2265,7 +2270,7 @@ def is_wellformed(f): formats_dict[format_id].append(format) # Make sure all formats have unique format_id - common_exts = set(ext for exts in self._format_selection_exts.values() for ext in exts) + common_exts = set(itertools.chain(*self._format_selection_exts.values())) for format_id, ambiguous_formats in formats_dict.items(): ambigious_id = len(ambiguous_formats) > 1 for i, format in enumerate(ambiguous_formats): @@ -2523,7 +2528,8 @@ def dl(self, name, info, subtitle=False, test=False): fd.add_progress_hook(ph) urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']]) self.write_debug('Invoking downloader on "%s"' % urls) - new_info = dict(info) + + new_info = copy.deepcopy(self._copy_infodict(info)) if new_info.get('http_headers') is None: new_info['http_headers'] = self._calc_headers(new_info) return fd.download(name, new_info, subtitle) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 89cdffd24..96b78a968 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -405,13 +405,10 @@ def real_download(self, filename, info_dict): def _hook_progress(self, status, info_dict): if not self._progress_hooks: return - info_dict = dict(info_dict) - for key in ('__original_infodict', '__postprocessors'): - info_dict.pop(key, None) + status['info_dict'] = info_dict # youtube-dl passes the same status object to all the hooks. # Some third party scripts seems to be relying on this. # So keep this behavior if possible - status['info_dict'] = copy.deepcopy(info_dict) for ph in self._progress_hooks: ph(status) diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index 734eab3ef..6444ad692 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -55,9 +55,8 @@ def real_download(self, filename, info_dict): if real_downloader: self.to_screen( '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) - info_copy = info_dict.copy() - info_copy['fragments'] = fragments_to_download + info_dict['fragments'] = fragments_to_download fd = real_downloader(self.ydl, self.params) - return fd.real_download(filename, info_copy) + return fd.real_download(filename, info_dict) return self.download_and_append_fragments(ctx, fragments_to_download, info_dict) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 3c5a2617d..61312c5ba 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -245,13 +245,12 @@ def is_ad_fragment_end(s): fragments = [fragments[0] if fragments else None] if real_downloader: - info_copy = info_dict.copy() - info_copy['fragments'] = fragments + info_dict['fragments'] = fragments fd = real_downloader(self.ydl, self.params) # TODO: Make progress updates work without hooking twice # for ph in self._progress_hooks: # fd.add_progress_hook(ph) - return fd.real_download(filename, info_copy) + return fd.real_download(filename, info_dict) if is_webvtt: def pack_fragment(frag_content, frag_index): diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index d2daeb0fb..b36716743 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -17,11 +17,12 @@ class PostProcessorMetaClass(type): def run_wrapper(func): @functools.wraps(func) def run(self, info, *args, **kwargs): - self._hook_progress({'status': 'started'}, info) + info_copy = copy.deepcopy(self._copy_infodict(info)) + self._hook_progress({'status': 'started'}, info_copy) ret = func(self, info, *args, **kwargs) if ret is not None: _, info = ret - self._hook_progress({'status': 'finished'}, info) + self._hook_progress({'status': 'finished'}, info_copy) return ret return run @@ -93,6 +94,9 @@ def set_downloader(self, downloader): for ph in getattr(downloader, '_postprocessor_hooks', []): self.add_progress_hook(ph) + def _copy_infodict(self, info_dict): + return getattr(self._downloader, '_copy_infodict', dict)(info_dict) + @staticmethod def _restrict_to(*, video=True, audio=True, images=True): allowed = {'video': video, 'audio': audio, 'images': images} @@ -142,11 +146,8 @@ def _configuration_args(self, exe, *args, **kwargs): def _hook_progress(self, status, info_dict): if not self._progress_hooks: return - info_dict = dict(info_dict) - for key in ('__original_infodict', '__postprocessors'): - info_dict.pop(key, None) status.update({ - 'info_dict': copy.deepcopy(info_dict), + 'info_dict': info_dict, 'postprocessor': self.pp_key(), }) for ph in self._progress_hooks: