From e756f45ba0648f972be71ce328419a623e381028 Mon Sep 17 00:00:00 2001 From: Matthew Date: Mon, 2 Jan 2023 04:55:11 +0000 Subject: [PATCH] Improve handling for overriding extractors with plugins (#5916) * Extractors replaced with plugin extractors now show in debug output * Better testcase handling * Added documentation Authored by: coletdjnz, pukkandan --- README.md | 9 ++++++--- yt_dlp/YoutubeDL.py | 22 +++++++++++++++------- yt_dlp/extractor/common.py | 13 +++++++++++-- yt_dlp/extractor/extractors.py | 2 ++ yt_dlp/extractor/testurl.py | 11 ++++++----- 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 83e69a236..c4bd6ef0c 100644 --- a/README.md +++ b/README.md @@ -1841,7 +1841,7 @@ ## Installing Plugins * Source: where `/yt_dlp/__main__.py`, `/yt-dlp-plugins//yt_dlp_plugins/` 3. **pip and other locations in `PYTHONPATH`** - * Plugin packages can be installed and managed using `pip`. See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. + * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. * Note: plugin files between plugin packages installed with pip must have unique filenames * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. * Note: This does not apply for Pyinstaller/py2exe builds. @@ -1854,9 +1854,12 @@ ## Installing Plugins ## Developing Plugins -See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development. +See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development. -All public classes with a name ending in `IE` are imported from each file. This respects underscore prefix (e.g. 
`_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`) +All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`) + +To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). +Due to the mechanics behind this, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above. If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8ce71a2dc..e7b469059 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -33,7 +33,7 @@ from .extractor.openload import PhantomJSwrapper from .minicurses import format_text from .plugins import directories as plugin_directories -from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors +from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import ( EmbedThumbnailPP, FFmpegFixupDuplicateMoovPP, @@ -3730,7 +3730,10 @@ def print_debug_header(self): # These imports can be slow. 
So import them only as needed from .extractor.extractors import _LAZY_LOADER - from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors + from .extractor.extractors import ( + _PLUGIN_CLASSES as plugin_ies, + _PLUGIN_OVERRIDES as plugin_ie_overrides + ) def get_encoding(stream): ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) @@ -3808,12 +3811,17 @@ def get_encoding(stream): proxy_map.update(handler.proxies) write_debug(f'Proxy map: {proxy_map}') - for plugin_type, plugins in {'Extractor': plugin_extractors, 'Post-Processor': plugin_postprocessors}.items(): - if not plugins: - continue - write_debug(f'{plugin_type} Plugins: %s' % (', '.join(sorted(('%s%s' % ( + for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): + display_list = ['%s%s' % ( klass.__name__, '' if klass.__name__ == name else f' as {name}') - for name, klass in plugins.items()))))) + for name, klass in plugins.items()] + if plugin_type == 'Extractor': + display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})' + for parent, plugins in plugin_ie_overrides.items()) + if not display_list: + continue + write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}') + plugin_dirs = plugin_directories() if plugin_dirs: write_debug(f'Plugin directories: {plugin_dirs}') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 9031f3c11..f48b97a6b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3442,13 +3442,17 @@ def get_testcases(cls, include_onlymatching=False): continue t['name'] = cls.ie_key() yield t + if getattr(cls, '__wrapped__', None): + yield from cls.__wrapped__.get_testcases(include_onlymatching) @classmethod def get_webpage_testcases(cls): tests = vars(cls).get('_WEBPAGE_TESTS', []) for t in tests: t['name'] = cls.ie_key() - return tests + yield t + if getattr(cls, '__wrapped__', None): + yield from 
cls.__wrapped__.get_webpage_testcases() @classproperty(cache=True) def age_limit(cls): @@ -3710,10 +3714,12 @@ def __init_subclass__(cls, *, plugin_name=None, **kwargs): if plugin_name: mro = inspect.getmro(cls) super_class = cls.__wrapped__ = mro[mro.index(cls) + 1] - cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key + cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key + cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}' while getattr(super_class, '__wrapped__', None): super_class = super_class.__wrapped__ setattr(sys.modules[super_class.__module__], super_class.__name__, cls) + _PLUGIN_OVERRIDES[super_class].append(cls) return super().__init_subclass__(**kwargs) @@ -3770,3 +3776,6 @@ class UnsupportedURLIE(InfoExtractor): def _real_extract(self, url): raise UnsupportedError(url) + + +_PLUGIN_OVERRIDES = collections.defaultdict(list) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index beda02917..baa69d242 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -24,3 +24,5 @@ globals().update(_PLUGIN_CLASSES) _ALL_CLASSES[:0] = _PLUGIN_CLASSES.values() + +from .common import _PLUGIN_OVERRIDES # noqa: F401 diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py index dccca1004..0da01aa53 100644 --- a/yt_dlp/extractor/testurl.py +++ b/yt_dlp/extractor/testurl.py @@ -23,11 +23,12 @@ def _real_extract(self, url): if len(matching_extractors) == 0: raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True) elif len(matching_extractors) > 1: - try: # Check for exact match - extractor = next( - ie for ie in matching_extractors - if ie.IE_NAME.lower() == extractor_id.lower()) - except StopIteration: + extractor = next(( # Check for exact match + ie for ie in matching_extractors if ie.IE_NAME.lower() == extractor_id.lower() + ), None) or next(( # Check for exact match without plugin suffix + ie for ie in matching_extractors if 
ie.IE_NAME.split('+')[0].lower() == extractor_id.lower() + ), None) + if not extractor: raise ExtractorError( 'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors), expected=True)