[lazy_extractor] Create instance only after pre-checking archive

2024-12-28 14:15:53 +01:00 · 2021-08-23 04:45:30 +05:30 · 2021-08-23 04:45:30 +05:30 · 251ae04e6a
commit 251ae04e6a
parent 5bc4a65eea
4 changed files with 14 additions and 12 deletions
--- a/devscripts/lazy_load_template.py
+++ b/devscripts/lazy_load_template.py
@@ -9,6 +9,7 @@ def __getattr__(cls, name):

 class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
    _module = None
+    _WORKING = True

    @classmethod
    def _get_real_class(cls):
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -30,16 +30,14 @@
 with open('devscripts/lazy_load_template.py', 'rt') as f:
    module_template = f.read()

+CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id']
 module_contents = [
    module_template,
-    getsource(InfoExtractor.ie_key),
-    getsource(InfoExtractor._match_valid_url),
-    getsource(InfoExtractor.suitable),
+    *[getsource(getattr(InfoExtractor, k)) for k in CLASS_PROPERTIES],
    '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']

 ie_template = '''
 class {name}({bases}):
-    _VALID_URL = {valid_url!r}
    _module = '{module}'
 '''

@@ -60,14 +58,17 @@ def get_base_name(base):


 def build_lazy_ie(ie, name):
-    valid_url = getattr(ie, '_VALID_URL', None)
    s = ie_template.format(
        name=name,
        bases=', '.join(map(get_base_name, ie.__bases__)),
-        valid_url=valid_url,
        module=ie.__module__)
+    valid_url = getattr(ie, '_VALID_URL', None)
+    if valid_url:
+        s += f'    _VALID_URL = {valid_url!r}\n'
+    if not ie._WORKING:
+        s += f'    _WORKING = False\n'
    if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
-        s += '\n' + getsource(ie.suitable)
+        s += f'\n{getsource(ie.suitable)}'
    if hasattr(ie, '_make_valid_url'):
        # search extractors
        s += make_valid_template.format(valid_url=ie._make_valid_url())
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1179,7 +1179,7 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
            ie_key = 'Generic'

        if ie_key:
-            ies = [self.get_info_extractor(ie_key)]
+            ies = [get_info_extractor(ie_key)]
        else:
            ies = self._ies

@@ -1188,7 +1188,6 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
                continue

            ie_key = ie.ie_key()
-            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')
@@ -1198,7 +1197,8 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                               ie_key, temp_id))
                break
-            return self.__extract_info(url, ie, download, extra_info, process)
+            return self.__extract_info(url, self.get_info_extractor(ie.ie_key()),
+                                       download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -110,14 +110,14 @@ def _real_main(argv=None):

    if opts.list_extractors:
        for ie in list_extractors(opts.age_limit):
-            write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
+            write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout)
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            for mu in matchedUrls:
                write_string('  ' + mu + '\n', out=sys.stdout)
        sys.exit(0)
    if opts.list_extractor_descriptions:
        for ie in list_extractors(opts.age_limit):
-            if not ie._WORKING:
+            if not ie.working():
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            if desc is False: