[GlomexEmbed] Avoid large match objects

Closes #2512
Authored by: zmousm
2024-12-24 04:05:53 +01:00 · 2022-01-30 15:35:39 +02:00 · 2022-01-30 15:35:39 +02:00 · 19afd9ea51
commit 19afd9ea51
parent b72270d27e
1 changed files with 7 additions and 2 deletions
--- a/yt_dlp/extractor/glomex.py
+++ b/yt_dlp/extractor/glomex.py
@@ -198,8 +198,13 @@ def _extract_urls(cls, webpage, origin_url):
            )+</script>
        )''' % {'quot_re': r'["\']', 'url_re': VALID_SRC}

-        for mobj in re.finditer(EMBED_RE, webpage):
-            mdict = mobj.groupdict()
+        for mtup in re.findall(EMBED_RE, webpage):
+            # Use re.findall instead of re.finditer, which causes a memory spike. See https://github.com/yt-dlp/yt-dlp/issues/2512
+            mdict = dict(zip((
+                'url', '_',
+                'html_tag', '_', 'integration_html', '_', 'id_html', '_', 'glomex_player',
+                'script_tag', '_', '_', 'integration_js', '_', 'id_js',
+            ), mtup))
            if mdict.get('url'):
                url = unescapeHTML(mdict['url'])
                if not cls.suitable(url):