[GlomexEmbed] Avoid large match objects

Closes #2512
Authored by: zmousm
This commit is contained in:
Zenon Mousmoulas 2022-01-30 15:35:39 +02:00 committed by GitHub
parent b72270d27e
commit 19afd9ea51
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -198,8 +198,13 @@ def _extract_urls(cls, webpage, origin_url):
             )+</script>
         )''' % {'quot_re': r'["\']', 'url_re': VALID_SRC}
-        for mobj in re.finditer(EMBED_RE, webpage):
-            mdict = mobj.groupdict()
+        for mtup in re.findall(EMBED_RE, webpage):
+            # re.finditer causes a memory spike. See https://github.com/yt-dlp/yt-dlp/issues/2512
+            mdict = dict(zip((
+                'url', '_',
+                'html_tag', '_', 'integration_html', '_', 'id_html', '_', 'glomex_player',
+                'script_tag', '_', '_', 'integration_js', '_', 'id_js',
+            ), mtup))
             if mdict.get('url'):
                 url = unescapeHTML(mdict['url'])
                 if not cls.suitable(url):