[ie/substack] Fix embed extraction (#8218)

Authored by: handlerug
This commit is contained in:
Umar Getagazov 2023-10-07 01:45:46 +03:00 committed by GitHub
parent 48cceec1dd
commit fbcc299bd8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -50,7 +50,7 @@ def _extract_embed_urls(cls, url, webpage):
if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage): if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage):
return return
mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P<subdomain>[^"]+)', webpage) mobj = re.search(r'{[^}]*\\?["\']subdomain\\?["\']\s*:\s*\\?["\'](?P<subdomain>[^\\"\']+)', webpage)
if mobj: if mobj:
parsed = urllib.parse.urlparse(url) parsed = urllib.parse.urlparse(url)
yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl() yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl()