From 12037d8b0a578fcc78a5c8f98964e48ee6060e25 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 4 Jun 2023 06:10:30 -0500 Subject: [PATCH] [extractor/substack] Fix extraction (#7218) Closes #7155 Authored by: bashonly --- yt_dlp/extractor/substack.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index fa3826388b..3782ceed1c 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -2,7 +2,7 @@ import urllib.parse from .common import InfoExtractor -from ..utils import str_or_none, traverse_obj +from ..utils import js_to_json, str_or_none, traverse_obj class SubstackIE(InfoExtractor): @@ -14,7 +14,7 @@ class SubstackIE(InfoExtractor): 'id': '47660949', 'ext': 'mp4', 'title': 'I MADE A VLOG', - 'description': 'md5:10c01ff93439a62e70ce963b2aa0b7f6', + 'description': 'md5:9248af9a759321e1027226f988f54d96', 'thumbnail': 'md5:bec758a34d8ee9142d43bcebdf33af18', 'uploader': 'Maybe Baby', 'uploader_id': '33628', @@ -77,7 +77,9 @@ def _real_extract(self, url): display_id, username = self._match_valid_url(url).group('id', 'username') webpage = self._download_webpage(url, display_id) - webpage_info = self._search_json(r']*>\s*window\._preloads\s*=', webpage, 'preloads', display_id) + webpage_info = self._parse_json(self._search_json( + r'window\._preloads\s*=\s*JSON\.parse\(', webpage, 'json string', + display_id, transform_source=js_to_json, contains_pattern=r'"{(?s:.+)}"'), display_id) post_type = webpage_info['post']['type'] formats, subtitles = [], {}