[ie] Do not smuggle http_headers

See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x

Authored by: coletdjnz
This commit is contained in:
bashonly 2023-08-16 18:42:48 -05:00 committed by Simon Sawicki
parent d4f14a72dc
commit f04b5bedad
No known key found for this signature in database
9 changed files with 19 additions and 15 deletions

View File

@@ -1293,6 +1293,10 @@ def test_clean_header(self):
assert 'Youtubedl-no-compression' not in rh.headers assert 'Youtubedl-no-compression' not in rh.headers
assert rh.headers.get('Accept-Encoding') == 'identity' assert rh.headers.get('Accept-Encoding') == 'identity'
with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
rh = self.build_handler(ydl)
assert 'Ytdl-socks-proxy' not in rh.headers
def test_build_handler_params(self): def test_build_handler_params(self):
with FakeYDL({ with FakeYDL({
'http_headers': {'test': 'testtest'}, 'http_headers': {'test': 'testtest'},

View File

@@ -105,7 +105,7 @@ def _real_extract(self, url):
'chapter': module.get('title'), 'chapter': module.get('title'),
'chapter_id': str_or_none(module.get('id')), 'chapter_id': str_or_none(module.get('id')),
'title': activity.get('title'), 'title': activity.get('title'),
'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'http_headers': {'Referer': 'https://api.cybrary.it'}}) 'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'referer': 'https://api.cybrary.it'})
} }

View File

@@ -138,7 +138,7 @@ def _real_extract(self, url):
# of the video. # of the video.
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': smuggle_url(data_url, {'http_headers': headers}), 'url': smuggle_url(data_url, {'referer': webpage_url}),
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'series': series_title, 'series': series_title,

View File

@@ -106,4 +106,4 @@ def _real_extract(self, url):
return self.url_result(src, YoutubeTabIE) return self.url_result(src, YoutubeTabIE)
return self.url_result(smuggle_url( return self.url_result(smuggle_url(
urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))), urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))),
{'http_headers': {'Referer': url}})) {'referer': url}))

View File

@@ -17,6 +17,7 @@
determine_protocol, determine_protocol,
dict_get, dict_get,
extract_basic_auth, extract_basic_auth,
filter_dict,
format_field, format_field,
int_or_none, int_or_none,
is_html, is_html,
@@ -2435,10 +2436,10 @@ def _real_extract(self, url):
# to accept raw bytes and being able to download only a chunk. # to accept raw bytes and being able to download only a chunk.
# It may probably better to solve this by checking Content-Type for application/octet-stream # It may probably better to solve this by checking Content-Type for application/octet-stream
# after a HEAD request, but not sure if we can rely on this. # after a HEAD request, but not sure if we can rely on this.
full_response = self._request_webpage(url, video_id, headers={ full_response = self._request_webpage(url, video_id, headers=filter_dict({
'Accept-Encoding': 'identity', 'Accept-Encoding': 'identity',
**smuggled_data.get('http_headers', {}) 'Referer': smuggled_data.get('referer'),
}) }))
new_url = full_response.url new_url = full_response.url
url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl() url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
if new_url != extract_basic_auth(url)[0]: if new_url != extract_basic_auth(url)[0]:
@@ -2458,7 +2459,7 @@ def _real_extract(self, url):
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type) m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m: if m:
self.report_detected('direct video link') self.report_detected('direct video link')
headers = smuggled_data.get('http_headers', {}) headers = filter_dict({'Referer': smuggled_data.get('referer')})
format_id = str(m.group('format_id')) format_id = str(m.group('format_id'))
ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response) ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response)
subtitles = {} subtitles = {}
@@ -2710,7 +2711,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
'url': smuggle_url(json_ld['url'], { 'url': smuggle_url(json_ld['url'], {
'force_videoid': video_id, 'force_videoid': video_id,
'to_generic': True, 'to_generic': True,
'http_headers': {'Referer': url}, 'referer': url,
}), }),
}, json_ld)] }, json_ld)]

View File

@@ -530,7 +530,7 @@ def _real_extract(self, url):
if service_name == 'vimeo': if service_name == 'vimeo':
info['url'] = smuggle_url( info['url'] = smuggle_url(
f'https://player.vimeo.com/video/{service_id}', f'https://player.vimeo.com/video/{service_id}',
{'http_headers': {'Referer': url}}) {'referer': url})
video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id')) video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
if not video_slides: if not video_slides:

View File

@@ -32,9 +32,7 @@ def _parse_video(self, video):
'description': video.get('description'), 'description': video.get('description'),
'url': smuggle_url( 'url': smuggle_url(
'https://player.vimeo.com/video/' + vimeo_id, { 'https://player.vimeo.com/video/' + vimeo_id, {
'http_headers': { 'referer': 'https://storyfire.com/',
'Referer': 'https://storyfire.com/',
}
}), }),
'thumbnail': video.get('storyImage'), 'thumbnail': video.get('storyImage'),
'view_count': int_or_none(video.get('views')), 'view_count': int_or_none(video.get('views')),

View File

@@ -37,14 +37,14 @@ class VimeoBaseInfoExtractor(InfoExtractor):
@staticmethod @staticmethod
def _smuggle_referrer(url, referrer_url): def _smuggle_referrer(url, referrer_url):
return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) return smuggle_url(url, {'referer': referrer_url})
def _unsmuggle_headers(self, url): def _unsmuggle_headers(self, url):
"""@returns (url, smuggled_data, headers)""" """@returns (url, smuggled_data, headers)"""
url, data = unsmuggle_url(url, {}) url, data = unsmuggle_url(url, {})
headers = self.get_param('http_headers').copy() headers = self.get_param('http_headers').copy()
if 'http_headers' in data: if 'referer' in data:
headers.update(data['http_headers']) headers['Referer'] = data['referer']
return url, data, headers return url, data, headers
def _perform_login(self, username, password): def _perform_login(self, username, password):

View File

@@ -123,6 +123,7 @@ def clean_headers(headers: HTTPHeaderDict):
if 'Youtubedl-No-Compression' in headers: # compat if 'Youtubedl-No-Compression' in headers: # compat
del headers['Youtubedl-No-Compression'] del headers['Youtubedl-No-Compression']
headers['Accept-Encoding'] = 'identity' headers['Accept-Encoding'] = 'identity'
headers.pop('Ytdl-socks-proxy', None)
def remove_dot_segments(path): def remove_dot_segments(path):