From c380cc28c4e94b4b61db7f86d35e48197b407266 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 May 2020 04:19:33 +0700 Subject: [PATCH] [utils] Improve cookie files support + Add support for UTF-8 in cookie files * Skip malformed cookie file entries instead of crashing (invalid entry len, invalid expires at) --- test/test_YoutubeDLCookieJar.py | 7 ++ test/testdata/cookies/malformed_cookies.txt | 9 +++ youtube_dl/utils.py | 82 +++++++++++++++++++-- 3 files changed, 93 insertions(+), 5 deletions(-) create mode 100644 test/testdata/cookies/malformed_cookies.txt diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index f959798de..05f48bd74 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -39,6 +39,13 @@ def assert_cookie_has_value(key): assert_cookie_has_value('HTTPONLY_COOKIE') assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') + def test_malformed_cookies(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + # Cookies should be empty since all malformed cookie file entries + # will be ignored + self.assertFalse(cookiejar._cookies) + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/cookies/malformed_cookies.txt b/test/testdata/cookies/malformed_cookies.txt new file mode 100644 index 000000000..17bc40354 --- /dev/null +++ b/test/testdata/cookies/malformed_cookies.txt @@ -0,0 +1,9 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. + +# Cookie file entry with invalid number of fields - 6 instead of 7 +www.foobar.foobar FALSE / FALSE 0 COOKIE + +# Cookie file entry with invalid expires at +www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 38262bee4..112279ed7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -7,6 +7,7 @@ import binascii import calendar import codecs +import collections import contextlib import ctypes import datetime @@ -30,6 +31,7 @@ import subprocess import sys import tempfile +import time import traceback import xml.etree.ElementTree import zlib @@ -2735,14 +2737,66 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): 1. https://curl.haxx.se/docs/http-cookies.html """ _HTTPONLY_PREFIX = '#HttpOnly_' + _ENTRY_LEN = 7 + _HEADER = '''# Netscape HTTP Cookie File +# This file is generated by youtube-dl. Do not edit. + +''' + _CookieFileEntry = collections.namedtuple( + 'CookieFileEntry', + ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """ + Save cookies to a file. + + Most of the code is taken from CPython 3.8 and slightly adapted + to support cookie files with UTF-8 in both python 2 and 3. + """ + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + # Store session cookies with `expires` set to 0 instead of an empty # string for cookie in self: if cookie.expires is None: cookie.expires = 0 - compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires) + + with io.open(filename, 'w', encoding='utf-8') as f: + f.write(self._HEADER) + now = time.time() + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + if cookie.secure: + secure = 'TRUE' + else: + secure = 'FALSE' + if cookie.domain.startswith('.'): + initial_dot = 'TRUE' + else: + initial_dot = 'FALSE' + if cookie.expires is not None: + expires = compat_str(cookie.expires) + else: + expires = '' + if cookie.value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. + name = '' + value = cookie.name + else: + name = cookie.name + value = cookie.value + f.write( + '\t'.join([cookie.domain, initial_dot, cookie.path, + secure, expires, name, value]) + '\n') def load(self, filename=None, ignore_discard=False, ignore_expires=False): """Load cookies from a file.""" @@ -2752,12 +2806,30 @@ def load(self, filename=None, ignore_discard=False, ignore_expires=False): else: raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + def prepare_line(line): + if line.startswith(self._HTTPONLY_PREFIX): + line = line[len(self._HTTPONLY_PREFIX):] + # comments and empty lines are fine + if line.startswith('#') or not line.strip(): + return line + cookie_list = line.split('\t') + if len(cookie_list) != self._ENTRY_LEN: + raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list)) + cookie = self._CookieFileEntry(*cookie_list) + if cookie.expires_at and not cookie.expires_at.isdigit(): + raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) + return line + cf = io.StringIO() - with open(filename) as f: + with io.open(filename, encoding='utf-8') as f: for line in f: - if line.startswith(self._HTTPONLY_PREFIX): - line = line[len(self._HTTPONLY_PREFIX):] - cf.write(compat_str(line)) + try: + cf.write(prepare_line(line)) + except compat_cookiejar.LoadError as e: + write_string( + 'WARNING: skipping cookie file entry due to %s: %r\n' + % (e, line), sys.stderr) + continue cf.seek(0) self._really_load(cf, filename, ignore_discard, ignore_expires) # Session cookies are denoted by either `expires` field set to