From b4a3d461e4a00dfc60047b667aa3136c8b03eda8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Jun 2017 01:52:24 +0700 Subject: [PATCH] [utils] Handle HTMLParseError in extract_attributes (closes #13349) --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 41b094d89..2b93b3604 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -916,6 +916,8 @@ def test_extract_attributes(self): supports_outside_bmp = False if supports_outside_bmp: self.assertEqual(extract_attributes(''), {'x': 'Smile \U0001f600!'}) + # Malformed HTML should not break attributes extraction on older Python + self.assertEqual(extract_attributes(''), {}) def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 16bf49408..1973bd483 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -36,6 +36,7 @@ import zlib from .compat import ( + compat_HTMLParseError, compat_HTMLParser, compat_basestring, compat_chr, @@ -409,8 +410,12 @@ def extract_attributes(html_element): but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. """ parser = HTMLAttributeParser() - parser.feed(html_element) - parser.close() + try: + parser.feed(html_element) + parser.close() + # Older Python may throw HTMLParseError in case of malformed HTML + except compat_HTMLParseError: + pass return parser.attrs