[utils] Handle HTMLParseError in extract_attributes (closes #13349)

This commit is contained in:
Sergey M․ 2017-06-12 01:52:24 +07:00
parent 72b409559c
commit b4a3d461e4
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 9 additions and 2 deletions

View File

@@ -916,6 +916,8 @@ def test_extract_attributes(self):
supports_outside_bmp = False supports_outside_bmp = False
if supports_outside_bmp: if supports_outside_bmp:
self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'}) self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
# Malformed HTML should not break attributes extraction on older Python
self.assertEqual(extract_attributes('<mal"formed/>'), {})
def test_clean_html(self): def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b') self.assertEqual(clean_html('a:\nb'), 'a: b')

View File

@@ -36,6 +36,7 @@
import zlib import zlib
from .compat import ( from .compat import (
compat_HTMLParseError,
compat_HTMLParser, compat_HTMLParser,
compat_basestring, compat_basestring,
compat_chr, compat_chr,
@@ -409,8 +410,12 @@ def extract_attributes(html_element):
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
""" """
parser = HTMLAttributeParser() parser = HTMLAttributeParser()
parser.feed(html_element) try:
parser.close() parser.feed(html_element)
parser.close()
# Older Python may throw HTMLParseError in case of malformed HTML
except compat_HTMLParseError:
pass
return parser.attrs return parser.attrs