From 91757b0f373ec3201f95066eeb0e09ebdcc1a067 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 26 Mar 2015 17:15:27 +0200 Subject: [PATCH] [utils] Escape all HTML entities written in hexadecimal form --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index a8ab876850..abaf1ab733 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -200,6 +200,8 @@ def test_ordered_set(self): def test_unescape_html(self): self.assertEqual(unescapeHTML('%20;'), '%20;') + self.assertEqual(unescapeHTML('&#x2F;'), '/') + self.assertEqual(unescapeHTML('&#47;'), '/') self.assertEqual( unescapeHTML('&eacute;'), 'é') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 472d4df41f..245d623d86 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -348,7 +348,7 @@ def _htmlentity_transform(entity): if entity in compat_html_entities.name2codepoint: return compat_chr(compat_html_entities.name2codepoint[entity]) - mobj = re.match(r'#(x?[0-9]+)', entity) + mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity) if mobj is not None: numstr = mobj.group(1) if numstr.startswith('x'):