[utils] Place sanitize url function near other sanitizing functions

This commit is contained in:
Sergey M․ 2015-03-17 21:34:22 +06:00
parent dc03a42537
commit 92a4793b3c
2 changed files with 28 additions and 31 deletions

View File

@ -39,6 +39,7 @@
read_batch_urls, read_batch_urls,
sanitize_filename, sanitize_filename,
sanitize_path, sanitize_path,
sanitize_url_path_consecutive_slashes,
shell_quote, shell_quote,
smuggle_url, smuggle_url,
str_to_int, str_to_int,
@ -55,7 +56,6 @@
xpath_with_ns, xpath_with_ns,
render_table, render_table,
match_str, match_str,
url_sanitize_consecutive_slashes,
) )
@ -169,6 +169,26 @@ def test_sanitize_path(self):
self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc')
def test_sanitize_url_path_consecutive_slashes(self):
    # Each pair is (input URL, URL expected back from the sanitizer).
    # Covers doubled slashes in the middle, at the start and at the end of
    # the path, plus URLs that are already clean and must come back as-is.
    expectations = (
        ('http://hostname/foo//bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//',
         'http://hostname/'),
        ('http://hostname/foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname/',
         'http://hostname/'),
        ('http://hostname/abc//',
         'http://hostname/abc/'),
    )
    for source_url, expected_url in expectations:
        self.assertEqual(
            sanitize_url_path_consecutive_slashes(source_url),
            expected_url)
def test_ordered_set(self): def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), []) self.assertEqual(orderedSet([]), [])
@ -539,21 +559,6 @@ def test_match_str(self):
'like_count > 100 & dislike_count <? 50 & description', 'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 190, 'dislike_count': 10})) {'like_count': 190, 'dislike_count': 10}))
def test_url_sanitize_consecutive_slashes(self):
    # Each pair is (input URL, URL expected back from the sanitizer).
    # The last two inputs contain no doubled slashes in the path and are
    # expected to be returned unchanged.
    expectations = (
        ('http://hostname/foo//bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//',
         'http://hostname/'),
        ('http://hostname/foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname/',
         'http://hostname/'),
    )
    for source_url, expected_url in expectations:
        self.assertEqual(
            url_sanitize_consecutive_slashes(source_url), expected_url)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -326,6 +326,13 @@ def sanitize_path(s):
return os.path.join(*sanitized_path) return os.path.join(*sanitized_path)
def sanitize_url_path_consecutive_slashes(url):
    """Return url with each run of slashes in its path reduced to one slash.

    The URL is split with urlparse, only the path component is rewritten,
    and the result is reassembled with urlunparse, so the '//' that follows
    the scheme is not affected.
    """
    components = compat_urlparse.urlparse(url)
    collapsed_path = re.sub(r'/{2,}', '/', components.path)
    return compat_urlparse.urlunparse(
        components._replace(path=collapsed_path))
def orderedSet(iterable): def orderedSet(iterable):
""" Remove all duplicates from the input iterable """ """ Remove all duplicates from the input iterable """
res = [] res = []
@ -1804,18 +1811,3 @@ def proxy_open(self, req, proxy, type):
return None # No Proxy return None # No Proxy
return compat_urllib_request.ProxyHandler.proxy_open( return compat_urllib_request.ProxyHandler.proxy_open(
self, req, proxy, type) self, req, proxy, type)
def url_sanitize_consecutive_slashes(url):
    """Collapse repeated slashes in the path component of a URL.

    Both
        http://hostname/foo//bar/filename.html
    and
        http://hostname//foo/bar/filename.html
    come back as
        http://hostname/foo/bar/filename.html

    Only the path returned by urlparse is rewritten; the other components
    are handed to urlunparse as parsed.
    """
    scheme, netloc, path, params, query, fragment = (
        compat_urlparse.urlparse(url))
    path = re.sub(r'/{2,}', '/', path)
    return compat_urlparse.urlunparse(
        (scheme, netloc, path, params, query, fragment))