From ebcc1d0dc3acc146a759d9a6490f03600b83506c Mon Sep 17 00:00:00 2001 From: levlam Date: Fri, 4 Oct 2019 18:00:51 +0300 Subject: [PATCH] Strip empty characters in file names. GitOrigin-RevId: 896c899c9d0893b467844b5640d8f65b240bc759 --- td/telegram/DocumentsManager.cpp | 3 +++ td/telegram/misc.cpp | 11 +++++++---- td/telegram/misc.h | 2 +- test/string_cleaning.cpp | 9 +++++++-- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/td/telegram/DocumentsManager.cpp b/td/telegram/DocumentsManager.cpp index ee7c8d8d..51ee96b1 100644 --- a/td/telegram/DocumentsManager.cpp +++ b/td/telegram/DocumentsManager.cpp @@ -13,6 +13,7 @@ #include "td/telegram/files/FileLocation.h" #include "td/telegram/files/FileManager.h" #include "td/telegram/files/FileType.h" +#include "td/telegram/misc.h" #include "td/telegram/net/DcId.h" #include "td/telegram/Photo.h" #include "td/telegram/StickersManager.h" @@ -337,6 +338,8 @@ Document DocumentsManager::on_get_document(RemoteDocument remote_document, Dialo return {}; } + file_name = strip_empty_characters(file_name, 255, true); + auto suggested_file_name = file_name; if (suggested_file_name.empty()) { suggested_file_name = to_string(static_cast(id)); diff --git a/td/telegram/misc.cpp b/td/telegram/misc.cpp index 5f5bbbd7..9d619d5d 100644 --- a/td/telegram/misc.cpp +++ b/td/telegram/misc.cpp @@ -136,11 +136,11 @@ bool clean_input_string(string &str) { return true; } -string strip_empty_characters(string str, size_t max_length) { +string strip_empty_characters(string str, size_t max_length, bool strip_rtlo) { static const char *space_characters[] = {u8"\u1680", u8"\u180E", u8"\u2000", u8"\u2001", u8"\u2002", u8"\u2003", u8"\u2004", u8"\u2005", u8"\u2006", u8"\u2007", - u8"\u2008", u8"\u2009", u8"\u200A", u8"\u200B", u8"\u202F", - u8"\u205F", u8"\u3000", u8"\uFEFF", u8"\uFFFC"}; + u8"\u2008", u8"\u2009", u8"\u200A", u8"\u200B", u8"\u202E", + u8"\u202F", u8"\u205F", u8"\u3000", u8"\uFEFF", u8"\uFFFC"}; static bool can_be_first[std::numeric_limits::max() + 1]; static bool can_be_first_inited = [&] { for (auto space_ch : space_characters) { @@ -162,7 +162,10 @@ string strip_empty_characters(string str, size_t max_length) { bool found = false; for (auto space_ch : space_characters) { if (space_ch[0] == str[i] && space_ch[1] == str[i + 1] && space_ch[2] == str[i + 2]) { - found = true; + if (static_cast(str[i + 2]) != 0xAE || static_cast(str[i + 1]) != 0x80 || + static_cast(str[i]) != 0xE2 || strip_rtlo) { + found = true; + } break; } } diff --git a/td/telegram/misc.h b/td/telegram/misc.h index f25d5ea4..62804318 100644 --- a/td/telegram/misc.h +++ b/td/telegram/misc.h @@ -22,7 +22,7 @@ string clean_username(string str) TD_WARN_UNUSED_RESULT; bool clean_input_string(string &str) TD_WARN_UNUSED_RESULT; // strips empty characters and ensures that string length is no more than max_length -string strip_empty_characters(string str, size_t max_length) TD_WARN_UNUSED_RESULT; +string strip_empty_characters(string str, size_t max_length, bool strip_rtlo = false) TD_WARN_UNUSED_RESULT; // checks if string is empty after strip_empty_characters bool is_empty_string(const string &str) TD_WARN_UNUSED_RESULT; diff --git a/test/string_cleaning.cpp b/test/string_cleaning.cpp index 160b7b03..b35cee1c 100644 --- a/test/string_cleaning.cpp +++ b/test/string_cleaning.cpp @@ -69,8 +69,8 @@ TEST(StringCleaning, clean_input_string) { check_clean_input_string("\xcc\xb3\xcc\xbf\xcc\x8a", "", true); } -static void check_strip_empty_characters(string str, size_t max_length, string expected) { - ASSERT_EQ(expected, strip_empty_characters(str, max_length)); +static void check_strip_empty_characters(string str, size_t max_length, string expected, bool strip_rtlo = false) { + ASSERT_EQ(expected, strip_empty_characters(str, max_length, strip_rtlo)); } TEST(StringCleaning, strip_empty_characters) { @@ -82,9 +82,14 @@ TEST(StringCleaning, strip_empty_characters) { u8"\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u202F\u205F\u3000\uFEFF" u8"\uFFFC\uFFFC"; string spaces_replace = " "; + string rtlo = u8"\u202E"; string empty = "\xE2\x80\x8C\xE2\x80\x8D\xE2\x80\xAE\xC2\xA0\xC2\xA0"; check_strip_empty_characters(spaces, 1000000, ""); + check_strip_empty_characters(spaces + rtlo, 1000000, ""); + check_strip_empty_characters(spaces + rtlo, 1000000, "", true); + check_strip_empty_characters(spaces + rtlo + "a", 1000000, rtlo + "a"); + check_strip_empty_characters(spaces + rtlo + "a", 1000000, "a", true); check_strip_empty_characters(empty, 1000000, ""); check_strip_empty_characters(empty + "a", 1000000, empty + "a"); check_strip_empty_characters(spaces + empty + spaces + "abc" + spaces, 1000000, empty + spaces_replace + "abc");