Strip empty characters in file names.

GitOrigin-RevId: 896c899c9d0893b467844b5640d8f65b240bc759
This commit is contained in:
levlam 2019-10-04 18:00:51 +03:00
parent 407f3d1c1a
commit ebcc1d0dc3
4 changed files with 18 additions and 7 deletions

View File

@ -13,6 +13,7 @@
#include "td/telegram/files/FileLocation.h"
#include "td/telegram/files/FileManager.h"
#include "td/telegram/files/FileType.h"
#include "td/telegram/misc.h"
#include "td/telegram/net/DcId.h"
#include "td/telegram/Photo.h"
#include "td/telegram/StickersManager.h"
@ -337,6 +338,8 @@ Document DocumentsManager::on_get_document(RemoteDocument remote_document, Dialo
return {};
}
file_name = strip_empty_characters(file_name, 255, true);
auto suggested_file_name = file_name;
if (suggested_file_name.empty()) {
suggested_file_name = to_string(static_cast<uint64>(id));

View File

@ -136,11 +136,11 @@ bool clean_input_string(string &str) {
return true;
}
string strip_empty_characters(string str, size_t max_length) {
string strip_empty_characters(string str, size_t max_length, bool strip_rtlo) {
static const char *space_characters[] = {u8"\u1680", u8"\u180E", u8"\u2000", u8"\u2001", u8"\u2002",
u8"\u2003", u8"\u2004", u8"\u2005", u8"\u2006", u8"\u2007",
u8"\u2008", u8"\u2009", u8"\u200A", u8"\u200B", u8"\u202F",
u8"\u205F", u8"\u3000", u8"\uFEFF", u8"\uFFFC"};
u8"\u2008", u8"\u2009", u8"\u200A", u8"\u200B", u8"\u202E",
u8"\u202F", u8"\u205F", u8"\u3000", u8"\uFEFF", u8"\uFFFC"};
static bool can_be_first[std::numeric_limits<unsigned char>::max() + 1];
static bool can_be_first_inited = [&] {
for (auto space_ch : space_characters) {
@ -162,7 +162,10 @@ string strip_empty_characters(string str, size_t max_length) {
bool found = false;
for (auto space_ch : space_characters) {
if (space_ch[0] == str[i] && space_ch[1] == str[i + 1] && space_ch[2] == str[i + 2]) {
found = true;
if (static_cast<unsigned char>(str[i + 2]) != 0xAE || static_cast<unsigned char>(str[i + 1]) != 0x80 ||
static_cast<unsigned char>(str[i]) != 0xE2 || strip_rtlo) {
found = true;
}
break;
}
}

View File

@ -22,7 +22,7 @@ string clean_username(string str) TD_WARN_UNUSED_RESULT;
bool clean_input_string(string &str) TD_WARN_UNUSED_RESULT;
// strips empty characters and ensures that string length is no more than max_length
string strip_empty_characters(string str, size_t max_length) TD_WARN_UNUSED_RESULT;
string strip_empty_characters(string str, size_t max_length, bool strip_rtlo = false) TD_WARN_UNUSED_RESULT;
// checks if string is empty after strip_empty_characters
bool is_empty_string(const string &str) TD_WARN_UNUSED_RESULT;

View File

@ -69,8 +69,8 @@ TEST(StringCleaning, clean_input_string) {
check_clean_input_string("\xcc\xb3\xcc\xbf\xcc\x8a", "", true);
}
static void check_strip_empty_characters(string str, size_t max_length, string expected) {
ASSERT_EQ(expected, strip_empty_characters(str, max_length));
static void check_strip_empty_characters(string str, size_t max_length, string expected, bool strip_rtlo = false) {
ASSERT_EQ(expected, strip_empty_characters(str, max_length, strip_rtlo));
}
TEST(StringCleaning, strip_empty_characters) {
@ -82,9 +82,14 @@ TEST(StringCleaning, strip_empty_characters) {
u8"\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u202F\u205F\u3000\uFEFF"
u8"\uFFFC\uFFFC";
string spaces_replace = " ";
string rtlo = u8"\u202E";
string empty = "\xE2\x80\x8C\xE2\x80\x8D\xE2\x80\xAE\xC2\xA0\xC2\xA0";
check_strip_empty_characters(spaces, 1000000, "");
check_strip_empty_characters(spaces + rtlo, 1000000, "");
check_strip_empty_characters(spaces + rtlo, 1000000, "", true);
check_strip_empty_characters(spaces + rtlo + "a", 1000000, rtlo + "a");
check_strip_empty_characters(spaces + rtlo + "a", 1000000, "a", true);
check_strip_empty_characters(empty, 1000000, "");
check_strip_empty_characters(empty + "a", 1000000, empty + "a");
check_strip_empty_characters(spaces + empty + spaces + "abc" + spaces, 1000000, empty + spaces_replace + "abc");