Strip empty characters in file names.
GitOrigin-RevId: 896c899c9d0893b467844b5640d8f65b240bc759
This commit is contained in:
parent
407f3d1c1a
commit
ebcc1d0dc3
@ -13,6 +13,7 @@
|
||||
#include "td/telegram/files/FileLocation.h"
|
||||
#include "td/telegram/files/FileManager.h"
|
||||
#include "td/telegram/files/FileType.h"
|
||||
#include "td/telegram/misc.h"
|
||||
#include "td/telegram/net/DcId.h"
|
||||
#include "td/telegram/Photo.h"
|
||||
#include "td/telegram/StickersManager.h"
|
||||
@ -337,6 +338,8 @@ Document DocumentsManager::on_get_document(RemoteDocument remote_document, Dialo
|
||||
return {};
|
||||
}
|
||||
|
||||
file_name = strip_empty_characters(file_name, 255, true);
|
||||
|
||||
auto suggested_file_name = file_name;
|
||||
if (suggested_file_name.empty()) {
|
||||
suggested_file_name = to_string(static_cast<uint64>(id));
|
||||
|
@ -136,11 +136,11 @@ bool clean_input_string(string &str) {
|
||||
return true;
|
||||
}
|
||||
|
||||
string strip_empty_characters(string str, size_t max_length) {
|
||||
string strip_empty_characters(string str, size_t max_length, bool strip_rtlo) {
|
||||
static const char *space_characters[] = {u8"\u1680", u8"\u180E", u8"\u2000", u8"\u2001", u8"\u2002",
|
||||
u8"\u2003", u8"\u2004", u8"\u2005", u8"\u2006", u8"\u2007",
|
||||
u8"\u2008", u8"\u2009", u8"\u200A", u8"\u200B", u8"\u202F",
|
||||
u8"\u205F", u8"\u3000", u8"\uFEFF", u8"\uFFFC"};
|
||||
u8"\u2008", u8"\u2009", u8"\u200A", u8"\u200B", u8"\u202E",
|
||||
u8"\u202F", u8"\u205F", u8"\u3000", u8"\uFEFF", u8"\uFFFC"};
|
||||
static bool can_be_first[std::numeric_limits<unsigned char>::max() + 1];
|
||||
static bool can_be_first_inited = [&] {
|
||||
for (auto space_ch : space_characters) {
|
||||
@ -162,7 +162,10 @@ string strip_empty_characters(string str, size_t max_length) {
|
||||
bool found = false;
|
||||
for (auto space_ch : space_characters) {
|
||||
if (space_ch[0] == str[i] && space_ch[1] == str[i + 1] && space_ch[2] == str[i + 2]) {
|
||||
if (static_cast<unsigned char>(str[i + 2]) != 0xAE || static_cast<unsigned char>(str[i + 1]) != 0x80 ||
|
||||
static_cast<unsigned char>(str[i]) != 0xE2 || strip_rtlo) {
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -22,7 +22,7 @@ string clean_username(string str) TD_WARN_UNUSED_RESULT;
|
||||
bool clean_input_string(string &str) TD_WARN_UNUSED_RESULT;
|
||||
|
||||
// strips empty characters and ensures that string length is no more than max_length; U+202E (RTLO) is treated as an empty character only if strip_rtlo is true
|
||||
string strip_empty_characters(string str, size_t max_length) TD_WARN_UNUSED_RESULT;
|
||||
string strip_empty_characters(string str, size_t max_length, bool strip_rtlo = false) TD_WARN_UNUSED_RESULT;
|
||||
|
||||
// checks if string is empty after strip_empty_characters
|
||||
bool is_empty_string(const string &str) TD_WARN_UNUSED_RESULT;
|
||||
|
@ -69,8 +69,8 @@ TEST(StringCleaning, clean_input_string) {
|
||||
check_clean_input_string("\xcc\xb3\xcc\xbf\xcc\x8a", "", true);
|
||||
}
|
||||
|
||||
// Test helper: asserts that strip_empty_characters(str, max_length[, strip_rtlo]) returns `expected`.
static void check_strip_empty_characters(string str, size_t max_length, string expected) {
|
||||
ASSERT_EQ(expected, strip_empty_characters(str, max_length));
|
||||
static void check_strip_empty_characters(string str, size_t max_length, string expected, bool strip_rtlo = false) {
|
||||
ASSERT_EQ(expected, strip_empty_characters(str, max_length, strip_rtlo));
|
||||
}
|
||||
|
||||
TEST(StringCleaning, strip_empty_characters) {
|
||||
@ -82,9 +82,14 @@ TEST(StringCleaning, strip_empty_characters) {
|
||||
u8"\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u202F\u205F\u3000\uFEFF"
|
||||
u8"\uFFFC\uFFFC";
|
||||
string spaces_replace = " ";
|
||||
string rtlo = u8"\u202E";
|
||||
string empty = "\xE2\x80\x8C\xE2\x80\x8D\xE2\x80\xAE\xC2\xA0\xC2\xA0";
|
||||
|
||||
check_strip_empty_characters(spaces, 1000000, "");
|
||||
check_strip_empty_characters(spaces + rtlo, 1000000, "");
|
||||
check_strip_empty_characters(spaces + rtlo, 1000000, "", true);
|
||||
check_strip_empty_characters(spaces + rtlo + "a", 1000000, rtlo + "a");
|
||||
check_strip_empty_characters(spaces + rtlo + "a", 1000000, "a", true);
|
||||
check_strip_empty_characters(empty, 1000000, "");
|
||||
check_strip_empty_characters(empty + "a", 1000000, empty + "a");
|
||||
check_strip_empty_characters(spaces + empty + spaces + "abc" + spaces, 1000000, empty + spaces_replace + "abc");
|
||||
|
Loading…
Reference in New Issue
Block a user