Add and use utf8_encode.

This commit is contained in:
levlam 2022-05-31 15:08:16 +03:00
parent bf0caca138
commit a282cce5d4
4 changed files with 25 additions and 11 deletions

View File

@ -713,7 +713,7 @@ static Result<int32> to_int32(Slice str) {
int32 integer_value = 0;
for (auto c : str) {
if (!is_digit(c)) {
return Status::Error(400, PSLICE() << "Can't parse \"" << str << "\" as number");
return Status::Error(400, PSLICE() << "Can't parse \"" << utf8_encode(str.str()) << "\" as number");
}
integer_value = integer_value * 10 + c - '0';
}
@ -725,12 +725,12 @@ static Result<td_api::object_ptr<td_api::date>> get_date_object(Slice date) {
return nullptr;
}
if (date.size() > 10u || date.size() < 8u) {
return Status::Error(400, PSLICE() << "Date \"" << date << "\" has wrong length");
return Status::Error(400, PSLICE() << "Date \"" << utf8_encode(date.str()) << "\" has wrong length");
}
auto parts = full_split(date, '.');
if (parts.size() != 3 || parts[0].size() > 2 || parts[1].size() > 2 || parts[2].size() != 4 || parts[0].empty() ||
parts[1].empty()) {
return Status::Error(400, PSLICE() << "Date \"" << date << "\" has wrong parts");
return Status::Error(400, PSLICE() << "Date \"" << utf8_encode(date.str()) << "\" has wrong parts");
}
TRY_RESULT(day, to_int32(parts[0]));
TRY_RESULT(month, to_int32(parts[1]));

View File

@ -42,6 +42,7 @@
#include "td/utils/Time.h"
#include "td/utils/tl_helpers.h"
#include "td/utils/tl_parsers.h"
#include "td/utils/utf8.h"
#include <algorithm>
#include <cmath>
@ -989,27 +990,28 @@ Status FileManager::check_local_location(FullLocalFileLocation &location, int64
} else if (!are_modification_times_equal(location.mtime_nsec_, stat.mtime_nsec_)) {
VLOG(file_loader) << "File \"" << location.path_ << "\" was modified: old mtime = " << location.mtime_nsec_
<< ", new mtime = " << stat.mtime_nsec_;
return Status::Error(400, PSLICE() << "File \"" << location.path_ << "\" was modified");
return Status::Error(400, PSLICE() << "File \"" << utf8_encode(location.path_) << "\" was modified");
}
if (skip_file_size_checks) {
return Status::OK();
}
if ((location.file_type_ == FileType::Thumbnail || location.file_type_ == FileType::EncryptedThumbnail) &&
size > MAX_THUMBNAIL_SIZE && !begins_with(PathView(location.path_).file_name(), "map")) {
return Status::Error(400, PSLICE() << "File \"" << location.path_ << "\" is too big for a thumbnail "
<< tag("size", format::as_size(size)));
return Status::Error(400, PSLICE() << "File \"" << utf8_encode(location.path_) << "\" of size " << size
<< " is too big for a thumbnail");
}
if (size > MAX_FILE_SIZE) {
return Status::Error(400, PSLICE() << "File \"" << location.path_ << "\" of size " << size << " bytes is too big");
return Status::Error(
400, PSLICE() << "File \"" << utf8_encode(location.path_) << "\" of size " << size << " bytes is too big");
}
if (location.file_type_ == FileType::Photo && size > MAX_PHOTO_SIZE) {
return Status::Error(
400, PSLICE() << "File \"" << location.path_ << "\" of size " << size << " bytes is too big for a photo");
return Status::Error(400, PSLICE() << "File \"" << utf8_encode(location.path_) << "\" of size " << size
<< " bytes is too big for a photo");
}
if (location.file_type_ == FileType::VideoNote &&
size > G()->shared_config().get_option_integer("video_note_size_max", DEFAULT_VIDEO_NOTE_SIZE_MAX)) {
return Status::Error(
400, PSLICE() << "File \"" << location.path_ << "\" of size " << size << " bytes is too big for a video note");
return Status::Error(400, PSLICE() << "File \"" << utf8_encode(location.path_) << "\" of size " << size
<< " bytes is too big for a video note");
}
return Status::OK();
}

View File

@ -7,6 +7,8 @@
#include "td/utils/utf8.h"
#include "td/utils/logging.h"
#include "td/utils/misc.h"
#include "td/utils/SliceBuilder.h"
#include "td/utils/unicode.h"
namespace td {
@ -121,4 +123,11 @@ string utf8_to_lower(Slice str) {
return result;
}
// Produces a representation of `data` for embedding in text such as error messages.
// Input that passes check_utf8 is returned as a copy; anything else is URL-encoded
// and wrapped as "url_decode(...)" to mark that the original bytes were not valid UTF-8.
string utf8_encode(CSlice data) {
  if (!check_utf8(data)) {
    // Invalid byte sequence: emit the URL-encoded form instead of the raw bytes.
    return PSTRING() << "url_decode(" << url_encode(data) << ')';
  }
  return data.str();
}
} // namespace td

View File

@ -118,4 +118,7 @@ T utf8_utf16_substr(T str, size_t offset, size_t length) {
/// Returns UTF-8 string converted to lower case.
string utf8_to_lower(Slice str);
/// Returns the string unchanged if it passes check_utf8; otherwise returns
/// the URL-encoded data wrapped as "url_decode(<url-encoded data>)".
string utf8_encode(CSlice data);
} // namespace td