From d2680fff48ea340cd1f4c81665683fe62ad006ba Mon Sep 17 00:00:00 2001 From: levlam Date: Sun, 4 Feb 2024 22:12:18 +0300 Subject: [PATCH] Move include of utf8.h to tl_parsers.cpp. --- tdutils/td/utils/tl_parsers.cpp | 19 +++++++++++++++++++ tdutils/td/utils/tl_parsers.h | 20 +++++++------------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/tdutils/td/utils/tl_parsers.cpp b/tdutils/td/utils/tl_parsers.cpp index 8812623fd..3e2bae03f 100644 --- a/tdutils/td/utils/tl_parsers.cpp +++ b/tdutils/td/utils/tl_parsers.cpp @@ -6,7 +6,9 @@ // #include "td/utils/tl_parsers.h" +#include "td/utils/logging.h" #include "td/utils/misc.h" +#include "td/utils/utf8.h" namespace td { @@ -55,4 +57,21 @@ BufferSlice TlBufferParser::as_buffer_slice(Slice slice) { return BufferSlice(slice); } +bool TlBufferParser::is_valid_utf8(CSlice str) const { + if (check_utf8(str)) { + return true; + } + LOG(WARNING) << "Wrong UTF-8 string [[" << str << "]] in " << format::as_hex_dump<4>(parent_->as_slice()); + return false; +} + +size_t TlBufferParser::last_utf8_character_position(Slice str) { + CHECK(!str.empty()); + size_t position = str.size() - 1; + while (position != 0 && !is_utf8_character_first_code_unit(static_cast<unsigned char>(str[position]))) { + position--; + } + return position; +} + } // namespace td diff --git a/tdutils/td/utils/tl_parsers.h b/tdutils/td/utils/tl_parsers.h index 2dc220738..76a74b9eb 100644 --- a/tdutils/td/utils/tl_parsers.h +++ b/tdutils/td/utils/tl_parsers.h @@ -9,12 +9,10 @@ #include "td/utils/buffer.h" #include "td/utils/common.h" #include "td/utils/format.h" -#include "td/utils/logging.h" #include "td/utils/Slice.h" #include "td/utils/SliceBuilder.h" #include "td/utils/Status.h" #include "td/utils/UInt.h" -#include "td/utils/utf8.h" #include #include @@ -207,19 +205,11 @@ class TlBufferParser : public TlParser { c = ' '; } } - if (check_utf8(result)) { + if (is_valid_utf8(result)) { return result; } - CHECK(!result.empty()); - LOG(WARNING) << "Wrong UTF-8 
string [[" << result << "]] in " << format::as_hex_dump<4>(parent_->as_slice()); - - // trying to remove last character - size_t new_size = result.size() - 1; - while (new_size != 0 && !is_utf8_character_first_code_unit(static_cast<unsigned char>(result[new_size]))) { - new_size--; - } - result.resize(new_size); - if (check_utf8(result)) { + result.resize(last_utf8_character_position(result)); + if (is_valid_utf8(result)) { return result; } @@ -235,6 +225,10 @@ class TlBufferParser : public TlParser { const BufferSlice *parent_; BufferSlice as_buffer_slice(Slice slice); + + bool is_valid_utf8(CSlice str) const; + + static size_t last_utf8_character_position(Slice str); }; template <>