From 4d68487c1274355452e11d8c428a80f913d43e28 Mon Sep 17 00:00:00 2001 From: levlam Date: Mon, 7 Oct 2019 03:41:04 +0300 Subject: [PATCH] Add utf8_utf16_length. GitOrigin-RevId: d5e713df1f3e0cdf70004d0898c5b55246dd014e --- tdutils/td/utils/port/wstring_convert.cpp | 5 +---- tdutils/td/utils/utf8.h | 9 +++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tdutils/td/utils/port/wstring_convert.cpp b/tdutils/td/utils/port/wstring_convert.cpp index 81d95fea..ffe7e15e 100644 --- a/tdutils/td/utils/port/wstring_convert.cpp +++ b/tdutils/td/utils/port/wstring_convert.cpp @@ -21,10 +21,7 @@ Result<std::wstring> to_wstring(CSlice slice) { return Status::Error("Wrong encoding"); } - size_t wstring_len = 0; - for (auto c : slice) { - wstring_len += ((c & 0xc0) != 0x80) + ((c & 0xf8) == 0xf0); - } + size_t wstring_len = utf8_utf16_length(slice); std::wstring result(wstring_len, static_cast<wchar_t>(0)); if (wstring_len) { diff --git a/tdutils/td/utils/utf8.h b/tdutils/td/utils/utf8.h index 398dbb09..059416cd 100644 --- a/tdutils/td/utils/utf8.h +++ b/tdutils/td/utils/utf8.h @@ -28,6 +28,15 @@ inline size_t utf8_length(Slice str) { return result; } +/// returns length of UTF-8 string in UTF-16 code units +inline size_t utf8_utf16_length(Slice str) { + size_t result = 0; + for (auto c : str) { + result += is_utf8_character_first_code_unit(c) + ((c & 0xf8) == 0xf0); + } + return result; +} + /// appends a Unicode character using UTF-8 encoding void append_utf8_character(string &str, uint32 ch);