2018-12-31 22:04:05 +03:00
|
|
|
//
|
2022-01-01 03:35:39 +03:00
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
|
2018-12-31 22:04:05 +03:00
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
|
|
|
#include "td/utils/port/wstring_convert.h"
|
|
|
|
|
|
|
|
// Dummy symbol defined unconditionally: everything below is compiled only under TD_PORT_WINDOWS,
// so, as the name says, it keeps the linker from warning about an otherwise empty file.
char disable_linker_warning_about_empty_file_wstring_convert_cpp TD_UNUSED;
|
|
|
|
|
|
|
|
#if TD_PORT_WINDOWS
|
|
|
|
|
2021-09-14 12:16:29 +03:00
|
|
|
#include "td/utils/base64.h"
|
|
|
|
#include "td/utils/SliceBuilder.h"
|
2018-05-18 18:55:40 +03:00
|
|
|
#include "td/utils/utf8.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
|
2018-05-18 18:55:40 +03:00
|
|
|
#include <cwchar>
|
2018-12-31 22:04:05 +03:00
|
|
|
|
|
|
|
namespace td {
|
|
|
|
|
2018-05-18 17:22:18 +03:00
|
|
|
// Converts a UTF-8 encoded slice to a wide string of UTF-16 code units.
// Returns an error, containing the base64-encoded input, if the slice is not valid UTF-8.
Result<std::wstring> to_wstring(CSlice slice) {
  if (!check_utf8(slice)) {
    return Status::Error(PSLICE() << "String was expected to be encoded in UTF-8: " << base64_encode(slice));
  }

  const size_t utf16_length = utf8_utf16_length(slice);
  std::wstring result(utf16_length, static_cast<wchar_t>(0));
  if (utf16_length == 0) {
    return result;
  }

  // The input has already been validated, so continuation bytes can be read without extra checks.
  wchar_t *out = &result[0];
  size_t pos = 0;
  while (pos < slice.size()) {
    uint32 first = static_cast<unsigned char>(slice[pos++]);
    if (first < 0x80) {
      // 1-byte sequence
      *out++ = static_cast<wchar_t>(first);
      continue;
    }

    uint32 second = static_cast<unsigned char>(slice[pos++]);
    if (first < 0xe0) {
      // 2-byte sequence
      *out++ = static_cast<wchar_t>(((first & 0x1f) << 6) + (second & 0x3f));
      continue;
    }

    uint32 third = static_cast<unsigned char>(slice[pos++]);
    if (first < 0xf0) {
      // 3-byte sequence
      *out++ = static_cast<wchar_t>(((first & 0x0f) << 12) + ((second & 0x3f) << 6) + (third & 0x3f));
      continue;
    }

    // 4-byte sequence: the code point is emitted as a surrogate pair
    uint32 fourth = static_cast<unsigned char>(slice[pos++]);
    uint32 offset =
        ((first & 0x07) << 18) + ((second & 0x3f) << 12) + ((third & 0x3f) << 6) + (fourth & 0x3f) - 0x10000;
    *out++ = static_cast<wchar_t>(0xD800 + (offset >> 10));
    *out++ = static_cast<wchar_t>(0xDC00 + (offset & 0x3ff));
  }
  CHECK(out == &result[0] + utf16_length);
  return result;
}
|
|
|
|
|
|
|
|
// Converts `size` UTF-16 code units starting at `begin` to a UTF-8 encoded string.
// Returns an error if the input contains a surrogate that is not part of a well-formed
// (high surrogate, low surrogate) pair, including a surrogate in the last position.
Result<string> from_wstring(const wchar_t *begin, size_t size) {
  // First pass: validate surrogate pairs and compute the exact length of the UTF-8 output.
  size_t result_len = 0;
  for (size_t i = 0; i < size; i++) {
    uint32 cur = begin[i];
    if ((cur & 0xF800) == 0xD800) {
      // The second half of the pair must lie inside the buffer; begin[size] must never be read,
      // because the buffer isn't required to be null-terminated
      if (i + 1 < size) {
        uint32 next = begin[++i];
        // cur must be a high surrogate (bit 0x400 is clear) and next must be a low surrogate
        if ((next & 0xFC00) == 0xDC00 && (cur & 0x400) == 0) {
          result_len += 4;
          continue;
        }
      }

      return Status::Error("Wrong wstring encoding");
    }
    result_len += 1 + (cur >= 0x80) + (cur >= 0x800);
  }

  // Second pass: encode; the input is known to be well-formed at this point.
  std::string result(result_len, '\0');
  if (result_len) {
    char *res = &result[0];
    for (size_t i = 0; i < size; i++) {
      uint32 cur = begin[i];
      // TODO conversion uint32 -> signed char is implementation defined
      if (cur <= 0x7f) {
        *res++ = static_cast<char>(cur);
      } else if (cur <= 0x7ff) {
        *res++ = static_cast<char>(0xc0 | (cur >> 6));
        *res++ = static_cast<char>(0x80 | (cur & 0x3f));
      } else if ((cur & 0xF800) != 0xD800) {
        *res++ = static_cast<char>(0xe0 | (cur >> 12));
        *res++ = static_cast<char>(0x80 | ((cur >> 6) & 0x3f));
        *res++ = static_cast<char>(0x80 | (cur & 0x3f));
      } else {
        // Surrogate pair: the first pass guarantees that begin[i + 1] exists and is a low surrogate
        uint32 next = begin[++i];
        uint32 val = ((cur - 0xD800) << 10) + next - 0xDC00 + 0x10000;

        *res++ = static_cast<char>(0xf0 | (val >> 18));
        *res++ = static_cast<char>(0x80 | ((val >> 12) & 0x3f));
        *res++ = static_cast<char>(0x80 | ((val >> 6) & 0x3f));
        *res++ = static_cast<char>(0x80 | (val & 0x3f));
      }
    }
  }
  return result;
}
|
|
|
|
|
|
|
|
// Converts the whole wide string to UTF-8 by forwarding its data pointer and length
// to the (pointer, size) overload.
Result<string> from_wstring(const std::wstring &str) {
  const wchar_t *first = str.data();
  const size_t length = str.size();
  return from_wstring(first, length);
}
|
|
|
|
|
|
|
|
// Converts a null-terminated wide string to UTF-8; the length is measured with std::wcslen
// and the conversion is done by the (pointer, size) overload.
Result<string> from_wstring(const wchar_t *begin) {
  const size_t length = std::wcslen(begin);
  return from_wstring(begin, length);
}
|
|
|
|
|
|
|
|
} // namespace td
|
|
|
|
|
|
|
|
#endif
|