Keep only Slice overload of utf8_utf16_substr.
This commit is contained in:
parent
5c5d19e76f
commit
faa738d6a9
@ -16889,7 +16889,7 @@ tl_object_ptr<td_api::userFullInfo> ContactsManager::get_user_full_info_object(U
|
||||
return true;
|
||||
}
|
||||
if (entity.type == MessageEntity::Type::Url &&
|
||||
!LinkManager::is_internal_link(utf8_utf16_substr(Slice(bio.text), entity.offset, entity.length))) {
|
||||
!LinkManager::is_internal_link(utf8_utf16_substr(bio.text, entity.offset, entity.length))) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -1793,7 +1793,7 @@ string get_first_url(const FormattedText &text) {
|
||||
if (entity.length <= 4) {
|
||||
continue;
|
||||
}
|
||||
Slice url = utf8_utf16_substr(text.text, entity.offset, entity.length);
|
||||
auto url = utf8_utf16_substr(text.text, entity.offset, entity.length);
|
||||
string scheme = to_lower(url.substr(0, 4));
|
||||
if (scheme == "ton:" || begins_with(scheme, "tg:") || scheme == "ftp:" || is_plain_domain(url)) {
|
||||
continue;
|
||||
|
@ -119,4 +119,41 @@ string utf8_encode(CSlice data) {
|
||||
return PSTRING() << "url_decode(" << url_encode(data) << ')';
|
||||
}
|
||||
|
||||
/// Returns the length of a UTF-8 string measured in UTF-16 code units.
///
/// Every byte accepted by is_utf8_character_first_code_unit() contributes one
/// unit. A byte of the form 0b11110xxx (lead byte of a 4-byte sequence)
/// contributes one more, because such a character is a surrogate pair in UTF-16.
/// NOTE(review): the input is not validated here; presumably callers pass
/// well-formed UTF-8 - confirm.
size_t utf8_utf16_length(Slice str) {
  size_t code_units = 0;
  for (auto ch : str) {
    if (is_utf8_character_first_code_unit(ch)) {
      ++code_units;
    }
    // lead byte of a 4-byte sequence => second half of a surrogate pair
    if ((ch & 0xf8) == 0xf0) {
      ++code_units;
    }
  }
  return code_units;
}
|
||||
|
||||
/// Truncates a UTF-8 string to at most `length` UTF-16 code units.
///
/// The string is cut only at character boundaries, i.e. before a byte accepted
/// by is_utf8_character_first_code_unit(). A character whose lead byte is
/// >= 0xf0 costs two code units (surrogate pair); every other character costs one.
///
/// If the next character needs more code units than are left (a surrogate pair
/// when only one unit remains), the result ends before that character. The
/// previous implementation decremented the unsigned `length` twice in that
/// case, wrapped around to a huge value and returned the string untruncated.
///
/// \param str    UTF-8 string to truncate.
/// \param length maximum size of the result in UTF-16 code units.
/// \return the longest prefix of `str`, ending at a character boundary, that
///         spans no more than `length` UTF-16 code units.
Slice utf8_utf16_truncate(Slice str, size_t length) {
  for (size_t i = 0; i < str.size(); i++) {
    auto c = static_cast<unsigned char>(str[i]);
    if (!is_utf8_character_first_code_unit(c)) {
      // continuation byte: belongs to the character that was already counted
      continue;
    }

    // >= 4 bytes in symbol => surrogate pair
    const size_t code_units = c >= 0xf0 ? 2 : 1;
    if (length < code_units) {
      // the character starting at i doesn't fit; never let length underflow
      return str.substr(0, i);
    }
    length -= code_units;
  }
  return str;
}
|
||||
|
||||
/// Returns the part of a UTF-8 string that starts `offset` UTF-16 code units
/// from its beginning.
///
/// The number of bytes to skip is the size of the prefix that
/// utf8_utf16_truncate() returns for `offset`; a zero offset returns `str`
/// unchanged without scanning it.
Slice utf8_utf16_substr(Slice str, size_t offset) {
  if (offset != 0) {
    const auto skipped_prefix = utf8_utf16_truncate(str, offset);
    return str.substr(skipped_prefix.size());
  }
  return str;
}
|
||||
|
||||
/// Returns the part of a UTF-8 string that starts `offset` UTF-16 code units
/// from its beginning and is limited to `length` UTF-16 code units.
///
/// Implemented as the two-argument utf8_utf16_substr() followed by
/// utf8_utf16_truncate() on the remaining tail.
Slice utf8_utf16_substr(Slice str, size_t offset, size_t length) {
  const Slice tail = utf8_utf16_substr(str, offset);
  return utf8_utf16_truncate(tail, length);
}
|
||||
|
||||
} // namespace td
|
||||
|
@ -29,13 +29,7 @@ inline size_t utf8_length(Slice str) {
|
||||
}
|
||||
|
||||
/// returns length of UTF-8 string in UTF-16 code units
|
||||
/// Counts UTF-16 code units of a UTF-8 string: one per byte accepted by
/// is_utf8_character_first_code_unit(), plus one more for each byte of the
/// form 0b11110xxx (lead byte of a 4-byte sequence, i.e. a surrogate pair).
inline size_t utf8_utf16_length(Slice str) {
  size_t total = 0;
  for (auto byte : str) {
    const bool starts_character = is_utf8_character_first_code_unit(byte);
    const bool starts_surrogate_pair = (byte & 0xf8) == 0xf0;
    total += starts_character + starts_surrogate_pair;
  }
  return total;
}
|
||||
size_t utf8_utf16_length(Slice str);
|
||||
|
||||
/// appends a Unicode character using UTF-8 encoding
|
||||
void append_utf8_character(string &str, uint32 ch);
|
||||
@ -69,23 +63,7 @@ T utf8_truncate(T str, size_t length) {
|
||||
}
|
||||
|
||||
/// truncates UTF-8 string to the given length given in UTF-16 code units
|
||||
template <class T>
|
||||
T utf8_utf16_truncate(T str, size_t length) {
|
||||
for (size_t i = 0; i < str.size(); i++) {
|
||||
auto c = static_cast<unsigned char>(str[i]);
|
||||
if (is_utf8_character_first_code_unit(c)) {
|
||||
if (length <= 0) {
|
||||
return str.substr(0, i);
|
||||
} else {
|
||||
length--;
|
||||
if (c >= 0xf0) { // >= 4 bytes in symbol => surrogate pair
|
||||
length--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return str;
|
||||
}
|
||||
Slice utf8_utf16_truncate(Slice str, size_t length);
|
||||
|
||||
template <class T>
|
||||
T utf8_substr(T str, size_t offset) {
|
||||
@ -101,19 +79,9 @@ T utf8_substr(T str, size_t offset, size_t length) {
|
||||
return utf8_truncate(utf8_substr(str, offset), length);
|
||||
}
|
||||
|
||||
/// Generic version: drops the first `offset` UTF-16 code units of a UTF-8
/// string and returns the rest.
///
/// The byte position to cut at is the size of the prefix returned by
/// utf8_utf16_truncate() for `offset`; with a zero offset the string is
/// returned as is.
template <class T>
T utf8_utf16_substr(T str, size_t offset) {
  if (offset != 0) {
    auto bytes_to_skip = utf8_utf16_truncate(str, offset).size();
    return str.substr(bytes_to_skip);
  }
  return str;
}
|
||||
Slice utf8_utf16_substr(Slice str, size_t offset);
|
||||
|
||||
/// Generic version: returns the part of a UTF-8 string that starts `offset`
/// UTF-16 code units from the beginning and is limited to `length` UTF-16
/// code units. Composes the two-argument utf8_utf16_substr() with
/// utf8_utf16_truncate().
template <class T>
T utf8_utf16_substr(T str, size_t offset, size_t length) {
  T tail = utf8_utf16_substr(str, offset);
  return utf8_utf16_truncate(tail, length);
}
|
||||
Slice utf8_utf16_substr(Slice str, size_t offset, size_t length);
|
||||
|
||||
/// Returns UTF-8 string converted to lower case.
|
||||
string utf8_to_lower(Slice str);
|
||||
|
@ -959,8 +959,8 @@ TEST(MessageEntities, fix_formatted_text) {
|
||||
}
|
||||
}
|
||||
|
||||
check_fix_formatted_text(str, entities, td::utf8_utf16_substr(str, 3, 11), fixed_entities, false, false, false,
|
||||
false);
|
||||
check_fix_formatted_text(str, entities, td::utf8_utf16_substr(str, 3, 11).str(), fixed_entities, false, false,
|
||||
false, false);
|
||||
}
|
||||
|
||||
for (td::string text : {"\t", "\r", "\n", "\t ", "\r ", "\n "}) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user