diff --git a/td/telegram/MessageContent.cpp b/td/telegram/MessageContent.cpp index 98c839010..150258ff6 100644 --- a/td/telegram/MessageContent.cpp +++ b/td/telegram/MessageContent.cpp @@ -5975,14 +5975,14 @@ void update_used_hashtags(Td *td, const MessageContent *content) { } while (utf16_pos < entity.offset && ptr < end) { utf16_pos += 1 + (ptr[0] >= 0xf0); - ptr = next_utf8_unsafe(ptr, &skipped_code, "update_used_hashtags"); + ptr = next_utf8_unsafe(ptr, &skipped_code); } CHECK(utf16_pos == entity.offset); auto from = ptr; while (utf16_pos < entity.offset + entity.length && ptr < end) { utf16_pos += 1 + (ptr[0] >= 0xf0); - ptr = next_utf8_unsafe(ptr, &skipped_code, "update_used_hashtags 2"); + ptr = next_utf8_unsafe(ptr, &skipped_code); } CHECK(utf16_pos == entity.offset + entity.length); auto to = ptr; diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index e0ad2b291..312a14c23 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -262,7 +262,7 @@ static vector match_mentions(Slice str) { if (ptr != begin) { uint32 prev; - next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev, "match_mentions"); + next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev); if (is_word_character(prev)) { ptr++; @@ -280,7 +280,7 @@ static vector match_mentions(Slice str) { } uint32 next = 0; if (ptr != end) { - next_utf8_unsafe(ptr, &next, "match_mentions 2"); + next_utf8_unsafe(ptr, &next); } if (is_word_character(next)) { continue; @@ -306,7 +306,7 @@ static vector match_bot_commands(Slice str) { if (ptr != begin) { uint32 prev; - next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev, "match_bot_commands"); + next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev); if (is_word_character(prev) || prev == '/' || prev == '<' || prev == '>') { ptr++; @@ -339,7 +339,7 @@ static vector match_bot_commands(Slice str) { uint32 next = 0; if (ptr != end) { - next_utf8_unsafe(ptr, &next, "match_bot_commands 2"); + next_utf8_unsafe(ptr, &next); } if (is_word_character(next) || next == '/' || next == '<' || next == '>') { continue; @@ -382,7 +382,7 @@ static vector match_hashtags(Slice str) { if (ptr != begin) { uint32 prev; - next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev, "match_hashtags"); + next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev); if (is_hashtag_letter(prev, category)) { ptr++; @@ -395,7 +395,7 @@ static vector match_hashtags(Slice str) { bool was_letter = false; while (ptr != end) { uint32 code; - auto next_ptr = next_utf8_unsafe(ptr, &code, "match_hashtags 2"); + auto next_ptr = next_utf8_unsafe(ptr, &code); if (!is_hashtag_letter(code, category)) { break; } @@ -443,7 +443,7 @@ static vector match_cashtags(Slice str) { if (ptr != begin) { uint32 prev; - next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev, "match_cashtags"); + next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev); if (is_hashtag_letter(prev, category) || prev == '$') { ptr++; @@ -467,7 +467,7 @@ static vector match_cashtags(Slice str) { if (cashtag_end != end) { uint32 code; - next_utf8_unsafe(ptr, &code, "match_cashtags 2"); + next_utf8_unsafe(ptr, &code); if (is_hashtag_letter(code, category) || code == '$') { continue; } @@ -506,7 +506,7 @@ static vector match_media_timestamps(Slice str) { if (media_timestamp_begin != begin) { uint32 prev; - next_utf8_unsafe(prev_utf8_unsafe(media_timestamp_begin), &prev, "match_media_timestamps 1"); + next_utf8_unsafe(prev_utf8_unsafe(media_timestamp_begin), &prev); if (is_word_character(prev)) { continue; @@ -514,7 +514,7 @@ static vector match_media_timestamps(Slice str) { } if (media_timestamp_end != end) { uint32 next; - next_utf8_unsafe(media_timestamp_end, &next, "match_media_timestamps 2"); + next_utf8_unsafe(media_timestamp_end, &next); if (is_word_character(next)) { continue; @@ -546,7 +546,7 @@ static vector match_bank_card_numbers(Slice str) { } if (ptr != begin) { uint32 prev; - next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev, "match_bank_card_numbers"); + next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev); if (prev == '.' || prev == ',' || prev == '+' || prev == '-' || prev == '_' || get_unicode_simple_category(prev) == UnicodeSimpleCategory::Letter) { @@ -582,7 +582,7 @@ static vector match_bank_card_numbers(Slice str) { } if (card_number_end != end) { uint32 next; - next_utf8_unsafe(card_number_end, &next, "match_bank_card_numbers 2"); + next_utf8_unsafe(card_number_end, &next); if (next == '-' || next == '_' || get_unicode_simple_category(next) == UnicodeSimpleCategory::Letter) { continue; } @@ -657,7 +657,7 @@ static vector match_tg_urls(Slice str) { auto path_end_ptr = ptr + 1; while (path_end_ptr != end) { uint32 code = 0; - auto next_ptr = next_utf8_unsafe(path_end_ptr, &code, "match_tg_urls"); + auto next_ptr = next_utf8_unsafe(path_end_ptr, &code); if (!is_url_path_symbol(code)) { break; } @@ -739,7 +739,7 @@ static vector match_urls(Slice str) { while (domain_begin_ptr != begin) { domain_begin_ptr = prev_utf8_unsafe(domain_begin_ptr); uint32 code = 0; - auto next_ptr = next_utf8_unsafe(domain_begin_ptr, &code, "match_urls 0"); + auto next_ptr = next_utf8_unsafe(domain_begin_ptr, &code); if (!is_domain_symbol(code)) { domain_begin_ptr = next_ptr; break; @@ -752,7 +752,7 @@ static vector match_urls(Slice str) { // try to find '@' to the right if there is no '@' to the left while (domain_end_ptr != end) { uint32 code = 0; - auto next_ptr = next_utf8_unsafe(domain_end_ptr, &code, "match_urls"); + auto next_ptr = next_utf8_unsafe(domain_end_ptr, &code); if (code == '@') { last_at_ptr = domain_end_ptr; } @@ -765,7 +765,7 @@ static vector match_urls(Slice str) { } while (domain_end_ptr != end) { uint32 code = 0; - auto next_ptr = next_utf8_unsafe(domain_end_ptr, &code, "match_urls 2"); + auto next_ptr = next_utf8_unsafe(domain_end_ptr, &code); if (!is_domain_symbol(code)) { break; } @@ -776,7 +776,7 @@ static vector match_urls(Slice str) { while (domain_begin_ptr != begin) { domain_begin_ptr = prev_utf8_unsafe(domain_begin_ptr); uint32 code = 0; - auto next_ptr = next_utf8_unsafe(domain_begin_ptr, &code, "match_urls 3"); + auto next_ptr = next_utf8_unsafe(domain_begin_ptr, &code); if (!is_user_data_symbol(code)) { domain_begin_ptr = next_ptr; break; @@ -808,7 +808,7 @@ static vector match_urls(Slice str) { auto path_end_ptr = url_end_ptr + 1; while (path_end_ptr != end) { uint32 code = 0; - auto next_ptr = next_utf8_unsafe(path_end_ptr, &code, "match_urls 4"); + auto next_ptr = next_utf8_unsafe(path_end_ptr, &code); if (!is_url_path_symbol(code)) { break; } @@ -836,7 +836,7 @@ static vector match_urls(Slice str) { while (user_data_begin_ptr != begin) { user_data_begin_ptr = prev_utf8_unsafe(user_data_begin_ptr); uint32 code = 0; - auto next_ptr = next_utf8_unsafe(user_data_begin_ptr, &code, "match_urls 5"); + auto next_ptr = next_utf8_unsafe(user_data_begin_ptr, &code); if (!is_user_data_symbol(code)) { user_data_begin_ptr = next_ptr; break; @@ -856,7 +856,7 @@ static vector match_urls(Slice str) { while (protocol_begin_ptr != begin) { protocol_begin_ptr = prev_utf8_unsafe(protocol_begin_ptr); uint32 code = 0; - auto next_ptr = next_utf8_unsafe(protocol_begin_ptr, &code, "match_urls 6"); + auto next_ptr = next_utf8_unsafe(protocol_begin_ptr, &code); if (!is_protocol_symbol(code)) { protocol_begin_ptr = next_ptr; break; @@ -876,7 +876,7 @@ static vector match_urls(Slice str) { auto prefix_end = prefix.uend(); auto prefix_back = prev_utf8_unsafe(prefix_end); uint32 code = 0; - next_utf8_unsafe(prefix_back, &code, "match_urls 7"); + next_utf8_unsafe(prefix_back, &code); if (is_word_character(code) || code == '/' || code == '#' || code == '@') { is_bad = true; } @@ -1669,7 +1669,7 @@ static void fix_entity_offsets(Slice text, vector &entities) { while (ptr != end && cnt > 0) { unsigned char c = ptr[0]; utf16_pos += 1 + (c >= 0xf0); - ptr = next_utf8_unsafe(ptr, &skipped_code, "fix_entity_offsets"); + ptr = next_utf8_unsafe(ptr, &skipped_code); pos = static_cast(ptr - begin); if (entity_begin == pos) { diff --git a/td/telegram/MessagesDb.cpp b/td/telegram/MessagesDb.cpp index 87a819201..e079426b3 100644 --- a/td/telegram/MessagesDb.cpp +++ b/td/telegram/MessagesDb.cpp @@ -736,7 +736,7 @@ class MessagesDbImpl final : public MessagesDbSyncInterface { for (auto ptr = query.ubegin(), end = query.uend(); ptr < end;) { uint32 code; auto code_ptr = ptr; - ptr = next_utf8_unsafe(ptr, &code, "prepare_query"); + ptr = next_utf8_unsafe(ptr, &code); if (is_word_character(code)) { if (!in_word) { in_word = true; diff --git a/td/telegram/StickersManager.cpp b/td/telegram/StickersManager.cpp index be6267ca1..6ea30f7f9 100644 --- a/td/telegram/StickersManager.cpp +++ b/td/telegram/StickersManager.cpp @@ -8460,7 +8460,7 @@ vector StickersManager::get_emoji_language_codes(const vector &i } if (!text.empty()) { uint32 code = 0; - next_utf8_unsafe(text.ubegin(), &code, "get_emoji_language_codes"); + next_utf8_unsafe(text.ubegin(), &code); if ((0x410 <= code && code <= 0x44F) || code == 0x401 || code == 0x451) { // the first letter is cyrillic if (!td::contains(language_codes, "ru") && !td::contains(language_codes, "uk") && diff --git a/tdutils/td/utils/Hints.cpp b/tdutils/td/utils/Hints.cpp index dfe584fc5..762281315 100644 --- a/tdutils/td/utils/Hints.cpp +++ b/tdutils/td/utils/Hints.cpp @@ -34,7 +34,7 @@ vector Hints::fix_words(vector words) { return words; } -vector Hints::get_words(Slice name, bool is_search) { +vector Hints::get_words(Slice name) { bool in_word = false; string word; vector words; @@ -42,7 +42,7 @@ vector Hints::get_words(Slice name, bool is_search) { auto end = name.uend(); while (pos != end) { uint32 code; - pos = next_utf8_unsafe(pos, &code, is_search ? "get_words_search" : "get_words_add"); + pos = next_utf8_unsafe(pos, &code); code = prepare_search_character(code); if (code == 0) { @@ -94,7 +94,7 @@ void Hints::add(KeyT key, Slice name) { return; } vector old_transliterations; - for (auto &old_word : get_words(it->second, false)) { + for (auto &old_word : get_words(it->second)) { delete_word(old_word, key, word_to_keys_); for (auto &w : get_word_transliterations(old_word, false)) { @@ -116,7 +116,7 @@ void Hints::add(KeyT key, Slice name) { } vector transliterations; - for (auto &word : get_words(name, false)) { + for (auto &word : get_words(name)) { add_word(word, key, word_to_keys_); for (auto &w : get_word_transliterations(word, false)) { @@ -166,7 +166,7 @@ std::pair> Hints::search(Slice query, int32 limit, b return {key_to_name_.size(), std::move(results)}; } - auto words = get_words(query, true); + auto words = get_words(query); if (return_all_for_empty_query && words.empty()) { results.reserve(key_to_name_.size()); for (auto &it : key_to_name_) { diff --git a/tdutils/td/utils/Hints.h b/tdutils/td/utils/Hints.h index d9f4115c7..f59e45f50 100644 --- a/tdutils/td/utils/Hints.h +++ b/tdutils/td/utils/Hints.h @@ -52,7 +52,7 @@ class Hints { static vector fix_words(vector words); - static vector get_words(Slice name, bool is_search); + static vector get_words(Slice name); static void add_search_results(vector &results, const string &word, const std::map> &word_to_keys); diff --git a/tdutils/td/utils/filesystem.cpp b/tdutils/td/utils/filesystem.cpp index ae7483cf4..edb5df867 100644 --- a/tdutils/td/utils/filesystem.cpp +++ b/tdutils/td/utils/filesystem.cpp @@ -133,7 +133,7 @@ static string clean_filename_part(Slice name, int max_length) { int size = 0; for (auto *it = name.ubegin(); it != name.uend() && size < max_length;) { uint32 code; - it = next_utf8_unsafe(it, &code, "clean_filename_part"); + it = next_utf8_unsafe(it, &code); if (!is_ok(code)) { if (prepare_search_character(code) == 0) { continue; diff --git a/tdutils/td/utils/port/IPAddress.cpp b/tdutils/td/utils/port/IPAddress.cpp index 7e1e72532..32c26403d 100644 --- a/tdutils/td/utils/port/IPAddress.cpp +++ b/tdutils/td/utils/port/IPAddress.cpp @@ -53,7 +53,7 @@ static void punycode(string &result, Slice part) { auto end = part.uend(); while (begin != end) { uint32 code; - begin = next_utf8_unsafe(begin, &code, "punycode"); + begin = next_utf8_unsafe(begin, &code); if (code <= 127u) { result += to_lower(static_cast(code)); processed++; diff --git a/tdutils/td/utils/translit.cpp b/tdutils/td/utils/translit.cpp index 3b1fbfd62..65cfcf007 100644 --- a/tdutils/td/utils/translit.cpp +++ b/tdutils/td/utils/translit.cpp @@ -56,7 +56,7 @@ static void add_word_transliterations(vector &result, Slice word, bool a auto end = word.uend(); while (pos != end) { uint32 code; - pos = next_utf8_unsafe(pos, &code, "add_word_transliterations"); + pos = next_utf8_unsafe(pos, &code); auto it = simple_rules.find(code); if (it != simple_rules.end()) { s += it->second; @@ -89,7 +89,7 @@ static void add_word_transliterations(vector &result, Slice word, bool a } uint32 code; - pos = next_utf8_unsafe(pos, &code, "add_word_transliterations 2"); + pos = next_utf8_unsafe(pos, &code); auto it = simple_rules.find(code); if (it != simple_rules.end()) { s += it->second; diff --git a/tdutils/td/utils/utf8.cpp b/tdutils/td/utils/utf8.cpp index db93a858d..e4e62cf06 100644 --- a/tdutils/td/utils/utf8.cpp +++ b/tdutils/td/utils/utf8.cpp @@ -6,7 +6,6 @@ // #include "td/utils/utf8.h" -#include "td/utils/logging.h" #include "td/utils/misc.h" #include "td/utils/SliceBuilder.h" #include "td/utils/unicode.h" @@ -81,7 +80,7 @@ void append_utf8_character(string &str, uint32 ch) { } } -const unsigned char *next_utf8_unsafe(const unsigned char *ptr, uint32 *code, const char *source) { +const unsigned char *next_utf8_unsafe(const unsigned char *ptr, uint32 *code) { uint32 a = ptr[0]; if ((a & 0x80) == 0) { *code = a; @@ -96,7 +95,7 @@ const unsigned char *next_utf8_unsafe(const unsigned char *ptr, uint32 *code, co *code = ((a & 0x07) << 18) | ((ptr[1] & 0x3f) << 12) | ((ptr[2] & 0x3f) << 6) | (ptr[3] & 0x3f); return ptr + 4; } - LOG(FATAL) << a << " " << source; + UNREACHABLE(); *code = 0; return ptr; } @@ -107,7 +106,7 @@ string utf8_to_lower(Slice str) { auto end = str.uend(); while (pos != end) { uint32 code; - pos = next_utf8_unsafe(pos, &code, "utf8_to_lower"); + pos = next_utf8_unsafe(pos, &code); append_utf8_character(result, unicode_to_lower(code)); } return result; diff --git a/tdutils/td/utils/utf8.h b/tdutils/td/utils/utf8.h index e16eef4d1..236cc87c0 100644 --- a/tdutils/td/utils/utf8.h +++ b/tdutils/td/utils/utf8.h @@ -49,7 +49,7 @@ inline const unsigned char *prev_utf8_unsafe(const unsigned char *ptr) { } /// moves pointer one UTF-8 character forward and saves code of the skipped character in *code -const unsigned char *next_utf8_unsafe(const unsigned char *ptr, uint32 *code, const char *source); +const unsigned char *next_utf8_unsafe(const unsigned char *ptr, uint32 *code); /// truncates UTF-8 string to the given length in Unicode characters template diff --git a/tdutils/test/bitmask.cpp b/tdutils/test/bitmask.cpp index 63b4c27bd..e81c406bb 100644 --- a/tdutils/test/bitmask.cpp +++ b/tdutils/test/bitmask.cpp @@ -50,7 +50,7 @@ class RangeSet { RangeSet res; for (auto begin = data.ubegin(); begin != data.uend();) { uint32 size; - begin = next_utf8_unsafe(begin, &size, "RangeSet"); + begin = next_utf8_unsafe(begin, &size); if (!is_empty && size != 0) { res.ranges_.push_back({curr * BIT_SIZE, (curr + size) * BIT_SIZE});