From ece4fd0e93120a850543b6f2f17a5e26f0556bfb Mon Sep 17 00:00:00 2001 From: levlam Date: Thu, 2 Jan 2020 17:46:19 +0300 Subject: [PATCH] Add message entities tests. GitOrigin-RevId: fd04d3ebc4619d2b6be4282043a1333d824d7ee6 --- td/telegram/MessageEntity.cpp | 11 +++++++++++ test/message_entities.cpp | 15 +++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index 94ddd5fd3..a257b95ba 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -2526,6 +2526,10 @@ static std::pair remove_invalid_entities(const string &text, vect int32 last_non_whitespace_utf16_offset = -1; for (size_t pos = 0; pos <= text.size(); pos++) { + while (current_entity < entities.size() && utf16_offset >= entities[current_entity].offset && + entities[current_entity].length == 0) { + nested_entities_stack.push_back(&entities[current_entity++]); + } while (!nested_entities_stack.empty()) { auto *entity = nested_entities_stack.back(); auto entity_end = entity->offset + entity->length; @@ -2590,6 +2594,9 @@ Status fix_formatted_text(string &text, vector &entities, bool al TRY_RESULT(result, clean_input_string_with_entities(text, entities)); + // now entities are still sorted by offset and length, but not type, + // because some characters could be deleted and some entities now end together + size_t last_non_whitespace_pos; int32 last_non_whitespace_utf16_offset; std::tie(last_non_whitespace_pos, last_non_whitespace_utf16_offset) = remove_invalid_entities(result, entities); @@ -2602,6 +2609,10 @@ Status fix_formatted_text(string &text, vector &entities, bool al return Status::Error(3, "Message must be non-empty"); } + if (!std::is_sorted(entities.begin(), entities.end())) { + std::sort(entities.begin(), entities.end()); // re-sort entities if needed after removal of some characters + } + if (for_draft) { text = std::move(result); } else { diff --git a/test/message_entities.cpp 
b/test/message_entities.cpp index 960dae19e..b43e4b5fe 100644 --- a/test/message_entities.cpp +++ b/test/message_entities.cpp @@ -734,6 +734,12 @@ TEST(MessageEntities, fix_formatted_text) { check_fix_formatted_text(str, entities, td::utf8_utf16_substr(str, 3, 11), fixed_entities, false, false, false, false); } + + for (td::string text : {"\t", "\r", "\n", "\t ", "\r ", "\n "}) { + for (auto type : {td::MessageEntity::Type::Bold, td::MessageEntity::Type::TextUrl}) { + check_fix_formatted_text(text, {{type, 0, 1, "http://telegram.org/"}}, "", {}, true, false, false, true); + } + } } static void check_parse_html(td::string text, const td::string &result, const td::vector &entities) { @@ -813,6 +819,15 @@ TEST(MessageEntities, parse_html) { check_parse_html("🏟 🏟<a", "🏟 🏟a", "🏟 🏟", "🏟 🏟<", {}); + check_parse_html("\t", "\t", {{td::MessageEntity::Type::Italic, 0, 1}}); + check_parse_html("\r", "\r", {{td::MessageEntity::Type::Italic, 0, 1}}); + check_parse_html("\n", "\n", {{td::MessageEntity::Type::Italic, 0, 1}}); + check_parse_html("\t", "\t", + {{td::MessageEntity::Type::TextUrl, 0, 1, "http://telegram.org/"}}); + check_parse_html("\r", "\r", + {{td::MessageEntity::Type::TextUrl, 0, 1, "http://telegram.org/"}}); + check_parse_html("\n", "\n", + {{td::MessageEntity::Type::TextUrl, 0, 1, "http://telegram.org/"}}); check_parse_html(" ", " ", {{td::MessageEntity::Type::Code, 0, 1}, {td::MessageEntity::Type::Bold, 0, 1},