From 6b21b27cae065b8e879b1c85e18c2d793c55877d Mon Sep 17 00:00:00 2001 From: levlam Date: Fri, 21 Feb 2020 16:22:40 +0300 Subject: [PATCH] Add random fix_formatted_text test. GitOrigin-RevId: 35d2bf822bfebe4221a3495bb83fb2555a984a1c --- td/telegram/MessageEntity.cpp | 40 +++++++++++++++++++++++------------ test/message_entities.cpp | 21 ++++++++++++++++++ 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index b4724871c..eb58d6e98 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -2585,6 +2585,8 @@ vector get_message_entities(vector clean_input_string_with_entities(const string &text, vector &entities) { + check_is_sorted(entities); + struct EntityInfo { MessageEntity *entity; int32 utf16_skipped_before; @@ -2799,7 +2801,7 @@ static std::pair remove_invalid_entities(const string &text, vect // enitities must contain only splittable entities void split_entities(vector &entities, const vector &other_entities) { check_is_sorted(entities); - check_non_intersecting(other_entities); + check_is_sorted(other_entities); int32 begin_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {}; int32 end_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {}; @@ -2842,16 +2844,30 @@ void split_entities(vector &entities, const vector } flush_entities(end_offset); }; - for (auto &other_entity : other_entities) { - add_entities(other_entity.offset); - auto old_size = result.size(); - add_entities(other_entity.offset + other_entity.length); - if (is_pre_entity(other_entity.type)) { - result.resize(old_size); + + vector nested_entities_stack; + auto add_offset = [&](int32 offset) { + while (!nested_entities_stack.empty() && + offset >= nested_entities_stack.back()->offset + nested_entities_stack.back()->length) { + // remove non-intersecting entities from the stack + auto old_size = result.size(); + add_entities(nested_entities_stack.back()->offset + nested_entities_stack.back()->length); + if (is_pre_entity(nested_entities_stack.back()->type)) { + result.resize(old_size); + } + nested_entities_stack.pop_back(); } + + add_entities(offset); + }; + for (auto &other_entity : other_entities) { + add_offset(other_entity.offset); + nested_entities_stack.push_back(&other_entity); } - add_entities(std::numeric_limits::max()); + add_offset(std::numeric_limits::max()); + entities = std::move(result); + // entities are sorted only by offset now, re-sort if needed if (!std::is_sorted(entities.begin(), entities.end())) { std::sort(entities.begin(), entities.end()); @@ -2884,19 +2900,17 @@ static void fix_entities(vector &entities) { if (!blockquote_entities.empty()) { remove_intersecting_entities(blockquote_entities); // blockquote entities can't intersect each other - split_entities(splittable_entities, blockquote_entities); // blockquote entities can contain continuous entities, but can't intersect them in the other ways remove_entities_intersecting_blockquote(continuous_entities, blockquote_entities); - } - split_entities(splittable_entities, continuous_entities); // split by remaining continuous entities - - if (!blockquote_entities.empty()) { combine(continuous_entities, std::move(blockquote_entities)); std::sort(continuous_entities.begin(), continuous_entities.end()); } + // must be called once to not merge some adjacent entities + split_entities(splittable_entities, continuous_entities); + if (splittable_entities.empty()) { splittable_entities = std::move(continuous_entities); } else if (!continuous_entities.empty()) { diff --git a/test/message_entities.cpp b/test/message_entities.cpp index 548cfab31..89211d9f9 100644 --- a/test/message_entities.cpp +++ b/test/message_entities.cpp @@ -10,6 +10,7 @@ #include "td/utils/format.h" #include "td/utils/logging.h" #include "td/utils/misc.h" +#include "td/utils/Random.h" #include "td/utils/Slice.h" #include "td/utils/tests.h" #include "td/utils/utf8.h" @@ -879,6 +880,26 @@ TEST(MessageEntities, fix_formatted_text) { "abc", {{td::MessageEntity::Type::BlockQuote, 1, 1}}); check_fix_formatted_text("abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}}, "abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}}); + + check_fix_formatted_text("example.com", {}, "example.com", {{td::MessageEntity::Type::Url, 0, 11}}); + + for (size_t i = 0; i < 100000; i++) { + str = td::string(td::Random::fast(10, 30), 'a'); + + auto n = td::Random::fast(1, 10); + td::vector entities; + for (int j = 0; j < n; j++) { + td::int32 type = td::Random::fast(0, 16); + td::int32 offset = td::Random::fast(0, static_cast(str.size()) - 1); + auto max_length = static_cast(str.size() - offset); + if ((i & 1) != 0 && max_length > 4) { + max_length = 4; + } + td::int32 length = td::Random::fast(0, max_length); + entities.emplace_back(static_cast(type), offset, length); + } + ASSERT_TRUE(td::fix_formatted_text(str, entities, false, true, true, false).is_ok()); + } } static void check_parse_html(td::string text, const td::string &result, const td::vector &entities) {