From 81d0172f339668d087a06bf3eef04e9f09d6db74 Mon Sep 17 00:00:00 2001 From: levlam Date: Mon, 24 Feb 2020 20:26:08 +0300 Subject: [PATCH] Correctly merge new entities. GitOrigin-RevId: 6ca976a3c17030cffb0c32119389ea5a8c1050ff --- td/telegram/ClientActor.cpp | 1 + td/telegram/MessageEntity.cpp | 65 ++++++++++++++++++++++++++++++----- test/message_entities.cpp | 56 ++++++++++++++++++++++++++---- 3 files changed, 106 insertions(+), 16 deletions(-) diff --git a/td/telegram/ClientActor.cpp b/td/telegram/ClientActor.cpp index d570c8c9..a53fd7e6 100644 --- a/td/telegram/ClientActor.cpp +++ b/td/telegram/ClientActor.cpp @@ -12,6 +12,7 @@ #include "td/telegram/Td.h" namespace td { + ClientActor::ClientActor(unique_ptr callback) { td_ = create_actor("Td", std::move(callback)); } diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index eb58d6e9..fc33325e 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -1296,7 +1296,10 @@ static void remove_entities_intersecting_blockquote(vector &entit blockquote_it->offset + blockquote_it->length <= entities[i].offset)) { blockquote_it++; } - if (blockquote_it != blockquote_entities.end() && blockquote_it->offset < entities[i].offset + entities[i].length) { + if (blockquote_it != blockquote_entities.end() && + (blockquote_it->offset + blockquote_it->length < entities[i].offset + entities[i].length || + (entities[i].offset < blockquote_it->offset && + blockquote_it->offset < entities[i].offset + entities[i].length))) { continue; } if (i != left_entities) { @@ -2874,6 +2877,21 @@ void split_entities(vector &entities, const vector } } +static vector resplit_entities(vector &&splittable_entities, + vector &&entities) { + if (!splittable_entities.empty()) { + split_entities(splittable_entities, entities); // can merge some entities + + if (entities.empty()) { + return std::move(splittable_entities); + } + + combine(entities, std::move(splittable_entities)); + std::sort(entities.begin(), entities.end()); + } + return std::move(entities); +} + static void fix_entities(vector &entities) { if (!std::is_sorted(entities.begin(), entities.end())) { std::sort(entities.begin(), entities.end()); @@ -2909,15 +2927,44 @@ static void fix_entities(vector &entities) { } // must be called once to not merge some adjacent entities - split_entities(splittable_entities, continuous_entities); + entities = resplit_entities(std::move(splittable_entities), std::move(continuous_entities)); + check_is_sorted(entities); +} - if (splittable_entities.empty()) { - splittable_entities = std::move(continuous_entities); - } else if (!continuous_entities.empty()) { - combine(splittable_entities, std::move(continuous_entities)); - std::sort(splittable_entities.begin(), splittable_entities.end()); +static void merge_new_entities(vector &entities, vector new_entities) { + check_is_sorted(entities); + if (new_entities.empty()) { + // fast path + return; } - entities = std::move(splittable_entities); + + check_non_intersecting(new_entities); + + vector continuous_entities; + vector blockquote_entities; + vector splittable_entities; + for (auto &entity : entities) { + if (is_splittable_entity(entity.type)) { + splittable_entities.push_back(std::move(entity)); + } else if (is_blockquote_entity(entity.type)) { + blockquote_entities.push_back(std::move(entity)); + } else { + continuous_entities.push_back(std::move(entity)); + } + } + + remove_entities_intersecting_blockquote(new_entities, blockquote_entities); + + // merge before combining with blockquote entities + continuous_entities = merge_entities(std::move(continuous_entities), std::move(new_entities)); + + if (!blockquote_entities.empty()) { + combine(continuous_entities, std::move(blockquote_entities)); + std::sort(continuous_entities.begin(), continuous_entities.end()); + } + + // must be called once to not merge some adjacent entities + entities = resplit_entities(std::move(splittable_entities), std::move(continuous_entities)); check_is_sorted(entities); } @@ -3017,7 +3064,7 @@ Status fix_formatted_text(string &text, vector &entities, bool al } if (!skip_new_entities) { - entities = merge_entities(std::move(entities), find_entities(text, skip_bot_commands)); + merge_new_entities(entities, find_entities(text, skip_bot_commands)); } // TODO MAX_MESSAGE_LENGTH and MAX_CAPTION_LENGTH diff --git a/test/message_entities.cpp b/test/message_entities.cpp index 7f865e81..96f77201 100644 --- a/test/message_entities.cpp +++ b/test/message_entities.cpp @@ -882,14 +882,41 @@ TEST(MessageEntities, fix_formatted_text) { "abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}}); check_fix_formatted_text("example.com", {}, "example.com", {{td::MessageEntity::Type::Url, 0, 11}}); + check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Pre, 0, 3}}, "example.com", + {{td::MessageEntity::Type::Pre, 0, 3}}); + check_fix_formatted_text("example.com", {{td::MessageEntity::Type::BlockQuote, 0, 3}}, "example.com", + {{td::MessageEntity::Type::BlockQuote, 0, 3}}); + check_fix_formatted_text("example.com", {{td::MessageEntity::Type::BlockQuote, 0, 11}}, "example.com", + {{td::MessageEntity::Type::BlockQuote, 0, 11}, {td::MessageEntity::Type::Url, 0, 11}}); + check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Italic, 0, 11}}, "example.com", + {{td::MessageEntity::Type::Url, 0, 11}, {td::MessageEntity::Type::Italic, 0, 11}}); + check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Italic, 0, 3}}, "example.com", + {{td::MessageEntity::Type::Url, 0, 11}, {td::MessageEntity::Type::Italic, 0, 3}}); + check_fix_formatted_text("example.com a", {{td::MessageEntity::Type::Italic, 0, 13}}, "example.com a", + {{td::MessageEntity::Type::Url, 0, 11}, + {td::MessageEntity::Type::Italic, 0, 11}, + {td::MessageEntity::Type::Italic, 11, 2}}); + check_fix_formatted_text("a example.com", {{td::MessageEntity::Type::Italic, 0, 13}}, "a example.com", + {{td::MessageEntity::Type::Italic, 0, 2}, + {td::MessageEntity::Type::Url, 2, 11}, + {td::MessageEntity::Type::Italic, 2, 11}}); for (size_t i = 0; i < 100000; i++) { - str = td::string(td::Random::fast(1, 20), 'a'); + bool is_url = td::Random::fast(0, 1) == 1; + td::int32 url_offset = 0; + td::int32 url_end = 0; + if (is_url) { + str = td::string(td::Random::fast(1, 5), 'a') + ":example.com:" + td::string(td::Random::fast(1, 5), 'a'); + url_offset = static_cast(str.find('e')); + url_end = url_offset + 11; + } else { + str = td::string(td::Random::fast(1, 20), 'a'); + } auto n = td::Random::fast(1, 20); td::vector entities; for (int j = 0; j < n; j++) { - td::int32 type = td::Random::fast(0, 16); + td::int32 type = td::Random::fast(4, 16); td::int32 offset = td::Random::fast(0, static_cast(str.size()) - 1); auto max_length = static_cast(str.size() - offset); if ((i & 1) != 0 && max_length > 4) { @@ -903,22 +930,37 @@ TEST(MessageEntities, fix_formatted_text) { td::vector result(length); for (auto &entity : entities) { for (auto pos = 0; pos < entity.length; pos++) { - result[entity.offset + pos] |= 1 << static_cast(entity.type); + result[entity.offset + pos] |= (1 << static_cast(entity.type)); } } return result; }; auto old_type_mask = get_type_mask(str.size(), entities); - ASSERT_TRUE(td::fix_formatted_text(str, entities, false, true, true, false).is_ok()); + ASSERT_TRUE(td::fix_formatted_text(str, entities, false, false, true, false).is_ok()); auto new_type_mask = get_type_mask(str.size(), entities); - auto spliitable_mask = (1 << 5) | (1 << 6) | (1 << 14) | (1 << 15); + auto splittable_mask = (1 << 5) | (1 << 6) | (1 << 14) | (1 << 15); for (std::size_t pos = 0; pos < str.size(); pos++) { if ((new_type_mask[pos] & ((1 << 7) | (1 << 8) | (1 << 9))) != 0) { // pre - ASSERT_EQ(new_type_mask[pos] & spliitable_mask, 0); + ASSERT_EQ(0, new_type_mask[pos] & splittable_mask); } else { - ASSERT_EQ(new_type_mask[pos] & spliitable_mask, old_type_mask[pos] & spliitable_mask); + ASSERT_EQ(old_type_mask[pos] & splittable_mask, new_type_mask[pos] & splittable_mask); } } + bool keep_url = is_url; + td::MessageEntity url_entity(td::MessageEntity::Type::Url, url_offset, url_end - url_offset); + for (auto &entity : entities) { + if (entity == url_entity) { + continue; + } + td::int32 offset = entity.offset; + td::int32 end = offset + entity.length; + + if (keep_url && ((1 << static_cast(entity.type)) & splittable_mask) == 0 && + !(end <= url_offset || url_end <= offset)) { + keep_url = (entity.type == td::MessageEntity::Type::BlockQuote && offset <= url_offset && url_end <= end); + } + } + ASSERT_EQ(keep_url, std::count(entities.begin(), entities.end(), url_entity) == 1); } }