Add random fix_formatted_text test.

GitOrigin-RevId: 35d2bf822bfebe4221a3495bb83fb2555a984a1c
This commit is contained in:
levlam 2020-02-21 16:22:40 +03:00
parent 185d0fd22f
commit 6b21b27cae
2 changed files with 48 additions and 13 deletions

View File

@ -2585,6 +2585,8 @@ vector<MessageEntity> get_message_entities(vector<tl_object_ptr<secret_api::Mess
// like clean_input_string but also fixes entities // like clean_input_string but also fixes entities
// entities must be sorted, can be nested, but must not intersect each other // entities must be sorted, can be nested, but must not intersect each other
static Result<string> clean_input_string_with_entities(const string &text, vector<MessageEntity> &entities) { static Result<string> clean_input_string_with_entities(const string &text, vector<MessageEntity> &entities) {
check_is_sorted(entities);
struct EntityInfo { struct EntityInfo {
MessageEntity *entity; MessageEntity *entity;
int32 utf16_skipped_before; int32 utf16_skipped_before;
@ -2799,7 +2801,7 @@ static std::pair<size_t, int32> remove_invalid_entities(const string &text, vect
// entities must contain only splittable entities // entities must contain only splittable entities
void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity> &other_entities) { void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity> &other_entities) {
check_is_sorted(entities); check_is_sorted(entities);
check_non_intersecting(other_entities); check_is_sorted(other_entities);
int32 begin_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {}; int32 begin_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {};
int32 end_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {}; int32 end_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {};
@ -2842,16 +2844,30 @@ void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity>
} }
flush_entities(end_offset); flush_entities(end_offset);
}; };
for (auto &other_entity : other_entities) {
add_entities(other_entity.offset); vector<const MessageEntity *> nested_entities_stack;
auto old_size = result.size(); auto add_offset = [&](int32 offset) {
add_entities(other_entity.offset + other_entity.length); while (!nested_entities_stack.empty() &&
if (is_pre_entity(other_entity.type)) { offset >= nested_entities_stack.back()->offset + nested_entities_stack.back()->length) {
result.resize(old_size); // remove non-intersecting entities from the stack
auto old_size = result.size();
add_entities(nested_entities_stack.back()->offset + nested_entities_stack.back()->length);
if (is_pre_entity(nested_entities_stack.back()->type)) {
result.resize(old_size);
}
nested_entities_stack.pop_back();
} }
add_entities(offset);
};
for (auto &other_entity : other_entities) {
add_offset(other_entity.offset);
nested_entities_stack.push_back(&other_entity);
} }
add_entities(std::numeric_limits<int32>::max()); add_offset(std::numeric_limits<int32>::max());
entities = std::move(result); entities = std::move(result);
// entities are sorted only by offset now, re-sort if needed // entities are sorted only by offset now, re-sort if needed
if (!std::is_sorted(entities.begin(), entities.end())) { if (!std::is_sorted(entities.begin(), entities.end())) {
std::sort(entities.begin(), entities.end()); std::sort(entities.begin(), entities.end());
@ -2884,19 +2900,17 @@ static void fix_entities(vector<MessageEntity> &entities) {
if (!blockquote_entities.empty()) { if (!blockquote_entities.empty()) {
remove_intersecting_entities(blockquote_entities); // blockquote entities can't intersect each other remove_intersecting_entities(blockquote_entities); // blockquote entities can't intersect each other
split_entities(splittable_entities, blockquote_entities);
// blockquote entities can contain continuous entities, but can't intersect them in the other ways // blockquote entities can contain continuous entities, but can't intersect them in the other ways
remove_entities_intersecting_blockquote(continuous_entities, blockquote_entities); remove_entities_intersecting_blockquote(continuous_entities, blockquote_entities);
}
split_entities(splittable_entities, continuous_entities); // split by remaining continuous entities
if (!blockquote_entities.empty()) {
combine(continuous_entities, std::move(blockquote_entities)); combine(continuous_entities, std::move(blockquote_entities));
std::sort(continuous_entities.begin(), continuous_entities.end()); std::sort(continuous_entities.begin(), continuous_entities.end());
} }
// must be called once to not merge some adjacent entities
split_entities(splittable_entities, continuous_entities);
if (splittable_entities.empty()) { if (splittable_entities.empty()) {
splittable_entities = std::move(continuous_entities); splittable_entities = std::move(continuous_entities);
} else if (!continuous_entities.empty()) { } else if (!continuous_entities.empty()) {

View File

@ -10,6 +10,7 @@
#include "td/utils/format.h" #include "td/utils/format.h"
#include "td/utils/logging.h" #include "td/utils/logging.h"
#include "td/utils/misc.h" #include "td/utils/misc.h"
#include "td/utils/Random.h"
#include "td/utils/Slice.h" #include "td/utils/Slice.h"
#include "td/utils/tests.h" #include "td/utils/tests.h"
#include "td/utils/utf8.h" #include "td/utils/utf8.h"
@ -879,6 +880,26 @@ TEST(MessageEntities, fix_formatted_text) {
"abc", {{td::MessageEntity::Type::BlockQuote, 1, 1}}); "abc", {{td::MessageEntity::Type::BlockQuote, 1, 1}});
check_fix_formatted_text("abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}}, check_fix_formatted_text("abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}},
"abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}}); "abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}});
check_fix_formatted_text("example.com", {}, "example.com", {{td::MessageEntity::Type::Url, 0, 11}});
for (size_t i = 0; i < 100000; i++) {
str = td::string(td::Random::fast(10, 30), 'a');
auto n = td::Random::fast(1, 10);
td::vector<td::MessageEntity> entities;
for (int j = 0; j < n; j++) {
td::int32 type = td::Random::fast(0, 16);
td::int32 offset = td::Random::fast(0, static_cast<int>(str.size()) - 1);
auto max_length = static_cast<int>(str.size() - offset);
if ((i & 1) != 0 && max_length > 4) {
max_length = 4;
}
td::int32 length = td::Random::fast(0, max_length);
entities.emplace_back(static_cast<td::MessageEntity::Type>(type), offset, length);
}
ASSERT_TRUE(td::fix_formatted_text(str, entities, false, true, true, false).is_ok());
}
} }
static void check_parse_html(td::string text, const td::string &result, const td::vector<td::MessageEntity> &entities) { static void check_parse_html(td::string text, const td::string &result, const td::vector<td::MessageEntity> &entities) {