Add random fix_formatted_text test.
GitOrigin-RevId: 35d2bf822bfebe4221a3495bb83fb2555a984a1c
This commit is contained in:
parent
185d0fd22f
commit
6b21b27cae
@ -2585,6 +2585,8 @@ vector<MessageEntity> get_message_entities(vector<tl_object_ptr<secret_api::Mess
|
|||||||
// like clean_input_string but also fixes entities
|
// like clean_input_string but also fixes entities
|
||||||
// entities must be sorted, can be nested, but must not intersect each other
|
// entities must be sorted, can be nested, but must not intersect each other
|
||||||
static Result<string> clean_input_string_with_entities(const string &text, vector<MessageEntity> &entities) {
|
static Result<string> clean_input_string_with_entities(const string &text, vector<MessageEntity> &entities) {
|
||||||
|
check_is_sorted(entities);
|
||||||
|
|
||||||
struct EntityInfo {
|
struct EntityInfo {
|
||||||
MessageEntity *entity;
|
MessageEntity *entity;
|
||||||
int32 utf16_skipped_before;
|
int32 utf16_skipped_before;
|
||||||
@ -2799,7 +2801,7 @@ static std::pair<size_t, int32> remove_invalid_entities(const string &text, vect
|
|||||||
// entities must contain only splittable entities
|
// entities must contain only splittable entities
|
||||||
void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity> &other_entities) {
|
void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity> &other_entities) {
|
||||||
check_is_sorted(entities);
|
check_is_sorted(entities);
|
||||||
check_non_intersecting(other_entities);
|
check_is_sorted(other_entities);
|
||||||
|
|
||||||
int32 begin_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {};
|
int32 begin_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {};
|
||||||
int32 end_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {};
|
int32 end_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {};
|
||||||
@ -2842,16 +2844,30 @@ void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity>
|
|||||||
}
|
}
|
||||||
flush_entities(end_offset);
|
flush_entities(end_offset);
|
||||||
};
|
};
|
||||||
for (auto &other_entity : other_entities) {
|
|
||||||
add_entities(other_entity.offset);
|
vector<const MessageEntity *> nested_entities_stack;
|
||||||
auto old_size = result.size();
|
auto add_offset = [&](int32 offset) {
|
||||||
add_entities(other_entity.offset + other_entity.length);
|
while (!nested_entities_stack.empty() &&
|
||||||
if (is_pre_entity(other_entity.type)) {
|
offset >= nested_entities_stack.back()->offset + nested_entities_stack.back()->length) {
|
||||||
result.resize(old_size);
|
// remove non-intersecting entities from the stack
|
||||||
|
auto old_size = result.size();
|
||||||
|
add_entities(nested_entities_stack.back()->offset + nested_entities_stack.back()->length);
|
||||||
|
if (is_pre_entity(nested_entities_stack.back()->type)) {
|
||||||
|
result.resize(old_size);
|
||||||
|
}
|
||||||
|
nested_entities_stack.pop_back();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
add_entities(offset);
|
||||||
|
};
|
||||||
|
for (auto &other_entity : other_entities) {
|
||||||
|
add_offset(other_entity.offset);
|
||||||
|
nested_entities_stack.push_back(&other_entity);
|
||||||
}
|
}
|
||||||
add_entities(std::numeric_limits<int32>::max());
|
add_offset(std::numeric_limits<int32>::max());
|
||||||
|
|
||||||
entities = std::move(result);
|
entities = std::move(result);
|
||||||
|
|
||||||
// entities are sorted only by offset now, re-sort if needed
|
// entities are sorted only by offset now, re-sort if needed
|
||||||
if (!std::is_sorted(entities.begin(), entities.end())) {
|
if (!std::is_sorted(entities.begin(), entities.end())) {
|
||||||
std::sort(entities.begin(), entities.end());
|
std::sort(entities.begin(), entities.end());
|
||||||
@ -2884,19 +2900,17 @@ static void fix_entities(vector<MessageEntity> &entities) {
|
|||||||
|
|
||||||
if (!blockquote_entities.empty()) {
|
if (!blockquote_entities.empty()) {
|
||||||
remove_intersecting_entities(blockquote_entities); // blockquote entities can't intersect each other
|
remove_intersecting_entities(blockquote_entities); // blockquote entities can't intersect each other
|
||||||
split_entities(splittable_entities, blockquote_entities);
|
|
||||||
|
|
||||||
// blockquote entities can contain continuous entities, but can't intersect them in the other ways
|
// blockquote entities can contain continuous entities, but can't intersect them in the other ways
|
||||||
remove_entities_intersecting_blockquote(continuous_entities, blockquote_entities);
|
remove_entities_intersecting_blockquote(continuous_entities, blockquote_entities);
|
||||||
}
|
|
||||||
|
|
||||||
split_entities(splittable_entities, continuous_entities); // split by remaining continuous entities
|
|
||||||
|
|
||||||
if (!blockquote_entities.empty()) {
|
|
||||||
combine(continuous_entities, std::move(blockquote_entities));
|
combine(continuous_entities, std::move(blockquote_entities));
|
||||||
std::sort(continuous_entities.begin(), continuous_entities.end());
|
std::sort(continuous_entities.begin(), continuous_entities.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// must be called once to not merge some adjacent entities
|
||||||
|
split_entities(splittable_entities, continuous_entities);
|
||||||
|
|
||||||
if (splittable_entities.empty()) {
|
if (splittable_entities.empty()) {
|
||||||
splittable_entities = std::move(continuous_entities);
|
splittable_entities = std::move(continuous_entities);
|
||||||
} else if (!continuous_entities.empty()) {
|
} else if (!continuous_entities.empty()) {
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "td/utils/format.h"
|
#include "td/utils/format.h"
|
||||||
#include "td/utils/logging.h"
|
#include "td/utils/logging.h"
|
||||||
#include "td/utils/misc.h"
|
#include "td/utils/misc.h"
|
||||||
|
#include "td/utils/Random.h"
|
||||||
#include "td/utils/Slice.h"
|
#include "td/utils/Slice.h"
|
||||||
#include "td/utils/tests.h"
|
#include "td/utils/tests.h"
|
||||||
#include "td/utils/utf8.h"
|
#include "td/utils/utf8.h"
|
||||||
@ -879,6 +880,26 @@ TEST(MessageEntities, fix_formatted_text) {
|
|||||||
"abc", {{td::MessageEntity::Type::BlockQuote, 1, 1}});
|
"abc", {{td::MessageEntity::Type::BlockQuote, 1, 1}});
|
||||||
check_fix_formatted_text("abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}},
|
check_fix_formatted_text("abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}},
|
||||||
"abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}});
|
"abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}});
|
||||||
|
|
||||||
|
check_fix_formatted_text("example.com", {}, "example.com", {{td::MessageEntity::Type::Url, 0, 11}});
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 100000; i++) {
|
||||||
|
str = td::string(td::Random::fast(10, 30), 'a');
|
||||||
|
|
||||||
|
auto n = td::Random::fast(1, 10);
|
||||||
|
td::vector<td::MessageEntity> entities;
|
||||||
|
for (int j = 0; j < n; j++) {
|
||||||
|
td::int32 type = td::Random::fast(0, 16);
|
||||||
|
td::int32 offset = td::Random::fast(0, static_cast<int>(str.size()) - 1);
|
||||||
|
auto max_length = static_cast<int>(str.size() - offset);
|
||||||
|
if ((i & 1) != 0 && max_length > 4) {
|
||||||
|
max_length = 4;
|
||||||
|
}
|
||||||
|
td::int32 length = td::Random::fast(0, max_length);
|
||||||
|
entities.emplace_back(static_cast<td::MessageEntity::Type>(type), offset, length);
|
||||||
|
}
|
||||||
|
ASSERT_TRUE(td::fix_formatted_text(str, entities, false, true, true, false).is_ok());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_parse_html(td::string text, const td::string &result, const td::vector<td::MessageEntity> &entities) {
|
static void check_parse_html(td::string text, const td::string &result, const td::vector<td::MessageEntity> &entities) {
|
||||||
|
Reference in New Issue
Block a user