Correctly merge new entities.
GitOrigin-RevId: 6ca976a3c17030cffb0c32119389ea5a8c1050ff
This commit is contained in:
parent
c17bb8a163
commit
81d0172f33
@ -12,6 +12,7 @@
|
||||
#include "td/telegram/Td.h"
|
||||
|
||||
namespace td {
|
||||
|
||||
// Constructs the client actor and immediately spawns the underlying "Td" actor,
// handing ownership of the user-supplied callback over to it.
ClientActor::ClientActor(unique_ptr<TdCallback> callback)
    : td_(create_actor<Td>("Td", std::move(callback))) {
}
|
||||
|
@ -1296,7 +1296,10 @@ static void remove_entities_intersecting_blockquote(vector<MessageEntity> &entit
|
||||
blockquote_it->offset + blockquote_it->length <= entities[i].offset)) {
|
||||
blockquote_it++;
|
||||
}
|
||||
if (blockquote_it != blockquote_entities.end() && blockquote_it->offset < entities[i].offset + entities[i].length) {
|
||||
if (blockquote_it != blockquote_entities.end() &&
|
||||
(blockquote_it->offset + blockquote_it->length < entities[i].offset + entities[i].length ||
|
||||
(entities[i].offset < blockquote_it->offset &&
|
||||
blockquote_it->offset < entities[i].offset + entities[i].length))) {
|
||||
continue;
|
||||
}
|
||||
if (i != left_entities) {
|
||||
@ -2874,6 +2877,21 @@ void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity>
|
||||
}
|
||||
}
|
||||
|
||||
// Splits `splittable_entities` against `entities` and returns both lists joined
// into a single sorted vector. Both arguments are consumed.
//
// If there are no splittable entities, `entities` is returned unchanged.
// If `entities` is empty, the (re-split) splittable entities are returned as is.
static vector<MessageEntity> resplit_entities(vector<MessageEntity> &&splittable_entities,
                                              vector<MessageEntity> &&entities) {
  if (splittable_entities.empty()) {
    // nothing to split; the other list is already the answer
    return std::move(entities);
  }

  split_entities(splittable_entities, entities);  // can merge some entities

  if (entities.empty()) {
    return std::move(splittable_entities);
  }

  // join both lists and restore the sorted order expected by callers
  combine(entities, std::move(splittable_entities));
  std::sort(entities.begin(), entities.end());
  return std::move(entities);
}
|
||||
|
||||
static void fix_entities(vector<MessageEntity> &entities) {
|
||||
if (!std::is_sorted(entities.begin(), entities.end())) {
|
||||
std::sort(entities.begin(), entities.end());
|
||||
@ -2909,15 +2927,44 @@ static void fix_entities(vector<MessageEntity> &entities) {
|
||||
}
|
||||
|
||||
// must be called once to not merge some adjacent entities
|
||||
split_entities(splittable_entities, continuous_entities);
|
||||
entities = resplit_entities(std::move(splittable_entities), std::move(continuous_entities));
|
||||
check_is_sorted(entities);
|
||||
}
|
||||
|
||||
if (splittable_entities.empty()) {
|
||||
splittable_entities = std::move(continuous_entities);
|
||||
} else if (!continuous_entities.empty()) {
|
||||
combine(splittable_entities, std::move(continuous_entities));
|
||||
std::sort(splittable_entities.begin(), splittable_entities.end());
|
||||
// Merges automatically found `new_entities` into the already sorted `entities`.
//
// `entities` is updated in place; `new_entities` is taken by value and consumed.
// The existing entities are partitioned into three groups:
//   - splittable entities (is_splittable_entity),
//   - blockquote entities (is_blockquote_entity),
//   - all remaining ("continuous") entities.
// New entities are first filtered against the existing blockquotes, then merged
// with the continuous entities only; blockquotes are added back afterwards and
// the splittable entities are re-split against the combined result.
static void merge_new_entities(vector<MessageEntity> &entities, vector<MessageEntity> new_entities) {
  check_is_sorted(entities);
  if (new_entities.empty()) {
    // fast path
    return;
  }

  check_non_intersecting(new_entities);

  // Partition the existing entities by kind. Elements are moved out of `entities`,
  // which is fully reassigned at the end of the function.
  vector<MessageEntity> continuous_entities;
  vector<MessageEntity> blockquote_entities;
  vector<MessageEntity> splittable_entities;
  for (auto &entity : entities) {
    if (is_splittable_entity(entity.type)) {
      splittable_entities.push_back(std::move(entity));
    } else if (is_blockquote_entity(entity.type)) {
      blockquote_entities.push_back(std::move(entity));
    } else {
      continuous_entities.push_back(std::move(entity));
    }
  }

  remove_entities_intersecting_blockquote(new_entities, blockquote_entities);

  // merge before combining with blockquote entities
  continuous_entities = merge_entities(std::move(continuous_entities), std::move(new_entities));

  if (!blockquote_entities.empty()) {
    combine(continuous_entities, std::move(blockquote_entities));
    std::sort(continuous_entities.begin(), continuous_entities.end());
  }

  // must be called once to not merge some adjacent entities
  entities = resplit_entities(std::move(splittable_entities), std::move(continuous_entities));
  check_is_sorted(entities);
}
|
||||
|
||||
@ -3017,7 +3064,7 @@ Status fix_formatted_text(string &text, vector<MessageEntity> &entities, bool al
|
||||
}
|
||||
|
||||
if (!skip_new_entities) {
|
||||
entities = merge_entities(std::move(entities), find_entities(text, skip_bot_commands));
|
||||
merge_new_entities(entities, find_entities(text, skip_bot_commands));
|
||||
}
|
||||
|
||||
// TODO MAX_MESSAGE_LENGTH and MAX_CAPTION_LENGTH
|
||||
|
@ -882,14 +882,41 @@ TEST(MessageEntities, fix_formatted_text) {
|
||||
"abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}});
|
||||
|
||||
check_fix_formatted_text("example.com", {}, "example.com", {{td::MessageEntity::Type::Url, 0, 11}});
|
||||
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Pre, 0, 3}}, "example.com",
|
||||
{{td::MessageEntity::Type::Pre, 0, 3}});
|
||||
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::BlockQuote, 0, 3}}, "example.com",
|
||||
{{td::MessageEntity::Type::BlockQuote, 0, 3}});
|
||||
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::BlockQuote, 0, 11}}, "example.com",
|
||||
{{td::MessageEntity::Type::BlockQuote, 0, 11}, {td::MessageEntity::Type::Url, 0, 11}});
|
||||
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Italic, 0, 11}}, "example.com",
|
||||
{{td::MessageEntity::Type::Url, 0, 11}, {td::MessageEntity::Type::Italic, 0, 11}});
|
||||
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Italic, 0, 3}}, "example.com",
|
||||
{{td::MessageEntity::Type::Url, 0, 11}, {td::MessageEntity::Type::Italic, 0, 3}});
|
||||
check_fix_formatted_text("example.com a", {{td::MessageEntity::Type::Italic, 0, 13}}, "example.com a",
|
||||
{{td::MessageEntity::Type::Url, 0, 11},
|
||||
{td::MessageEntity::Type::Italic, 0, 11},
|
||||
{td::MessageEntity::Type::Italic, 11, 2}});
|
||||
check_fix_formatted_text("a example.com", {{td::MessageEntity::Type::Italic, 0, 13}}, "a example.com",
|
||||
{{td::MessageEntity::Type::Italic, 0, 2},
|
||||
{td::MessageEntity::Type::Url, 2, 11},
|
||||
{td::MessageEntity::Type::Italic, 2, 11}});
|
||||
|
||||
for (size_t i = 0; i < 100000; i++) {
|
||||
str = td::string(td::Random::fast(1, 20), 'a');
|
||||
bool is_url = td::Random::fast(0, 1) == 1;
|
||||
td::int32 url_offset = 0;
|
||||
td::int32 url_end = 0;
|
||||
if (is_url) {
|
||||
str = td::string(td::Random::fast(1, 5), 'a') + ":example.com:" + td::string(td::Random::fast(1, 5), 'a');
|
||||
url_offset = static_cast<td::int32>(str.find('e'));
|
||||
url_end = url_offset + 11;
|
||||
} else {
|
||||
str = td::string(td::Random::fast(1, 20), 'a');
|
||||
}
|
||||
|
||||
auto n = td::Random::fast(1, 20);
|
||||
td::vector<td::MessageEntity> entities;
|
||||
for (int j = 0; j < n; j++) {
|
||||
td::int32 type = td::Random::fast(0, 16);
|
||||
td::int32 type = td::Random::fast(4, 16);
|
||||
td::int32 offset = td::Random::fast(0, static_cast<int>(str.size()) - 1);
|
||||
auto max_length = static_cast<int>(str.size() - offset);
|
||||
if ((i & 1) != 0 && max_length > 4) {
|
||||
@ -903,22 +930,37 @@ TEST(MessageEntities, fix_formatted_text) {
|
||||
td::vector<td::int32> result(length);
|
||||
for (auto &entity : entities) {
|
||||
for (auto pos = 0; pos < entity.length; pos++) {
|
||||
result[entity.offset + pos] |= 1 << static_cast<td::int32>(entity.type);
|
||||
result[entity.offset + pos] |= (1 << static_cast<td::int32>(entity.type));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
};
|
||||
auto old_type_mask = get_type_mask(str.size(), entities);
|
||||
ASSERT_TRUE(td::fix_formatted_text(str, entities, false, true, true, false).is_ok());
|
||||
ASSERT_TRUE(td::fix_formatted_text(str, entities, false, false, true, false).is_ok());
|
||||
auto new_type_mask = get_type_mask(str.size(), entities);
|
||||
auto spliitable_mask = (1 << 5) | (1 << 6) | (1 << 14) | (1 << 15);
|
||||
auto splittable_mask = (1 << 5) | (1 << 6) | (1 << 14) | (1 << 15);
|
||||
for (std::size_t pos = 0; pos < str.size(); pos++) {
|
||||
if ((new_type_mask[pos] & ((1 << 7) | (1 << 8) | (1 << 9))) != 0) { // pre
|
||||
ASSERT_EQ(new_type_mask[pos] & spliitable_mask, 0);
|
||||
ASSERT_EQ(0, new_type_mask[pos] & splittable_mask);
|
||||
} else {
|
||||
ASSERT_EQ(new_type_mask[pos] & spliitable_mask, old_type_mask[pos] & spliitable_mask);
|
||||
ASSERT_EQ(old_type_mask[pos] & splittable_mask, new_type_mask[pos] & splittable_mask);
|
||||
}
|
||||
}
|
||||
bool keep_url = is_url;
|
||||
td::MessageEntity url_entity(td::MessageEntity::Type::Url, url_offset, url_end - url_offset);
|
||||
for (auto &entity : entities) {
|
||||
if (entity == url_entity) {
|
||||
continue;
|
||||
}
|
||||
td::int32 offset = entity.offset;
|
||||
td::int32 end = offset + entity.length;
|
||||
|
||||
if (keep_url && ((1 << static_cast<td::int32>(entity.type)) & splittable_mask) == 0 &&
|
||||
!(end <= url_offset || url_end <= offset)) {
|
||||
keep_url = (entity.type == td::MessageEntity::Type::BlockQuote && offset <= url_offset && url_end <= end);
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(keep_url, std::count(entities.begin(), entities.end(), url_entity) == 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user