Correctly merge new entities.

GitOrigin-RevId: 6ca976a3c17030cffb0c32119389ea5a8c1050ff
This commit is contained in:
levlam 2020-02-24 20:26:08 +03:00
parent c17bb8a163
commit 81d0172f33
3 changed files with 106 additions and 16 deletions

View File

@ -12,6 +12,7 @@
#include "td/telegram/Td.h" #include "td/telegram/Td.h"
namespace td { namespace td {
ClientActor::ClientActor(unique_ptr<TdCallback> callback) { ClientActor::ClientActor(unique_ptr<TdCallback> callback) {
td_ = create_actor<Td>("Td", std::move(callback)); td_ = create_actor<Td>("Td", std::move(callback));
} }

View File

@ -1296,7 +1296,10 @@ static void remove_entities_intersecting_blockquote(vector<MessageEntity> &entit
blockquote_it->offset + blockquote_it->length <= entities[i].offset)) { blockquote_it->offset + blockquote_it->length <= entities[i].offset)) {
blockquote_it++; blockquote_it++;
} }
if (blockquote_it != blockquote_entities.end() && blockquote_it->offset < entities[i].offset + entities[i].length) { if (blockquote_it != blockquote_entities.end() &&
(blockquote_it->offset + blockquote_it->length < entities[i].offset + entities[i].length ||
(entities[i].offset < blockquote_it->offset &&
blockquote_it->offset < entities[i].offset + entities[i].length))) {
continue; continue;
} }
if (i != left_entities) { if (i != left_entities) {
@ -2874,6 +2877,21 @@ void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity>
} }
} }
// Runs split_entities() on `splittable_entities` against `entities` and returns
// a single vector holding the result.
//
// Both arguments are consumed (taken by rvalue reference and moved from).
// - If there are no splittable entities, `entities` is returned as is, without
//   calling split_entities() or sorting.
// - If `entities` is empty after the split, only the splittable entities are returned.
// - Otherwise both sets are joined via combine() and the result is sorted.
static vector<MessageEntity> resplit_entities(vector<MessageEntity> &&splittable_entities,
                                              vector<MessageEntity> &&entities) {
  // Nothing to split: hand back the other entities untouched.
  if (splittable_entities.empty()) {
    return std::move(entities);
  }

  // Note: this call can merge some entities.
  split_entities(splittable_entities, entities);

  if (!entities.empty()) {
    // Both sets are non-trivial: join them and restore ordering.
    combine(entities, std::move(splittable_entities));
    std::sort(entities.begin(), entities.end());
    return std::move(entities);
  }

  // Only splittable entities are present, so no combine/sort step is performed.
  return std::move(splittable_entities);
}
static void fix_entities(vector<MessageEntity> &entities) { static void fix_entities(vector<MessageEntity> &entities) {
if (!std::is_sorted(entities.begin(), entities.end())) { if (!std::is_sorted(entities.begin(), entities.end())) {
std::sort(entities.begin(), entities.end()); std::sort(entities.begin(), entities.end());
@ -2909,15 +2927,44 @@ static void fix_entities(vector<MessageEntity> &entities) {
} }
// must be called once to not merge some adjacent entities // must be called once to not merge some adjacent entities
split_entities(splittable_entities, continuous_entities); entities = resplit_entities(std::move(splittable_entities), std::move(continuous_entities));
check_is_sorted(entities);
}
if (splittable_entities.empty()) { static void merge_new_entities(vector<MessageEntity> &entities, vector<MessageEntity> new_entities) {
splittable_entities = std::move(continuous_entities); check_is_sorted(entities);
} else if (!continuous_entities.empty()) { if (new_entities.empty()) {
combine(splittable_entities, std::move(continuous_entities)); // fast path
std::sort(splittable_entities.begin(), splittable_entities.end()); return;
} }
entities = std::move(splittable_entities);
check_non_intersecting(new_entities);
vector<MessageEntity> continuous_entities;
vector<MessageEntity> blockquote_entities;
vector<MessageEntity> splittable_entities;
for (auto &entity : entities) {
if (is_splittable_entity(entity.type)) {
splittable_entities.push_back(std::move(entity));
} else if (is_blockquote_entity(entity.type)) {
blockquote_entities.push_back(std::move(entity));
} else {
continuous_entities.push_back(std::move(entity));
}
}
remove_entities_intersecting_blockquote(new_entities, blockquote_entities);
// merge before combining with blockquote entities
continuous_entities = merge_entities(std::move(continuous_entities), std::move(new_entities));
if (!blockquote_entities.empty()) {
combine(continuous_entities, std::move(blockquote_entities));
std::sort(continuous_entities.begin(), continuous_entities.end());
}
// must be called once to not merge some adjacent entities
entities = resplit_entities(std::move(splittable_entities), std::move(continuous_entities));
check_is_sorted(entities); check_is_sorted(entities);
} }
@ -3017,7 +3064,7 @@ Status fix_formatted_text(string &text, vector<MessageEntity> &entities, bool al
} }
if (!skip_new_entities) { if (!skip_new_entities) {
entities = merge_entities(std::move(entities), find_entities(text, skip_bot_commands)); merge_new_entities(entities, find_entities(text, skip_bot_commands));
} }
// TODO MAX_MESSAGE_LENGTH and MAX_CAPTION_LENGTH // TODO MAX_MESSAGE_LENGTH and MAX_CAPTION_LENGTH

View File

@ -882,14 +882,41 @@ TEST(MessageEntities, fix_formatted_text) {
"abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}}); "abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}});
check_fix_formatted_text("example.com", {}, "example.com", {{td::MessageEntity::Type::Url, 0, 11}}); check_fix_formatted_text("example.com", {}, "example.com", {{td::MessageEntity::Type::Url, 0, 11}});
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Pre, 0, 3}}, "example.com",
{{td::MessageEntity::Type::Pre, 0, 3}});
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::BlockQuote, 0, 3}}, "example.com",
{{td::MessageEntity::Type::BlockQuote, 0, 3}});
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::BlockQuote, 0, 11}}, "example.com",
{{td::MessageEntity::Type::BlockQuote, 0, 11}, {td::MessageEntity::Type::Url, 0, 11}});
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Italic, 0, 11}}, "example.com",
{{td::MessageEntity::Type::Url, 0, 11}, {td::MessageEntity::Type::Italic, 0, 11}});
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Italic, 0, 3}}, "example.com",
{{td::MessageEntity::Type::Url, 0, 11}, {td::MessageEntity::Type::Italic, 0, 3}});
check_fix_formatted_text("example.com a", {{td::MessageEntity::Type::Italic, 0, 13}}, "example.com a",
{{td::MessageEntity::Type::Url, 0, 11},
{td::MessageEntity::Type::Italic, 0, 11},
{td::MessageEntity::Type::Italic, 11, 2}});
check_fix_formatted_text("a example.com", {{td::MessageEntity::Type::Italic, 0, 13}}, "a example.com",
{{td::MessageEntity::Type::Italic, 0, 2},
{td::MessageEntity::Type::Url, 2, 11},
{td::MessageEntity::Type::Italic, 2, 11}});
for (size_t i = 0; i < 100000; i++) { for (size_t i = 0; i < 100000; i++) {
str = td::string(td::Random::fast(1, 20), 'a'); bool is_url = td::Random::fast(0, 1) == 1;
td::int32 url_offset = 0;
td::int32 url_end = 0;
if (is_url) {
str = td::string(td::Random::fast(1, 5), 'a') + ":example.com:" + td::string(td::Random::fast(1, 5), 'a');
url_offset = static_cast<td::int32>(str.find('e'));
url_end = url_offset + 11;
} else {
str = td::string(td::Random::fast(1, 20), 'a');
}
auto n = td::Random::fast(1, 20); auto n = td::Random::fast(1, 20);
td::vector<td::MessageEntity> entities; td::vector<td::MessageEntity> entities;
for (int j = 0; j < n; j++) { for (int j = 0; j < n; j++) {
td::int32 type = td::Random::fast(0, 16); td::int32 type = td::Random::fast(4, 16);
td::int32 offset = td::Random::fast(0, static_cast<int>(str.size()) - 1); td::int32 offset = td::Random::fast(0, static_cast<int>(str.size()) - 1);
auto max_length = static_cast<int>(str.size() - offset); auto max_length = static_cast<int>(str.size() - offset);
if ((i & 1) != 0 && max_length > 4) { if ((i & 1) != 0 && max_length > 4) {
@ -903,22 +930,37 @@ TEST(MessageEntities, fix_formatted_text) {
td::vector<td::int32> result(length); td::vector<td::int32> result(length);
for (auto &entity : entities) { for (auto &entity : entities) {
for (auto pos = 0; pos < entity.length; pos++) { for (auto pos = 0; pos < entity.length; pos++) {
result[entity.offset + pos] |= 1 << static_cast<td::int32>(entity.type); result[entity.offset + pos] |= (1 << static_cast<td::int32>(entity.type));
} }
} }
return result; return result;
}; };
auto old_type_mask = get_type_mask(str.size(), entities); auto old_type_mask = get_type_mask(str.size(), entities);
ASSERT_TRUE(td::fix_formatted_text(str, entities, false, true, true, false).is_ok()); ASSERT_TRUE(td::fix_formatted_text(str, entities, false, false, true, false).is_ok());
auto new_type_mask = get_type_mask(str.size(), entities); auto new_type_mask = get_type_mask(str.size(), entities);
auto spliitable_mask = (1 << 5) | (1 << 6) | (1 << 14) | (1 << 15); auto splittable_mask = (1 << 5) | (1 << 6) | (1 << 14) | (1 << 15);
for (std::size_t pos = 0; pos < str.size(); pos++) { for (std::size_t pos = 0; pos < str.size(); pos++) {
if ((new_type_mask[pos] & ((1 << 7) | (1 << 8) | (1 << 9))) != 0) { // pre if ((new_type_mask[pos] & ((1 << 7) | (1 << 8) | (1 << 9))) != 0) { // pre
ASSERT_EQ(new_type_mask[pos] & spliitable_mask, 0); ASSERT_EQ(0, new_type_mask[pos] & splittable_mask);
} else { } else {
ASSERT_EQ(new_type_mask[pos] & spliitable_mask, old_type_mask[pos] & spliitable_mask); ASSERT_EQ(old_type_mask[pos] & splittable_mask, new_type_mask[pos] & splittable_mask);
} }
} }
bool keep_url = is_url;
td::MessageEntity url_entity(td::MessageEntity::Type::Url, url_offset, url_end - url_offset);
for (auto &entity : entities) {
if (entity == url_entity) {
continue;
}
td::int32 offset = entity.offset;
td::int32 end = offset + entity.length;
if (keep_url && ((1 << static_cast<td::int32>(entity.type)) & splittable_mask) == 0 &&
!(end <= url_offset || url_end <= offset)) {
keep_url = (entity.type == td::MessageEntity::Type::BlockQuote && offset <= url_offset && url_end <= end);
}
}
ASSERT_EQ(keep_url, std::count(entities.begin(), entities.end(), url_entity) == 1);
} }
} }