Correctly merge new entities.
GitOrigin-RevId: 6ca976a3c17030cffb0c32119389ea5a8c1050ff
This commit is contained in:
parent
c17bb8a163
commit
81d0172f33
@ -12,6 +12,7 @@
|
|||||||
#include "td/telegram/Td.h"
|
#include "td/telegram/Td.h"
|
||||||
|
|
||||||
namespace td {
|
namespace td {
|
||||||
|
|
||||||
ClientActor::ClientActor(unique_ptr<TdCallback> callback) {
|
ClientActor::ClientActor(unique_ptr<TdCallback> callback) {
|
||||||
td_ = create_actor<Td>("Td", std::move(callback));
|
td_ = create_actor<Td>("Td", std::move(callback));
|
||||||
}
|
}
|
||||||
|
@ -1296,7 +1296,10 @@ static void remove_entities_intersecting_blockquote(vector<MessageEntity> &entit
|
|||||||
blockquote_it->offset + blockquote_it->length <= entities[i].offset)) {
|
blockquote_it->offset + blockquote_it->length <= entities[i].offset)) {
|
||||||
blockquote_it++;
|
blockquote_it++;
|
||||||
}
|
}
|
||||||
if (blockquote_it != blockquote_entities.end() && blockquote_it->offset < entities[i].offset + entities[i].length) {
|
if (blockquote_it != blockquote_entities.end() &&
|
||||||
|
(blockquote_it->offset + blockquote_it->length < entities[i].offset + entities[i].length ||
|
||||||
|
(entities[i].offset < blockquote_it->offset &&
|
||||||
|
blockquote_it->offset < entities[i].offset + entities[i].length))) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (i != left_entities) {
|
if (i != left_entities) {
|
||||||
@ -2874,6 +2877,21 @@ void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static vector<MessageEntity> resplit_entities(vector<MessageEntity> &&splittable_entities,
|
||||||
|
vector<MessageEntity> &&entities) {
|
||||||
|
if (!splittable_entities.empty()) {
|
||||||
|
split_entities(splittable_entities, entities); // can merge some entities
|
||||||
|
|
||||||
|
if (entities.empty()) {
|
||||||
|
return std::move(splittable_entities);
|
||||||
|
}
|
||||||
|
|
||||||
|
combine(entities, std::move(splittable_entities));
|
||||||
|
std::sort(entities.begin(), entities.end());
|
||||||
|
}
|
||||||
|
return std::move(entities);
|
||||||
|
}
|
||||||
|
|
||||||
static void fix_entities(vector<MessageEntity> &entities) {
|
static void fix_entities(vector<MessageEntity> &entities) {
|
||||||
if (!std::is_sorted(entities.begin(), entities.end())) {
|
if (!std::is_sorted(entities.begin(), entities.end())) {
|
||||||
std::sort(entities.begin(), entities.end());
|
std::sort(entities.begin(), entities.end());
|
||||||
@ -2909,15 +2927,44 @@ static void fix_entities(vector<MessageEntity> &entities) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// must be called once to not merge some adjacent entities
|
// must be called once to not merge some adjacent entities
|
||||||
split_entities(splittable_entities, continuous_entities);
|
entities = resplit_entities(std::move(splittable_entities), std::move(continuous_entities));
|
||||||
|
check_is_sorted(entities);
|
||||||
|
}
|
||||||
|
|
||||||
if (splittable_entities.empty()) {
|
static void merge_new_entities(vector<MessageEntity> &entities, vector<MessageEntity> new_entities) {
|
||||||
splittable_entities = std::move(continuous_entities);
|
check_is_sorted(entities);
|
||||||
} else if (!continuous_entities.empty()) {
|
if (new_entities.empty()) {
|
||||||
combine(splittable_entities, std::move(continuous_entities));
|
// fast path
|
||||||
std::sort(splittable_entities.begin(), splittable_entities.end());
|
return;
|
||||||
}
|
}
|
||||||
entities = std::move(splittable_entities);
|
|
||||||
|
check_non_intersecting(new_entities);
|
||||||
|
|
||||||
|
vector<MessageEntity> continuous_entities;
|
||||||
|
vector<MessageEntity> blockquote_entities;
|
||||||
|
vector<MessageEntity> splittable_entities;
|
||||||
|
for (auto &entity : entities) {
|
||||||
|
if (is_splittable_entity(entity.type)) {
|
||||||
|
splittable_entities.push_back(std::move(entity));
|
||||||
|
} else if (is_blockquote_entity(entity.type)) {
|
||||||
|
blockquote_entities.push_back(std::move(entity));
|
||||||
|
} else {
|
||||||
|
continuous_entities.push_back(std::move(entity));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
remove_entities_intersecting_blockquote(new_entities, blockquote_entities);
|
||||||
|
|
||||||
|
// merge before combining with blockquote entities
|
||||||
|
continuous_entities = merge_entities(std::move(continuous_entities), std::move(new_entities));
|
||||||
|
|
||||||
|
if (!blockquote_entities.empty()) {
|
||||||
|
combine(continuous_entities, std::move(blockquote_entities));
|
||||||
|
std::sort(continuous_entities.begin(), continuous_entities.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
// must be called once to not merge some adjacent entities
|
||||||
|
entities = resplit_entities(std::move(splittable_entities), std::move(continuous_entities));
|
||||||
check_is_sorted(entities);
|
check_is_sorted(entities);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3017,7 +3064,7 @@ Status fix_formatted_text(string &text, vector<MessageEntity> &entities, bool al
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!skip_new_entities) {
|
if (!skip_new_entities) {
|
||||||
entities = merge_entities(std::move(entities), find_entities(text, skip_bot_commands));
|
merge_new_entities(entities, find_entities(text, skip_bot_commands));
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO MAX_MESSAGE_LENGTH and MAX_CAPTION_LENGTH
|
// TODO MAX_MESSAGE_LENGTH and MAX_CAPTION_LENGTH
|
||||||
|
@ -882,14 +882,41 @@ TEST(MessageEntities, fix_formatted_text) {
|
|||||||
"abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}});
|
"abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}});
|
||||||
|
|
||||||
check_fix_formatted_text("example.com", {}, "example.com", {{td::MessageEntity::Type::Url, 0, 11}});
|
check_fix_formatted_text("example.com", {}, "example.com", {{td::MessageEntity::Type::Url, 0, 11}});
|
||||||
|
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Pre, 0, 3}}, "example.com",
|
||||||
|
{{td::MessageEntity::Type::Pre, 0, 3}});
|
||||||
|
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::BlockQuote, 0, 3}}, "example.com",
|
||||||
|
{{td::MessageEntity::Type::BlockQuote, 0, 3}});
|
||||||
|
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::BlockQuote, 0, 11}}, "example.com",
|
||||||
|
{{td::MessageEntity::Type::BlockQuote, 0, 11}, {td::MessageEntity::Type::Url, 0, 11}});
|
||||||
|
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Italic, 0, 11}}, "example.com",
|
||||||
|
{{td::MessageEntity::Type::Url, 0, 11}, {td::MessageEntity::Type::Italic, 0, 11}});
|
||||||
|
check_fix_formatted_text("example.com", {{td::MessageEntity::Type::Italic, 0, 3}}, "example.com",
|
||||||
|
{{td::MessageEntity::Type::Url, 0, 11}, {td::MessageEntity::Type::Italic, 0, 3}});
|
||||||
|
check_fix_formatted_text("example.com a", {{td::MessageEntity::Type::Italic, 0, 13}}, "example.com a",
|
||||||
|
{{td::MessageEntity::Type::Url, 0, 11},
|
||||||
|
{td::MessageEntity::Type::Italic, 0, 11},
|
||||||
|
{td::MessageEntity::Type::Italic, 11, 2}});
|
||||||
|
check_fix_formatted_text("a example.com", {{td::MessageEntity::Type::Italic, 0, 13}}, "a example.com",
|
||||||
|
{{td::MessageEntity::Type::Italic, 0, 2},
|
||||||
|
{td::MessageEntity::Type::Url, 2, 11},
|
||||||
|
{td::MessageEntity::Type::Italic, 2, 11}});
|
||||||
|
|
||||||
for (size_t i = 0; i < 100000; i++) {
|
for (size_t i = 0; i < 100000; i++) {
|
||||||
str = td::string(td::Random::fast(1, 20), 'a');
|
bool is_url = td::Random::fast(0, 1) == 1;
|
||||||
|
td::int32 url_offset = 0;
|
||||||
|
td::int32 url_end = 0;
|
||||||
|
if (is_url) {
|
||||||
|
str = td::string(td::Random::fast(1, 5), 'a') + ":example.com:" + td::string(td::Random::fast(1, 5), 'a');
|
||||||
|
url_offset = static_cast<td::int32>(str.find('e'));
|
||||||
|
url_end = url_offset + 11;
|
||||||
|
} else {
|
||||||
|
str = td::string(td::Random::fast(1, 20), 'a');
|
||||||
|
}
|
||||||
|
|
||||||
auto n = td::Random::fast(1, 20);
|
auto n = td::Random::fast(1, 20);
|
||||||
td::vector<td::MessageEntity> entities;
|
td::vector<td::MessageEntity> entities;
|
||||||
for (int j = 0; j < n; j++) {
|
for (int j = 0; j < n; j++) {
|
||||||
td::int32 type = td::Random::fast(0, 16);
|
td::int32 type = td::Random::fast(4, 16);
|
||||||
td::int32 offset = td::Random::fast(0, static_cast<int>(str.size()) - 1);
|
td::int32 offset = td::Random::fast(0, static_cast<int>(str.size()) - 1);
|
||||||
auto max_length = static_cast<int>(str.size() - offset);
|
auto max_length = static_cast<int>(str.size() - offset);
|
||||||
if ((i & 1) != 0 && max_length > 4) {
|
if ((i & 1) != 0 && max_length > 4) {
|
||||||
@ -903,22 +930,37 @@ TEST(MessageEntities, fix_formatted_text) {
|
|||||||
td::vector<td::int32> result(length);
|
td::vector<td::int32> result(length);
|
||||||
for (auto &entity : entities) {
|
for (auto &entity : entities) {
|
||||||
for (auto pos = 0; pos < entity.length; pos++) {
|
for (auto pos = 0; pos < entity.length; pos++) {
|
||||||
result[entity.offset + pos] |= 1 << static_cast<td::int32>(entity.type);
|
result[entity.offset + pos] |= (1 << static_cast<td::int32>(entity.type));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
auto old_type_mask = get_type_mask(str.size(), entities);
|
auto old_type_mask = get_type_mask(str.size(), entities);
|
||||||
ASSERT_TRUE(td::fix_formatted_text(str, entities, false, true, true, false).is_ok());
|
ASSERT_TRUE(td::fix_formatted_text(str, entities, false, false, true, false).is_ok());
|
||||||
auto new_type_mask = get_type_mask(str.size(), entities);
|
auto new_type_mask = get_type_mask(str.size(), entities);
|
||||||
auto spliitable_mask = (1 << 5) | (1 << 6) | (1 << 14) | (1 << 15);
|
auto splittable_mask = (1 << 5) | (1 << 6) | (1 << 14) | (1 << 15);
|
||||||
for (std::size_t pos = 0; pos < str.size(); pos++) {
|
for (std::size_t pos = 0; pos < str.size(); pos++) {
|
||||||
if ((new_type_mask[pos] & ((1 << 7) | (1 << 8) | (1 << 9))) != 0) { // pre
|
if ((new_type_mask[pos] & ((1 << 7) | (1 << 8) | (1 << 9))) != 0) { // pre
|
||||||
ASSERT_EQ(new_type_mask[pos] & spliitable_mask, 0);
|
ASSERT_EQ(0, new_type_mask[pos] & splittable_mask);
|
||||||
} else {
|
} else {
|
||||||
ASSERT_EQ(new_type_mask[pos] & spliitable_mask, old_type_mask[pos] & spliitable_mask);
|
ASSERT_EQ(old_type_mask[pos] & splittable_mask, new_type_mask[pos] & splittable_mask);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
bool keep_url = is_url;
|
||||||
|
td::MessageEntity url_entity(td::MessageEntity::Type::Url, url_offset, url_end - url_offset);
|
||||||
|
for (auto &entity : entities) {
|
||||||
|
if (entity == url_entity) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
td::int32 offset = entity.offset;
|
||||||
|
td::int32 end = offset + entity.length;
|
||||||
|
|
||||||
|
if (keep_url && ((1 << static_cast<td::int32>(entity.type)) & splittable_mask) == 0 &&
|
||||||
|
!(end <= url_offset || url_end <= offset)) {
|
||||||
|
keep_url = (entity.type == td::MessageEntity::Type::BlockQuote && offset <= url_offset && url_end <= end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ASSERT_EQ(keep_url, std::count(entities.begin(), entities.end(), url_entity) == 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user