Add more fix_formatted_text tests.

GitOrigin-RevId: c24e52c88921226d63fead4dbf7331921bd8f185
This commit is contained in:
levlam 2020-02-21 14:56:26 +03:00
parent 962ea55765
commit 185d0fd22f
2 changed files with 141 additions and 18 deletions

View File

@ -1146,13 +1146,36 @@ static void check_non_intersecting(const vector<MessageEntity> &entities) {
}
}
// Returns a mask with exactly one bit set: bit number static_cast<int32>(type).
// Declared constexpr so that the get_*_entities_mask() helpers, which are
// constexpr themselves, can combine the per-type masks in constant expressions.
// NOTE(review): the shift is only well defined while the numeric value of every
// MessageEntity::Type enumerator stays small enough for a 32-bit signed shift
// (0..30) — the enum is declared elsewhere, confirm when adding new types.
static constexpr int32 get_entity_type_mask(MessageEntity::Type type) {
  return 1 << static_cast<int32>(type);
}
// Union of the type masks of the entity types that is_splittable_entity()
// accepts: Bold, Italic, Underline and Strikethrough.
// Kept as a single return expression so the function stays a valid constexpr
// function under every language standard that has constexpr.
static constexpr int32 get_splittable_entities_mask() {
  return get_entity_type_mask(MessageEntity::Type::Strikethrough) |
         get_entity_type_mask(MessageEntity::Type::Underline) |
         get_entity_type_mask(MessageEntity::Type::Italic) |
         get_entity_type_mask(MessageEntity::Type::Bold);
}
// Mask that has only the bit of the BlockQuote entity type set.
static constexpr int32 get_blockquote_entities_mask() {
return get_entity_type_mask(MessageEntity::Type::BlockQuote);
}
// Union of the type masks of the entity types that is_continuous_entity()
// accepts: Mention, Hashtag, BotCommand, Url, EmailAddress, TextUrl,
// MentionName, Cashtag, PhoneNumber and BankCardNumber.
static constexpr int32 get_continuous_entities_mask() {
  return get_entity_type_mask(MessageEntity::Type::Mention) |
         get_entity_type_mask(MessageEntity::Type::MentionName) |
         get_entity_type_mask(MessageEntity::Type::Hashtag) |
         get_entity_type_mask(MessageEntity::Type::Cashtag) |
         get_entity_type_mask(MessageEntity::Type::BotCommand) |
         get_entity_type_mask(MessageEntity::Type::Url) |
         get_entity_type_mask(MessageEntity::Type::TextUrl) |
         get_entity_type_mask(MessageEntity::Type::EmailAddress) |
         get_entity_type_mask(MessageEntity::Type::PhoneNumber) |
         get_entity_type_mask(MessageEntity::Type::BankCardNumber);
}
// Union of the type masks of the entity types that is_pre_entity() accepts:
// Pre, Code and PreCode.
static constexpr int32 get_pre_entities_mask() {
  return get_entity_type_mask(MessageEntity::Type::PreCode) |
         get_entity_type_mask(MessageEntity::Type::Code) |
         get_entity_type_mask(MessageEntity::Type::Pre);
}
// Returns 1 if the entity type is one of the types collected in
// get_splittable_entities_mask() (Bold, Italic, Underline, Strikethrough)
// and 0 otherwise.
// The type list lives only in the mask helper, so this predicate and any
// mask-based check built on the same helper cannot drift apart.
static int32 is_splittable_entity(MessageEntity::Type type) {
  return (get_entity_type_mask(type) & get_splittable_entities_mask()) != 0;
}
static int32 is_blockquote_entity(MessageEntity::Type type) {
@ -1160,15 +1183,11 @@ static int32 is_blockquote_entity(MessageEntity::Type type) {
}
// Returns 1 if the entity type is one of the types collected in
// get_continuous_entities_mask() (Mention, Hashtag, BotCommand, Url,
// EmailAddress, TextUrl, MentionName, Cashtag, PhoneNumber, BankCardNumber)
// and 0 otherwise.
// The type list lives only in the mask helper, so this predicate and any
// mask-based check built on the same helper cannot drift apart.
static int32 is_continuous_entity(MessageEntity::Type type) {
  return (get_entity_type_mask(type) & get_continuous_entities_mask()) != 0;
}
// Returns 1 if the entity type is one of the types collected in
// get_pre_entities_mask() (Pre, Code, PreCode) and 0 otherwise.
// The type list lives only in the mask helper, so this predicate and any
// mask-based check built on the same helper cannot drift apart.
static int32 is_pre_entity(MessageEntity::Type type) {
  return (get_entity_type_mask(type) & get_pre_entities_mask()) != 0;
}
static constexpr size_t SPLITTABLE_ENTITY_TYPE_COUNT = 4;
@ -1215,9 +1234,14 @@ static bool are_entities_valid(const vector<MessageEntity> &entities) {
// Pre and Code can't contain nested entities
return false;
}
if (is_continuous_entity(parent_type) &&
(is_pre_entity(entity.type) || is_continuous_entity(entity.type) || is_blockquote_entity(entity.type))) {
// continuous can't contain other continuous and blockquote
// parents are not pre after this point
if (is_pre_entity(entity.type) && (nested_entity_type_mask & ~get_blockquote_entities_mask()) != 0) {
// Pre and Code can't be contained in other entities, except blockquote
return false;
}
if ((is_continuous_entity(entity.type) || is_blockquote_entity(entity.type)) &&
(nested_entity_type_mask & get_continuous_entities_mask()) != 0) {
// continuous and blockquote can't be contained in continuous
return false;
}
}
@ -1225,7 +1249,7 @@ static bool are_entities_valid(const vector<MessageEntity> &entities) {
if (is_splittable_entity(entity.type)) {
auto index = get_splittable_entity_type_index(entity.type);
if (end_pos[index] >= entity.offset) {
// the entities may be need to merged
// the entities can be merged
return false;
}
end_pos[index] = entity.offset + entity.length;
@ -2788,11 +2812,11 @@ void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity>
auto index = get_splittable_entity_type_index(type);
if (end_pos[index] != 0 && begin_pos[index] < offset) {
if (end_pos[index] <= offset) {
result.emplace_back(type, begin_pos[index], end_pos[index]);
result.emplace_back(type, begin_pos[index], end_pos[index] - begin_pos[index]);
begin_pos[index] = 0;
end_pos[index] = 0;
} else {
result.emplace_back(type, begin_pos[index], offset);
result.emplace_back(type, begin_pos[index], offset - begin_pos[index]);
begin_pos[index] = offset;
}
}
@ -2826,7 +2850,7 @@ void split_entities(vector<MessageEntity> &entities, const vector<MessageEntity>
result.resize(old_size);
}
}
add_entities(std::numeric_limits<size_t>::max());
add_entities(std::numeric_limits<int32>::max());
entities = std::move(result);
// entities are sorted only by offset now, re-sort if needed
if (!std::is_sorted(entities.begin(), entities.end())) {

View File

@ -575,8 +575,9 @@ TEST(MessageEntities, url) {
static void check_fix_formatted_text(td::string str, td::vector<td::MessageEntity> entities,
const td::string &expected_str,
const td::vector<td::MessageEntity> &expected_entities, bool allow_empty,
bool skip_new_entities, bool skip_bot_commands, bool for_draft) {
const td::vector<td::MessageEntity> &expected_entities, bool allow_empty = true,
bool skip_new_entities = false, bool skip_bot_commands = false,
bool for_draft = true) {
ASSERT_TRUE(
td::fix_formatted_text(str, entities, allow_empty, skip_new_entities, skip_bot_commands, for_draft).is_ok());
ASSERT_STREQ(expected_str, str);
@ -780,6 +781,104 @@ TEST(MessageEntities, fix_formatted_text) {
check_fix_formatted_text(text, {{type, 0, 1, "http://telegram.org/"}}, "", {}, true, false, false, true);
}
}
check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 1, 1}, {td::MessageEntity::Type::Italic, 0, 1}},
"abc", {{td::MessageEntity::Type::Italic, 0, 2}});
check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 1, 1}, {td::MessageEntity::Type::Italic, 1, 1}},
"abc", {{td::MessageEntity::Type::Italic, 1, 1}});
check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 2}, {td::MessageEntity::Type::Italic, 1, 2}},
"abc", {{td::MessageEntity::Type::Italic, 0, 3}});
check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 2}, {td::MessageEntity::Type::Italic, 2, 1}},
"abc", {{td::MessageEntity::Type::Italic, 0, 3}});
check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 1}, {td::MessageEntity::Type::Italic, 2, 1}},
"abc", {{td::MessageEntity::Type::Italic, 0, 1}, {td::MessageEntity::Type::Italic, 2, 1}});
check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 2}, {td::MessageEntity::Type::Bold, 1, 2}},
"abc",
{{td::MessageEntity::Type::Italic, 0, 1},
{td::MessageEntity::Type::Bold, 1, 2},
{td::MessageEntity::Type::Italic, 1, 1}});
check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 2}, {td::MessageEntity::Type::Bold, 2, 1}},
"abc", {{td::MessageEntity::Type::Italic, 0, 2}, {td::MessageEntity::Type::Bold, 2, 1}});
check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 1}, {td::MessageEntity::Type::Bold, 2, 1}},
"abc", {{td::MessageEntity::Type::Italic, 0, 1}, {td::MessageEntity::Type::Bold, 2, 1}});
// _a*b*_
check_fix_formatted_text(
"ab", {{td::MessageEntity::Type::Underline, 0, 2}, {td::MessageEntity::Type::Strikethrough, 1, 1}}, "ab",
{{td::MessageEntity::Type::Underline, 0, 2}, {td::MessageEntity::Type::Strikethrough, 1, 1}});
check_fix_formatted_text("ab",
{{td::MessageEntity::Type::Underline, 0, 1},
{td::MessageEntity::Type::Underline, 1, 1},
{td::MessageEntity::Type::Strikethrough, 1, 1}},
"ab",
{{td::MessageEntity::Type::Underline, 0, 1},
{td::MessageEntity::Type::Underline, 1, 1},
{td::MessageEntity::Type::Strikethrough, 1, 1}});
check_fix_formatted_text(
"ab", {{td::MessageEntity::Type::Strikethrough, 0, 2}, {td::MessageEntity::Type::Underline, 1, 1}}, "ab",
{{td::MessageEntity::Type::Strikethrough, 0, 2}, {td::MessageEntity::Type::Underline, 1, 1}});
check_fix_formatted_text("ab",
{{td::MessageEntity::Type::Strikethrough, 0, 1},
{td::MessageEntity::Type::Strikethrough, 1, 1},
{td::MessageEntity::Type::Underline, 1, 1}},
"ab",
{{td::MessageEntity::Type::Strikethrough, 0, 1},
{td::MessageEntity::Type::Underline, 1, 1},
{td::MessageEntity::Type::Strikethrough, 1, 1}});
// _*a*b_
check_fix_formatted_text(
"ab", {{td::MessageEntity::Type::Underline, 0, 2}, {td::MessageEntity::Type::Strikethrough, 0, 1}}, "ab",
{{td::MessageEntity::Type::Underline, 0, 2}, {td::MessageEntity::Type::Strikethrough, 0, 1}});
check_fix_formatted_text(
"ab",
{{td::MessageEntity::Type::Underline, 0, 1},
{td::MessageEntity::Type::Underline, 1, 1},
{td::MessageEntity::Type::Strikethrough, 0, 1}},
"ab", {{td::MessageEntity::Type::Underline, 0, 2}, {td::MessageEntity::Type::Strikethrough, 0, 1}});
// _*a*_\r_*b*_
check_fix_formatted_text("a\rb",
{{td::MessageEntity::Type::Bold, 0, 1},
{td::MessageEntity::Type::Strikethrough, 0, 1},
{td::MessageEntity::Type::Bold, 2, 1},
{td::MessageEntity::Type::Strikethrough, 2, 1}},
"ab",
{{td::MessageEntity::Type::Bold, 0, 2}, {td::MessageEntity::Type::Strikethrough, 0, 2}});
check_fix_formatted_text("a\nb",
{{td::MessageEntity::Type::Bold, 0, 1},
{td::MessageEntity::Type::Strikethrough, 0, 1},
{td::MessageEntity::Type::Bold, 2, 1},
{td::MessageEntity::Type::Strikethrough, 2, 1}},
"a\nb",
{{td::MessageEntity::Type::Bold, 0, 1},
{td::MessageEntity::Type::Strikethrough, 0, 1},
{td::MessageEntity::Type::Bold, 2, 1},
{td::MessageEntity::Type::Strikethrough, 2, 1}});
// _`a`_
check_fix_formatted_text("a", {{td::MessageEntity::Type::Pre, 0, 1}, {td::MessageEntity::Type::Strikethrough, 0, 1}},
"a", {{td::MessageEntity::Type::Pre, 0, 1}});
check_fix_formatted_text("a", {{td::MessageEntity::Type::Strikethrough, 0, 1}, {td::MessageEntity::Type::Pre, 0, 1}},
"a", {{td::MessageEntity::Type::Pre, 0, 1}});
check_fix_formatted_text("abc",
{{td::MessageEntity::Type::Pre, 0, 3}, {td::MessageEntity::Type::Strikethrough, 1, 1}},
"abc", {{td::MessageEntity::Type::Pre, 0, 3}});
check_fix_formatted_text(
"abc", {{td::MessageEntity::Type::Pre, 1, 1}, {td::MessageEntity::Type::Strikethrough, 0, 3}}, "abc",
{{td::MessageEntity::Type::Strikethrough, 0, 1},
{td::MessageEntity::Type::Pre, 1, 1},
{td::MessageEntity::Type::Strikethrough, 2, 1}});
check_fix_formatted_text(
"abc", {{td::MessageEntity::Type::Pre, 1, 1}, {td::MessageEntity::Type::Strikethrough, 1, 2}}, "abc",
{{td::MessageEntity::Type::Pre, 1, 1}, {td::MessageEntity::Type::Strikethrough, 2, 1}});
check_fix_formatted_text(
"abc", {{td::MessageEntity::Type::Pre, 1, 1}, {td::MessageEntity::Type::Strikethrough, 0, 2}}, "abc",
{{td::MessageEntity::Type::Strikethrough, 0, 1}, {td::MessageEntity::Type::Pre, 1, 1}});
check_fix_formatted_text("abc", {{td::MessageEntity::Type::Pre, 0, 3}, {td::MessageEntity::Type::BlockQuote, 1, 1}},
"abc", {{td::MessageEntity::Type::BlockQuote, 1, 1}});
check_fix_formatted_text("abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}},
"abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}});
}
static void check_parse_html(td::string text, const td::string &result, const td::vector<td::MessageEntity> &entities) {