diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index eafd2294..b4724871 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -1146,13 +1146,36 @@ static void check_non_intersecting(const vector &entities) { } } -static int32 get_entity_type_mask(MessageEntity::Type type) { +static constexpr int32 get_entity_type_mask(MessageEntity::Type type) { return 1 << static_cast(type); } +static constexpr int32 get_splittable_entities_mask() { + return get_entity_type_mask(MessageEntity::Type::Bold) | get_entity_type_mask(MessageEntity::Type::Italic) | + get_entity_type_mask(MessageEntity::Type::Underline) | + get_entity_type_mask(MessageEntity::Type::Strikethrough); +} + +static constexpr int32 get_blockquote_entities_mask() { + return get_entity_type_mask(MessageEntity::Type::BlockQuote); +} + +static constexpr int32 get_continuous_entities_mask() { + return get_entity_type_mask(MessageEntity::Type::Mention) | get_entity_type_mask(MessageEntity::Type::Hashtag) | + get_entity_type_mask(MessageEntity::Type::BotCommand) | get_entity_type_mask(MessageEntity::Type::Url) | + get_entity_type_mask(MessageEntity::Type::EmailAddress) | get_entity_type_mask(MessageEntity::Type::TextUrl) | + get_entity_type_mask(MessageEntity::Type::MentionName) | get_entity_type_mask(MessageEntity::Type::Cashtag) | + get_entity_type_mask(MessageEntity::Type::PhoneNumber) | + get_entity_type_mask(MessageEntity::Type::BankCardNumber); +} + +static constexpr int32 get_pre_entities_mask() { + return get_entity_type_mask(MessageEntity::Type::Pre) | get_entity_type_mask(MessageEntity::Type::Code) | + get_entity_type_mask(MessageEntity::Type::PreCode); +} + static int32 is_splittable_entity(MessageEntity::Type type) { - return type == MessageEntity::Type::Bold || type == MessageEntity::Type::Italic || - type == MessageEntity::Type::Underline || type == MessageEntity::Type::Strikethrough; + return (get_entity_type_mask(type) & get_splittable_entities_mask()) != 0; } static int32 is_blockquote_entity(MessageEntity::Type type) { @@ -1160,15 +1183,11 @@ static int32 is_blockquote_entity(MessageEntity::Type type) { } static int32 is_continuous_entity(MessageEntity::Type type) { - return type == MessageEntity::Type::Mention || type == MessageEntity::Type::Hashtag || - type == MessageEntity::Type::BotCommand || type == MessageEntity::Type::Url || - type == MessageEntity::Type::EmailAddress || type == MessageEntity::Type::TextUrl || - type == MessageEntity::Type::MentionName || type == MessageEntity::Type::Cashtag || - type == MessageEntity::Type::PhoneNumber || type == MessageEntity::Type::BankCardNumber; + return (get_entity_type_mask(type) & get_continuous_entities_mask()) != 0; } static int32 is_pre_entity(MessageEntity::Type type) { - return type == MessageEntity::Type::Pre || type == MessageEntity::Type::Code || type == MessageEntity::Type::PreCode; + return (get_entity_type_mask(type) & get_pre_entities_mask()) != 0; } static constexpr size_t SPLITTABLE_ENTITY_TYPE_COUNT = 4; @@ -1215,9 +1234,14 @@ static bool are_entities_valid(const vector &entities) { // Pre and Code can't contain nested entities return false; } - if (is_continuous_entity(parent_type) && - (is_pre_entity(entity.type) || is_continuous_entity(entity.type) || is_blockquote_entity(entity.type))) { - // continuous can't contain other continuous and blockquote + // parents are not pre after this point + if (is_pre_entity(entity.type) && (nested_entity_type_mask & ~get_blockquote_entities_mask()) != 0) { + // Pre and Code can't be contained in other entities, except blockquote + return false; + } + if ((is_continuous_entity(entity.type) || is_blockquote_entity(entity.type)) && + (nested_entity_type_mask & get_continuous_entities_mask()) != 0) { + // continuous and blockquote can't be contained in continuous return false; } } @@ -1225,7 +1249,7 @@ static bool are_entities_valid(const vector &entities) { if (is_splittable_entity(entity.type)) { auto index = get_splittable_entity_type_index(entity.type); if (end_pos[index] >= entity.offset) { - // the entities may be need to merged + // the entities can be merged return false; } end_pos[index] = entity.offset + entity.length; @@ -2788,11 +2812,11 @@ void split_entities(vector &entities, const vector auto index = get_splittable_entity_type_index(type); if (end_pos[index] != 0 && begin_pos[index] < offset) { if (end_pos[index] <= offset) { - result.emplace_back(type, begin_pos[index], end_pos[index]); + result.emplace_back(type, begin_pos[index], end_pos[index] - begin_pos[index]); begin_pos[index] = 0; end_pos[index] = 0; } else { - result.emplace_back(type, begin_pos[index], offset); + result.emplace_back(type, begin_pos[index], offset - begin_pos[index]); begin_pos[index] = offset; } } @@ -2826,7 +2850,7 @@ void split_entities(vector &entities, const vector result.resize(old_size); } } - add_entities(std::numeric_limits::max()); + add_entities(std::numeric_limits::max()); entities = std::move(result); // entities are sorted only by offset now, re-sort if needed if (!std::is_sorted(entities.begin(), entities.end())) { diff --git a/test/message_entities.cpp b/test/message_entities.cpp index bba2bcec..548cfab3 100644 --- a/test/message_entities.cpp +++ b/test/message_entities.cpp @@ -575,8 +575,9 @@ TEST(MessageEntities, url) { static void check_fix_formatted_text(td::string str, td::vector entities, const td::string &expected_str, - const td::vector &expected_entities, bool allow_empty, - bool skip_new_entities, bool skip_bot_commands, bool for_draft) { + const td::vector &expected_entities, bool allow_empty = true, + bool skip_new_entities = false, bool skip_bot_commands = false, + bool for_draft = true) { ASSERT_TRUE( td::fix_formatted_text(str, entities, allow_empty, skip_new_entities, skip_bot_commands, for_draft).is_ok()); ASSERT_STREQ(expected_str, str); @@ -780,6 +781,104 @@ TEST(MessageEntities, fix_formatted_text) { check_fix_formatted_text(text, {{type, 0, 1, "http://telegram.org/"}}, "", {}, true, false, false, true); } } + + check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 1, 1}, {td::MessageEntity::Type::Italic, 0, 1}}, + "abc", {{td::MessageEntity::Type::Italic, 0, 2}}); + check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 1, 1}, {td::MessageEntity::Type::Italic, 1, 1}}, + "abc", {{td::MessageEntity::Type::Italic, 1, 1}}); + check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 2}, {td::MessageEntity::Type::Italic, 1, 2}}, + "abc", {{td::MessageEntity::Type::Italic, 0, 3}}); + check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 2}, {td::MessageEntity::Type::Italic, 2, 1}}, + "abc", {{td::MessageEntity::Type::Italic, 0, 3}}); + check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 1}, {td::MessageEntity::Type::Italic, 2, 1}}, + "abc", {{td::MessageEntity::Type::Italic, 0, 1}, {td::MessageEntity::Type::Italic, 2, 1}}); + check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 2}, {td::MessageEntity::Type::Bold, 1, 2}}, + "abc", + {{td::MessageEntity::Type::Italic, 0, 1}, + {td::MessageEntity::Type::Bold, 1, 2}, + {td::MessageEntity::Type::Italic, 1, 1}}); + check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 2}, {td::MessageEntity::Type::Bold, 2, 1}}, + "abc", {{td::MessageEntity::Type::Italic, 0, 2}, {td::MessageEntity::Type::Bold, 2, 1}}); + check_fix_formatted_text("abc", {{td::MessageEntity::Type::Italic, 0, 1}, {td::MessageEntity::Type::Bold, 2, 1}}, + "abc", {{td::MessageEntity::Type::Italic, 0, 1}, {td::MessageEntity::Type::Bold, 2, 1}}); + + // _a*b*_ + check_fix_formatted_text( + "ab", {{td::MessageEntity::Type::Underline, 0, 2}, {td::MessageEntity::Type::Strikethrough, 1, 1}}, "ab", + {{td::MessageEntity::Type::Underline, 0, 2}, {td::MessageEntity::Type::Strikethrough, 1, 1}}); + check_fix_formatted_text("ab", + {{td::MessageEntity::Type::Underline, 0, 1}, + {td::MessageEntity::Type::Underline, 1, 1}, + {td::MessageEntity::Type::Strikethrough, 1, 1}}, + "ab", + {{td::MessageEntity::Type::Underline, 0, 1}, + {td::MessageEntity::Type::Underline, 1, 1}, + {td::MessageEntity::Type::Strikethrough, 1, 1}}); + check_fix_formatted_text( + "ab", {{td::MessageEntity::Type::Strikethrough, 0, 2}, {td::MessageEntity::Type::Underline, 1, 1}}, "ab", + {{td::MessageEntity::Type::Strikethrough, 0, 2}, {td::MessageEntity::Type::Underline, 1, 1}}); + check_fix_formatted_text("ab", + {{td::MessageEntity::Type::Strikethrough, 0, 1}, + {td::MessageEntity::Type::Strikethrough, 1, 1}, + {td::MessageEntity::Type::Underline, 1, 1}}, + "ab", + {{td::MessageEntity::Type::Strikethrough, 0, 1}, + {td::MessageEntity::Type::Underline, 1, 1}, + {td::MessageEntity::Type::Strikethrough, 1, 1}}); + + // _*a*b_ + check_fix_formatted_text( + "ab", {{td::MessageEntity::Type::Underline, 0, 2}, {td::MessageEntity::Type::Strikethrough, 0, 1}}, "ab", + {{td::MessageEntity::Type::Underline, 0, 2}, {td::MessageEntity::Type::Strikethrough, 0, 1}}); + check_fix_formatted_text( + "ab", + {{td::MessageEntity::Type::Underline, 0, 1}, + {td::MessageEntity::Type::Underline, 1, 1}, + {td::MessageEntity::Type::Strikethrough, 0, 1}}, + "ab", {{td::MessageEntity::Type::Underline, 0, 2}, {td::MessageEntity::Type::Strikethrough, 0, 1}}); + + // _*a*_\r_*b*_ + check_fix_formatted_text("a\rb", + {{td::MessageEntity::Type::Bold, 0, 1}, + {td::MessageEntity::Type::Strikethrough, 0, 1}, + {td::MessageEntity::Type::Bold, 2, 1}, + {td::MessageEntity::Type::Strikethrough, 2, 1}}, + "ab", + {{td::MessageEntity::Type::Bold, 0, 2}, {td::MessageEntity::Type::Strikethrough, 0, 2}}); + check_fix_formatted_text("a\nb", + {{td::MessageEntity::Type::Bold, 0, 1}, + {td::MessageEntity::Type::Strikethrough, 0, 1}, + {td::MessageEntity::Type::Bold, 2, 1}, + {td::MessageEntity::Type::Strikethrough, 2, 1}}, + "a\nb", + {{td::MessageEntity::Type::Bold, 0, 1}, + {td::MessageEntity::Type::Strikethrough, 0, 1}, + {td::MessageEntity::Type::Bold, 2, 1}, + {td::MessageEntity::Type::Strikethrough, 2, 1}}); + + // _`a`_ + check_fix_formatted_text("a", {{td::MessageEntity::Type::Pre, 0, 1}, {td::MessageEntity::Type::Strikethrough, 0, 1}}, + "a", {{td::MessageEntity::Type::Pre, 0, 1}}); + check_fix_formatted_text("a", {{td::MessageEntity::Type::Strikethrough, 0, 1}, {td::MessageEntity::Type::Pre, 0, 1}}, + "a", {{td::MessageEntity::Type::Pre, 0, 1}}); + check_fix_formatted_text("abc", + {{td::MessageEntity::Type::Pre, 0, 3}, {td::MessageEntity::Type::Strikethrough, 1, 1}}, + "abc", {{td::MessageEntity::Type::Pre, 0, 3}}); + check_fix_formatted_text( + "abc", {{td::MessageEntity::Type::Pre, 1, 1}, {td::MessageEntity::Type::Strikethrough, 0, 3}}, "abc", + {{td::MessageEntity::Type::Strikethrough, 0, 1}, + {td::MessageEntity::Type::Pre, 1, 1}, + {td::MessageEntity::Type::Strikethrough, 2, 1}}); + check_fix_formatted_text( + "abc", {{td::MessageEntity::Type::Pre, 1, 1}, {td::MessageEntity::Type::Strikethrough, 1, 2}}, "abc", + {{td::MessageEntity::Type::Pre, 1, 1}, {td::MessageEntity::Type::Strikethrough, 2, 1}}); + check_fix_formatted_text( + "abc", {{td::MessageEntity::Type::Pre, 1, 1}, {td::MessageEntity::Type::Strikethrough, 0, 2}}, "abc", + {{td::MessageEntity::Type::Strikethrough, 0, 1}, {td::MessageEntity::Type::Pre, 1, 1}}); + check_fix_formatted_text("abc", {{td::MessageEntity::Type::Pre, 0, 3}, {td::MessageEntity::Type::BlockQuote, 1, 1}}, + "abc", {{td::MessageEntity::Type::BlockQuote, 1, 1}}); + check_fix_formatted_text("abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}}, + "abc", {{td::MessageEntity::Type::BlockQuote, 0, 3}, {td::MessageEntity::Type::Pre, 1, 1}}); } static void check_parse_html(td::string text, const td::string &result, const td::vector &entities) {