Add message entities tests.

GitOrigin-RevId: fd04d3ebc4619d2b6be4282043a1333d824d7ee6
This commit is contained in:
levlam 2020-01-02 17:46:19 +03:00
parent 170fd545fd
commit ece4fd0e93
2 changed files with 26 additions and 0 deletions

View File

@ -2526,6 +2526,10 @@ static std::pair<size_t, int32> remove_invalid_entities(const string &text, vect
int32 last_non_whitespace_utf16_offset = -1; int32 last_non_whitespace_utf16_offset = -1;
for (size_t pos = 0; pos <= text.size(); pos++) { for (size_t pos = 0; pos <= text.size(); pos++) {
while (current_entity < entities.size() && utf16_offset >= entities[current_entity].offset &&
entities[current_entity].length == 0) {
nested_entities_stack.push_back(&entities[current_entity++]);
}
while (!nested_entities_stack.empty()) { while (!nested_entities_stack.empty()) {
auto *entity = nested_entities_stack.back(); auto *entity = nested_entities_stack.back();
auto entity_end = entity->offset + entity->length; auto entity_end = entity->offset + entity->length;
@ -2590,6 +2594,9 @@ Status fix_formatted_text(string &text, vector<MessageEntity> &entities, bool al
TRY_RESULT(result, clean_input_string_with_entities(text, entities)); TRY_RESULT(result, clean_input_string_with_entities(text, entities));
// now entities are still sorted by offset and length, but not type,
// because some characters could have been deleted and some entities now end together
size_t last_non_whitespace_pos; size_t last_non_whitespace_pos;
int32 last_non_whitespace_utf16_offset; int32 last_non_whitespace_utf16_offset;
std::tie(last_non_whitespace_pos, last_non_whitespace_utf16_offset) = remove_invalid_entities(result, entities); std::tie(last_non_whitespace_pos, last_non_whitespace_utf16_offset) = remove_invalid_entities(result, entities);
@ -2602,6 +2609,10 @@ Status fix_formatted_text(string &text, vector<MessageEntity> &entities, bool al
return Status::Error(3, "Message must be non-empty"); return Status::Error(3, "Message must be non-empty");
} }
if (!std::is_sorted(entities.begin(), entities.end())) {
std::sort(entities.begin(), entities.end()); // re-sort entities if needed after removal of some characters
}
if (for_draft) { if (for_draft) {
text = std::move(result); text = std::move(result);
} else { } else {

View File

@ -734,6 +734,12 @@ TEST(MessageEntities, fix_formatted_text) {
check_fix_formatted_text(str, entities, td::utf8_utf16_substr(str, 3, 11), fixed_entities, false, false, false, check_fix_formatted_text(str, entities, td::utf8_utf16_substr(str, 3, 11), fixed_entities, false, false, false,
false); false);
} }
for (td::string text : {"\t", "\r", "\n", "\t ", "\r ", "\n "}) {
for (auto type : {td::MessageEntity::Type::Bold, td::MessageEntity::Type::TextUrl}) {
check_fix_formatted_text(text, {{type, 0, 1, "http://telegram.org/"}}, "", {}, true, false, false, true);
}
}
} }
static void check_parse_html(td::string text, const td::string &result, const td::vector<td::MessageEntity> &entities) { static void check_parse_html(td::string text, const td::string &result, const td::vector<td::MessageEntity> &entities) {
@ -813,6 +819,15 @@ TEST(MessageEntities, parse_html) {
check_parse_html("🏟 🏟&lt;<i>a</ >", "🏟 🏟<a", {{td::MessageEntity::Type::Italic, 6, 1}}); check_parse_html("🏟 🏟&lt;<i>a</ >", "🏟 🏟<a", {{td::MessageEntity::Type::Italic, 6, 1}});
check_parse_html("🏟 🏟&lt;<i>a</i >", "🏟 🏟<a", {{td::MessageEntity::Type::Italic, 6, 1}}); check_parse_html("🏟 🏟&lt;<i>a</i >", "🏟 🏟<a", {{td::MessageEntity::Type::Italic, 6, 1}});
check_parse_html("🏟 🏟&lt;<b></b>", "🏟 🏟<", {}); check_parse_html("🏟 🏟&lt;<b></b>", "🏟 🏟<", {});
check_parse_html("<i>\t</i>", "\t", {{td::MessageEntity::Type::Italic, 0, 1}});
check_parse_html("<i>\r</i>", "\r", {{td::MessageEntity::Type::Italic, 0, 1}});
check_parse_html("<i>\n</i>", "\n", {{td::MessageEntity::Type::Italic, 0, 1}});
check_parse_html("<a href=telegram.org>\t</a>", "\t",
{{td::MessageEntity::Type::TextUrl, 0, 1, "http://telegram.org/"}});
check_parse_html("<a href=telegram.org>\r</a>", "\r",
{{td::MessageEntity::Type::TextUrl, 0, 1, "http://telegram.org/"}});
check_parse_html("<a href=telegram.org>\n</a>", "\n",
{{td::MessageEntity::Type::TextUrl, 0, 1, "http://telegram.org/"}});
check_parse_html("<code><i><b> </b></i></code><i><b><code> </code></b></i>", " ", check_parse_html("<code><i><b> </b></i></code><i><b><code> </code></b></i>", " ",
{{td::MessageEntity::Type::Code, 0, 1}, {{td::MessageEntity::Type::Code, 0, 1},
{td::MessageEntity::Type::Bold, 0, 1}, {td::MessageEntity::Type::Bold, 0, 1},