diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index 907d4a8a..933458a2 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -1169,9 +1169,10 @@ static void check_is_sorted_impl(const vector &entities, int line LOG_CHECK(std::is_sorted(entities.begin(), entities.end())) << line << " " << entities; } -static void check_non_intersecting(const vector &entities) { +#define check_non_intersecting(entities) check_non_intersecting_impl(entities, __LINE__) +static void check_non_intersecting_impl(const vector &entities, int line) { for (size_t i = 0; i + 1 < entities.size(); i++) { - CHECK(entities[i].offset + entities[i].length <= entities[i + 1].offset); + LOG_CHECK(entities[i].offset + entities[i].length <= entities[i + 1].offset) << line << " " << entities; } } @@ -1273,6 +1274,11 @@ static bool are_entities_valid(const vector &entities) { // continuous and blockquote can't be contained in continuous return false; } + if ((nested_entity_type_mask & get_splittable_entities_mask()) != 0) { + // the previous nested entity may need to be split for consistency + // alternatively, better entity merging needs to be implemented + return false; + } } if (is_splittable_entity(entity.type)) { @@ -1955,16 +1961,16 @@ static FormattedText parse_text_url_entities_v3(Slice text, vector text_urls = find_text_url_entities_v3(parsed_part_text); - for (size_t index = 0; index < SPLITTABLE_ENTITY_TYPE_COUNT; index++) { - check_non_intersecting(part_splittable_entities[index]); - } int32 text_utf16_offset = max_end; size_t prev_pos = 0; @@ -2047,39 +2053,37 @@ static FormattedText parse_text_url_entities_v3(Slice text, vector part_end) { - // begins before end of the segment, but ends after it - // need to keep the entity for future segments, so split the entity - // entities don't intersect each other, so there can be at most one such entity - result.entities.emplace_back(splittable_entities[pos].type, - 
splittable_entities[pos].offset - skipped_length, - part_end - splittable_entities[pos].offset); + // now add all left splittable entities from [part_begin, part_end) + for (size_t index = 0; index < SPLITTABLE_ENTITY_TYPE_COUNT; index++) { + auto &pos = splittable_entity_pos[index]; + auto &splittable_entities = part_splittable_entities[index]; + while (pos < splittable_entities.size() && splittable_entities[pos].offset < part_end) { + if (splittable_entities[pos].offset + splittable_entities[pos].length > part_end) { + // begins before end of the segment, but ends after it + // need to keep the entity for future segments, so split the entity + // entities don't intersect each other, so there can be at most one such entity + result.entities.emplace_back(splittable_entities[pos].type, splittable_entities[pos].offset - skipped_length, + part_end - splittable_entities[pos].offset); - splittable_entities[pos].length = - splittable_entities[pos].offset + splittable_entities[pos].length - part_end; - splittable_entities[pos].offset = part_end; - } else { - result.entities.emplace_back(splittable_entities[pos].type, - splittable_entities[pos].offset - skipped_length, - splittable_entities[pos].length); - pos++; - } - } - if (pos == splittable_entities.size()) { - splittable_entities.clear(); + splittable_entities[pos].length = + splittable_entities[pos].offset + splittable_entities[pos].length - part_end; + splittable_entities[pos].offset = part_end; } else { - CHECK(pos == splittable_entities.size() - 1); - CHECK(!text.empty()); - splittable_entities[0] = std::move(splittable_entities.back()); - splittable_entities.resize(1); + result.entities.emplace_back(splittable_entities[pos].type, splittable_entities[pos].offset - skipped_length, + splittable_entities[pos].length); + pos++; } } + if (pos == splittable_entities.size()) { + splittable_entities.clear(); + } else { + CHECK(pos == splittable_entities.size() - 1); + CHECK(!text.empty()); + splittable_entities[0] = 
std::move(splittable_entities.back()); + splittable_entities.resize(1); + } } part_begin = part_end; diff --git a/td/telegram/cli.cpp b/td/telegram/cli.cpp index c91a3f41..45b958b4 100644 --- a/td/telegram/cli.cpp +++ b/td/telegram/cli.cpp @@ -2694,7 +2694,8 @@ class CliClient final : public Actor { } else if (op == "gtes") { execute(td_api::make_object(args)); } else if (op == "pm") { - send_request(td_api::make_object(as_formatted_text(args))); + send_request( + td_api::make_object(td_api::make_object(args, Auto()))); } else if (op == "pte") { send_request( td_api::make_object(args, td_api::make_object(2))); diff --git a/test/message_entities.cpp b/test/message_entities.cpp index 9527ac28..4a427648 100644 --- a/test/message_entities.cpp +++ b/test/message_entities.cpp @@ -1502,7 +1502,7 @@ TEST(MessageEntities, parse_markdown_v3) { {td::MessageEntity::Type::Italic, 123, 17}, {td::MessageEntity::Type::Bold, 129, 15}}); - td::vector parts{"a", " #test ", "__", "**", "~~", "[", "](t.me)", "`"}; + td::vector parts{"a", " #test__a", "__", "**", "~~", "[", "](t.me)", "`"}; td::vector types{ td::MessageEntity::Type::Bold, td::MessageEntity::Type::Italic, td::MessageEntity::Type::Underline, td::MessageEntity::Type::Strikethrough, td::MessageEntity::Type::Code, td::MessageEntity::Type::Pre, @@ -1527,7 +1527,16 @@ TEST(MessageEntities, parse_markdown_v3) { entities.emplace_back(type, offset, length); } - ASSERT_TRUE(fix_formatted_text(str, entities, true, true, true, true).is_ok()); - td::parse_markdown_v3({std::move(str), std::move(entities)}); + td::FormattedText text{std::move(str), std::move(entities)}; + while (true) { + ASSERT_TRUE(fix_formatted_text(text.text, text.entities, true, true, true, true).is_ok()); + auto parsed_text = td::parse_markdown_v3(text); + ASSERT_TRUE(fix_formatted_text(parsed_text.text, parsed_text.entities, true, true, true, true).is_ok()); + if (parsed_text == text) { + break; + } + text = std::move(parsed_text); + } + ASSERT_EQ(text, 
td::parse_markdown_v3(text)); } }