diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index 7ca16bf89..e9ff7b0cf 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -2013,12 +2013,16 @@ Result> parse_markdown_v2(string &text) { vector nested_entities; bool have_blockquote = false; + bool can_start_blockquote = true; for (size_t i = 0; i < text.size(); i++) { auto c = static_cast(text[i]); if (c == '\\' && text[i + 1] > 0 && text[i + 1] <= 126) { i++; utf16_offset += 1; text[result_size++] = text[i]; + if (text[i] != '\r') { + can_start_blockquote = (text[i] == '\n'); + } continue; } @@ -2038,46 +2042,48 @@ Result> parse_markdown_v2(string &text) { if (reserved_characters.find(text[i]) == Slice::npos) { if (is_utf8_character_first_code_unit(c)) { utf16_offset += 1 + (c >= 0xf0); // >= 4 bytes in symbol => surrogate pair + if (c != '\r') { + can_start_blockquote = false; + } } text[result_size++] = text[i]; continue; } - bool is_end_of_an_entity = false; - if (!nested_entities.empty()) { - is_end_of_an_entity = [&] { - if (have_blockquote && c == '\n' && (i + 1 == text.size() || text[i + 1] != '>')) { - return true; - } - switch (nested_entities.back().type) { - case MessageEntity::Type::Bold: - return c == '*'; - case MessageEntity::Type::Italic: - return c == '_' && text[i + 1] != '_'; - case MessageEntity::Type::Code: - return c == '`'; - case MessageEntity::Type::Pre: - case MessageEntity::Type::PreCode: - return c == '`' && text[i + 1] == '`' && text[i + 2] == '`'; - case MessageEntity::Type::TextUrl: - return c == ']'; - case MessageEntity::Type::Underline: - return c == '_' && text[i + 1] == '_'; - case MessageEntity::Type::Strikethrough: - return c == '~'; - case MessageEntity::Type::Spoiler: - return c == '|' && text[i + 1] == '|'; - case MessageEntity::Type::CustomEmoji: - return c == ']'; - case MessageEntity::Type::BlockQuote: - return false; - default: - UNREACHABLE(); - return false; - } - }(); - } - + bool is_end_of_an_entity = [&] { + if (nested_entities.empty()) { + return false; + } + if (have_blockquote && c == '\n' && (i + 1 == text.size() || text[i + 1] != '>')) { + return true; + } + switch (nested_entities.back().type) { + case MessageEntity::Type::Bold: + return c == '*'; + case MessageEntity::Type::Italic: + return c == '_' && text[i + 1] != '_'; + case MessageEntity::Type::Code: + return c == '`'; + case MessageEntity::Type::Pre: + case MessageEntity::Type::PreCode: + return c == '`' && text[i + 1] == '`' && text[i + 2] == '`'; + case MessageEntity::Type::TextUrl: + return c == ']'; + case MessageEntity::Type::Underline: + return c == '_' && text[i + 1] == '_'; + case MessageEntity::Type::Strikethrough: + return c == '~'; + case MessageEntity::Type::Spoiler: + return c == '|' && text[i + 1] == '|'; + case MessageEntity::Type::CustomEmoji: + return c == ']'; + case MessageEntity::Type::BlockQuote: + return false; + default: + UNREACHABLE(); + return false; + } + }(); if (!is_end_of_an_entity) { // begin of an entity MessageEntity::Type type; @@ -2149,19 +2155,17 @@ Result> parse_markdown_v2(string &text) { case '\n': utf16_offset += 1; text[result_size++] = '\n'; + can_start_blockquote = true; type = MessageEntity::Type::Size; - if (i + 1 < text.size() && text[i + 1] == '>') { - i++; - if (!have_blockquote) { + break; + case '>': + if (can_start_blockquote) { + if (have_blockquote) { + type = MessageEntity::Type::Size; + } else { type = MessageEntity::Type::BlockQuote; have_blockquote = true; } - } - break; - case '>': - if (i == 0) { - type = MessageEntity::Type::BlockQuote; - have_blockquote = true; } else { return Status::Error(400, PSLICE() << "Character '" << text[i] << "' is reserved and must be escaped with the preceding '\\'"); @@ -2258,6 +2262,7 @@ Result> parse_markdown_v2(string &text) { CHECK(have_blockquote); have_blockquote = false; text[result_size++] = text[i]; + can_start_blockquote = true; utf16_offset += 1; skip_entity = false; break; diff --git a/test/message_entities.cpp b/test/message_entities.cpp index ec38ef7ca..d0ee0a79d 100644 --- a/test/message_entities.cpp +++ b/test/message_entities.cpp @@ -1424,6 +1424,7 @@ TEST(MessageEntities, parse_markdown) { check_parse_markdown("🏟 🏟![👍](tg://emoji?test=1#&id=25)", "Custom emoji URL must have an emoji identifier"); check_parse_markdown("🏟 🏟![👍](tg://emoji?test=1231&id=025)", "Invalid custom emoji identifier specified"); check_parse_markdown(">*b\n>ld \n>bo\nld*\nasd\ndef", "Can't find end of Bold entity at byte offset 1"); + check_parse_markdown(">\n*a*>2", "Character '>' is reserved and must be escaped with the preceding '\\'"); check_parse_markdown("", "", {}); check_parse_markdown("\\\\", "\\", {}); @@ -1493,6 +1494,8 @@ TEST(MessageEntities, parse_markdown) { check_parse_markdown("abc\n> \n> \n>\ndef", "abc\n \n \n\ndef", {{td::MessageEntity::Type::BlockQuote, 4, 5}}); check_parse_markdown(">", "", {}); check_parse_markdown(">a", "a", {{td::MessageEntity::Type::BlockQuote, 0, 1}}); + check_parse_markdown("\r>a", "\ra", {{td::MessageEntity::Type::BlockQuote, 1, 1}}); + check_parse_markdown("\r\r>\r\ra\r\n\r", "\r\r\r\ra\r\n\r", {{td::MessageEntity::Type::BlockQuote, 2, 5}}); check_parse_markdown( ">*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic " "bold___ bold*", @@ -1513,6 +1516,22 @@ TEST(MessageEntities, parse_markdown) { {{td::MessageEntity::Type::Code, 0, 14}, {td::MessageEntity::Type::BlockQuote, 15, 4}}); check_parse_markdown(">1", "1", {{td::MessageEntity::Type::BlockQuote, 0, 1}}); check_parse_markdown(">\n1", "\n1", {{td::MessageEntity::Type::BlockQuote, 0, 1}}); + check_parse_markdown(">\n\r>2", "\n\r2", + {{td::MessageEntity::Type::BlockQuote, 0, 1}, {td::MessageEntity::Type::BlockQuote, 2, 1}}); + check_parse_markdown(">\n**>2", "\n2", + {{td::MessageEntity::Type::BlockQuote, 0, 1}, {td::MessageEntity::Type::BlockQuote, 1, 1}}); + // check_parse_markdown("*>abcd*", "abcd", + // {{td::MessageEntity::Type::BlockQuote, 0, 4}, {td::MessageEntity::Type::Bold, 0, 4}}); + check_parse_markdown(">*abcd*", "abcd", + {{td::MessageEntity::Type::BlockQuote, 0, 4}, {td::MessageEntity::Type::Bold, 0, 4}}); + // check_parse_markdown(">*abcd\n*", "abcd\n", + // {{td::MessageEntity::Type::BlockQuote, 0, 5}, {td::MessageEntity::Type::Bold, 0, 5}}); + check_parse_markdown(">*abcd*\n", "abcd\n", + {{td::MessageEntity::Type::BlockQuote, 0, 5}, {td::MessageEntity::Type::Bold, 0, 4}}); + check_parse_markdown("*>abcd\n*", "abcd\n", + {{td::MessageEntity::Type::BlockQuote, 0, 5}, {td::MessageEntity::Type::Bold, 0, 5}}); + check_parse_markdown("abc\n>def\n>def\n\r>ghi2\njkl", "abc\ndef\ndef\n\rghi2\njkl", + {{td::MessageEntity::Type::BlockQuote, 4, 8}, {td::MessageEntity::Type::BlockQuote, 13, 5}}); } static void check_parse_markdown_v3(td::string text, td::vector entities,