Allow \r and other empty entities as a separator between MarkdownV2 blockquotes.
This commit is contained in:
parent
ce8a4b6d21
commit
da031b3faa
@ -2013,12 +2013,16 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
||||
vector<EntityInfo> nested_entities;
|
||||
|
||||
bool have_blockquote = false;
|
||||
bool can_start_blockquote = true;
|
||||
for (size_t i = 0; i < text.size(); i++) {
|
||||
auto c = static_cast<unsigned char>(text[i]);
|
||||
if (c == '\\' && text[i + 1] > 0 && text[i + 1] <= 126) {
|
||||
i++;
|
||||
utf16_offset += 1;
|
||||
text[result_size++] = text[i];
|
||||
if (text[i] != '\r') {
|
||||
can_start_blockquote = (text[i] == '\n');
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -2038,46 +2042,48 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
||||
if (reserved_characters.find(text[i]) == Slice::npos) {
|
||||
if (is_utf8_character_first_code_unit(c)) {
|
||||
utf16_offset += 1 + (c >= 0xf0); // >= 4 bytes in symbol => surrogate pair
|
||||
if (c != '\r') {
|
||||
can_start_blockquote = false;
|
||||
}
|
||||
}
|
||||
text[result_size++] = text[i];
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_end_of_an_entity = false;
|
||||
if (!nested_entities.empty()) {
|
||||
is_end_of_an_entity = [&] {
|
||||
if (have_blockquote && c == '\n' && (i + 1 == text.size() || text[i + 1] != '>')) {
|
||||
return true;
|
||||
}
|
||||
switch (nested_entities.back().type) {
|
||||
case MessageEntity::Type::Bold:
|
||||
return c == '*';
|
||||
case MessageEntity::Type::Italic:
|
||||
return c == '_' && text[i + 1] != '_';
|
||||
case MessageEntity::Type::Code:
|
||||
return c == '`';
|
||||
case MessageEntity::Type::Pre:
|
||||
case MessageEntity::Type::PreCode:
|
||||
return c == '`' && text[i + 1] == '`' && text[i + 2] == '`';
|
||||
case MessageEntity::Type::TextUrl:
|
||||
return c == ']';
|
||||
case MessageEntity::Type::Underline:
|
||||
return c == '_' && text[i + 1] == '_';
|
||||
case MessageEntity::Type::Strikethrough:
|
||||
return c == '~';
|
||||
case MessageEntity::Type::Spoiler:
|
||||
return c == '|' && text[i + 1] == '|';
|
||||
case MessageEntity::Type::CustomEmoji:
|
||||
return c == ']';
|
||||
case MessageEntity::Type::BlockQuote:
|
||||
return false;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return false;
|
||||
}
|
||||
}();
|
||||
}
|
||||
|
||||
bool is_end_of_an_entity = [&] {
|
||||
if (nested_entities.empty()) {
|
||||
return false;
|
||||
}
|
||||
if (have_blockquote && c == '\n' && (i + 1 == text.size() || text[i + 1] != '>')) {
|
||||
return true;
|
||||
}
|
||||
switch (nested_entities.back().type) {
|
||||
case MessageEntity::Type::Bold:
|
||||
return c == '*';
|
||||
case MessageEntity::Type::Italic:
|
||||
return c == '_' && text[i + 1] != '_';
|
||||
case MessageEntity::Type::Code:
|
||||
return c == '`';
|
||||
case MessageEntity::Type::Pre:
|
||||
case MessageEntity::Type::PreCode:
|
||||
return c == '`' && text[i + 1] == '`' && text[i + 2] == '`';
|
||||
case MessageEntity::Type::TextUrl:
|
||||
return c == ']';
|
||||
case MessageEntity::Type::Underline:
|
||||
return c == '_' && text[i + 1] == '_';
|
||||
case MessageEntity::Type::Strikethrough:
|
||||
return c == '~';
|
||||
case MessageEntity::Type::Spoiler:
|
||||
return c == '|' && text[i + 1] == '|';
|
||||
case MessageEntity::Type::CustomEmoji:
|
||||
return c == ']';
|
||||
case MessageEntity::Type::BlockQuote:
|
||||
return false;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return false;
|
||||
}
|
||||
}();
|
||||
if (!is_end_of_an_entity) {
|
||||
// begin of an entity
|
||||
MessageEntity::Type type;
|
||||
@ -2149,19 +2155,17 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
||||
case '\n':
|
||||
utf16_offset += 1;
|
||||
text[result_size++] = '\n';
|
||||
can_start_blockquote = true;
|
||||
type = MessageEntity::Type::Size;
|
||||
if (i + 1 < text.size() && text[i + 1] == '>') {
|
||||
i++;
|
||||
if (!have_blockquote) {
|
||||
break;
|
||||
case '>':
|
||||
if (can_start_blockquote) {
|
||||
if (have_blockquote) {
|
||||
type = MessageEntity::Type::Size;
|
||||
} else {
|
||||
type = MessageEntity::Type::BlockQuote;
|
||||
have_blockquote = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
if (i == 0) {
|
||||
type = MessageEntity::Type::BlockQuote;
|
||||
have_blockquote = true;
|
||||
} else {
|
||||
return Status::Error(400, PSLICE() << "Character '" << text[i]
|
||||
<< "' is reserved and must be escaped with the preceding '\\'");
|
||||
@ -2258,6 +2262,7 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
||||
CHECK(have_blockquote);
|
||||
have_blockquote = false;
|
||||
text[result_size++] = text[i];
|
||||
can_start_blockquote = true;
|
||||
utf16_offset += 1;
|
||||
skip_entity = false;
|
||||
break;
|
||||
|
@ -1424,6 +1424,7 @@ TEST(MessageEntities, parse_markdown) {
|
||||
check_parse_markdown("π π![π](tg://emoji?test=1#&id=25)", "Custom emoji URL must have an emoji identifier");
|
||||
check_parse_markdown("π π![π](tg://emoji?test=1231&id=025)", "Invalid custom emoji identifier specified");
|
||||
check_parse_markdown(">*b\n>ld \n>bo\nld*\nasd\ndef", "Can't find end of Bold entity at byte offset 1");
|
||||
check_parse_markdown(">\n*a*>2", "Character '>' is reserved and must be escaped with the preceding '\\'");
|
||||
|
||||
check_parse_markdown("", "", {});
|
||||
check_parse_markdown("\\\\", "\\", {});
|
||||
@ -1493,6 +1494,8 @@ TEST(MessageEntities, parse_markdown) {
|
||||
check_parse_markdown("abc\n> \n> \n>\ndef", "abc\n \n \n\ndef", {{td::MessageEntity::Type::BlockQuote, 4, 5}});
|
||||
check_parse_markdown(">", "", {});
|
||||
check_parse_markdown(">a", "a", {{td::MessageEntity::Type::BlockQuote, 0, 1}});
|
||||
check_parse_markdown("\r>a", "\ra", {{td::MessageEntity::Type::BlockQuote, 1, 1}});
|
||||
check_parse_markdown("\r\r>\r\ra\r\n\r", "\r\r\r\ra\r\n\r", {{td::MessageEntity::Type::BlockQuote, 2, 5}});
|
||||
check_parse_markdown(
|
||||
">*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic "
|
||||
"bold___ bold*",
|
||||
@ -1513,6 +1516,22 @@ TEST(MessageEntities, parse_markdown) {
|
||||
{{td::MessageEntity::Type::Code, 0, 14}, {td::MessageEntity::Type::BlockQuote, 15, 4}});
|
||||
check_parse_markdown(">1", "1", {{td::MessageEntity::Type::BlockQuote, 0, 1}});
|
||||
check_parse_markdown(">\n1", "\n1", {{td::MessageEntity::Type::BlockQuote, 0, 1}});
|
||||
check_parse_markdown(">\n\r>2", "\n\r2",
|
||||
{{td::MessageEntity::Type::BlockQuote, 0, 1}, {td::MessageEntity::Type::BlockQuote, 2, 1}});
|
||||
check_parse_markdown(">\n**>2", "\n2",
|
||||
{{td::MessageEntity::Type::BlockQuote, 0, 1}, {td::MessageEntity::Type::BlockQuote, 1, 1}});
|
||||
// check_parse_markdown("*>abcd*", "abcd",
|
||||
// {{td::MessageEntity::Type::BlockQuote, 0, 4}, {td::MessageEntity::Type::Bold, 0, 4}});
|
||||
check_parse_markdown(">*abcd*", "abcd",
|
||||
{{td::MessageEntity::Type::BlockQuote, 0, 4}, {td::MessageEntity::Type::Bold, 0, 4}});
|
||||
// check_parse_markdown(">*abcd\n*", "abcd\n",
|
||||
// {{td::MessageEntity::Type::BlockQuote, 0, 5}, {td::MessageEntity::Type::Bold, 0, 5}});
|
||||
check_parse_markdown(">*abcd*\n", "abcd\n",
|
||||
{{td::MessageEntity::Type::BlockQuote, 0, 5}, {td::MessageEntity::Type::Bold, 0, 4}});
|
||||
check_parse_markdown("*>abcd\n*", "abcd\n",
|
||||
{{td::MessageEntity::Type::BlockQuote, 0, 5}, {td::MessageEntity::Type::Bold, 0, 5}});
|
||||
check_parse_markdown("abc\n>def\n>def\n\r>ghi2\njkl", "abc\ndef\ndef\n\rghi2\njkl",
|
||||
{{td::MessageEntity::Type::BlockQuote, 4, 8}, {td::MessageEntity::Type::BlockQuote, 13, 5}});
|
||||
}
|
||||
|
||||
static void check_parse_markdown_v3(td::string text, td::vector<td::MessageEntity> entities,
|
||||
|
Loading…
Reference in New Issue
Block a user