Add BlockQuote parsing in MarkdownV2.
This commit is contained in:
parent
bdbee0765b
commit
ea450564b0
@ -2010,8 +2010,9 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
|||||||
, entity_begin_pos(entity_begin_pos) {
|
, entity_begin_pos(entity_begin_pos) {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
std::vector<EntityInfo> nested_entities;
|
vector<EntityInfo> nested_entities;
|
||||||
|
|
||||||
|
bool have_blockquote = false;
|
||||||
for (size_t i = 0; i < text.size(); i++) {
|
for (size_t i = 0; i < text.size(); i++) {
|
||||||
auto c = static_cast<unsigned char>(text[i]);
|
auto c = static_cast<unsigned char>(text[i]);
|
||||||
if (c == '\\' && text[i + 1] > 0 && text[i + 1] <= 126) {
|
if (c == '\\' && text[i + 1] > 0 && text[i + 1] <= 126) {
|
||||||
@ -2021,7 +2022,7 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
Slice reserved_characters("_*[]()~`>#+-=|{}.!");
|
Slice reserved_characters("_*[]()~`>#+-=|{}.!\n");
|
||||||
if (!nested_entities.empty()) {
|
if (!nested_entities.empty()) {
|
||||||
switch (nested_entities.back().type) {
|
switch (nested_entities.back().type) {
|
||||||
case MessageEntity::Type::Code:
|
case MessageEntity::Type::Code:
|
||||||
@ -2045,6 +2046,9 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
|||||||
bool is_end_of_an_entity = false;
|
bool is_end_of_an_entity = false;
|
||||||
if (!nested_entities.empty()) {
|
if (!nested_entities.empty()) {
|
||||||
is_end_of_an_entity = [&] {
|
is_end_of_an_entity = [&] {
|
||||||
|
if (have_blockquote && c == '\n' && (i + 1 == text.size() || text[i + 1] != '>')) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
switch (nested_entities.back().type) {
|
switch (nested_entities.back().type) {
|
||||||
case MessageEntity::Type::Bold:
|
case MessageEntity::Type::Bold:
|
||||||
return c == '*';
|
return c == '*';
|
||||||
@ -2065,6 +2069,8 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
|||||||
return c == '|' && text[i + 1] == '|';
|
return c == '|' && text[i + 1] == '|';
|
||||||
case MessageEntity::Type::CustomEmoji:
|
case MessageEntity::Type::CustomEmoji:
|
||||||
return c == ']';
|
return c == ']';
|
||||||
|
case MessageEntity::Type::BlockQuote:
|
||||||
|
return false;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return false;
|
return false;
|
||||||
@ -2140,14 +2146,42 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
|||||||
<< "' is reserved and must be escaped with the preceding '\\'");
|
<< "' is reserved and must be escaped with the preceding '\\'");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case '\n':
|
||||||
|
utf16_offset += 1;
|
||||||
|
text[result_size++] = '\n';
|
||||||
|
type = MessageEntity::Type::Size;
|
||||||
|
if (i + 1 < text.size() && text[i + 1] == '>') {
|
||||||
|
i++;
|
||||||
|
if (!have_blockquote) {
|
||||||
|
type = MessageEntity::Type::BlockQuote;
|
||||||
|
have_blockquote = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case '>':
|
||||||
|
if (i == 0) {
|
||||||
|
type = MessageEntity::Type::BlockQuote;
|
||||||
|
have_blockquote = true;
|
||||||
|
} else {
|
||||||
|
return Status::Error(400, PSLICE() << "Character '" << text[i]
|
||||||
|
<< "' is reserved and must be escaped with the preceding '\\'");
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
return Status::Error(
|
return Status::Error(
|
||||||
400, PSLICE() << "Character '" << text[i] << "' is reserved and must be escaped with the preceding '\\'");
|
400, PSLICE() << "Character '" << text[i] << "' is reserved and must be escaped with the preceding '\\'");
|
||||||
}
|
}
|
||||||
|
if (type == MessageEntity::Type::Size) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
nested_entities.emplace_back(type, std::move(argument), utf16_offset, entity_byte_offset, result_size);
|
nested_entities.emplace_back(type, std::move(argument), utf16_offset, entity_byte_offset, result_size);
|
||||||
} else {
|
} else {
|
||||||
// end of an entity
|
// end of an entity
|
||||||
auto type = nested_entities.back().type;
|
auto type = nested_entities.back().type;
|
||||||
|
if (c == '\n' && type != MessageEntity::Type::BlockQuote) {
|
||||||
|
return Status::Error(400, PSLICE() << "Can't find end of " << nested_entities.back().type
|
||||||
|
<< " entity at byte offset " << nested_entities.back().entity_byte_offset);
|
||||||
|
}
|
||||||
auto argument = std::move(nested_entities.back().argument);
|
auto argument = std::move(nested_entities.back().argument);
|
||||||
UserId user_id;
|
UserId user_id;
|
||||||
CustomEmojiId custom_emoji_id;
|
CustomEmojiId custom_emoji_id;
|
||||||
@ -2220,6 +2254,12 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
|||||||
TRY_RESULT_ASSIGN(custom_emoji_id, LinkManager::get_link_custom_emoji_id(url));
|
TRY_RESULT_ASSIGN(custom_emoji_id, LinkManager::get_link_custom_emoji_id(url));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case MessageEntity::Type::BlockQuote:
|
||||||
|
CHECK(have_blockquote);
|
||||||
|
have_blockquote = false;
|
||||||
|
text[result_size++] = text[i];
|
||||||
|
utf16_offset += 1;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return false;
|
return false;
|
||||||
@ -2239,6 +2279,18 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
|
|||||||
nested_entities.pop_back();
|
nested_entities.pop_back();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (have_blockquote) {
|
||||||
|
CHECK(!nested_entities.empty());
|
||||||
|
if (nested_entities.back().type == MessageEntity::Type::BlockQuote) {
|
||||||
|
have_blockquote = false;
|
||||||
|
auto entity_offset = nested_entities.back().entity_offset;
|
||||||
|
auto entity_length = utf16_offset - entity_offset;
|
||||||
|
if (entity_length != 0) {
|
||||||
|
entities.emplace_back(MessageEntity::Type::BlockQuote, entity_offset, entity_length);
|
||||||
|
}
|
||||||
|
nested_entities.pop_back();
|
||||||
|
}
|
||||||
|
}
|
||||||
if (!nested_entities.empty()) {
|
if (!nested_entities.empty()) {
|
||||||
return Status::Error(400, PSLICE() << "Can't find end of " << nested_entities.back().type
|
return Status::Error(400, PSLICE() << "Can't find end of " << nested_entities.back().type
|
||||||
<< " entity at byte offset " << nested_entities.back().entity_byte_offset);
|
<< " entity at byte offset " << nested_entities.back().entity_byte_offset);
|
||||||
|
@ -1375,6 +1375,9 @@ TEST(MessageEntities, parse_html) {
|
|||||||
static void check_parse_markdown(td::string text, const td::string &result,
|
static void check_parse_markdown(td::string text, const td::string &result,
|
||||||
const td::vector<td::MessageEntity> &entities) {
|
const td::vector<td::MessageEntity> &entities) {
|
||||||
auto r_entities = td::parse_markdown_v2(text);
|
auto r_entities = td::parse_markdown_v2(text);
|
||||||
|
if (r_entities.is_error()) {
|
||||||
|
LOG(ERROR) << r_entities.error();
|
||||||
|
}
|
||||||
ASSERT_TRUE(r_entities.is_ok());
|
ASSERT_TRUE(r_entities.is_ok());
|
||||||
ASSERT_EQ(entities, r_entities.ok());
|
ASSERT_EQ(entities, r_entities.ok());
|
||||||
ASSERT_STREQ(result, text);
|
ASSERT_STREQ(result, text);
|
||||||
@ -1389,7 +1392,7 @@ static void check_parse_markdown(td::string text, td::Slice error_message) {
|
|||||||
|
|
||||||
TEST(MessageEntities, parse_markdown) {
|
TEST(MessageEntities, parse_markdown) {
|
||||||
td::Slice reserved_characters("]()>#+-=|{}.!");
|
td::Slice reserved_characters("]()>#+-=|{}.!");
|
||||||
td::Slice begin_characters("_*[~`");
|
td::Slice begin_characters("_*[~`>");
|
||||||
for (char c = 1; c < 126; c++) {
|
for (char c = 1; c < 126; c++) {
|
||||||
if (begin_characters.find(c) != td::Slice::npos) {
|
if (begin_characters.find(c) != td::Slice::npos) {
|
||||||
continue;
|
continue;
|
||||||
@ -1426,6 +1429,7 @@ TEST(MessageEntities, parse_markdown) {
|
|||||||
check_parse_markdown("π π__", "Can't find end of Underline entity at byte offset 9");
|
check_parse_markdown("π π__", "Can't find end of Underline entity at byte offset 9");
|
||||||
check_parse_markdown("π π||test\\|", "Can't find end of Spoiler entity at byte offset 9");
|
check_parse_markdown("π π||test\\|", "Can't find end of Spoiler entity at byte offset 9");
|
||||||
check_parse_markdown("π π!", "Character '!' is reserved and must be escaped with the preceding '\\'");
|
check_parse_markdown("π π!", "Character '!' is reserved and must be escaped with the preceding '\\'");
|
||||||
|
check_parse_markdown("π π>", "Character '>' is reserved and must be escaped with the preceding '\\'");
|
||||||
check_parse_markdown("π π![", "Can't find end of CustomEmoji entity at byte offset 9");
|
check_parse_markdown("π π![", "Can't find end of CustomEmoji entity at byte offset 9");
|
||||||
check_parse_markdown("π π![π", "Can't find end of CustomEmoji entity at byte offset 9");
|
check_parse_markdown("π π![π", "Can't find end of CustomEmoji entity at byte offset 9");
|
||||||
check_parse_markdown("π π![π]", "Custom emoji entity must contain a tg://emoji URL");
|
check_parse_markdown("π π![π]", "Custom emoji entity must contain a tg://emoji URL");
|
||||||
@ -1435,6 +1439,7 @@ TEST(MessageEntities, parse_markdown) {
|
|||||||
check_parse_markdown("π π![π](tg://emoji#test)", "Custom emoji URL must have an emoji identifier");
|
check_parse_markdown("π π![π](tg://emoji#test)", "Custom emoji URL must have an emoji identifier");
|
||||||
check_parse_markdown("π π![π](tg://emoji?test=1#&id=25)", "Custom emoji URL must have an emoji identifier");
|
check_parse_markdown("π π![π](tg://emoji?test=1#&id=25)", "Custom emoji URL must have an emoji identifier");
|
||||||
check_parse_markdown("π π![π](tg://emoji?test=1231&id=025)", "Invalid custom emoji identifier specified");
|
check_parse_markdown("π π![π](tg://emoji?test=1231&id=025)", "Invalid custom emoji identifier specified");
|
||||||
|
check_parse_markdown(">*b\n>ld \n>bo\nld*\nasd\ndef", "Can't find end of Bold entity at byte offset 1");
|
||||||
|
|
||||||
check_parse_markdown("", "", {});
|
check_parse_markdown("", "", {});
|
||||||
check_parse_markdown("\\\\", "\\", {});
|
check_parse_markdown("\\\\", "\\", {});
|
||||||
@ -1499,6 +1504,29 @@ TEST(MessageEntities, parse_markdown) {
|
|||||||
{{0, 12, td::UserId(static_cast<td::int64>(123456))}});
|
{{0, 12, td::UserId(static_cast<td::int64>(123456))}});
|
||||||
check_parse_markdown("π π![π](TG://EMoJI/?test=1231&id=25#id=32)a", "π ππa",
|
check_parse_markdown("π π![π](TG://EMoJI/?test=1231&id=25#id=32)a", "π ππa",
|
||||||
{{td::MessageEntity::Type::CustomEmoji, 5, 2, td::CustomEmojiId(static_cast<td::int64>(25))}});
|
{{td::MessageEntity::Type::CustomEmoji, 5, 2, td::CustomEmojiId(static_cast<td::int64>(25))}});
|
||||||
|
check_parse_markdown("> \n> \n>", " \n \n", {{td::MessageEntity::Type::BlockQuote, 0, 4}});
|
||||||
|
check_parse_markdown("> \\>\n \\> \n>", " >\n > \n", {{td::MessageEntity::Type::BlockQuote, 0, 3}});
|
||||||
|
check_parse_markdown("abc\n> \n> \n>\ndef", "abc\n \n \n\ndef", {{td::MessageEntity::Type::BlockQuote, 4, 5}});
|
||||||
|
check_parse_markdown(">", "", {});
|
||||||
|
check_parse_markdown(">a", "a", {{td::MessageEntity::Type::BlockQuote, 0, 1}});
|
||||||
|
check_parse_markdown(
|
||||||
|
">*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic "
|
||||||
|
"bold___ bold*",
|
||||||
|
"bold italic bold italic bold strikethrough italic bold strikethrough spoiler underline italic bold bold",
|
||||||
|
{{td::MessageEntity::Type::BlockQuote, 0, 103},
|
||||||
|
{td::MessageEntity::Type::Bold, 0, 103},
|
||||||
|
{td::MessageEntity::Type::Italic, 5, 93},
|
||||||
|
{td::MessageEntity::Type::Strikethrough, 17, 59},
|
||||||
|
{td::MessageEntity::Type::Spoiler, 43, 33},
|
||||||
|
{td::MessageEntity::Type::Underline, 77, 21}});
|
||||||
|
check_parse_markdown(">*b\n>ld \n>bo\n>ld*\nasd\ndef", "b\nld \nbo\nld\nasd\ndef",
|
||||||
|
{{td::MessageEntity::Type::BlockQuote, 0, 12}, {td::MessageEntity::Type::Bold, 0, 11}});
|
||||||
|
check_parse_markdown("*a\n>b\n>ld \n>bo\n>ld\nasd*\ndef", "a\nb\nld \nbo\nld\nasd\ndef",
|
||||||
|
{{td::MessageEntity::Type::Bold, 0, 17}, {td::MessageEntity::Type::BlockQuote, 2, 12}});
|
||||||
|
check_parse_markdown(">`b\n>ld \n>bo\nld`\n>asd\ndef", "b\n>ld \n>bo\nld\nasd\ndef",
|
||||||
|
{{td::MessageEntity::Type::BlockQuote, 0, 18}, {td::MessageEntity::Type::Code, 0, 13}});
|
||||||
|
check_parse_markdown("`>b\n>ld \n>bo\nld`\n>asd\ndef", ">b\n>ld \n>bo\nld\nasd\ndef",
|
||||||
|
{{td::MessageEntity::Type::Code, 0, 14}, {td::MessageEntity::Type::BlockQuote, 15, 4}});
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_parse_markdown_v3(td::string text, td::vector<td::MessageEntity> entities,
|
static void check_parse_markdown_v3(td::string text, td::vector<td::MessageEntity> entities,
|
||||||
|
Loading…
Reference in New Issue
Block a user