Allow attributes without a value and support expandable blockquote entities in HTML parse_mode.

This commit is contained in:
levlam 2024-05-21 11:50:41 +03:00
parent 663eb78da5
commit 36ebd42a92
2 changed files with 26 additions and 8 deletions

View File

@@ -3261,7 +3261,8 @@ Result<vector<MessageEntity>> parse_html(string &str) {
break;
}
auto attribute_begin_pos = i;
while (!is_space(text[i]) && text[i] != '=') {
while (!is_space(text[i]) && text[i] != '=' && text[i] != '>' && text[i] != '/' && text[i] != '"' &&
text[i] != '\'') {
i++;
}
Slice attribute_name(text + attribute_begin_pos, i - attribute_begin_pos);
@@ -3273,8 +3274,14 @@ Result<vector<MessageEntity>> parse_html(string &str) {
i++;
}
if (text[i] != '=') {
return Status::Error(400, PSLICE() << "Expected equal sign in declaration of an attribute of the tag \""
<< tag_name << "\" at byte offset " << begin_pos);
if (text[i] == 0) {
return Status::Error(400, PSLICE()
<< "Unclosed start tag \"" << tag_name << "\" at byte offset " << begin_pos);
}
if (tag_name == "blockquote" && attribute_name == Slice("expandable")) {
argument = "1";
}
continue;
}
i++;
while (text[i] != 0 && is_space(text[i])) {
@@ -3331,6 +3338,8 @@ Result<vector<MessageEntity>> parse_html(string &str) {
argument = attribute_value.substr(3);
} else if (tag_name == "tg-emoji" && attribute_name == Slice("emoji-id")) {
argument = std::move(attribute_value);
} else if (tag_name == "blockquote" && attribute_name == Slice("expandable")) {
argument = "1";
}
}
@@ -3416,7 +3425,11 @@ Result<vector<MessageEntity>> parse_html(string &str) {
nested_entities.back().argument);
}
} else if (tag_name == "blockquote") {
entities.emplace_back(MessageEntity::Type::BlockQuote, entity_offset, entity_length);
if (!nested_entities.back().argument.empty()) {
entities.emplace_back(MessageEntity::Type::ExpandableBlockQuote, entity_offset, entity_length);
} else {
entities.emplace_back(MessageEntity::Type::BlockQuote, entity_offset, entity_length);
}
} else {
UNREACHABLE();
}
@@ -3426,7 +3439,7 @@ Result<vector<MessageEntity>> parse_html(string &str) {
}
if (!nested_entities.empty()) {
return Status::Error(
400, PSLICE() << "Can't find end tag corresponding to start tag " << nested_entities.back().tag_name);
400, PSLICE() << "Can't find end tag corresponding to start tag \"" << nested_entities.back().tag_name << '"');
}
for (auto &entity : entities) {

View File

@@ -1243,8 +1243,7 @@ TEST(MessageEntities, parse_html) {
check_parse_html("🏟 🏟&lt;<abac aba>", "Unsupported start tag \"abac\" at byte offset 13");
check_parse_html("🏟 🏟&lt;<abac>", "Unsupported start tag \"abac\" at byte offset 13");
check_parse_html("🏟 🏟&lt;<i =aba>", "Empty attribute name in the tag \"i\" at byte offset 13");
check_parse_html("🏟 🏟&lt;<i aba>",
"Expected equal sign in declaration of an attribute of the tag \"i\" at byte offset 13");
check_parse_html("🏟 🏟&lt;<i aba>", "Can't find end tag corresponding to start tag \"i\"");
check_parse_html("🏟 🏟&lt;<i aba = ", "Unclosed start tag \"i\" at byte offset 13");
check_parse_html("🏟 🏟&lt;<i aba = 190azAz-.,", "Unexpected end of name token at byte offset 27");
check_parse_html("🏟 🏟&lt;<i aba = \"&lt;&gt;&quot;>", "Unclosed start tag at byte offset 13");
@@ -1352,8 +1351,14 @@ TEST(MessageEntities, parse_html) {
check_parse_html("🏟 🏟<b aba = caba><tg-emoji emoji-id=\"1\">🏟</tg-emoji>1</b>", "🏟 🏟🏟1",
{{td::MessageEntity::Type::Bold, 5, 3},
{td::MessageEntity::Type::CustomEmoji, 5, 2, td::CustomEmojiId(static_cast<td::int64>(1))}});
check_parse_html("<blockquote cite=\"\">a&lt;<pre >b;</></>", "a<b;",
check_parse_html("<blockquote cite=\"\" askdlbas nasjdbaj nj12b3>a&lt;<pre >b;</></>", "a<b;",
{{td::MessageEntity::Type::BlockQuote, 0, 4}, {td::MessageEntity::Type::Pre, 2, 2}});
check_parse_html("<blockquote expandable>a&lt;<pre >b;</></>", "a<b;",
{{td::MessageEntity::Type::ExpandableBlockQuote, 0, 4}, {td::MessageEntity::Type::Pre, 2, 2}});
check_parse_html("<blockquote expandable asd>a&lt;<pre >b;</></>", "a<b;",
{{td::MessageEntity::Type::ExpandableBlockQuote, 0, 4}, {td::MessageEntity::Type::Pre, 2, 2}});
check_parse_html("<blockquote expandable=false>a&lt;<pre >b;</></>", "a<b;",
{{td::MessageEntity::Type::ExpandableBlockQuote, 0, 4}, {td::MessageEntity::Type::Pre, 2, 2}});
}
static void check_parse_markdown(td::string text, const td::string &result,