Support new entity types in parse_html.

GitOrigin-RevId: 1a12c3736f4e0921f133b3ac914620b4a01f0efe
This commit is contained in:
levlam 2019-12-10 05:46:58 +03:00
parent 95bee16523
commit 1bb59a3261
2 changed files with 22 additions and 1 deletions

View File

@ -1759,7 +1759,8 @@ static Result<vector<MessageEntity>> do_parse_html(CSlice text, string &result)
}
string tag_name = to_lower(text.substr(begin_pos + 1, i - begin_pos - 1));
if (tag_name != "em" && tag_name != "strong" && tag_name != "a" && tag_name != "b" && tag_name != "i" &&
if (tag_name != "a" && tag_name != "b" && tag_name != "strong" && tag_name != "i" && tag_name != "em" &&
tag_name != "s" && tag_name != "strike" && tag_name != "del" && tag_name != "u" && tag_name != "ins" &&
tag_name != "pre" && tag_name != "code") {
return Status::Error(400, PSLICE()
<< "Unsupported start tag \"" << tag_name << "\" at byte offset " << begin_pos);
@ -1870,6 +1871,10 @@ static Result<vector<MessageEntity>> do_parse_html(CSlice text, string &result)
entities.emplace_back(MessageEntity::Type::Italic, entity_offset, entity_length);
} else if (tag_name == "b" || tag_name == "strong") {
entities.emplace_back(MessageEntity::Type::Bold, entity_offset, entity_length);
} else if (tag_name == "s" || tag_name == "strike" || tag_name == "del") {
entities.emplace_back(MessageEntity::Type::Strikethrough, entity_offset, entity_length);
} else if (tag_name == "u" || tag_name == "ins") {
entities.emplace_back(MessageEntity::Type::Underline, entity_offset, entity_length);
} else if (tag_name == "a") {
auto url = std::move(nested_entities.back().argument);
if (url.empty()) {

View File

@ -776,6 +776,22 @@ TEST(MessageEntities, parse_html) {
check_parse_html("&lt;&gt;&amp;&quot;&laquo;&raquo;&#12345678;", "<>&\"&laquo;&raquo;&#12345678;", {});
check_parse_html("➡️ ➡️<i>➡️ ➡️</i>", "➡️ ➡️➡️ ➡️",
{{td::MessageEntity::Type::Italic, 5, 5}});
check_parse_html("➡️ ➡️<em>➡️ ➡️</em>", "➡️ ➡️➡️ ➡️",
{{td::MessageEntity::Type::Italic, 5, 5}});
check_parse_html("➡️ ➡️<b>➡️ ➡️</b>", "➡️ ➡️➡️ ➡️",
{{td::MessageEntity::Type::Bold, 5, 5}});
check_parse_html("➡️ ➡️<strong>➡️ ➡️</strong>", "➡️ ➡️➡️ ➡️",
{{td::MessageEntity::Type::Bold, 5, 5}});
check_parse_html("➡️ ➡️<u>➡️ ➡️</u>", "➡️ ➡️➡️ ➡️",
{{td::MessageEntity::Type::Underline, 5, 5}});
check_parse_html("➡️ ➡️<ins>➡️ ➡️</ins>", "➡️ ➡️➡️ ➡️",
{{td::MessageEntity::Type::Underline, 5, 5}});
check_parse_html("➡️ ➡️<s>➡️ ➡️</s>", "➡️ ➡️➡️ ➡️",
{{td::MessageEntity::Type::Strikethrough, 5, 5}});
check_parse_html("➡️ ➡️<strike>➡️ ➡️</strike>", "➡️ ➡️➡️ ➡️",
{{td::MessageEntity::Type::Strikethrough, 5, 5}});
check_parse_html("➡️ ➡️<del>➡️ ➡️</del>", "➡️ ➡️➡️ ➡️",
{{td::MessageEntity::Type::Strikethrough, 5, 5}});
check_parse_html("➡️ ➡️<i>➡️ ➡️</i><b>➡️ ➡️</b>", "➡️ ➡️➡️ ➡️➡️ ➡️",
{{td::MessageEntity::Type::Italic, 5, 5}, {td::MessageEntity::Type::Bold, 10, 5}});
check_parse_html("🏟 🏟<i>🏟 &lt🏟</i>", "🏟 🏟🏟 <🏟", {{td::MessageEntity::Type::Italic, 5, 6}});