Support custom emoji in parse_html.

This commit is contained in:
levlam 2022-07-19 14:13:37 +03:00
parent 8e09a83506
commit eef3fe5932
2 changed files with 19 additions and 1 deletions

View File

@@ -3007,7 +3007,8 @@ static Result<vector<MessageEntity>> do_parse_html(CSlice text, string &result)
  string tag_name = to_lower(text.substr(begin_pos + 1, i - begin_pos - 1));
  if (tag_name != "a" && tag_name != "b" && tag_name != "strong" && tag_name != "i" && tag_name != "em" &&
  tag_name != "s" && tag_name != "strike" && tag_name != "del" && tag_name != "u" && tag_name != "ins" &&
- tag_name != "tg-spoiler" && tag_name != "span" && tag_name != "pre" && tag_name != "code") {
+ tag_name != "tg-spoiler" && tag_name != "tg-emoji" && tag_name != "span" && tag_name != "pre" &&
+ tag_name != "code") {
  return Status::Error(400, PSLICE()
  << "Unsupported start tag \"" << tag_name << "\" at byte offset " << begin_pos);
  }
@@ -3085,6 +3086,8 @@ static Result<vector<MessageEntity>> do_parse_html(CSlice text, string &result)
  argument = attribute_value.substr(9);
  } else if (tag_name == "span" && attribute_name == Slice("class") && begins_with(attribute_value, "tg-")) {
  argument = attribute_value.substr(3);
+ } else if (tag_name == "tg-emoji" && attribute_name == Slice("emoji-id")) {
+ argument = std::move(attribute_value);
  }
  }
@@ -3130,6 +3133,12 @@ static Result<vector<MessageEntity>> do_parse_html(CSlice text, string &result)
  entities.emplace_back(MessageEntity::Type::Underline, entity_offset, entity_length);
  } else if (tag_name == "tg-spoiler" || (tag_name == "span" && nested_entities.back().argument == "spoiler")) {
  entities.emplace_back(MessageEntity::Type::Spoiler, entity_offset, entity_length);
+ } else if (tag_name == "tg-emoji") {
+ auto r_document_id = to_integer_safe<int64>(nested_entities.back().argument);
+ if (r_document_id.is_error() || r_document_id.ok() == 0) {
+ return Status::Error(400, "Invalid custom emoji identifier specified");
+ }
+ entities.emplace_back(MessageEntity::Type::CustomEmoji, entity_offset, entity_length, r_document_id.ok());
  } else if (tag_name == "a") {
  auto url = std::move(nested_entities.back().argument);
  if (url.empty()) {

View File

@@ -1329,6 +1329,15 @@ TEST(MessageEntities, parse_html) {
  {{td::MessageEntity::Type::Pre, 6, 7}, {td::MessageEntity::Type::Code, 6, 6}});
  check_parse_html("🏟 🏟&lt;<pre> <code class=\"language-fift\">🏟 🏟&lt;</></>", "🏟 🏟< 🏟 🏟<",
  {{td::MessageEntity::Type::Pre, 6, 7}, {td::MessageEntity::Type::Code, 7, 6}});
+ check_parse_html("➑️ ➑️<tg-emoji emoji-id = \"12345\">➑️ ➑️</tg-emoji><b>➑️ ➑️</b>",
+ "➑️ ➑️➑️ ➑️➑️ ➑️",
+ {{td::MessageEntity::Type::CustomEmoji, 5, 5, static_cast<td::int64>(12345)},
+ {td::MessageEntity::Type::Bold, 10, 5}});
+ check_parse_html("🏟 🏟<tg-emoji emoji-id=\"54321\">🏟 &lt🏟</tg-emoji>", "🏟 🏟🏟 <🏟",
+ {{td::MessageEntity::Type::CustomEmoji, 5, 6, static_cast<td::int64>(54321)}});
+ check_parse_html(
+ "🏟 🏟<b aba = caba><tg-emoji emoji-id=\"1\">🏟</tg-emoji>1</b>", "🏟 🏟🏟1",
+ {{td::MessageEntity::Type::Bold, 5, 3}, {td::MessageEntity::Type::CustomEmoji, 5, 2, static_cast<td::int64>(1)}});
  }
  static void check_parse_markdown(td::string text, const td::string &result,