From 8e09a835068f6998438330cac929a7897ce0f65b Mon Sep 17 00:00:00 2001 From: levlam Date: Mon, 18 Jul 2022 23:40:57 +0300 Subject: [PATCH] Support custom emoji in MarkdownV2. --- td/telegram/LinkManager.cpp | 44 ++++++++++++++++++++++++++++++++++- td/telegram/LinkManager.h | 2 ++ td/telegram/MessageEntity.cpp | 39 +++++++++++++++++++++++++++++++ td/telegram/MessageEntity.h | 3 ++- test/message_entities.cpp | 12 ++++++++++ 5 files changed, 98 insertions(+), 2 deletions(-) diff --git a/td/telegram/LinkManager.cpp b/td/telegram/LinkManager.cpp index 28c82b793..6a4992d00 100644 --- a/td/telegram/LinkManager.cpp +++ b/td/telegram/LinkManager.cpp @@ -1577,7 +1577,7 @@ UserId LinkManager::get_link_user_id(Slice url) { } Slice host("user"); - if (!begins_with(url, host)) { + if (!begins_with(url, host) || (url.size() > host.size() && Slice("/?#").find(url[host.size()]) == Slice::npos)) { return UserId(); } url.remove_prefix(host.size()); @@ -1605,6 +1605,48 @@ UserId LinkManager::get_link_user_id(Slice url) { return UserId(); } +Result<int64> LinkManager::get_link_custom_emoji_document_id(Slice url) { + string lower_cased_url = to_lower(url); + url = lower_cased_url; + + Slice link_scheme("tg:"); + if (!begins_with(url, link_scheme)) { + return Status::Error(400, "Custom emoji URL must have scheme tg"); + } + url.remove_prefix(link_scheme.size()); + if (begins_with(url, "//")) { + url.remove_prefix(2); + } + + Slice host("emoji"); + if (!begins_with(url, host) || (url.size() > host.size() && Slice("/?#").find(url[host.size()]) == Slice::npos)) { + return Status::Error(400, PSLICE() << "Custom emoji URL must have host \"" << host << '"'); + } + url.remove_prefix(host.size()); + if (begins_with(url, "/")) { + url.remove_prefix(1); + } + if (!begins_with(url, "?")) { + return Status::Error(400, "Custom emoji URL must have an emoji identifier"); + } + url.remove_prefix(1); + url.truncate(url.find('#')); + + for (auto parameter : full_split(url, '&')) { + Slice key; + Slice value; + 
std::tie(key, value) = split(parameter, '='); + if (key == Slice("id")) { + auto r_document_id = to_integer_safe<int64>(value); + if (r_document_id.is_error() || r_document_id.ok() == 0) { + return Status::Error(400, "Invalid custom emoji identifier specified"); + } + return r_document_id.ok(); + } + } + return Status::Error(400, "Custom emoji URL must have an emoji identifier"); +} + Result<MessageLinkInfo> LinkManager::get_message_link_info(Slice url) { if (url.empty()) { return Status::Error("URL must be non-empty"); diff --git a/td/telegram/LinkManager.h b/td/telegram/LinkManager.h index 6c4be78c5..d2e9f3850 100644 --- a/td/telegram/LinkManager.h +++ b/td/telegram/LinkManager.h @@ -80,6 +80,8 @@ class LinkManager final : public Actor { static UserId get_link_user_id(Slice url); + static Result<int64> get_link_custom_emoji_document_id(Slice url); + static Result<MessageLinkInfo> get_message_link_info(Slice url); private: diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index 0fc50de30..0cd512b9b 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -116,6 +116,9 @@ StringBuilder &operator<<(StringBuilder &string_builder, const MessageEntity &me if (message_entity.user_id.is_valid()) { string_builder << ", " << message_entity.user_id; } + if (message_entity.document_id != 0) { + string_builder << ", emoji = " << message_entity.document_id; + } string_builder << ']'; return string_builder; } @@ -2004,6 +2007,8 @@ static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &r return c == '~'; case MessageEntity::Type::Spoiler: return c == '|' && text[i + 1] == '|'; + case MessageEntity::Type::CustomEmoji: + return c == ']'; default: UNREACHABLE(); return false; @@ -2070,6 +2075,15 @@ static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &r type = MessageEntity::Type::Code; } break; + case '!': + if (text[i + 1] == '[') { + i++; + type = MessageEntity::Type::CustomEmoji; + } else { + return Status::Error(400, PSLICE() << "Character '" << text[i] + << "' is 
reserved and must be escaped with the preceding '\\'"); + } + break; default: return Status::Error( 400, PSLICE() << "Character '" << text[i] << "' is reserved and must be escaped with the preceding '\\'"); @@ -2080,6 +2094,7 @@ static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &r auto type = nested_entities.back().type; auto argument = std::move(nested_entities.back().argument); UserId user_id; + int64 document_id = 0; bool skip_entity = utf16_offset == nested_entities.back().entity_offset; switch (type) { case MessageEntity::Type::Bold: @@ -2126,6 +2141,28 @@ static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &r } break; } + case MessageEntity::Type::CustomEmoji: { + if (text[i + 1] != '(') { + return Status::Error(400, "Custom emoji entity must contain a tg://emoji URL"); + } + i += 2; + string url; + auto url_begin_pos = i; + while (i < text.size() && text[i] != ')') { + if (text[i] == '\\' && text[i + 1] > 0 && text[i + 1] <= 126) { + url += text[i + 1]; + i += 2; + continue; + } + url += text[i++]; + } + if (text[i] != ')') { + return Status::Error(400, PSLICE() + << "Can't find end of a custom emoji URL at byte offset " << url_begin_pos); + } + TRY_RESULT_ASSIGN(document_id, LinkManager::get_link_custom_emoji_document_id(url)); + break; + } default: UNREACHABLE(); return false; @@ -2136,6 +2173,8 @@ static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &r auto entity_length = utf16_offset - entity_offset; if (user_id.is_valid()) { entities.emplace_back(entity_offset, entity_length, user_id); + } else if (document_id != 0) { + entities.emplace_back(type, entity_offset, entity_length, document_id); } else { entities.emplace_back(type, entity_offset, entity_length, std::move(argument)); } diff --git a/td/telegram/MessageEntity.h b/td/telegram/MessageEntity.h index 8f13d5af1..539271577 100644 --- a/td/telegram/MessageEntity.h +++ b/td/telegram/MessageEntity.h @@ -81,7 +81,8 @@ class MessageEntity { bool operator==(const MessageEntity &other) const { 
return offset == other.offset && length == other.length && type == other.type && - media_timestamp == other.media_timestamp && argument == other.argument && user_id == other.user_id; + media_timestamp == other.media_timestamp && argument == other.argument && user_id == other.user_id && + document_id == other.document_id; } bool operator<(const MessageEntity &other) const { diff --git a/test/message_entities.cpp b/test/message_entities.cpp index 57fd9d80d..e5393fa0d 100644 --- a/test/message_entities.cpp +++ b/test/message_entities.cpp @@ -1384,6 +1384,16 @@ TEST(MessageEntities, parse_markdown) { check_parse_markdown("🏟 🏟__🏟 _🏟___", "Can't find end of Italic entity at byte offset 23"); check_parse_markdown("🏟 🏟__", "Can't find end of Underline entity at byte offset 9"); check_parse_markdown("🏟 🏟||test\\|", "Can't find end of Spoiler entity at byte offset 9"); + check_parse_markdown("🏟 🏟!", "Character '!' is reserved and must be escaped with the preceding '\\'"); + check_parse_markdown("🏟 🏟![", "Can't find end of CustomEmoji entity at byte offset 9"); + check_parse_markdown("🏟 🏟![👍", "Can't find end of CustomEmoji entity at byte offset 9"); + check_parse_markdown("🏟 🏟![👍]", "Custom emoji entity must contain a tg://emoji URL"); + check_parse_markdown("🏟 🏟![👍](tg://emoji?id=1234", "Can't find end of a custom emoji URL at byte offset 17"); + check_parse_markdown("🏟 🏟![👍](t://emoji?id=1234)", "Custom emoji URL must have scheme tg"); + check_parse_markdown("🏟 🏟![👍](tg:emojis?id=1234)", "Custom emoji URL must have host \"emoji\""); + check_parse_markdown("🏟 🏟![👍](tg://emoji#test)", "Custom emoji URL must have an emoji identifier"); + check_parse_markdown("🏟 🏟![👍](tg://emoji?test=1#&id=25)", "Custom emoji URL must have an emoji identifier"); + check_parse_markdown("🏟 🏟![👍](tg://emoji?test=1231&id=025)", "Invalid custom emoji identifier specified"); check_parse_markdown("", "", {}); check_parse_markdown("\\\\", "\\", {}); @@ -1455,6 +1465,8 @@ TEST(MessageEntities, 
parse_markdown) { check_parse_markdown("[telegram\\.org](asdasd)", "telegram.org", {}); check_parse_markdown("[telegram\\.org](tg:user?id=123456)", "telegram.org", {{0, 12, td::UserId(static_cast<td::int64>(123456))}}); + check_parse_markdown("🏟 🏟![👍](TG://EMoJI/?test=1231&id=25#id=32)a", "🏟 🏟👍a", + {{td::MessageEntity::Type::CustomEmoji, 5, 2, static_cast<td::int64>(25)}}); } static void check_parse_markdown_v3(td::string text, td::vector<td::MessageEntity> entities,