Support custom emoji in MarkdownV2.

This commit is contained in:
levlam 2022-07-18 23:40:57 +03:00
parent 585191bb6f
commit 8e09a83506
5 changed files with 98 additions and 2 deletions

View File

@ -1577,7 +1577,7 @@ UserId LinkManager::get_link_user_id(Slice url) {
}
Slice host("user");
if (!begins_with(url, host)) {
if (!begins_with(url, host) || (url.size() > host.size() && Slice("/?#").find(url[host.size()]) == Slice::npos)) {
return UserId();
}
url.remove_prefix(host.size());
@ -1605,6 +1605,48 @@ UserId LinkManager::get_link_user_id(Slice url) {
return UserId();
}
// Extracts the custom emoji document identifier from a link such as
// "tg://emoji?id=12345". The link is matched case-insensitively, the "//" after
// the scheme and a single '/' after the host are optional, and everything from
// the first '#' of the query onwards is ignored.
// Returns the value of the first "id" query parameter, or a 400 error that names
// what is wrong with the link.
Result<int64> LinkManager::get_link_custom_emoji_document_id(Slice url) {
  // Work on a lower-cased copy; url is re-pointed at it and consumed piece by piece.
  string lowered_url = to_lower(url);
  url = lowered_url;

  const Slice scheme("tg:");
  if (!begins_with(url, scheme)) {
    return Status::Error(400, "Custom emoji URL must have scheme tg");
  }
  url.remove_prefix(scheme.size());
  if (begins_with(url, "//")) {
    url.remove_prefix(2);
  }

  const Slice host("emoji");
  // The host must be followed by the end of the link or by one of '/', '?', '#',
  // so a longer word that merely starts with the host is rejected.
  bool is_host_valid = begins_with(url, host);
  if (is_host_valid && url.size() > host.size()) {
    is_host_valid = Slice("/?#").find(url[host.size()]) != Slice::npos;
  }
  if (!is_host_valid) {
    return Status::Error(400, PSLICE() << "Custom emoji URL must have host \"" << host << '"');
  }
  url.remove_prefix(host.size());
  if (begins_with(url, "/")) {
    url.remove_prefix(1);
  }

  // Only a query string may follow the host.
  if (!begins_with(url, "?")) {
    return Status::Error(400, "Custom emoji URL must have an emoji identifier");
  }
  url.remove_prefix(1);
  // Drop the fragment, if any, before splitting the query into parameters.
  url.truncate(url.find('#'));

  for (auto query_parameter : full_split(url, '&')) {
    Slice name;
    Slice raw_value;
    std::tie(name, raw_value) = split(query_parameter, '=');
    if (name != Slice("id")) {
      continue;
    }

    // The first "id" parameter decides the outcome; zero is not a valid identifier.
    auto r_id = to_integer_safe<int64>(raw_value);
    if (r_id.is_ok() && r_id.ok() != 0) {
      return r_id.ok();
    }
    return Status::Error(400, "Invalid custom emoji identifier specified");
  }
  return Status::Error(400, "Custom emoji URL must have an emoji identifier");
}
Result<MessageLinkInfo> LinkManager::get_message_link_info(Slice url) {
if (url.empty()) {
return Status::Error("URL must be non-empty");

View File

@ -80,6 +80,8 @@ class LinkManager final : public Actor {
static UserId get_link_user_id(Slice url);
// Returns the custom emoji document identifier taken from the "id" query parameter
// of a tg:emoji link, or an error describing why the link is not acceptable.
static Result<int64> get_link_custom_emoji_document_id(Slice url);
static Result<MessageLinkInfo> get_message_link_info(Slice url);
private:

View File

@ -116,6 +116,9 @@ StringBuilder &operator<<(StringBuilder &string_builder, const MessageEntity &me
if (message_entity.user_id.is_valid()) {
string_builder << ", " << message_entity.user_id;
}
if (message_entity.document_id != 0) {
string_builder << ", emoji = " << message_entity.document_id;
}
string_builder << ']';
return string_builder;
}
@ -2004,6 +2007,8 @@ static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &r
return c == '~';
case MessageEntity::Type::Spoiler:
return c == '|' && text[i + 1] == '|';
case MessageEntity::Type::CustomEmoji:
return c == ']';
default:
UNREACHABLE();
return false;
@ -2070,6 +2075,15 @@ static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &r
type = MessageEntity::Type::Code;
}
break;
case '!':
if (text[i + 1] == '[') {
i++;
type = MessageEntity::Type::CustomEmoji;
} else {
return Status::Error(400, PSLICE() << "Character '" << text[i]
<< "' is reserved and must be escaped with the preceding '\\'");
}
break;
default:
return Status::Error(
400, PSLICE() << "Character '" << text[i] << "' is reserved and must be escaped with the preceding '\\'");
@ -2080,6 +2094,7 @@ static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &r
auto type = nested_entities.back().type;
auto argument = std::move(nested_entities.back().argument);
UserId user_id;
int64 document_id = 0;
bool skip_entity = utf16_offset == nested_entities.back().entity_offset;
switch (type) {
case MessageEntity::Type::Bold:
@ -2126,6 +2141,28 @@ static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &r
}
break;
}
case MessageEntity::Type::CustomEmoji: {
if (text[i + 1] != '(') {
return Status::Error(400, "Custom emoji entity must contain a tg://emoji URL");
}
i += 2;
string url;
auto url_begin_pos = i;
while (i < text.size() && text[i] != ')') {
if (text[i] == '\\' && text[i + 1] > 0 && text[i + 1] <= 126) {
url += text[i + 1];
i += 2;
continue;
}
url += text[i++];
}
if (text[i] != ')') {
return Status::Error(400, PSLICE()
<< "Can't find end of a custom emoji URL at byte offset " << url_begin_pos);
}
TRY_RESULT_ASSIGN(document_id, LinkManager::get_link_custom_emoji_document_id(url));
break;
}
default:
UNREACHABLE();
return false;
@ -2136,6 +2173,8 @@ static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &r
auto entity_length = utf16_offset - entity_offset;
if (user_id.is_valid()) {
entities.emplace_back(entity_offset, entity_length, user_id);
} else if (document_id != 0) {
entities.emplace_back(type, entity_offset, entity_length, document_id);
} else {
entities.emplace_back(type, entity_offset, entity_length, std::move(argument));
}

View File

@ -81,7 +81,8 @@ class MessageEntity {
bool operator==(const MessageEntity &other) const {
return offset == other.offset && length == other.length && type == other.type &&
media_timestamp == other.media_timestamp && argument == other.argument && user_id == other.user_id;
media_timestamp == other.media_timestamp && argument == other.argument && user_id == other.user_id &&
document_id == other.document_id;
}
bool operator<(const MessageEntity &other) const {

View File

@ -1384,6 +1384,16 @@ TEST(MessageEntities, parse_markdown) {
check_parse_markdown("🏟 🏟__🏟 _🏟___", "Can't find end of Italic entity at byte offset 23");
check_parse_markdown("🏟 🏟__", "Can't find end of Underline entity at byte offset 9");
check_parse_markdown("🏟 🏟||test\\|", "Can't find end of Spoiler entity at byte offset 9");
check_parse_markdown("🏟 🏟!", "Character '!' is reserved and must be escaped with the preceding '\\'");
check_parse_markdown("🏟 🏟![", "Can't find end of CustomEmoji entity at byte offset 9");
check_parse_markdown("🏟 🏟![👍", "Can't find end of CustomEmoji entity at byte offset 9");
check_parse_markdown("🏟 🏟![👍]", "Custom emoji entity must contain a tg://emoji URL");
check_parse_markdown("🏟 🏟![👍](tg://emoji?id=1234", "Can't find end of a custom emoji URL at byte offset 17");
check_parse_markdown("🏟 🏟![👍](t://emoji?id=1234)", "Custom emoji URL must have scheme tg");
check_parse_markdown("🏟 🏟![👍](tg:emojis?id=1234)", "Custom emoji URL must have host \"emoji\"");
check_parse_markdown("🏟 🏟![👍](tg://emoji#test)", "Custom emoji URL must have an emoji identifier");
check_parse_markdown("🏟 🏟![👍](tg://emoji?test=1#&id=25)", "Custom emoji URL must have an emoji identifier");
check_parse_markdown("🏟 🏟![👍](tg://emoji?test=1231&id=025)", "Invalid custom emoji identifier specified");
check_parse_markdown("", "", {});
check_parse_markdown("\\\\", "\\", {});
@ -1455,6 +1465,8 @@ TEST(MessageEntities, parse_markdown) {
check_parse_markdown("[telegram\\.org](asdasd)", "telegram.org", {});
check_parse_markdown("[telegram\\.org](tg:user?id=123456)", "telegram.org",
{{0, 12, td::UserId(static_cast<td::int64>(123456))}});
check_parse_markdown("🏟 🏟![👍](TG://EMoJI/?test=1231&id=25#id=32)a", "🏟 🏟👍a",
{{td::MessageEntity::Type::CustomEmoji, 5, 2, static_cast<td::int64>(25)}});
}
static void check_parse_markdown_v3(td::string text, td::vector<td::MessageEntity> entities,