Add parse_markdown_v2.
GitOrigin-RevId: ea2ce8bad64becc53d2e6466019469dffec2dc27
This commit is contained in:
parent
b1d1ea2e6c
commit
fdf70df492
@ -28,71 +28,52 @@ int MessageEntity::get_type_priority(Type type) {
|
|||||||
return types[static_cast<int32>(type)];
|
return types[static_cast<int32>(type)];
|
||||||
}
|
}
|
||||||
|
|
||||||
StringBuilder &operator<<(StringBuilder &string_builder, const MessageEntity &message_entity) {
|
StringBuilder &operator<<(StringBuilder &string_builder, const MessageEntity::Type &message_entity_type) {
|
||||||
bool has_argument = false;
|
switch (message_entity_type) {
|
||||||
string_builder << '[';
|
|
||||||
switch (message_entity.type) {
|
|
||||||
case MessageEntity::Type::Mention:
|
case MessageEntity::Type::Mention:
|
||||||
string_builder << "Mention";
|
return string_builder << "Mention";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::Hashtag:
|
case MessageEntity::Type::Hashtag:
|
||||||
string_builder << "Hashtag";
|
return string_builder << "Hashtag";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::BotCommand:
|
case MessageEntity::Type::BotCommand:
|
||||||
string_builder << "BotCommand";
|
return string_builder << "BotCommand";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::Url:
|
case MessageEntity::Type::Url:
|
||||||
string_builder << "Url";
|
return string_builder << "Url";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::EmailAddress:
|
case MessageEntity::Type::EmailAddress:
|
||||||
string_builder << "EmailAddress";
|
return string_builder << "EmailAddress";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::Bold:
|
case MessageEntity::Type::Bold:
|
||||||
string_builder << "Bold";
|
return string_builder << "Bold";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::Italic:
|
case MessageEntity::Type::Italic:
|
||||||
string_builder << "Italic";
|
return string_builder << "Italic";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::Underline:
|
case MessageEntity::Type::Underline:
|
||||||
string_builder << "Underline";
|
return string_builder << "Underline";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::Strikethrough:
|
case MessageEntity::Type::Strikethrough:
|
||||||
string_builder << "Strikethrough";
|
return string_builder << "Strikethrough";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::BlockQuote:
|
case MessageEntity::Type::BlockQuote:
|
||||||
string_builder << "BlockQuote";
|
return string_builder << "BlockQuote";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::Code:
|
case MessageEntity::Type::Code:
|
||||||
string_builder << "Code";
|
return string_builder << "Code";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::Pre:
|
case MessageEntity::Type::Pre:
|
||||||
string_builder << "Pre";
|
return string_builder << "Pre";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::PreCode:
|
case MessageEntity::Type::PreCode:
|
||||||
string_builder << "PreCode";
|
return string_builder << "PreCode";
|
||||||
has_argument = true;
|
|
||||||
break;
|
|
||||||
case MessageEntity::Type::TextUrl:
|
case MessageEntity::Type::TextUrl:
|
||||||
string_builder << "TextUrl";
|
return string_builder << "TextUrl";
|
||||||
has_argument = true;
|
|
||||||
break;
|
|
||||||
case MessageEntity::Type::MentionName:
|
case MessageEntity::Type::MentionName:
|
||||||
string_builder << "MentionName";
|
return string_builder << "MentionName";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::Cashtag:
|
case MessageEntity::Type::Cashtag:
|
||||||
string_builder << "Cashtag";
|
return string_builder << "Cashtag";
|
||||||
break;
|
|
||||||
case MessageEntity::Type::PhoneNumber:
|
case MessageEntity::Type::PhoneNumber:
|
||||||
string_builder << "PhoneNumber";
|
return string_builder << "PhoneNumber";
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
string_builder << "Impossible";
|
return string_builder << "Impossible";
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
string_builder << ", offset = " << message_entity.offset << ", length = " << message_entity.length;
|
StringBuilder &operator<<(StringBuilder &string_builder, const MessageEntity &message_entity) {
|
||||||
if (has_argument) {
|
string_builder << '[' << message_entity.type << ", offset = " << message_entity.offset
|
||||||
|
<< ", length = " << message_entity.length;
|
||||||
|
if (!message_entity.argument.empty()) {
|
||||||
string_builder << ", argument = \"" << message_entity.argument << "\"";
|
string_builder << ", argument = \"" << message_entity.argument << "\"";
|
||||||
}
|
}
|
||||||
if (message_entity.user_id.is_valid()) {
|
if (message_entity.user_id.is_valid()) {
|
||||||
@ -1322,7 +1303,7 @@ Result<vector<MessageEntity>> parse_markdown(string &text) {
|
|||||||
i += 2;
|
i += 2;
|
||||||
is_pre = true;
|
is_pre = true;
|
||||||
size_t language_end = i;
|
size_t language_end = i;
|
||||||
while (language_end < size && !is_space(text[language_end]) && text[language_end] != '`') {
|
while (!is_space(text[language_end]) && text[language_end] != '`') {
|
||||||
language_end++;
|
language_end++;
|
||||||
}
|
}
|
||||||
if (i != language_end && language_end < size && text[language_end] != '`') {
|
if (i != language_end && language_end < size && text[language_end] != '`') {
|
||||||
@ -1405,7 +1386,224 @@ Result<vector<MessageEntity>> parse_markdown(string &text) {
|
|||||||
return entities;
|
return entities;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32 decode_html_entity(Slice text, size_t &pos) {
|
// Parses MarkdownV2-style markup contained in `text`.
//
// The text with all markup characters removed is appended to `result`. On success returns the
// found entities, sorted, with offsets and lengths measured in UTF-16 code units of the produced
// text. Returns an error with code 400 if a reserved character is used without escaping, if a URL
// in "[text](url)" isn't terminated, or if some entity is left unclosed. `result` may already
// contain partial output when an error is returned.
//
// NOTE(review): the parser peeks at text[i + 1] and text[i + 2] without explicit bound checks;
// this presumably relies on CSlice being zero-terminated - confirm against the CSlice contract.
static Result<vector<MessageEntity>> do_parse_markdown_v2(CSlice text, string &result) {
  vector<MessageEntity> entities;
  int32 utf16_offset = 0;  // length of the text produced so far, in UTF-16 code units

  // Description of an entity, which was opened, but wasn't closed yet
  struct EntityInfo {
    MessageEntity::Type type;
    string argument;            // language of a PreCode entity; empty for other entities
    int32 entity_offset;        // UTF-16 offset of the entity begin in the produced text
    size_t entity_byte_offset;  // byte offset of the opening markup in the source text, used in error messages
    size_t entity_begin_pos;    // byte position of the entity begin in `result`

    EntityInfo(MessageEntity::Type type, string argument, int32 entity_offset, size_t entity_byte_offset,
               size_t entity_begin_pos)
        : type(type)
        , argument(std::move(argument))
        , entity_offset(entity_offset)
        , entity_byte_offset(entity_byte_offset)
        , entity_begin_pos(entity_begin_pos) {
    }
  };
  vector<EntityInfo> nested_entities;  // stack of currently opened entities, the innermost is the last

  for (size_t i = 0; i < text.size(); i++) {
    auto c = static_cast<unsigned char>(text[i]);
    // a backslash followed by a character with code 1-126 escapes that character
    if (c == '\\' && text[i + 1] > 0 && text[i + 1] <= 126) {
      i++;
      utf16_offset += 1;
      result += text[i];
      continue;
    }

    // inside of Code, Pre and PreCode entities only '`' keeps its special meaning
    Slice reserved_characters("_*[]()~`>#+=|{}.!");
    if (!nested_entities.empty()) {
      switch (nested_entities.back().type) {
        case MessageEntity::Type::Code:
        case MessageEntity::Type::Pre:
        case MessageEntity::Type::PreCode:
          reserved_characters = Slice("`");
          break;
        default:
          break;
      }
    }

    if (reserved_characters.find(text[i]) == Slice::npos) {
      // an ordinary byte is copied as is; UTF-16 length is counted once per UTF-8 character
      if (is_utf8_character_first_code_unit(c)) {
        utf16_offset += 1 + (c >= 0xf0);  // >= 4 bytes in symbol => surrogate pair
      }
      result.push_back(text[i]);
      continue;
    }

    // a reserved character either closes the innermost opened entity or opens a new one
    bool is_end_of_an_entity = false;
    if (!nested_entities.empty()) {
      is_end_of_an_entity = [&] {
        switch (nested_entities.back().type) {
          case MessageEntity::Type::Bold:
            return c == '*';
          case MessageEntity::Type::Italic:
            // "__" inside of an Italic entity begins an Underline entity instead of closing Italic
            return c == '_' && text[i + 1] != '_';
          case MessageEntity::Type::Code:
            return c == '`';
          case MessageEntity::Type::Pre:
          case MessageEntity::Type::PreCode:
            return c == '`' && text[i + 1] == '`' && text[i + 2] == '`';
          case MessageEntity::Type::TextUrl:
            return c == ']';
          case MessageEntity::Type::Underline:
            return c == '_' && text[i + 1] == '_';
          case MessageEntity::Type::Strikethrough:
            return c == '~';
          default:
            UNREACHABLE();
            return false;
        }
      }();
    }

    if (!is_end_of_an_entity) {
      // begin of an entity
      MessageEntity::Type type;
      string argument;
      // remembered before `i` is advanced over multi-character markup; kept as size_t to avoid narrowing
      const size_t entity_byte_offset = i;
      switch (c) {
        case '_':
          if (text[i + 1] == '_') {
            type = MessageEntity::Type::Underline;
            i++;
          } else {
            type = MessageEntity::Type::Italic;
          }
          break;
        case '*':
          type = MessageEntity::Type::Bold;
          break;
        case '~':
          type = MessageEntity::Type::Strikethrough;
          break;
        case '[':
          type = MessageEntity::Type::TextUrl;
          break;
        case '`':
          if (text[i + 1] == '`' && text[i + 2] == '`') {
            i += 3;
            type = MessageEntity::Type::Pre;
            // a non-empty word directly after "```" is the language, unless it is ended by '`' or the end of the text
            size_t language_end = i;
            while (language_end < text.size() && !is_space(text[language_end]) && text[language_end] != '`') {
              language_end++;
            }
            if (i != language_end && language_end < text.size() && text[language_end] != '`') {
              type = MessageEntity::Type::PreCode;
              argument = text.substr(i, language_end - i).str();
              i = language_end;
            }
            // skip one new line in the beginning of the text
            if (text[i] == '\n' || text[i] == '\r') {
              if ((text[i + 1] == '\n' || text[i + 1] == '\r') && text[i] != text[i + 1]) {
                i += 2;
              } else {
                i++;
              }
            }

            // compensate for the increment done by the loop
            i--;
          } else {
            type = MessageEntity::Type::Code;
          }
          break;
        default:
          return Status::Error(
              400, PSLICE() << "Character '" << text[i] << "' is reserved and must be escaped with the preceding '\\'");
      }
      nested_entities.emplace_back(type, std::move(argument), utf16_offset, entity_byte_offset, result.size());
    } else {
      // end of an entity
      auto type = nested_entities.back().type;
      auto argument = std::move(nested_entities.back().argument);
      UserId user_id;
      // empty entities are dropped
      bool skip_entity = utf16_offset == nested_entities.back().entity_offset;
      switch (type) {
        case MessageEntity::Type::Bold:
        case MessageEntity::Type::Italic:
        case MessageEntity::Type::Code:
        case MessageEntity::Type::Strikethrough:
          break;
        case MessageEntity::Type::Underline:
          // skip the second '_' of the closing "__"
          i++;
          break;
        case MessageEntity::Type::Pre:
        case MessageEntity::Type::PreCode:
          // skip the remaining two '`' of the closing "```"
          i += 2;
          break;
        case MessageEntity::Type::TextUrl: {
          string url;
          if (text[i + 1] != '(') {
            // use text as a url
            url = result.substr(nested_entities.back().entity_begin_pos);
          } else {
            i += 2;
            auto url_begin_pos = i;
            // read the URL up to the first unescaped ')', removing escaping backslashes
            while (i < text.size() && text[i] != ')') {
              if (text[i] == '\\' && text[i + 1] > 0 && text[i + 1] <= 126) {
                url += text[i + 1];
                i += 2;
                continue;
              }
              url += text[i++];
            }
            if (text[i] != ')') {
              return Status::Error(400, PSLICE() << "Can't find end of a URL at byte offset " << url_begin_pos);
            }
          }
          // the link either mentions a user or must be a valid URL; otherwise the entity is dropped
          user_id = get_link_user_id(url);
          if (!user_id.is_valid()) {
            auto r_url = check_url(url);
            if (r_url.is_error()) {
              skip_entity = true;
            } else {
              argument = r_url.move_as_ok();
            }
          }
          break;
        }
        default:
          // only the entity types handled above are ever pushed to nested_entities
          UNREACHABLE();
          return Status::Error(500, "Unreachable");
      }

      if (!skip_entity) {
        auto entity_offset = nested_entities.back().entity_offset;
        auto entity_length = utf16_offset - entity_offset;
        if (user_id.is_valid()) {
          entities.emplace_back(entity_offset, entity_length, user_id);
        } else {
          entities.emplace_back(type, entity_offset, entity_length, std::move(argument));
        }
      }
      nested_entities.pop_back();
    }
  }
  if (!nested_entities.empty()) {
    return Status::Error(400, PSLICE() << "Can't find end of " << nested_entities.back().type
                                       << " entity at byte offset " << nested_entities.back().entity_byte_offset);
  }

  // nested entities are closed before their parents, so the entities aren't ordered yet
  std::sort(entities.begin(), entities.end());

  return entities;
}
|
||||||
|
|
||||||
|
// Parses MarkdownV2-style markup in `text` in place.
//
// On success `text` is replaced with the text without markup and the found entities are returned.
// `text` is assigned only after do_parse_markdown_v2 succeeded, so it is left untouched if
// TRY_RESULT returns the parsing error to the caller.
Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
  string result;
  TRY_RESULT(entities, do_parse_markdown_v2(text, result));
  // `result` isn't used anymore, so its buffer can be handed over instead of being copied
  text = std::move(result);
  return entities;
}
|
||||||
|
|
||||||
|
static uint32 decode_html_entity(CSlice text, size_t &pos) {
|
||||||
auto c = static_cast<unsigned char>(text[pos]);
|
auto c = static_cast<unsigned char>(text[pos]);
|
||||||
if (c != '&') {
|
if (c != '&') {
|
||||||
return 0;
|
return 0;
|
||||||
@ -1458,7 +1656,7 @@ static uint32 decode_html_entity(Slice text, size_t &pos) {
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Result<vector<MessageEntity>> do_parse_html(Slice text, string &result) {
|
static Result<vector<MessageEntity>> do_parse_html(CSlice text, string &result) {
|
||||||
vector<MessageEntity> entities;
|
vector<MessageEntity> entities;
|
||||||
int32 utf16_offset = 0;
|
int32 utf16_offset = 0;
|
||||||
|
|
||||||
|
@ -97,6 +97,8 @@ class MessageEntity {
|
|||||||
static int get_type_priority(Type type);
|
static int get_type_priority(Type type);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
StringBuilder &operator<<(StringBuilder &string_builder, const MessageEntity::Type &message_entity_type);
|
||||||
|
|
||||||
StringBuilder &operator<<(StringBuilder &string_builder, const MessageEntity &message_entity);
|
StringBuilder &operator<<(StringBuilder &string_builder, const MessageEntity &message_entity);
|
||||||
|
|
||||||
struct FormattedText {
|
struct FormattedText {
|
||||||
@ -140,6 +142,8 @@ string get_first_url(Slice text, const vector<MessageEntity> &entities);
|
|||||||
|
|
||||||
Result<vector<MessageEntity>> parse_markdown(string &text);
|
Result<vector<MessageEntity>> parse_markdown(string &text);
|
||||||
|
|
||||||
|
Result<vector<MessageEntity>> parse_markdown_v2(string &text);
|
||||||
|
|
||||||
Result<vector<MessageEntity>> parse_html(string &text);
|
Result<vector<MessageEntity>> parse_html(string &text);
|
||||||
|
|
||||||
vector<tl_object_ptr<telegram_api::MessageEntity>> get_input_message_entities(const ContactsManager *contacts_manager,
|
vector<tl_object_ptr<telegram_api::MessageEntity>> get_input_message_entities(const ContactsManager *contacts_manager,
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
//
|
//
|
||||||
#include "td/telegram/MessageEntity.h"
|
#include "td/telegram/MessageEntity.h"
|
||||||
|
|
||||||
|
#include "td/utils/common.h"
|
||||||
#include "td/utils/format.h"
|
#include "td/utils/format.h"
|
||||||
#include "td/utils/logging.h"
|
#include "td/utils/logging.h"
|
||||||
#include "td/utils/tests.h"
|
#include "td/utils/tests.h"
|
||||||
@ -730,7 +731,7 @@ static void check_parse_html(td::string text, const td::string &result, const td
|
|||||||
ASSERT_STREQ(result, text);
|
ASSERT_STREQ(result, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_parse_html(td::string text, const td::string &error_message) {
|
static void check_parse_html(td::string text, td::Slice error_message) {
|
||||||
auto r_entities = td::parse_html(text);
|
auto r_entities = td::parse_html(text);
|
||||||
ASSERT_TRUE(r_entities.is_error());
|
ASSERT_TRUE(r_entities.is_error());
|
||||||
ASSERT_EQ(400, r_entities.error().code());
|
ASSERT_EQ(400, r_entities.error().code());
|
||||||
@ -764,6 +765,8 @@ TEST(MessageEntities, parse_html) {
|
|||||||
check_parse_html("<>&"«»�", "<>&\"«»�", {});
|
check_parse_html("<>&"«»�", "<>&\"«»�", {});
|
||||||
check_parse_html("β‘οΈ β‘οΈ<i>β‘οΈ β‘οΈ</i>", "β‘οΈ β‘οΈβ‘οΈ β‘οΈ",
|
check_parse_html("β‘οΈ β‘οΈ<i>β‘οΈ β‘οΈ</i>", "β‘οΈ β‘οΈβ‘οΈ β‘οΈ",
|
||||||
{{td::MessageEntity::Type::Italic, 5, 5}});
|
{{td::MessageEntity::Type::Italic, 5, 5}});
|
||||||
|
check_parse_html("β‘οΈ β‘οΈ<i>β‘οΈ β‘οΈ</i><b>β‘οΈ β‘οΈ</b>", "β‘οΈ β‘οΈβ‘οΈ β‘οΈβ‘οΈ β‘οΈ",
|
||||||
|
{{td::MessageEntity::Type::Italic, 5, 5}, {td::MessageEntity::Type::Bold, 10, 5}});
|
||||||
check_parse_html("π π<i>π <π</i>", "π ππ <π", {{td::MessageEntity::Type::Italic, 5, 6}});
|
check_parse_html("π π<i>π <π</i>", "π ππ <π", {{td::MessageEntity::Type::Italic, 5, 6}});
|
||||||
check_parse_html("π π<i>π ><b aba = caba><π</b></i>", "π ππ ><π",
|
check_parse_html("π π<i>π ><b aba = caba><π</b></i>", "π ππ ><π",
|
||||||
{{td::MessageEntity::Type::Italic, 5, 7}, {td::MessageEntity::Type::Bold, 9, 3}});
|
{{td::MessageEntity::Type::Italic, 5, 7}, {td::MessageEntity::Type::Bold, 9, 3}});
|
||||||
@ -777,6 +780,8 @@ TEST(MessageEntities, parse_html) {
|
|||||||
{{td::MessageEntity::Type::Italic, 6, 1}});
|
{{td::MessageEntity::Type::Italic, 6, 1}});
|
||||||
check_parse_html("π π<<i aba = '<>"'>a</>", "π π<a",
|
check_parse_html("π π<<i aba = '<>"'>a</>", "π π<a",
|
||||||
{{td::MessageEntity::Type::Italic, 6, 1}});
|
{{td::MessageEntity::Type::Italic, 6, 1}});
|
||||||
|
check_parse_html("π π<<i>π π<</>", "π π<π π<",
|
||||||
|
{{td::MessageEntity::Type::Italic, 6, 6}});
|
||||||
check_parse_html("π π<<i>a</ >", "π π<a", {{td::MessageEntity::Type::Italic, 6, 1}});
|
check_parse_html("π π<<i>a</ >", "π π<a", {{td::MessageEntity::Type::Italic, 6, 1}});
|
||||||
check_parse_html("π π<<i>a</i >", "π π<a", {{td::MessageEntity::Type::Italic, 6, 1}});
|
check_parse_html("π π<<i>a</i >", "π π<a", {{td::MessageEntity::Type::Italic, 6, 1}});
|
||||||
check_parse_html("π π<<b></b>", "π π<", {});
|
check_parse_html("π π<<b></b>", "π π<", {});
|
||||||
@ -806,3 +811,126 @@ TEST(MessageEntities, parse_html) {
|
|||||||
check_parse_html("<a>https://telegram.org/asdsa?asdasdwe#12e3we</a>", "https://telegram.org/asdsa?asdasdwe#12e3we",
|
check_parse_html("<a>https://telegram.org/asdsa?asdasdwe#12e3we</a>", "https://telegram.org/asdsa?asdasdwe#12e3we",
|
||||||
{{td::MessageEntity::Type::TextUrl, 0, 42, "https://telegram.org/asdsa?asdasdwe#12e3we"}});
|
{{td::MessageEntity::Type::TextUrl, 0, 42, "https://telegram.org/asdsa?asdasdwe#12e3we"}});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that td::parse_markdown_v2 succeeds on `text`, returns exactly `entities`
// and replaces `text` with `result`.
static void check_parse_markdown(td::string text, const td::string &result,
                                 const td::vector<td::MessageEntity> &entities) {
  // `text` is taken by value, because the parser rewrites it in place
  auto parse_result = td::parse_markdown_v2(text);
  ASSERT_TRUE(parse_result.is_ok());
  ASSERT_EQ(entities, parse_result.ok());
  ASSERT_STREQ(result, text);
}
|
||||||
|
|
||||||
|
// Checks that td::parse_markdown_v2 fails on `text` with error code 400
// and exactly the message `error_message`.
static void check_parse_markdown(td::string text, td::Slice error_message) {
  auto parse_result = td::parse_markdown_v2(text);
  ASSERT_TRUE(parse_result.is_error());
  ASSERT_EQ(400, parse_result.error().code());
  ASSERT_STREQ(error_message, parse_result.error().message());
}
|
||||||
|
|
||||||
|
// Tests td::parse_markdown_v2.
//
// Non-ASCII literals are chosen by their encoding: "🏟" takes 4 bytes in UTF-8 and 2 UTF-16 code
// units, "➡️" takes 6 bytes in UTF-8 and 2 UTF-16 code units. The byte offsets in the expected
// error messages and the UTF-16 offsets and lengths of the expected entities depend on that.
TEST(MessageEntities, parse_markdown) {
  // every ASCII character, which can't begin an entity, must either be passed through as is,
  // or be rejected when unescaped and accepted when escaped with a backslash
  td::Slice reserved_characters("]()>#+=|{}.!");
  td::Slice begin_characters("_*[~`");
  for (char c = 1; c < 126; c++) {
    if (begin_characters.find(c) != td::Slice::npos) {
      continue;
    }

    td::string text(1, c);
    if (reserved_characters.find(c) == td::Slice::npos) {
      check_parse_markdown(text, text, {});
    } else {
      check_parse_markdown(
          text, PSLICE() << "Character '" << c << "' is reserved and must be escaped with the preceding '\\'");

      td::string escaped_text = "\\" + text;
      check_parse_markdown(escaped_text, text, {});
    }
  }

  // unclosed entities and unterminated URLs
  check_parse_markdown("🏟 🏟_abacaba", "Can't find end of Italic entity at byte offset 9");
  check_parse_markdown("🏟 🏟_abac * asd ", "Can't find end of Bold entity at byte offset 15");
  check_parse_markdown("🏟 🏟_abac * asd _", "Can't find end of Italic entity at byte offset 21");
  check_parse_markdown("🏟 🏟`", "Can't find end of Code entity at byte offset 9");
  check_parse_markdown("🏟 🏟```", "Can't find end of Pre entity at byte offset 9");
  check_parse_markdown("🏟 🏟```a", "Can't find end of Pre entity at byte offset 9");
  check_parse_markdown("🏟 🏟```a ", "Can't find end of PreCode entity at byte offset 9");
  check_parse_markdown("🏟 🏟__🏟 🏟_", "Can't find end of Italic entity at byte offset 20");
  check_parse_markdown("🏟 🏟_🏟 🏟__", "Can't find end of Underline entity at byte offset 19");
  check_parse_markdown("🏟 🏟```🏟 🏟`", "Can't find end of Code entity at byte offset 21");
  check_parse_markdown("🏟 🏟```🏟 🏟_", "Can't find end of PreCode entity at byte offset 9");
  check_parse_markdown("🏟 🏟```🏟 🏟\\`", "Can't find end of PreCode entity at byte offset 9");
  check_parse_markdown("[telegram\\.org](asd\\)", "Can't find end of a URL at byte offset 16");
  check_parse_markdown("[telegram\\.org](", "Can't find end of a URL at byte offset 16");
  check_parse_markdown("[telegram\\.org](asd", "Can't find end of a URL at byte offset 16");
  check_parse_markdown("🏟 🏟__🏟 _🏟___", "Can't find end of Italic entity at byte offset 23");
  check_parse_markdown("🏟 🏟__", "Can't find end of Underline entity at byte offset 9");

  // escaping and plain text
  check_parse_markdown("", "", {});
  check_parse_markdown("\\\\", "\\", {});
  check_parse_markdown("\\\\\\", "\\\\", {});
  check_parse_markdown("\\\\\\\\\\_\\*\\`", "\\\\_*`", {});
  check_parse_markdown("➡️ ➡️", "➡️ ➡️", {});
  check_parse_markdown("🏟 🏟``", "🏟 🏟", {});
  check_parse_markdown("🏟 🏟_abac \\* asd _", "🏟 🏟abac * asd ", {{td::MessageEntity::Type::Italic, 5, 11}});
  check_parse_markdown("🏟 \\.🏟_🏟\\. 🏟_", "🏟 .🏟🏟. 🏟", {{td::MessageEntity::Type::Italic, 6, 6}});
  check_parse_markdown("\\\\\\a\\b\\c\\d\\e\\f\\1\\2\\3\\4\\➡️\\", "\\abcdef1234\\➡️\\", {});

  // simple and nested entities
  check_parse_markdown("➡️ ➡️_➡️ ➡️_", "➡️ ➡️➡️ ➡️", {{td::MessageEntity::Type::Italic, 5, 5}});
  check_parse_markdown("➡️ ➡️_➡️ ➡️_*➡️ ➡️*", "➡️ ➡️➡️ ➡️➡️ ➡️",
                       {{td::MessageEntity::Type::Italic, 5, 5}, {td::MessageEntity::Type::Bold, 10, 5}});
  check_parse_markdown("🏟 🏟_🏟 \\.🏟_", "🏟 🏟🏟 .🏟", {{td::MessageEntity::Type::Italic, 5, 6}});
  check_parse_markdown("🏟 🏟_🏟 *🏟*_", "🏟 🏟🏟 🏟",
                       {{td::MessageEntity::Type::Italic, 5, 5}, {td::MessageEntity::Type::Bold, 8, 2}});
  check_parse_markdown("🏟 🏟_🏟 __🏟___", "🏟 🏟🏟 🏟",
                       {{td::MessageEntity::Type::Italic, 5, 5}, {td::MessageEntity::Type::Underline, 8, 2}});
  check_parse_markdown("🏟 🏟__🏟 _🏟_ __", "🏟 🏟🏟 🏟 ",
                       {{td::MessageEntity::Type::Underline, 5, 6}, {td::MessageEntity::Type::Italic, 8, 2}});
  check_parse_markdown("🏟 🏟__🏟 _🏟_\\___", "🏟 🏟🏟 🏟_",
                       {{td::MessageEntity::Type::Underline, 5, 6}, {td::MessageEntity::Type::Italic, 8, 2}});

  // code and preformatted blocks; one new line after the language is skipped
  check_parse_markdown("🏟 🏟`🏟 🏟```", "🏟 🏟🏟 🏟", {{td::MessageEntity::Type::Code, 5, 5}});
  check_parse_markdown("🏟 🏟```🏟 🏟```", "🏟 🏟 🏟", {{td::MessageEntity::Type::PreCode, 5, 3, "🏟"}});
  check_parse_markdown("🏟 🏟```🏟\n🏟```", "🏟 🏟🏟", {{td::MessageEntity::Type::PreCode, 5, 2, "🏟"}});
  check_parse_markdown("🏟 🏟```🏟\r🏟```", "🏟 🏟🏟", {{td::MessageEntity::Type::PreCode, 5, 2, "🏟"}});
  check_parse_markdown("🏟 🏟```🏟\n\r🏟```", "🏟 🏟🏟", {{td::MessageEntity::Type::PreCode, 5, 2, "🏟"}});
  check_parse_markdown("🏟 🏟```🏟\r\n🏟```", "🏟 🏟🏟", {{td::MessageEntity::Type::PreCode, 5, 2, "🏟"}});
  check_parse_markdown("🏟 🏟```🏟\n\n🏟```", "🏟 🏟\n🏟", {{td::MessageEntity::Type::PreCode, 5, 3, "🏟"}});
  check_parse_markdown("🏟 🏟```🏟\r\r🏟```", "🏟 🏟\r🏟", {{td::MessageEntity::Type::PreCode, 5, 3, "🏟"}});
  check_parse_markdown("🏟 🏟```🏟 \\\\\\`🏟```", "🏟 🏟 \\`🏟", {{td::MessageEntity::Type::PreCode, 5, 5, "🏟"}});

  // empty entities are dropped
  check_parse_markdown("🏟 🏟**", "🏟 🏟", {});
  check_parse_markdown("🏟 🏟``", "🏟 🏟", {});
  check_parse_markdown("🏟 🏟``````", "🏟 🏟", {});
  check_parse_markdown("🏟 🏟____", "🏟 🏟", {});
  check_parse_markdown("`_* *_`__*` `*__", "_* *_ ",
                       {{td::MessageEntity::Type::Code, 0, 5},
                        {td::MessageEntity::Type::Code, 5, 1},
                        {td::MessageEntity::Type::Bold, 5, 1},
                        {td::MessageEntity::Type::Underline, 5, 1}});
  // the expected text consists of three spaces
  check_parse_markdown("_* * ` `_", "   ",
                       {{td::MessageEntity::Type::Italic, 0, 3},
                        {td::MessageEntity::Type::Bold, 0, 1},
                        {td::MessageEntity::Type::Code, 2, 1}});

  // links
  check_parse_markdown("[](telegram.org)", "", {});
  check_parse_markdown("[ ](telegram.org)", " ", {{td::MessageEntity::Type::TextUrl, 0, 1, "http://telegram.org/"}});
  check_parse_markdown("[ ](as)", " ", {});
  check_parse_markdown("[telegram\\.org]", "telegram.org",
                       {{td::MessageEntity::Type::TextUrl, 0, 12, "http://telegram.org/"}});
  check_parse_markdown("[telegram\\.org]a", "telegram.orga",
                       {{td::MessageEntity::Type::TextUrl, 0, 12, "http://telegram.org/"}});
  check_parse_markdown("[telegram\\.org](telegram.dog)", "telegram.org",
                       {{td::MessageEntity::Type::TextUrl, 0, 12, "http://telegram.dog/"}});
  check_parse_markdown("[telegram\\.org](https://telegram.dog?)", "telegram.org",
                       {{td::MessageEntity::Type::TextUrl, 0, 12, "https://telegram.dog/?"}});
  check_parse_markdown("[telegram\\.org](https://telegram.dog?\\\\\\()", "telegram.org",
                       {{td::MessageEntity::Type::TextUrl, 0, 12, "https://telegram.dog/?\\("}});
  check_parse_markdown("[telegram\\.org]()", "telegram.org", {});
  check_parse_markdown("[telegram\\.org](asdasd)", "telegram.org", {});
  check_parse_markdown("[telegram\\.org](tg:user?id=123456)", "telegram.org", {{0, 12, td::UserId(123456)}});
}
|
||||||
|
Loading…
Reference in New Issue
Block a user