Add synchronous td_api::parseMarkdown method.

GitOrigin-RevId: 89f59eda7076959a6bed9db006f895298e2fd66c
This commit is contained in:
levlam 2020-03-09 23:55:32 +03:00
parent c04ae16bda
commit 1068cac8f0
8 changed files with 874 additions and 8 deletions

View File

@ -2705,11 +2705,11 @@ deepLinkInfo text:formattedText need_update_application:Bool = DeepLinkInfo;
//@class TextParseMode @description Describes the way the text should be parsed for TextEntities
//@description The text should be parsed in markdown-style
//@version Version of the parser: 0 or 1 - Bot API Markdown parse mode, 2 - Bot API MarkdownV2 parse mode
//@description The text uses Markdown-style formatting
//@version Version of the parser: 0 or 1 - Telegram Bot API "Markdown" parse mode, 2 - Telegram Bot API "MarkdownV2" parse mode
textParseModeMarkdown version:int32 = TextParseMode;
//@description The text should be parsed in HTML-style
//@description The text uses HTML-style formatting. The same as Telegram Bot API "HTML" parse mode
textParseModeHTML = TextParseMode;
@ -3421,9 +3421,13 @@ editMessageSchedulingState chat_id:int53 message_id:int53 scheduling_state:Messa
//@description Returns all entities (mentions, hashtags, cashtags, bot commands, bank card numbers, URLs, and email addresses) contained in the text. This is an offline method. Can be called before authorization. Can be called synchronously @text The text in which to look for entities
getTextEntities text:string = TextEntities;
//@description Parses Bold, Italic, Underline, Strikethrough, Code, Pre, PreCode, TextUrl and MentionName entities contained in the text. This is an offline method. Can be called before authorization. Can be called synchronously @text The text which should be parsed @parse_mode Text parse mode
//@description Parses Bold, Italic, Underline, Strikethrough, Code, Pre, PreCode, TextUrl and MentionName entities contained in the text. This is an offline method. Can be called before authorization. Can be called synchronously @text The text to parse @parse_mode Text parse mode
parseTextEntities text:string parse_mode:TextParseMode = FormattedText;
//@description Parses Markdown entities in a human-friendly format, ignoring markup errors. This is an offline method. Can be called before authorization. Can be called synchronously
//@text The text to parse. For example, "__italic__ ~~strikethrough~~ **bold** `code` ```pre``` __[italic__ text_url](telegram.org) __italic**bold italic__bold**"
parseMarkdown text:formattedText = FormattedText;
//@description Returns the MIME type of a file, guessed by its extension. Returns an empty string on failure. This is an offline method. Can be called before authorization. Can be called synchronously @file_name The name of the file or path to the file
getFileMimeType file_name:string = Text;

Binary file not shown.

View File

@ -117,6 +117,7 @@ tl_object_ptr<td_api::TextEntityType> MessageEntity::get_text_entity_type_object
case MessageEntity::Type::TextUrl:
return make_tl_object<td_api::textEntityTypeTextUrl>(argument);
case MessageEntity::Type::MentionName:
// can't use contacts_manager, because can be called from a static request
return make_tl_object<td_api::textEntityTypeMentionName>(user_id.get());
case MessageEntity::Type::Cashtag:
return make_tl_object<td_api::textEntityTypeCashtag>();
@ -430,7 +431,8 @@ static vector<Slice> match_bank_card_numbers(Slice str) {
auto card_number_begin = ptr;
size_t digit_count = 0;
while (ptr != end && (is_digit(*ptr) || *ptr == ' ' || *ptr == '-')) {
if (*ptr == ' ' && digit_count >= 16 && digit_count <= 19 && digit_count == static_cast<size_t>(ptr - card_number_begin)) {
if (*ptr == ' ' && digit_count >= 16 && digit_count <= 19 &&
digit_count == static_cast<size_t>(ptr - card_number_begin)) {
// continuous card number
break;
}
@ -1861,6 +1863,557 @@ Result<vector<MessageEntity>> parse_markdown_v2(string &text) {
return entities;
}
// Scans the text for "[label](url)" constructions with a non-empty label and a URL accepted by check_url.
// For every match two slices of the given text are appended to the result, in this order:
// the "[label]" part including the brackets and the "(url)" part including the parentheses.
static vector<Slice> find_text_url_entities_v3(Slice text) {
  vector<Slice> found;
  const size_t length = text.size();
  size_t pos = 0;
  while (pos < length) {
    if (text[pos] != '[') {
      pos++;
      continue;
    }

    const size_t open_bracket = pos;
    size_t close_bracket = open_bracket + 1;
    while (close_bracket < length && text[close_bracket] != ']') {
      close_bracket++;
    }
    // never rescan the label, which keeps the whole search linear
    pos = close_bracket + 1;
    if (close_bracket == length || close_bracket == open_bracket + 1) {
      // unterminated or empty label
      continue;
    }

    const size_t open_paren = close_bracket + 1;
    if (open_paren == length || text[open_paren] != '(') {
      continue;
    }

    size_t close_paren = open_paren + 1;
    while (close_paren < length && text[close_paren] != ')') {
      close_paren++;
    }
    // never rescan the URL part either; as a consequence "[a](b[c](t.me)" yields no entity
    pos = close_paren + 1;
    if (close_paren == length) {
      // unterminated URL
      continue;
    }

    Slice url = text.substr(open_paren + 1, close_paren - open_paren - 1);
    if (check_url(url).is_error()) {
      continue;
    }
    found.push_back(text.substr(open_bracket, close_bracket - open_bracket + 1));
    found.push_back(text.substr(open_paren, close_paren - open_paren + 1));
  }
  return found;
}
// Parses "[text](url)" constructions in the parts of the text that aren't covered by the given TextUrl and
// MentionName entities, removes their markup from the text and shifts, truncates or splits the given
// splittable entities accordingly.
// entities must be valid for the text
// NOTE(review): the returned entities are appended part by part and type by type, so they aren't globally sorted;
// the visible caller parse_markdown_v3_without_pre sorts them afterwards
static FormattedText parse_text_url_entities_v3(Slice text, vector<MessageEntity> entities) {
// TextUrl and MentionName can't intersect TextUrl entities,
// so try to find new TextUrl entities only between the predetermined TextUrl and MentionName entities
FormattedText result;
// UTF-16 length of result.text built so far
int32 result_text_utf16_length = 0;
// predetermined TextUrl and MentionName entities of the current kept part, already shifted by skipped_length
vector<MessageEntity> part_entities;
// not yet emitted splittable entities, grouped by type index; offsets are in the original text
vector<MessageEntity> part_splittable_entities[SPLITTABLE_ENTITY_TYPE_COUNT];
// part_begin and max_end are UTF-16 offsets in the original text
int32 part_begin = 0;
int32 max_end = 0;
// total UTF-16 length of the markup removed so far; original offset - skipped_length gives the result offset
int32 skipped_length = 0;
auto add_part = [&](int32 part_end) {
// we have [part_begin, max_end) kept part and [max_end, part_end) part to parse text_url entities
if (max_end != part_begin) {
// add all entities from the kept part
auto kept_part_text = utf8_utf16_substr(text, 0, max_end - part_begin);
text = text.substr(kept_part_text.size());
result.text.append(kept_part_text.begin(), kept_part_text.size());
append(result.entities, std::move(part_entities));
part_entities.clear();
result_text_utf16_length += max_end - part_begin;
}
if (part_end != max_end) {
// try to find text_url entities in the remaining part
auto parsed_part_text = utf8_utf16_substr(text, 0, part_end - max_end);
text = text.substr(parsed_part_text.size());
// index of the first not yet processed splittable entity for each type
size_t splittable_entity_pos[SPLITTABLE_ENTITY_TYPE_COUNT] = {};
// pairs of slices: text_urls[i] is "[text]" and text_urls[i + 1] is "(url)"
vector<Slice> text_urls = find_text_url_entities_v3(parsed_part_text);
for (size_t index = 0; index < SPLITTABLE_ENTITY_TYPE_COUNT; index++) {
check_non_intersecting(part_splittable_entities[index]);
}
// UTF-16 offset in the original text of the first not yet processed character
int32 text_utf16_offset = max_end;
// byte position in parsed_part_text of the first not yet copied character
size_t prev_pos = 0;
for (size_t i = 0; i < text_urls.size(); i += 2) {
auto text_begin_pos = static_cast<size_t>(text_urls[i].begin() - parsed_part_text.begin());
auto text_end_pos = text_begin_pos + text_urls[i].size() - 1;
auto url_begin_pos = static_cast<size_t>(text_urls[i + 1].begin() - parsed_part_text.begin());
auto url_end_pos = url_begin_pos + text_urls[i + 1].size() - 1;
CHECK(parsed_part_text[text_begin_pos] == '[');
CHECK(parsed_part_text[text_end_pos] == ']');
CHECK(url_begin_pos == text_end_pos + 1);
CHECK(parsed_part_text[url_begin_pos] == '(');
CHECK(parsed_part_text[url_end_pos] == ')');
// copy the text before '[' as is
Slice before_text_url = parsed_part_text.substr(prev_pos, text_begin_pos - prev_pos);
auto before_text_url_utf16_length = narrow_cast<int32>(utf8_utf16_length(before_text_url));
result_text_utf16_length += before_text_url_utf16_length;
result.text.append(before_text_url.begin(), before_text_url.size());
text_utf16_offset += before_text_url_utf16_length;
// keep only the text between '[' and ']' and turn it into a TextUrl entity
Slice text_url = parsed_part_text.substr(text_begin_pos + 1, text_end_pos - text_begin_pos - 1);
auto text_url_utf16_length = narrow_cast<int32>(utf8_utf16_length(text_url));
Slice url = parsed_part_text.substr(url_begin_pos + 1, url_end_pos - url_begin_pos - 1);
auto url_utf16_length = narrow_cast<int32>(utf8_utf16_length(url));
result.entities.emplace_back(MessageEntity::Type::TextUrl, result_text_utf16_length, text_url_utf16_length,
check_url(url).move_as_ok());
result.text.append(text_url.begin(), text_url.size());
result_text_utf16_length += text_url_utf16_length;
// UTF-16 length of the whole "[text](url)" construction in the original text
auto initial_utf16_length = 1 + text_url_utf16_length + 1 + 1 + url_utf16_length + 1;
// adjust splittable entities, removing deleted parts from them
// in the segment [text_utf16_offset, text_utf16_offset + initial_utf16_length)
// the first character and the last (url_utf16_length + 3) characters are deleted
for (size_t index = 0; index < SPLITTABLE_ENTITY_TYPE_COUNT; index++) {
auto &pos = splittable_entity_pos[index];
auto &splittable_entities = part_splittable_entities[index];
while (pos < splittable_entities.size() &&
splittable_entities[pos].offset < text_utf16_offset + initial_utf16_length) {
auto offset = splittable_entities[pos].offset;
auto length = splittable_entities[pos].length;
if (offset + length > text_utf16_offset + 1 + text_url_utf16_length) {
// ends after last removed part; truncate length
length = text_utf16_offset + 1 + text_url_utf16_length - offset;
}
// account for the removed '['
if (offset >= text_utf16_offset + 1) {
offset--;
} else if (offset + length >= text_utf16_offset + 1) {
length--;
}
if (length > 0) {
CHECK(offset >= skipped_length);
CHECK(offset - skipped_length + length <= result_text_utf16_length);
if (offset < text_utf16_offset && offset + length > text_utf16_offset) {
// entity intersects the start of the new text_url entity; split it
result.entities.emplace_back(splittable_entities[pos].type, offset - skipped_length,
text_utf16_offset - offset);
length -= text_utf16_offset - offset;
offset = text_utf16_offset;
}
result.entities.emplace_back(splittable_entities[pos].type, offset - skipped_length, length);
}
if (splittable_entities[pos].offset + splittable_entities[pos].length >
text_utf16_offset + initial_utf16_length) {
// begins before end of the segment, but ends after it
// need to keep the entity for future segments, so split the entity
splittable_entities[pos].length = splittable_entities[pos].offset + splittable_entities[pos].length -
(text_utf16_offset + initial_utf16_length);
splittable_entities[pos].offset = text_utf16_offset + initial_utf16_length;
} else {
pos++;
}
}
}
text_utf16_offset += initial_utf16_length;
// '[' and ']' plus '(' and ')' plus the URL itself were removed from the text
skipped_length += 2 + 2 + url_utf16_length;
prev_pos = url_end_pos + 1;
}
// copy the text after the last found text_url entity
result.text.append(parsed_part_text.begin() + prev_pos, parsed_part_text.size() - prev_pos);
result_text_utf16_length += part_end - text_utf16_offset;
// now add all splittable entities from [text_utf16_offset, part_end)
for (size_t index = 0; index < SPLITTABLE_ENTITY_TYPE_COUNT; index++) {
auto &pos = splittable_entity_pos[index];
auto &splittable_entities = part_splittable_entities[index];
while (pos < splittable_entities.size() && splittable_entities[pos].offset < part_end) {
if (splittable_entities[pos].offset + splittable_entities[pos].length > part_end) {
// begins before end of the segment, but ends after it
// need to keep the entity for future segments, so split the entity
// entities don't intersect each other, so there can be at most one such entity
result.entities.emplace_back(splittable_entities[pos].type,
splittable_entities[pos].offset - skipped_length,
part_end - splittable_entities[pos].offset);
splittable_entities[pos].length =
splittable_entities[pos].offset + splittable_entities[pos].length - part_end;
splittable_entities[pos].offset = part_end;
} else {
result.entities.emplace_back(splittable_entities[pos].type,
splittable_entities[pos].offset - skipped_length,
splittable_entities[pos].length);
pos++;
}
}
// drop all processed entities, keeping only the tail of an entity split at part_end, if any
if (pos == splittable_entities.size()) {
splittable_entities.clear();
} else {
CHECK(pos == splittable_entities.size() - 1);
CHECK(!text.empty());
splittable_entities[0] = std::move(splittable_entities.back());
splittable_entities.resize(1);
}
}
}
part_begin = part_end;
};
for (auto &entity : entities) {
if (is_splittable_entity(entity.type)) {
// splittable entities are only collected here; they are emitted by add_part
auto index = get_splittable_entity_type_index(entity.type);
part_splittable_entities[index].push_back(entity);
continue;
}
CHECK(entity.type == MessageEntity::Type::TextUrl || entity.type == MessageEntity::Type::MentionName);
if (entity.offset > max_end) {
// found a gap from max_end to entity.offset between predetermined entities
add_part(entity.offset);
} else {
CHECK(entity.offset == max_end);
}
max_end = entity.offset + entity.length;
part_entities.push_back(entity);
part_entities.back().offset -= skipped_length;
}
// process the rest of the text
add_part(part_begin + narrow_cast<int32>(utf8_utf16_length(text)));
return result;
}
// Finds __italic__, **bold** and ~~strikethrough~~ entities in the text.
// The returned entities describe the content between an opening and a closing two-character tag;
// their offsets and lengths are in UTF-16 code units of the given text, from which the tags aren't removed yet.
// A tag isn't recognized at a boundary of one of the given entities or at any position covered by
// an entity found by find_entities, except for EmailAddress and Url entities, which are ignored here.
static vector<MessageEntity> find_splittable_entities_v3(Slice text, const vector<MessageEntity> &entities) {
// UTF-16 positions at which a tag must not be recognized
std::unordered_set<size_t> unallowed_boundaries;
for (auto &entity : entities) {
unallowed_boundaries.insert(entity.offset);
unallowed_boundaries.insert(entity.offset + entity.length);
}
// NOTE(review): the meaning of the two boolean find_entities arguments isn't visible here - confirm
auto found_entities = find_entities(text, false, false);
td::remove_if(found_entities, [](const auto &entity) {
return entity.type == MessageEntity::Type::EmailAddress || entity.type == MessageEntity::Type::Url;
});
for (auto &entity : found_entities) {
// forbid every position from the begin to the end of the found entity, both inclusive
for (int32 i = 0; i <= entity.length; i++) {
unallowed_boundaries.insert(entity.offset + i);
}
}
vector<MessageEntity> result;
// for each splittable type, UTF-16 offset of the content after the last unmatched opening tag; 0 if there is none
size_t splittable_entity_offset[SPLITTABLE_ENTITY_TYPE_COUNT] = {};
int32 utf16_offset = 0;
for (size_t i = 0; i + 1 < text.size(); i++) {
auto c = static_cast<unsigned char>(text[i]);
if (is_utf8_character_first_code_unit(c)) {
utf16_offset += 1 + (c >= 0xf0); // >= 4 bytes in symbol => surrogate pair
}
// from here utf16_offset already includes text[i]
if ((c == '_' || c == '*' || c == '~') && text[i] == text[i + 1] && unallowed_boundaries.count(utf16_offset) == 0) {
// find the end of the run of identical characters, stopping at the first unallowed boundary
auto j = i + 2;
while (j != text.size() && text[j] == text[i] && unallowed_boundaries.count(utf16_offset + j - i - 1) == 0) {
j++;
}
// only a run of exactly two characters is a tag; longer runs are skipped as ordinary text
if (j == i + 2) {
auto type = c == '_' ? MessageEntity::Type::Italic
: (c == '*' ? MessageEntity::Type::Bold : MessageEntity::Type::Strikethrough);
auto index = get_splittable_entity_type_index(type);
if (splittable_entity_offset[index] != 0) {
// closing tag: the content lies between the end of the opening tag and the begin of this tag
auto length = utf16_offset - splittable_entity_offset[index] - 1;
if (length > 0) {
result.emplace_back(type, splittable_entity_offset[index], length);
}
splittable_entity_offset[index] = 0;
} else {
// opening tag: remember the offset right after its second character
splittable_entity_offset[index] = utf16_offset + 1;
}
}
// skip the rest of the run; its first character is already counted in utf16_offset
utf16_offset += j - i - 1;
i = j - 1;
}
}
return result;
}
// Parses the remaining Markdown markup in a text that contains no pre entities and returns the text
// without the recognized markup together with the sorted list of old and new entities.
// entities must be valid and can contain only splittable, TextUrl and MentionName entities
// __italic__ ~~strikethrough~~ **bold** and [text_url](telegram.org) entities are left to be parsed
static FormattedText parse_markdown_v3_without_pre(Slice text, vector<MessageEntity> entities) {
check_is_sorted(entities);
// owns the text referenced by the text slice after text_url entities are parsed
FormattedText parsed_text_url_text;
if (text.find('[') != string::npos) {
parsed_text_url_text = parse_text_url_entities_v3(text, std::move(entities));
text = parsed_text_url_text.text;
entities = std::move(parsed_text_url_text.entities);
}
// splittable entities are sorted only within a fixed type now
// check whether the text contains at least one possible two-character tag
bool have_splittable_entities = false;
for (size_t i = 0; i + 1 < text.size(); i++) {
if ((text[i] == '_' || text[i] == '*' || text[i] == '~') && text[i] == text[i + 1]) {
have_splittable_entities = true;
break;
}
}
if (!have_splittable_entities) {
// fast path
std::sort(entities.begin(), entities.end());
return {text.str(), std::move(entities)};
}
auto found_splittable_entities = find_splittable_entities_v3(text, entities);
// for every tag to remove, UTF-16 offset right after its first character
vector<int32> removed_pos;
for (auto &entity : found_splittable_entities) {
removed_pos.push_back(entity.offset - 1);
removed_pos.push_back(entity.offset + entity.length + 1);
}
std::sort(removed_pos.begin(), removed_pos.end());
string new_text;
CHECK(text.size() >= 2 * removed_pos.size());
new_text.reserve(text.size() - 2 * removed_pos.size());
// copy the text, skipping both characters of every removed tag
size_t j = 0;
int32 utf16_offset = 0;
for (size_t i = 0; i < text.size(); i++) {
auto c = static_cast<unsigned char>(text[i]);
if (is_utf8_character_first_code_unit(c)) {
utf16_offset += 1 + (c >= 0xf0); // >= 4 bytes in symbol => surrogate pair
}
if (j < removed_pos.size() && utf16_offset == removed_pos[j]) {
// text[i] is the first character of a tag; skip the second character too
i++;
utf16_offset++;
CHECK(j + 1 == removed_pos.size() || removed_pos[j + 1] >= removed_pos[j] + 2);
j++;
} else {
new_text += text[i];
}
}
CHECK(j == removed_pos.size());
combine(entities, std::move(found_splittable_entities));
// shift and shrink all entities by two UTF-16 code units for every removed tag before or inside them
for (auto &entity : entities) {
auto removed_before_begin =
std::upper_bound(removed_pos.begin(), removed_pos.end(), entity.offset) - removed_pos.begin();
auto removed_before_end =
std::upper_bound(removed_pos.begin(), removed_pos.end(), entity.offset + entity.length) - removed_pos.begin();
entity.length -= 2 * (removed_before_end - removed_before_begin);
entity.offset -= 2 * removed_before_begin;
CHECK(entity.offset >= 0);
CHECK(entity.length >= 0);
CHECK(entity.offset + entity.length <= utf16_offset);
}
// entities that covered nothing except removed tags are dropped
td::remove_if(entities, [](const auto &entity) { return entity.length == 0; });
std::sort(entities.begin(), entities.end());
return {std::move(new_text), std::move(entities)};
}
// Finds `code` and ```pre``` entities in a text without predetermined entities.
// A run of exactly one or exactly three '`' characters opens an entity, which is closed by the next run of '`'
// characters of the same length; runs of other lengths inside the entity belong to its content.
// Recognized tags are removed from the returned text; everything else, including unmatched runs, is kept as is.
// Offsets and lengths of the returned entities are in UTF-16 code units of the returned text.
static FormattedText parse_pre_entities_v3(Slice text) {
  string output;
  vector<MessageEntity> found_entities;
  const size_t text_size = text.size();
  int32 output_utf16_length = 0;

  size_t pos = 0;
  while (pos < text_size) {
    const auto symbol = static_cast<unsigned char>(text[pos]);
    if (symbol != '`') {
      // ordinary character, copy it to the output
      if (is_utf8_character_first_code_unit(symbol)) {
        output_utf16_length += 1 + (symbol >= 0xf0);  // >= 4 bytes in symbol => surrogate pair
      }
      output.push_back(text[pos]);
      pos++;
      continue;
    }

    // measure the run of '`' characters beginning at pos
    size_t tag_end = pos + 1;
    while (tag_end < text_size && text[tag_end] == '`') {
      tag_end++;
    }
    const size_t tag_size = tag_end - pos;

    bool is_closed = false;
    if (tag_size == 1 || tag_size == 3) {
      // look for a closing run of exactly the same size
      int32 content_utf16_length = 0;
      size_t scan_pos = tag_end;
      while (scan_pos < text_size) {
        const auto scanned = static_cast<unsigned char>(text[scan_pos]);
        if (scanned != '`') {
          if (is_utf8_character_first_code_unit(scanned)) {
            content_utf16_length += 1 + (scanned >= 0xf0);  // >= 4 bytes in symbol => surrogate pair
          }
          scan_pos++;
          continue;
        }

        size_t run_end = scan_pos + 1;
        while (run_end < text_size && text[run_end] == '`') {
          run_end++;
        }
        if (run_end - scan_pos != tag_size) {
          // a run of another size is a part of the content
          content_utf16_length += run_end - scan_pos;
          scan_pos = run_end;
          continue;
        }

        // the closing tag is found
        CHECK(content_utf16_length > 0);
        const auto type = tag_size == 3 ? MessageEntity::Type::Pre : MessageEntity::Type::Code;
        found_entities.emplace_back(type, output_utf16_length, content_utf16_length);
        output.append(text.begin() + tag_end, scan_pos - tag_end);
        output_utf16_length += content_utf16_length;
        pos = run_end;
        is_closed = true;
        break;
      }
    }

    if (!is_closed) {
      // not a tag or no closing tag; keep the run in the text
      output.append(text.begin() + pos, tag_size);
      output_utf16_length += tag_size;
      pos = tag_end;
    }
  }
  return {std::move(output), std::move(found_entities)};
}
// Finds `code` and ```pre``` entities in the parts of the text that aren't covered by the given entities,
// removes their tags from the text and shifts the given entities accordingly.
// entities must be valid for the text
static FormattedText parse_pre_entities_v3(Slice text, vector<MessageEntity> entities) {
// nothing can intersect pre entities, so ignore all '`' inside the predetermined entities
// and try to find new pre entities only between the predetermined entities
FormattedText result;
// UTF-16 length of result.text built so far
int32 result_text_utf16_length = 0;
// part_begin and max_end are UTF-16 offsets in the original text
int32 part_begin = 0;
int32 max_end = 0;
// total UTF-16 length of the tags removed so far
int32 skipped_length = 0;
auto add_part = [&](int32 part_end) {
// we have [part_begin, max_end) kept part and [max_end, part_end) part to parse pre entities
CHECK(part_begin == result_text_utf16_length + skipped_length);
if (max_end != part_begin) {
// add the kept part
auto kept_part_text = utf8_utf16_substr(text, 0, max_end - part_begin);
text = text.substr(kept_part_text.size());
result.text.append(kept_part_text.begin(), kept_part_text.size());
result_text_utf16_length += max_end - part_begin;
}
if (part_end != max_end) {
// try to find pre entities in the remaining part
auto parsed_part_text = utf8_utf16_substr(text, 0, part_end - max_end);
text = text.substr(parsed_part_text.size());
if (parsed_part_text.find('`') == string::npos) {
// fast path, no pre entities; just append the text
result.text.append(parsed_part_text.begin(), parsed_part_text.size());
result_text_utf16_length += part_end - max_end;
} else {
FormattedText parsed_text = parse_pre_entities_v3(parsed_part_text);
// every Pre entity had two removed three-character tags, every other found entity two one-character tags
int32 new_skipped_length = 0;
for (auto &entity : parsed_text.entities) {
new_skipped_length += (entity.type == MessageEntity::Type::Pre ? 6 : 2);
}
CHECK(new_skipped_length < part_end - max_end);
result.text += parsed_text.text;
// offsets of the found entities are relative to the parsed part; make them relative to the result text
for (auto &entity : parsed_text.entities) {
entity.offset += result_text_utf16_length;
}
append(result.entities, std::move(parsed_text.entities));
result_text_utf16_length += part_end - max_end - new_skipped_length;
skipped_length += new_skipped_length;
}
}
part_begin = part_end;
};
for (auto &entity : entities) {
if (entity.offset > max_end) {
// found a gap from max_end to entity.offset between predetermined entities
add_part(entity.offset);
}
max_end = td::max(max_end, entity.offset + entity.length);
// keep the predetermined entity, shifted by the length of the tags removed before it
result.entities.push_back(std::move(entity));
result.entities.back().offset -= skipped_length;
}
// process the rest of the text
add_part(part_begin + narrow_cast<int32>(utf8_utf16_length(text)));
return result;
}
// Parses human-friendly Markdown markup in an already formatted text.
// New code and pre entities are found first; after that the text is cut by pre entities into parts,
// the parts between pre entities are parsed by parse_markdown_v3_without_pre,
// and the content of pre entities is copied without changes.
// text entities must be valid
// returned entities must be resplitted and fixed
FormattedText parse_markdown_v3(FormattedText text) {
  if (text.text.find('`') != string::npos) {
    text = parse_pre_entities_v3(text.text, std::move(text.entities));
    check_is_sorted(text.entities);
  }

  const bool contains_pre = std::any_of(text.entities.begin(), text.entities.end(),
                                        [](const MessageEntity &entity) { return is_pre_entity(entity.type); });
  if (!contains_pre) {
    // fast path
    return parse_markdown_v3_without_pre(text.text, std::move(text.entities));
  }

  FormattedText parsed;
  int32 parsed_utf16_length = 0;  // UTF-16 length of parsed.text
  vector<MessageEntity> pending_entities;  // entities of the current part, relative to pending_begin
  int32 pending_begin = 0;  // UTF-16 offset of the current part in the original text
  int32 covered_end = 0;  // maximum end of the already seen entities
  Slice remaining_text = text.text;

  // parses the part [pending_begin, pending_end) of the original text and appends it to the result
  auto flush_pending = [&](int32 pending_end) {
    auto chunk_text = utf8_utf16_substr(remaining_text, 0, pending_end - pending_begin);
    remaining_text = remaining_text.substr(chunk_text.size());

    FormattedText parsed_chunk = parse_markdown_v3_without_pre(chunk_text, std::move(pending_entities));
    pending_entities.clear();

    parsed.text += parsed_chunk.text;
    for (auto &chunk_entity : parsed_chunk.entities) {
      chunk_entity.offset += parsed_utf16_length;
    }
    append(parsed.entities, std::move(parsed_chunk.entities));
    parsed_utf16_length += narrow_cast<int32>(utf8_utf16_length(parsed_chunk.text));
    pending_begin = pending_end;
  };

  const size_t entity_count = text.entities.size();
  for (size_t index = 0; index < entity_count; index++) {
    const auto &entity = text.entities[index];
    const auto entity_end = entity.offset + entity.length;
    CHECK(is_splittable_entity(entity.type) || is_pre_entity(entity.type) ||
          entity.type == MessageEntity::Type::TextUrl || entity.type == MessageEntity::Type::MentionName);

    if (!is_pre_entity(entity.type)) {
      // the entity belongs to the current part
      pending_entities.push_back(entity);
      pending_entities.back().offset -= pending_begin;
    } else {
      // a pre entity can't intersect other entities
      CHECK(entity.offset >= covered_end);
      CHECK(index + 1 == entity_count || text.entities[index + 1].offset >= entity_end);

      flush_pending(entity.offset);

      // copy the content of the pre entity as is
      auto pre_text = utf8_utf16_substr(remaining_text, 0, entity.length);
      remaining_text = remaining_text.substr(pre_text.size());
      parsed.text.append(pre_text.begin(), pre_text.size());
      parsed.entities.push_back(entity);
      parsed.entities.back().offset = parsed_utf16_length;
      parsed_utf16_length += entity.length;
      pending_begin = entity_end;
    }
    covered_end = td::max(covered_end, entity_end);
  }

  // parse the part after the last pre entity
  flush_pending(pending_begin + narrow_cast<int32>(utf8_utf16_length(remaining_text)));
  return parsed;
}
static uint32 decode_html_entity(CSlice text, size_t &pos) {
auto c = static_cast<unsigned char>(text[pos]);
if (c != '&') {
@ -2342,7 +2895,7 @@ Result<vector<MessageEntity>> get_message_entities(const ContactsManager *contac
case td_api::textEntityTypeMentionName::ID: {
auto entity_mention_name = static_cast<td_api::textEntityTypeMentionName *>(entity->type_.get());
UserId user_id(entity_mention_name->user_id_);
if (!contacts_manager->have_input_user(user_id)) {
if (contacts_manager != nullptr && !contacts_manager->have_input_user(user_id)) {
return Status::Error(7, "Have no access to the user");
}
entities.emplace_back(entity->offset_, entity->length_, user_id);
@ -2758,8 +3311,8 @@ static std::pair<size_t, int32> remove_invalid_entities(const string &text, vect
break;
}
auto have_hidden_data =
entity->type == MessageEntity::Type::TextUrl || entity->type == MessageEntity::Type::MentionName;
auto have_hidden_data = entity->type == MessageEntity::Type::TextUrl ||
entity->type == MessageEntity::Type::MentionName || is_pre_entity(entity->type);
if (last_non_whitespace_utf16_offset >= entity->offset ||
(last_space_utf16_offset >= entity->offset && have_hidden_data)) {
// TODO check entity for validness, for example, that mentions, hashtags, cashtags and URLs are valid

View File

@ -146,6 +146,8 @@ Result<vector<MessageEntity>> parse_markdown(string &text);
Result<vector<MessageEntity>> parse_markdown_v2(string &text);
FormattedText parse_markdown_v3(FormattedText text);
Result<vector<MessageEntity>> parse_html(string &text);
vector<tl_object_ptr<telegram_api::MessageEntity>> get_input_message_entities(const ContactsManager *contacts_manager,

View File

@ -3080,6 +3080,7 @@ bool Td::is_synchronous_request(int32 id) {
switch (id) {
case td_api::getTextEntities::ID:
case td_api::parseTextEntities::ID:
case td_api::parseMarkdown::ID:
case td_api::getFileMimeType::ID:
case td_api::getFileExtension::ID:
case td_api::cleanFileName::ID:
@ -3302,6 +3303,7 @@ td_api::object_ptr<td_api::Object> Td::static_request(td_api::object_ptr<td_api:
bool need_logging = [function_id] {
switch (function_id) {
case td_api::parseTextEntities::ID:
case td_api::parseMarkdown::ID:
case td_api::getFileMimeType::ID:
case td_api::getFileExtension::ID:
case td_api::cleanFileName::ID:
@ -7447,6 +7449,10 @@ void Td::on_request(uint64 id, const td_api::parseTextEntities &request) {
UNREACHABLE();
}
// parseMarkdown is listed in Td::is_synchronous_request and is served by Td::do_static_request,
// so this asynchronous handler is presumably never dispatched
void Td::on_request(uint64 id, const td_api::parseMarkdown &request) {
UNREACHABLE();
}
// getFileMimeType is listed in Td::is_synchronous_request and is served by Td::do_static_request,
// so this asynchronous handler is presumably never dispatched
void Td::on_request(uint64 id, const td_api::getFileMimeType &request) {
UNREACHABLE();
}
@ -7549,6 +7555,27 @@ td_api::object_ptr<td_api::Object> Td::do_static_request(td_api::parseTextEntiti
return make_tl_object<td_api::formattedText>(std::move(request.text_), get_text_entities_object(r_entities.ok()));
}
// Synchronously handles td_api::parseMarkdown: validates the given formatted text, parses Markdown markup in it
// and returns the resulting td_api::formattedText, or an error with code 400 if the input is invalid.
// The text and the entities of the request are moved out of it.
td_api::object_ptr<td_api::Object> Td::do_static_request(td_api::parseMarkdown &request) {
  if (request.text_ == nullptr) {
    return make_error(400, "Text must be non-empty");
  }
  auto &source = *request.text_;

  // no ContactsManager is available in a static request, so nullptr is passed instead
  auto r_source_entities = get_message_entities(nullptr, std::move(source.entities_));
  if (r_source_entities.is_error()) {
    return make_error(400, r_source_entities.error().message());
  }
  auto source_entities = r_source_entities.move_as_ok();

  auto fix_status = fix_formatted_text(source.text_, source_entities, true, true, true, true);
  if (fix_status.is_error()) {
    return make_error(400, fix_status.error().message());
  }

  auto markdown = parse_markdown_v3({std::move(source.text_), std::move(source_entities)});
  // the parsed text is fixed once more; a failure here is treated as fatal by ensure()
  fix_formatted_text(markdown.text, markdown.entities, true, true, true, true).ensure();

  auto entity_objects = get_text_entities_object(markdown.entities);
  return make_tl_object<td_api::formattedText>(std::move(markdown.text), std::move(entity_objects));
}
td_api::object_ptr<td_api::Object> Td::do_static_request(const td_api::getFileMimeType &request) {
// don't check file name UTF-8 correctness
return make_tl_object<td_api::text>(MimeType::from_extension(PathView(request.file_name_).extension()));

View File

@ -1045,6 +1045,8 @@ class Td final : public NetQueryCallback {
void on_request(uint64 id, const td_api::parseTextEntities &request);
void on_request(uint64 id, const td_api::parseMarkdown &request);
void on_request(uint64 id, const td_api::getFileMimeType &request);
void on_request(uint64 id, const td_api::getFileExtension &request);
@ -1096,6 +1098,7 @@ class Td final : public NetQueryCallback {
}
static td_api::object_ptr<td_api::Object> do_static_request(const td_api::getTextEntities &request);
static td_api::object_ptr<td_api::Object> do_static_request(td_api::parseTextEntities &request);
static td_api::object_ptr<td_api::Object> do_static_request(td_api::parseMarkdown &request);
static td_api::object_ptr<td_api::Object> do_static_request(const td_api::getFileMimeType &request);
static td_api::object_ptr<td_api::Object> do_static_request(const td_api::getFileExtension &request);
static td_api::object_ptr<td_api::Object> do_static_request(const td_api::cleanFileName &request);

View File

@ -2693,6 +2693,8 @@ class CliClient final : public Actor {
send_request(td_api::make_object<td_api::getTextEntities>(args));
} else if (op == "gtes") {
execute(td_api::make_object<td_api::getTextEntities>(args));
} else if (op == "pm") {
send_request(td_api::make_object<td_api::parseMarkdown>(as_formatted_text(args)));
} else if (op == "pte") {
send_request(
td_api::make_object<td_api::parseTextEntities>(args, td_api::make_object<td_api::textParseModeMarkdown>(2)));

View File

@ -1242,3 +1242,278 @@ TEST(MessageEntities, parse_markdown) {
check_parse_markdown("[telegram\\.org](asdasd)", "telegram.org", {});
check_parse_markdown("[telegram\\.org](tg:user?id=123456)", "telegram.org", {{0, 12, td::UserId(123456)}});
}
// Runs td::parse_markdown_v3 on the given text with the given entities, optionally passes the output through
// fix_formatted_text, and checks that the resulting text and entities are equal to the expected ones.
static void check_parse_markdown_v3(td::string text, td::vector<td::MessageEntity> entities, const td::string &result,
                                    const td::vector<td::MessageEntity> &result_entities, bool fix = false) {
  td::FormattedText source{std::move(text), std::move(entities)};
  auto actual = td::parse_markdown_v3(std::move(source));
  if (fix) {
    ASSERT_TRUE(fix_formatted_text(actual.text, actual.entities, true, true, true, true).is_ok());
  }
  ASSERT_STREQ(result, actual.text);
  ASSERT_EQ(result_entities, actual.entities);
}
// Shortcut for checking td::parse_markdown_v3 on a text without predetermined entities.
static void check_parse_markdown_v3(td::string text, const td::string &result,
                                    const td::vector<td::MessageEntity> &result_entities, bool fix = false) {
  td::vector<td::MessageEntity> no_entities;
  check_parse_markdown_v3(std::move(text), std::move(no_entities), result, result_entities, fix);
}
// Exercises td::parse_markdown_v3 through the check_parse_markdown_v3 helpers: the fixed cases compare the
// parsed text and entities against expected values, and a final randomized loop feeds generated input to
// the parser without checking its result.
// In the expectations below, entities are written as {type, offset, length[, argument]} or
// {offset, length, user_id}; offsets and lengths count each emoji as 2 units (presumably UTF-16 code
// units - confirm against MessageEntity).
TEST(MessageEntities, parse_markdown_v3) {
// Backticks: a `...` pair is expected to produce Code and a ```...``` pair to produce Pre; the backtick runs
// that do not form such a pair in these inputs are expected to stay in the text.
check_parse_markdown_v3("🏟````🏟``🏟`aba🏟```c🏟`aba🏟 daba🏟```c🏟`aba🏟```🏟 `🏟``🏟```",
"🏟````🏟``🏟aba🏟```c🏟aba🏟 daba🏟c🏟`aba🏟🏟 `🏟``🏟```",
{{td::MessageEntity::Type::Code, 12, 11}, {td::MessageEntity::Type::Pre, 35, 9}});
check_parse_markdown_v3(
"🏟````🏟``🏟`aba🏟```c🏟`aba🏟 daba🏟```c🏟`aba🏟🏟```🏟 `🏟``🏟```",
{{td::MessageEntity::Type::Italic, 12, 1},
{td::MessageEntity::Type::Italic, 44, 1},
{td::MessageEntity::Type::Bold, 45, 1},
{td::MessageEntity::Type::Bold, 49, 2}},
"🏟````🏟``🏟`aba🏟c🏟`aba🏟 daba🏟c🏟`aba🏟🏟🏟 `🏟``🏟",
{{td::MessageEntity::Type::Italic, 12, 1},
{td::MessageEntity::Type::Pre, 18, 16},
{td::MessageEntity::Type::Italic, 38, 1},
{td::MessageEntity::Type::Bold, 39, 1},
{td::MessageEntity::Type::Bold, 43, 2},
{td::MessageEntity::Type::Pre, 45, 10}});
// Code spans that contain only a space or a newline; calls with a trailing `true` additionally pass the
// parser output through fix_formatted_text before comparing.
check_parse_markdown_v3("` `", " ", {{td::MessageEntity::Type::Code, 0, 1}});
check_parse_markdown_v3("`\n`", "\n", {{td::MessageEntity::Type::Code, 0, 1}});
check_parse_markdown_v3("` `a", " a", {{td::MessageEntity::Type::Code, 0, 1}}, true);
check_parse_markdown_v3("`\n`a", "\na", {}, true);
check_parse_markdown_v3("``", "``", {});
// Text URLs written as [text](url), including malformed and empty-text variants that are expected to be
// left untouched.
check_parse_markdown_v3("[a](b[c](t.me)", "[a](b[c](t.me)", {});
check_parse_markdown_v3("[](t.me)", "[](t.me)", {});
check_parse_markdown_v3("[ ](t.me)", " ", {{td::MessageEntity::Type::TextUrl, 0, 1, "http://t.me/"}});
check_parse_markdown_v3("[ ](t.me)", "", {}, true);
check_parse_markdown_v3("[ ](t.me)a", " a", {{td::MessageEntity::Type::TextUrl, 0, 1, "http://t.me/"}}, true);
check_parse_markdown_v3(
"[ ](t.me) [ ](t.me)", {{td::MessageEntity::Type::TextUrl, 8, 1, "http://t.me/"}, {10, 1, td::UserId(1)}},
"[ ](t.me) [ ](t.me)", {{td::MessageEntity::Type::TextUrl, 8, 1, "http://t.me/"}, {10, 1, td::UserId(1)}});
check_parse_markdown_v3("[\n](t.me)", "\n", {{td::MessageEntity::Type::TextUrl, 0, 1, "http://t.me/"}});
check_parse_markdown_v3("[\n](t.me)a", "\na", {}, true);
// Entities supplied together with the text: the expectations show them re-positioned and split around the
// markup that is removed from the text.
check_parse_markdown_v3("asd[abcd](google.com)", {{td::MessageEntity::Type::Italic, 0, 5}}, "asdabcd",
{{td::MessageEntity::Type::Italic, 0, 3},
{td::MessageEntity::Type::TextUrl, 3, 4, "http://google.com/"},
{td::MessageEntity::Type::Italic, 3, 1}});
check_parse_markdown_v3("asd[abcd](google.com)efg[hi](https://t.me?t=1#h)e",
{{td::MessageEntity::Type::Italic, 0, 5}, {td::MessageEntity::Type::Italic, 18, 31}},
"asdabcdefghie",
{{td::MessageEntity::Type::Italic, 0, 3},
{td::MessageEntity::Type::TextUrl, 3, 4, "http://google.com/"},
{td::MessageEntity::Type::Italic, 3, 1},
{td::MessageEntity::Type::Italic, 7, 3},
{td::MessageEntity::Type::TextUrl, 10, 2, "https://t.me/?t=1#h"},
{td::MessageEntity::Type::Italic, 10, 2},
{td::MessageEntity::Type::Italic, 12, 1}});
// Several text URLs in one string made of emoji, with emoji also inside the URL host.
check_parse_markdown_v3(
"🏟🏟🏟[🏟🏟🏟🏟🏟](www.🤙.tk#1)🤙🤙🤙[🏟🏟🏟🏟](www.🤙.tk#2)🤙🤙🤙["
"🏟🏟🏟🏟](www.🤙.tk#3)🏟🏟🏟[🏟🏟🏟🏟](www.🤙.tk#4)🤙🤙",
"🏟🏟🏟🏟🏟🏟🏟🏟🤙🤙🤙🏟🏟🏟🏟🤙🤙🤙🏟🏟🏟🏟🏟🏟🏟🏟🏟🏟"
"🏟🤙🤙",
{{td::MessageEntity::Type::TextUrl, 6, 10, "http://www.🤙.tk/#1"},
{td::MessageEntity::Type::TextUrl, 22, 8, "http://www.🤙.tk/#2"},
{td::MessageEntity::Type::TextUrl, 36, 8, "http://www.🤙.tk/#3"},
{td::MessageEntity::Type::TextUrl, 50, 8, "http://www.🤙.tk/#4"}});
check_parse_markdown_v3(
"[🏟🏟🏟🏟🏟](www.🤙.tk#1)[🏟🏟🏟🏟](www.🤙.tk#2)[🏟🏟🏟🏟](www.🤙.tk#3)["
"🏟🏟🏟🏟](www.🤙.tk#4)",
"🏟🏟🏟🏟🏟🏟🏟🏟🏟🏟🏟🏟🏟🏟🏟🏟🏟",
{{td::MessageEntity::Type::TextUrl, 0, 10, "http://www.🤙.tk/#1"},
{td::MessageEntity::Type::TextUrl, 10, 8, "http://www.🤙.tk/#2"},
{td::MessageEntity::Type::TextUrl, 18, 8, "http://www.🤙.tk/#3"},
{td::MessageEntity::Type::TextUrl, 26, 8, "http://www.🤙.tk/#4"}});
// Same kind of input combined with many pre-existing Bold and user-mention entities that overlap the markup.
check_parse_markdown_v3(
"🏟🏟🏟[🏟🏟🏟🏟🏟](www.🤙.tk)🤙🤙🤙[🏟🏟🏟🏟](www.🤙.tk)🤙🤙🤙["
"🏟🏟🏟🏟](www.🤙.tk)🏟🏟🏟[🏟🏟🏟🏟](www.🤙.tk)🤙🤙",
{{td::MessageEntity::Type::Bold, 0, 2},
{td::MessageEntity::Type::Bold, 4, 2},
{td::MessageEntity::Type::Bold, 7, 2},
{td::MessageEntity::Type::Bold, 11, 2},
{td::MessageEntity::Type::Bold, 15, 2},
{td::MessageEntity::Type::Bold, 18, 2},
{td::MessageEntity::Type::Bold, 26, 2},
{31, 2, td::UserId(1)},
{td::MessageEntity::Type::Bold, 35, 1},
{td::MessageEntity::Type::Bold, 44, 2},
{td::MessageEntity::Type::Bold, 50, 2},
{td::MessageEntity::Type::Bold, 54, 2},
{56, 2, td::UserId(2)},
{td::MessageEntity::Type::Bold, 58, 7},
{60, 2, td::UserId(3)},
{td::MessageEntity::Type::Bold, 67, 7},
{td::MessageEntity::Type::Bold, 80, 7},
{td::MessageEntity::Type::Bold, 89, 25}},
"🏟🏟🏟🏟🏟🏟🏟🏟🤙🤙🤙🏟🏟🏟🏟🤙🤙🤙🏟🏟🏟🏟🏟🏟🏟🏟🏟🏟"
"🏟🤙🤙",
{{td::MessageEntity::Type::Bold, 0, 2},
{td::MessageEntity::Type::Bold, 4, 2},
{td::MessageEntity::Type::TextUrl, 6, 10, "http://www.🤙.tk/"},
{td::MessageEntity::Type::Bold, 6, 2},
{td::MessageEntity::Type::Bold, 10, 2},
{td::MessageEntity::Type::Bold, 14, 2},
{18, 2, td::UserId(1)},
{td::MessageEntity::Type::TextUrl, 22, 8, "http://www.🤙.tk/"},
{30, 2, td::UserId(2)},
{td::MessageEntity::Type::Bold, 32, 2},
{34, 2, td::UserId(3)},
{td::MessageEntity::Type::Bold, 34, 2},
{td::MessageEntity::Type::TextUrl, 36, 8, "http://www.🤙.tk/"},
{td::MessageEntity::Type::Bold, 36, 2},
{td::MessageEntity::Type::Bold, 40, 4},
{td::MessageEntity::Type::Bold, 44, 4},
{td::MessageEntity::Type::TextUrl, 50, 8, "http://www.🤙.tk/"},
{td::MessageEntity::Type::Bold, 50, 8},
{td::MessageEntity::Type::Bold, 58, 4}});
// Code combined with text URL syntax: in the first case the expected output keeps the [..](..) markup as
// plain text and contains only Code entities.
check_parse_markdown_v3("[`a`](t.me) [b](t.me)", {{td::MessageEntity::Type::Code, 13, 1}}, "[a](t.me) [b](t.me)",
{{td::MessageEntity::Type::Code, 1, 1}, {td::MessageEntity::Type::Code, 11, 1}});
check_parse_markdown_v3(
"[text](example.com)",
{{td::MessageEntity::Type::Strikethrough, 0, 1}, {td::MessageEntity::Type::Strikethrough, 5, 14}}, "text",
{{td::MessageEntity::Type::TextUrl, 0, 4, "http://example.com/"}});
check_parse_markdown_v3("🏟[🏟](t.me) `🏟` [🏟](t.me) `a`", "🏟🏟 🏟 🏟 a",
{{td::MessageEntity::Type::TextUrl, 2, 2, "http://t.me/"},
{td::MessageEntity::Type::Code, 5, 2},
{td::MessageEntity::Type::TextUrl, 8, 2, "http://t.me/"},
{td::MessageEntity::Type::Code, 11, 1}});
// Italic (__), bold (**) and strikethrough (~~) markers, including whitespace-only content and marker runs
// that are expected to stay unparsed.
check_parse_markdown_v3("__ __", " ", {{td::MessageEntity::Type::Italic, 0, 1}});
check_parse_markdown_v3("__\n__", "\n", {{td::MessageEntity::Type::Italic, 0, 1}});
check_parse_markdown_v3("__ __a", " a", {}, true);
check_parse_markdown_v3("__\n__a", "\na", {}, true);
check_parse_markdown_v3("**** __a__ **b** ~~c~~", "**** a b c",
{{td::MessageEntity::Type::Italic, 5, 1},
{td::MessageEntity::Type::Bold, 7, 1},
{td::MessageEntity::Type::Strikethrough, 9, 1}});
check_parse_markdown_v3("тест __аааа__ **бббб** ~~вввв~~", "тест аааа бббб вввв",
{{td::MessageEntity::Type::Italic, 5, 4},
{td::MessageEntity::Type::Bold, 10, 4},
{td::MessageEntity::Type::Strikethrough, 15, 4}});
check_parse_markdown_v3("___a___ ***b** ~c~~", "___a___ ***b** ~c~~", {});
// Italic markers that overlap or nest with text URLs, with and without fixing the result.
check_parse_markdown_v3(
"__asd[ab__cd](t.me)", "asdabcd",
{{td::MessageEntity::Type::Italic, 0, 5}, {td::MessageEntity::Type::TextUrl, 3, 4, "http://t.me/"}});
check_parse_markdown_v3("__asd[ab__cd](t.me)", "asdabcd",
{{td::MessageEntity::Type::Italic, 0, 3},
{td::MessageEntity::Type::TextUrl, 3, 4, "http://t.me/"},
{td::MessageEntity::Type::Italic, 3, 2}},
true);
check_parse_markdown_v3("__[ab_](t.me)_", "__ab__", {{td::MessageEntity::Type::TextUrl, 2, 3, "http://t.me/"}});
check_parse_markdown_v3(
"__[ab__](t.me)_", "ab_",
{{td::MessageEntity::Type::TextUrl, 0, 2, "http://t.me/"}, {td::MessageEntity::Type::Italic, 0, 2}});
check_parse_markdown_v3("__[__ab__](t.me)__", "____ab____",
{{td::MessageEntity::Type::TextUrl, 2, 6, "http://t.me/"}});
check_parse_markdown_v3(
"__[__ab__](t.me)a__", "____aba",
{{td::MessageEntity::Type::TextUrl, 2, 4, "http://t.me/"}, {td::MessageEntity::Type::Italic, 6, 1}});
// Underscore markers next to pre-existing entities and inside @mention-like / #hashtag-like words.
check_parse_markdown_v3("`a` __ab__", {{td::MessageEntity::Type::Bold, 6, 3}}, "a __ab__",
{{td::MessageEntity::Type::Code, 0, 1}, {td::MessageEntity::Type::Bold, 4, 3}});
check_parse_markdown_v3("`a` __ab__", {{td::MessageEntity::Type::Underline, 5, 1}}, "a __ab__",
{{td::MessageEntity::Type::Code, 0, 1}, {td::MessageEntity::Type::Underline, 3, 1}});
check_parse_markdown_v3("`a` @test__test__test", "a @test__test__test", {{td::MessageEntity::Type::Code, 0, 1}});
check_parse_markdown_v3("`a` #test__test__test", "a #test__test__test", {{td::MessageEntity::Type::Code, 0, 1}});
check_parse_markdown_v3("`a` __@test_test_test__", "a @test_test_test",
{{td::MessageEntity::Type::Code, 0, 1}, {td::MessageEntity::Type::Italic, 2, 15}});
check_parse_markdown_v3("`a` __#test_test_test__", "a #test_test_test",
{{td::MessageEntity::Type::Code, 0, 1}, {td::MessageEntity::Type::Italic, 2, 15}});
check_parse_markdown_v3("[a](t.me) __@test**test**test__", "a @testtesttest",
{{td::MessageEntity::Type::TextUrl, 0, 1, "http://t.me/"},
{td::MessageEntity::Type::Italic, 2, 13},
{td::MessageEntity::Type::Bold, 7, 4}});
check_parse_markdown_v3("[a](t.me) __#test~~test~~test__", "a #testtesttest",
{{td::MessageEntity::Type::TextUrl, 0, 1, "http://t.me/"},
{td::MessageEntity::Type::Italic, 2, 13},
{td::MessageEntity::Type::Strikethrough, 7, 4}});
check_parse_markdown_v3("[a](t.me) __@test__test__test__", "a @testtesttest",
{{td::MessageEntity::Type::TextUrl, 0, 1, "http://t.me/"},
{td::MessageEntity::Type::Italic, 2, 5},
{td::MessageEntity::Type::Italic, 11, 4}});
// Partially overlapping italic/bold/strikethrough ranges; the variant with `true` shows the entities after
// fix_formatted_text has been applied.
check_parse_markdown_v3("__**~~__gh**~~", "gh",
{{td::MessageEntity::Type::Bold, 0, 2}, {td::MessageEntity::Type::Strikethrough, 0, 2}});
check_parse_markdown_v3("__ab**cd~~ef__gh**ij~~", "abcdefghij",
{{td::MessageEntity::Type::Italic, 0, 6},
{td::MessageEntity::Type::Bold, 2, 6},
{td::MessageEntity::Type::Strikethrough, 4, 6}});
check_parse_markdown_v3("__ab**cd~~ef__gh**ij~~", "abcdefghij",
{{td::MessageEntity::Type::Italic, 0, 2},
{td::MessageEntity::Type::Bold, 2, 2},
{td::MessageEntity::Type::Italic, 2, 2},
{td::MessageEntity::Type::Strikethrough, 4, 6},
{td::MessageEntity::Type::Bold, 4, 4},
{td::MessageEntity::Type::Italic, 4, 2}},
true);
check_parse_markdown_v3("__ab**[cd~~ef__](t.me)gh**ij~~", "abcdefghij",
{{td::MessageEntity::Type::Italic, 0, 6},
{td::MessageEntity::Type::Bold, 2, 6},
{td::MessageEntity::Type::TextUrl, 2, 4, "http://t.me/"},
{td::MessageEntity::Type::Strikethrough, 4, 6}});
check_parse_markdown_v3("__ab**[cd~~e](t.me)f__gh**ij~~", "abcdefghij",
{{td::MessageEntity::Type::Italic, 0, 6},
{td::MessageEntity::Type::Bold, 2, 6},
{td::MessageEntity::Type::TextUrl, 2, 3, "http://t.me/"},
{td::MessageEntity::Type::Strikethrough, 4, 6}});
check_parse_markdown_v3("__ab**[cd~~](t.me)ef__gh**ij~~", "abcdefghij",
{{td::MessageEntity::Type::Italic, 0, 6},
{td::MessageEntity::Type::Bold, 2, 6},
{td::MessageEntity::Type::TextUrl, 2, 2, "http://t.me/"},
{td::MessageEntity::Type::Strikethrough, 4, 6}});
check_parse_markdown_v3("[__**bold italic link**__](example.com)", "bold italic link",
{{td::MessageEntity::Type::TextUrl, 0, 16, "http://example.com/"},
{td::MessageEntity::Type::Bold, 0, 16},
{td::MessageEntity::Type::Italic, 0, 16}});
// Long example that repeats a sequence combining the markers used above twice in one string.
check_parse_markdown_v3(
"__italic__ ~~strikethrough~~ **bold** `code` ```pre``` __[italic__ text_url](telegram.org) __italic**bold "
"italic__bold**__italic__ ~~strikethrough~~ **bold** `code` ```pre``` __[italic__ text_url](telegram.org) "
"__italic**bold italic__bold**",
"italic strikethrough bold code pre italic text_url italicbold italicbolditalic strikethrough bold code pre "
"italic text_url italicbold italicbold",
{{td::MessageEntity::Type::Italic, 0, 6},
{td::MessageEntity::Type::Strikethrough, 7, 13},
{td::MessageEntity::Type::Bold, 21, 4},
{td::MessageEntity::Type::Code, 26, 4},
{td::MessageEntity::Type::Pre, 31, 3},
{td::MessageEntity::Type::TextUrl, 35, 15, "http://telegram.org/"},
{td::MessageEntity::Type::Italic, 35, 6},
{td::MessageEntity::Type::Italic, 51, 17},
{td::MessageEntity::Type::Bold, 57, 15},
{td::MessageEntity::Type::Italic, 72, 6},
{td::MessageEntity::Type::Strikethrough, 79, 13},
{td::MessageEntity::Type::Bold, 93, 4},
{td::MessageEntity::Type::Code, 98, 4},
{td::MessageEntity::Type::Pre, 103, 3},
{td::MessageEntity::Type::TextUrl, 107, 15, "http://telegram.org/"},
{td::MessageEntity::Type::Italic, 107, 6},
{td::MessageEntity::Type::Italic, 123, 17},
{td::MessageEntity::Type::Bold, 129, 15}});
// Randomized run: build a string from random markup fragments, attach random entities, require that
// fix_formatted_text accepts the input and then call the parser on it.
td::vector<td::string> parts{"a", " #test ", "__", "**", "~~", "[", "](t.me)", "`"};
td::vector<td::MessageEntity::Type> types{
td::MessageEntity::Type::Bold, td::MessageEntity::Type::Italic, td::MessageEntity::Type::Underline,
td::MessageEntity::Type::Strikethrough, td::MessageEntity::Type::Code, td::MessageEntity::Type::Pre,
td::MessageEntity::Type::PreCode, td::MessageEntity::Type::TextUrl, td::MessageEntity::Type::MentionName};
for (size_t test_n = 0; test_n < 1000; test_n++) {
td::string str;
// At least one fragment is always appended, so str is never empty below.
int part_n = td::Random::fast(1, 200);
for (int i = 0; i < part_n; i++) {
str += parts[td::Random::fast(0, static_cast<int>(parts.size()) - 1)];
}
td::vector<td::MessageEntity> entities;
int entity_n = td::Random::fast(1, 20);
for (int i = 0; i < entity_n; i++) {
auto type = types[td::Random::fast(0, static_cast<int>(types.size()) - 1)];
// Offset and length are derived from str.size(), so the entity never extends past that size.
td::int32 offset = td::Random::fast(0, static_cast<int>(str.size()) - 1);
auto max_length = static_cast<int>(str.size() - offset);
// On odd iterations entity lengths are capped at 4.
if ((test_n & 1) != 0 && max_length > 4) {
max_length = 4;
}
td::int32 length = td::Random::fast(0, max_length);
entities.emplace_back(type, offset, length);
}
ASSERT_TRUE(fix_formatted_text(str, entities, true, true, true, true).is_ok());
// The result is discarded: this loop only exercises the call (presumably to catch crashes or failed
// internal checks - confirm intent).
td::parse_markdown_v3({std::move(str), std::move(entities)});
}
}