Support language code specification for PreCode entities in parseMarkdown.

This commit is contained in:
levlam 2023-11-06 00:45:24 +03:00
parent 180afdec3e
commit 8e8a7e0f18
2 changed files with 41 additions and 7 deletions

View File

@@ -2711,11 +2711,35 @@ static FormattedText parse_pre_entities_v3(Slice text) {
if (end_tag_end - end_tag_begin == j - i) { if (end_tag_end - end_tag_begin == j - i) {
// end tag found // end tag found
CHECK(entity_length > 0); CHECK(entity_length > 0);
auto entity_begin = j;
string language_code;
if (j - i == 3) {
size_t language_code_end = j;
while (language_code_end < end_tag_begin - 1 && 33 <= text[language_code_end] &&
text[language_code_end] <= 126) {
language_code_end++;
}
if (language_code_end < end_tag_begin - 1 && text[language_code_end] == '\n' &&
(language_code_end != entity_begin || i == 0 || text[i - 1] == '\n')) {
language_code = text.substr(entity_begin, language_code_end - entity_begin).str();
entity_begin = language_code_end + 1;
entity_length -= entity_begin - j;
CHECK(entity_length > 0);
}
}
if (!language_code.empty()) {
entities.emplace_back(MessageEntity::Type::PreCode, utf16_offset, entity_length,
std::move(language_code));
} else {
entities.emplace_back(j - i == 3 ? MessageEntity::Type::Pre : MessageEntity::Type::Code, utf16_offset, entities.emplace_back(j - i == 3 ? MessageEntity::Type::Pre : MessageEntity::Type::Code, utf16_offset,
entity_length); entity_length);
result.append(text.begin() + j, end_tag_begin - j); }
result.append(text.begin() + entity_begin, end_tag_begin - entity_begin);
utf16_offset += entity_length; utf16_offset += entity_length;
i = end_tag_end - 1; i = end_tag_end - 1;
if (end_tag_end < size && text[end_tag_end] == '\n') {
i++;
}
is_found = true; is_found = true;
break; break;
} else { } else {
@@ -2774,10 +2798,7 @@ static FormattedText parse_pre_entities_v3(Slice text, vector<MessageEntity> ent
result_text_utf16_length += part_end - max_end; result_text_utf16_length += part_end - max_end;
} else { } else {
FormattedText parsed_text = parse_pre_entities_v3(parsed_part_text); FormattedText parsed_text = parse_pre_entities_v3(parsed_part_text);
int32 new_skipped_length = 0; int32 new_skipped_length = parsed_part_text.size() - parsed_text.text.size();
for (auto &entity : parsed_text.entities) {
new_skipped_length += (entity.type == MessageEntity::Type::Pre ? 6 : 2);
}
CHECK(new_skipped_length < part_end - max_end); CHECK(new_skipped_length < part_end - max_end);
result.text += parsed_text.text; result.text += parsed_text.text;
for (auto &entity : parsed_text.entities) { for (auto &entity : parsed_text.entities) {

View File

@@ -1835,6 +1835,19 @@ TEST(MessageEntities, parse_markdown_v3) {
{td::MessageEntity::Type::Italic, 123, 17}, {td::MessageEntity::Type::Italic, 123, 17},
{td::MessageEntity::Type::Bold, 129, 15}, {td::MessageEntity::Type::Bold, 129, 15},
{td::MessageEntity::Type::Spoiler, 145, 7}}); {td::MessageEntity::Type::Spoiler, 145, 7}});
check_parse_markdown_v3("```\nsome code\n```", "some code\n", {{td::MessageEntity::Type::Pre, 0, 10}});
check_parse_markdown_v3("asd\n```\nsome code\n```cabab", "asd\nsome code\ncabab",
{{td::MessageEntity::Type::Pre, 4, 10}});
check_parse_markdown_v3("asd\naba```\nsome code\n```cabab", "asd\naba\nsome code\ncabab",
{{td::MessageEntity::Type::Pre, 7, 11}});
check_parse_markdown_v3("asd\naba```\nsome code\n```\ncabab", "asd\naba\nsome code\ncabab",
{{td::MessageEntity::Type::Pre, 7, 11}});
check_parse_markdown_v3("asd\naba```a b\nsome code\n```\ncabab", "asd\nabaa b\nsome code\ncabab",
{{td::MessageEntity::Type::Pre, 7, 14}});
check_parse_markdown_v3("asd\naba```a!@#$%^&*(b\nsome code\n```\ncabab", "asd\nabasome code\ncabab",
{{td::MessageEntity::Type::PreCode, 7, 10, "a!@#$%^&*(b"}});
check_parse_markdown_v3("```aba\n```", "aba\n", {{td::MessageEntity::Type::Pre, 0, 4}});
check_parse_markdown_v3("```\n```", "\n", {{td::MessageEntity::Type::Pre, 0, 1}});
td::vector<td::string> parts{"a", " #test__a", "__", "**", "~~", "||", "[", "](t.me)", "`"}; td::vector<td::string> parts{"a", " #test__a", "__", "**", "~~", "||", "[", "](t.me)", "`"};
td::vector<td::MessageEntity::Type> types{ td::vector<td::MessageEntity::Type> types{