Support PreCode entities in getMarkdownText.

This commit is contained in:
levlam 2023-11-06 02:11:15 +03:00
parent 000da35ad0
commit d0f6791777
2 changed files with 66 additions and 11 deletions

View File

@ -2924,6 +2924,14 @@ FormattedText get_markdown_v3(FormattedText text) {
int32 utf16_offset = 0;
int32 utf16_added = 0;
// Returns true when every character of `code` lies in the range 33..126
// (visible ASCII: no spaces, control characters, or non-ASCII bytes).
// An empty `code` has no offending character and is therefore accepted.
auto is_valid_language_code = [](Slice code) {
  for (auto ch : code) {
    const bool is_visible_ascii = 33 <= ch && ch <= 126;
    if (!is_visible_ascii) {
      return false;
    }
  }
  return true;
};
for (size_t pos = 0; pos <= text.text.size(); pos++) {
auto c = static_cast<unsigned char>(text.text[pos]);
if (is_utf8_character_first_code_unit(c)) {
@ -2936,6 +2944,7 @@ FormattedText get_markdown_v3(FormattedText text) {
CHECK(utf16_offset == entity_end);
bool need_entity = false;
switch (entity->type) {
case MessageEntity::Type::Italic:
result.text += "__";
@ -2967,11 +2976,22 @@ FormattedText get_markdown_v3(FormattedText text) {
result.text += "```";
utf16_added += 3;
break;
default:
result.entities.push_back(*entity);
result.entities.back().offset += nested_entities_stack.back().utf16_added_before;
result.entities.back().length += utf16_added - nested_entities_stack.back().utf16_added_before;
case MessageEntity::Type::PreCode:
if (is_valid_language_code(entity->argument)) {
result.text += "```";
utf16_added += 3;
} else {
need_entity = true;
}
break;
default:
need_entity = true;
break;
}
if (need_entity) {
result.entities.push_back(*entity);
result.entities.back().offset += nested_entities_stack.back().utf16_added_before;
result.entities.back().length += utf16_added - nested_entities_stack.back().utf16_added_before;
}
nested_entities_stack.pop_back();
}
@ -3006,6 +3026,24 @@ FormattedText get_markdown_v3(FormattedText text) {
case MessageEntity::Type::Pre:
result.text += "```";
utf16_added += 3;
if (c != '\n') {
result.text += "\n";
utf16_added++;
}
break;
case MessageEntity::Type::PreCode:
if (is_valid_language_code(text.entities[current_entity].argument)) {
result.text += "```";
utf16_added += 3;
if (!text.entities[current_entity].argument.empty()) {
result.text += text.entities[current_entity].argument;
utf16_added += text.entities[current_entity].argument.size();
}
if (c != '\n') {
result.text += "\n";
utf16_added++;
}
}
break;
default:
// keep as is

View File

@ -1901,7 +1901,7 @@ static void check_get_markdown_v3(const td::string &result_text, const td::vecto
}
TEST(MessageEntities, get_markdown_v3) {
check_get_markdown_v3("``` ```", {}, " ", {{td::MessageEntity::Type::Pre, 0, 1}});
check_get_markdown_v3("```\n ```", {}, " ", {{td::MessageEntity::Type::Pre, 0, 1}});
check_get_markdown_v3("` `", {}, " ", {{td::MessageEntity::Type::Code, 0, 1}});
check_get_markdown_v3("`\n`", {}, "\n", {{td::MessageEntity::Type::Code, 0, 1}});
check_get_markdown_v3("ab", {{td::MessageEntity::Type::Code, 0, 1}, {td::MessageEntity::Type::Pre, 1, 1}}, "ab",
@ -1916,16 +1916,18 @@ TEST(MessageEntities, get_markdown_v3) {
check_get_markdown_v3("** **", {}, " ", {{td::MessageEntity::Type::Bold, 0, 1}});
check_get_markdown_v3("~~ ~~", {}, " ", {{td::MessageEntity::Type::Strikethrough, 0, 1}});
check_get_markdown_v3("|| ||", {}, " ", {{td::MessageEntity::Type::Spoiler, 0, 1}});
check_get_markdown_v3("__a__ **b** ~~c~~ ||d|| e", {{td::MessageEntity::Type::PreCode, 24, 1, "C++"}}, "a b c d e",
check_get_markdown_v3("__a__ **b** ~~c~~ ||d|| e", {{td::MessageEntity::Type::PreCode, 24, 1, " C++"}}, "a b c d e",
{{td::MessageEntity::Type::Italic, 0, 1},
{td::MessageEntity::Type::Bold, 2, 1},
{td::MessageEntity::Type::Strikethrough, 4, 1},
{td::MessageEntity::Type::Spoiler, 6, 1},
{td::MessageEntity::Type::PreCode, 8, 1, "C++"}});
check_get_markdown_v3("`ab` ```cd``` ef", {{td::MessageEntity::Type::PreCode, 14, 2, "C++"}}, "ab cd ef",
{{td::MessageEntity::Type::Code, 0, 2},
{td::MessageEntity::Type::Pre, 3, 2},
{td::MessageEntity::Type::PreCode, 6, 2, "C++"}});
{td::MessageEntity::Type::PreCode, 8, 1, " C++"}});
check_get_markdown_v3("```cpp\ngh```\n`ab`\n```\ncd```\nef", {{td::MessageEntity::Type::PreCode, 28, 2, " C++"}},
"gh\nab\ncd\nef",
{{td::MessageEntity::Type::PreCode, 0, 2, "cpp"},
{td::MessageEntity::Type::Code, 3, 2},
{td::MessageEntity::Type::Pre, 6, 2},
{td::MessageEntity::Type::PreCode, 9, 2, " C++"}});
check_get_markdown_v3("__asd__[__ab__cd](http://t.me/)", {}, "asdabcd",
{{td::MessageEntity::Type::Italic, 0, 3},
{td::MessageEntity::Type::TextUrl, 3, 4, "http://t.me/"},
@ -1944,4 +1946,19 @@ TEST(MessageEntities, get_markdown_v3) {
{{td::MessageEntity::Type::TextUrl, 0, 16, "http://example.com/"},
{td::MessageEntity::Type::Bold, 0, 16},
{td::MessageEntity::Type::Italic, 0, 16}});
check_get_markdown_v3("```\nsome code\n```", {}, "some code\n", {{td::MessageEntity::Type::Pre, 0, 10}});
check_get_markdown_v3("asd\n```\nsome code\n```cabab", {}, "asd\nsome code\ncabab",
{{td::MessageEntity::Type::Pre, 4, 10}});
check_get_markdown_v3("asd\naba```\nsome code\n```cabab", {}, "asd\nabasome code\ncabab",
{{td::MessageEntity::Type::Pre, 7, 10}});
check_get_markdown_v3("asd\naba```\nsome code\n```\ncabab", {}, "asd\nabasome code\n\ncabab",
{{td::MessageEntity::Type::Pre, 7, 10}});
check_get_markdown_v3("asd\naba```\na b\nsome code\n```\ncabab", {}, "asd\nabaa b\nsome code\n\ncabab",
{{td::MessageEntity::Type::Pre, 7, 14}});
check_get_markdown_v3("asd\n```\na b\nsome code\n```\ncabab", {}, "asd\na b\nsome code\n\ncabab",
{{td::MessageEntity::Type::Pre, 4, 14}});
check_get_markdown_v3("asd\naba```a!@#$%^&*(b\nsome code\n```\ncabab", {}, "asd\nabasome code\n\ncabab",
{{td::MessageEntity::Type::PreCode, 7, 10, "a!@#$%^&*(b"}});
check_get_markdown_v3("```\naba\n```", {}, "aba\n", {{td::MessageEntity::Type::Pre, 0, 4}});
check_get_markdown_v3("```\n```", {}, "\n", {{td::MessageEntity::Type::Pre, 0, 1}});
}