diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index 2d953032b..fcf5c7677 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -1662,13 +1662,13 @@ static Result> do_parse_html(CSlice text, string &result) struct EntityInfo { string tag_name; - string url; + string argument; int32 entity_offset; size_t entity_begin_pos; - EntityInfo(string tag_name, string url, int32 entity_offset, size_t entity_begin_pos) + EntityInfo(string tag_name, string argument, int32 entity_offset, size_t entity_begin_pos) : tag_name(std::move(tag_name)) - , url(std::move(url)) + , argument(std::move(argument)) , entity_offset(entity_offset) , entity_begin_pos(entity_begin_pos) { } @@ -1711,8 +1711,7 @@ static Result> do_parse_html(CSlice text, string &result) << "Unsupported start tag \"" << tag_name << "\" at byte offset " << begin_pos); } - string url; - // string language; TODO PreCode support + string argument; while (text[i] != '>') { while (text[i] != 0 && is_space(text[i])) { i++; @@ -1779,11 +1778,14 @@ static Result> do_parse_html(CSlice text, string &result) } if (tag_name == "a" && attribute_name == Slice("href")) { - url = std::move(attribute_value); + argument = std::move(attribute_value); + } + if (tag_name == "code" && attribute_name == Slice("class") && begins_with(attribute_value, "language-")) { + argument = attribute_value.substr(9); } } - nested_entities.emplace_back(std::move(tag_name), std::move(url), utf16_offset, result.size()); + nested_entities.emplace_back(std::move(tag_name), std::move(argument), utf16_offset, result.size()); } else { // end of an entity if (nested_entities.empty()) { @@ -1815,7 +1817,7 @@ static Result> do_parse_html(CSlice text, string &result) } else if (tag_name == "b" || tag_name == "strong") { entities.emplace_back(MessageEntity::Type::Bold, entity_offset, entity_length); } else if (tag_name == "a") { - auto url = std::move(nested_entities.back().url); + auto url = std::move(nested_entities.back().argument); if (url.empty()) { url = result.substr(nested_entities.back().entity_begin_pos); } @@ -1829,9 +1831,23 @@ static Result> do_parse_html(CSlice text, string &result) } } } else if (tag_name == "pre") { - entities.emplace_back(MessageEntity::Type::Pre, entity_offset, entity_length); + if (!entities.empty() && entities.back().type == MessageEntity::Type::Code && + entities.back().offset == entity_offset && entities.back().length == entity_length && + !entities.back().argument.empty()) { + entities.back().type = MessageEntity::Type::PreCode; + } else { + entities.emplace_back(MessageEntity::Type::Pre, entity_offset, entity_length); + } } else if (tag_name == "code") { - entities.emplace_back(MessageEntity::Type::Code, entity_offset, entity_length); + if (!entities.empty() && entities.back().type == MessageEntity::Type::Pre && + entities.back().offset == entity_offset && entities.back().length == entity_length && + !nested_entities.back().argument.empty()) { + entities.back().type = MessageEntity::Type::PreCode; + entities.back().argument = std::move(nested_entities.back().argument); + } else { + entities.emplace_back(MessageEntity::Type::Code, entity_offset, entity_length, + nested_entities.back().argument); + } } else { UNREACHABLE(); } @@ -1844,6 +1860,12 @@ static Result> do_parse_html(CSlice text, string &result) 400, PSLICE() << "Can't find end tag corresponding to start tag " << nested_entities.back().tag_name); } + for (auto &entity : entities) { + if (entity.type == MessageEntity::Type::Code && !entity.argument.empty()) { + entity.argument.clear(); + } + } + std::sort(entities.begin(), entities.end()); return entities; diff --git a/test/message_entities.cpp b/test/message_entities.cpp index e249f6c56..c8fcd1a45 100644 --- a/test/message_entities.cpp +++ b/test/message_entities.cpp @@ -810,6 +810,22 @@ TEST(MessageEntities, parse_html) { {{td::MessageEntity::Type::TextUrl, 0, 12, "http://telegram.org/"}}); check_parse_html("https://telegram.org/asdsa?asdasdwe#12e3we", "https://telegram.org/asdsa?asdasdwe#12e3we", {{td::MessageEntity::Type::TextUrl, 0, 42, "https://telegram.org/asdsa?asdasdwe#12e3we"}}); + check_parse_html("🏟 🏟<
🏟 🏟<", "🏟 🏟<🏟 🏟<",
+                   {{td::MessageEntity::Type::Pre, 6, 6}});
+  check_parse_html("🏟 🏟<🏟 🏟<", "🏟 🏟<🏟 🏟<",
+                   {{td::MessageEntity::Type::Code, 6, 6}});
+  check_parse_html("🏟 🏟<
🏟 🏟<", "🏟 🏟<🏟 🏟<",
+                   {{td::MessageEntity::Type::Pre, 6, 6}, {td::MessageEntity::Type::Code, 6, 6}});
+  check_parse_html("🏟 🏟<
🏟 🏟<", "🏟 🏟<🏟 🏟<",
+                   {{td::MessageEntity::Type::Pre, 6, 6}, {td::MessageEntity::Type::Code, 6, 6}});
+  check_parse_html("🏟 🏟<
🏟 🏟<", "🏟 🏟<🏟 🏟<",
+                   {{td::MessageEntity::Type::PreCode, 6, 6, "fift"}});
+  check_parse_html("🏟 🏟<
🏟 🏟<", "🏟 🏟<🏟 🏟<",
+                   {{td::MessageEntity::Type::PreCode, 6, 6, "fift"}});
+  check_parse_html("🏟 🏟<
🏟 🏟< ", "🏟 🏟<🏟 🏟< ",
+                   {{td::MessageEntity::Type::Pre, 6, 7}, {td::MessageEntity::Type::Code, 6, 6}});
+  check_parse_html("🏟 🏟<
 🏟 🏟<", "🏟 🏟< 🏟 🏟<",
+                   {{td::MessageEntity::Type::Pre, 6, 7}, {td::MessageEntity::Type::Code, 7, 6}});
 }
 
 static void check_parse_markdown(td::string text, const td::string &result,