diff --git a/td/generate/scheme/td_api.tl b/td/generate/scheme/td_api.tl
index 05baa1c77..be3ba316a 100644
--- a/td/generate/scheme/td_api.tl
+++ b/td/generate/scheme/td_api.tl
@@ -5734,6 +5734,9 @@ topChatCategoryCalls = TopChatCategory;
 topChatCategoryForwardChats = TopChatCategory;
 
 
+//@description Contains 0-based match position @position The position of the match
+foundPosition position:int32 = FoundPosition;
+
 //@description Contains 0-based positions of matched objects @total_count Total number of matched objects @positions The positions of the matched objects
 foundPositions total_count:int32 positions:vector<int32> = FoundPositions;
 
@@ -7264,6 +7267,12 @@ getMessageAddedReactions chat_id:int53 message_id:int53 reaction_type:ReactionTy
 setDefaultReactionType reaction_type:ReactionType = Ok;
 
 
+//@description Searches for a given quote in a text. Returns found quote start position in UTF-16 code units. Returns a 404 error if the quote is not found. Can be called synchronously
+//@text Text in which to search for the quote
+//@quote Quote to search for
+//@quote_position Approximate quote position in UTF-16 code units
+searchQuote text:formattedText quote:formattedText quote_position:int32 = FoundPosition;
+
 //@description Returns all entities (mentions, hashtags, cashtags, bot commands, bank card numbers, URLs, and email addresses) found in the text. Can be called synchronously @text The text in which to look for entities
 getTextEntities text:string = TextEntities;
 
diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp
index f9293b888..99bd4f9e1 100644
--- a/td/telegram/MessageEntity.cpp
+++ b/td/telegram/MessageEntity.cpp
@@ -4651,4 +4651,68 @@ void remove_unallowed_entities(const Td *td, FormattedText &text, DialogId dialo
   }
 }
 
+// Searches for the quote in the text and returns the 0-based position of the match in UTF-16 code units,
+// or -1 if the quote is empty, is longer than the text, or doesn't occur in the text.
+// Candidate positions are probed outward from quote_position (clamped to the text) in the order
+// quote_position, quote_position + 1, quote_position - 1, quote_position + 2, and so on.
+// Entities of both arguments are sanitized, but only the text bytes are compared.
+int32 search_quote(FormattedText &&text, FormattedText &&quote, int32 quote_position) {
+  auto process_quote_entities = [](FormattedText &text, int32 length) {
+    remove_unallowed_quote_entities(text);
+    td::remove_if(text.entities, [length](const MessageEntity &entity) {
+      if (entity.offset < 0 || entity.offset >= length) {
+        return true;
+      }
+      if (entity.length <= 0 || entity.length > length - entity.offset) {
+        return true;
+      }
+      return false;
+    });
+    remove_empty_entities(text.entities);
+    fix_entities(text.entities);
+    remove_invalid_entities(text.text, text.entities);
+  };
+  int32 length = text_length(text.text);
+  int32 quote_length = text_length(quote.text);
+  if (quote_length == 0 || quote_length > length) {
+    return -1;
+  }
+  process_quote_entities(text, length);
+  process_quote_entities(quote, quote_length);
+
+  quote_position = clamp(quote_position, 0, length - 1);
+  vector<size_t> byte_positions;  // UTF-16 position -> byte offset; npos marks the second half of a surrogate pair
+  byte_positions.reserve(length);
+  for (size_t i = 0; i < text.text.size(); i++) {
+    auto c = static_cast<unsigned char>(text.text[i]);
+    if (is_utf8_character_first_code_unit(c)) {
+      byte_positions.push_back(i);
+      if (c >= 0xf0) {  // >= 4 bytes in symbol => surrogate pair
+        byte_positions.push_back(string::npos);
+      }
+    }
+  }
+  CHECK(byte_positions.size() == static_cast<size_t>(length));
+  auto check_position = [&text, &quote, &byte_positions, length, quote_length](int32 position) {
+    if (position < 0 || position > length - quote_length) {
+      return false;
+    }
+    auto byte_position = byte_positions[position];
+    if (byte_position == string::npos || text.text[byte_position] != quote.text[0] ||
+        Slice(text.text).substr(byte_position, quote.text.size()) != quote.text) {
+      return false;
+    }
+    return true;
+  };
+  for (int32 i = 0; quote_position - i >= 0 || quote_position + i + 1 <= length - quote_length; i++) {
+    if (check_position(quote_position - i)) {
+      return quote_position - i;
+    }
+    if (check_position(quote_position + i + 1)) {
+      return quote_position + i + 1;
+    }
+  }
+  return -1;
+}
+
 }  // namespace td
diff --git a/td/telegram/MessageEntity.h b/td/telegram/MessageEntity.h
index c25804726..f8e0012e0 100644
--- a/td/telegram/MessageEntity.h
+++ b/td/telegram/MessageEntity.h
@@ -185,6 +185,8 @@ void remove_empty_entities(vector<MessageEntity> &entities);
 
 void remove_unallowed_quote_entities(FormattedText &text);
 
+int32 search_quote(FormattedText &&text, FormattedText &&quote, int32 quote_position);
+
 Slice get_first_url(const FormattedText &text);
 
 bool is_visible_url(const FormattedText &text, const string &url);
diff --git a/td/telegram/Td.cpp b/td/telegram/Td.cpp
index 86a8a9e30..49f010478 100644
--- a/td/telegram/Td.cpp
+++ b/td/telegram/Td.cpp
@@ -2851,6 +2851,7 @@ bool Td::is_authentication_request(int32 id) {
 
 bool Td::is_synchronous_request(const td_api::Function *function) {
   switch (function->get_id()) {
+    case td_api::searchQuote::ID:
     case td_api::getTextEntities::ID:
     case td_api::parseTextEntities::ID:
     case td_api::parseMarkdown::ID:
@@ -9038,6 +9039,10 @@ void Td::on_request(uint64 id, const td_api::getSupportName &request) {
   get_support_name(this, std::move(query_promise));
 }
 
+void Td::on_request(uint64 id, const td_api::searchQuote &request) {
+  UNREACHABLE();
+}
+
 void Td::on_request(uint64 id, const td_api::getTextEntities &request) {
   UNREACHABLE();
 }
@@ -9130,6 +9135,31 @@ void Td::on_request(uint64 id, const td_api::addLogMessage &request) {
   UNREACHABLE();
 }
 
+// Executes searchQuote synchronously; returns foundPosition on success, or a 400/404 error otherwise
+td_api::object_ptr<td_api::Object> Td::do_static_request(td_api::searchQuote &request) {
+  if (request.text_ == nullptr || request.quote_ == nullptr) {
+    return make_error(400, "Text and quote must be non-empty");
+  }
+  if (!check_utf8(request.text_->text_) || !check_utf8(request.quote_->text_)) {
+    return make_error(400, "Strings must be encoded in UTF-8");
+  }
+  auto r_text_entities = get_message_entities(nullptr, std::move(request.text_->entities_), false);
+  if (r_text_entities.is_error()) {
+    return make_error(400, r_text_entities.error().message());
+  }
+  auto r_quote_entities = get_message_entities(nullptr, std::move(request.quote_->entities_), false);
+  if (r_quote_entities.is_error()) {
+    return make_error(400, r_quote_entities.error().message());
+  }
+  auto position =
+      search_quote({std::move(request.text_->text_), r_text_entities.move_as_ok()},
+                   {std::move(request.quote_->text_), r_quote_entities.move_as_ok()}, request.quote_position_);
+  if (position == -1) {
+    return make_error(404, "Not Found");
+  }
+  return td_api::make_object<td_api::foundPosition>(position);
+}
+
 td_api::object_ptr<td_api::Object> Td::do_static_request(const td_api::getTextEntities &request) {
   if (!check_utf8(request.text_)) {
     return make_error(400, "Text must be encoded in UTF-8");
diff --git a/td/telegram/Td.h b/td/telegram/Td.h
index ddc688183..8e20b54fb 100644
--- a/td/telegram/Td.h
+++ b/td/telegram/Td.h
@@ -1664,6 +1664,8 @@ class Td final : public Actor {
 
   void on_request(uint64 id, const td_api::getSupportName &request);
 
+  void on_request(uint64 id, const td_api::searchQuote &request);
+
   void on_request(uint64 id, const td_api::getTextEntities &request);
 
   void on_request(uint64 id, const td_api::parseTextEntities &request);
@@ -1730,6 +1732,7 @@ class Td final : public Actor {
     return td_api::make_object<td_api::error>(400, "The method can't be executed synchronously");
   }
   static td_api::object_ptr<td_api::Object> do_static_request(const td_api::getOption &request);
+  static td_api::object_ptr<td_api::Object> do_static_request(td_api::searchQuote &request);
   static td_api::object_ptr<td_api::Object> do_static_request(const td_api::getTextEntities &request);
   static td_api::object_ptr<td_api::Object> do_static_request(td_api::parseTextEntities &request);
   static td_api::object_ptr<td_api::Object> do_static_request(td_api::parseMarkdown &request);
diff --git a/td/telegram/cli.cpp b/td/telegram/cli.cpp
index ec3f0b28c..c8453eadd 100644
--- a/td/telegram/cli.cpp
+++ b/td/telegram/cli.cpp
@@ -4043,6 +4043,13 @@ class CliClient final : public Actor {
       send_request(td_api::make_object<td_api::checkChatInviteLink>(args));
     } else if (op == "jcbil") {
       send_request(td_api::make_object<td_api::joinChatByInviteLink>(args));
+    } else if (op == "sq") {
+      string text;
+      string quote;
+      int32 quote_position;
+      get_args(args, text, quote, quote_position);
+      execute(
+          td_api::make_object<td_api::searchQuote>(as_formatted_text(text), as_formatted_text(quote), quote_position));
     } else if (op == "gte") {
       send_request(td_api::make_object<td_api::getTextEntities>(args));
     } else if (op == "gtee") {