Keep bot commands and media timestamps in translated text.

This commit is contained in:
levlam 2023-01-18 20:45:46 +03:00
parent e26aecc87a
commit 3f7148407c
4 changed files with 52 additions and 11 deletions

View File

@ -3410,7 +3410,7 @@ Result<vector<MessageEntity>> get_message_entities(const ContactsManager *contac
break;
}
case td_api::textEntityTypeMentionName::ID: {
auto entity = static_cast<td_api::textEntityTypeMentionName *>(input_entity->type_.get());
auto entity = static_cast<const td_api::textEntityTypeMentionName *>(input_entity->type_.get());
UserId user_id(entity->user_id_);
if (contacts_manager != nullptr) {
TRY_STATUS(contacts_manager->get_input_user(user_id));
@ -3419,7 +3419,7 @@ Result<vector<MessageEntity>> get_message_entities(const ContactsManager *contac
break;
}
case td_api::textEntityTypeMediaTimestamp::ID: {
auto entity = static_cast<td_api::textEntityTypeMediaTimestamp *>(input_entity->type_.get());
auto entity = static_cast<const td_api::textEntityTypeMediaTimestamp *>(input_entity->type_.get());
if (entity->media_timestamp_ < 0) {
return Status::Error(400, "Invalid media timestamp specified");
}
@ -3430,7 +3430,7 @@ Result<vector<MessageEntity>> get_message_entities(const ContactsManager *contac
entities.emplace_back(MessageEntity::Type::Spoiler, offset, length);
break;
case td_api::textEntityTypeCustomEmoji::ID: {
auto entity = static_cast<td_api::textEntityTypeCustomEmoji *>(input_entity->type_.get());
auto entity = static_cast<const td_api::textEntityTypeCustomEmoji *>(input_entity->type_.get());
CustomEmojiId custom_emoji_id(entity->custom_emoji_id_);
if (!custom_emoji_id.is_valid()) {
return Status::Error(400, "Invalid custom emoji identifier specified");
@ -3733,15 +3733,17 @@ telegram_api::object_ptr<telegram_api::textWithEntities> get_input_text_with_ent
// Converts a server textWithEntities object into a FormattedText.
// text_with_entities must be non-null (checked by CHECK below); its text and entities are moved out of it.
// In the flag-taking form, allow_empty, skip_new_entities, skip_bot_commands, skip_media_timestamps and
// for_draft are forwarded unchanged to fix_formatted_text; source is passed on to get_message_entities.
// NOTE(review): this listing shows both the previous (source-only) and the new (flag-taking) variants of the
// signature and of the fix_formatted_text/find_entities calls; the comments describe the flag-taking variant.
FormattedText get_formatted_text(const ContactsManager *contacts_manager,
telegram_api::object_ptr<telegram_api::textWithEntities> text_with_entities,
const char *source) {
bool allow_empty, bool skip_new_entities, bool skip_bot_commands,
bool skip_media_timestamps, bool for_draft, const char *source) {
CHECK(text_with_entities != nullptr);
auto entities = get_message_entities(contacts_manager, std::move(text_with_entities->entities_), source);
auto status = fix_formatted_text(text_with_entities->text_, entities, true, true, true, true, false);
auto status = fix_formatted_text(text_with_entities->text_, entities, allow_empty, skip_new_entities,
skip_bot_commands, skip_media_timestamps, for_draft);
if (status.is_error()) {
// Validation of the received text failed: keep the text only if clean_input_string accepts it (otherwise
// use an empty text), and rebuild the entity list from the text alone, honoring the same skip flags.
if (!clean_input_string(text_with_entities->text_)) {
text_with_entities->text_.clear();
}
entities = find_entities(text_with_entities->text_, true, true);
entities = find_entities(text_with_entities->text_, skip_bot_commands, skip_media_timestamps);
}
return {std::move(text_with_entities->text_), std::move(entities)};
}

View File

@ -15,6 +15,7 @@
#include "td/utils/common.h"
#include "td/utils/FlatHashSet.h"
#include "td/utils/HashTableUtils.h"
#include "td/utils/Slice.h"
#include "td/utils/Status.h"
#include "td/utils/StringBuilder.h"
@ -131,6 +132,18 @@ struct FormattedText {
void parse(ParserT &parser);
};
// Hash functor for FormattedText.
// The hash of the text is used as the seed; then the type, length and offset of every entity are mixed in,
// in that order, using a multiply-and-add step.
struct FormattedTextHash {
  uint32 operator()(const FormattedText &formatted_text) const {
    auto result = Hash<string>()(formatted_text.text);
    // Folds one 32-bit value into the running hash.
    const auto combine = [&result](int32 value) {
      result = result * 2023654985u + Hash<int32>()(value);
    };
    for (const auto &entity : formatted_text.entities) {
      combine(static_cast<int32>(entity.type));
      combine(entity.length);
      combine(entity.offset);
    }
    return result;
  }
};
StringBuilder &operator<<(StringBuilder &string_builder, const FormattedText &text);
inline bool operator==(const FormattedText &lhs, const FormattedText &rhs) {
@ -206,7 +219,8 @@ telegram_api::object_ptr<telegram_api::textWithEntities> get_input_text_with_ent
// Builds a FormattedText from a server textWithEntities object, which must be non-null.
// In the flag-taking form the boolean parameters are forwarded to fix_formatted_text.
FormattedText get_formatted_text(const ContactsManager *contacts_manager,
telegram_api::object_ptr<telegram_api::textWithEntities> text_with_entities,
const char *source);
bool allow_empty, bool skip_new_entities, bool skip_bot_commands,
bool skip_media_timestamps, bool for_draft, const char *source);
// like clean_input_string but also validates entities
Status fix_formatted_text(string &text, vector<MessageEntity> &entities, bool allow_empty, bool skip_new_entities,

View File

@ -64,6 +64,27 @@ void TranslationManager::translate_text(td_api::object_ptr<td_api::formattedText
if (text == nullptr) {
return promise.set_error(Status::Error(400, "Text must be non-empty"));
}
bool skip_bot_commands = true;
int32 max_media_timestamp = -1;
for (const auto &entity : text->entities_) {
if (entity == nullptr || entity->type_ == nullptr) {
continue;
}
switch (entity->type_->get_id()) {
case td_api::textEntityTypeBotCommand::ID:
skip_bot_commands = false;
break;
case td_api::textEntityTypeMediaTimestamp::ID:
max_media_timestamp =
td::max(max_media_timestamp,
static_cast<const td_api::textEntityTypeMediaTimestamp *>(entity->type_.get())->media_timestamp_);
break;
default:
// nothing to do
break;
}
}
TRY_RESULT_PROMISE(promise, entities, get_message_entities(td_->contacts_manager_.get(), std::move(text->entities_)));
TRY_STATUS_PROMISE(promise, fix_formatted_text(text->text_, entities, true, true, true, true, true));
@ -72,26 +93,29 @@ void TranslationManager::translate_text(td_api::object_ptr<td_api::formattedText
texts.push_back(FormattedText{std::move(text->text_), std::move(entities)});
auto query_promise = PromiseCreator::lambda(
[actor_id = actor_id(this), promise = std::move(promise)](
[actor_id = actor_id(this), skip_bot_commands, max_media_timestamp, promise = std::move(promise)](
Result<vector<telegram_api::object_ptr<telegram_api::textWithEntities>>> result) mutable {
if (result.is_error()) {
return promise.set_error(result.move_as_error());
}
send_closure(actor_id, &TranslationManager::on_get_translated_texts, result.move_as_ok(), std::move(promise));
send_closure(actor_id, &TranslationManager::on_get_translated_texts, result.move_as_ok(), skip_bot_commands,
max_media_timestamp, std::move(promise));
});
td_->create_handler<TranslateTextQuery>(std::move(query_promise))->send(std::move(texts), to_language_code);
}
// Completes a translate_text request once the translated texts have been received.
// texts must contain exactly one element, otherwise the promise fails with error 500.
// skip_bot_commands and max_media_timestamp are the values computed by translate_text from the entities of
// the text that was sent for translation (max_media_timestamp is -1 if it had no media timestamp entities).
void TranslationManager::on_get_translated_texts(vector<telegram_api::object_ptr<telegram_api::textWithEntities>> texts,
bool skip_bot_commands, int32 max_media_timestamp,
Promise<td_api::object_ptr<td_api::formattedText>> &&promise) {
// presumably fails the promise and returns early when close_status() reports an error - confirm macro semantics
TRY_STATUS_PROMISE(promise, G()->close_status());
if (texts.size() != 1u) {
return promise.set_error(Status::Error(500, "Receive invalid number of results"));
}
auto formatted_text =
get_formatted_text(td_->contacts_manager_.get(), std::move(texts[0]), "on_get_translated_texts");
promise.set_value(get_formatted_text_object(formatted_text, true, -1));
// Flag-taking call: allow_empty = true, skip_new_entities = true, for_draft = true; media timestamps are
// skipped only when the original text contained none (max_media_timestamp == -1).
get_formatted_text(td_->contacts_manager_.get(), std::move(texts[0]), true, true, skip_bot_commands,
max_media_timestamp == -1, true, "on_get_translated_texts");
promise.set_value(get_formatted_text_object(formatted_text, skip_bot_commands, max_media_timestamp));
}
} // namespace td

View File

@ -29,6 +29,7 @@ class TranslationManager final : public Actor {
void tear_down() final;
// Handles the server response for translate_text and fulfils promise with the translated text.
// skip_bot_commands and max_media_timestamp are derived by translate_text from the entities of the source text.
void on_get_translated_texts(vector<telegram_api::object_ptr<telegram_api::textWithEntities>> texts,
bool skip_bot_commands, int32 max_media_timestamp,
Promise<td_api::object_ptr<td_api::formattedText>> &&promise);
Td *td_;