tdlight/td/telegram/TranscriptionInfo.cpp

203 lines
7.2 KiB
C++
Raw Normal View History

2022-10-19 19:43:30 +02:00
//
2024-01-01 01:07:21 +01:00
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2024
2022-10-19 19:43:30 +02:00
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#include "td/telegram/TranscriptionInfo.h"
2022-10-20 12:28:07 +02:00
#include "td/telegram/AccessRights.h"
#include "td/telegram/DialogId.h"
#include "td/telegram/DialogManager.h"
2022-10-20 12:28:07 +02:00
#include "td/telegram/Global.h"
#include "td/telegram/Td.h"
2022-10-20 12:28:07 +02:00
#include "td/telegram/telegram_api.h"
#include "td/utils/buffer.h"
#include "td/utils/logging.h"
2022-10-19 19:43:30 +02:00
namespace td {
class TranscribeAudioQuery final : public Td::ResultHandler {
DialogId dialog_id_;
2023-11-23 12:12:27 +01:00
std::function<void(Result<telegram_api::object_ptr<telegram_api::messages_transcribedAudio>>)> handler_;
public:
2023-09-21 18:11:17 +02:00
void send(MessageFullId message_full_id,
2023-11-23 12:12:27 +01:00
std::function<void(Result<telegram_api::object_ptr<telegram_api::messages_transcribedAudio>>)> &&handler) {
2023-09-21 18:11:17 +02:00
dialog_id_ = message_full_id.get_dialog_id();
handler_ = std::move(handler);
auto input_peer = td_->dialog_manager_->get_input_peer(dialog_id_, AccessRights::Read);
if (input_peer == nullptr) {
return on_error(Status::Error(400, "Can't access the chat"));
}
auto query = G()->net_query_creator().create(telegram_api::messages_transcribeAudio(
std::move(input_peer), message_full_id.get_message_id().get_server_message_id().get()));
query->total_timeout_limit_ = 8;
send_query(std::move(query));
}
void on_result(BufferSlice packet) final {
auto result_ptr = fetch_result<telegram_api::messages_transcribeAudio>(packet);
if (result_ptr.is_error()) {
return on_error(result_ptr.move_as_error());
}
auto result = result_ptr.move_as_ok();
LOG(INFO) << "Receive result for TranscribeAudioQuery: " << to_string(result);
2023-11-23 12:12:27 +01:00
handler_(std::move(result));
}
void on_error(Status status) final {
td_->dialog_manager_->on_get_dialog_error(dialog_id_, status, "TranscribeAudioQuery");
handler_(std::move(status));
}
};
class RateTranscribedAudioQuery final : public Td::ResultHandler {
Promise<Unit> promise_;
DialogId dialog_id_;
public:
explicit RateTranscribedAudioQuery(Promise<Unit> &&promise) : promise_(std::move(promise)) {
}
2023-09-21 18:11:17 +02:00
void send(MessageFullId message_full_id, int64 transcription_id, bool is_good) {
dialog_id_ = message_full_id.get_dialog_id();
auto input_peer = td_->dialog_manager_->get_input_peer(dialog_id_, AccessRights::Read);
if (input_peer == nullptr) {
return on_error(Status::Error(400, "Can't access the chat"));
}
send_query(G()->net_query_creator().create(telegram_api::messages_rateTranscribedAudio(
2023-09-21 18:11:17 +02:00
std::move(input_peer), message_full_id.get_message_id().get_server_message_id().get(), transcription_id,
is_good)));
}
void on_result(BufferSlice packet) final {
auto result_ptr = fetch_result<telegram_api::messages_rateTranscribedAudio>(packet);
if (result_ptr.is_error()) {
return on_error(result_ptr.move_as_error());
}
bool result = result_ptr.ok();
LOG(INFO) << "Receive result for RateTranscribedAudioQuery: " << result;
promise_.set_value(Unit());
}
void on_error(Status status) final {
td_->dialog_manager_->on_get_dialog_error(dialog_id_, status, "RateTranscribedAudioQuery");
promise_.set_error(std::move(status));
}
};
bool TranscriptionInfo::recognize_speech(
2023-09-21 18:11:17 +02:00
Td *td, MessageFullId message_full_id, Promise<Unit> &&promise,
2023-11-23 12:12:27 +01:00
std::function<void(Result<telegram_api::object_ptr<telegram_api::messages_transcribedAudio>>)> &&handler) {
2022-10-19 19:43:30 +02:00
if (is_transcribed_) {
promise.set_value(Unit());
return false;
}
speech_recognition_queries_.push_back(std::move(promise));
if (speech_recognition_queries_.size() == 1) {
last_transcription_error_ = Status::OK();
2023-09-21 18:11:17 +02:00
td->create_handler<TranscribeAudioQuery>()->send(message_full_id, std::move(handler));
2022-10-19 19:43:30 +02:00
return true;
}
return false;
}
// Records the final transcription text and identifier and marks the info as transcribed.
//
// Must be called only while the transcription is not final, with a non-zero identifier that matches
// any identifier stored earlier, and while at least one request is waiting.
//
// Returns the promises of all waiting requests; the internal queue is left empty.
vector<Promise<Unit>> TranscriptionInfo::on_final_transcription(string &&text, int64 transcription_id) {
  CHECK(!is_transcribed_);
  CHECK(transcription_id_ == 0 || transcription_id_ == transcription_id);
  CHECK(transcription_id != 0);

  text_ = std::move(text);
  transcription_id_ = transcription_id;
  is_transcribed_ = true;
  last_transcription_error_ = Status::OK();

  CHECK(!speech_recognition_queries_.empty());
  // Take ownership of the waiting promises, leaving the member queue empty.
  vector<Promise<Unit>> waiting_promises;
  waiting_promises.swap(speech_recognition_queries_);
  return waiting_promises;
}
// Stores an intermediate transcription text and its identifier without marking the info as final.
//
// Must be called only while the transcription is not final and with a non-zero identifier that
// matches any identifier stored earlier. Any stored error is reset.
//
// Returns true if the new text differs from the previously stored text.
bool TranscriptionInfo::on_partial_transcription(string &&text, int64 transcription_id) {
  CHECK(!is_transcribed_);
  CHECK(transcription_id_ == 0 || transcription_id_ == transcription_id);
  CHECK(transcription_id != 0);

  // Compare before the move below consumes `text`.
  const bool text_differs = (text_ != text);

  text_ = std::move(text);
  transcription_id_ = transcription_id;
  last_transcription_error_ = Status::OK();
  return text_differs;
}
// Records a transcription failure: drops any stored identifier and text and remembers the error.
//
// Must be called only while the transcription is not final and at least one request is waiting.
//
// Returns the promises of all waiting requests; the internal queue is left empty.
vector<Promise<Unit>> TranscriptionInfo::on_failed_transcription(Status &&error) {
  CHECK(!is_transcribed_);

  text_.clear();
  transcription_id_ = 0;
  last_transcription_error_ = std::move(error);

  CHECK(!speech_recognition_queries_.empty());
  // Take ownership of the waiting promises, leaving the member queue empty.
  vector<Promise<Unit>> waiting_promises;
  waiting_promises.swap(speech_recognition_queries_);
  return waiting_promises;
}
2023-09-21 18:11:17 +02:00
// Sends a rating for the final transcription of the message.
//
// When there is no final transcription yet, nothing is sent and the promise is fulfilled right away.
// Otherwise a RateTranscribedAudioQuery is created with the promise and sent with the stored
// transcription identifier.
void TranscriptionInfo::rate_speech_recognition(Td *td, MessageFullId message_full_id, bool is_good,
                                                Promise<Unit> &&promise) const {
  if (!is_transcribed_) {
    promise.set_value(Unit());
    return;
  }

  CHECK(transcription_id_ != 0);
  auto rate_query = td->create_handler<RateTranscribedAudioQuery>(std::move(promise));
  rate_query->send(message_full_id, transcription_id_, is_good);
}
2022-10-19 19:43:30 +02:00
// Creates a new TranscriptionInfo holding the final transcription of `info`.
//
// Returns nullptr when `info` is null or its transcription is not final. Only the transcribed flag,
// the identifier and the text are copied; the other fields of the new object keep their defaults.
unique_ptr<TranscriptionInfo> TranscriptionInfo::copy_if_transcribed(const unique_ptr<TranscriptionInfo> &info) {
  const bool has_final_transcription = info != nullptr && info->is_transcribed_;
  if (!has_final_transcription) {
    return nullptr;
  }

  auto copy = make_unique<TranscriptionInfo>();
  copy->text_ = info->text_;
  copy->transcription_id_ = info->transcription_id_;
  copy->is_transcribed_ = true;
  return copy;
}
// Replaces `old_info` with `new_info` when the latter carries a final transcription and the former
// has nothing worth keeping.
//
// Nothing happens when `new_info` is null or not final. An existing `old_info` is kept if it already
// has a transcription identifier or has requests waiting; otherwise (or if `old_info` is null)
// `new_info` is moved into `old_info`.
//
// Returns true if `old_info` was replaced.
bool TranscriptionInfo::update_from(unique_ptr<TranscriptionInfo> &old_info, unique_ptr<TranscriptionInfo> &&new_info) {
  if (new_info == nullptr || !new_info->is_transcribed_) {
    return false;
  }
  // A final transcription is expected to have an identifier, no error and no waiting requests.
  CHECK(new_info->transcription_id_ != 0);
  CHECK(new_info->last_transcription_error_.is_ok());
  CHECK(new_info->speech_recognition_queries_.empty());

  if (old_info != nullptr) {
    const bool old_has_identifier = old_info->transcription_id_ != 0;
    const bool old_has_waiting_requests = !old_info->speech_recognition_queries_.empty();
    if (old_has_identifier || old_has_waiting_requests) {
      return false;
    }
    CHECK(!old_info->is_transcribed_);
  }

  old_info = std::move(new_info);
  return true;
}
// Builds the td_api object describing the current state of speech recognition.
//
// Checked in this order:
//  - final transcription           -> speechRecognitionResultText with the stored text;
//  - requests are waiting          -> speechRecognitionResultPending with the text stored so far;
//  - the last attempt has failed   -> speechRecognitionResultError with the stored code and message;
//  - none of the above             -> nullptr.
td_api::object_ptr<td_api::SpeechRecognitionResult> TranscriptionInfo::get_speech_recognition_result_object() const {
  if (is_transcribed_) {
    return td_api::make_object<td_api::speechRecognitionResultText>(text_);
  }

  const bool has_waiting_requests = !speech_recognition_queries_.empty();
  if (has_waiting_requests) {
    return td_api::make_object<td_api::speechRecognitionResultPending>(text_);
  }

  if (!last_transcription_error_.is_error()) {
    return nullptr;
  }

  auto error_object = td_api::make_object<td_api::error>(last_transcription_error_.code(),
                                                         last_transcription_error_.message().str());
  return td_api::make_object<td_api::speechRecognitionResultError>(std::move(error_object));
}
} // namespace td