Add class td_api::SpeechRecognitionResult.

2022-07-25 13:47:06 +03:00 · 2022-07-25 13:47:06 +03:00 · 366fd53a92
commit 366fd53a92
parent d4b59bda87
4 changed files with 76 additions and 19 deletions
--- a/td/generate/scheme/td_api.tl
+++ b/td/generate/scheme/td_api.tl
@ -324,9 +324,8 @@ videoNote duration:int32 length:int32 minithumbnail:minithumbnail thumbnail:thum

 //@description Describes a voice note. The voice note must be encoded with the Opus codec, and stored inside an OGG container. Voice notes can have only a single audio channel
 //@duration Duration of the voice note, in seconds; as defined by the sender @waveform A waveform representation of the voice note in 5-bit format
-//@mime_type MIME type of the file; as defined by the sender @is_recognized True, if speech recognition is completed; Premium users only
-//@recognized_text Recognized text of the voice note; Premium users only. Call recognizeSpeech to get recognized text of the voice note @voice File containing the voice note
-voiceNote duration:int32 waveform:bytes mime_type:string is_recognized:Bool recognized_text:string voice:file = VoiceNote;
+//@mime_type MIME type of the file; as defined by the sender @speech_recognition_result Result of speech recognition in the voice note; may be null @voice File containing the voice note
+voiceNote duration:int32 waveform:bytes mime_type:string speech_recognition_result:SpeechRecognitionResult voice:file = VoiceNote;

 //@description Describes an animated representation of an emoji
 //@sticker Animated sticker for the emoji
@ -2597,6 +2596,18 @@ diceStickersSlotMachine background:sticker lever:sticker left_reel:sticker cente
 importedContacts user_ids:vector<int53> importer_count:vector<int32> = ImportedContacts;


+//@class SpeechRecognitionResult @description Describes result of speech recognition in a voice note
+
+//@description The speech recognition is ongoing @partial_text Partially recognized text
+speechRecognitionResultPending partial_text:string = SpeechRecognitionResult;
+
+//@description The speech recognition successfully finished @text Recognized text
+speechRecognitionResultText text:string = SpeechRecognitionResult;
+
+//@description The speech recognition failed @error Received error
+speechRecognitionResultError error:error = SpeechRecognitionResult;
+
+
 //@description Describes a color to highlight a bot added to attachment menu @light_color Color in the RGB24 format for light themes @dark_color Color in the RGB24 format for dark themes
 attachmentMenuBotColor light_color:int32 dark_color:int32 = AttachmentMenuBotColor;

--- a/td/telegram/InlineQueriesManager.cpp
+++ b/td/telegram/InlineQueriesManager.cpp
@ -1214,6 +1214,26 @@ static tl_object_ptr<td_api::closedVectorPath> copy_closed_vector_path(
  return copy(obj);
 }

+template <>  // builds a fresh copy of a SpeechRecognitionResult, choosing the concrete type at run time
+tl_object_ptr<td_api::SpeechRecognitionResult> copy(const td_api::SpeechRecognitionResult &obj) {
+  switch (obj.get_id()) {  // each case downcasts obj to the subclass whose ID matched
+    case td_api::speechRecognitionResultPending::ID:
+      return td_api::make_object<td_api::speechRecognitionResultPending>(
+          static_cast<const td_api::speechRecognitionResultPending &>(obj).partial_text_);
+    case td_api::speechRecognitionResultText::ID:
+      return td_api::make_object<td_api::speechRecognitionResultText>(
+          static_cast<const td_api::speechRecognitionResultText &>(obj).text_);
+    case td_api::speechRecognitionResultError::ID: {  // NOTE(review): assumes error_ is non-null - confirm
+      auto *error = static_cast<const td_api::speechRecognitionResultError &>(obj).error_.get();
+      return td_api::make_object<td_api::speechRecognitionResultError>(
+          td_api::make_object<td_api::error>(error->code_, error->message_));  // new error from code + message
+    }
+    default:  // the scheme in this commit declares only the three constructors handled above
+      UNREACHABLE();
+  }
+  return nullptr;  // placed after UNREACHABLE(); presumably only so every path returns - confirm
+}
+
 template <>
 tl_object_ptr<td_api::animation> copy(const td_api::animation &obj) {
  return td_api::make_object<td_api::animation>(obj.duration_, obj.width_, obj.height_, obj.file_name_, obj.mime_type_,
@ -1257,8 +1277,8 @@ tl_object_ptr<td_api::video> copy(const td_api::video &obj) {

 template <>
 tl_object_ptr<td_api::voiceNote> copy(const td_api::voiceNote &obj) {
-  return td_api::make_object<td_api::voiceNote>(obj.duration_, obj.waveform_, obj.mime_type_, obj.is_recognized_,
-                                                obj.recognized_text_, copy(obj.voice_));
+  return td_api::make_object<td_api::voiceNote>(obj.duration_, obj.waveform_, obj.mime_type_,
+                                                copy(obj.speech_recognition_result_), copy(obj.voice_));
 }

 template <>
--- a/td/telegram/VoiceNotesManager.cpp
+++ b/td/telegram/VoiceNotesManager.cpp
@ -138,8 +138,24 @@ tl_object_ptr<td_api::voiceNote> VoiceNotesManager::get_voice_note_object(FileId
  CHECK(it != voice_notes_.end());
  auto voice_note = it->second.get();
  CHECK(voice_note != nullptr);
+
+  auto speech_recognition_result = [this, voice_note]() -> td_api::object_ptr<td_api::SpeechRecognitionResult> {
+    if (voice_note->is_transcribed) {
+      return td_api::make_object<td_api::speechRecognitionResultText>(voice_note->text);
+    }
+    if (speech_recognition_queries_.count(voice_note->file_id) != 0) {
+      return td_api::make_object<td_api::speechRecognitionResultPending>(voice_note->text);
+    }
+    if (voice_note->last_transcription_error.is_error()) {
+      return td_api::make_object<td_api::speechRecognitionResultError>(
+          td_api::make_object<td_api::error>(voice_note->last_transcription_error.error().code(),
+                                             voice_note->last_transcription_error.error().message().str()));
+    }
+    return nullptr;
+  }();
+
  return make_tl_object<td_api::voiceNote>(voice_note->duration, voice_note->waveform, voice_note->mime_type,
-                                           voice_note->is_transcribed, voice_note->text,
+                                           std::move(speech_recognition_result),
                                           td_->file_manager_->get_file_object(file_id));
 }

@ -167,6 +183,7 @@ FileId VoiceNotesManager::on_get_voice_note(unique_ptr<VoiceNote> new_voice_note
      v->is_transcribed = true;
      v->transcription_id = new_voice_note->transcription_id;
      v->text = std::move(new_voice_note->text);
+      v->last_transcription_error = Status::OK();
      on_voice_note_transcription_updated(file_id);
    }
  }
@ -199,8 +216,15 @@ FileId VoiceNotesManager::dup_voice_note(FileId new_id, FileId old_id) {
  CHECK(old_voice_note != nullptr);
  auto &new_voice_note = voice_notes_[new_id];
  CHECK(new_voice_note == nullptr);
-  new_voice_note = make_unique<VoiceNote>(*old_voice_note);
+  new_voice_note = make_unique<VoiceNote>();
  new_voice_note->file_id = new_id;
+  new_voice_note->mime_type = old_voice_note->mime_type;
+  new_voice_note->duration = old_voice_note->duration;
+  new_voice_note->waveform = old_voice_note->waveform;
+  if (old_voice_note->is_transcribed) {
+    new_voice_note->is_transcribed = old_voice_note->is_transcribed;
+    new_voice_note->text = old_voice_note->text;
+  }
  return new_id;
 }

@ -293,6 +317,8 @@ void VoiceNotesManager::recognize_speech(FullMessageId full_message_id, Promise<
  queries.push_back(std::move(promise));
  if (queries.size() == 1) {
    td_->create_handler<TranscribeAudioQuery>()->send(file_id, full_message_id);
+    voice_note->last_transcription_error = Status::OK();
+    on_voice_note_transcription_updated(file_id);
  }
 }

@ -306,10 +332,7 @@ void VoiceNotesManager::on_voice_note_transcribed(FileId file_id, string &&text,
  voice_note->transcription_id = transcription_id;
  voice_note->is_transcribed = is_final;
  voice_note->text = std::move(text);
-
-  if (is_changed) {
-    on_voice_note_transcription_updated(file_id);
-  }
+  voice_note->last_transcription_error = Status::OK();

  if (is_final) {
    auto it = speech_recognition_queries_.find(file_id);
@ -318,8 +341,13 @@ void VoiceNotesManager::on_voice_note_transcribed(FileId file_id, string &&text,
    auto promises = std::move(it->second);
    speech_recognition_queries_.erase(it);

+    on_voice_note_transcription_updated(file_id);
    set_promises(promises);
  } else {
+    if (is_changed) {
+      on_voice_note_transcription_updated(file_id);
+    }
+
    if (pending_voice_note_transcription_queries_.count(transcription_id) != 0) {
      on_pending_voice_note_transcription_failed(transcription_id,
                                                 Status::Error(500, "Receive duplicate recognition identifier"));
@ -334,15 +362,11 @@ void VoiceNotesManager::on_voice_note_transcription_failed(FileId file_id, Statu
  auto voice_note = get_voice_note(file_id);
  CHECK(voice_note != nullptr);
  CHECK(!voice_note->is_transcribed);
+  CHECK(pending_voice_note_transcription_queries_.count(voice_note->transcription_id) == 0);

-  if (voice_note->transcription_id != 0) {
-    CHECK(pending_voice_note_transcription_queries_.count(voice_note->transcription_id) == 0);
-    voice_note->transcription_id = 0;
-    if (!voice_note->text.empty()) {
-      voice_note->text.clear();
-      on_voice_note_transcription_updated(file_id);
-    }
-  }
+  voice_note->transcription_id = 0;
+  voice_note->text.clear();
+  voice_note->last_transcription_error = error.clone();

  auto it = speech_recognition_queries_.find(file_id);
  CHECK(it != speech_recognition_queries_.end());
@ -350,6 +374,7 @@ void VoiceNotesManager::on_voice_note_transcription_failed(FileId file_id, Statu
  auto promises = std::move(it->second);
  speech_recognition_queries_.erase(it);

+  on_voice_note_transcription_updated(file_id);
  fail_promises(promises, std::move(error));
 }

--- a/td/telegram/VoiceNotesManager.h
+++ b/td/telegram/VoiceNotesManager.h
@ -82,6 +82,7 @@ class VoiceNotesManager final : public Actor {
    string waveform;
    int64 transcription_id = 0;
    string text;
+    Status last_transcription_error;

    FileId file_id;
  };