From 44832189e36f704cedc03ecbe0850d24145e45b4 Mon Sep 17 00:00:00 2001
From: levlam <levlam@telegram.org>
Date: Wed, 25 May 2022 19:24:40 +0300
Subject: [PATCH] Add voiceNote.recognized_text

---
 td/generate/scheme/td_api.tl         |  8 ++--
 td/telegram/InlineQueriesManager.cpp |  3 +-
 td/telegram/Version.h                |  1 +
 td/telegram/VoiceNotesManager.cpp    |  1 +
 td/telegram/VoiceNotesManager.h      |  4 ++
 td/telegram/VoiceNotesManager.hpp    | 57 +++++++++++++++++++++++++---
 6 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/td/generate/scheme/td_api.tl b/td/generate/scheme/td_api.tl
index 366b3339d..b5695a26a 100644
--- a/td/generate/scheme/td_api.tl
+++ b/td/generate/scheme/td_api.tl
@@ -311,9 +311,11 @@ video duration:int32 width:int32 height:int32 file_name:string mime_type:string
 //@thumbnail Video thumbnail in JPEG format; as defined by the sender; may be null @video File containing the video
 videoNote duration:int32 length:int32 minithumbnail:minithumbnail thumbnail:thumbnail video:file = VideoNote;
 
-//@description Describes a voice note. The voice note must be encoded with the Opus codec, and stored inside an OGG container. Voice notes can have only a single audio channel @duration Duration of the voice note, in seconds; as defined by the sender
-//@waveform A waveform representation of the voice note in 5-bit format @mime_type MIME type of the file; as defined by the sender @voice File containing the voice note
-voiceNote duration:int32 waveform:bytes mime_type:string voice:file = VoiceNote;
+//@description Describes a voice note. The voice note must be encoded with the Opus codec, and stored inside an OGG container. Voice notes can have only a single audio channel
+//@duration Duration of the voice note, in seconds; as defined by the sender @waveform A waveform representation of the voice note in 5-bit format
+//@mime_type MIME type of the file; as defined by the sender @is_recognized True, if speech recognition is completed; Premium users only
+//@recognized_text Recognized text of the voice note; Premium users only. Call recognizeSpeech to get the recognized text of the voice note @voice File containing the voice note
+voiceNote duration:int32 waveform:bytes mime_type:string is_recognized:Bool recognized_text:string voice:file = VoiceNote;
 
 //@description Describes an animated representation of an emoji
 //@sticker Animated sticker for the emoji
diff --git a/td/telegram/InlineQueriesManager.cpp b/td/telegram/InlineQueriesManager.cpp
index 994c4cfc6..72b978846 100644
--- a/td/telegram/InlineQueriesManager.cpp
+++ b/td/telegram/InlineQueriesManager.cpp
@@ -1246,7 +1246,8 @@ tl_object_ptr<td_api::video> copy(const td_api::video &obj) {
 
 template <>
 tl_object_ptr<td_api::voiceNote> copy(const td_api::voiceNote &obj) {
-  return td_api::make_object<td_api::voiceNote>(obj.duration_, obj.waveform_, obj.mime_type_, copy(obj.voice_));
+  return td_api::make_object<td_api::voiceNote>(obj.duration_, obj.waveform_, obj.mime_type_, obj.is_recognized_,
+                                                obj.recognized_text_, copy(obj.voice_));
 }
 
 template <>
diff --git a/td/telegram/Version.h b/td/telegram/Version.h
index ade2cc932..9ba50da8f 100644
--- a/td/telegram/Version.h
+++ b/td/telegram/Version.h
@@ -52,6 +52,7 @@ enum class Version : int32 {
   AddAudioFlags,
   UseServerForwardAsCopy,
   AddMainDialogListPosition,
+  AddVoiceNoteFlags,
   Next
 };
 
diff --git a/td/telegram/VoiceNotesManager.cpp b/td/telegram/VoiceNotesManager.cpp
index 793e9805b..4f069ca01 100644
--- a/td/telegram/VoiceNotesManager.cpp
+++ b/td/telegram/VoiceNotesManager.cpp
@@ -41,6 +41,7 @@ tl_object_ptr<td_api::voiceNote> VoiceNotesManager::get_voice_note_object(FileId
   auto voice_note = it->second.get();
   CHECK(voice_note != nullptr);
   return make_tl_object<td_api::voiceNote>(voice_note->duration, voice_note->waveform, voice_note->mime_type,
+                                           voice_note->is_transcribed, voice_note->text,
                                            td_->file_manager_->get_file_object(file_id));
 }
 
diff --git a/td/telegram/VoiceNotesManager.h b/td/telegram/VoiceNotesManager.h
index 27d11ae53..0a5fa9668 100644
--- a/td/telegram/VoiceNotesManager.h
+++ b/td/telegram/VoiceNotesManager.h
@@ -10,6 +10,7 @@
 #include "td/telegram/SecretInputMedia.h"
 #include "td/telegram/td_api.h"
 #include "td/telegram/telegram_api.h"
+#include "td/telegram/Version.h"
 
 #include "td/utils/common.h"
 #include "td/utils/FlatHashMap.h"
@@ -50,7 +51,10 @@ class VoiceNotesManager {
    public:
     string mime_type;
     int32 duration = 0;
+    bool is_transcribed = false;
     string waveform;
+    int64 transcription_id = 0;
+    string text;
 
     FileId file_id;
   };
diff --git a/td/telegram/VoiceNotesManager.hpp b/td/telegram/VoiceNotesManager.hpp
index 97ffa419e..786dbc9e7 100644
--- a/td/telegram/VoiceNotesManager.hpp
+++ b/td/telegram/VoiceNotesManager.hpp
@@ -20,18 +20,63 @@ void VoiceNotesManager::store_voice_note(FileId file_id, StorerT &storer) const
   auto it = voice_notes_.find(file_id);
   CHECK(it != voice_notes_.end());
   const VoiceNote *voice_note = it->second.get();
-  store(voice_note->mime_type, storer);
-  store(voice_note->duration, storer);
-  store(voice_note->waveform, storer);
+  bool has_mime_type = !voice_note->mime_type.empty();
+  bool has_duration = voice_note->duration != 0;
+  bool has_waveform = !voice_note->waveform.empty();
+  BEGIN_STORE_FLAGS();
+  STORE_FLAG(has_mime_type);
+  STORE_FLAG(has_duration);
+  STORE_FLAG(has_waveform);
+  STORE_FLAG(voice_note->is_transcribed);
+  END_STORE_FLAGS();
+  if (has_mime_type) {
+    store(voice_note->mime_type, storer);
+  }
+  if (has_duration) {
+    store(voice_note->duration, storer);
+  }
+  if (has_waveform) {
+    store(voice_note->waveform, storer);
+  }
+  if (voice_note->is_transcribed) {
+    store(voice_note->transcription_id, storer);
+    store(voice_note->text, storer);
+  }
   store(file_id, storer);
 }
 
 template <class ParserT>
 FileId VoiceNotesManager::parse_voice_note(ParserT &parser) {
   auto voice_note = make_unique<VoiceNote>();
-  parse(voice_note->mime_type, parser);
-  parse(voice_note->duration, parser);
-  parse(voice_note->waveform, parser);
+  bool has_mime_type;
+  bool has_duration;
+  bool has_waveform;
+  if (parser.version() >= static_cast<int32>(Version::AddVoiceNoteFlags)) {
+    BEGIN_PARSE_FLAGS();
+    PARSE_FLAG(has_mime_type);
+    PARSE_FLAG(has_duration);
+    PARSE_FLAG(has_waveform);
+    PARSE_FLAG(voice_note->is_transcribed);
+    END_PARSE_FLAGS();
+  } else {
+    has_mime_type = true;
+    has_duration = true;
+    has_waveform = true;
+    voice_note->is_transcribed = false;
+  }
+  if (has_mime_type) {
+    parse(voice_note->mime_type, parser);
+  }
+  if (has_duration) {
+    parse(voice_note->duration, parser);
+  }
+  if (has_waveform) {
+    parse(voice_note->waveform, parser);
+  }
+  if (voice_note->is_transcribed) {
+    parse(voice_note->transcription_id, parser);
+    parse(voice_note->text, parser);
+  }
   parse(voice_note->file_id, parser);
   if (parser.get_error() != nullptr || !voice_note->file_id.is_valid()) {
     return FileId();