From 44832189e36f704cedc03ecbe0850d24145e45b4 Mon Sep 17 00:00:00 2001 From: levlam Date: Wed, 25 May 2022 19:24:40 +0300 Subject: [PATCH] Add voiceNote.recognized_text --- td/generate/scheme/td_api.tl | 8 ++-- td/telegram/InlineQueriesManager.cpp | 3 +- td/telegram/Version.h | 1 + td/telegram/VoiceNotesManager.cpp | 1 + td/telegram/VoiceNotesManager.h | 4 ++ td/telegram/VoiceNotesManager.hpp | 57 +++++++++++++++++++++++++--- 6 files changed, 64 insertions(+), 10 deletions(-) diff --git a/td/generate/scheme/td_api.tl b/td/generate/scheme/td_api.tl index 366b3339d..b5695a26a 100644 --- a/td/generate/scheme/td_api.tl +++ b/td/generate/scheme/td_api.tl @@ -311,9 +311,11 @@ video duration:int32 width:int32 height:int32 file_name:string mime_type:string //@thumbnail Video thumbnail in JPEG format; as defined by the sender; may be null @video File containing the video videoNote duration:int32 length:int32 minithumbnail:minithumbnail thumbnail:thumbnail video:file = VideoNote; -//@description Describes a voice note. The voice note must be encoded with the Opus codec, and stored inside an OGG container. Voice notes can have only a single audio channel @duration Duration of the voice note, in seconds; as defined by the sender -//@waveform A waveform representation of the voice note in 5-bit format @mime_type MIME type of the file; as defined by the sender @voice File containing the voice note -voiceNote duration:int32 waveform:bytes mime_type:string voice:file = VoiceNote; +//@description Describes a voice note. The voice note must be encoded with the Opus codec, and stored inside an OGG container. Voice notes can have only a single audio channel +//@duration Duration of the voice note, in seconds; as defined by the sender @waveform A waveform representation of the voice note in 5-bit format +//@mime_type MIME type of the file; as defined by the sender @is_recognized True, if speech recognition is completed; Premium users only +//@recognized_text Recognized text of the voice note; Premium users only. Call recognizeSpeech to get recognized text of the voice note @voice File containing the voice note +voiceNote duration:int32 waveform:bytes mime_type:string is_recognized:Bool recognized_text:string voice:file = VoiceNote; //@description Describes an animated representation of an emoji //@sticker Animated sticker for the emoji diff --git a/td/telegram/InlineQueriesManager.cpp b/td/telegram/InlineQueriesManager.cpp index 994c4cfc6..72b978846 100644 --- a/td/telegram/InlineQueriesManager.cpp +++ b/td/telegram/InlineQueriesManager.cpp @@ -1246,7 +1246,8 @@ tl_object_ptr copy(const td_api::video &obj) { template <> tl_object_ptr copy(const td_api::voiceNote &obj) { - return td_api::make_object(obj.duration_, obj.waveform_, obj.mime_type_, copy(obj.voice_)); + return td_api::make_object(obj.duration_, obj.waveform_, obj.mime_type_, obj.is_recognized_, + obj.recognized_text_, copy(obj.voice_)); } template <> diff --git a/td/telegram/Version.h b/td/telegram/Version.h index ade2cc932..9ba50da8f 100644 --- a/td/telegram/Version.h +++ b/td/telegram/Version.h @@ -52,6 +52,7 @@ enum class Version : int32 { AddAudioFlags, UseServerForwardAsCopy, AddMainDialogListPosition, + AddVoiceNoteFlags, Next }; diff --git a/td/telegram/VoiceNotesManager.cpp b/td/telegram/VoiceNotesManager.cpp index 793e9805b..4f069ca01 100644 --- a/td/telegram/VoiceNotesManager.cpp +++ b/td/telegram/VoiceNotesManager.cpp @@ -41,6 +41,7 @@ tl_object_ptr VoiceNotesManager::get_voice_note_object(FileId auto voice_note = it->second.get(); CHECK(voice_note != nullptr); return make_tl_object(voice_note->duration, voice_note->waveform, voice_note->mime_type, + voice_note->is_transcribed, voice_note->text, td_->file_manager_->get_file_object(file_id)); } diff --git a/td/telegram/VoiceNotesManager.h b/td/telegram/VoiceNotesManager.h index 27d11ae53..0a5fa9668 100644 --- a/td/telegram/VoiceNotesManager.h +++ b/td/telegram/VoiceNotesManager.h @@ -10,6 +10,7 @@ #include "td/telegram/SecretInputMedia.h" #include "td/telegram/td_api.h" #include "td/telegram/telegram_api.h" +#include "td/telegram/Version.h" #include "td/utils/common.h" #include "td/utils/FlatHashMap.h" @@ -50,7 +51,10 @@ class VoiceNotesManager { public: string mime_type; int32 duration = 0; + bool is_transcribed = false; string waveform; + int64 transcription_id = 0; + string text; FileId file_id; }; diff --git a/td/telegram/VoiceNotesManager.hpp b/td/telegram/VoiceNotesManager.hpp index 97ffa419e..786dbc9e7 100644 --- a/td/telegram/VoiceNotesManager.hpp +++ b/td/telegram/VoiceNotesManager.hpp @@ -20,18 +20,63 @@ void VoiceNotesManager::store_voice_note(FileId file_id, StorerT &storer) const auto it = voice_notes_.find(file_id); CHECK(it != voice_notes_.end()); const VoiceNote *voice_note = it->second.get(); - store(voice_note->mime_type, storer); - store(voice_note->duration, storer); - store(voice_note->waveform, storer); + bool has_mime_type = !voice_note->mime_type.empty(); + bool has_duration = voice_note->duration != 0; + bool has_waveform = !voice_note->waveform.empty(); + BEGIN_STORE_FLAGS(); + STORE_FLAG(has_mime_type); + STORE_FLAG(has_duration); + STORE_FLAG(has_waveform); + STORE_FLAG(voice_note->is_transcribed); + END_STORE_FLAGS(); + if (has_mime_type) { + store(voice_note->mime_type, storer); + } + if (has_duration) { + store(voice_note->duration, storer); + } + if (has_waveform) { + store(voice_note->waveform, storer); + } + if (voice_note->is_transcribed) { + store(voice_note->transcription_id, storer); + store(voice_note->text, storer); + } store(file_id, storer); } template FileId VoiceNotesManager::parse_voice_note(ParserT &parser) { auto voice_note = make_unique(); - parse(voice_note->mime_type, parser); - parse(voice_note->duration, parser); - parse(voice_note->waveform, parser); + bool has_mime_type; + bool has_duration; + bool has_waveform; + if (parser.version() >= static_cast(Version::AddVoiceNoteFlags)) { + BEGIN_PARSE_FLAGS(); + PARSE_FLAG(has_mime_type); + PARSE_FLAG(has_duration); + PARSE_FLAG(has_waveform); + PARSE_FLAG(voice_note->is_transcribed); + END_PARSE_FLAGS(); + } else { + has_mime_type = true; + has_duration = true; + has_waveform = true; + voice_note->is_transcribed = false; + } + if (has_mime_type) { + parse(voice_note->mime_type, parser); + } + if (has_duration) { + parse(voice_note->duration, parser); + } + if (has_waveform) { + parse(voice_note->waveform, parser); + } + if (voice_note->is_transcribed) { + parse(voice_note->transcription_id, parser); + parse(voice_note->text, parser); + } parse(voice_note->file_id, parser); if (parser.get_error() != nullptr || !voice_note->file_id.is_valid()) { return FileId();