From 337b657f9cee6046a2da41832a8931d717d15da7 Mon Sep 17 00:00:00 2001
From: levlam <levlam@telegram.org>
Date: Wed, 5 Oct 2022 00:06:48 +0300
Subject: [PATCH] Add watchdog for ClientManager's thread.

---
 telegram-bot-api/ClientManager.cpp    | 13 +++++++++++++
 telegram-bot-api/ClientManager.h      |  6 ++++++
 telegram-bot-api/Watchdog.cpp         |  2 +-
 telegram-bot-api/telegram-bot-api.cpp |  2 +-
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/telegram-bot-api/ClientManager.cpp b/telegram-bot-api/ClientManager.cpp
index b7174e1..a7fcf9c 100644
--- a/telegram-bot-api/ClientManager.cpp
+++ b/telegram-bot-api/ClientManager.cpp
@@ -352,6 +352,11 @@ void ClientManager::start_up() {
     auto query = get_webhook_restore_query(key_value.first, key_value.second, parameters_->shared_data_);
     send_closure_later(actor_id(this), &ClientManager::send, std::move(query));
   }
+
+  // launch watchdog
+  watchdog_id_ = td::create_actor_on_scheduler<Watchdog>(
+      "ManagerWatchdog", td::Scheduler::instance()->sched_count() - 3, td::this_thread::get_id(), WATCHDOG_TIMEOUT);
+  set_timeout_in(600.0);
 }
 
 PromisedQueryPtr ClientManager::get_webhook_restore_query(td::Slice token, td::Slice webhook_info,
@@ -429,6 +434,11 @@ void ClientManager::raw_event(const td::Event::Raw &event) {
   }
 }
 
+void ClientManager::timeout_expired() {
+  send_closure(watchdog_id_, &Watchdog::kick);
+  set_timeout_in(WATCHDOG_TIMEOUT / 2);
+}
+
 void ClientManager::hangup_shared() {
   auto id = get_link_token();
   auto *info = clients_.get(id);
@@ -458,6 +468,7 @@ void ClientManager::close_db() {
 
 void ClientManager::finish_close() {
   LOG(WARNING) << "Stop ClientManager";
+  watchdog_id_.reset();
   auto promises = std::move(close_promises_);
   for (auto &promise : promises) {
     promise.set_value(td::Unit());
@@ -465,4 +476,6 @@ void ClientManager::finish_close() {
   stop();
 }
 
+constexpr double ClientManager::WATCHDOG_TIMEOUT;
+
 }  // namespace telegram_bot_api
diff --git a/telegram-bot-api/ClientManager.h b/telegram-bot-api/ClientManager.h
index 921aaf0..21ae1cd 100644
--- a/telegram-bot-api/ClientManager.h
+++ b/telegram-bot-api/ClientManager.h
@@ -9,6 +9,7 @@
 #include "telegram-bot-api/Client.h"
 #include "telegram-bot-api/Query.h"
 #include "telegram-bot-api/Stats.h"
+#include "telegram-bot-api/Watchdog.h"
 
 #include "td/actor/actor.h"
 
@@ -68,6 +69,10 @@ class ClientManager final : public td::Actor {
   bool close_flag_ = false;
   td::vector<td::Promise<td::Unit>> close_promises_;
 
+  td::ActorOwn<Watchdog> watchdog_id_;
+
+  static constexpr double WATCHDOG_TIMEOUT = 0.5;
+
   static td::int64 get_tqueue_id(td::int64 user_id, bool is_test_dc);
 
   static PromisedQueryPtr get_webhook_restore_query(td::Slice token, td::Slice webhook_info,
@@ -75,6 +80,7 @@ class ClientManager final : public td::Actor {
 
   void start_up() final;
   void raw_event(const td::Event::Raw &event) final;
+  void timeout_expired() final;
   void hangup_shared() final;
   void close_db();
   void finish_close();
diff --git a/telegram-bot-api/Watchdog.cpp b/telegram-bot-api/Watchdog.cpp
index d715574..362b5f9 100644
--- a/telegram-bot-api/Watchdog.cpp
+++ b/telegram-bot-api/Watchdog.cpp
@@ -13,7 +13,7 @@ namespace telegram_bot_api {
 void Watchdog::kick() {
   auto now = td::Time::now();
   if (now >= last_kick_time_ + timeout_ && last_kick_time_ > 0) {
-    LOG(ERROR) << "Watchdog timeout expired after " << now - last_kick_time_ << " seconds";
+    LOG(ERROR) << get_name() << " timeout expired after " << now - last_kick_time_ << " seconds";
     td::thread::send_real_time_signal(main_thread_id_, 2);
   }
   last_kick_time_ = now;
diff --git a/telegram-bot-api/telegram-bot-api.cpp b/telegram-bot-api/telegram-bot-api.cpp
index 0737655..76a0257 100644
--- a/telegram-bot-api/telegram-bot-api.cpp
+++ b/telegram-bot-api/telegram-bot-api.cpp
@@ -506,7 +506,7 @@ int main(int argc, char *argv[]) {
 
   // +3 threads for Td
   // one thread for ClientManager and all Clients
-  // one thread for watchdog
+  // one thread for watchdogs
   // one thread for slow HTTP connections
   // one thread for DNS resolving
   const int thread_count = 7;