From 337b657f9cee6046a2da41832a8931d717d15da7 Mon Sep 17 00:00:00 2001 From: levlam <levlam@telegram.org> Date: Wed, 5 Oct 2022 00:06:48 +0300 Subject: [PATCH] Add watchdog for ClientManager's thread. --- telegram-bot-api/ClientManager.cpp | 13 +++++++++++++ telegram-bot-api/ClientManager.h | 6 ++++++ telegram-bot-api/Watchdog.cpp | 2 +- telegram-bot-api/telegram-bot-api.cpp | 2 +- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/telegram-bot-api/ClientManager.cpp b/telegram-bot-api/ClientManager.cpp index b7174e1..a7fcf9c 100644 --- a/telegram-bot-api/ClientManager.cpp +++ b/telegram-bot-api/ClientManager.cpp @@ -352,6 +352,11 @@ void ClientManager::start_up() { auto query = get_webhook_restore_query(key_value.first, key_value.second, parameters_->shared_data_); send_closure_later(actor_id(this), &ClientManager::send, std::move(query)); } + + // launch watchdog + watchdog_id_ = td::create_actor_on_scheduler<Watchdog>( + "ManagerWatchdog", td::Scheduler::instance()->sched_count() - 3, td::this_thread::get_id(), WATCHDOG_TIMEOUT); + set_timeout_in(600.0); } PromisedQueryPtr ClientManager::get_webhook_restore_query(td::Slice token, td::Slice webhook_info, @@ -429,6 +434,11 @@ void ClientManager::raw_event(const td::Event::Raw &event) { } } +void ClientManager::timeout_expired() { + send_closure(watchdog_id_, &Watchdog::kick); + set_timeout_in(WATCHDOG_TIMEOUT / 2); +} + void ClientManager::hangup_shared() { auto id = get_link_token(); auto *info = clients_.get(id); @@ -458,6 +468,7 @@ void ClientManager::close_db() { void ClientManager::finish_close() { LOG(WARNING) << "Stop ClientManager"; + watchdog_id_.reset(); auto promises = std::move(close_promises_); for (auto &promise : promises) { promise.set_value(td::Unit()); @@ -465,4 +476,6 @@ void ClientManager::finish_close() { stop(); } +constexpr double ClientManager::WATCHDOG_TIMEOUT; + } // namespace telegram_bot_api diff --git a/telegram-bot-api/ClientManager.h b/telegram-bot-api/ClientManager.h index 921aaf0..21ae1cd 100644 --- a/telegram-bot-api/ClientManager.h +++ b/telegram-bot-api/ClientManager.h @@ -9,6 +9,7 @@ #include "telegram-bot-api/Client.h" #include "telegram-bot-api/Query.h" #include "telegram-bot-api/Stats.h" +#include "telegram-bot-api/Watchdog.h" #include "td/actor/actor.h" @@ -68,6 +69,10 @@ class ClientManager final : public td::Actor { bool close_flag_ = false; td::vector<td::Promise<td::Unit>> close_promises_; + td::ActorOwn<Watchdog> watchdog_id_; + + static constexpr double WATCHDOG_TIMEOUT = 0.5; + static td::int64 get_tqueue_id(td::int64 user_id, bool is_test_dc); static PromisedQueryPtr get_webhook_restore_query(td::Slice token, td::Slice webhook_info, @@ -75,6 +80,7 @@ class ClientManager final : public td::Actor { void start_up() final; void raw_event(const td::Event::Raw &event) final; + void timeout_expired() final; void hangup_shared() final; void close_db(); void finish_close(); diff --git a/telegram-bot-api/Watchdog.cpp b/telegram-bot-api/Watchdog.cpp index d715574..362b5f9 100644 --- a/telegram-bot-api/Watchdog.cpp +++ b/telegram-bot-api/Watchdog.cpp @@ -13,7 +13,7 @@ namespace telegram_bot_api { void Watchdog::kick() { auto now = td::Time::now(); if (now >= last_kick_time_ + timeout_ && last_kick_time_ > 0) { - LOG(ERROR) << "Watchdog timeout expired after " << now - last_kick_time_ << " seconds"; + LOG(ERROR) << get_name() << " timeout expired after " << now - last_kick_time_ << " seconds"; td::thread::send_real_time_signal(main_thread_id_, 2); } last_kick_time_ = now; diff --git a/telegram-bot-api/telegram-bot-api.cpp b/telegram-bot-api/telegram-bot-api.cpp index 0737655..76a0257 100644 --- a/telegram-bot-api/telegram-bot-api.cpp +++ b/telegram-bot-api/telegram-bot-api.cpp @@ -506,7 +506,7 @@ int main(int argc, char *argv[]) { // +3 threads for Td // one thread for ClientManager and all Clients - // one thread for watchdog + // one thread for watchdogs // one thread for slow HTTP connections // one thread for DNS resolving const int thread_count = 7;