// // Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2024 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #include "td/db/binlog/Binlog.h" #include "td/db/DbKey.h" #include "td/utils/common.h" #include "td/utils/format.h" #include "td/utils/logging.h" #include "td/utils/misc.h" #include "td/utils/port/Stat.h" #include "td/utils/Slice.h" #include "td/utils/SliceBuilder.h" #include "td/utils/StringBuilder.h" #include "td/utils/tl_parsers.h" #include struct Trie { Trie() { nodes_.resize(1); } void add(td::Slice value) { do_add(0, PSLICE() << value << '\0'); } void dump() { if (nodes_[0].sum == 0) { // division by zero return; } LOG(PLAIN) << "TOTAL: " << nodes_[0].sum; do_dump("", 0); } private: struct FullNode { int next[256] = {}; int sum = 0; }; td::vector nodes_; void do_add(int event_id, td::Slice value) { nodes_[event_id].sum++; if (value.empty()) { return; } auto c = static_cast(value[0]); auto next_event_id = nodes_[event_id].next[c]; if (next_event_id == 0) { next_event_id = static_cast(nodes_.size()); nodes_.emplace_back(); nodes_[event_id].next[c] = next_event_id; } do_add(next_event_id, value.substr(1)); } void do_dump(td::string path, int v) { bool is_word_end = !path.empty() && path.back() == '\0'; bool need_stop = false; int next_count = 0; for (int c = 0; c < 256; c++) { if (nodes_[v].next[c] != 0) { need_stop |= c >= 128 || !(td::is_alpha(static_cast(c)) || c == '.' || c == '_'); next_count++; } } need_stop |= next_count == 0 || (next_count >= 2 && nodes_[v].sum <= nodes_[0].sum / 100); if (is_word_end || need_stop) { if (is_word_end) { path.pop_back(); } else if (next_count != 1 || nodes_[v].next[0] == 0) { path.push_back('*'); } LOG(PLAIN) << nodes_[v].sum << " " << td::StringBuilder::FixedDouble(nodes_[v].sum * 100.0 / nodes_[0].sum, 2) << "% [" << td::format::escaped(path) << "]"; return; } for (int c = 0; c < 256; c++) { auto next_event_id = nodes_[v].next[c]; if (next_event_id == 0) { continue; } do_dump(path + static_cast(c), next_event_id); } } }; enum Magic { ConfigPmcMagic = 0x1f18, BinlogPmcMagic = 0x4327 }; int main(int argc, char *argv[]) { if (argc < 2) { LOG(PLAIN) << "Usage: binlog_dump "; return 1; } td::string binlog_file_name = argv[1]; auto r_stat = td::stat(binlog_file_name); if (r_stat.is_error() || r_stat.ok().size_ == 0 || !r_stat.ok().is_reg_) { LOG(PLAIN) << "Wrong binlog file name specified"; LOG(PLAIN) << "Usage: binlog_dump "; return 1; } struct Info { std::size_t full_size = 0; std::size_t compressed_size = 0; Trie trie; Trie compressed_trie; }; std::map info; SET_VERBOSITY_LEVEL(VERBOSITY_NAME(ERROR)); td::Binlog binlog; binlog .init( binlog_file_name, [&](auto &event) { info[0].compressed_size += event.raw_event_.size(); info[event.type_].compressed_size += event.raw_event_.size(); if (event.type_ == ConfigPmcMagic || event.type_ == BinlogPmcMagic) { auto key = td::TlParser(event.get_data()).fetch_string(); info[event.type_].compressed_trie.add(key); } }, td::DbKey::raw_key("cucumber"), td::DbKey::empty(), -1, [&](auto &event) mutable { info[0].full_size += event.raw_event_.size(); info[event.type_].full_size += event.raw_event_.size(); if (event.type_ == ConfigPmcMagic || event.type_ == BinlogPmcMagic) { auto key = td::TlParser(event.get_data()).fetch_string(); info[event.type_].trie.add(key); } LOG(PLAIN) << "LogEvent[" << td::tag("event_id", td::format::as_hex(event.id_)) << td::tag("type", event.type_) << td::tag("flags", event.flags_) << td::tag("size", event.get_data().size()) << td::tag("data", td::format::escaped(event.get_data())) << "]\n"; }) .ensure(); for (auto &it : info) { LOG(PLAIN) << td::tag("handler", td::format::as_hex(it.first)) << td::tag("full_size", td::format::as_size(it.second.full_size)) << td::tag("compressed_size", td::format::as_size(it.second.compressed_size)); it.second.trie.dump(); if (it.second.full_size != it.second.compressed_size) { it.second.compressed_trie.dump(); } } return 0; }