// // Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #include "td/utils/algorithm.h" #include "td/utils/common.h" #include "td/utils/FlatHashMap.h" #include "td/utils/FlatHashMapChunks.h" #include "td/utils/format.h" #include "td/utils/Hash.h" #include "td/utils/logging.h" #include "td/utils/Random.h" #include "td/utils/Slice.h" #include "td/utils/Span.h" #include "td/utils/Time.h" #include "td/utils/VectorQueue.h" #ifdef SCOPE_EXIT #undef SCOPE_EXIT #endif #include #include #include #include #include #include #include #include #include template static void reserve(TableT &table, std::size_t size) { table.reserve(size); } template static void reserve(std::map &table, std::size_t size) { } template class NoOpTable { public: using key_type = KeyT; using value_type = std::pair; template NoOpTable(It begin, It end) { } ValueT &operator[](const KeyT &) const { static ValueT dummy; return dummy; } KeyT find(const KeyT &key) const { return key; } }; template class VectorTable { public: using key_type = KeyT; using value_type = std::pair; template VectorTable(It begin, It end) : table_(begin, end) { } ValueT &operator[](const KeyT &needle) { auto it = find(needle); if (it == table_.end()) { table_.emplace_back(needle, ValueT{}); return table_.back().second; } return it->second; } auto find(const KeyT &needle) { return std::find_if(table_.begin(), table_.end(), [&](auto &key) { return key.first == needle; }); } private: using KeyValue = value_type; td::vector table_; }; template class SortedVectorTable { public: using key_type = KeyT; using value_type = std::pair; template SortedVectorTable(It begin, It end) : table_(begin, end) { std::sort(table_.begin(), table_.end()); } ValueT &operator[](const KeyT &needle) { auto it = std::lower_bound(table_.begin(), table_.end(), needle, [](const auto &l, const auto &r) { return l.first < r; }); if (it == table_.end() || it->first != needle) { it = table_.insert(it, {needle, ValueT{}}); } return it->second; } auto find(const KeyT &needle) { auto it = std::lower_bound(table_.begin(), table_.end(), needle, [](const auto &l, const auto &r) { return l.first < r; }); if (it != table_.end() && it->first == needle) { return it; } return table_.end(); } private: using KeyValue = value_type; td::vector table_; }; template > class SimpleHashTable { public: using key_type = KeyT; using value_type = std::pair; template SimpleHashTable(It begin, It end) { nodes_.resize((end - begin) * 2); for (; begin != end; ++begin) { insert(begin->first, begin->second); } } ValueT &operator[](const KeyT &needle) { UNREACHABLE(); } ValueT *find(const KeyT &needle) { auto hash = HashT()(needle); std::size_t i = hash % nodes_.size(); while (true) { if (nodes_[i].key == needle) { return &nodes_[i].value; } if (nodes_[i].hash == 0) { return nullptr; } i++; if (i == nodes_.size()) { i = 0; } } } private: using KeyValue = value_type; struct Node { std::size_t hash{0}; KeyT key; ValueT value; }; td::vector nodes_; void insert(KeyT key, ValueT value) { auto hash = HashT()(key); std::size_t i = hash % nodes_.size(); while (true) { if (nodes_[i].hash == 0 || (nodes_[i].hash == hash && nodes_[i].key == key)) { nodes_[i].value = value; nodes_[i].key = key; nodes_[i].hash = hash; return; } i++; if (i == nodes_.size()) { i = 0; } } } }; template static void BM_Get(benchmark::State &state) { std::size_t n = state.range(0); constexpr std::size_t BATCH_SIZE = 1024; td::Random::Xorshift128plus rnd(123); using Key = typename TableT::key_type; using Value = typename TableT::value_type::second_type; using KeyValue = std::pair; td::vector data; td::vector keys; for (std::size_t i = 0; i < n; i++) { auto key = rnd(); auto value = rnd(); data.emplace_back(key, value); keys.push_back(key); } TableT table(data.begin(), data.end()); std::size_t key_i = 0; td::random_shuffle(td::as_mutable_span(keys), rnd); auto next_key = [&] { key_i++; if (key_i == data.size()) { key_i = 0; } return keys[key_i]; }; while (state.KeepRunningBatch(BATCH_SIZE)) { for (std::size_t i = 0; i < BATCH_SIZE; i++) { benchmark::DoNotOptimize(table.find(next_key())); } } } template static void BM_find_same(benchmark::State &state) { td::Random::Xorshift128plus rnd(123); TableT table; constexpr std::size_t N = 100000; constexpr std::size_t BATCH_SIZE = 1024; reserve(table, N); for (std::size_t i = 0; i < N; i++) { table.emplace(rnd(), i); } auto key = td::Random::secure_uint64(); table[key] = 123; while (state.KeepRunningBatch(BATCH_SIZE)) { for (std::size_t i = 0; i < BATCH_SIZE; i++) { benchmark::DoNotOptimize(table.find(key)); } } } template static void BM_emplace_same(benchmark::State &state) { td::Random::Xorshift128plus rnd(123); TableT table; constexpr std::size_t N = 100000; constexpr std::size_t BATCH_SIZE = 1024; reserve(table, N); for (std::size_t i = 0; i < N; i++) { table.emplace(rnd(), i); } auto key = 123743; table[key] = 123; while (state.KeepRunningBatch(BATCH_SIZE)) { for (std::size_t i = 0; i < BATCH_SIZE; i++) { benchmark::DoNotOptimize(table.emplace(key + (i & 15) * 100, 43784932)); } } } namespace td { template static void table_remove_if(absl::flat_hash_map &table, FunctT &&func) { for (auto it = table.begin(); it != table.end();) { if (func(*it)) { auto copy = it; ++it; table.erase(copy); } else { ++it; } } } } // namespace td template static void BM_remove_if(benchmark::State &state) { constexpr std::size_t N = 100000; constexpr std::size_t BATCH_SIZE = N; TableT table; reserve(table, N); while (state.KeepRunningBatch(BATCH_SIZE)) { state.PauseTiming(); td::Random::Xorshift128plus rnd(123); for (std::size_t i = 0; i < N; i++) { table.emplace(rnd(), i); } state.ResumeTiming(); td::table_remove_if(table, [](auto &it) { return it.second % 2 == 0; }); } } template static void BM_erase_all_with_begin(benchmark::State &state) { constexpr std::size_t N = 100000; constexpr std::size_t BATCH_SIZE = N; TableT table; td::Random::Xorshift128plus rnd(123); while (state.KeepRunningBatch(BATCH_SIZE)) { for (std::size_t i = 0; i < BATCH_SIZE; i++) { table.emplace(rnd() + 1, i); } while (!table.empty()) { table.erase(table.begin()); } } } template static void BM_cache(benchmark::State &state) { constexpr std::size_t N = 1000; constexpr std::size_t BATCH_SIZE = 1000000; TableT table; td::Random::Xorshift128plus rnd(123); td::VectorQueue keys; while (state.KeepRunningBatch(BATCH_SIZE)) { for (std::size_t i = 0; i < BATCH_SIZE; i++) { auto key = rnd() + 1; keys.push(key); table.emplace(key, i); if (table.size() > N) { table.erase(keys.pop()); } } } } template static void BM_cache2(benchmark::State &state) { constexpr std::size_t N = 1000; constexpr std::size_t BATCH_SIZE = 1000000; TableT table; td::Random::Xorshift128plus rnd(123); td::VectorQueue keys; while (state.KeepRunningBatch(BATCH_SIZE)) { for (std::size_t i = 0; i < BATCH_SIZE; i++) { auto key = rnd() + 1; keys.push(key); table.emplace(key, i); if (table.size() > N) { table.erase(keys.pop_rand(rnd)); } } } } template static void BM_cache3(benchmark::State &state) { std::size_t N = state.range(0); constexpr std::size_t BATCH_SIZE = 1000000; TableT table; td::Random::Xorshift128plus rnd(123); td::VectorQueue keys; std::size_t step = 20; while (state.KeepRunningBatch(BATCH_SIZE)) { for (std::size_t i = 0; i < BATCH_SIZE; i += step) { auto key = rnd() + 1; keys.push(key); table.emplace(key, i); for (std::size_t j = 1; j < step; j++) { auto key_to_find = keys.data()[rnd() % keys.size()]; benchmark::DoNotOptimize(table.find(key_to_find)); } if (table.size() > N) { table.erase(keys.pop_rand(rnd)); } } } } template static void BM_remove_if_slow(benchmark::State &state) { constexpr std::size_t N = 5000; constexpr std::size_t BATCH_SIZE = 500000; TableT table; td::Random::Xorshift128plus rnd(123); for (std::size_t i = 0; i < N; i++) { table.emplace(rnd() + 1, i); } auto first_key = table.begin()->first; { std::size_t cnt = 0; td::table_remove_if(table, [&cnt, n = N](auto &) { cnt += 2; return cnt <= n; }); } while (state.KeepRunningBatch(BATCH_SIZE)) { for (std::size_t i = 0; i < BATCH_SIZE; i++) { table.emplace(first_key, i); table.erase(first_key); } } } template static void BM_remove_if_slow_old(benchmark::State &state) { constexpr std::size_t N = 100000; constexpr std::size_t BATCH_SIZE = 5000000; TableT table; while (state.KeepRunningBatch(BATCH_SIZE)) { td::Random::Xorshift128plus rnd(123); for (std::size_t i = 0; i < BATCH_SIZE; i++) { table.emplace(rnd() + 1, i); if (table.size() > N) { std::size_t cnt = 0; td::table_remove_if(table, [&cnt, n = N](auto &) { cnt += 2; return cnt <= n; }); } } } } template static void benchmark_create(td::Slice name) { td::Random::Xorshift128plus rnd(123); { constexpr std::size_t N = 10000000; TableT table; reserve(table, N); auto start = td::Timestamp::now(); for (std::size_t i = 0; i < N; i++) { table.emplace(rnd(), i); } auto end = td::Timestamp::now(); LOG(INFO) << name << ":" << "create " << N << " elements: " << td::format::as_time(end.at() - start.at()); double res = 0; td::vector> pauses; for (std::size_t i = 0; i < N; i++) { auto emplace_start = td::Timestamp::now(); table.emplace(rnd(), i); auto emplace_end = td::Timestamp::now(); auto pause = emplace_end.at() - emplace_start.at(); res = td::max(pause, res); if (pause > 0.001) { pauses.emplace_back(i, td::format::as_time(pause)); } } LOG(INFO) << name << ":" << "create another " << N << " elements, max pause = " << td::format::as_time(res) << " " << pauses; } } struct CacheMissNode { td::uint32 data{}; char padding[64 - sizeof(data)]; }; class IterateFast { public: static td::uint32 iterate(CacheMissNode *ptr, std::size_t max_shift) { td::uint32 res = 1; for (std::size_t i = 0; i < max_shift; i++) { if (ptr[i].data % max_shift != 0) { res *= ptr[i].data; } else { res /= ptr[i].data; } } return res; } }; class IterateSlow { public: static td::uint32 iterate(CacheMissNode *ptr, std::size_t max_shift) { td::uint32 res = 1; for (std::size_t i = 0;; i++) { if (ptr[i].data % max_shift != 0) { res *= ptr[i].data; } else { break; } } return res; } }; template static void BM_cache_miss(benchmark::State &state) { td::uint32 max_shift = state.range(0); bool flag = state.range(1); std::random_device rd; std::mt19937 rnd(rd()); int N = 50000000; td::vector nodes(N); td::uint32 i = 0; for (auto &node : nodes) { if (flag) { node.data = i++ % max_shift; } else { node.data = rnd(); } } td::vector positions(N); std::uniform_int_distribution rnd_pos(0, N - 1000); for (auto &pos : positions) { pos = rnd_pos(rnd); if (flag) { pos = pos / max_shift * max_shift + 1; } } while (state.KeepRunningBatch(positions.size())) { for (const auto pos : positions) { auto *ptr = &nodes[pos]; auto res = F::iterate(ptr, max_shift); benchmark::DoNotOptimize(res); } } } static td::uint64 equal_mask_slow(td::uint8 *bytes, td::uint8 needle) { td::uint64 mask = 0; for (int i = 0; i < 16; i++) { mask |= (bytes[i] == needle) << i; } return mask; } template static void BM_mask(benchmark::State &state) { std::size_t BATCH_SIZE = 1024; td::vector bytes(BATCH_SIZE + 16); for (auto &b : bytes) { b = static_cast(td::Random::fast(0, 17)); } while (state.KeepRunningBatch(BATCH_SIZE)) { for (std::size_t i = 0; i < BATCH_SIZE; i++) { benchmark::DoNotOptimize(MaskT::equal_mask(bytes.data() + i, 17)); } } } BENCHMARK_TEMPLATE(BM_mask, td::MaskPortable); #ifdef __aarch64__ BENCHMARK_TEMPLATE(BM_mask, td::MaskNeonFolly); BENCHMARK_TEMPLATE(BM_mask, td::MaskNeon); #endif #if TD_SSE2 BENCHMARK_TEMPLATE(BM_mask, td::MaskSse2); #endif #define FOR_EACH_TABLE(F) \ F(td::FlatHashMapChunks) \ F(td::FlatHashMapImpl) \ F(folly::F14FastMap) \ F(absl::flat_hash_map) \ F(std::unordered_map) \ F(std::map) //BENCHMARK(BM_cache_miss)->Ranges({{1, 16}, {0, 1}}); //BENCHMARK(BM_cache_miss)->Ranges({{1, 16}, {0, 1}}); //BENCHMARK_TEMPLATE(BM_Get, VectorTable)->Range(1, 1 << 26); //BENCHMARK_TEMPLATE(BM_Get, SortedVectorTable)->Range(1, 1 << 26); //BENCHMARK_TEMPLATE(BM_Get, NoOpTable)->Range(1, 1 << 26); #define REGISTER_GET_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_Get, HT)->Range(1, 1 << 23); #define REGISTER_FIND_BENCHMARK(HT) \ BENCHMARK_TEMPLATE(BM_find_same, HT) \ ->ComputeStatistics("max", [](const td::vector &v) { return *std::max_element(v.begin(), v.end()); }) \ ->ComputeStatistics("min", [](const td::vector &v) { return *std::min_element(v.begin(), v.end()); }) \ ->Repetitions(20) \ ->DisplayAggregatesOnly(true); #define REGISTER_REMOVE_IF_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_remove_if, HT); #define REGISTER_EMPLACE_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_emplace_same, HT); #define REGISTER_CACHE_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_cache, HT); #define REGISTER_CACHE2_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_cache2, HT); #define REGISTER_CACHE3_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_cache3, HT)->Range(1, 1 << 23); #define REGISTER_ERASE_ALL_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_erase_all_with_begin, HT); #define REGISTER_REMOVE_IF_SLOW_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_remove_if_slow, HT); #define REGISTER_REMOVE_IF_SLOW_OLD_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_remove_if_slow_old, HT); FOR_EACH_TABLE(REGISTER_GET_BENCHMARK) FOR_EACH_TABLE(REGISTER_CACHE3_BENCHMARK) FOR_EACH_TABLE(REGISTER_CACHE2_BENCHMARK) FOR_EACH_TABLE(REGISTER_CACHE_BENCHMARK) FOR_EACH_TABLE(REGISTER_REMOVE_IF_BENCHMARK) FOR_EACH_TABLE(REGISTER_EMPLACE_BENCHMARK) FOR_EACH_TABLE(REGISTER_ERASE_ALL_BENCHMARK) FOR_EACH_TABLE(REGISTER_FIND_BENCHMARK) FOR_EACH_TABLE(REGISTER_REMOVE_IF_SLOW_OLD_BENCHMARK) FOR_EACH_TABLE(REGISTER_REMOVE_IF_SLOW_BENCHMARK) #define RUN_CREATE_BENCHMARK(HT) benchmark_create>(#HT); int main(int argc, char **argv) { // FOR_EACH_TABLE(RUN_CREATE_BENCHMARK); benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); benchmark::Shutdown(); }