2022-02-09 01:08:18 +01:00
|
|
|
//
|
2024-01-01 01:07:21 +01:00
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2024
|
2022-02-09 01:08:18 +01:00
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
2022-02-09 20:59:08 +01:00
|
|
|
#include "td/utils/algorithm.h"
|
2022-02-09 01:08:18 +01:00
|
|
|
#include "td/utils/common.h"
|
|
|
|
#include "td/utils/FlatHashMap.h"
|
2022-02-17 19:31:58 +01:00
|
|
|
#include "td/utils/FlatHashMapChunks.h"
|
2022-03-09 16:03:56 +01:00
|
|
|
#include "td/utils/FlatHashTable.h"
|
2022-02-09 01:08:18 +01:00
|
|
|
#include "td/utils/format.h"
|
2022-11-23 17:37:32 +01:00
|
|
|
#include "td/utils/HashTableUtils.h"
|
2022-02-09 01:08:18 +01:00
|
|
|
#include "td/utils/logging.h"
|
2022-03-09 16:03:56 +01:00
|
|
|
#include "td/utils/MapNode.h"
|
2022-02-09 01:08:18 +01:00
|
|
|
#include "td/utils/Random.h"
|
|
|
|
#include "td/utils/Slice.h"
|
|
|
|
#include "td/utils/Span.h"
|
2022-03-09 21:23:44 +01:00
|
|
|
#include "td/utils/StringBuilder.h"
|
2023-03-06 18:36:56 +01:00
|
|
|
#include "td/utils/tests.h"
|
2022-02-09 01:08:18 +01:00
|
|
|
#include "td/utils/Time.h"
|
2022-02-10 22:20:57 +01:00
|
|
|
#include "td/utils/VectorQueue.h"
|
2022-02-08 19:37:45 +01:00
|
|
|
|
2022-02-18 21:25:23 +01:00
|
|
|
#ifdef SCOPE_EXIT
|
|
|
|
#undef SCOPE_EXIT
|
|
|
|
#endif
|
|
|
|
|
2022-02-08 19:37:45 +01:00
|
|
|
#include <absl/container/flat_hash_map.h>
|
|
|
|
#include <absl/hash/hash.h>
|
2022-02-09 01:08:18 +01:00
|
|
|
#include <algorithm>
|
|
|
|
#include <benchmark/benchmark.h>
|
2022-02-08 19:37:45 +01:00
|
|
|
#include <folly/container/F14Map.h>
|
2022-03-09 16:03:56 +01:00
|
|
|
#include <functional>
|
2022-02-09 01:08:18 +01:00
|
|
|
#include <map>
|
2022-02-18 21:04:25 +01:00
|
|
|
#include <random>
|
2022-02-09 01:08:18 +01:00
|
|
|
#include <unordered_map>
|
|
|
|
#include <utility>
|
2022-02-08 19:37:45 +01:00
|
|
|
|
|
|
|
template <class TableT>
|
2022-02-18 21:04:25 +01:00
|
|
|
static void reserve(TableT &table, std::size_t size) {
|
2022-02-08 19:37:45 +01:00
|
|
|
table.reserve(size);
|
|
|
|
}
|
2022-02-09 01:08:18 +01:00
|
|
|
|
2022-02-08 19:37:45 +01:00
|
|
|
template <class A, class B>
|
2022-02-18 21:04:25 +01:00
|
|
|
static void reserve(std::map<A, B> &table, std::size_t size) {
|
2022-02-08 19:37:45 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
template <class KeyT, class ValueT>
|
|
|
|
class NoOpTable {
|
|
|
|
public:
|
|
|
|
using key_type = KeyT;
|
|
|
|
using value_type = std::pair<const KeyT, ValueT>;
|
|
|
|
template <class It>
|
|
|
|
NoOpTable(It begin, It end) {
|
|
|
|
}
|
|
|
|
|
|
|
|
ValueT &operator[](const KeyT &) const {
|
|
|
|
static ValueT dummy;
|
|
|
|
return dummy;
|
|
|
|
}
|
|
|
|
|
|
|
|
KeyT find(const KeyT &key) const {
|
|
|
|
return key;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <class KeyT, class ValueT>
|
|
|
|
class VectorTable {
|
|
|
|
public:
|
|
|
|
using key_type = KeyT;
|
|
|
|
using value_type = std::pair<const KeyT, ValueT>;
|
|
|
|
template <class It>
|
|
|
|
VectorTable(It begin, It end) : table_(begin, end) {
|
|
|
|
}
|
|
|
|
|
|
|
|
ValueT &operator[](const KeyT &needle) {
|
|
|
|
auto it = find(needle);
|
|
|
|
if (it == table_.end()) {
|
|
|
|
table_.emplace_back(needle, ValueT{});
|
|
|
|
return table_.back().second;
|
|
|
|
}
|
|
|
|
return it->second;
|
|
|
|
}
|
|
|
|
auto find(const KeyT &needle) {
|
2022-02-09 01:08:18 +01:00
|
|
|
return std::find_if(table_.begin(), table_.end(), [&](auto &key) { return key.first == needle; });
|
2022-02-08 19:37:45 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
using KeyValue = value_type;
|
2022-02-09 01:08:18 +01:00
|
|
|
td::vector<KeyValue> table_;
|
2022-02-08 19:37:45 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
template <class KeyT, class ValueT>
|
|
|
|
class SortedVectorTable {
|
|
|
|
public:
|
|
|
|
using key_type = KeyT;
|
|
|
|
using value_type = std::pair<KeyT, ValueT>;
|
|
|
|
template <class It>
|
|
|
|
SortedVectorTable(It begin, It end) : table_(begin, end) {
|
|
|
|
std::sort(table_.begin(), table_.end());
|
|
|
|
}
|
|
|
|
|
|
|
|
ValueT &operator[](const KeyT &needle) {
|
2022-02-09 01:08:18 +01:00
|
|
|
auto it = std::lower_bound(table_.begin(), table_.end(), needle,
|
|
|
|
[](const auto &l, const auto &r) { return l.first < r; });
|
2022-02-08 19:37:45 +01:00
|
|
|
if (it == table_.end() || it->first != needle) {
|
|
|
|
it = table_.insert(it, {needle, ValueT{}});
|
|
|
|
}
|
|
|
|
return it->second;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto find(const KeyT &needle) {
|
2022-02-09 01:08:18 +01:00
|
|
|
auto it = std::lower_bound(table_.begin(), table_.end(), needle,
|
|
|
|
[](const auto &l, const auto &r) { return l.first < r; });
|
2022-02-08 19:37:45 +01:00
|
|
|
if (it != table_.end() && it->first == needle) {
|
|
|
|
return it;
|
|
|
|
}
|
|
|
|
return table_.end();
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
using KeyValue = value_type;
|
2022-02-09 01:08:18 +01:00
|
|
|
td::vector<KeyValue> table_;
|
2022-02-08 19:37:45 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
template <class KeyT, class ValueT, class HashT = td::Hash<KeyT>>
|
|
|
|
class SimpleHashTable {
|
|
|
|
public:
|
|
|
|
using key_type = KeyT;
|
|
|
|
using value_type = std::pair<KeyT, ValueT>;
|
|
|
|
template <class It>
|
|
|
|
SimpleHashTable(It begin, It end) {
|
|
|
|
nodes_.resize((end - begin) * 2);
|
|
|
|
for (; begin != end; ++begin) {
|
|
|
|
insert(begin->first, begin->second);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ValueT &operator[](const KeyT &needle) {
|
|
|
|
UNREACHABLE();
|
|
|
|
}
|
|
|
|
|
|
|
|
ValueT *find(const KeyT &needle) {
|
|
|
|
auto hash = HashT()(needle);
|
2022-02-18 21:04:25 +01:00
|
|
|
std::size_t i = hash % nodes_.size();
|
2022-02-08 19:37:45 +01:00
|
|
|
while (true) {
|
|
|
|
if (nodes_[i].key == needle) {
|
|
|
|
return &nodes_[i].value;
|
|
|
|
}
|
|
|
|
if (nodes_[i].hash == 0) {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
if (i == nodes_.size()) {
|
|
|
|
i = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
using KeyValue = value_type;
|
|
|
|
struct Node {
|
2022-02-09 01:08:18 +01:00
|
|
|
std::size_t hash{0};
|
2022-02-08 19:37:45 +01:00
|
|
|
KeyT key;
|
|
|
|
ValueT value;
|
|
|
|
};
|
2022-02-09 01:08:18 +01:00
|
|
|
td::vector<Node> nodes_;
|
2022-02-08 19:37:45 +01:00
|
|
|
|
|
|
|
void insert(KeyT key, ValueT value) {
|
|
|
|
auto hash = HashT()(key);
|
2022-02-09 01:08:18 +01:00
|
|
|
std::size_t i = hash % nodes_.size();
|
2022-02-08 19:37:45 +01:00
|
|
|
while (true) {
|
|
|
|
if (nodes_[i].hash == 0 || (nodes_[i].hash == hash && nodes_[i].key == key)) {
|
|
|
|
nodes_[i].value = value;
|
|
|
|
nodes_[i].key = key;
|
|
|
|
nodes_[i].hash = hash;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
if (i == nodes_.size()) {
|
|
|
|
i = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename TableT>
|
2022-02-09 01:08:18 +01:00
|
|
|
static void BM_Get(benchmark::State &state) {
|
|
|
|
std::size_t n = state.range(0);
|
|
|
|
constexpr std::size_t BATCH_SIZE = 1024;
|
2022-02-08 19:37:45 +01:00
|
|
|
td::Random::Xorshift128plus rnd(123);
|
|
|
|
using Key = typename TableT::key_type;
|
|
|
|
using Value = typename TableT::value_type::second_type;
|
|
|
|
using KeyValue = std::pair<Key, Value>;
|
2022-02-09 01:08:18 +01:00
|
|
|
td::vector<KeyValue> data;
|
|
|
|
td::vector<Key> keys;
|
2022-02-08 19:37:45 +01:00
|
|
|
|
2022-02-21 00:22:01 +01:00
|
|
|
TableT table;
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < n; i++) {
|
2022-02-08 19:37:45 +01:00
|
|
|
auto key = rnd();
|
|
|
|
auto value = rnd();
|
|
|
|
data.emplace_back(key, value);
|
2022-02-21 00:22:01 +01:00
|
|
|
table.emplace(key, value);
|
2022-02-08 19:37:45 +01:00
|
|
|
keys.push_back(key);
|
|
|
|
}
|
|
|
|
|
2022-02-18 21:04:25 +01:00
|
|
|
std::size_t key_i = 0;
|
2023-01-02 12:29:03 +01:00
|
|
|
td::rand_shuffle(td::as_mutable_span(keys), rnd);
|
2022-02-08 19:37:45 +01:00
|
|
|
auto next_key = [&] {
|
|
|
|
key_i++;
|
|
|
|
if (key_i == data.size()) {
|
|
|
|
key_i = 0;
|
|
|
|
}
|
|
|
|
return keys[key_i];
|
|
|
|
};
|
|
|
|
|
2022-02-09 01:08:18 +01:00
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
2022-02-08 19:37:45 +01:00
|
|
|
benchmark::DoNotOptimize(table.find(next_key()));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename TableT>
|
2022-02-09 01:08:18 +01:00
|
|
|
static void BM_find_same(benchmark::State &state) {
|
2022-02-08 19:37:45 +01:00
|
|
|
td::Random::Xorshift128plus rnd(123);
|
|
|
|
TableT table;
|
2022-02-18 21:04:25 +01:00
|
|
|
constexpr std::size_t N = 100000;
|
|
|
|
constexpr std::size_t BATCH_SIZE = 1024;
|
2022-02-08 19:37:45 +01:00
|
|
|
reserve(table, N);
|
|
|
|
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < N; i++) {
|
2022-02-08 19:37:45 +01:00
|
|
|
table.emplace(rnd(), i);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto key = td::Random::secure_uint64();
|
|
|
|
table[key] = 123;
|
|
|
|
|
2022-02-09 01:08:18 +01:00
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
2022-02-08 19:37:45 +01:00
|
|
|
benchmark::DoNotOptimize(table.find(key));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename TableT>
|
2022-02-09 01:08:18 +01:00
|
|
|
static void BM_emplace_same(benchmark::State &state) {
|
2022-02-08 19:37:45 +01:00
|
|
|
td::Random::Xorshift128plus rnd(123);
|
|
|
|
TableT table;
|
2022-02-18 21:04:25 +01:00
|
|
|
constexpr std::size_t N = 100000;
|
|
|
|
constexpr std::size_t BATCH_SIZE = 1024;
|
2022-02-08 19:37:45 +01:00
|
|
|
reserve(table, N);
|
|
|
|
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < N; i++) {
|
2022-02-08 19:37:45 +01:00
|
|
|
table.emplace(rnd(), i);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto key = 123743;
|
|
|
|
table[key] = 123;
|
|
|
|
|
2022-02-09 01:08:18 +01:00
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
2022-02-11 22:48:35 +01:00
|
|
|
benchmark::DoNotOptimize(table.emplace(key + (i & 15) * 100, 43784932));
|
2022-02-08 19:37:45 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-02-09 01:08:18 +01:00
|
|
|
|
2022-03-09 21:23:44 +01:00
|
|
|
template <typename TableT>
|
|
|
|
static void BM_emplace_string(benchmark::State &state) {
|
|
|
|
td::Random::Xorshift128plus rnd(123);
|
|
|
|
TableT table;
|
|
|
|
constexpr std::size_t N = 100000;
|
|
|
|
constexpr std::size_t BATCH_SIZE = 1024;
|
|
|
|
reserve(table, N);
|
|
|
|
|
|
|
|
for (std::size_t i = 0; i < N; i++) {
|
|
|
|
table.emplace(td::to_string(rnd()), i);
|
|
|
|
}
|
|
|
|
|
|
|
|
table["0"] = 123;
|
|
|
|
td::vector<td::string> strings;
|
|
|
|
for (std::size_t i = 0; i < 16; i++) {
|
|
|
|
strings.emplace_back(1, static_cast<char>('0' + i));
|
|
|
|
}
|
|
|
|
|
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
|
|
|
benchmark::DoNotOptimize(table.emplace(strings[i & 15], 43784932));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-09 20:59:08 +01:00
|
|
|
namespace td {
|
|
|
|
template <class K, class V, class FunctT>
|
2022-02-10 09:55:32 +01:00
|
|
|
static void table_remove_if(absl::flat_hash_map<K, V> &table, FunctT &&func) {
|
2022-02-09 20:59:08 +01:00
|
|
|
for (auto it = table.begin(); it != table.end();) {
|
|
|
|
if (func(*it)) {
|
|
|
|
auto copy = it;
|
|
|
|
++it;
|
|
|
|
table.erase(copy);
|
|
|
|
} else {
|
|
|
|
++it;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-02-10 09:55:32 +01:00
|
|
|
} // namespace td
|
2022-02-09 20:59:08 +01:00
|
|
|
|
|
|
|
template <typename TableT>
|
|
|
|
static void BM_remove_if(benchmark::State &state) {
|
2022-02-18 21:04:25 +01:00
|
|
|
constexpr std::size_t N = 100000;
|
|
|
|
constexpr std::size_t BATCH_SIZE = N;
|
2022-02-09 20:59:08 +01:00
|
|
|
|
|
|
|
TableT table;
|
|
|
|
reserve(table, N);
|
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
|
|
|
state.PauseTiming();
|
|
|
|
td::Random::Xorshift128plus rnd(123);
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < N; i++) {
|
2022-02-09 20:59:08 +01:00
|
|
|
table.emplace(rnd(), i);
|
|
|
|
}
|
|
|
|
state.ResumeTiming();
|
|
|
|
|
2022-02-10 09:55:32 +01:00
|
|
|
td::table_remove_if(table, [](auto &it) { return it.second % 2 == 0; });
|
2022-02-09 20:59:08 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-10 22:20:57 +01:00
|
|
|
template <typename TableT>
|
|
|
|
static void BM_erase_all_with_begin(benchmark::State &state) {
|
2022-02-18 21:04:25 +01:00
|
|
|
constexpr std::size_t N = 100000;
|
|
|
|
constexpr std::size_t BATCH_SIZE = N;
|
2022-02-10 22:20:57 +01:00
|
|
|
|
|
|
|
TableT table;
|
|
|
|
td::Random::Xorshift128plus rnd(123);
|
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
2022-02-10 22:20:57 +01:00
|
|
|
table.emplace(rnd() + 1, i);
|
|
|
|
}
|
|
|
|
while (!table.empty()) {
|
|
|
|
table.erase(table.begin());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename TableT>
|
|
|
|
static void BM_cache(benchmark::State &state) {
|
2022-02-18 21:04:25 +01:00
|
|
|
constexpr std::size_t N = 1000;
|
|
|
|
constexpr std::size_t BATCH_SIZE = 1000000;
|
2022-02-10 22:20:57 +01:00
|
|
|
|
|
|
|
TableT table;
|
|
|
|
td::Random::Xorshift128plus rnd(123);
|
|
|
|
td::VectorQueue<td::uint64> keys;
|
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
2022-02-10 22:20:57 +01:00
|
|
|
auto key = rnd() + 1;
|
|
|
|
keys.push(key);
|
|
|
|
table.emplace(key, i);
|
|
|
|
if (table.size() > N) {
|
|
|
|
table.erase(keys.pop());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename TableT>
|
|
|
|
static void BM_cache2(benchmark::State &state) {
|
2022-02-18 21:04:25 +01:00
|
|
|
constexpr std::size_t N = 1000;
|
|
|
|
constexpr std::size_t BATCH_SIZE = 1000000;
|
2022-02-10 22:20:57 +01:00
|
|
|
|
|
|
|
TableT table;
|
|
|
|
td::Random::Xorshift128plus rnd(123);
|
|
|
|
td::VectorQueue<td::uint64> keys;
|
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
2022-02-10 22:20:57 +01:00
|
|
|
auto key = rnd() + 1;
|
|
|
|
keys.push(key);
|
|
|
|
table.emplace(key, i);
|
|
|
|
if (table.size() > N) {
|
|
|
|
table.erase(keys.pop_rand(rnd));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename TableT>
|
|
|
|
static void BM_cache3(benchmark::State &state) {
|
2022-02-18 21:04:25 +01:00
|
|
|
std::size_t N = state.range(0);
|
|
|
|
constexpr std::size_t BATCH_SIZE = 1000000;
|
2022-02-10 22:20:57 +01:00
|
|
|
|
|
|
|
TableT table;
|
|
|
|
td::Random::Xorshift128plus rnd(123);
|
|
|
|
td::VectorQueue<td::uint64> keys;
|
2022-02-18 21:04:25 +01:00
|
|
|
std::size_t step = 20;
|
2022-02-10 22:20:57 +01:00
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i += step) {
|
2022-02-10 22:20:57 +01:00
|
|
|
auto key = rnd() + 1;
|
|
|
|
keys.push(key);
|
|
|
|
table.emplace(key, i);
|
|
|
|
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t j = 1; j < step; j++) {
|
2022-02-11 09:26:58 +01:00
|
|
|
auto key_to_find = keys.data()[rnd() % keys.size()];
|
2022-02-10 22:20:57 +01:00
|
|
|
benchmark::DoNotOptimize(table.find(key_to_find));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (table.size() > N) {
|
|
|
|
table.erase(keys.pop_rand(rnd));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-03-09 21:01:10 +01:00
|
|
|
|
2022-02-10 22:20:57 +01:00
|
|
|
template <typename TableT>
|
|
|
|
static void BM_remove_if_slow(benchmark::State &state) {
|
2022-02-18 21:04:25 +01:00
|
|
|
constexpr std::size_t N = 5000;
|
|
|
|
constexpr std::size_t BATCH_SIZE = 500000;
|
2022-02-10 22:20:57 +01:00
|
|
|
|
2022-02-11 17:40:16 +01:00
|
|
|
TableT table;
|
|
|
|
td::Random::Xorshift128plus rnd(123);
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < N; i++) {
|
2022-02-11 17:40:16 +01:00
|
|
|
table.emplace(rnd() + 1, i);
|
|
|
|
}
|
|
|
|
auto first_key = table.begin()->first;
|
|
|
|
{
|
2022-02-18 21:04:25 +01:00
|
|
|
std::size_t cnt = 0;
|
|
|
|
td::table_remove_if(table, [&cnt, n = N](auto &) {
|
2022-02-11 22:48:35 +01:00
|
|
|
cnt += 2;
|
2022-02-18 21:04:25 +01:00
|
|
|
return cnt <= n;
|
2022-02-11 22:48:35 +01:00
|
|
|
});
|
2022-02-11 17:40:16 +01:00
|
|
|
}
|
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
2022-02-11 17:40:16 +01:00
|
|
|
table.emplace(first_key, i);
|
|
|
|
table.erase(first_key);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-03-09 21:01:10 +01:00
|
|
|
|
2022-02-11 17:40:16 +01:00
|
|
|
template <typename TableT>
|
|
|
|
static void BM_remove_if_slow_old(benchmark::State &state) {
|
2022-02-18 21:04:25 +01:00
|
|
|
constexpr std::size_t N = 100000;
|
|
|
|
constexpr std::size_t BATCH_SIZE = 5000000;
|
2022-02-11 17:40:16 +01:00
|
|
|
|
2022-02-10 22:20:57 +01:00
|
|
|
TableT table;
|
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
|
|
|
td::Random::Xorshift128plus rnd(123);
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
2022-02-10 22:20:57 +01:00
|
|
|
table.emplace(rnd() + 1, i);
|
|
|
|
if (table.size() > N) {
|
2022-02-18 21:04:25 +01:00
|
|
|
std::size_t cnt = 0;
|
2022-02-11 09:26:58 +01:00
|
|
|
td::table_remove_if(table, [&cnt, n = N](auto &) {
|
|
|
|
cnt += 2;
|
|
|
|
return cnt <= n;
|
|
|
|
});
|
2022-02-10 22:20:57 +01:00
|
|
|
}
|
|
|
|
}
|
2022-02-11 09:26:58 +01:00
|
|
|
}
|
|
|
|
}
|
2022-02-10 22:20:57 +01:00
|
|
|
|
2022-02-08 19:37:45 +01:00
|
|
|
template <typename TableT>
|
2022-02-09 01:08:18 +01:00
|
|
|
static void benchmark_create(td::Slice name) {
|
2022-02-08 19:37:45 +01:00
|
|
|
td::Random::Xorshift128plus rnd(123);
|
|
|
|
{
|
2022-02-18 21:04:25 +01:00
|
|
|
constexpr std::size_t N = 10000000;
|
2022-02-08 19:37:45 +01:00
|
|
|
TableT table;
|
|
|
|
reserve(table, N);
|
|
|
|
auto start = td::Timestamp::now();
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < N; i++) {
|
2022-02-08 19:37:45 +01:00
|
|
|
table.emplace(rnd(), i);
|
|
|
|
}
|
|
|
|
auto end = td::Timestamp::now();
|
2022-02-23 13:58:12 +01:00
|
|
|
LOG(INFO) << name << ": create " << N << " elements: " << td::format::as_time(end.at() - start.at());
|
2022-02-08 19:37:45 +01:00
|
|
|
|
|
|
|
double res = 0;
|
2022-02-18 21:04:25 +01:00
|
|
|
td::vector<std::pair<std::size_t, td::format::Time>> pauses;
|
|
|
|
for (std::size_t i = 0; i < N; i++) {
|
2022-02-08 19:37:45 +01:00
|
|
|
auto emplace_start = td::Timestamp::now();
|
|
|
|
table.emplace(rnd(), i);
|
|
|
|
auto emplace_end = td::Timestamp::now();
|
|
|
|
auto pause = emplace_end.at() - emplace_start.at();
|
|
|
|
res = td::max(pause, res);
|
|
|
|
if (pause > 0.001) {
|
|
|
|
pauses.emplace_back(i, td::format::as_time(pause));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-23 13:58:12 +01:00
|
|
|
LOG(INFO) << name << ": create another " << N << " elements, max pause = " << td::format::as_time(res) << " "
|
|
|
|
<< pauses;
|
2022-02-08 19:37:45 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-17 19:31:58 +01:00
|
|
|
struct CacheMissNode {
|
2022-02-18 21:04:25 +01:00
|
|
|
td::uint32 data{};
|
2022-02-17 19:31:58 +01:00
|
|
|
char padding[64 - sizeof(data)];
|
|
|
|
};
|
|
|
|
|
|
|
|
class IterateFast {
|
|
|
|
public:
|
2022-02-18 21:04:25 +01:00
|
|
|
static td::uint32 iterate(CacheMissNode *ptr, std::size_t max_shift) {
|
|
|
|
td::uint32 res = 1;
|
|
|
|
for (std::size_t i = 0; i < max_shift; i++) {
|
2022-02-17 19:31:58 +01:00
|
|
|
if (ptr[i].data % max_shift != 0) {
|
|
|
|
res *= ptr[i].data;
|
|
|
|
} else {
|
|
|
|
res /= ptr[i].data;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class IterateSlow {
|
|
|
|
public:
|
2022-02-18 21:04:25 +01:00
|
|
|
static td::uint32 iterate(CacheMissNode *ptr, std::size_t max_shift) {
|
|
|
|
td::uint32 res = 1;
|
|
|
|
for (std::size_t i = 0;; i++) {
|
2022-02-17 19:31:58 +01:00
|
|
|
if (ptr[i].data % max_shift != 0) {
|
|
|
|
res *= ptr[i].data;
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
};
|
2022-02-18 21:04:25 +01:00
|
|
|
|
2022-02-17 19:31:58 +01:00
|
|
|
template <class F>
|
2022-02-18 21:04:25 +01:00
|
|
|
static void BM_cache_miss(benchmark::State &state) {
|
|
|
|
td::uint32 max_shift = state.range(0);
|
2022-02-17 19:31:58 +01:00
|
|
|
bool flag = state.range(1);
|
|
|
|
std::random_device rd;
|
|
|
|
std::mt19937 rnd(rd());
|
|
|
|
int N = 50000000;
|
2022-02-18 21:04:25 +01:00
|
|
|
td::vector<CacheMissNode> nodes(N);
|
|
|
|
td::uint32 i = 0;
|
2022-02-17 19:31:58 +01:00
|
|
|
for (auto &node : nodes) {
|
|
|
|
if (flag) {
|
|
|
|
node.data = i++ % max_shift;
|
|
|
|
} else {
|
|
|
|
node.data = rnd();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-18 21:04:25 +01:00
|
|
|
td::vector<int> positions(N);
|
|
|
|
std::uniform_int_distribution<td::uint32> rnd_pos(0, N - 1000);
|
2022-02-17 19:31:58 +01:00
|
|
|
for (auto &pos : positions) {
|
|
|
|
pos = rnd_pos(rnd);
|
|
|
|
if (flag) {
|
|
|
|
pos = pos / max_shift * max_shift + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (state.KeepRunningBatch(positions.size())) {
|
|
|
|
for (const auto pos : positions) {
|
|
|
|
auto *ptr = &nodes[pos];
|
|
|
|
auto res = F::iterate(ptr, max_shift);
|
|
|
|
benchmark::DoNotOptimize(res);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-19 09:34:43 +01:00
|
|
|
static td::uint64 equal_mask_slow(td::uint8 *bytes, td::uint8 needle) {
|
|
|
|
td::uint64 mask = 0;
|
2022-02-17 19:31:58 +01:00
|
|
|
for (int i = 0; i < 16; i++) {
|
|
|
|
mask |= (bytes[i] == needle) << i;
|
|
|
|
}
|
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class MaskT>
|
2022-02-18 21:04:25 +01:00
|
|
|
static void BM_mask(benchmark::State &state) {
|
|
|
|
std::size_t BATCH_SIZE = 1024;
|
|
|
|
td::vector<td::uint8> bytes(BATCH_SIZE + 16);
|
2022-02-17 19:31:58 +01:00
|
|
|
for (auto &b : bytes) {
|
2022-02-18 21:04:25 +01:00
|
|
|
b = static_cast<td::uint8>(td::Random::fast(0, 17));
|
2022-02-17 19:31:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
2022-02-18 21:04:25 +01:00
|
|
|
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
2022-02-17 19:31:58 +01:00
|
|
|
benchmark::DoNotOptimize(MaskT::equal_mask(bytes.data() + i, 17));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-02-18 21:04:25 +01:00
|
|
|
|
2022-02-18 00:48:33 +01:00
|
|
|
BENCHMARK_TEMPLATE(BM_mask, td::MaskPortable);
|
2022-02-17 20:22:46 +01:00
|
|
|
#ifdef __aarch64__
|
2022-02-18 00:48:33 +01:00
|
|
|
BENCHMARK_TEMPLATE(BM_mask, td::MaskNeonFolly);
|
|
|
|
BENCHMARK_TEMPLATE(BM_mask, td::MaskNeon);
|
|
|
|
#endif
|
|
|
|
#if TD_SSE2
|
|
|
|
BENCHMARK_TEMPLATE(BM_mask, td::MaskSse2);
|
2022-02-17 20:22:46 +01:00
|
|
|
#endif
|
2022-02-17 19:31:58 +01:00
|
|
|
|
2022-11-23 17:37:32 +01:00
|
|
|
template <class KeyT, class ValueT, class HashT = td::Hash<KeyT>, class EqT = std::equal_to<KeyT>>
|
2024-03-21 16:43:47 +01:00
|
|
|
using FlatHashMapImpl = td::FlatHashTable<td::MapNode<KeyT, ValueT, EqT>, HashT, EqT>;
|
2022-03-09 16:03:56 +01:00
|
|
|
|
2022-02-17 19:31:58 +01:00
|
|
|
#define FOR_EACH_TABLE(F) \
|
2022-03-09 16:03:56 +01:00
|
|
|
F(FlatHashMapImpl) \
|
2022-02-23 13:58:12 +01:00
|
|
|
F(td::FlatHashMapChunks) \
|
2022-02-17 19:31:58 +01:00
|
|
|
F(folly::F14FastMap) \
|
|
|
|
F(absl::flat_hash_map) \
|
|
|
|
F(std::unordered_map) \
|
2022-02-09 01:08:18 +01:00
|
|
|
F(std::map)
|
2022-02-08 19:37:45 +01:00
|
|
|
|
2022-02-17 19:31:58 +01:00
|
|
|
//BENCHMARK(BM_cache_miss<IterateSlow>)->Ranges({{1, 16}, {0, 1}});
|
|
|
|
//BENCHMARK(BM_cache_miss<IterateFast>)->Ranges({{1, 16}, {0, 1}});
|
2022-02-11 09:26:58 +01:00
|
|
|
//BENCHMARK_TEMPLATE(BM_Get, VectorTable<td::uint64, td::uint64>)->Range(1, 1 << 26);
|
|
|
|
//BENCHMARK_TEMPLATE(BM_Get, SortedVectorTable<td::uint64, td::uint64>)->Range(1, 1 << 26);
|
|
|
|
//BENCHMARK_TEMPLATE(BM_Get, NoOpTable<td::uint64, td::uint64>)->Range(1, 1 << 26);
|
2022-02-08 19:37:45 +01:00
|
|
|
|
2022-02-11 09:26:58 +01:00
|
|
|
#define REGISTER_GET_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_Get, HT<td::uint64, td::uint64>)->Range(1, 1 << 23);
|
2022-02-08 19:37:45 +01:00
|
|
|
|
2022-02-09 01:08:18 +01:00
|
|
|
#define REGISTER_FIND_BENCHMARK(HT) \
|
2022-02-11 09:26:58 +01:00
|
|
|
BENCHMARK_TEMPLATE(BM_find_same, HT<td::uint64, td::uint64>) \
|
2022-02-09 01:08:18 +01:00
|
|
|
->ComputeStatistics("max", [](const td::vector<double> &v) { return *std::max_element(v.begin(), v.end()); }) \
|
|
|
|
->ComputeStatistics("min", [](const td::vector<double> &v) { return *std::min_element(v.begin(), v.end()); }) \
|
|
|
|
->Repetitions(20) \
|
|
|
|
->DisplayAggregatesOnly(true);
|
2022-02-08 19:37:45 +01:00
|
|
|
|
2022-02-11 09:26:58 +01:00
|
|
|
#define REGISTER_REMOVE_IF_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_remove_if, HT<td::uint64, td::uint64>);
|
|
|
|
#define REGISTER_EMPLACE_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_emplace_same, HT<td::uint64, td::uint64>);
|
2022-03-09 21:23:44 +01:00
|
|
|
#define REGISTER_EMPLACE_STRING_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_emplace_string, HT<td::string, td::uint64>);
|
2022-02-11 09:26:58 +01:00
|
|
|
#define REGISTER_CACHE_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_cache, HT<td::uint64, td::uint64>);
|
|
|
|
#define REGISTER_CACHE2_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_cache2, HT<td::uint64, td::uint64>);
|
|
|
|
#define REGISTER_CACHE3_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_cache3, HT<td::uint64, td::uint64>)->Range(1, 1 << 23);
|
|
|
|
#define REGISTER_ERASE_ALL_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_erase_all_with_begin, HT<td::uint64, td::uint64>);
|
|
|
|
#define REGISTER_REMOVE_IF_SLOW_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_remove_if_slow, HT<td::uint64, td::uint64>);
|
2022-02-17 19:31:58 +01:00
|
|
|
#define REGISTER_REMOVE_IF_SLOW_OLD_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_remove_if_slow_old, HT<td::uint64, td::uint64>);
|
2022-02-10 22:20:57 +01:00
|
|
|
|
2022-02-17 19:31:58 +01:00
|
|
|
FOR_EACH_TABLE(REGISTER_GET_BENCHMARK)
|
2022-02-10 22:20:57 +01:00
|
|
|
FOR_EACH_TABLE(REGISTER_CACHE3_BENCHMARK)
|
|
|
|
FOR_EACH_TABLE(REGISTER_CACHE2_BENCHMARK)
|
|
|
|
FOR_EACH_TABLE(REGISTER_CACHE_BENCHMARK)
|
2022-02-09 20:59:08 +01:00
|
|
|
FOR_EACH_TABLE(REGISTER_REMOVE_IF_BENCHMARK)
|
2022-02-08 19:37:45 +01:00
|
|
|
FOR_EACH_TABLE(REGISTER_EMPLACE_BENCHMARK)
|
2022-03-09 21:23:44 +01:00
|
|
|
FOR_EACH_TABLE(REGISTER_EMPLACE_STRING_BENCHMARK)
|
2022-02-11 17:40:16 +01:00
|
|
|
FOR_EACH_TABLE(REGISTER_ERASE_ALL_BENCHMARK)
|
2022-02-10 22:20:57 +01:00
|
|
|
FOR_EACH_TABLE(REGISTER_FIND_BENCHMARK)
|
2022-02-17 19:31:58 +01:00
|
|
|
FOR_EACH_TABLE(REGISTER_REMOVE_IF_SLOW_OLD_BENCHMARK)
|
|
|
|
FOR_EACH_TABLE(REGISTER_REMOVE_IF_SLOW_BENCHMARK)
|
2022-02-08 19:37:45 +01:00
|
|
|
|
2022-02-10 22:20:57 +01:00
|
|
|
#define RUN_CREATE_BENCHMARK(HT) benchmark_create<HT<td::uint64, td::uint64>>(#HT);
|
2022-02-11 09:26:58 +01:00
|
|
|
|
2022-02-09 01:08:18 +01:00
|
|
|
int main(int argc, char **argv) {
|
2022-02-10 09:55:32 +01:00
|
|
|
// FOR_EACH_TABLE(RUN_CREATE_BENCHMARK);
|
2022-02-08 19:37:45 +01:00
|
|
|
|
|
|
|
benchmark::Initialize(&argc, argv);
|
|
|
|
benchmark::RunSpecifiedBenchmarks();
|
|
|
|
benchmark::Shutdown();
|
2022-02-09 01:08:18 +01:00
|
|
|
}
|