FlatHashMap: shrink table if too sparse

Arseny Smirnov 2022-02-10 12:46:05 +01:00
parent f5d8e4de83
commit f4b3a09646
2 changed files with 30 additions and 13 deletions

@@ -8,6 +8,7 @@
 #include "td/utils/bits.h"
 #include "td/utils/common.h"
+#include "td/utils/logging.h"

 #include <cstddef>
 #include <functional>
@@ -318,9 +319,7 @@ class FlatHashMapImpl {

   template <class... ArgsT>
   std::pair<Iterator, bool> emplace(KeyT key, ArgsT &&...args) {
-    if (unlikely(should_resize())) {
-      grow();
-    }
+    try_grow();
     auto bucket = calc_bucket(key);
     while (true) {
       if (nodes_[bucket].key() == key) {
@@ -345,6 +344,7 @@ class FlatHashMapImpl {
       return 0;
     }
     erase(it);
+    try_shrink();
     return 1;
   }
@@ -384,7 +384,7 @@ class FlatHashMapImpl {
         ++it;
       }
     }
-    // TODO: resize hashtable is necessary
+    try_shrink();
   }

  private:
@@ -403,28 +403,45 @@ class FlatHashMapImpl {
     }
   }

-  bool should_resize() const {
-    return should_resize(used_nodes_ + 1, nodes_.size());
+  void try_grow() {
+    if (should_grow(used_nodes_ + 1, nodes_.size())) {
+      grow();
+    }
   }
-  static bool should_resize(size_t used_count, size_t bucket_count) {
+  static bool should_grow(size_t used_count, size_t bucket_count) {
     return used_count * 5 > bucket_count * 3;
   }
-  size_t calc_bucket(const KeyT &key) const {
-    return HashT()(key) * 2 % nodes_.size();
+  void try_shrink() {
+    if (should_shrink(used_nodes_, nodes_.size())) {
+      shrink();
+    }
+  }
+  static bool should_shrink(size_t used_count, size_t bucket_count) {
+    return used_count * 5 < bucket_count;
   }

   static size_t normalize(size_t size) {
-    // return size ? (size | 7) : 0;
+    size |= (size != 0) * 7;
     return static_cast<size_t>(1) << (64 - count_leading_zeroes64(size));
   }

+  void shrink() {
+    size_t want_size = normalize((used_nodes_ + 1) * 5 / 3 + 1);
+    resize(want_size);
+  }
+
   void grow() {
     size_t want_size = normalize((used_nodes_ + 1) * 5 / 3 + 1);
-    // size_t want_size = td::max(nodes_.size(), (used_nodes_ + 1)) * 2;
     resize(want_size);
   }

+  size_t calc_bucket(const KeyT &key) const {
+    CHECK(!is_key_empty(key));
+    return HashT()(key) * 2 % nodes_.size();
+  }
+
   void resize(size_t new_size) {
     fixed_vector<Node> old_nodes(new_size);
     std::swap(old_nodes, nodes_);
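For reference, the resize policy these hunks introduce is fully determined by the two predicates and normalize(): grow once more than 3/5 of the buckets are in use, shrink once fewer than 1/5 are, and size the table to a power of two of at least 8. Below is a minimal standalone sketch of that policy, not TDLib code; __builtin_clzll (GCC/Clang) is assumed as a stand-in for td's count_leading_zeroes64.

// Standalone sketch of the grow/shrink policy from the hunks above.
#include <cstddef>
#include <cstdint>
#include <iostream>

// Assumed stand-in for td's count_leading_zeroes64 (GCC/Clang builtin).
static int count_leading_zeroes64(std::uint64_t x) {
  return x == 0 ? 64 : __builtin_clzll(x);
}

// Grow once more than 3/5 of the buckets are in use.
static bool should_grow(std::size_t used_count, std::size_t bucket_count) {
  return used_count * 5 > bucket_count * 3;
}

// Shrink once fewer than 1/5 of the buckets are in use.
static bool should_shrink(std::size_t used_count, std::size_t bucket_count) {
  return used_count * 5 < bucket_count;
}

// Round up to a power of two strictly above `size`; the `| 7` enforces a
// minimum result of 8 for any non-zero request.
static std::size_t normalize(std::size_t size) {
  size |= (size != 0) * 7;
  return static_cast<std::size_t>(1) << (64 - count_leading_zeroes64(size));
}

int main() {
  std::size_t buckets = 8;
  for (std::size_t used = 0; used <= buckets; used++) {
    std::cout << "used=" << used << "/" << buckets
              << " grow=" << should_grow(used + 1, buckets)  // checked before an insert
              << " shrink=" << should_shrink(used, buckets)  // checked after an erase
              << '\n';
  }
  // Both grow() and shrink() aim for ~3/5 occupancy before rounding up:
  std::cout << "target buckets for 5 elements: " << normalize((5 + 1) * 5 / 3 + 1) << '\n';  // 16
}

Note that grow() and shrink() compute the same target size, roughly 5/3 of the element count rounded up to a power of two, so the table converges to the same bucket count whichever direction it approaches from.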

@@ -255,7 +255,7 @@ TEST(FlatHashMap, stress_test) {
  });
  td::RandomSteps runner(std::move(steps));
-  for (size_t i = 0; i < 1000000; i++) {
+  for (size_t i = 0; i < 10000000; i++) {
    runner.step(rnd);
  }
}
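The test change above only runs the existing random-step stress test for longer. A more targeted, hypothetical way to exercise the new shrink path is a fill-and-drain loop; the sketch below assumes td::FlatHashMap is the public alias over FlatHashMapImpl with the emplace(key, value) and erase(key) members visible in the hunks above, and that a default-constructed (zero) key marks an empty bucket, which is why the keys start at 1.

#include "td/utils/FlatHashMap.h"  // assumed header path

#include <cstddef>

// Fill the table well past the grow threshold, then drain it, so that
// erase() repeatedly reaches try_shrink() and the table drops back down.
void churn(std::size_t rounds, int key_count) {
  td::FlatHashMap<int, int> map;
  for (std::size_t round = 0; round < rounds; round++) {
    for (int i = 1; i <= key_count; i++) {  // 0 is assumed to be the empty-key marker
      map.emplace(i, i);
    }
    for (int i = 1; i <= key_count; i++) {
      map.erase(i);
    }
  }
}

Before this commit, a loop like this would leave the table at its peak bucket count after each drain, since nothing ever resized it downward.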