FlatHashMap: some optimizations
This commit is contained in:
parent
e3a5b29d20
commit
5d074a4b18
@ -6,7 +6,9 @@
|
|||||||
//
|
//
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "td/utils/bits.h"
|
||||||
#include "td/utils/common.h"
|
#include "td/utils/common.h"
|
||||||
|
#include "td/utils/logging.h"
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
@ -17,6 +19,57 @@
|
|||||||
|
|
||||||
namespace td {
|
namespace td {
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
class fixed_vector {
|
||||||
|
public:
|
||||||
|
fixed_vector() = default;
|
||||||
|
explicit fixed_vector(size_t size) : ptr_(new T[size]), size_(size) {
|
||||||
|
}
|
||||||
|
fixed_vector(fixed_vector &&other) noexcept {
|
||||||
|
swap(other);
|
||||||
|
}
|
||||||
|
fixed_vector &operator=(fixed_vector &&other) noexcept {
|
||||||
|
swap(other);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
fixed_vector(const fixed_vector &) = delete;
|
||||||
|
fixed_vector &operator=(const fixed_vector &) = delete;
|
||||||
|
~fixed_vector() {
|
||||||
|
delete[] ptr_;
|
||||||
|
}
|
||||||
|
T &operator[](size_t i) {
|
||||||
|
return ptr_[i];
|
||||||
|
}
|
||||||
|
const T &operator[](size_t i) const {
|
||||||
|
return ptr_[i];
|
||||||
|
}
|
||||||
|
T *begin() {
|
||||||
|
return ptr_;
|
||||||
|
}
|
||||||
|
const T *begin() const {
|
||||||
|
return ptr_;
|
||||||
|
}
|
||||||
|
T *end() {
|
||||||
|
return ptr_ + size_;
|
||||||
|
}
|
||||||
|
const T *end() const {
|
||||||
|
return ptr_ + size_;
|
||||||
|
}
|
||||||
|
size_t size() const {
|
||||||
|
return size_;
|
||||||
|
}
|
||||||
|
using iterator = T *;
|
||||||
|
using const_iterator = const T *;
|
||||||
|
void swap(fixed_vector<T> &other) {
|
||||||
|
std::swap(ptr_, other.ptr_);
|
||||||
|
std::swap(size_, other.size_);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
T *ptr_{};
|
||||||
|
size_t size_{0};
|
||||||
|
};
|
||||||
|
|
||||||
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>>
|
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>>
|
||||||
class FlatHashMapImpl {
|
class FlatHashMapImpl {
|
||||||
struct Node {
|
struct Node {
|
||||||
@ -71,10 +124,14 @@ class FlatHashMapImpl {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
using Self = FlatHashMapImpl<KeyT, ValueT, HashT>;
|
using Self = FlatHashMapImpl<KeyT, ValueT, HashT>;
|
||||||
using NodeIterator = typename std::vector<Node>::iterator;
|
using NodeIterator = typename fixed_vector<Node>::iterator;
|
||||||
using ConstNodeIterator = typename std::vector<Node>::const_iterator;
|
using ConstNodeIterator = typename fixed_vector<Node>::const_iterator;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
// define key_type and value_type for benchmarks
|
||||||
|
using key_type = KeyT;
|
||||||
|
using value_type = std::pair<const KeyT, ValueT>;
|
||||||
|
|
||||||
struct Iterator {
|
struct Iterator {
|
||||||
using iterator_category = std::bidirectional_iterator_tag;
|
using iterator_category = std::bidirectional_iterator_tag;
|
||||||
using difference_type = std::ptrdiff_t;
|
using difference_type = std::ptrdiff_t;
|
||||||
@ -177,19 +234,37 @@ class FlatHashMapImpl {
|
|||||||
assign(begin, end);
|
assign(begin, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class SomeF, class NoneF>
|
||||||
|
auto with_node(const KeyT &key, SomeF &&some, NoneF &&none) {
|
||||||
|
size_t bucket = calc_bucket(key);
|
||||||
|
while (true) {
|
||||||
|
if (nodes_[bucket].key() == key) {
|
||||||
|
return some(nodes_.begin() + bucket);
|
||||||
|
}
|
||||||
|
if (nodes_[bucket].empty()) {
|
||||||
|
return none(nodes_.begin() + bucket);
|
||||||
|
}
|
||||||
|
bucket++;
|
||||||
|
if (bucket == nodes_.size()) {
|
||||||
|
bucket = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Iterator find(const KeyT &key) {
|
Iterator find(const KeyT &key) {
|
||||||
if (empty()) {
|
if (empty()) {
|
||||||
return end();
|
return end();
|
||||||
}
|
}
|
||||||
auto it = find_bucket_for_insert(key);
|
return with_node(
|
||||||
if (it->empty()) {
|
key,
|
||||||
return end();
|
[&](auto it) {
|
||||||
}
|
return Iterator{it, this};
|
||||||
return Iterator(it, this);
|
},
|
||||||
|
[&](auto it) { return end(); });
|
||||||
}
|
}
|
||||||
|
|
||||||
ConstIterator find(const KeyT &key) const {
|
ConstIterator find(const KeyT &key) const {
|
||||||
return ConstIterator(const_cast<Self*>(this)->find(key));
|
return ConstIterator(const_cast<Self *>(this)->find(key));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t size() const {
|
size_t size() const {
|
||||||
@ -221,33 +296,42 @@ class FlatHashMapImpl {
|
|||||||
return ConstIterator(const_cast<Self *>(this)->end());
|
return ConstIterator(const_cast<Self *>(this)->end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void reserve(size_t size) {
|
||||||
|
size_t want_size = normalize(size * 10 / 6 + 1);
|
||||||
|
// size_t want_size = size * 2;
|
||||||
|
if (want_size > nodes_.size()) {
|
||||||
|
resize(want_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class... ArgsT>
|
template <class... ArgsT>
|
||||||
std::pair<Iterator, bool> emplace(KeyT key, ArgsT &&...args) {
|
std::pair<Iterator, bool> emplace(KeyT key, ArgsT &&...args) {
|
||||||
if (should_resize()) {
|
if (unlikely(should_resize())) {
|
||||||
resize(used_nodes_ + 1);
|
grow();
|
||||||
}
|
}
|
||||||
auto it = find_bucket_for_insert(key);
|
return with_node(
|
||||||
if (it->empty()) {
|
key, [&](auto it) { return std::make_pair(Iterator(it, this), false); },
|
||||||
it->emplace(std::move(key), std::forward<ArgsT>(args)...);
|
[&](auto it) {
|
||||||
used_nodes_++;
|
it->emplace(std::move(key), std::forward<ArgsT>(args)...);
|
||||||
return std::make_pair(Iterator(it, this), true);
|
used_nodes_++;
|
||||||
}
|
return std::make_pair(Iterator(it, this), true);
|
||||||
return std::make_pair(Iterator(it, this), false);
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
ValueT &operator[](const KeyT &key) {
|
ValueT &operator[](const KeyT &key) {
|
||||||
DCHECK(!is_key_empty(key));
|
DCHECK(!is_key_empty(key));
|
||||||
|
|
||||||
if (should_resize()) {
|
if (should_resize()) {
|
||||||
resize(used_nodes_ + 1);
|
grow();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto it = find_bucket_for_insert(key);
|
return *with_node(
|
||||||
if (it->empty()) {
|
key, [&](auto it) { return &it->second; },
|
||||||
it->emplace(key);
|
[&](auto it) {
|
||||||
used_nodes_++;
|
it->emplace(key);
|
||||||
}
|
used_nodes_++;
|
||||||
return it->second;
|
return &it->second;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t erase(const KeyT &key) {
|
size_t erase(const KeyT &key) {
|
||||||
@ -265,7 +349,7 @@ class FlatHashMapImpl {
|
|||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
used_nodes_ = 0;
|
used_nodes_ = 0;
|
||||||
nodes_.clear();
|
nodes_ = {};
|
||||||
}
|
}
|
||||||
|
|
||||||
void erase(Iterator it) {
|
void erase(Iterator it) {
|
||||||
@ -305,7 +389,7 @@ class FlatHashMapImpl {
|
|||||||
return key == KeyT();
|
return key == KeyT();
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<Node> nodes_;
|
fixed_vector<Node> nodes_;
|
||||||
size_t used_nodes_{};
|
size_t used_nodes_{};
|
||||||
|
|
||||||
template <class ItT>
|
template <class ItT>
|
||||||
@ -317,43 +401,42 @@ class FlatHashMapImpl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool should_resize() const {
|
bool should_resize() const {
|
||||||
return (used_nodes_ + 1) * 10 > nodes_.size() * 6;
|
return should_resize(used_nodes_ + 1, nodes_.size());
|
||||||
|
}
|
||||||
|
static bool should_resize(size_t used_count, size_t buckets_count) {
|
||||||
|
return used_count * 10 > buckets_count * 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t calc_bucket(const KeyT &key) const {
|
size_t calc_bucket(const KeyT &key) const {
|
||||||
return HashT()(key) * 2 % nodes_.size();
|
return HashT()(key) * 2 % nodes_.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
NodeIterator find_bucket_for_insert(const KeyT &key) {
|
static size_t normalize(size_t size) {
|
||||||
size_t bucket = calc_bucket(key);
|
return size_t(1) << (64 - count_leading_zeroes64(size));
|
||||||
while (!(nodes_[bucket].key() == key) && !nodes_[bucket].empty()) {
|
|
||||||
bucket++;
|
|
||||||
if (bucket == nodes_.size()) {
|
|
||||||
bucket = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nodes_.begin() + bucket;
|
|
||||||
}
|
}
|
||||||
|
void grow() {
|
||||||
ConstNodeIterator find_bucket_for_insert(const KeyT &key) const {
|
size_t want_size = normalize(td::max(nodes_.size() * 2 - 1, (used_nodes_ + 1) * 10 / 6 + 1));
|
||||||
return const_cast<Self *>(find_bucket_for_insert(key));
|
// size_t want_size = td::max(nodes_.size(), (used_nodes_ + 1)) * 2;
|
||||||
|
resize(want_size);
|
||||||
}
|
}
|
||||||
|
void resize(size_t new_size) {
|
||||||
|
// LOG(ERROR) << new_size;
|
||||||
|
fixed_vector<Node> old_nodes(new_size);
|
||||||
|
std::swap(old_nodes, nodes_);
|
||||||
|
|
||||||
void resize(size_t size) {
|
|
||||||
auto old_nodes = std::move(nodes_);
|
|
||||||
nodes_.resize(td::max(old_nodes.size(), size) * 2 + 1); // TODO: some other logic
|
|
||||||
for (auto &node : old_nodes) {
|
for (auto &node : old_nodes) {
|
||||||
if (node.empty()) {
|
if (node.empty()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto new_node = find_bucket_for_insert(node.key());
|
with_node(
|
||||||
*new_node = std::move(node);
|
node.key(), [](auto it) { UNREACHABLE(); }, [&](auto it) { *it = std::move(node); });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>>
|
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>>
|
||||||
//using FlatHashMap = FlatHashMapImpl<KeyT, ValueT, HashT>;
|
using FlatHashMap = FlatHashMapImpl<KeyT, ValueT, HashT>;
|
||||||
using FlatHashMap = std::unordered_map<KeyT, ValueT, HashT>;
|
//using FlatHashMap = std::unordered_map<KeyT, ValueT, HashT>;
|
||||||
|
//using FlatHashMap = absl::flat_hash_map<KeyT, ValueT, HashT>;
|
||||||
|
|
||||||
} // namespace td
|
} // namespace td
|
||||||
|
Loading…
Reference in New Issue
Block a user