tdlight/tdutils/td/utils/FlatHashMap.h

460 lines
11 KiB
C
Raw Normal View History

2022-02-07 22:42:53 +01:00
//
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
2022-02-07 20:40:28 +01:00
#pragma once
2022-02-08 19:11:14 +01:00
#include "td/utils/bits.h"
2022-02-07 22:42:53 +01:00
#include "td/utils/common.h"
#include <cstddef>
#include <functional>
2022-02-09 01:08:18 +01:00
#include <initializer_list>
2022-02-08 00:26:07 +01:00
#include <iterator>
2022-02-07 22:42:53 +01:00
#include <new>
2022-02-07 20:40:28 +01:00
#include <unordered_map>
#include <utility>
namespace td {
2022-02-08 19:11:14 +01:00
// Minimal owning array whose length is fixed at construction time.
//
// The storage is allocated with new[] and released with delete[]. The type is
// move-only; both move operations are implemented through swap(), so a moved-from
// source of move construction is left empty, while move assignment hands the
// previous contents of the target over to the source.
template <class T>
class fixed_vector {
 public:
  using iterator = T *;
  using const_iterator = const T *;

  // Creates an empty vector that owns no storage.
  fixed_vector() = default;

  // Allocates `size` default-initialized elements.
  explicit fixed_vector(size_t size) : ptr_(new T[size]), size_(size) {
  }

  // Takes over the storage of `other`, leaving it empty.
  fixed_vector(fixed_vector &&other) noexcept {
    swap(other);
  }

  // Exchanges storage with `other`; the old contents are released when `other` dies.
  fixed_vector &operator=(fixed_vector &&other) noexcept {
    swap(other);
    return *this;
  }

  fixed_vector(const fixed_vector &) = delete;
  fixed_vector &operator=(const fixed_vector &) = delete;

  ~fixed_vector() {
    delete[] ptr_;
  }

  // Unchecked element access.
  T &operator[](size_t i) {
    return ptr_[i];
  }
  const T &operator[](size_t i) const {
    return ptr_[i];
  }

  T *begin() {
    return ptr_;
  }
  const T *begin() const {
    return ptr_;
  }
  T *end() {
    return ptr_ + size_;
  }
  const T *end() const {
    return ptr_ + size_;
  }

  // Number of elements the vector was constructed with.
  size_t size() const {
    return size_;
  }

  // Exchanges pointer and length with `other`. Only swaps two trivially copyable
  // members, so it can never throw; the noexcept move operations rely on that.
  void swap(fixed_vector<T> &other) noexcept {
    std::swap(ptr_, other.ptr_);
    std::swap(size_, other.size_);
  }

 private:
  T *ptr_{};
  size_t size_{0};
};
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>>
2022-02-07 20:40:28 +01:00
class FlatHashMapImpl {
2022-02-08 20:47:10 +01:00
public:
2022-02-07 20:40:28 +01:00
struct Node {
2022-02-08 20:47:10 +01:00
using first_type = KeyT;
using second_type = ValueT;
2022-02-07 20:40:28 +01:00
KeyT first{};
union {
ValueT second;
};
const auto &key() const {
return first;
}
auto &value() {
return second;
}
Node() {
}
2022-02-08 20:47:10 +01:00
Node(KeyT key, ValueT value) : first(std::move(key)) {
new (&second) ValueT(std::move(value));
2022-02-09 01:08:18 +01:00
DCHECK(!empty());
2022-02-08 20:47:10 +01:00
}
2022-02-07 20:40:28 +01:00
~Node() {
if (!empty()) {
second.~ValueT();
}
}
2022-02-07 22:42:53 +01:00
Node(Node &&other) noexcept {
2022-02-07 20:40:28 +01:00
*this = std::move(other);
}
2022-02-07 22:42:53 +01:00
Node &operator=(Node &&other) noexcept {
2022-02-07 20:40:28 +01:00
DCHECK(empty());
DCHECK(!other.empty());
first = std::move(other.first);
other.first = KeyT{};
new (&second) ValueT(std::move(other.second));
other.second.~ValueT();
return *this;
}
2022-02-08 00:26:07 +01:00
2022-02-07 20:40:28 +01:00
bool empty() const {
return is_key_empty(key());
}
2022-02-08 00:26:07 +01:00
2022-02-07 20:40:28 +01:00
void clear() {
DCHECK(!empty());
first = KeyT();
second.~ValueT();
DCHECK(empty());
}
2022-02-08 00:26:07 +01:00
2022-02-07 20:40:28 +01:00
template <class... ArgsT>
void emplace(KeyT key, ArgsT &&...args) {
DCHECK(empty());
first = std::move(key);
new (&second) ValueT(std::forward<ArgsT>(args)...);
CHECK(!empty());
}
};
using Self = FlatHashMapImpl<KeyT, ValueT, HashT>;
2022-02-08 19:11:14 +01:00
using NodeIterator = typename fixed_vector<Node>::iterator;
using ConstNodeIterator = typename fixed_vector<Node>::const_iterator;
2022-02-07 20:40:28 +01:00
2022-02-08 19:11:14 +01:00
using key_type = KeyT;
2022-02-08 20:47:10 +01:00
using value_type = Node;
2022-02-08 19:11:14 +01:00
2022-02-07 20:40:28 +01:00
struct Iterator {
using iterator_category = std::bidirectional_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = Node;
using pointer = Node *;
using reference = Node &;
friend class FlatHashMapImpl;
Iterator &operator++() {
do {
++it_;
} while (it_ != map_->nodes_.end() && it_->empty());
return *this;
}
Iterator &operator--() {
do {
--it_;
} while (it_->empty());
return *this;
}
Node &operator*() {
return *it_;
}
Node *operator->() {
return &*it_;
}
bool operator==(const Iterator &other) const {
DCHECK(map_ == other.map_);
return it_ == other.it_;
}
bool operator!=(const Iterator &other) const {
DCHECK(map_ == other.map_);
return it_ != other.it_;
}
Iterator(NodeIterator it, Self *map) : it_(std::move(it)), map_(map) {
}
private:
NodeIterator it_;
Self *map_;
};
2022-02-07 20:40:28 +01:00
struct ConstIterator {
using iterator_category = std::bidirectional_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = Node;
using pointer = const Node *;
using reference = const Node &;
friend class FlatHashMapImpl;
ConstIterator &operator++() {
++it_;
2022-02-07 20:40:28 +01:00
return *this;
}
ConstIterator &operator--() {
--it_;
2022-02-07 20:40:28 +01:00
return *this;
}
const Node &operator*() {
return *it_;
}
const Node *operator->() {
return &*it_;
}
bool operator==(const ConstIterator &other) const {
return it_ == other.it_;
}
bool operator!=(const ConstIterator &other) const {
return it_ != other.it_;
}
explicit ConstIterator(Iterator it) : it_(std::move(it)) {
2022-02-07 20:40:28 +01:00
}
private:
Iterator it_;
2022-02-07 20:40:28 +01:00
};
FlatHashMapImpl() = default;
2022-02-08 00:26:07 +01:00
FlatHashMapImpl(const FlatHashMapImpl &other) : FlatHashMapImpl(other.begin(), other.end()) {
}
FlatHashMapImpl &operator=(const FlatHashMapImpl &other) {
assign(other.begin(), other.end());
return *this;
}
2022-02-08 20:47:10 +01:00
FlatHashMapImpl(std::initializer_list<Node> nodes) {
reserve(nodes.size());
for (auto &node : nodes) {
2022-02-09 01:08:18 +01:00
auto bucket = calc_bucket(node.first);
while (true) {
if (nodes_[bucket].key() == node.first) {
nodes_[bucket].second = node.second;
break;
}
if (nodes_[bucket].empty()) {
nodes_[bucket].emplace(node.first, node.second);
used_nodes_++;
break;
}
2022-02-08 20:47:10 +01:00
next_bucket(bucket);
}
}
}
2022-02-07 22:42:53 +01:00
FlatHashMapImpl(FlatHashMapImpl &&other) noexcept : nodes_(std::move(other.nodes_)), used_nodes_(other.used_nodes_) {
2022-02-07 20:40:28 +01:00
other.used_nodes_ = 0;
}
2022-02-07 22:42:53 +01:00
FlatHashMapImpl &operator=(FlatHashMapImpl &&other) noexcept {
2022-02-07 20:40:28 +01:00
nodes_ = std::move(other.nodes_);
used_nodes_ = other.used_nodes_;
other.used_nodes_ = 0;
return *this;
}
2022-02-08 00:26:07 +01:00
~FlatHashMapImpl() = default;
2022-02-07 20:40:28 +01:00
template <class ItT>
FlatHashMapImpl(ItT begin, ItT end) {
assign(begin, end);
}
2022-02-08 19:37:31 +01:00
Iterator find(const KeyT &key) {
if (empty()) {
return end();
}
2022-02-09 01:08:18 +01:00
auto bucket = calc_bucket(key);
2022-02-08 19:11:14 +01:00
while (true) {
if (nodes_[bucket].key() == key) {
2022-02-08 19:37:31 +01:00
return Iterator{nodes_.begin() + bucket, this};
2022-02-08 19:11:14 +01:00
}
if (nodes_[bucket].empty()) {
2022-02-08 19:37:31 +01:00
return end();
2022-02-08 19:11:14 +01:00
}
2022-02-08 19:37:31 +01:00
next_bucket(bucket);
2022-02-08 19:11:14 +01:00
}
}
2022-02-07 20:40:28 +01:00
ConstIterator find(const KeyT &key) const {
2022-02-08 19:11:14 +01:00
return ConstIterator(const_cast<Self *>(this)->find(key));
2022-02-07 20:40:28 +01:00
}
2022-02-08 00:26:07 +01:00
2022-02-07 20:40:28 +01:00
size_t size() const {
return used_nodes_;
}
2022-02-08 00:26:07 +01:00
2022-02-07 20:40:28 +01:00
bool empty() const {
return size() == 0;
}
2022-02-08 00:26:07 +01:00
Iterator begin() {
2022-02-07 20:40:28 +01:00
if (empty()) {
return end();
}
auto it = nodes_.begin();
while (it->empty()) {
2022-02-07 22:42:53 +01:00
++it;
2022-02-07 20:40:28 +01:00
}
return Iterator(it, this);
}
2022-02-08 00:26:07 +01:00
Iterator end() {
2022-02-07 20:40:28 +01:00
return Iterator(nodes_.end(), this);
}
2022-02-08 00:26:07 +01:00
ConstIterator begin() const {
return ConstIterator(const_cast<Self *>(this)->begin());
2022-02-07 20:40:28 +01:00
}
2022-02-08 00:26:07 +01:00
ConstIterator end() const {
return ConstIterator(const_cast<Self *>(this)->end());
2022-02-07 20:40:28 +01:00
}
2022-02-08 19:11:14 +01:00
void reserve(size_t size) {
2022-02-09 01:08:18 +01:00
size_t want_size = normalize(size * 5 / 3 + 1);
2022-02-08 19:11:14 +01:00
// size_t want_size = size * 2;
if (want_size > nodes_.size()) {
resize(want_size);
}
}
2022-02-07 20:40:28 +01:00
template <class... ArgsT>
std::pair<Iterator, bool> emplace(KeyT key, ArgsT &&...args) {
2022-02-08 19:11:14 +01:00
if (unlikely(should_resize())) {
grow();
}
2022-02-09 01:08:18 +01:00
auto bucket = calc_bucket(key);
2022-02-08 19:37:31 +01:00
while (true) {
if (nodes_[bucket].key() == key) {
return {Iterator{nodes_.begin() + bucket, this}, false};
}
if (nodes_[bucket].empty()) {
nodes_[bucket].emplace(std::move(key), std::forward<ArgsT>(args)...);
used_nodes_++;
return {Iterator{nodes_.begin() + bucket, this}, true};
}
next_bucket(bucket);
}
2022-02-07 20:40:28 +01:00
}
ValueT &operator[](const KeyT &key) {
2022-02-08 19:37:31 +01:00
return emplace(key).first->second;
2022-02-07 20:40:28 +01:00
}
size_t erase(const KeyT &key) {
auto it = find(key);
if (it == end()) {
return 0;
}
erase(it);
return 1;
}
2022-02-08 00:26:07 +01:00
2022-02-07 20:40:28 +01:00
size_t count(const KeyT &key) const {
return find(key) != end();
}
void clear() {
used_nodes_ = 0;
2022-02-08 19:11:14 +01:00
nodes_ = {};
2022-02-07 20:40:28 +01:00
}
void erase(Iterator it) {
DCHECK(it != end());
DCHECK(!is_key_empty(it->key()));
size_t empty_i = it.it_ - nodes_.begin();
auto empty_bucket = empty_i;
2022-02-07 22:42:53 +01:00
DCHECK(0 <= empty_i && empty_i < nodes_.size());
2022-02-07 20:40:28 +01:00
nodes_[empty_bucket].clear();
used_nodes_--;
for (size_t test_i = empty_i + 1;; test_i++) {
auto test_bucket = test_i;
if (test_bucket >= nodes_.size()) {
test_bucket -= nodes_.size();
}
2022-02-08 00:26:07 +01:00
if (nodes_[test_bucket].empty()) {
2022-02-07 20:40:28 +01:00
break;
}
2022-02-08 00:26:07 +01:00
auto want_i = calc_bucket(nodes_[test_bucket].key());
2022-02-07 20:40:28 +01:00
if (want_i < empty_i) {
want_i += nodes_.size();
}
if (want_i <= empty_i || want_i > test_i) {
nodes_[empty_bucket] = std::move(nodes_[test_bucket]);
empty_i = test_i;
empty_bucket = test_bucket;
}
}
}
private:
static bool is_key_empty(const KeyT &key) {
return key == KeyT();
}
2022-02-08 00:26:07 +01:00
2022-02-08 19:11:14 +01:00
fixed_vector<Node> nodes_;
2022-02-07 20:40:28 +01:00
size_t used_nodes_{};
2022-02-08 00:26:07 +01:00
template <class ItT>
void assign(ItT begin, ItT end) {
resize(std::distance(begin, end)); // TODO: should be conditional
for (; begin != end; ++begin) {
emplace(begin->first, begin->second);
}
}
2022-02-07 20:40:28 +01:00
bool should_resize() const {
2022-02-08 19:11:14 +01:00
return should_resize(used_nodes_ + 1, nodes_.size());
}
2022-02-09 01:08:18 +01:00
static bool should_resize(size_t used_count, size_t bucket_count) {
return used_count * 5 > bucket_count * 3;
2022-02-07 20:40:28 +01:00
}
2022-02-08 00:26:07 +01:00
2022-02-07 20:40:28 +01:00
size_t calc_bucket(const KeyT &key) const {
2022-02-08 00:26:07 +01:00
return HashT()(key) * 2 % nodes_.size();
2022-02-07 20:40:28 +01:00
}
2022-02-08 00:26:07 +01:00
2022-02-08 19:11:14 +01:00
static size_t normalize(size_t size) {
2022-02-09 01:08:18 +01:00
// return size ? (size | 7) : 0;
return static_cast<size_t>(1) << (64 - count_leading_zeroes64(size));
2022-02-07 20:40:28 +01:00
}
2022-02-09 01:08:18 +01:00
2022-02-08 19:11:14 +01:00
void grow() {
2022-02-09 01:08:18 +01:00
size_t want_size = normalize(td::max(nodes_.size() * 2 - 1, (used_nodes_ + 1) * 5 / 3 + 1));
2022-02-08 19:11:14 +01:00
// size_t want_size = td::max(nodes_.size(), (used_nodes_ + 1)) * 2;
resize(want_size);
2022-02-07 20:40:28 +01:00
}
2022-02-09 01:08:18 +01:00
2022-02-08 19:11:14 +01:00
void resize(size_t new_size) {
fixed_vector<Node> old_nodes(new_size);
std::swap(old_nodes, nodes_);
2022-02-08 00:26:07 +01:00
2022-02-07 20:40:28 +01:00
for (auto &node : old_nodes) {
2022-02-08 00:26:07 +01:00
if (node.empty()) {
2022-02-07 20:40:28 +01:00
continue;
}
2022-02-08 19:37:31 +01:00
size_t bucket = calc_bucket(node.key());
while (!nodes_[bucket].empty()) {
next_bucket(bucket);
}
nodes_[bucket] = std::move(node);
}
}
void next_bucket(size_t &bucket) const {
bucket++;
if (bucket == nodes_.size()) {
bucket = 0;
2022-02-07 20:40:28 +01:00
}
}
};
2022-02-07 22:42:53 +01:00
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>>
2022-02-08 19:11:14 +01:00
using FlatHashMap = FlatHashMapImpl<KeyT, ValueT, HashT>;
//using FlatHashMap = std::unordered_map<KeyT, ValueT, HashT>;
//using FlatHashMap = absl::flat_hash_map<KeyT, ValueT, HashT>;
2022-02-07 20:40:28 +01:00
} // namespace td