FlatHashMap: add implementation with chunks
This commit is contained in:
parent
deafeee33b
commit
34a69e3133
@ -75,6 +75,7 @@ endif()
|
||||
|
||||
find_package(ABSL QUIET)
|
||||
find_package(folly QUIET)
|
||||
find_package(gflags QUIET)
|
||||
|
||||
if (ABSL_FOUND AND folly_FOUND)
|
||||
add_executable(memory-hashset-memprof EXCLUDE_FROM_ALL hashset_memory.cpp)
|
||||
|
@ -203,8 +203,11 @@ set(TDUTILS_SOURCE
|
||||
td/utils/ExitGuard.h
|
||||
td/utils/FileLog.h
|
||||
td/utils/filesystem.h
|
||||
td/utils/fixed_vector.h
|
||||
td/utils/find_boundary.h
|
||||
td/utils/FlatHashMap.h
|
||||
td/utils/FlatHashMapChunks.h
|
||||
td/utils/FlatHashMapLinear.h
|
||||
td/utils/FloodControlFast.h
|
||||
td/utils/FloodControlStrict.h
|
||||
td/utils/format.h
|
||||
|
@ -6,586 +6,18 @@
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "td/utils/bits.h"
|
||||
#include "td/utils/common.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <initializer_list>
|
||||
#include <iterator>
|
||||
#include <new>
|
||||
#include <utility>
|
||||
#include "td/utils/FlatHashMapChunks.h"
|
||||
#include "td/utils/FlatHashMapLinear.h"
|
||||
|
||||
namespace td {
|
||||
|
||||
// Minimal owning array whose length is fixed at construction time.
// It is move-only; storage comes from `new T[size]` and is released with `delete[]`.
template <class T>
class fixed_vector {
 public:
  using iterator = T *;
  using const_iterator = const T *;

  // Creates an empty vector that owns no storage.
  fixed_vector() = default;

  // Allocates `size` default-initialized elements.
  explicit fixed_vector(size_t size) : ptr_(new T[size]), size_(size) {
  }

  fixed_vector(const fixed_vector &) = delete;
  fixed_vector &operator=(const fixed_vector &) = delete;

  // Takes over the storage of `other`, which becomes empty.
  fixed_vector(fixed_vector &&other) noexcept : ptr_(other.ptr_), size_(other.size_) {
    other.ptr_ = nullptr;
    other.size_ = 0;
  }

  // Exchanges storage with `other`; the previous content of *this is released
  // when `other` is destroyed.
  fixed_vector &operator=(fixed_vector &&other) noexcept {
    swap(other);
    return *this;
  }

  ~fixed_vector() {
    delete[] ptr_;
  }

  // Unchecked element access.
  T &operator[](size_t i) {
    return *(ptr_ + i);
  }
  const T &operator[](size_t i) const {
    return *(ptr_ + i);
  }

  T *begin() {
    return ptr_;
  }
  const T *begin() const {
    return ptr_;
  }
  T *end() {
    return begin() + size();
  }
  const T *end() const {
    return begin() + size();
  }

  size_t size() const {
    return size_;
  }
  bool empty() const {
    return size_ == 0;
  }

  // Exchanges the owned array and the length with `other`.
  void swap(fixed_vector<T> &other) {
    T *other_ptr = other.ptr_;
    other.ptr_ = ptr_;
    ptr_ = other_ptr;

    size_t other_size = other.size_;
    other.size_ = size_;
    size_ = other_size;
  }

 private:
  T *ptr_{};
  size_t size_{0};
};
|
||||
|
||||
// TODO: move
|
||||
// Returns true if `key` compares equal to a value-initialized KeyT.
// The hash tables in this file use such a key to mark an unused node.
template <class KeyT>
bool is_key_empty(const KeyT &key) {
  const KeyT empty_key{};
  return key == empty_key;
}
|
||||
|
||||
// Key/value slot of a flat hash map.
// A node whose key equals KeyT() is "empty"; in that state `second` holds no live object.
// The value lives in an anonymous union, so its lifetime is managed manually.
template <class KeyT, class ValueT>
struct MapNode {
  using first_type = KeyT;
  using second_type = ValueT;
  using key_type = KeyT;
  using public_type = MapNode<KeyT, ValueT>;
  using value_type = ValueT;

  KeyT first{};
  union {
    ValueT second;
  };

  // Creates an empty node; `second` is left unconstructed.
  MapNode() {
  }

  // Creates an occupied node. `key` must not be the empty key.
  MapNode(KeyT key, ValueT value) : first(std::move(key)) {
    new (&second) ValueT(std::move(value));
    DCHECK(!empty());
  }

  // Move construction is implemented through move assignment into the freshly created empty node.
  MapNode(MapNode &&other) noexcept {
    *this = std::move(other);
  }

  // Transfers key and value from the occupied node `other` into this empty node;
  // `other` becomes empty afterwards.
  MapNode &operator=(MapNode &&other) noexcept {
    DCHECK(empty());
    DCHECK(!other.empty());
    first = std::move(other.first);
    other.first = KeyT{};
    new (&second) ValueT(std::move(other.second));
    other.second.~ValueT();
    return *this;
  }

  // Destroys the value only if the node is occupied.
  ~MapNode() {
    if (empty()) {
      return;
    }
    second.~ValueT();
  }

  const KeyT &key() const {
    return first;
  }

  ValueT &value() {
    return second;
  }

  // The node itself is what map iterators expose to users.
  MapNode &get_public() {
    return *this;
  }

  bool empty() const {
    return is_key_empty(first);
  }

  // Turns an occupied node into an empty one, destroying the value.
  void clear() {
    DCHECK(!empty());
    first = KeyT();
    second.~ValueT();
    DCHECK(empty());
  }

  // Fills an empty node: stores `key` and constructs the value in place from `args`.
  template <class... ArgsT>
  void emplace(KeyT key, ArgsT &&...args) {
    DCHECK(empty());
    first = std::move(key);
    new (&second) ValueT(std::forward<ArgsT>(args)...);
    DCHECK(!empty());
  }
};
|
||||
|
||||
// Key-only slot of a flat hash set.
// A node whose key equals KeyT() is "empty".
template <class KeyT>
struct SetNode {
  using first_type = KeyT;
  using key_type = KeyT;
  using public_type = KeyT;
  using value_type = KeyT;

  KeyT first{};

  SetNode() = default;

  explicit SetNode(KeyT key) : first(std::move(key)) {
  }

  // Move construction is implemented through move assignment into the freshly created empty node.
  SetNode(SetNode &&other) noexcept {
    *this = std::move(other);
  }

  // Transfers the key of the occupied node `other` into this empty node;
  // `other` becomes empty afterwards.
  SetNode &operator=(SetNode &&other) noexcept {
    DCHECK(empty());
    DCHECK(!other.empty());
    first = std::move(other.first);
    other.first = KeyT{};
    return *this;
  }

  const KeyT &key() const {
    return first;
  }

  // For a set the stored value is the key itself.
  const KeyT &value() const {
    return first;
  }

  // Set iterators expose the key rather than the node.
  KeyT &get_public() {
    return first;
  }

  bool empty() const {
    return is_key_empty(first);
  }

  // Resets the node to the empty key.
  void clear() {
    first = KeyT();
    CHECK(empty());
  }

  // Stores `key` in the node.
  void emplace(KeyT key) {
    first = std::move(key);
  }
};
|
||||
|
||||
template <class NodeT, class HashT, class EqT>
|
||||
class FlatHashTable {
|
||||
public:
|
||||
using Self = FlatHashTable<NodeT, HashT, EqT>;
|
||||
using Node = NodeT;
|
||||
using NodeIterator = typename fixed_vector<Node>::iterator;
|
||||
using ConstNodeIterator = typename fixed_vector<Node>::const_iterator;
|
||||
|
||||
using KeyT = typename Node::key_type;
|
||||
using key_type = typename Node::key_type;
|
||||
using public_type = typename Node::public_type;
|
||||
using value_type = typename Node::public_type;
|
||||
|
||||
struct Iterator {
|
||||
using iterator_category = std::bidirectional_iterator_tag;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using value_type = public_type;
|
||||
using pointer = public_type *;
|
||||
using reference = public_type &;
|
||||
|
||||
friend class FlatHashTable;
|
||||
Iterator &operator++() {
|
||||
do {
|
||||
++it_;
|
||||
} while (it_ != map_->nodes_.end() && it_->empty());
|
||||
return *this;
|
||||
}
|
||||
Iterator &operator--() {
|
||||
do {
|
||||
--it_;
|
||||
} while (it_->empty());
|
||||
return *this;
|
||||
}
|
||||
reference operator*() {
|
||||
return it_->get_public();
|
||||
}
|
||||
pointer operator->() {
|
||||
return &*it_;
|
||||
}
|
||||
bool operator==(const Iterator &other) const {
|
||||
DCHECK(map_ == other.map_);
|
||||
return it_ == other.it_;
|
||||
}
|
||||
bool operator!=(const Iterator &other) const {
|
||||
DCHECK(map_ == other.map_);
|
||||
return it_ != other.it_;
|
||||
}
|
||||
|
||||
Iterator() = default;
|
||||
Iterator(NodeIterator it, Self *map) : it_(std::move(it)), map_(map) {
|
||||
}
|
||||
|
||||
private:
|
||||
NodeIterator it_;
|
||||
Self *map_;
|
||||
};
|
||||
|
||||
struct ConstIterator {
|
||||
using iterator_category = std::bidirectional_iterator_tag;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using value_type = public_type;
|
||||
using pointer = const value_type *;
|
||||
using reference = const value_type &;
|
||||
|
||||
friend class FlatHashTable;
|
||||
ConstIterator &operator++() {
|
||||
++it_;
|
||||
return *this;
|
||||
}
|
||||
ConstIterator &operator--() {
|
||||
--it_;
|
||||
return *this;
|
||||
}
|
||||
reference operator*() {
|
||||
return *it_;
|
||||
}
|
||||
pointer operator->() {
|
||||
return &*it_;
|
||||
}
|
||||
bool operator==(const ConstIterator &other) const {
|
||||
return it_ == other.it_;
|
||||
}
|
||||
bool operator!=(const ConstIterator &other) const {
|
||||
return it_ != other.it_;
|
||||
}
|
||||
|
||||
ConstIterator() = default;
|
||||
ConstIterator(Iterator it) : it_(std::move(it)) {
|
||||
}
|
||||
|
||||
private:
|
||||
Iterator it_;
|
||||
};
|
||||
using iterator = Iterator;
|
||||
using const_iterator = ConstIterator;
|
||||
|
||||
FlatHashTable() = default;
|
||||
FlatHashTable(const FlatHashTable &other) : FlatHashTable(other.begin(), other.end()) {
|
||||
}
|
||||
FlatHashTable &operator=(const FlatHashTable &other) {
|
||||
assign(other.begin(), other.end());
|
||||
return *this;
|
||||
}
|
||||
|
||||
FlatHashTable(std::initializer_list<Node> nodes) {
|
||||
reserve(nodes.size());
|
||||
for (auto &node : nodes) {
|
||||
CHECK(!node.empty());
|
||||
auto bucket = calc_bucket(node.first);
|
||||
while (true) {
|
||||
if (nodes_[bucket].key() == node.first) {
|
||||
nodes_[bucket].second = node.second;
|
||||
break;
|
||||
}
|
||||
if (nodes_[bucket].empty()) {
|
||||
nodes_[bucket].emplace(node.first, node.second);
|
||||
used_nodes_++;
|
||||
break;
|
||||
}
|
||||
next_bucket(bucket);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FlatHashTable(FlatHashTable &&other) noexcept : nodes_(std::move(other.nodes_)), used_nodes_(other.used_nodes_) {
|
||||
other.used_nodes_ = 0;
|
||||
}
|
||||
FlatHashTable &operator=(FlatHashTable &&other) noexcept {
|
||||
nodes_ = std::move(other.nodes_);
|
||||
used_nodes_ = other.used_nodes_;
|
||||
other.used_nodes_ = 0;
|
||||
return *this;
|
||||
}
|
||||
void swap(FlatHashTable &other) noexcept {
|
||||
using std::swap;
|
||||
swap(nodes_, other.nodes_);
|
||||
swap(used_nodes_, other.used_nodes_);
|
||||
}
|
||||
~FlatHashTable() = default;
|
||||
|
||||
template <class ItT>
|
||||
FlatHashTable(ItT begin, ItT end) {
|
||||
assign(begin, end);
|
||||
}
|
||||
|
||||
size_t bucket_count() const {
|
||||
return nodes_.size();
|
||||
}
|
||||
|
||||
Iterator find(const KeyT &key) {
|
||||
if (empty() || is_key_empty(key)) {
|
||||
return end();
|
||||
}
|
||||
auto bucket = calc_bucket(key);
|
||||
while (true) {
|
||||
if (EqT()(nodes_[bucket].key(), key)) {
|
||||
return Iterator{nodes_.begin() + bucket, this};
|
||||
}
|
||||
if (nodes_[bucket].empty()) {
|
||||
return end();
|
||||
}
|
||||
next_bucket(bucket);
|
||||
}
|
||||
}
|
||||
|
||||
ConstIterator find(const KeyT &key) const {
|
||||
return ConstIterator(const_cast<Self *>(this)->find(key));
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return used_nodes_;
|
||||
}
|
||||
|
||||
bool empty() const {
|
||||
return size() == 0;
|
||||
}
|
||||
|
||||
Iterator begin() {
|
||||
if (empty()) {
|
||||
return end();
|
||||
}
|
||||
auto it = nodes_.begin();
|
||||
while (it->empty()) {
|
||||
++it;
|
||||
}
|
||||
return Iterator(it, this);
|
||||
}
|
||||
Iterator end() {
|
||||
return Iterator(nodes_.end(), this);
|
||||
}
|
||||
|
||||
ConstIterator begin() const {
|
||||
return ConstIterator(const_cast<Self *>(this)->begin());
|
||||
}
|
||||
ConstIterator end() const {
|
||||
return ConstIterator(const_cast<Self *>(this)->end());
|
||||
}
|
||||
|
||||
void reserve(size_t size) {
|
||||
size_t want_size = normalize(size * 5 / 3 + 1);
|
||||
// size_t want_size = size * 2;
|
||||
if (want_size > nodes_.size()) {
|
||||
resize(want_size);
|
||||
}
|
||||
}
|
||||
|
||||
template <class... ArgsT>
|
||||
std::pair<Iterator, bool> emplace(KeyT key, ArgsT &&...args) {
|
||||
try_grow();
|
||||
CHECK(!is_key_empty(key));
|
||||
auto bucket = calc_bucket(key);
|
||||
while (true) {
|
||||
if (EqT()(nodes_[bucket].key(), key)) {
|
||||
return {Iterator{nodes_.begin() + bucket, this}, false};
|
||||
}
|
||||
if (nodes_[bucket].empty()) {
|
||||
nodes_[bucket].emplace(std::move(key), std::forward<ArgsT>(args)...);
|
||||
used_nodes_++;
|
||||
return {Iterator{nodes_.begin() + bucket, this}, true};
|
||||
}
|
||||
next_bucket(bucket);
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<Iterator, bool> insert(KeyT key) {
|
||||
return emplace(std::move(key));
|
||||
}
|
||||
|
||||
template <class ItT>
|
||||
void insert(ItT begin, ItT end) {
|
||||
for (; begin != end; ++begin) {
|
||||
emplace(*begin);
|
||||
}
|
||||
}
|
||||
|
||||
typename Node::value_type &operator[](const KeyT &key) {
|
||||
return emplace(key).first->value();
|
||||
}
|
||||
|
||||
size_t erase(const KeyT &key) {
|
||||
auto it = find(key);
|
||||
if (it == end()) {
|
||||
return 0;
|
||||
}
|
||||
erase(it);
|
||||
try_shrink();
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t count(const KeyT &key) const {
|
||||
return find(key) != end();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
used_nodes_ = 0;
|
||||
nodes_ = {};
|
||||
}
|
||||
|
||||
void erase(Iterator it) {
|
||||
DCHECK(it != end());
|
||||
DCHECK(!it.it_->empty());
|
||||
erase_node(it.it_);
|
||||
}
|
||||
|
||||
template <class F>
|
||||
void remove_if(F &&f) {
|
||||
auto it = nodes_.begin();
|
||||
while (it != nodes_.end() && !it->empty()) {
|
||||
++it;
|
||||
}
|
||||
auto first_empty = it;
|
||||
for (; it != nodes_.end();) {
|
||||
if (!it->empty() && f(*it)) {
|
||||
erase_node(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
for (it = nodes_.begin(); it != first_empty;) {
|
||||
if (!it->empty() && f(*it)) {
|
||||
erase_node(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
try_shrink();
|
||||
}
|
||||
|
||||
private:
|
||||
fixed_vector<Node> nodes_;
|
||||
size_t used_nodes_{};
|
||||
|
||||
template <class ItT>
|
||||
void assign(ItT begin, ItT end) {
|
||||
resize(std::distance(begin, end)); // TODO: should be conditional
|
||||
for (; begin != end; ++begin) {
|
||||
emplace(begin->first, begin->second);
|
||||
}
|
||||
}
|
||||
|
||||
void try_grow() {
|
||||
if (should_grow(used_nodes_ + 1, nodes_.size())) {
|
||||
grow();
|
||||
}
|
||||
}
|
||||
static bool should_grow(size_t used_count, size_t bucket_count) {
|
||||
return used_count * 5 > bucket_count * 3;
|
||||
}
|
||||
void try_shrink() {
|
||||
if (should_shrink(used_nodes_, nodes_.size())) {
|
||||
shrink();
|
||||
}
|
||||
}
|
||||
static bool should_shrink(size_t used_count, size_t bucket_count) {
|
||||
return used_count * 10 < bucket_count;
|
||||
}
|
||||
|
||||
static size_t normalize(size_t size) {
|
||||
return static_cast<size_t>(1) << (64 - count_leading_zeroes64(size | 7));
|
||||
}
|
||||
|
||||
void shrink() {
|
||||
size_t want_size = normalize((used_nodes_ + 1) * 5 / 3 + 1);
|
||||
resize(want_size);
|
||||
}
|
||||
|
||||
void grow() {
|
||||
size_t want_size = normalize(2 * nodes_.size() - !nodes_.empty());
|
||||
resize(want_size);
|
||||
}
|
||||
|
||||
size_t calc_bucket(const KeyT &key) const {
|
||||
return HashT()(key) * 2 % nodes_.size();
|
||||
}
|
||||
|
||||
void resize(size_t new_size) {
|
||||
fixed_vector<Node> old_nodes(new_size);
|
||||
std::swap(old_nodes, nodes_);
|
||||
|
||||
for (auto &node : old_nodes) {
|
||||
if (node.empty()) {
|
||||
continue;
|
||||
}
|
||||
size_t bucket = calc_bucket(node.key());
|
||||
while (!nodes_[bucket].empty()) {
|
||||
next_bucket(bucket);
|
||||
}
|
||||
nodes_[bucket] = std::move(node);
|
||||
}
|
||||
}
|
||||
|
||||
void next_bucket(size_t &bucket) const {
|
||||
bucket++;
|
||||
if (bucket == nodes_.size()) {
|
||||
bucket = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void erase_node(NodeIterator it) {
|
||||
size_t empty_i = it - nodes_.begin();
|
||||
auto empty_bucket = empty_i;
|
||||
DCHECK(0 <= empty_i && empty_i < nodes_.size());
|
||||
nodes_[empty_bucket].clear();
|
||||
used_nodes_--;
|
||||
|
||||
for (size_t test_i = empty_i + 1;; test_i++) {
|
||||
auto test_bucket = test_i;
|
||||
if (test_bucket >= nodes_.size()) {
|
||||
test_bucket -= nodes_.size();
|
||||
}
|
||||
|
||||
if (nodes_[test_bucket].empty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
auto want_i = calc_bucket(nodes_[test_bucket].key());
|
||||
if (want_i < empty_i) {
|
||||
want_i += nodes_.size();
|
||||
}
|
||||
|
||||
if (want_i <= empty_i || want_i > test_i) {
|
||||
nodes_[empty_bucket] = std::move(nodes_[test_bucket]);
|
||||
empty_i = test_i;
|
||||
empty_bucket = test_bucket;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
|
||||
using FlatHashMapImpl = FlatHashTable<MapNode<KeyT, ValueT>, HashT, EqT>;
|
||||
template <class KeyT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
|
||||
using FlatHashSetImpl = FlatHashTable<SetNode<KeyT>, HashT, EqT>;
|
||||
|
||||
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
|
||||
using FlatHashMap = FlatHashMapImpl<KeyT, ValueT, HashT, EqT>;
|
||||
//using FlatHashMap = FlatHashMapImpl<KeyT, ValueT, HashT, EqT>;
|
||||
using FlatHashMap = FlatHashMapChunks<KeyT, ValueT, HashT, EqT>;
|
||||
//using FlatHashMap = std::unordered_map<KeyT, ValueT, HashT, EqT>;
|
||||
|
||||
template <class KeyT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
|
||||
using FlatHashSet = FlatHashSetImpl<KeyT, HashT, EqT>;
|
||||
//using FlatHashSet = FlatHashSetImpl<KeyT, HashT, EqT>;
|
||||
using FlatHashSet = FlatHashSetChunks<KeyT, HashT, EqT>;
|
||||
//using FlatHashSet = std::unordered_set<KeyT, HashT, EqT>;
|
||||
|
||||
} // namespace td
|
||||
|
509
tdutils/td/utils/FlatHashMapChunks.h
Normal file
509
tdutils/td/utils/FlatHashMapChunks.h
Normal file
@ -0,0 +1,509 @@
|
||||
//
|
||||
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "td/utils/algorithm.h"
|
||||
#include "td/utils/bits.h"
|
||||
#include "td/utils/common.h"
|
||||
#include "td/utils/FlatHashMapLinear.h"
|
||||
#include "td/utils/logging.h"
|
||||
|
||||
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iterator>
#include <limits>
#include <new>
#include <utility>
|
||||
|
||||
// The NEON header and intrinsics exist only on AArch64; every other target uses the
// portable code paths below.
#if defined(__aarch64__)
#include <arm_neon.h>
#endif

// Portable byte matcher.
// Reads bytes[0..15] and returns a mask in which bit i is set iff bytes[i] == needle.
// The caller must make 16 bytes readable and mask off positions it does not own.
struct MaskPortable {
  static uint64_t equal_mask(uint8_t *bytes, uint8_t needle) {
    uint64_t res = 0;
    for (int i = 0; i < 16; i++) {
      res |= static_cast<uint64_t>(bytes[i] == needle) << i;
    }
    return res;
  }
};

// Byte matcher with a 4-bits-per-byte layout: bit 4 * i of the result is set iff
// bytes[i] == needle. The layout differs from MaskPortable and MaskNeon.
struct MaskNeonFolly {
  static uint64_t equal_mask(uint8_t *bytes, uint8_t needle) {
#if defined(__aarch64__)
    uint8x16_t input_mask = vld1q_u8(bytes);
    auto needle_mask = vdupq_n_u8(needle);
    auto eq_mask = vceqq_u8(input_mask, needle_mask);
    // get info from every byte into the bottom half of every uint16_t
    // by shifting right 4, then round to get it into a 64-bit vector
    uint8x8_t shifted_eq_mask = vshrn_n_u16(vreinterpretq_u16_u8(eq_mask), 4);
    uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(shifted_eq_mask), 0);
    return mask & 0x1111111111111111;
#else
    // portable emulation of the same bit layout
    uint64_t res = 0;
    for (int i = 0; i < 16; i++) {
      res |= static_cast<uint64_t>(bytes[i] == needle) << (4 * i);
    }
    return res;
#endif
  }
};

// Byte matcher with the same result layout as MaskPortable (bit i <=> bytes[i] == needle).
struct MaskNeon {
  static uint64_t equal_mask(uint8_t *bytes, uint8_t needle) {
#if defined(__aarch64__)
    uint8x16_t input_mask = vld1q_u8(bytes);
    auto needle_mask = vdupq_n_u8(needle);
    auto eq_mask = vceqq_u8(input_mask, needle_mask);
    // View the comparison result as 8 x uint16: bit 7 belongs to the even byte of a lane,
    // bit 8 to the odd byte. Keep just these two bits per lane.
    uint16x8_t MASK = vdupq_n_u16(0x180);
    uint16x8_t a_masked = vandq_u16(vreinterpretq_u16_u8(eq_mask), MASK);
    // Shift lane j so that its two bits land on result bits 2 * j and 2 * j + 1
    // (a negative count shifts right); the lanes then occupy disjoint bits and can be summed.
    const int16_t __attribute__((aligned(16))) SHIFT_ARR[8] = {-7, -5, -3, -1, 1, 3, 5, 7};
    int16x8_t SHIFT = vld1q_s16(SHIFT_ARR);
    uint16x8_t a_shifted = vshlq_u16(a_masked, SHIFT);
    return vaddvq_u16(a_shifted);
#else
    return MaskPortable::equal_mask(bytes, needle);
#endif
  }
};
|
||||
|
||||
namespace td {
|
||||
template <class NodeT, class HashT, class EqT>
|
||||
class FlatHashTableChunks {
|
||||
public:
|
||||
using Self = FlatHashTableChunks<NodeT, HashT, EqT>;
|
||||
using Node = NodeT;
|
||||
using NodeIterator = typename fixed_vector<Node>::iterator;
|
||||
using ConstNodeIterator = typename fixed_vector<Node>::const_iterator;
|
||||
|
||||
using KeyT = typename Node::key_type;
|
||||
using key_type = typename Node::key_type;
|
||||
using public_type = typename Node::public_type;
|
||||
using value_type = typename Node::public_type;
|
||||
|
||||
struct Iterator {
|
||||
using iterator_category = std::bidirectional_iterator_tag;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using value_type = public_type;
|
||||
using pointer = public_type *;
|
||||
using reference = public_type &;
|
||||
|
||||
friend class FlatHashTableChunks;
|
||||
Iterator &operator++() {
|
||||
do {
|
||||
++it_;
|
||||
} while (it_ != map_->nodes_.end() && it_->empty());
|
||||
return *this;
|
||||
}
|
||||
Iterator &operator--() {
|
||||
do {
|
||||
--it_;
|
||||
} while (it_->empty());
|
||||
return *this;
|
||||
}
|
||||
reference operator*() {
|
||||
return it_->get_public();
|
||||
}
|
||||
pointer operator->() {
|
||||
return &*it_;
|
||||
}
|
||||
bool operator==(const Iterator &other) const {
|
||||
DCHECK(map_ == other.map_);
|
||||
return it_ == other.it_;
|
||||
}
|
||||
bool operator!=(const Iterator &other) const {
|
||||
DCHECK(map_ == other.map_);
|
||||
return it_ != other.it_;
|
||||
}
|
||||
|
||||
Iterator() = default;
|
||||
Iterator(NodeIterator it, Self *map) : it_(std::move(it)), map_(map) {
|
||||
}
|
||||
|
||||
private:
|
||||
NodeIterator it_;
|
||||
Self *map_;
|
||||
};
|
||||
|
||||
struct ConstIterator {
|
||||
using iterator_category = std::bidirectional_iterator_tag;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using value_type = public_type;
|
||||
using pointer = const value_type *;
|
||||
using reference = const value_type &;
|
||||
|
||||
friend class FlatHashTableChunks;
|
||||
ConstIterator &operator++() {
|
||||
++it_;
|
||||
return *this;
|
||||
}
|
||||
ConstIterator &operator--() {
|
||||
--it_;
|
||||
return *this;
|
||||
}
|
||||
reference operator*() {
|
||||
return *it_;
|
||||
}
|
||||
pointer operator->() {
|
||||
return &*it_;
|
||||
}
|
||||
bool operator==(const ConstIterator &other) const {
|
||||
return it_ == other.it_;
|
||||
}
|
||||
bool operator!=(const ConstIterator &other) const {
|
||||
return it_ != other.it_;
|
||||
}
|
||||
|
||||
ConstIterator() = default;
|
||||
ConstIterator(Iterator it) : it_(std::move(it)) {
|
||||
}
|
||||
|
||||
private:
|
||||
Iterator it_;
|
||||
};
|
||||
using iterator = Iterator;
|
||||
using const_iterator = ConstIterator;
|
||||
|
||||
FlatHashTableChunks() = default;
|
||||
FlatHashTableChunks(const FlatHashTableChunks &other) : FlatHashTableChunks(other.begin(), other.end()) {
|
||||
}
|
||||
FlatHashTableChunks &operator=(const FlatHashTableChunks &other) {
|
||||
assign(other.begin(), other.end());
|
||||
return *this;
|
||||
}
|
||||
|
||||
FlatHashTableChunks(std::initializer_list<Node> nodes) {
|
||||
reserve(nodes.size());
|
||||
for (auto &node : td::reversed(nodes)) {
|
||||
CHECK(!node.empty());
|
||||
if (count(node.first) > 0) {
|
||||
continue;
|
||||
}
|
||||
emplace_node(Node{node.first, node.second});
|
||||
}
|
||||
}
|
||||
|
||||
FlatHashTableChunks(FlatHashTableChunks &&other) noexcept {
|
||||
swap(other);
|
||||
}
|
||||
FlatHashTableChunks &operator=(FlatHashTableChunks &&other) noexcept {
|
||||
swap(other);
|
||||
return *this;
|
||||
}
|
||||
void swap(FlatHashTableChunks &other) noexcept {
|
||||
using std::swap;
|
||||
swap(nodes_, other.nodes_);
|
||||
swap(chunks_, other.chunks_);
|
||||
swap(used_nodes_, other.used_nodes_);
|
||||
}
|
||||
~FlatHashTableChunks() = default;
|
||||
|
||||
template <class ItT>
|
||||
FlatHashTableChunks(ItT begin, ItT end) {
|
||||
assign(begin, end);
|
||||
}
|
||||
|
||||
size_t bucket_count() const {
|
||||
return nodes_.size();
|
||||
}
|
||||
|
||||
Iterator find(const KeyT &key) {
|
||||
if (empty() || is_key_empty(key)) {
|
||||
return end();
|
||||
}
|
||||
auto hash = calc_hash(key);
|
||||
auto chunk_it = get_chunk_it(hash.chunk_i);
|
||||
while (true) {
|
||||
auto chunk_i = chunk_it.next();
|
||||
auto &chunk = chunks_[chunk_i];
|
||||
auto mask = MaskNeon::equal_mask(chunk.ctrl, hash.small_hash) & Chunk::MASK;
|
||||
while (mask != 0) {
|
||||
auto it = nodes_.begin() + td::count_trailing_zeroes64(mask) + chunk_i * Chunk::CHUNK_SIZE;
|
||||
if (EqT()(it->first, key)) {
|
||||
return Iterator{it, this};
|
||||
}
|
||||
mask &= mask - 1;
|
||||
}
|
||||
if (chunk.skipped_cnt == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return end();
|
||||
}
|
||||
|
||||
ConstIterator find(const KeyT &key) const {
|
||||
return ConstIterator(const_cast<Self *>(this)->find(key));
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return used_nodes_;
|
||||
}
|
||||
|
||||
bool empty() const {
|
||||
return size() == 0;
|
||||
}
|
||||
|
||||
Iterator begin() {
|
||||
if (empty()) {
|
||||
return end();
|
||||
}
|
||||
auto it = nodes_.begin();
|
||||
while (it->empty()) {
|
||||
++it;
|
||||
}
|
||||
return Iterator(it, this);
|
||||
}
|
||||
Iterator end() {
|
||||
return Iterator(nodes_.end(), this);
|
||||
}
|
||||
|
||||
ConstIterator begin() const {
|
||||
return ConstIterator(const_cast<Self *>(this)->begin());
|
||||
}
|
||||
ConstIterator end() const {
|
||||
return ConstIterator(const_cast<Self *>(this)->end());
|
||||
}
|
||||
|
||||
void reserve(size_t size) {
|
||||
//size_t want_size = normalize(size * 5 / 3 + 1);
|
||||
size_t want_size = normalize(size * 14 / 12 + 1);
|
||||
// size_t want_size = size * 2;
|
||||
if (want_size > nodes_.size()) {
|
||||
resize(want_size);
|
||||
}
|
||||
}
|
||||
|
||||
template <class... ArgsT>
|
||||
std::pair<Iterator, bool> emplace(KeyT key, ArgsT &&...args) {
|
||||
CHECK(!is_key_empty(key));
|
||||
auto it = find(key);
|
||||
if (it != end()) {
|
||||
return {it, false};
|
||||
}
|
||||
try_grow();
|
||||
|
||||
auto hash = calc_hash(key);
|
||||
auto chunk_it = get_chunk_it(hash.chunk_i);
|
||||
while (true) {
|
||||
auto chunk_i = chunk_it.next();
|
||||
auto &chunk = chunks_[chunk_i];
|
||||
auto mask = MaskPortable::equal_mask(chunk.ctrl, 0) & Chunk::MASK;
|
||||
if (mask != 0) {
|
||||
auto shift = td::count_trailing_zeroes64(mask);
|
||||
DCHECK(chunk.ctrl[shift] == 0);
|
||||
auto node_it = nodes_.begin() + shift + chunk_i * Chunk::CHUNK_SIZE;
|
||||
DCHECK(node_it->empty());
|
||||
node_it->emplace(std::move(key), std::forward<ArgsT>(args)...);
|
||||
DCHECK(!node_it->empty());
|
||||
chunk.ctrl[shift] = hash.small_hash;
|
||||
used_nodes_++;
|
||||
return {{node_it, this}, true};
|
||||
}
|
||||
CHECK(chunk.skipped_cnt != std::numeric_limits<uint16_t>::max());
|
||||
chunk.skipped_cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<Iterator, bool> insert(KeyT key) {
|
||||
return emplace(std::move(key));
|
||||
}
|
||||
|
||||
template <class ItT>
|
||||
void insert(ItT begin, ItT end) {
|
||||
for (; begin != end; ++begin) {
|
||||
emplace(*begin);
|
||||
}
|
||||
}
|
||||
|
||||
typename Node::value_type &operator[](const KeyT &key) {
|
||||
return emplace(key).first->value();
|
||||
}
|
||||
|
||||
size_t erase(const KeyT &key) {
|
||||
auto it = find(key);
|
||||
if (it == end()) {
|
||||
return 0;
|
||||
}
|
||||
erase(it);
|
||||
try_shrink();
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t count(const KeyT &key) const {
|
||||
return find(key) != end();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
used_nodes_ = 0;
|
||||
nodes_ = {};
|
||||
chunks_ = {};
|
||||
}
|
||||
|
||||
void erase(Iterator it) {
|
||||
DCHECK(it != end());
|
||||
DCHECK(!it.it_->empty());
|
||||
erase_node(it.it_);
|
||||
}
|
||||
|
||||
template <class F>
|
||||
void remove_if(F &&f) {
|
||||
for (auto it = nodes_.begin(), end = nodes_.end(); it != end; ++it) {
|
||||
if (!it->empty() && f(it->get_public())) {
|
||||
erase_node(it);
|
||||
}
|
||||
}
|
||||
try_shrink();
|
||||
}
|
||||
|
||||
private:
|
||||
struct Chunk {
|
||||
static constexpr int CHUNK_SIZE = 14;
|
||||
static constexpr int MASK = (1 << CHUNK_SIZE) - 1;
|
||||
// 0x0 - empty
|
||||
td::uint8 ctrl[CHUNK_SIZE] = {};
|
||||
uint16 skipped_cnt{0};
|
||||
};
|
||||
fixed_vector<Node> nodes_;
|
||||
fixed_vector<Chunk> chunks_;
|
||||
size_t used_nodes_{};
|
||||
|
||||
template <class ItT>
|
||||
void assign(ItT begin, ItT end) {
|
||||
clear();
|
||||
reserve(std::distance(begin, end));
|
||||
for (; begin != end; ++begin) {
|
||||
emplace(begin->first, begin->second);
|
||||
}
|
||||
}
|
||||
|
||||
void try_grow() {
|
||||
if (should_grow(used_nodes_ + 1, nodes_.size())) {
|
||||
grow();
|
||||
}
|
||||
}
|
||||
static bool should_grow(size_t used_count, size_t bucket_count) {
|
||||
return used_count * 14 > bucket_count * 12;
|
||||
}
|
||||
void try_shrink() {
|
||||
if (should_shrink(used_nodes_, nodes_.size())) {
|
||||
shrink();
|
||||
}
|
||||
}
|
||||
static bool should_shrink(size_t used_count, size_t bucket_count) {
|
||||
return used_count * 10 < bucket_count;
|
||||
}
|
||||
|
||||
static size_t normalize(size_t size) {
|
||||
auto x = (size / Chunk::CHUNK_SIZE) | 1;
|
||||
auto y = static_cast<size_t>(1) << (64 - count_leading_zeroes64(x));
|
||||
return y * Chunk::CHUNK_SIZE;
|
||||
}
|
||||
|
||||
void shrink() {
|
||||
size_t want_size = normalize((used_nodes_ + 1) * 5 / 3 + 1);
|
||||
resize(want_size);
|
||||
}
|
||||
|
||||
void grow() {
|
||||
size_t want_size = normalize(2 * nodes_.size() - !nodes_.empty());
|
||||
resize(want_size);
|
||||
}
|
||||
|
||||
struct HashInfo {
|
||||
size_t chunk_i;
|
||||
uint8_t small_hash;
|
||||
};
|
||||
struct ChunkIt {
|
||||
size_t chunk_i;
|
||||
size_t chunk_n;
|
||||
size_t shift{};
|
||||
size_t next() {
|
||||
chunk_i += shift;
|
||||
shift++;
|
||||
if (chunk_i >= chunk_n) {
|
||||
chunk_i -= chunk_n;
|
||||
}
|
||||
return chunk_i;
|
||||
}
|
||||
};
|
||||
|
||||
ChunkIt get_chunk_it(size_t chunk_i) {
|
||||
return {chunk_i, chunks_.size()};
|
||||
}
|
||||
|
||||
HashInfo calc_hash(const KeyT &key) {
|
||||
auto h = HashT()(key);
|
||||
// TODO: will be problematic with current hash.
|
||||
return {(h >> 8) % chunks_.size(), uint8_t(0x80 | h)};
|
||||
}
|
||||
|
||||
void resize(size_t new_size) {
|
||||
CHECK(new_size >= Chunk::CHUNK_SIZE);
|
||||
fixed_vector<Node> old_nodes(new_size);
|
||||
fixed_vector<Chunk> chunks(new_size / Chunk::CHUNK_SIZE);
|
||||
std::swap(old_nodes, nodes_);
|
||||
chunks_ = std::move(chunks);
|
||||
used_nodes_ = 0;
|
||||
|
||||
for (auto &node : old_nodes) {
|
||||
if (node.empty()) {
|
||||
continue;
|
||||
}
|
||||
emplace_node(std::move(node));
|
||||
}
|
||||
}
|
||||
|
||||
void emplace_node(Node &&node) {
|
||||
DCHECK(!node.empty());
|
||||
auto hash = calc_hash(node.first);
|
||||
auto chunk_it = get_chunk_it(hash.chunk_i);
|
||||
while (true) {
|
||||
auto chunk_i = chunk_it.next();
|
||||
auto &chunk = chunks_[chunk_i];
|
||||
auto mask = MaskPortable::equal_mask(chunk.ctrl, 0) & Chunk::MASK;
|
||||
if (mask != 0) {
|
||||
auto shift = td::count_trailing_zeroes64(mask);
|
||||
auto node_it = nodes_.begin() + shift + chunk_i * Chunk::CHUNK_SIZE;
|
||||
DCHECK(node_it->empty());
|
||||
*node_it = std::move(node);
|
||||
DCHECK(chunk.ctrl[shift] == 0);
|
||||
chunk.ctrl[shift] = hash.small_hash;
|
||||
DCHECK(chunk.ctrl[shift] != 0);
|
||||
used_nodes_++;
|
||||
break;
|
||||
}
|
||||
CHECK(chunk.skipped_cnt != std::numeric_limits<uint16_t>::max());
|
||||
chunk.skipped_cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
void next_bucket(size_t &bucket) const {
|
||||
bucket++;
|
||||
if (unlikely(bucket == nodes_.size())) {
|
||||
bucket = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void erase_node(NodeIterator it) {
|
||||
DCHECK(!it->empty());
|
||||
size_t empty_i = it - nodes_.begin();
|
||||
DCHECK(0 <= empty_i && empty_i < nodes_.size());
|
||||
auto empty_chunk_i = empty_i / Chunk::CHUNK_SIZE;
|
||||
auto hash = calc_hash(it->first);
|
||||
auto chunk_it = get_chunk_it(hash.chunk_i);
|
||||
while (true) {
|
||||
auto chunk_i = chunk_it.next();
|
||||
auto &chunk = chunks_[chunk_i];
|
||||
if (chunk_i == empty_chunk_i) {
|
||||
chunk.ctrl[empty_i - empty_chunk_i * Chunk::CHUNK_SIZE] = 0;
|
||||
break;
|
||||
}
|
||||
chunk.skipped_cnt--;
|
||||
}
|
||||
it->clear();
|
||||
used_nodes_--;
|
||||
}
|
||||
};
|
||||
|
||||
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
|
||||
using FlatHashMapChunks = FlatHashTableChunks<MapNode<KeyT, ValueT>, HashT, EqT>;
|
||||
template <class KeyT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
|
||||
using FlatHashSetChunks = FlatHashTableChunks<SetNode<KeyT>, HashT, EqT>;
|
||||
|
||||
template <class NodeT, class HashT, class EqT, class FuncT>
|
||||
void table_remove_if(FlatHashTableChunks<NodeT, HashT, EqT> &table, FuncT &&func) {
|
||||
table.remove_if(func);
|
||||
}
|
||||
|
||||
} // namespace td
|
533
tdutils/td/utils/FlatHashMapLinear.h
Normal file
533
tdutils/td/utils/FlatHashMapLinear.h
Normal file
@ -0,0 +1,533 @@
|
||||
//
|
||||
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "td/utils/bits.h"
|
||||
#include "td/utils/common.h"
|
||||
#include "td/utils/fixed_vector.h"
|
||||
#include "td/utils/logging.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <initializer_list>
|
||||
#include <iterator>
|
||||
#include <new>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
namespace td {
|
||||
// TODO: move
|
||||
// Returns true when `key` compares equal to a value-initialized KeyT, which is
// the value the node types in this header use to mark an unoccupied slot.
template <class KeyT>
bool is_key_empty(const KeyT &key) {
  const bool equals_default = (key == KeyT());
  return equals_default;
}
|
||||
|
||||
// Slot of a hash map.
//
// `first` is the key; a value-initialized key marks the slot as empty. `second`
// lives in an anonymous union, so it is constructed and destroyed manually and
// is only alive while the slot is non-empty.
template <class KeyT, class ValueT>
struct MapNode {
  using first_type = KeyT;
  using second_type = ValueT;
  using key_type = KeyT;
  using public_type = MapNode<KeyT, ValueT>;
  using value_type = ValueT;

  KeyT first{};
  union {
    ValueT second;
  };

  // Creates an empty slot; `second` is left unconstructed.
  MapNode() {
  }

  // Creates an occupied slot. `key` must not be the empty key.
  MapNode(KeyT key, ValueT value) : first(std::move(key)) {
    new (&second) ValueT(std::move(value));
    DCHECK(!empty());
  }

  // Move construction is move assignment into a freshly emptied slot.
  MapNode(MapNode &&other) noexcept {
    *this = std::move(other);
  }

  // Transfers key and value from `other`, leaving `other` empty.
  // Requires this slot to be empty and `other` to be occupied.
  MapNode &operator=(MapNode &&other) noexcept {
    DCHECK(empty());
    DCHECK(!other.empty());
    first = std::move(other.first);
    other.first = KeyT{};
    new (&second) ValueT(std::move(other.second));
    other.second.~ValueT();
    return *this;
  }

  // Destroys the value only if the slot is occupied.
  ~MapNode() {
    if (!empty()) {
      second.~ValueT();
    }
  }

  const KeyT &key() const {
    return first;
  }

  ValueT &value() {
    return second;
  }

  // The node itself is what iterators expose for maps.
  MapNode &get_public() {
    return *this;
  }

  bool empty() const {
    return is_key_empty(key());
  }

  // Turns an occupied slot back into an empty one.
  void clear() {
    DCHECK(!empty());
    first = KeyT();
    second.~ValueT();
    DCHECK(empty());
  }

  // Fills an empty slot, constructing the value in place from `args`.
  template <class... ArgsT>
  void emplace(KeyT key, ArgsT &&...args) {
    DCHECK(empty());
    first = std::move(key);
    new (&second) ValueT(std::forward<ArgsT>(args)...);
    DCHECK(!empty());
  }
};
|
||||
|
||||
// Slot of a hash set: only a key, where a value-initialized key marks the slot
// as empty. Key and value accessors both return the key.
template <class KeyT>
struct SetNode {
  using first_type = KeyT;
  using key_type = KeyT;
  using public_type = KeyT;
  using value_type = KeyT;

  KeyT first{};

  SetNode() = default;

  explicit SetNode(KeyT key) : first(std::move(key)) {
  }

  // Move construction is move assignment into a freshly emptied slot.
  SetNode(SetNode &&other) noexcept {
    *this = std::move(other);
  }

  // Takes the key from `other` and leaves `other` empty.
  // Requires this slot to be empty and `other` to be occupied.
  SetNode &operator=(SetNode &&other) noexcept {
    DCHECK(empty());
    DCHECK(!other.empty());
    first = std::move(other.first);
    other.first = KeyT{};
    return *this;
  }

  const KeyT &key() const {
    return first;
  }

  const KeyT &value() const {
    return first;
  }

  // For sets the key is what iterators expose.
  KeyT &get_public() {
    return first;
  }

  bool empty() const {
    return is_key_empty(key());
  }

  // Resets the key to the empty key.
  void clear() {
    first = KeyT();
    CHECK(empty());
  }

  void emplace(KeyT key) {
    first = std::move(key);
  }
};
|
||||
|
||||
template <class NodeT, class HashT, class EqT>
|
||||
class FlatHashTable {
|
||||
public:
|
||||
using Self = FlatHashTable<NodeT, HashT, EqT>;
|
||||
using Node = NodeT;
|
||||
using NodeIterator = typename fixed_vector<Node>::iterator;
|
||||
using ConstNodeIterator = typename fixed_vector<Node>::const_iterator;
|
||||
|
||||
using KeyT = typename Node::key_type;
|
||||
using key_type = typename Node::key_type;
|
||||
using public_type = typename Node::public_type;
|
||||
using value_type = typename Node::public_type;
|
||||
|
||||
struct Iterator {
|
||||
using iterator_category = std::bidirectional_iterator_tag;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using value_type = public_type;
|
||||
using pointer = public_type *;
|
||||
using reference = public_type &;
|
||||
|
||||
friend class FlatHashTable;
|
||||
Iterator &operator++() {
|
||||
do {
|
||||
++it_;
|
||||
} while (it_ != map_->nodes_.end() && it_->empty());
|
||||
return *this;
|
||||
}
|
||||
Iterator &operator--() {
|
||||
do {
|
||||
--it_;
|
||||
} while (it_->empty());
|
||||
return *this;
|
||||
}
|
||||
reference operator*() {
|
||||
return it_->get_public();
|
||||
}
|
||||
pointer operator->() {
|
||||
return &*it_;
|
||||
}
|
||||
bool operator==(const Iterator &other) const {
|
||||
DCHECK(map_ == other.map_);
|
||||
return it_ == other.it_;
|
||||
}
|
||||
bool operator!=(const Iterator &other) const {
|
||||
DCHECK(map_ == other.map_);
|
||||
return it_ != other.it_;
|
||||
}
|
||||
|
||||
Iterator() = default;
|
||||
Iterator(NodeIterator it, Self *map) : it_(std::move(it)), map_(map) {
|
||||
}
|
||||
|
||||
private:
|
||||
NodeIterator it_;
|
||||
Self *map_;
|
||||
};
|
||||
|
||||
struct ConstIterator {
|
||||
using iterator_category = std::bidirectional_iterator_tag;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using value_type = public_type;
|
||||
using pointer = const value_type *;
|
||||
using reference = const value_type &;
|
||||
|
||||
friend class FlatHashTable;
|
||||
ConstIterator &operator++() {
|
||||
++it_;
|
||||
return *this;
|
||||
}
|
||||
ConstIterator &operator--() {
|
||||
--it_;
|
||||
return *this;
|
||||
}
|
||||
reference operator*() {
|
||||
return *it_;
|
||||
}
|
||||
pointer operator->() {
|
||||
return &*it_;
|
||||
}
|
||||
bool operator==(const ConstIterator &other) const {
|
||||
return it_ == other.it_;
|
||||
}
|
||||
bool operator!=(const ConstIterator &other) const {
|
||||
return it_ != other.it_;
|
||||
}
|
||||
|
||||
ConstIterator() = default;
|
||||
ConstIterator(Iterator it) : it_(std::move(it)) {
|
||||
}
|
||||
|
||||
private:
|
||||
Iterator it_;
|
||||
};
|
||||
using iterator = Iterator;
|
||||
using const_iterator = ConstIterator;
|
||||
|
||||
FlatHashTable() = default;
|
||||
FlatHashTable(const FlatHashTable &other) : FlatHashTable(other.begin(), other.end()) {
|
||||
}
|
||||
FlatHashTable &operator=(const FlatHashTable &other) {
|
||||
assign(other.begin(), other.end());
|
||||
return *this;
|
||||
}
|
||||
|
||||
FlatHashTable(std::initializer_list<Node> nodes) {
|
||||
reserve(nodes.size());
|
||||
for (auto &node : nodes) {
|
||||
CHECK(!node.empty());
|
||||
auto bucket = calc_bucket(node.first);
|
||||
while (true) {
|
||||
if (nodes_[bucket].key() == node.first) {
|
||||
nodes_[bucket].second = node.second;
|
||||
break;
|
||||
}
|
||||
if (nodes_[bucket].empty()) {
|
||||
nodes_[bucket].emplace(node.first, node.second);
|
||||
used_nodes_++;
|
||||
break;
|
||||
}
|
||||
next_bucket(bucket);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FlatHashTable(FlatHashTable &&other) noexcept : nodes_(std::move(other.nodes_)), used_nodes_(other.used_nodes_) {
|
||||
other.used_nodes_ = 0;
|
||||
}
|
||||
FlatHashTable &operator=(FlatHashTable &&other) noexcept {
|
||||
nodes_ = std::move(other.nodes_);
|
||||
used_nodes_ = other.used_nodes_;
|
||||
other.used_nodes_ = 0;
|
||||
return *this;
|
||||
}
|
||||
void swap(FlatHashTable &other) noexcept {
|
||||
using std::swap;
|
||||
swap(nodes_, other.nodes_);
|
||||
swap(used_nodes_, other.used_nodes_);
|
||||
}
|
||||
~FlatHashTable() = default;
|
||||
|
||||
template <class ItT>
|
||||
FlatHashTable(ItT begin, ItT end) {
|
||||
assign(begin, end);
|
||||
}
|
||||
|
||||
size_t bucket_count() const {
|
||||
return nodes_.size();
|
||||
}
|
||||
|
||||
Iterator find(const KeyT &key) {
|
||||
if (empty() || is_key_empty(key)) {
|
||||
return end();
|
||||
}
|
||||
auto bucket = calc_bucket(key);
|
||||
while (true) {
|
||||
if (EqT()(nodes_[bucket].key(), key)) {
|
||||
return Iterator{nodes_.begin() + bucket, this};
|
||||
}
|
||||
if (nodes_[bucket].empty()) {
|
||||
return end();
|
||||
}
|
||||
next_bucket(bucket);
|
||||
}
|
||||
}
|
||||
|
||||
ConstIterator find(const KeyT &key) const {
|
||||
return ConstIterator(const_cast<Self *>(this)->find(key));
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return used_nodes_;
|
||||
}
|
||||
|
||||
bool empty() const {
|
||||
return size() == 0;
|
||||
}
|
||||
|
||||
Iterator begin() {
|
||||
if (empty()) {
|
||||
return end();
|
||||
}
|
||||
auto it = nodes_.begin();
|
||||
while (it->empty()) {
|
||||
++it;
|
||||
}
|
||||
return Iterator(it, this);
|
||||
}
|
||||
Iterator end() {
|
||||
return Iterator(nodes_.end(), this);
|
||||
}
|
||||
|
||||
ConstIterator begin() const {
|
||||
return ConstIterator(const_cast<Self *>(this)->begin());
|
||||
}
|
||||
ConstIterator end() const {
|
||||
return ConstIterator(const_cast<Self *>(this)->end());
|
||||
}
|
||||
|
||||
void reserve(size_t size) {
|
||||
size_t want_size = normalize(size * 5 / 3 + 1);
|
||||
// size_t want_size = size * 2;
|
||||
if (want_size > nodes_.size()) {
|
||||
resize(want_size);
|
||||
}
|
||||
}
|
||||
|
||||
template <class... ArgsT>
|
||||
std::pair<Iterator, bool> emplace(KeyT key, ArgsT &&...args) {
|
||||
try_grow();
|
||||
CHECK(!is_key_empty(key));
|
||||
auto bucket = calc_bucket(key);
|
||||
while (true) {
|
||||
if (EqT()(nodes_[bucket].key(), key)) {
|
||||
return {Iterator{nodes_.begin() + bucket, this}, false};
|
||||
}
|
||||
if (nodes_[bucket].empty()) {
|
||||
nodes_[bucket].emplace(std::move(key), std::forward<ArgsT>(args)...);
|
||||
used_nodes_++;
|
||||
return {Iterator{nodes_.begin() + bucket, this}, true};
|
||||
}
|
||||
next_bucket(bucket);
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<Iterator, bool> insert(KeyT key) {
|
||||
return emplace(std::move(key));
|
||||
}
|
||||
|
||||
template <class ItT>
|
||||
void insert(ItT begin, ItT end) {
|
||||
for (; begin != end; ++begin) {
|
||||
emplace(*begin);
|
||||
}
|
||||
}
|
||||
|
||||
typename Node::value_type &operator[](const KeyT &key) {
|
||||
return emplace(key).first->value();
|
||||
}
|
||||
|
||||
size_t erase(const KeyT &key) {
|
||||
auto it = find(key);
|
||||
if (it == end()) {
|
||||
return 0;
|
||||
}
|
||||
erase(it);
|
||||
try_shrink();
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t count(const KeyT &key) const {
|
||||
return find(key) != end();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
used_nodes_ = 0;
|
||||
nodes_ = {};
|
||||
}
|
||||
|
||||
void erase(Iterator it) {
|
||||
DCHECK(it != end());
|
||||
DCHECK(!it.it_->empty());
|
||||
erase_node(it.it_);
|
||||
}
|
||||
|
||||
template <class F>
|
||||
void remove_if(F &&f) {
|
||||
auto it = nodes_.begin();
|
||||
while (it != nodes_.end() && !it->empty()) {
|
||||
++it;
|
||||
}
|
||||
auto first_empty = it;
|
||||
for (; it != nodes_.end();) {
|
||||
if (!it->empty() && f(it->get_public())) {
|
||||
erase_node(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
for (it = nodes_.begin(); it != first_empty;) {
|
||||
if (!it->empty() && f(it->get_public())) {
|
||||
erase_node(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
try_shrink();
|
||||
}
|
||||
|
||||
private:
|
||||
fixed_vector<Node> nodes_;
|
||||
size_t used_nodes_{};
|
||||
|
||||
template <class ItT>
|
||||
void assign(ItT begin, ItT end) {
|
||||
resize(std::distance(begin, end)); // TODO: should be conditional
|
||||
for (; begin != end; ++begin) {
|
||||
emplace(begin->first, begin->second);
|
||||
}
|
||||
}
|
||||
|
||||
void try_grow() {
|
||||
if (should_grow(used_nodes_ + 1, nodes_.size())) {
|
||||
grow();
|
||||
}
|
||||
}
|
||||
static bool should_grow(size_t used_count, size_t bucket_count) {
|
||||
return used_count * 5 > bucket_count * 3;
|
||||
}
|
||||
void try_shrink() {
|
||||
if (should_shrink(used_nodes_, nodes_.size())) {
|
||||
shrink();
|
||||
}
|
||||
}
|
||||
static bool should_shrink(size_t used_count, size_t bucket_count) {
|
||||
return used_count * 10 < bucket_count;
|
||||
}
|
||||
|
||||
static size_t normalize(size_t size) {
|
||||
return static_cast<size_t>(1) << (64 - count_leading_zeroes64(size | 7));
|
||||
}
|
||||
|
||||
void shrink() {
|
||||
size_t want_size = normalize((used_nodes_ + 1) * 5 / 3 + 1);
|
||||
resize(want_size);
|
||||
}
|
||||
|
||||
void grow() {
|
||||
size_t want_size = normalize(2 * nodes_.size() - !nodes_.empty());
|
||||
resize(want_size);
|
||||
}
|
||||
|
||||
size_t calc_bucket(const KeyT &key) const {
|
||||
return HashT()(key) % nodes_.size();
|
||||
}
|
||||
|
||||
void resize(size_t new_size) {
|
||||
fixed_vector<Node> old_nodes(new_size);
|
||||
std::swap(old_nodes, nodes_);
|
||||
|
||||
for (auto &node : old_nodes) {
|
||||
if (node.empty()) {
|
||||
continue;
|
||||
}
|
||||
size_t bucket = calc_bucket(node.key());
|
||||
while (!nodes_[bucket].empty()) {
|
||||
next_bucket(bucket);
|
||||
}
|
||||
nodes_[bucket] = std::move(node);
|
||||
}
|
||||
}
|
||||
|
||||
void next_bucket(size_t &bucket) const {
|
||||
bucket++;
|
||||
if (unlikely(bucket == nodes_.size())) {
|
||||
bucket = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void erase_node(NodeIterator it) {
|
||||
size_t empty_i = it - nodes_.begin();
|
||||
auto empty_bucket = empty_i;
|
||||
DCHECK(0 <= empty_i && empty_i < nodes_.size());
|
||||
nodes_[empty_bucket].clear();
|
||||
used_nodes_--;
|
||||
|
||||
for (size_t test_i = empty_i + 1;; test_i++) {
|
||||
auto test_bucket = test_i;
|
||||
if (test_bucket >= nodes_.size()) {
|
||||
test_bucket -= nodes_.size();
|
||||
}
|
||||
|
||||
if (nodes_[test_bucket].empty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
auto want_i = calc_bucket(nodes_[test_bucket].key());
|
||||
if (want_i < empty_i) {
|
||||
want_i += nodes_.size();
|
||||
}
|
||||
|
||||
if (want_i <= empty_i || want_i > test_i) {
|
||||
nodes_[empty_bucket] = std::move(nodes_[test_bucket]);
|
||||
empty_i = test_i;
|
||||
empty_bucket = test_bucket;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
|
||||
using FlatHashMapImpl = FlatHashTable<MapNode<KeyT, ValueT>, HashT, EqT>;
|
||||
template <class KeyT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
|
||||
using FlatHashSetImpl = FlatHashTable<SetNode<KeyT>, HashT, EqT>;
|
||||
|
||||
} // namespace td
|
@ -181,12 +181,14 @@ struct reversion_wrapper {
|
||||
|
||||
template <typename T>
|
||||
auto begin(reversion_wrapper<T> w) {
|
||||
return w.iterable.rbegin();
|
||||
using std::rbegin;
|
||||
return rbegin(w.iterable);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto end(reversion_wrapper<T> w) {
|
||||
return w.iterable.rend();
|
||||
using std::rend;
|
||||
return rend(w.iterable);
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
|
58
tdutils/td/utils/fixed_vector.h
Normal file
58
tdutils/td/utils/fixed_vector.h
Normal file
@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
#include "td/utils/common.h"
|
||||
|
||||
namespace td {
|
||||
// Heap-allocated array whose size is fixed at construction.
//
// Elements are default-initialized by `new T[size]`. The type is move-only;
// a moved-from fixed_vector is empty (null storage, size 0).
template <class T>
class fixed_vector {
 public:
  using iterator = T *;
  using const_iterator = const T *;

  fixed_vector() = default;
  explicit fixed_vector(size_t size) : ptr_(new T[size]), size_(size) {
  }
  fixed_vector(fixed_vector &&other) noexcept {
    swap(other);
  }
  // Releases the current storage and takes over the storage of `other`,
  // leaving `other` empty rather than handing it this vector's old elements.
  fixed_vector &operator=(fixed_vector &&other) noexcept {
    if (this != &other) {
      fixed_vector taken(std::move(other));  // `other` is empty from here on
      swap(taken);                           // old storage dies with `taken`
    }
    return *this;
  }
  fixed_vector(const fixed_vector &) = delete;
  fixed_vector &operator=(const fixed_vector &) = delete;
  ~fixed_vector() {
    delete[] ptr_;
  }

  // Unchecked element access.
  T &operator[](size_t i) {
    return ptr_[i];
  }
  const T &operator[](size_t i) const {
    return ptr_[i];
  }

  T *begin() {
    return ptr_;
  }
  const T *begin() const {
    return ptr_;
  }
  T *end() {
    return ptr_ + size_;
  }
  const T *end() const {
    return ptr_ + size_;
  }

  bool empty() const {
    return size() == 0;
  }
  size_t size() const {
    return size_;
  }

  void swap(fixed_vector<T> &other) noexcept {
    std::swap(ptr_, other.ptr_);
    std::swap(size_, other.size_);
  }

 private:
  T *ptr_{};
  size_t size_{0};
};
|
||||
} // namespace td
|
@ -7,13 +7,16 @@
|
||||
#include "td/utils/algorithm.h"
|
||||
#include "td/utils/common.h"
|
||||
#include "td/utils/FlatHashMap.h"
|
||||
#include "td/utils/FlatHashMapChunks.h"
|
||||
#include "td/utils/Random.h"
|
||||
#include "td/utils/Slice.h"
|
||||
#include "td/utils/tests.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <random>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
template <class T>
|
||||
@ -23,18 +26,66 @@ static auto extract_kv(const T &reference) {
|
||||
return expected;
|
||||
}
|
||||
|
||||
TEST(FlatHashMap, basic) {
|
||||
{
|
||||
td::FlatHashSet<int> s;
|
||||
s.insert(5);
|
||||
for (auto x : s) {
|
||||
template <class T>
|
||||
static auto extract_k(const T &reference) {
|
||||
auto expected = td::transform(reference, [](auto &it) { return it; });
|
||||
std::sort(expected.begin(), expected.end());
|
||||
return expected;
|
||||
}
|
||||
|
||||
// Smoke test: values written through operator[] can be read back.
TEST(FlatHashMapChunks, basic) {
  td::FlatHashMapChunks<int, int> table;

  table[5] = 3;
  ASSERT_EQ(3, table[5]);

  table[3] = 4;
  ASSERT_EQ(4, table[3]);
}
|
||||
|
||||
// Simulates linear probing on a plain bit array and logs the longest run of
// consecutive occupied slots for several load factors. Nothing is asserted;
// the seed comes from std::random_device, so the logged numbers vary per run.
TEST(FlatHashMap, probing) {
  auto simulate = [](int buckets, int elements) {
    CHECK(buckets >= elements);
    std::vector<bool> occupied(buckets, false);
    std::random_device rnd;
    std::mt19937 mt(rnd());
    std::uniform_int_distribution<int32_t> d(0, buckets - 1);

    // Place each element at a random slot, probing forward with wrap-around.
    for (int placed = 0; placed < elements; placed++) {
      int pos = d(mt);
      while (occupied[pos]) {
        pos = (pos + 1 == buckets) ? 0 : pos + 1;
      }
      occupied[pos] = true;
    }

    // Longest run of occupied slots; runs are not joined across the array end.
    int max_chain = 0;
    int cur_chain = 0;
    for (auto is_used : occupied) {
      cur_chain = is_used ? cur_chain + 1 : 0;
      max_chain = std::max(max_chain, cur_chain);
    }
    LOG(ERROR) << "buckets=" << buckets << " elements=" << elements << " max_chain=" << max_chain;
  };

  simulate(8192, int(8192 * 0.8));
  simulate(8192, int(8192 * 0.6));
  simulate(8192, int(8192 * 0.3));
}
|
||||
|
||||
// Insert/erase churn over a sliding window of keys. The test is currently
// disabled: the unconditional return below makes the rest of the body dead.
// NOTE(review): (i + N / 2) % N is 0 for i == N / 2, and 0 is the "empty" key
// that the linear table's emplace() rejects with a CHECK - confirm before
// re-enabling.
TEST(FlatHashSet, TL) {
  return;

  td::FlatHashSet<int> set;
  int N = 100000;
  for (int i = 0; i < 10000000; i++) {
    const int added = (i + N / 2) % N;
    const int removed = i % N;
    set.insert(added);
    set.erase(removed);
    set.insert(added + 1);
    set.erase(removed + 1);
  }
}
|
||||
|
||||
TEST(FlatHashMap, basic) {
|
||||
{
|
||||
td::FlatHashMap<int, int> map;
|
||||
map[1] = 2;
|
||||
@ -71,7 +122,7 @@ TEST(FlatHashMap, basic) {
|
||||
ASSERT_EQ(1u, map.size());
|
||||
}
|
||||
|
||||
using KV = td::FlatHashMapImpl<td::string, td::string>;
|
||||
using KV = td::FlatHashMap<td::string, td::string>;
|
||||
using Data = td::vector<std::pair<td::string, td::string>>;
|
||||
auto data = Data{{"a", "b"}, {"c", "d"}};
|
||||
{ ASSERT_EQ(Data{}, extract_kv(KV())); }
|
||||
@ -163,25 +214,36 @@ TEST(FlatHashMap, remove_if_basic) {
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr size_t MAX_TABLE_SIZE = 1000;
|
||||
TEST(FlatHashMap, stress_test) {
|
||||
td::vector<td::RandomSteps::Step> steps;
|
||||
auto add_step = [&steps](td::Slice, td::uint32 weight, auto f) {
|
||||
steps.emplace_back(td::RandomSteps::Step{std::move(f), weight});
|
||||
};
|
||||
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
size_t max_table_size = 1000; // dynamic value
|
||||
size_t max_table_size = MAX_TABLE_SIZE; // dynamic value
|
||||
std::unordered_map<td::uint64, td::uint64> ref;
|
||||
td::FlatHashMapImpl<td::uint64, td::uint64> tbl;
|
||||
td::FlatHashMap<td::uint64, td::uint64> tbl;
|
||||
|
||||
auto validate = [&] {
|
||||
ASSERT_EQ(ref.empty(), tbl.empty());
|
||||
ASSERT_EQ(ref.size(), tbl.size());
|
||||
ASSERT_EQ(extract_kv(ref), extract_kv(tbl));
|
||||
for (auto &kv : ref) {
|
||||
ASSERT_EQ(ref[kv.first], tbl[kv.first]);
|
||||
auto tbl_it = tbl.find(kv.first);
|
||||
ASSERT_TRUE(tbl_it != tbl.end());
|
||||
ASSERT_EQ(kv.second, tbl_it->second);
|
||||
}
|
||||
};
|
||||
|
||||
td::vector<td::RandomSteps::Step> steps;
|
||||
auto add_step = [&](td::Slice step_name, td::uint32 weight, auto f) {
|
||||
auto g = [&, step_name, f = std::move(f)]() {
|
||||
//LOG(ERROR) << step_name;
|
||||
//ASSERT_EQ(ref.size(), tbl.size());
|
||||
f();
|
||||
ASSERT_EQ(ref.size(), tbl.size());
|
||||
//validate();
|
||||
};
|
||||
steps.emplace_back(td::RandomSteps::Step{std::move(g), weight});
|
||||
};
|
||||
|
||||
auto gen_key = [&] {
|
||||
auto key = rnd() % 4000 + 1;
|
||||
return key;
|
||||
@ -191,13 +253,13 @@ TEST(FlatHashMap, stress_test) {
|
||||
validate();
|
||||
td::reset_to_empty(ref);
|
||||
td::reset_to_empty(tbl);
|
||||
max_table_size = rnd.fast(1, 1000);
|
||||
max_table_size = rnd.fast(1, MAX_TABLE_SIZE);
|
||||
});
|
||||
add_step("Clear hash table", 1, [&] {
|
||||
validate();
|
||||
ref.clear();
|
||||
tbl.clear();
|
||||
max_table_size = rnd.fast(1, 1000);
|
||||
max_table_size = rnd.fast(1, MAX_TABLE_SIZE);
|
||||
});
|
||||
|
||||
add_step("Insert random value", 1000, [&] {
|
||||
@ -265,6 +327,88 @@ TEST(FlatHashMap, stress_test) {
|
||||
td::table_remove_if(ref, condition);
|
||||
});
|
||||
|
||||
td::RandomSteps runner(std::move(steps));
|
||||
for (size_t i = 0; i < 1000000000; i++) {
|
||||
runner.step(rnd);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(FlatHashSet, stress_test) {
|
||||
td::vector<td::RandomSteps::Step> steps;
|
||||
auto add_step = [&steps](td::Slice, td::uint32 weight, auto f) {
|
||||
steps.emplace_back(td::RandomSteps::Step{std::move(f), weight});
|
||||
};
|
||||
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
size_t max_table_size = MAX_TABLE_SIZE; // dynamic value
|
||||
std::unordered_set<td::uint64> ref;
|
||||
td::FlatHashSet<td::uint64> tbl;
|
||||
|
||||
auto validate = [&] {
|
||||
ASSERT_EQ(ref.empty(), tbl.empty());
|
||||
ASSERT_EQ(ref.size(), tbl.size());
|
||||
ASSERT_EQ(extract_k(ref), extract_k(tbl));
|
||||
};
|
||||
auto gen_key = [&] {
|
||||
auto key = rnd() % 4000 + 1;
|
||||
return key;
|
||||
};
|
||||
|
||||
add_step("Reset hash table", 1, [&] {
|
||||
validate();
|
||||
td::reset_to_empty(ref);
|
||||
td::reset_to_empty(tbl);
|
||||
max_table_size = rnd.fast(1, MAX_TABLE_SIZE);
|
||||
});
|
||||
add_step("Clear hash table", 1, [&] {
|
||||
validate();
|
||||
ref.clear();
|
||||
tbl.clear();
|
||||
max_table_size = rnd.fast(1, MAX_TABLE_SIZE);
|
||||
});
|
||||
|
||||
add_step("Insert random value", 1000, [&] {
|
||||
if (tbl.size() > max_table_size) {
|
||||
return;
|
||||
}
|
||||
auto key = gen_key();
|
||||
ref.insert(key);
|
||||
tbl.insert(key);
|
||||
});
|
||||
|
||||
add_step("reserve", 10, [&] { tbl.reserve(rnd() % max_table_size); });
|
||||
|
||||
add_step("find", 1000, [&] {
|
||||
auto key = gen_key();
|
||||
auto ref_it = ref.find(key);
|
||||
auto tbl_it = tbl.find(key);
|
||||
ASSERT_EQ(ref_it == ref.end(), tbl_it == tbl.end());
|
||||
if (ref_it != ref.end()) {
|
||||
ASSERT_EQ(*ref_it, *tbl_it);
|
||||
}
|
||||
});
|
||||
|
||||
add_step("find_and_erase", 100, [&] {
|
||||
auto key = gen_key();
|
||||
auto ref_it = ref.find(key);
|
||||
auto tbl_it = tbl.find(key);
|
||||
ASSERT_EQ(ref_it == ref.end(), tbl_it == tbl.end());
|
||||
if (ref_it != ref.end()) {
|
||||
ref.erase(ref_it);
|
||||
tbl.erase(tbl_it);
|
||||
}
|
||||
});
|
||||
|
||||
add_step("remove_if", 5, [&] {
|
||||
auto mul = rnd();
|
||||
auto bit = rnd() % 64;
|
||||
auto condition = [&](auto &it) {
|
||||
return (((it * mul) >> bit) & 1) == 0;
|
||||
};
|
||||
td::table_remove_if(tbl, condition);
|
||||
td::table_remove_if(ref, condition);
|
||||
});
|
||||
|
||||
td::RandomSteps runner(std::move(steps));
|
||||
for (size_t i = 0; i < 10000000; i++) {
|
||||
runner.step(rnd);
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "td/utils/algorithm.h"
|
||||
#include "td/utils/common.h"
|
||||
#include "td/utils/FlatHashMap.h"
|
||||
#include "td/utils/FlatHashMapChunks.h"
|
||||
#include "td/utils/format.h"
|
||||
#include "td/utils/Hash.h"
|
||||
#include "td/utils/logging.h"
|
||||
@ -398,7 +399,7 @@ static void BM_remove_if_slow(benchmark::State &state) {
|
||||
template <typename TableT>
|
||||
static void BM_remove_if_slow_old(benchmark::State &state) {
|
||||
constexpr size_t N = 100000;
|
||||
constexpr size_t BATCH_SIZE = 500000;
|
||||
constexpr size_t BATCH_SIZE = 5000000;
|
||||
|
||||
TableT table;
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
@ -449,13 +450,112 @@ static void benchmark_create(td::Slice name) {
|
||||
}
|
||||
}
|
||||
|
||||
// One payload word padded to 64 bytes, so that consecutive nodes do not share
// a 64-byte block.
struct CacheMissNode {
  uint32_t data{};
  char padding[64 - sizeof(data)];
};
static_assert(sizeof(CacheMissNode) == 64, "CacheMissNode must be exactly 64 bytes");

// Benchmark kernel with a fixed trip count: always touches exactly `max_shift`
// nodes starting at `ptr`.
class IterateFast {
 public:
  // Folds the `data` of ptr[0 .. max_shift) into one value: data that is not a
  // multiple of max_shift is multiplied in, non-zero multiples are divided
  // out. A zero `data` (always a multiple) is skipped, because dividing by it
  // would be a division by zero.
  static __attribute__((noinline)) uint32_t iterate(CacheMissNode *ptr, size_t max_shift) {
    uint32_t res = 1;
    for (size_t i = 0; i < max_shift; i++) {
      const uint32_t value = ptr[i].data;
      if (value % max_shift != 0) {
        res *= value;
      } else if (value != 0) {
        res /= value;
      }
    }
    return res;
  }
};
|
||||
|
||||
class IterateSlow {
|
||||
public:
|
||||
static __attribute__((noinline)) uint32_t iterate(CacheMissNode *ptr, size_t max_shift) {
|
||||
uint32_t res = 1;
|
||||
for (size_t i = 0;; i++) {
|
||||
if (ptr[i].data % max_shift != 0) {
|
||||
res *= ptr[i].data;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
};
|
||||
#include <random>
|
||||
template <class F>
|
||||
void BM_cache_miss(benchmark::State &state) {
|
||||
uint32_t max_shift = state.range(0);
|
||||
bool flag = state.range(1);
|
||||
std::random_device rd;
|
||||
std::mt19937 rnd(rd());
|
||||
int N = 50000000;
|
||||
std::vector<CacheMissNode> nodes(N);
|
||||
uint32_t i = 0;
|
||||
for (auto &node : nodes) {
|
||||
if (flag) {
|
||||
node.data = i++ % max_shift;
|
||||
} else {
|
||||
node.data = rnd();
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> positions(N);
|
||||
std::uniform_int_distribution<uint32_t> rnd_pos(0, N - 1000);
|
||||
for (auto &pos : positions) {
|
||||
pos = rnd_pos(rnd);
|
||||
if (flag) {
|
||||
pos = pos / max_shift * max_shift + 1;
|
||||
}
|
||||
}
|
||||
|
||||
while (state.KeepRunningBatch(positions.size())) {
|
||||
for (const auto pos : positions) {
|
||||
auto *ptr = &nodes[pos];
|
||||
auto res = F::iterate(ptr, max_shift);
|
||||
benchmark::DoNotOptimize(res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reference implementation: compares the 16 bytes at `bytes` with `needle`
// one by one and returns a mask whose bit i is set when bytes[i] == needle.
uint64_t equal_mask_slow(uint8_t *bytes, uint8_t needle) {
  uint64_t mask = 0;
  for (int bit = 0; bit < 16; bit++) {
    if (bytes[bit] == needle) {
      mask |= static_cast<uint64_t>(1) << bit;
    }
  }
  return mask;
}
|
||||
|
||||
template <class MaskT>
|
||||
void BM_mask(benchmark::State &state) {
|
||||
size_t BATCH_SIZE = 1024;
|
||||
std::vector<uint8_t> bytes(BATCH_SIZE + 16);
|
||||
for (auto &b : bytes) {
|
||||
b = static_cast<uint8_t>(td::Random::fast(0, 17));
|
||||
}
|
||||
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
for (size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
benchmark::DoNotOptimize(MaskT::equal_mask(bytes.data() + i, 17));
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK_TEMPLATE(BM_mask, MaskPortable);
|
||||
BENCHMARK_TEMPLATE(BM_mask, MaskNeonFolly);
|
||||
BENCHMARK_TEMPLATE(BM_mask, MaskNeon);
|
||||
|
||||
#define FOR_EACH_TABLE(F) \
|
||||
F(td::FlatHashMapChunks) \
|
||||
F(td::FlatHashMapImpl) \
|
||||
F(folly::F14FastMap) \
|
||||
F(absl::flat_hash_map) \
|
||||
F(std::unordered_map) \
|
||||
F(std::map)
|
||||
|
||||
//BENCHMARK(BM_cache_miss<IterateSlow>)->Ranges({{1, 16}, {0, 1}});
|
||||
//BENCHMARK(BM_cache_miss<IterateFast>)->Ranges({{1, 16}, {0, 1}});
|
||||
//BENCHMARK_TEMPLATE(BM_Get, VectorTable<td::uint64, td::uint64>)->Range(1, 1 << 26);
|
||||
//BENCHMARK_TEMPLATE(BM_Get, SortedVectorTable<td::uint64, td::uint64>)->Range(1, 1 << 26);
|
||||
//BENCHMARK_TEMPLATE(BM_Get, NoOpTable<td::uint64, td::uint64>)->Range(1, 1 << 26);
|
||||
@ -476,16 +576,18 @@ static void benchmark_create(td::Slice name) {
|
||||
#define REGISTER_CACHE3_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_cache3, HT<td::uint64, td::uint64>)->Range(1, 1 << 23);
|
||||
#define REGISTER_ERASE_ALL_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_erase_all_with_begin, HT<td::uint64, td::uint64>);
|
||||
#define REGISTER_REMOVE_IF_SLOW_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_remove_if_slow, HT<td::uint64, td::uint64>);
|
||||
#define REGISTER_REMOVE_IF_SLOW_OLD_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_remove_if_slow_old, HT<td::uint64, td::uint64>);
|
||||
|
||||
FOR_EACH_TABLE(REGISTER_REMOVE_IF_SLOW_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_GET_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_CACHE3_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_CACHE2_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_CACHE_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_REMOVE_IF_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_EMPLACE_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_ERASE_ALL_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_GET_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_FIND_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_REMOVE_IF_SLOW_OLD_BENCHMARK)
|
||||
FOR_EACH_TABLE(REGISTER_REMOVE_IF_SLOW_BENCHMARK)
|
||||
|
||||
#define RUN_CREATE_BENCHMARK(HT) benchmark_create<HT<td::uint64, td::uint64>>(#HT);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user