FlatHashMap: add implementation with chunks

This commit is contained in:
Arseny Smirnov 2022-02-17 19:31:58 +01:00
parent deafeee33b
commit 34a69e3133
9 changed files with 1390 additions and 606 deletions

View File

@ -75,6 +75,7 @@ endif()
find_package(ABSL QUIET)
find_package(folly QUIET)
find_package(gflags QUIET)
if (ABSL_FOUND AND folly_FOUND)
add_executable(memory-hashset-memprof EXCLUDE_FROM_ALL hashset_memory.cpp)

View File

@ -203,8 +203,11 @@ set(TDUTILS_SOURCE
td/utils/ExitGuard.h
td/utils/FileLog.h
td/utils/filesystem.h
td/utils/fixed_vector.h
td/utils/find_boundary.h
td/utils/FlatHashMap.h
td/utils/FlatHashMapChunks.h
td/utils/FlatHashMapLinear.h
td/utils/FloodControlFast.h
td/utils/FloodControlStrict.h
td/utils/format.h

View File

@ -6,586 +6,18 @@
//
#pragma once
#include "td/utils/bits.h"
#include "td/utils/common.h"
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iterator>
#include <new>
#include <utility>
#include "td/utils/FlatHashMapChunks.h"
#include "td/utils/FlatHashMapLinear.h"
namespace td {
// Minimal move-only owner of a heap array whose length is fixed at construction.
//
// The elements are created with `new T[size]`, so T must be default-constructible.
// A default-constructed fixed_vector owns nothing and has size 0.
template <class T>
class fixed_vector {
 public:
  using iterator = T *;
  using const_iterator = const T *;

  fixed_vector() = default;

  // Allocates `size` default-initialised elements.
  explicit fixed_vector(size_t size) : ptr_(new T[size]), size_(size) {
  }

  // Takes over other's buffer; other is left empty.
  fixed_vector(fixed_vector &&other) noexcept {
    swap(other);
  }

  // Implemented as a swap: after the call `other` owns the buffer previously
  // held by *this (it is released when `other` is destroyed or reassigned).
  fixed_vector &operator=(fixed_vector &&other) noexcept {
    swap(other);
    return *this;
  }

  fixed_vector(const fixed_vector &) = delete;
  fixed_vector &operator=(const fixed_vector &) = delete;

  ~fixed_vector() {
    delete[] ptr_;
  }

  // Unchecked element access.
  T &operator[](size_t i) {
    return ptr_[i];
  }
  const T &operator[](size_t i) const {
    return ptr_[i];
  }

  T *begin() {
    return ptr_;
  }
  const T *begin() const {
    return ptr_;
  }
  T *end() {
    return ptr_ + size_;
  }
  const T *end() const {
    return ptr_ + size_;
  }

  bool empty() const {
    return size() == 0;
  }
  size_t size() const {
    return size_;
  }

  // Exchanges buffers. Only a pointer and a size are swapped, so this cannot throw;
  // it is marked noexcept because the noexcept move operations are built on it.
  void swap(fixed_vector<T> &other) noexcept {
    std::swap(ptr_, other.ptr_);
    std::swap(size_, other.size_);
  }

 private:
  T *ptr_{};
  size_t size_{0};
};
// TODO: move
// Reports whether `key` equals a value-initialised KeyT, the sentinel the
// node types use to mark an unused slot.
template <class KeyT>
bool is_key_empty(const KeyT &key) {
  const bool matches_default = (key == KeyT());
  return matches_default;
}
// Slot of a hash map: a key plus a manually managed value.
//
// A node is "empty" when its key equals KeyT() (see empty()). The value lives in an
// anonymous union, so it is neither constructed nor destroyed automatically: it is
// created with placement new when the node becomes non-empty and destroyed explicitly
// when the node becomes empty again.
template <class KeyT, class ValueT>
struct MapNode {
using first_type = KeyT;
using second_type = ValueT;
using key_type = KeyT;
using public_type = MapNode<KeyT, ValueT>;
using value_type = ValueT;
// The key; KeyT() marks the node as empty.
KeyT first{};
// Storage for the value; holds a live object only while the node is non-empty.
union {
ValueT second;
};
const auto &key() const {
return first;
}
// Must only be used on a non-empty node (otherwise `second` is not alive).
auto &value() {
return second;
}
// The type exposed through the table iterators is the node itself.
auto &get_public() {
return *this;
}
// Creates an empty node; `second` is intentionally left unconstructed.
MapNode() {
}
// Creates a non-empty node; `key` must not be the empty key.
MapNode(KeyT key, ValueT value) : first(std::move(key)) {
new (&second) ValueT(std::move(value));
DCHECK(!empty());
}
// Destroys the value only if one was constructed.
~MapNode() {
if (!empty()) {
second.~ValueT();
}
}
// `first` starts as KeyT() through its member initialiser, so the
// move-assignment precondition (this node is empty) holds here.
MapNode(MapNode &&other) noexcept {
*this = std::move(other);
}
// Transfers key and value from a non-empty `other` into this empty node.
// Both preconditions are checked only by DCHECK. `other` is left empty.
MapNode &operator=(MapNode &&other) noexcept {
DCHECK(empty());
DCHECK(!other.empty());
first = std::move(other.first);
// Reset the source key explicitly so that `other` reports empty().
other.first = KeyT{};
new (&second) ValueT(std::move(other.second));
other.second.~ValueT();
return *this;
}
bool empty() const {
return is_key_empty(key());
}
// Turns a non-empty node into an empty one, destroying the value.
void clear() {
DCHECK(!empty());
first = KeyT();
second.~ValueT();
DCHECK(empty());
}
// Fills an empty node: stores the key and constructs the value in place from `args`.
template <class... ArgsT>
void emplace(KeyT key, ArgsT &&...args) {
DCHECK(empty());
first = std::move(key);
new (&second) ValueT(std::forward<ArgsT>(args)...);
DCHECK(!empty());
}
};
// Slot of a hash set: stores only a key, and KeyT() marks the slot as unused.
template <class KeyT>
struct SetNode {
  using first_type = KeyT;
  using key_type = KeyT;
  using public_type = KeyT;
  using value_type = KeyT;

  KeyT first{};

  SetNode() = default;

  explicit SetNode(KeyT key) : first(std::move(key)) {
  }

  // Starts out empty (member initialiser) and then takes over other's key.
  SetNode(SetNode &&other) noexcept {
    *this = std::move(other);
  }

  // Moves the key out of a non-empty `other` into this empty node and
  // resets `other` to the empty key.
  SetNode &operator=(SetNode &&other) noexcept {
    DCHECK(empty());
    DCHECK(!other.empty());
    first = std::move(other.first);
    other.first = KeyT{};
    return *this;
  }

  // For a set the key, the value and the publicly visible element coincide.
  const auto &key() const {
    return first;
  }
  const auto &value() const {
    return first;
  }
  auto &get_public() {
    return first;
  }

  bool empty() const {
    return is_key_empty(first);
  }

  // Resets the slot to the empty key.
  void clear() {
    first = KeyT();
    CHECK(empty());
  }

  // Stores `key` in the slot.
  void emplace(KeyT key) {
    first = std::move(key);
  }
};
template <class NodeT, class HashT, class EqT>
class FlatHashTable {
public:
using Self = FlatHashTable<NodeT, HashT, EqT>;
using Node = NodeT;
using NodeIterator = typename fixed_vector<Node>::iterator;
using ConstNodeIterator = typename fixed_vector<Node>::const_iterator;
using KeyT = typename Node::key_type;
using key_type = typename Node::key_type;
using public_type = typename Node::public_type;
using value_type = typename Node::public_type;
// Mutable iterator over the non-empty nodes of the table.
//
// It wraps a raw pointer into nodes_ and skips empty slots when stepping.
// Dereferencing yields the node's public type (the node itself for maps,
// the key for sets).
struct Iterator {
  using iterator_category = std::bidirectional_iterator_tag;
  using difference_type = std::ptrdiff_t;
  using value_type = public_type;
  using pointer = public_type *;
  using reference = public_type &;

  friend class FlatHashTable;

  Iterator() = default;
  Iterator(NodeIterator it, Self *map) : it_(std::move(it)), map_(map) {
  }

  // Advances to the next non-empty node, or to end().
  Iterator &operator++() {
    do {
      ++it_;
    } while (it_ != map_->nodes_.end() && it_->empty());
    return *this;
  }

  // Steps back to the previous non-empty node. There is no lower bound check:
  // the caller must not decrement an iterator to the first element.
  Iterator &operator--() {
    do {
      --it_;
    } while (it_->empty());
    return *this;
  }

  reference operator*() {
    return it_->get_public();
  }

  // Go through get_public() so that the result really is a `pointer`:
  // for set nodes the public type is the key, not the node.
  pointer operator->() {
    return &it_->get_public();
  }

  bool operator==(const Iterator &other) const {
    DCHECK(map_ == other.map_);
    return it_ == other.it_;
  }
  bool operator!=(const Iterator &other) const {
    DCHECK(map_ == other.map_);
    return it_ != other.it_;
  }

 private:
  // Null-initialised so that default-constructed iterators hold determinate values.
  NodeIterator it_{};
  Self *map_{nullptr};
};
struct ConstIterator {
using iterator_category = std::bidirectional_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = public_type;
using pointer = const value_type *;
using reference = const value_type &;
friend class FlatHashTable;
ConstIterator &operator++() {
++it_;
return *this;
}
ConstIterator &operator--() {
--it_;
return *this;
}
reference operator*() {
return *it_;
}
pointer operator->() {
return &*it_;
}
bool operator==(const ConstIterator &other) const {
return it_ == other.it_;
}
bool operator!=(const ConstIterator &other) const {
return it_ != other.it_;
}
ConstIterator() = default;
ConstIterator(Iterator it) : it_(std::move(it)) {
}
private:
Iterator it_;
};
using iterator = Iterator;
using const_iterator = ConstIterator;
FlatHashTable() = default;
FlatHashTable(const FlatHashTable &other) : FlatHashTable(other.begin(), other.end()) {
}
// Copy assignment: replaces the contents of this table with a copy of `other`.
//
// The table is cleared first, because assign() only resizes and inserts:
// without the clear the old entries would survive the assignment, and resizing
// to fewer buckets than there are stored nodes would never terminate.
// Self-assignment is a no-op; it must not reach clear()/assign(), which would
// invalidate the very iterators being copied from.
FlatHashTable &operator=(const FlatHashTable &other) {
  if (this == &other) {
    return *this;
  }
  clear();
  assign(other.begin(), other.end());
  return *this;
}
FlatHashTable(std::initializer_list<Node> nodes) {
reserve(nodes.size());
for (auto &node : nodes) {
CHECK(!node.empty());
auto bucket = calc_bucket(node.first);
while (true) {
if (nodes_[bucket].key() == node.first) {
nodes_[bucket].second = node.second;
break;
}
if (nodes_[bucket].empty()) {
nodes_[bucket].emplace(node.first, node.second);
used_nodes_++;
break;
}
next_bucket(bucket);
}
}
}
FlatHashTable(FlatHashTable &&other) noexcept : nodes_(std::move(other.nodes_)), used_nodes_(other.used_nodes_) {
other.used_nodes_ = 0;
}
// Move assignment: takes over other's contents and leaves `other` empty.
//
// fixed_vector's move assignment is a swap, so assigning nodes_ directly would
// hand this table's old nodes to `other` while its used_nodes_ is reset to 0,
// i.e. a table that reports empty() but still contains live nodes. Moving into
// a temporary first keeps nodes_ and used_nodes_ consistent on both sides,
// destroys the old contents here, and makes self-move harmless.
FlatHashTable &operator=(FlatHashTable &&other) noexcept {
  FlatHashTable moved(std::move(other));
  swap(moved);
  return *this;
}
void swap(FlatHashTable &other) noexcept {
using std::swap;
swap(nodes_, other.nodes_);
swap(used_nodes_, other.used_nodes_);
}
~FlatHashTable() = default;
template <class ItT>
FlatHashTable(ItT begin, ItT end) {
assign(begin, end);
}
size_t bucket_count() const {
return nodes_.size();
}
Iterator find(const KeyT &key) {
if (empty() || is_key_empty(key)) {
return end();
}
auto bucket = calc_bucket(key);
while (true) {
if (EqT()(nodes_[bucket].key(), key)) {
return Iterator{nodes_.begin() + bucket, this};
}
if (nodes_[bucket].empty()) {
return end();
}
next_bucket(bucket);
}
}
ConstIterator find(const KeyT &key) const {
return ConstIterator(const_cast<Self *>(this)->find(key));
}
size_t size() const {
return used_nodes_;
}
bool empty() const {
return size() == 0;
}
Iterator begin() {
if (empty()) {
return end();
}
auto it = nodes_.begin();
while (it->empty()) {
++it;
}
return Iterator(it, this);
}
Iterator end() {
return Iterator(nodes_.end(), this);
}
ConstIterator begin() const {
return ConstIterator(const_cast<Self *>(this)->begin());
}
ConstIterator end() const {
return ConstIterator(const_cast<Self *>(this)->end());
}
void reserve(size_t size) {
size_t want_size = normalize(size * 5 / 3 + 1);
// size_t want_size = size * 2;
if (want_size > nodes_.size()) {
resize(want_size);
}
}
template <class... ArgsT>
std::pair<Iterator, bool> emplace(KeyT key, ArgsT &&...args) {
try_grow();
CHECK(!is_key_empty(key));
auto bucket = calc_bucket(key);
while (true) {
if (EqT()(nodes_[bucket].key(), key)) {
return {Iterator{nodes_.begin() + bucket, this}, false};
}
if (nodes_[bucket].empty()) {
nodes_[bucket].emplace(std::move(key), std::forward<ArgsT>(args)...);
used_nodes_++;
return {Iterator{nodes_.begin() + bucket, this}, true};
}
next_bucket(bucket);
}
}
std::pair<Iterator, bool> insert(KeyT key) {
return emplace(std::move(key));
}
template <class ItT>
void insert(ItT begin, ItT end) {
for (; begin != end; ++begin) {
emplace(*begin);
}
}
typename Node::value_type &operator[](const KeyT &key) {
return emplace(key).first->value();
}
size_t erase(const KeyT &key) {
auto it = find(key);
if (it == end()) {
return 0;
}
erase(it);
try_shrink();
return 1;
}
size_t count(const KeyT &key) const {
return find(key) != end();
}
void clear() {
used_nodes_ = 0;
nodes_ = {};
}
void erase(Iterator it) {
DCHECK(it != end());
DCHECK(!it.it_->empty());
erase_node(it.it_);
}
template <class F>
void remove_if(F &&f) {
auto it = nodes_.begin();
while (it != nodes_.end() && !it->empty()) {
++it;
}
auto first_empty = it;
for (; it != nodes_.end();) {
if (!it->empty() && f(*it)) {
erase_node(it);
} else {
++it;
}
}
for (it = nodes_.begin(); it != first_empty;) {
if (!it->empty() && f(*it)) {
erase_node(it);
} else {
++it;
}
}
try_shrink();
}
private:
fixed_vector<Node> nodes_;
size_t used_nodes_{};
template <class ItT>
void assign(ItT begin, ItT end) {
resize(std::distance(begin, end)); // TODO: should be conditional
for (; begin != end; ++begin) {
emplace(begin->first, begin->second);
}
}
void try_grow() {
if (should_grow(used_nodes_ + 1, nodes_.size())) {
grow();
}
}
static bool should_grow(size_t used_count, size_t bucket_count) {
return used_count * 5 > bucket_count * 3;
}
void try_shrink() {
if (should_shrink(used_nodes_, nodes_.size())) {
shrink();
}
}
static bool should_shrink(size_t used_count, size_t bucket_count) {
return used_count * 10 < bucket_count;
}
static size_t normalize(size_t size) {
return static_cast<size_t>(1) << (64 - count_leading_zeroes64(size | 7));
}
void shrink() {
size_t want_size = normalize((used_nodes_ + 1) * 5 / 3 + 1);
resize(want_size);
}
void grow() {
size_t want_size = normalize(2 * nodes_.size() - !nodes_.empty());
resize(want_size);
}
// Maps a key to its home bucket.
//
// The hash is reduced modulo the bucket count directly. Multiplying it by two
// first would make every home bucket even whenever the bucket count is even,
// leaving half of the buckets reachable only through probing.
// Must not be called while the table has no buckets (division by zero).
size_t calc_bucket(const KeyT &key) const {
  return HashT()(key) % nodes_.size();
}
void resize(size_t new_size) {
fixed_vector<Node> old_nodes(new_size);
std::swap(old_nodes, nodes_);
for (auto &node : old_nodes) {
if (node.empty()) {
continue;
}
size_t bucket = calc_bucket(node.key());
while (!nodes_[bucket].empty()) {
next_bucket(bucket);
}
nodes_[bucket] = std::move(node);
}
}
void next_bucket(size_t &bucket) const {
bucket++;
if (bucket == nodes_.size()) {
bucket = 0;
}
}
void erase_node(NodeIterator it) {
size_t empty_i = it - nodes_.begin();
auto empty_bucket = empty_i;
DCHECK(0 <= empty_i && empty_i < nodes_.size());
nodes_[empty_bucket].clear();
used_nodes_--;
for (size_t test_i = empty_i + 1;; test_i++) {
auto test_bucket = test_i;
if (test_bucket >= nodes_.size()) {
test_bucket -= nodes_.size();
}
if (nodes_[test_bucket].empty()) {
break;
}
auto want_i = calc_bucket(nodes_[test_bucket].key());
if (want_i < empty_i) {
want_i += nodes_.size();
}
if (want_i <= empty_i || want_i > test_i) {
nodes_[empty_bucket] = std::move(nodes_[test_bucket]);
empty_i = test_i;
empty_bucket = test_bucket;
}
}
}
};
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
using FlatHashMapImpl = FlatHashTable<MapNode<KeyT, ValueT>, HashT, EqT>;
template <class KeyT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
using FlatHashSetImpl = FlatHashTable<SetNode<KeyT>, HashT, EqT>;
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
using FlatHashMap = FlatHashMapImpl<KeyT, ValueT, HashT, EqT>;
//using FlatHashMap = FlatHashMapImpl<KeyT, ValueT, HashT, EqT>;
using FlatHashMap = FlatHashMapChunks<KeyT, ValueT, HashT, EqT>;
//using FlatHashMap = std::unordered_map<KeyT, ValueT, HashT, EqT>;
template <class KeyT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
using FlatHashSet = FlatHashSetImpl<KeyT, HashT, EqT>;
//using FlatHashSet = FlatHashSetImpl<KeyT, HashT, EqT>;
using FlatHashSet = FlatHashSetChunks<KeyT, HashT, EqT>;
//using FlatHashSet = std::unordered_set<KeyT, HashT, EqT>;
} // namespace td

View File

@ -0,0 +1,509 @@
//
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#pragma once
#include "td/utils/algorithm.h"
#include "td/utils/bits.h"
#include "td/utils/common.h"
#include "td/utils/FlatHashMapLinear.h"
#include "td/utils/logging.h"
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iterator>
#include <new>
#include <utility>
#include <arm_neon.h>
// Portable byte-matching helper.
struct MaskPortable {
  // Scans the 16 bytes starting at `bytes` and returns a mask in which
  // bit i is set exactly when bytes[i] == needle.
  static uint64_t equal_mask(uint8_t *bytes, uint8_t needle) {
    uint64_t mask = 0;
    int bit = 0;
    for (uint8_t *cur = bytes; cur != bytes + 16; ++cur, ++bit) {
      if (*cur == needle) {
        mask |= uint64_t{1} << bit;
      }
    }
    return mask;
  }
};
// NEON byte-matching helper (the name suggests it follows folly's approach — TODO confirm).
//
// Note that the result layout differs from MaskPortable: the match flag for
// bytes[i] is reported at bit 4 * i, not at bit i.
struct MaskNeonFolly {
// Compares the 16 bytes at `bytes` with `needle` and returns a 64-bit mask
// that uses one 4-bit group per input byte; only the lowest bit of each group is kept.
static uint64_t equal_mask(uint8_t *bytes, uint8_t needle) {
uint8x16_t input_mask = vld1q_u8(bytes);
auto needle_mask = vdupq_n_u8(needle);
// Each lane becomes 0xFF where the byte equals the needle and 0x00 otherwise.
auto eq_mask = vceqq_u8(input_mask, needle_mask);
// get info from every byte into the bottom half of every uint16_t
// by shifting right 4, then round to get it into a 64-bit vector
uint8x8_t shifted_eq_mask = vshrn_n_u16(vreinterpretq_u16_u8(eq_mask), 4);
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(shifted_eq_mask), 0);
// Keep a single bit per 4-bit group.
return mask & 0x1111111111111111;
}
};
// NEON byte-matching helper producing the same layout as MaskPortable:
// bit i of the result corresponds to bytes[i].
struct MaskNeon {
// Compares the 16 bytes at `bytes` with `needle` and returns a 16-bit mask
// (widened to uint64_t) with bit i set when bytes[i] == needle.
static uint64_t equal_mask(uint8_t *bytes, uint8_t needle) {
uint8x16_t input_mask = vld1q_u8(bytes);
auto needle_mask = vdupq_n_u8(needle);
// Each lane becomes 0xFF where the byte equals the needle and 0x00 otherwise.
auto eq_mask = vceqq_u8(input_mask, needle_mask);
// View the result as eight 16-bit lanes and keep bits 7 and 8 of each lane,
// i.e. one bit from each of the two bytes the lane is made of.
uint16x8_t MASK = vdupq_n_u16(0x180);
uint16x8_t a_masked = vandq_u16(vreinterpretq_u16_u8(eq_mask), MASK);
// Lane k is shifted by 2 * k - 7 (negative = right shift), which moves its
// two kept bits to positions 2 * k and 2 * k + 1.
const int16_t __attribute__((aligned(16))) SHIFT_ARR[8] = {-7, -5, -3, -1, 1, 3, 5, 7};
int16x8_t SHIFT = vld1q_s16(SHIFT_ARR);
uint16x8_t a_shifted = vshlq_u16(a_masked, SHIFT);
// The lanes now occupy disjoint bit positions, so summing them merges them.
return vaddvq_u16(a_shifted);
}
};
namespace td {
template <class NodeT, class HashT, class EqT>
class FlatHashTableChunks {
public:
using Self = FlatHashTableChunks<NodeT, HashT, EqT>;
using Node = NodeT;
using NodeIterator = typename fixed_vector<Node>::iterator;
using ConstNodeIterator = typename fixed_vector<Node>::const_iterator;
using KeyT = typename Node::key_type;
using key_type = typename Node::key_type;
using public_type = typename Node::public_type;
using value_type = typename Node::public_type;
// Mutable iterator over the non-empty nodes of the table.
//
// It wraps a raw pointer into nodes_ and skips empty slots when stepping.
// Dereferencing yields the node's public type (the node itself for maps,
// the key for sets).
struct Iterator {
  using iterator_category = std::bidirectional_iterator_tag;
  using difference_type = std::ptrdiff_t;
  using value_type = public_type;
  using pointer = public_type *;
  using reference = public_type &;

  friend class FlatHashTableChunks;

  Iterator() = default;
  Iterator(NodeIterator it, Self *map) : it_(std::move(it)), map_(map) {
  }

  // Advances to the next non-empty node, or to end().
  Iterator &operator++() {
    do {
      ++it_;
    } while (it_ != map_->nodes_.end() && it_->empty());
    return *this;
  }

  // Steps back to the previous non-empty node. There is no lower bound check:
  // the caller must not decrement an iterator to the first element.
  Iterator &operator--() {
    do {
      --it_;
    } while (it_->empty());
    return *this;
  }

  reference operator*() {
    return it_->get_public();
  }

  // Go through get_public() so that the result really is a `pointer`:
  // for set nodes the public type is the key, not the node.
  pointer operator->() {
    return &it_->get_public();
  }

  bool operator==(const Iterator &other) const {
    DCHECK(map_ == other.map_);
    return it_ == other.it_;
  }
  bool operator!=(const Iterator &other) const {
    DCHECK(map_ == other.map_);
    return it_ != other.it_;
  }

 private:
  // Null-initialised so that default-constructed iterators hold determinate values.
  NodeIterator it_{};
  Self *map_{nullptr};
};
struct ConstIterator {
using iterator_category = std::bidirectional_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = public_type;
using pointer = const value_type *;
using reference = const value_type &;
friend class FlatHashTableChunks;
ConstIterator &operator++() {
++it_;
return *this;
}
ConstIterator &operator--() {
--it_;
return *this;
}
reference operator*() {
return *it_;
}
pointer operator->() {
return &*it_;
}
bool operator==(const ConstIterator &other) const {
return it_ == other.it_;
}
bool operator!=(const ConstIterator &other) const {
return it_ != other.it_;
}
ConstIterator() = default;
ConstIterator(Iterator it) : it_(std::move(it)) {
}
private:
Iterator it_;
};
using iterator = Iterator;
using const_iterator = ConstIterator;
FlatHashTableChunks() = default;
FlatHashTableChunks(const FlatHashTableChunks &other) : FlatHashTableChunks(other.begin(), other.end()) {
}
// Copy assignment: replaces the contents of this table with a copy of `other`.
//
// Self-assignment is a no-op. It must not reach assign(), which clears this
// table before walking the source iterators; for `other == *this` those
// iterators would point into storage that has just been released.
FlatHashTableChunks &operator=(const FlatHashTableChunks &other) {
  if (this != &other) {
    assign(other.begin(), other.end());
  }
  return *this;
}
FlatHashTableChunks(std::initializer_list<Node> nodes) {
reserve(nodes.size());
for (auto &node : td::reversed(nodes)) {
CHECK(!node.empty());
if (count(node.first) > 0) {
continue;
}
emplace_node(Node{node.first, node.second});
}
}
FlatHashTableChunks(FlatHashTableChunks &&other) noexcept {
swap(other);
}
FlatHashTableChunks &operator=(FlatHashTableChunks &&other) noexcept {
swap(other);
return *this;
}
void swap(FlatHashTableChunks &other) noexcept {
using std::swap;
swap(nodes_, other.nodes_);
swap(chunks_, other.chunks_);
swap(used_nodes_, other.used_nodes_);
}
~FlatHashTableChunks() = default;
template <class ItT>
FlatHashTableChunks(ItT begin, ItT end) {
assign(begin, end);
}
size_t bucket_count() const {
return nodes_.size();
}
Iterator find(const KeyT &key) {
if (empty() || is_key_empty(key)) {
return end();
}
auto hash = calc_hash(key);
auto chunk_it = get_chunk_it(hash.chunk_i);
while (true) {
auto chunk_i = chunk_it.next();
auto &chunk = chunks_[chunk_i];
auto mask = MaskNeon::equal_mask(chunk.ctrl, hash.small_hash) & Chunk::MASK;
while (mask != 0) {
auto it = nodes_.begin() + td::count_trailing_zeroes64(mask) + chunk_i * Chunk::CHUNK_SIZE;
if (EqT()(it->first, key)) {
return Iterator{it, this};
}
mask &= mask - 1;
}
if (chunk.skipped_cnt == 0) {
break;
}
}
return end();
}
ConstIterator find(const KeyT &key) const {
return ConstIterator(const_cast<Self *>(this)->find(key));
}
size_t size() const {
return used_nodes_;
}
bool empty() const {
return size() == 0;
}
Iterator begin() {
if (empty()) {
return end();
}
auto it = nodes_.begin();
while (it->empty()) {
++it;
}
return Iterator(it, this);
}
Iterator end() {
return Iterator(nodes_.end(), this);
}
ConstIterator begin() const {
return ConstIterator(const_cast<Self *>(this)->begin());
}
ConstIterator end() const {
return ConstIterator(const_cast<Self *>(this)->end());
}
void reserve(size_t size) {
//size_t want_size = normalize(size * 5 / 3 + 1);
size_t want_size = normalize(size * 14 / 12 + 1);
// size_t want_size = size * 2;
if (want_size > nodes_.size()) {
resize(want_size);
}
}
template <class... ArgsT>
std::pair<Iterator, bool> emplace(KeyT key, ArgsT &&...args) {
CHECK(!is_key_empty(key));
auto it = find(key);
if (it != end()) {
return {it, false};
}
try_grow();
auto hash = calc_hash(key);
auto chunk_it = get_chunk_it(hash.chunk_i);
while (true) {
auto chunk_i = chunk_it.next();
auto &chunk = chunks_[chunk_i];
auto mask = MaskPortable::equal_mask(chunk.ctrl, 0) & Chunk::MASK;
if (mask != 0) {
auto shift = td::count_trailing_zeroes64(mask);
DCHECK(chunk.ctrl[shift] == 0);
auto node_it = nodes_.begin() + shift + chunk_i * Chunk::CHUNK_SIZE;
DCHECK(node_it->empty());
node_it->emplace(std::move(key), std::forward<ArgsT>(args)...);
DCHECK(!node_it->empty());
chunk.ctrl[shift] = hash.small_hash;
used_nodes_++;
return {{node_it, this}, true};
}
CHECK(chunk.skipped_cnt != std::numeric_limits<uint16_t>::max());
chunk.skipped_cnt++;
}
}
std::pair<Iterator, bool> insert(KeyT key) {
return emplace(std::move(key));
}
template <class ItT>
void insert(ItT begin, ItT end) {
for (; begin != end; ++begin) {
emplace(*begin);
}
}
typename Node::value_type &operator[](const KeyT &key) {
return emplace(key).first->value();
}
size_t erase(const KeyT &key) {
auto it = find(key);
if (it == end()) {
return 0;
}
erase(it);
try_shrink();
return 1;
}
size_t count(const KeyT &key) const {
return find(key) != end();
}
void clear() {
used_nodes_ = 0;
nodes_ = {};
chunks_ = {};
}
void erase(Iterator it) {
DCHECK(it != end());
DCHECK(!it.it_->empty());
erase_node(it.it_);
}
template <class F>
void remove_if(F &&f) {
for (auto it = nodes_.begin(), end = nodes_.end(); it != end; ++it) {
if (!it->empty() && f(it->get_public())) {
erase_node(it);
}
}
try_shrink();
}
private:
// Metadata for one group of CHUNK_SIZE consecutive nodes.
//
// The equal_mask helpers read 16 bytes starting at `ctrl`, so they also read the
// bytes that follow the 14 control bytes; callers AND the result with MASK to
// discard those extra positions.
// NOTE(review): this relies on skipped_cnt directly following ctrl, i.e. on
// sizeof(Chunk) == 16 without padding — confirm.
struct Chunk {
// Number of nodes (and control bytes) per chunk.
static constexpr int CHUNK_SIZE = 14;
// Bit mask selecting the CHUNK_SIZE valid positions of an equal_mask result.
static constexpr int MASK = (1 << CHUNK_SIZE) - 1;
// 0x0 - empty
// Otherwise the slot stores the key's small hash, which always has bit 0x80 set
// (see calc_hash) and therefore is never zero.
td::uint8 ctrl[CHUNK_SIZE] = {};
// Number of stored nodes whose probe sequence passed over this chunk because it
// was full when they were inserted; lookups continue probing while this is non-zero.
uint16 skipped_cnt{0};
};
fixed_vector<Node> nodes_;
fixed_vector<Chunk> chunks_;
size_t used_nodes_{};
template <class ItT>
void assign(ItT begin, ItT end) {
clear();
reserve(std::distance(begin, end));
for (; begin != end; ++begin) {
emplace(begin->first, begin->second);
}
}
void try_grow() {
if (should_grow(used_nodes_ + 1, nodes_.size())) {
grow();
}
}
static bool should_grow(size_t used_count, size_t bucket_count) {
return used_count * 14 > bucket_count * 12;
}
void try_shrink() {
if (should_shrink(used_nodes_, nodes_.size())) {
shrink();
}
}
static bool should_shrink(size_t used_count, size_t bucket_count) {
return used_count * 10 < bucket_count;
}
static size_t normalize(size_t size) {
auto x = (size / Chunk::CHUNK_SIZE) | 1;
auto y = static_cast<size_t>(1) << (64 - count_leading_zeroes64(x));
return y * Chunk::CHUNK_SIZE;
}
void shrink() {
size_t want_size = normalize((used_nodes_ + 1) * 5 / 3 + 1);
resize(want_size);
}
void grow() {
size_t want_size = normalize(2 * nodes_.size() - !nodes_.empty());
resize(want_size);
}
struct HashInfo {
size_t chunk_i;
uint8_t small_hash;
};
// Generates the probe sequence over chunk indices.
//
// The first call to next() yields the starting chunk; every following call
// advances by one more step than the previous one (+1, +2, +3, ...), wrapping
// around chunk_n with a single subtraction.
struct ChunkIt {
  size_t chunk_i;  // current chunk index
  size_t chunk_n;  // total number of chunks
  size_t shift{};  // step that the next call will apply

  size_t next() {
    const size_t advanced = chunk_i + shift++;
    chunk_i = advanced < chunk_n ? advanced : advanced - chunk_n;
    return chunk_i;
  }
};
ChunkIt get_chunk_it(size_t chunk_i) {
return {chunk_i, chunks_.size()};
}
HashInfo calc_hash(const KeyT &key) {
auto h = HashT()(key);
// TODO: will be problematic with current hash.
return {(h >> 8) % chunks_.size(), uint8_t(0x80 | h)};
}
void resize(size_t new_size) {
CHECK(new_size >= Chunk::CHUNK_SIZE);
fixed_vector<Node> old_nodes(new_size);
fixed_vector<Chunk> chunks(new_size / Chunk::CHUNK_SIZE);
std::swap(old_nodes, nodes_);
chunks_ = std::move(chunks);
used_nodes_ = 0;
for (auto &node : old_nodes) {
if (node.empty()) {
continue;
}
emplace_node(std::move(node));
}
}
void emplace_node(Node &&node) {
DCHECK(!node.empty());
auto hash = calc_hash(node.first);
auto chunk_it = get_chunk_it(hash.chunk_i);
while (true) {
auto chunk_i = chunk_it.next();
auto &chunk = chunks_[chunk_i];
auto mask = MaskPortable::equal_mask(chunk.ctrl, 0) & Chunk::MASK;
if (mask != 0) {
auto shift = td::count_trailing_zeroes64(mask);
auto node_it = nodes_.begin() + shift + chunk_i * Chunk::CHUNK_SIZE;
DCHECK(node_it->empty());
*node_it = std::move(node);
DCHECK(chunk.ctrl[shift] == 0);
chunk.ctrl[shift] = hash.small_hash;
DCHECK(chunk.ctrl[shift] != 0);
used_nodes_++;
break;
}
CHECK(chunk.skipped_cnt != std::numeric_limits<uint16_t>::max());
chunk.skipped_cnt++;
}
}
void next_bucket(size_t &bucket) const {
bucket++;
if (unlikely(bucket == nodes_.size())) {
bucket = 0;
}
}
void erase_node(NodeIterator it) {
DCHECK(!it->empty());
size_t empty_i = it - nodes_.begin();
DCHECK(0 <= empty_i && empty_i < nodes_.size());
auto empty_chunk_i = empty_i / Chunk::CHUNK_SIZE;
auto hash = calc_hash(it->first);
auto chunk_it = get_chunk_it(hash.chunk_i);
while (true) {
auto chunk_i = chunk_it.next();
auto &chunk = chunks_[chunk_i];
if (chunk_i == empty_chunk_i) {
chunk.ctrl[empty_i - empty_chunk_i * Chunk::CHUNK_SIZE] = 0;
break;
}
chunk.skipped_cnt--;
}
it->clear();
used_nodes_--;
}
};
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
using FlatHashMapChunks = FlatHashTableChunks<MapNode<KeyT, ValueT>, HashT, EqT>;
template <class KeyT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
using FlatHashSetChunks = FlatHashTableChunks<SetNode<KeyT>, HashT, EqT>;
// Free-function form of FlatHashTableChunks::remove_if: erases every element of
// `table` for which `func` returns true. `func` is a forwarding reference and is
// passed on with its original value category.
template <class NodeT, class HashT, class EqT, class FuncT>
void table_remove_if(FlatHashTableChunks<NodeT, HashT, EqT> &table, FuncT &&func) {
  table.remove_if(std::forward<FuncT>(func));
}
} // namespace td

View File

@ -0,0 +1,533 @@
//
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#pragma once
#include "td/utils/bits.h"
#include "td/utils/common.h"
#include "td/utils/fixed_vector.h"
#include "td/utils/logging.h"
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iterator>
#include <new>
#include <unordered_map>
#include <utility>
namespace td {
// TODO: move
// Reports whether `key` equals a value-initialised KeyT, the sentinel the
// node types use to mark an unused slot.
template <class KeyT>
bool is_key_empty(const KeyT &key) {
  const bool matches_default = (key == KeyT());
  return matches_default;
}
// Slot of a hash map: a key plus a manually managed value.
//
// A node is "empty" when its key equals KeyT() (see empty()). The value lives in an
// anonymous union, so it is neither constructed nor destroyed automatically: it is
// created with placement new when the node becomes non-empty and destroyed explicitly
// when the node becomes empty again.
template <class KeyT, class ValueT>
struct MapNode {
using first_type = KeyT;
using second_type = ValueT;
using key_type = KeyT;
using public_type = MapNode<KeyT, ValueT>;
using value_type = ValueT;
// The key; KeyT() marks the node as empty.
KeyT first{};
// Storage for the value; holds a live object only while the node is non-empty.
union {
ValueT second;
};
const auto &key() const {
return first;
}
// Must only be used on a non-empty node (otherwise `second` is not alive).
auto &value() {
return second;
}
// The type exposed through the table iterators is the node itself.
auto &get_public() {
return *this;
}
// Creates an empty node; `second` is intentionally left unconstructed.
MapNode() {
}
// Creates a non-empty node; `key` must not be the empty key.
MapNode(KeyT key, ValueT value) : first(std::move(key)) {
new (&second) ValueT(std::move(value));
DCHECK(!empty());
}
// Destroys the value only if one was constructed.
~MapNode() {
if (!empty()) {
second.~ValueT();
}
}
// `first` starts as KeyT() through its member initialiser, so the
// move-assignment precondition (this node is empty) holds here.
MapNode(MapNode &&other) noexcept {
*this = std::move(other);
}
// Transfers key and value from a non-empty `other` into this empty node.
// Both preconditions are checked only by DCHECK. `other` is left empty.
MapNode &operator=(MapNode &&other) noexcept {
DCHECK(empty());
DCHECK(!other.empty());
first = std::move(other.first);
// Reset the source key explicitly so that `other` reports empty().
other.first = KeyT{};
new (&second) ValueT(std::move(other.second));
other.second.~ValueT();
return *this;
}
bool empty() const {
return is_key_empty(key());
}
// Turns a non-empty node into an empty one, destroying the value.
void clear() {
DCHECK(!empty());
first = KeyT();
second.~ValueT();
DCHECK(empty());
}
// Fills an empty node: stores the key and constructs the value in place from `args`.
template <class... ArgsT>
void emplace(KeyT key, ArgsT &&...args) {
DCHECK(empty());
first = std::move(key);
new (&second) ValueT(std::forward<ArgsT>(args)...);
DCHECK(!empty());
}
};
// Slot of a hash set: stores only a key, and KeyT() marks the slot as unused.
template <class KeyT>
struct SetNode {
  using first_type = KeyT;
  using key_type = KeyT;
  using public_type = KeyT;
  using value_type = KeyT;

  KeyT first{};

  SetNode() = default;

  explicit SetNode(KeyT key) : first(std::move(key)) {
  }

  // Starts out empty (member initialiser) and then takes over other's key.
  SetNode(SetNode &&other) noexcept {
    *this = std::move(other);
  }

  // Moves the key out of a non-empty `other` into this empty node and
  // resets `other` to the empty key.
  SetNode &operator=(SetNode &&other) noexcept {
    DCHECK(empty());
    DCHECK(!other.empty());
    first = std::move(other.first);
    other.first = KeyT{};
    return *this;
  }

  // For a set the key, the value and the publicly visible element coincide.
  const auto &key() const {
    return first;
  }
  const auto &value() const {
    return first;
  }
  auto &get_public() {
    return first;
  }

  bool empty() const {
    return is_key_empty(first);
  }

  // Resets the slot to the empty key.
  void clear() {
    first = KeyT();
    CHECK(empty());
  }

  // Stores `key` in the slot.
  void emplace(KeyT key) {
    first = std::move(key);
  }
};
template <class NodeT, class HashT, class EqT>
class FlatHashTable {
public:
using Self = FlatHashTable<NodeT, HashT, EqT>;
using Node = NodeT;
using NodeIterator = typename fixed_vector<Node>::iterator;
using ConstNodeIterator = typename fixed_vector<Node>::const_iterator;
using KeyT = typename Node::key_type;
using key_type = typename Node::key_type;
using public_type = typename Node::public_type;
using value_type = typename Node::public_type;
// Mutable iterator over the non-empty nodes of the table.
//
// It wraps a raw pointer into nodes_ and skips empty slots when stepping.
// Dereferencing yields the node's public type (the node itself for maps,
// the key for sets).
struct Iterator {
  using iterator_category = std::bidirectional_iterator_tag;
  using difference_type = std::ptrdiff_t;
  using value_type = public_type;
  using pointer = public_type *;
  using reference = public_type &;

  friend class FlatHashTable;

  Iterator() = default;
  Iterator(NodeIterator it, Self *map) : it_(std::move(it)), map_(map) {
  }

  // Advances to the next non-empty node, or to end().
  Iterator &operator++() {
    do {
      ++it_;
    } while (it_ != map_->nodes_.end() && it_->empty());
    return *this;
  }

  // Steps back to the previous non-empty node. There is no lower bound check:
  // the caller must not decrement an iterator to the first element.
  Iterator &operator--() {
    do {
      --it_;
    } while (it_->empty());
    return *this;
  }

  reference operator*() {
    return it_->get_public();
  }

  // Go through get_public() so that the result really is a `pointer`:
  // for set nodes the public type is the key, not the node.
  pointer operator->() {
    return &it_->get_public();
  }

  bool operator==(const Iterator &other) const {
    DCHECK(map_ == other.map_);
    return it_ == other.it_;
  }
  bool operator!=(const Iterator &other) const {
    DCHECK(map_ == other.map_);
    return it_ != other.it_;
  }

 private:
  // Null-initialised so that default-constructed iterators hold determinate values.
  NodeIterator it_{};
  Self *map_{nullptr};
};
struct ConstIterator {
using iterator_category = std::bidirectional_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = public_type;
using pointer = const value_type *;
using reference = const value_type &;
friend class FlatHashTable;
ConstIterator &operator++() {
++it_;
return *this;
}
ConstIterator &operator--() {
--it_;
return *this;
}
reference operator*() {
return *it_;
}
pointer operator->() {
return &*it_;
}
bool operator==(const ConstIterator &other) const {
return it_ == other.it_;
}
bool operator!=(const ConstIterator &other) const {
return it_ != other.it_;
}
ConstIterator() = default;
ConstIterator(Iterator it) : it_(std::move(it)) {
}
private:
Iterator it_;
};
using iterator = Iterator;
using const_iterator = ConstIterator;
FlatHashTable() = default;
FlatHashTable(const FlatHashTable &other) : FlatHashTable(other.begin(), other.end()) {
}
// Copy assignment: replaces the contents of this table with a copy of `other`.
//
// The table is cleared first, because assign() only resizes and inserts:
// without the clear the old entries would survive the assignment, and resizing
// to fewer buckets than there are stored nodes would never terminate.
// Self-assignment is a no-op; it must not reach clear()/assign(), which would
// invalidate the very iterators being copied from.
FlatHashTable &operator=(const FlatHashTable &other) {
  if (this == &other) {
    return *this;
  }
  clear();
  assign(other.begin(), other.end());
  return *this;
}
FlatHashTable(std::initializer_list<Node> nodes) {
reserve(nodes.size());
for (auto &node : nodes) {
CHECK(!node.empty());
auto bucket = calc_bucket(node.first);
while (true) {
if (nodes_[bucket].key() == node.first) {
nodes_[bucket].second = node.second;
break;
}
if (nodes_[bucket].empty()) {
nodes_[bucket].emplace(node.first, node.second);
used_nodes_++;
break;
}
next_bucket(bucket);
}
}
}
FlatHashTable(FlatHashTable &&other) noexcept : nodes_(std::move(other.nodes_)), used_nodes_(other.used_nodes_) {
other.used_nodes_ = 0;
}
// Move assignment: takes over other's contents and leaves `other` empty.
//
// fixed_vector's move assignment is a swap, so assigning nodes_ directly would
// hand this table's old nodes to `other` while its used_nodes_ is reset to 0,
// i.e. a table that reports empty() but still contains live nodes. Moving into
// a temporary first keeps nodes_ and used_nodes_ consistent on both sides,
// destroys the old contents here, and makes self-move harmless.
FlatHashTable &operator=(FlatHashTable &&other) noexcept {
  FlatHashTable moved(std::move(other));
  swap(moved);
  return *this;
}
void swap(FlatHashTable &other) noexcept {
using std::swap;
swap(nodes_, other.nodes_);
swap(used_nodes_, other.used_nodes_);
}
~FlatHashTable() = default;
template <class ItT>
FlatHashTable(ItT begin, ItT end) {
assign(begin, end);
}
size_t bucket_count() const {
return nodes_.size();
}
Iterator find(const KeyT &key) {
if (empty() || is_key_empty(key)) {
return end();
}
auto bucket = calc_bucket(key);
while (true) {
if (EqT()(nodes_[bucket].key(), key)) {
return Iterator{nodes_.begin() + bucket, this};
}
if (nodes_[bucket].empty()) {
return end();
}
next_bucket(bucket);
}
}
ConstIterator find(const KeyT &key) const {
return ConstIterator(const_cast<Self *>(this)->find(key));
}
size_t size() const {
return used_nodes_;
}
bool empty() const {
return size() == 0;
}
Iterator begin() {
if (empty()) {
return end();
}
auto it = nodes_.begin();
while (it->empty()) {
++it;
}
return Iterator(it, this);
}
Iterator end() {
return Iterator(nodes_.end(), this);
}
ConstIterator begin() const {
return ConstIterator(const_cast<Self *>(this)->begin());
}
ConstIterator end() const {
return ConstIterator(const_cast<Self *>(this)->end());
}
void reserve(size_t size) {
size_t want_size = normalize(size * 5 / 3 + 1);
// size_t want_size = size * 2;
if (want_size > nodes_.size()) {
resize(want_size);
}
}
template <class... ArgsT>
std::pair<Iterator, bool> emplace(KeyT key, ArgsT &&...args) {
try_grow();
CHECK(!is_key_empty(key));
auto bucket = calc_bucket(key);
while (true) {
if (EqT()(nodes_[bucket].key(), key)) {
return {Iterator{nodes_.begin() + bucket, this}, false};
}
if (nodes_[bucket].empty()) {
nodes_[bucket].emplace(std::move(key), std::forward<ArgsT>(args)...);
used_nodes_++;
return {Iterator{nodes_.begin() + bucket, this}, true};
}
next_bucket(bucket);
}
}
std::pair<Iterator, bool> insert(KeyT key) {
return emplace(std::move(key));
}
// Inserts every element of the range [begin, end) through emplace().
template <class ItT>
void insert(ItT begin, ItT end) {
  while (begin != end) {
    emplace(*begin);
    ++begin;
  }
}
// Returns a reference to the value stored under `key`, inserting the key via
// emplace() first when it is not present yet.
typename Node::value_type &operator[](const KeyT &key) {
  auto inserted = emplace(key);
  return inserted.first->value();
}
// Removes the element with the given key, if any, and then lets the table
// shrink. Returns the number of removed elements (0 or 1).
size_t erase(const KeyT &key) {
  auto position = find(key);
  const bool missing = position == end();
  if (missing) {
    return 0;
  }
  erase(position);
  try_shrink();
  return 1;
}
// Returns 1 when `key` is stored in the table and 0 otherwise.
size_t count(const KeyT &key) const {
  const bool found = find(key) != end();
  return found ? 1 : 0;
}
// Removes all elements and resets the bucket array to a default-constructed one.
void clear() {
  nodes_ = {};
  used_nodes_ = 0;
}
// Removes the element `it` points to. `it` must refer to an occupied bucket
// (checked in debug builds). Does not call try_shrink().
void erase(Iterator it) {
  DCHECK(it != end());
  DCHECK(!it.it_->empty());
  auto node = it.it_;
  erase_node(node);
}
// Erases every element for which f(element) returns true, then gives the
// table a chance to shrink.
//
// The scan does not start at bucket 0: it first locates the first empty bucket,
// walks from there to the end of the array, and finally walks the leading
// buckets [0, first_empty).
// NOTE(review): presumably this ordering is needed because erase_node() shifts
// later nodes of a probe run backwards (wrapping around the end of the array);
// starting right after an empty bucket should keep every element visited
// exactly once — confirm against erase_node().
template <class F>
void remove_if(F &&f) {
// Find the first empty bucket (or the end of the array when there is none).
auto it = nodes_.begin();
while (it != nodes_.end() && !it->empty()) {
++it;
}
auto first_empty = it;
// Pass 1: from the first empty bucket to the end of the array.
for (; it != nodes_.end();) {
if (!it->empty() && f(it->get_public())) {
// `it` is not advanced: erase_node() may have moved another node into this bucket.
erase_node(it);
} else {
++it;
}
}
// Pass 2: the buckets in front of the first empty one.
for (it = nodes_.begin(); it != first_empty;) {
if (!it->empty() && f(it->get_public())) {
erase_node(it);
} else {
++it;
}
}
try_shrink();
}
private:
// Bucket array; its size is what bucket_count() reports.
fixed_vector<Node> nodes_;
// Number of occupied buckets; this is what size() reports.
size_t used_nodes_{};
template <class ItT>
void assign(ItT begin, ItT end) {
resize(std::distance(begin, end)); // TODO: should be conditional
for (; begin != end; ++begin) {
emplace(begin->first, begin->second);
}
}
// Grows the table when storing one more element would exceed the load limit
// checked by should_grow().
void try_grow() {
  const bool too_dense = should_grow(used_nodes_ + 1, nodes_.size());
  if (!too_dense) {
    return;
  }
  grow();
}
// True when used_count elements would fill more than 3/5 of bucket_count buckets.
static bool should_grow(size_t used_count, size_t bucket_count) {
  return bucket_count * 3 < used_count * 5;
}
// Shrinks the table when it has become sparse enough according to should_shrink().
void try_shrink() {
  const bool too_sparse = should_shrink(used_nodes_, nodes_.size());
  if (!too_sparse) {
    return;
  }
  shrink();
}
// True when fewer than 1/10 of the bucket_count buckets are in use.
static bool should_shrink(size_t used_count, size_t bucket_count) {
  return bucket_count > used_count * 10;
}
// Rounds a requested bucket count to a power of two: 1 shifted left by
// (64 - count_leading_zeroes64(size | 7)). OR-ing with 7 makes 8 the smallest
// possible result.
// NOTE(review): assumes count_leading_zeroes64 returns the number of leading
// zero bits of a 64-bit value — confirm in td/utils/bits.h.
static size_t normalize(size_t size) {
  const auto shift = 64 - count_leading_zeroes64(size | 7);
  return static_cast<size_t>(1) << shift;
}
void shrink() {
size_t want_size = normalize((used_nodes_ + 1) * 5 / 3 + 1);
resize(want_size);
}
// Rehashes into a larger bucket array. The request passed to normalize() is
// 2 * size - 1 for a non-empty array and 0 for an empty one.
void grow() {
  const size_t requested = 2 * nodes_.size() - !nodes_.empty();
  resize(normalize(requested));
}
// Home bucket of `key`: its hash reduced modulo the number of buckets.
// Must not be called while the bucket array is empty (modulo by zero).
size_t calc_bucket(const KeyT &key) const {
  const auto hash = HashT()(key);
  return hash % nodes_.size();
}
void resize(size_t new_size) {
fixed_vector<Node> old_nodes(new_size);
std::swap(old_nodes, nodes_);
for (auto &node : old_nodes) {
if (node.empty()) {
continue;
}
size_t bucket = calc_bucket(node.key());
while (!nodes_[bucket].empty()) {
next_bucket(bucket);
}
nodes_[bucket] = std::move(node);
}
}
// Advances `bucket` to the next probe position, wrapping around to 0 after the
// last bucket.
void next_bucket(size_t &bucket) const {
  ++bucket;
  const bool past_last = bucket == nodes_.size();
  if (unlikely(past_last)) {
    bucket = 0;
  }
}
// Removes the node at `it` and closes the resulting hole by moving later nodes
// of the same probe run backwards, so no placeholder is left in the array.
void erase_node(NodeIterator it) {
size_t empty_i = it - nodes_.begin();
auto empty_bucket = empty_i;
// NOTE(review): empty_i is a size_t, so the `0 <= empty_i` half of this check is always true.
DCHECK(0 <= empty_i && empty_i < nodes_.size());
nodes_[empty_bucket].clear();
used_nodes_--;
// empty_i / test_i are "unwrapped" positions that may run past nodes_.size();
// empty_bucket / test_bucket are the same positions wrapped into the array.
for (size_t test_i = empty_i + 1;; test_i++) {
auto test_bucket = test_i;
if (test_bucket >= nodes_.size()) {
test_bucket -= nodes_.size();
}
// An empty bucket ends the run: nothing behind it has to move.
if (nodes_[test_bucket].empty()) {
break;
}
// Unwrap the node's home bucket so it can be compared with empty_i and test_i.
auto want_i = calc_bucket(nodes_[test_bucket].key());
if (want_i < empty_i) {
want_i += nodes_.size();
}
// Move the node into the hole unless its home bucket lies in (empty_i, test_i];
// in that case the hole is in front of its home bucket and the node stays put.
if (want_i <= empty_i || want_i > test_i) {
nodes_[empty_bucket] = std::move(nodes_[test_bucket]);
// The bucket just vacated becomes the new hole.
empty_i = test_i;
empty_bucket = test_bucket;
}
}
}
};
// Map flavour of FlatHashTable: buckets are MapNode<KeyT, ValueT>; hashing and
// key equality default to std::hash and std::equal_to.
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
using FlatHashMapImpl = FlatHashTable<MapNode<KeyT, ValueT>, HashT, EqT>;
// Set flavour of FlatHashTable: buckets are SetNode<KeyT>.
template <class KeyT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
using FlatHashSetImpl = FlatHashTable<SetNode<KeyT>, HashT, EqT>;
} // namespace td

View File

@ -181,12 +181,14 @@ struct reversion_wrapper {
// begin() of a reversed view: the reverse-begin iterator of the wrapped
// iterable. rbegin is called unqualified with std::rbegin in scope, so both a
// user-provided rbegin found by ADL and the std overloads (which also cover
// built-in arrays) are usable.
template <typename T>
auto begin(reversion_wrapper<T> w) {
  using std::rbegin;
  return rbegin(w.iterable);
}
// end() of a reversed view: the reverse-end iterator of the wrapped iterable.
// rend is called unqualified with std::rend in scope, so both a user-provided
// rend found by ADL and the std overloads (which also cover built-in arrays)
// are usable.
template <typename T>
auto end(reversion_wrapper<T> w) {
  using std::rend;
  return rend(w.iterable);
}
} // namespace detail

View File

@ -0,0 +1,58 @@
#pragma once
#include "td/utils/common.h"
namespace td {
// Minimal owning array whose length is fixed at construction time.
//
// The buffer is allocated with new T[size] (elements are default-initialized)
// and released with delete[]. The type is move-only. A moved-from fixed_vector
// is empty: size() == 0 and begin() == end().
template <class T>
class fixed_vector {
 public:
  using iterator = T *;
  using const_iterator = const T *;

  // Creates an empty vector that owns no buffer.
  fixed_vector() = default;

  // Allocates `size` default-initialized elements.
  explicit fixed_vector(size_t size) : ptr_(new T[size]), size_(size) {
  }

  // Takes over other's buffer; other becomes empty.
  fixed_vector(fixed_vector &&other) noexcept {
    swap(other);
  }

  // Takes over other's buffer, frees the buffer previously owned by *this and
  // leaves other empty. Self-assignment keeps the contents unchanged.
  fixed_vector &operator=(fixed_vector &&other) noexcept {
    fixed_vector incoming;
    incoming.swap(other);  // other is empty from here on
    swap(incoming);        // `incoming` now owns our old buffer and frees it on scope exit
    return *this;
  }

  fixed_vector(const fixed_vector &) = delete;
  fixed_vector &operator=(const fixed_vector &) = delete;

  ~fixed_vector() {
    delete[] ptr_;
  }

  // Unchecked element access.
  T &operator[](size_t i) {
    return ptr_[i];
  }
  const T &operator[](size_t i) const {
    return ptr_[i];
  }

  T *begin() {
    return ptr_;
  }
  const T *begin() const {
    return ptr_;
  }
  T *end() {
    return ptr_ + size_;
  }
  const T *end() const {
    return ptr_ + size_;
  }

  bool empty() const {
    return size() == 0;
  }
  size_t size() const {
    return size_;
  }

  // Exchanges buffers and sizes; only swaps a pointer and an integer, so it cannot throw.
  void swap(fixed_vector<T> &other) noexcept {
    std::swap(ptr_, other.ptr_);
    std::swap(size_, other.size_);
  }

 private:
  T *ptr_{};
  size_t size_{0};
};
} // namespace td

View File

@ -7,13 +7,16 @@
#include "td/utils/algorithm.h"
#include "td/utils/common.h"
#include "td/utils/FlatHashMap.h"
#include "td/utils/FlatHashMapChunks.h"
#include "td/utils/Random.h"
#include "td/utils/Slice.h"
#include "td/utils/tests.h"
#include <algorithm>
#include <array>
#include <random>
#include <unordered_map>
#include <unordered_set>
#include <utility>
template <class T>
@ -23,18 +26,66 @@ static auto extract_kv(const T &reference) {
return expected;
}
TEST(FlatHashMap, basic) {
{
td::FlatHashSet<int> s;
s.insert(5);
for (auto x : s) {
// Copies the elements of a set-like container into a sequence via
// td::transform and returns that sequence sorted, so that containers with
// different iteration orders can be compared.
template <class T>
static auto extract_k(const T &reference) {
  auto keys = td::transform(reference, [](auto &key) { return key; });
  std::sort(keys.begin(), keys.end());
  return keys;
}
// Smoke test for FlatHashMapChunks: values written through operator[] can be
// read back, also after a second key has been inserted.
TEST(FlatHashMapChunks, basic) {
  td::FlatHashMapChunks<int, int> table;
  table[5] = 3;
  ASSERT_EQ(3, table[5]);
  table[3] = 4;
  ASSERT_EQ(4, table[3]);
}
// Simulates linear probing without a real hash table: `elements` random
// positions are inserted into `buckets` slots (an occupied slot is skipped by
// stepping forward with wrap-around), then the longest run of consecutive
// occupied slots is logged. Nothing is asserted; the test only prints.
TEST(FlatHashMap, probing) {
auto test = [](int buckets, int elements) {
CHECK(buckets >= elements);
std::vector<bool> data(buckets, false);
// Seeded from std::random_device, so the logged numbers differ between runs.
std::random_device rnd;
std::mt19937 mt(rnd());
std::uniform_int_distribution<int32_t> d(0, buckets - 1);
for (int i = 0; i < elements; i++) {
int pos = d(mt);
while (data[pos]) {
pos++;
if (pos == buckets) {
pos = 0;
}
}
data[pos] = true;
}
// NOTE(review): the next four lines use `s`, which is not declared in this
// lambda, and open a loop that is not closed here — they look like leftovers
// of another test body; confirm against the real file.
int N = 100000;
for (int i = 0; i < 10000000; i++) {
s.insert((i + N / 2) % N);
s.erase(i % N);
// Longest run of consecutive occupied slots (runs are not joined across the wrap-around).
int max_chain = 0;
int cur_chain = 0;
for (auto x : data) {
if (x) {
cur_chain++;
max_chain = std::max(max_chain, cur_chain);
} else {
cur_chain = 0;
}
}
LOG(ERROR) << "buckets=" << buckets << " elements=" << elements << " max_chain=" << max_chain;
};
// Load factors 0.8, 0.6 and 0.3.
test(8192, int(8192 * 0.8));
test(8192, int(8192 * 0.6));
test(8192, int(8192 * 0.3));
}
// Long-running insert/erase workload over keys in [1, N]. The test is switched
// off by the unconditional return at the top; the body is kept so it can be
// re-enabled by removing that line.
TEST(FlatHashSet, TL) {
  return;
  td::FlatHashSet<int> set;
  int N = 100000;
  for (int step = 0; step < 10000000; step++) {
    // Insert a key half a period ahead of the one being erased.
    set.insert((step + N / 2) % N + 1);
    set.erase(step % N + 1);
  }
}
TEST(FlatHashMap, basic) {
{
td::FlatHashMap<int, int> map;
map[1] = 2;
@ -71,7 +122,7 @@ TEST(FlatHashMap, basic) {
ASSERT_EQ(1u, map.size());
}
using KV = td::FlatHashMapImpl<td::string, td::string>;
using KV = td::FlatHashMap<td::string, td::string>;
using Data = td::vector<std::pair<td::string, td::string>>;
auto data = Data{{"a", "b"}, {"c", "d"}};
{ ASSERT_EQ(Data{}, extract_kv(KV())); }
@ -163,25 +214,36 @@ TEST(FlatHashMap, remove_if_basic) {
}
}
// Upper bound for the per-run `max_table_size` limit that the stress tests
// re-draw with rnd.fast(1, MAX_TABLE_SIZE).
static constexpr size_t MAX_TABLE_SIZE = 1000;
TEST(FlatHashMap, stress_test) {
td::vector<td::RandomSteps::Step> steps;
auto add_step = [&steps](td::Slice, td::uint32 weight, auto f) {
steps.emplace_back(td::RandomSteps::Step{std::move(f), weight});
};
td::Random::Xorshift128plus rnd(123);
size_t max_table_size = 1000; // dynamic value
size_t max_table_size = MAX_TABLE_SIZE; // dynamic value
std::unordered_map<td::uint64, td::uint64> ref;
td::FlatHashMapImpl<td::uint64, td::uint64> tbl;
td::FlatHashMap<td::uint64, td::uint64> tbl;
auto validate = [&] {
ASSERT_EQ(ref.empty(), tbl.empty());
ASSERT_EQ(ref.size(), tbl.size());
ASSERT_EQ(extract_kv(ref), extract_kv(tbl));
for (auto &kv : ref) {
ASSERT_EQ(ref[kv.first], tbl[kv.first]);
auto tbl_it = tbl.find(kv.first);
ASSERT_TRUE(tbl_it != tbl.end());
ASSERT_EQ(kv.second, tbl_it->second);
}
};
td::vector<td::RandomSteps::Step> steps;
auto add_step = [&](td::Slice step_name, td::uint32 weight, auto f) {
auto g = [&, step_name, f = std::move(f)]() {
//LOG(ERROR) << step_name;
//ASSERT_EQ(ref.size(), tbl.size());
f();
ASSERT_EQ(ref.size(), tbl.size());
//validate();
};
steps.emplace_back(td::RandomSteps::Step{std::move(g), weight});
};
auto gen_key = [&] {
auto key = rnd() % 4000 + 1;
return key;
@ -191,13 +253,13 @@ TEST(FlatHashMap, stress_test) {
validate();
td::reset_to_empty(ref);
td::reset_to_empty(tbl);
max_table_size = rnd.fast(1, 1000);
max_table_size = rnd.fast(1, MAX_TABLE_SIZE);
});
add_step("Clear hash table", 1, [&] {
validate();
ref.clear();
tbl.clear();
max_table_size = rnd.fast(1, 1000);
max_table_size = rnd.fast(1, MAX_TABLE_SIZE);
});
add_step("Insert random value", 1000, [&] {
@ -265,6 +327,88 @@ TEST(FlatHashMap, stress_test) {
td::table_remove_if(ref, condition);
});
td::RandomSteps runner(std::move(steps));
for (size_t i = 0; i < 1000000000; i++) {
runner.step(rnd);
}
}
TEST(FlatHashSet, stress_test) {
td::vector<td::RandomSteps::Step> steps;
auto add_step = [&steps](td::Slice, td::uint32 weight, auto f) {
steps.emplace_back(td::RandomSteps::Step{std::move(f), weight});
};
td::Random::Xorshift128plus rnd(123);
size_t max_table_size = MAX_TABLE_SIZE; // dynamic value
std::unordered_set<td::uint64> ref;
td::FlatHashSet<td::uint64> tbl;
auto validate = [&] {
ASSERT_EQ(ref.empty(), tbl.empty());
ASSERT_EQ(ref.size(), tbl.size());
ASSERT_EQ(extract_k(ref), extract_k(tbl));
};
auto gen_key = [&] {
auto key = rnd() % 4000 + 1;
return key;
};
add_step("Reset hash table", 1, [&] {
validate();
td::reset_to_empty(ref);
td::reset_to_empty(tbl);
max_table_size = rnd.fast(1, MAX_TABLE_SIZE);
});
add_step("Clear hash table", 1, [&] {
validate();
ref.clear();
tbl.clear();
max_table_size = rnd.fast(1, MAX_TABLE_SIZE);
});
add_step("Insert random value", 1000, [&] {
if (tbl.size() > max_table_size) {
return;
}
auto key = gen_key();
ref.insert(key);
tbl.insert(key);
});
add_step("reserve", 10, [&] { tbl.reserve(rnd() % max_table_size); });
add_step("find", 1000, [&] {
auto key = gen_key();
auto ref_it = ref.find(key);
auto tbl_it = tbl.find(key);
ASSERT_EQ(ref_it == ref.end(), tbl_it == tbl.end());
if (ref_it != ref.end()) {
ASSERT_EQ(*ref_it, *tbl_it);
}
});
add_step("find_and_erase", 100, [&] {
auto key = gen_key();
auto ref_it = ref.find(key);
auto tbl_it = tbl.find(key);
ASSERT_EQ(ref_it == ref.end(), tbl_it == tbl.end());
if (ref_it != ref.end()) {
ref.erase(ref_it);
tbl.erase(tbl_it);
}
});
add_step("remove_if", 5, [&] {
auto mul = rnd();
auto bit = rnd() % 64;
auto condition = [&](auto &it) {
return (((it * mul) >> bit) & 1) == 0;
};
td::table_remove_if(tbl, condition);
td::table_remove_if(ref, condition);
});
td::RandomSteps runner(std::move(steps));
for (size_t i = 0; i < 10000000; i++) {
runner.step(rnd);

View File

@ -7,6 +7,7 @@
#include "td/utils/algorithm.h"
#include "td/utils/common.h"
#include "td/utils/FlatHashMap.h"
#include "td/utils/FlatHashMapChunks.h"
#include "td/utils/format.h"
#include "td/utils/Hash.h"
#include "td/utils/logging.h"
@ -398,7 +399,7 @@ static void BM_remove_if_slow(benchmark::State &state) {
template <typename TableT>
static void BM_remove_if_slow_old(benchmark::State &state) {
constexpr size_t N = 100000;
constexpr size_t BATCH_SIZE = 500000;
constexpr size_t BATCH_SIZE = 5000000;
TableT table;
while (state.KeepRunningBatch(BATCH_SIZE)) {
@ -449,13 +450,112 @@ static void benchmark_create(td::Slice name) {
}
}
#define FOR_EACH_TABLE(F) \
F(td::FlatHashMapImpl) \
F(folly::F14FastMap) \
F(absl::flat_hash_map) \
F(std::unordered_map) \
// Benchmark element padded to 64 bytes in total: a 32-bit payload followed by
// filler bytes.
struct CacheMissNode {
  uint32_t data{};
  char padding[64 - sizeof(uint32_t)];
};
// Benchmark kernel with a fixed trip count: always visits exactly max_shift
// consecutive nodes starting at ptr and folds their payloads into one value.
class IterateFast {
 public:
  // ptr must point to at least max_shift readable nodes; max_shift must be
  // non-zero whenever the loop runs (it is used as a modulus).
  static __attribute__((noinline)) uint32_t iterate(CacheMissNode *ptr, size_t max_shift) {
    uint32_t res = 1;
    for (size_t i = 0; i < max_shift; i++) {
      const uint32_t value = ptr[i].data;
      if (value % max_shift != 0) {
        res *= value;
      } else if (value != 0) {
        // A zero payload is a multiple of every max_shift; dividing by it
        // would be undefined behaviour, so it is skipped.
        res /= value;
      }
    }
    return res;
  }
};
class IterateSlow {
public:
static __attribute__((noinline)) uint32_t iterate(CacheMissNode *ptr, size_t max_shift) {
uint32_t res = 1;
for (size_t i = 0;; i++) {
if (ptr[i].data % max_shift != 0) {
res *= ptr[i].data;
} else {
break;
}
}
return res;
}
};
#include <random>
// Benchmarks F::iterate on random start positions inside a large node array.
// state.range(0) is max_shift; state.range(1) selects the data layout:
//  - non-zero: payloads cycle through 0 .. max_shift - 1 and every start
//    position is moved to an index that is 1 modulo max_shift;
//  - zero: payloads and start positions are random.
template <class F>
void BM_cache_miss(benchmark::State &state) {
  uint32_t max_shift = state.range(0);
  bool flag = state.range(1);

  std::random_device rd;
  std::mt19937 rnd(rd());

  int N = 50000000;
  std::vector<CacheMissNode> nodes(N);
  if (flag) {
    uint32_t counter = 0;
    for (auto &node : nodes) {
      node.data = counter++ % max_shift;
    }
  } else {
    for (auto &node : nodes) {
      node.data = rnd();
    }
  }

  // Start positions stay at least ~1000 nodes away from the end of the array.
  std::vector<int> positions(N);
  std::uniform_int_distribution<uint32_t> rnd_pos(0, N - 1000);
  for (auto &start : positions) {
    start = rnd_pos(rnd);
    if (flag) {
      start = start / max_shift * max_shift + 1;
    }
  }

  while (state.KeepRunningBatch(positions.size())) {
    for (const auto start : positions) {
      auto *first_node = &nodes[start];
      auto folded = F::iterate(first_node, max_shift);
      benchmark::DoNotOptimize(folded);
    }
  }
}
// Reference implementation of a 16-byte equality mask: bit i of the result is
// set exactly when bytes[i] == needle, for i in [0, 16).
uint64_t equal_mask_slow(uint8_t *bytes, uint8_t needle) {
  uint64_t result = 0;
  for (int pos = 0; pos != 16; ++pos) {
    if (bytes[pos] == needle) {
      result |= uint64_t{1} << pos;
    }
  }
  return result;
}
// Benchmarks MaskT::equal_mask over a sliding window: the buffer holds random
// bytes and has 16 spare bytes at the end, so a window may start at any of the
// first batch_size offsets.
template <class MaskT>
void BM_mask(benchmark::State &state) {
  size_t batch_size = 1024;
  std::vector<uint8_t> bytes(batch_size + 16);
  for (auto &byte : bytes) {
    byte = static_cast<uint8_t>(td::Random::fast(0, 17));
  }
  while (state.KeepRunningBatch(batch_size)) {
    for (size_t offset = 0; offset < batch_size; offset++) {
      uint8_t *window = bytes.data() + offset;
      benchmark::DoNotOptimize(MaskT::equal_mask(window, 17));
    }
  }
}
// Register BM_mask once per mask implementation.
// NOTE(review): MaskPortable / MaskNeonFolly / MaskNeon are not defined in the
// visible part of this file — presumably provided by FlatHashMapChunks.h; confirm.
BENCHMARK_TEMPLATE(BM_mask, MaskPortable);
BENCHMARK_TEMPLATE(BM_mask, MaskNeonFolly);
BENCHMARK_TEMPLATE(BM_mask, MaskNeon);
// X-macro: applies F to every table template that takes part in the benchmarks.
#define FOR_EACH_TABLE(F) \
F(td::FlatHashMapChunks) \
F(td::FlatHashMapImpl) \
F(folly::F14FastMap) \
F(absl::flat_hash_map) \
F(std::unordered_map) \
F(std::map)
//BENCHMARK(BM_cache_miss<IterateSlow>)->Ranges({{1, 16}, {0, 1}});
//BENCHMARK(BM_cache_miss<IterateFast>)->Ranges({{1, 16}, {0, 1}});
//BENCHMARK_TEMPLATE(BM_Get, VectorTable<td::uint64, td::uint64>)->Range(1, 1 << 26);
//BENCHMARK_TEMPLATE(BM_Get, SortedVectorTable<td::uint64, td::uint64>)->Range(1, 1 << 26);
//BENCHMARK_TEMPLATE(BM_Get, NoOpTable<td::uint64, td::uint64>)->Range(1, 1 << 26);
@ -476,16 +576,18 @@ static void benchmark_create(td::Slice name) {
#define REGISTER_CACHE3_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_cache3, HT<td::uint64, td::uint64>)->Range(1, 1 << 23);
#define REGISTER_ERASE_ALL_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_erase_all_with_begin, HT<td::uint64, td::uint64>);
#define REGISTER_REMOVE_IF_SLOW_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_remove_if_slow, HT<td::uint64, td::uint64>);
#define REGISTER_REMOVE_IF_SLOW_OLD_BENCHMARK(HT) BENCHMARK_TEMPLATE(BM_remove_if_slow_old, HT<td::uint64, td::uint64>);
FOR_EACH_TABLE(REGISTER_REMOVE_IF_SLOW_BENCHMARK)
FOR_EACH_TABLE(REGISTER_GET_BENCHMARK)
FOR_EACH_TABLE(REGISTER_CACHE3_BENCHMARK)
FOR_EACH_TABLE(REGISTER_CACHE2_BENCHMARK)
FOR_EACH_TABLE(REGISTER_CACHE_BENCHMARK)
FOR_EACH_TABLE(REGISTER_REMOVE_IF_BENCHMARK)
FOR_EACH_TABLE(REGISTER_EMPLACE_BENCHMARK)
FOR_EACH_TABLE(REGISTER_ERASE_ALL_BENCHMARK)
FOR_EACH_TABLE(REGISTER_GET_BENCHMARK)
FOR_EACH_TABLE(REGISTER_FIND_BENCHMARK)
FOR_EACH_TABLE(REGISTER_REMOVE_IF_SLOW_OLD_BENCHMARK)
FOR_EACH_TABLE(REGISTER_REMOVE_IF_SLOW_BENCHMARK)
#define RUN_CREATE_BENCHMARK(HT) benchmark_create<HT<td::uint64, td::uint64>>(#HT);