Various fixes.

This commit is contained in:
levlam 2022-02-18 23:04:25 +03:00
parent b5cf85d6e2
commit ae3854d97c
6 changed files with 120 additions and 100 deletions

View File

@ -9,6 +9,9 @@
#include "td/utils/FlatHashMapChunks.h" #include "td/utils/FlatHashMapChunks.h"
#include "td/utils/FlatHashMapLinear.h" #include "td/utils/FlatHashMapLinear.h"
//#include <unordered_map>
//#include <unordered_set>
namespace td { namespace td {
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>> template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
//using FlatHashMap = FlatHashMapImpl<KeyT, ValueT, HashT, EqT>; //using FlatHashMap = FlatHashMapImpl<KeyT, ValueT, HashT, EqT>;

View File

@ -10,22 +10,22 @@
#include "td/utils/bits.h" #include "td/utils/bits.h"
#include "td/utils/common.h" #include "td/utils/common.h"
#include "td/utils/FlatHashMapLinear.h" #include "td/utils/FlatHashMapLinear.h"
#include "td/utils/logging.h"
#include <cstddef> #include <cstddef>
#include <functional> #include <functional>
#include <initializer_list> #include <initializer_list>
#include <iterator> #include <iterator>
#include <new> #include <limits>
#include <utility> #include <utility>
#if (defined(_MSC_VER) && (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2))) #if defined(__SSE2__) || (TD_MSVC && (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2)))
#define TD_SSE2 1 #define TD_SSE2 1
#endif #endif
#ifdef __aarch64__ #ifdef __aarch64__
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#if TD_SSE2 #if TD_SSE2
#include <emmintrin.h> #include <emmintrin.h>
#endif #endif
@ -33,12 +33,12 @@
namespace td { namespace td {
template <int shift> template <int shift>
struct MaskIterator { struct MaskIterator {
uint64_t mask; uint64 mask;
explicit operator bool() const { explicit operator bool() const {
return mask != 0; return mask != 0;
} }
int pos() const { int pos() const {
return td::count_trailing_zeroes64(mask) / shift; return count_trailing_zeroes64(mask) / shift;
} }
void next() { void next() {
mask &= mask - 1; mask &= mask - 1;
@ -63,8 +63,8 @@ struct MaskIterator {
}; };
struct MaskPortable { struct MaskPortable {
static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) { static MaskIterator<1> equal_mask(uint8 *bytes, uint8 needle) {
uint64_t res = 0; uint64 res = 0;
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
res |= (bytes[i] == needle) << i; res |= (bytes[i] == needle) << i;
} }
@ -74,20 +74,20 @@ struct MaskPortable {
#ifdef __aarch64__ #ifdef __aarch64__
struct MaskNeonFolly { struct MaskNeonFolly {
static MaskIterator<4> equal_mask(uint8_t *bytes, uint8_t needle) { static MaskIterator<4> equal_mask(uint8 *bytes, uint8 needle) {
uint8x16_t input_mask = vld1q_u8(bytes); uint8x16_t input_mask = vld1q_u8(bytes);
auto needle_mask = vdupq_n_u8(needle); auto needle_mask = vdupq_n_u8(needle);
auto eq_mask = vceqq_u8(input_mask, needle_mask); auto eq_mask = vceqq_u8(input_mask, needle_mask);
// get info from every byte into the bottom half of every uint16_t // get info from every byte into the bottom half of every uint16
// by shifting right 4, then round to get it into a 64-bit vector // by shifting right 4, then round to get it into a 64-bit vector
uint8x8_t shifted_eq_mask = vshrn_n_u16(vreinterpretq_u16_u8(eq_mask), 4); uint8x8_t shifted_eq_mask = vshrn_n_u16(vreinterpretq_u16_u8(eq_mask), 4);
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(shifted_eq_mask), 0); uint64 mask = vget_lane_u64(vreinterpret_u64_u8(shifted_eq_mask), 0);
return {mask & 0x11111111111111}; return {mask & 0x11111111111111};
} }
}; };
struct MaskNeon { struct MaskNeon {
static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) { static MaskIterator<1> equal_mask(uint8 *bytes, uint8 needle) {
uint8x16_t input_mask = vld1q_u8(bytes); uint8x16_t input_mask = vld1q_u8(bytes);
auto needle_mask = vdupq_n_u8(needle); auto needle_mask = vdupq_n_u8(needle);
auto eq_mask = vceqq_u8(input_mask, needle_mask); auto eq_mask = vceqq_u8(input_mask, needle_mask);
@ -101,11 +101,11 @@ struct MaskNeon {
}; };
#elif TD_SSE2 #elif TD_SSE2
struct MaskSse2 { struct MaskSse2 {
static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) { static MaskIterator<1> equal_mask(uint8 *bytes, uint8 needle) {
auto input_mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes)); auto input_mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes));
auto needle_mask = _mm_set1_epi8(needle); auto needle_mask = _mm_set1_epi8(needle);
auto match_mask = _mm_cmpeq_epi8(needle_mask, input_mask); auto match_mask = _mm_cmpeq_epi8(needle_mask, input_mask);
return {static_cast<uint32_t>(_mm_movemask_epi8(match_mask))}; return {static_cast<uint32>(_mm_movemask_epi8(match_mask)) & ((1u << 14) - 1)};
} }
}; };
#endif #endif
@ -224,7 +224,7 @@ class FlatHashTableChunks {
FlatHashTableChunks(std::initializer_list<Node> nodes) { FlatHashTableChunks(std::initializer_list<Node> nodes) {
reserve(nodes.size()); reserve(nodes.size());
for (auto &node : td::reversed(nodes)) { for (auto &node : reversed(nodes)) {
CHECK(!node.empty()); CHECK(!node.empty());
if (count(node.first) > 0) { if (count(node.first) > 0) {
continue; continue;
@ -351,7 +351,7 @@ class FlatHashTableChunks {
used_nodes_++; used_nodes_++;
return {{node_it, this}, true}; return {{node_it, this}, true};
} }
CHECK(chunk.skipped_cnt != std::numeric_limits<uint16_t>::max()); CHECK(chunk.skipped_cnt != std::numeric_limits<uint16>::max());
chunk.skipped_cnt++; chunk.skipped_cnt++;
chunk_it.next(); chunk_it.next();
} }
@ -413,7 +413,7 @@ class FlatHashTableChunks {
static constexpr int CHUNK_SIZE = 14; static constexpr int CHUNK_SIZE = 14;
static constexpr int MASK = (1 << CHUNK_SIZE) - 1; static constexpr int MASK = (1 << CHUNK_SIZE) - 1;
// 0x0 - empty // 0x0 - empty
td::uint8 ctrl[CHUNK_SIZE] = {}; uint8 ctrl[CHUNK_SIZE] = {};
uint16 skipped_cnt{0}; uint16 skipped_cnt{0};
}; };
fixed_vector<Node> nodes_; fixed_vector<Node> nodes_;
@ -464,7 +464,7 @@ class FlatHashTableChunks {
struct HashInfo { struct HashInfo {
size_t chunk_i; size_t chunk_i;
uint8_t small_hash; uint8 small_hash;
}; };
struct ChunkIt { struct ChunkIt {
size_t chunk_i; size_t chunk_i;
@ -488,7 +488,7 @@ class FlatHashTableChunks {
HashInfo calc_hash(const KeyT &key) { HashInfo calc_hash(const KeyT &key) {
auto h = HashT()(key); auto h = HashT()(key);
// TODO: will be problematic with current hash. // TODO: will be problematic with current hash.
return {(h >> 8) % chunks_.size(), uint8_t(0x80 | h)}; return {(h >> 8) % chunks_.size(), static_cast<uint8>(0x80 | h)};
} }
void resize(size_t new_size) { void resize(size_t new_size) {
@ -526,7 +526,7 @@ class FlatHashTableChunks {
used_nodes_++; used_nodes_++;
break; break;
} }
CHECK(chunk.skipped_cnt != std::numeric_limits<uint16_t>::max()); CHECK(chunk.skipped_cnt != std::numeric_limits<uint16>::max());
chunk.skipped_cnt++; chunk.skipped_cnt++;
chunk_it.next(); chunk_it.next();
} }

View File

@ -9,14 +9,12 @@
#include "td/utils/bits.h" #include "td/utils/bits.h"
#include "td/utils/common.h" #include "td/utils/common.h"
#include "td/utils/fixed_vector.h" #include "td/utils/fixed_vector.h"
#include "td/utils/logging.h"
#include <cstddef> #include <cstddef>
#include <functional> #include <functional>
#include <initializer_list> #include <initializer_list>
#include <iterator> #include <iterator>
#include <new> #include <new>
#include <unordered_map>
#include <utility> #include <utility>
namespace td { namespace td {

View File

@ -1,7 +1,17 @@
//
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#pragma once #pragma once
#include "td/utils/common.h" #include "td/utils/common.h"
#include <utility>
namespace td { namespace td {
template <class T> template <class T>
class fixed_vector { class fixed_vector {
public: public:
@ -20,12 +30,17 @@ class fixed_vector {
~fixed_vector() { ~fixed_vector() {
delete[] ptr_; delete[] ptr_;
} }
using iterator = T *;
using const_iterator = const T *;
T &operator[](size_t i) { T &operator[](size_t i) {
return ptr_[i]; return ptr_[i];
} }
const T &operator[](size_t i) const { const T &operator[](size_t i) const {
return ptr_[i]; return ptr_[i];
} }
T *begin() { T *begin() {
return ptr_; return ptr_;
} }
@ -38,14 +53,14 @@ class fixed_vector {
const T *end() const { const T *end() const {
return ptr_ + size_; return ptr_ + size_;
} }
bool empty() const { bool empty() const {
return size() == 0; return size() == 0;
} }
size_t size() const { size_t size() const {
return size_; return size_;
} }
using iterator = T *;
using const_iterator = const T *;
void swap(fixed_vector<T> &other) { void swap(fixed_vector<T> &other) {
std::swap(ptr_, other.ptr_); std::swap(ptr_, other.ptr_);
std::swap(size_, other.size_); std::swap(size_, other.size_);
@ -55,4 +70,5 @@ class fixed_vector {
T *ptr_{}; T *ptr_{};
size_t size_{0}; size_t size_{0};
}; };
} // namespace td } // namespace td

View File

@ -8,6 +8,7 @@
#include "td/utils/common.h" #include "td/utils/common.h"
#include "td/utils/FlatHashMap.h" #include "td/utils/FlatHashMap.h"
#include "td/utils/FlatHashMapChunks.h" #include "td/utils/FlatHashMapChunks.h"
#include "td/utils/logging.h"
#include "td/utils/Random.h" #include "td/utils/Random.h"
#include "td/utils/Slice.h" #include "td/utils/Slice.h"
#include "td/utils/tests.h" #include "td/utils/tests.h"
@ -44,10 +45,10 @@ TEST(FlatHashMapChunks, basic) {
TEST(FlatHashMap, probing) { TEST(FlatHashMap, probing) {
auto test = [](int buckets, int elements) { auto test = [](int buckets, int elements) {
CHECK(buckets >= elements); CHECK(buckets >= elements);
std::vector<bool> data(buckets, false); td::vector<bool> data(buckets, false);
std::random_device rnd; std::random_device rnd;
std::mt19937 mt(rnd()); std::mt19937 mt(rnd());
std::uniform_int_distribution<int32_t> d(0, buckets - 1); std::uniform_int_distribution<td::int32> d(0, buckets - 1);
for (int i = 0; i < elements; i++) { for (int i = 0; i < elements; i++) {
int pos = d(mt); int pos = d(mt);
while (data[pos]) { while (data[pos]) {
@ -63,16 +64,16 @@ TEST(FlatHashMap, probing) {
for (auto x : data) { for (auto x : data) {
if (x) { if (x) {
cur_chain++; cur_chain++;
max_chain = std::max(max_chain, cur_chain); max_chain = td::max(max_chain, cur_chain);
} else { } else {
cur_chain = 0; cur_chain = 0;
} }
} }
LOG(ERROR) << "buckets=" << buckets << " elements=" << elements << " max_chain=" << max_chain; LOG(INFO) << "Buckets=" << buckets << " elements=" << elements << " max_chain=" << max_chain;
}; };
test(8192, int(8192 * 0.8)); test(8192, static_cast<int>(8192 * 0.8));
test(8192, int(8192 * 0.6)); test(8192, static_cast<int>(8192 * 0.6));
test(8192, int(8192 * 0.3)); test(8192, static_cast<int>(8192 * 0.3));
} }
TEST(FlatHashSet, TL) { TEST(FlatHashSet, TL) {
@ -187,7 +188,7 @@ TEST(FlatHashMap, basic) {
TEST(FlatHashMap, remove_if_basic) { TEST(FlatHashMap, remove_if_basic) {
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
constexpr int TESTS_N = 10000; constexpr int TESTS_N = 1000;
constexpr int MAX_TABLE_SIZE = 1000; constexpr int MAX_TABLE_SIZE = 1000;
for (int test_i = 0; test_i < TESTS_N; test_i++) { for (int test_i = 0; test_i < TESTS_N; test_i++) {
std::unordered_map<td::uint64, td::uint64> reference; std::unordered_map<td::uint64, td::uint64> reference;
@ -234,7 +235,7 @@ TEST(FlatHashMap, stress_test) {
td::vector<td::RandomSteps::Step> steps; td::vector<td::RandomSteps::Step> steps;
auto add_step = [&](td::Slice step_name, td::uint32 weight, auto f) { auto add_step = [&](td::Slice step_name, td::uint32 weight, auto f) {
auto g = [&, step_name, f = std::move(f)]() { auto g = [&, step_name, f = std::move(f)] {
//LOG(ERROR) << step_name; //LOG(ERROR) << step_name;
//ASSERT_EQ(ref.size(), tbl.size()); //ASSERT_EQ(ref.size(), tbl.size());
f(); f();
@ -328,7 +329,7 @@ TEST(FlatHashMap, stress_test) {
}); });
td::RandomSteps runner(std::move(steps)); td::RandomSteps runner(std::move(steps));
for (size_t i = 0; i < 10000000; i++) { for (size_t i = 0; i < 1000000; i++) {
runner.step(rnd); runner.step(rnd);
} }
} }

View File

@ -23,16 +23,17 @@
#include <benchmark/benchmark.h> #include <benchmark/benchmark.h>
#include <folly/container/F14Map.h> #include <folly/container/F14Map.h>
#include <map> #include <map>
#include <random>
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
template <class TableT> template <class TableT>
static void reserve(TableT &table, size_t size) { static void reserve(TableT &table, std::size_t size) {
table.reserve(size); table.reserve(size);
} }
template <class A, class B> template <class A, class B>
static void reserve(std::map<A, B> &table, size_t size) { static void reserve(std::map<A, B> &table, std::size_t size) {
} }
template <class KeyT, class ValueT> template <class KeyT, class ValueT>
@ -132,7 +133,7 @@ class SimpleHashTable {
ValueT *find(const KeyT &needle) { ValueT *find(const KeyT &needle) {
auto hash = HashT()(needle); auto hash = HashT()(needle);
size_t i = hash % nodes_.size(); std::size_t i = hash % nodes_.size();
while (true) { while (true) {
if (nodes_[i].key == needle) { if (nodes_[i].key == needle) {
return &nodes_[i].value; return &nodes_[i].value;
@ -185,7 +186,7 @@ static void BM_Get(benchmark::State &state) {
td::vector<KeyValue> data; td::vector<KeyValue> data;
td::vector<Key> keys; td::vector<Key> keys;
for (size_t i = 0; i < n; i++) { for (std::size_t i = 0; i < n; i++) {
auto key = rnd(); auto key = rnd();
auto value = rnd(); auto value = rnd();
data.emplace_back(key, value); data.emplace_back(key, value);
@ -193,7 +194,7 @@ static void BM_Get(benchmark::State &state) {
} }
TableT table(data.begin(), data.end()); TableT table(data.begin(), data.end());
size_t key_i = 0; std::size_t key_i = 0;
td::random_shuffle(td::as_mutable_span(keys), rnd); td::random_shuffle(td::as_mutable_span(keys), rnd);
auto next_key = [&] { auto next_key = [&] {
key_i++; key_i++;
@ -204,7 +205,7 @@ static void BM_Get(benchmark::State &state) {
}; };
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
for (size_t i = 0; i < BATCH_SIZE; i++) { for (std::size_t i = 0; i < BATCH_SIZE; i++) {
benchmark::DoNotOptimize(table.find(next_key())); benchmark::DoNotOptimize(table.find(next_key()));
} }
} }
@ -214,11 +215,11 @@ template <typename TableT>
static void BM_find_same(benchmark::State &state) { static void BM_find_same(benchmark::State &state) {
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
TableT table; TableT table;
constexpr size_t N = 100000; constexpr std::size_t N = 100000;
constexpr size_t BATCH_SIZE = 1024; constexpr std::size_t BATCH_SIZE = 1024;
reserve(table, N); reserve(table, N);
for (size_t i = 0; i < N; i++) { for (std::size_t i = 0; i < N; i++) {
table.emplace(rnd(), i); table.emplace(rnd(), i);
} }
@ -226,7 +227,7 @@ static void BM_find_same(benchmark::State &state) {
table[key] = 123; table[key] = 123;
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
for (size_t i = 0; i < BATCH_SIZE; i++) { for (std::size_t i = 0; i < BATCH_SIZE; i++) {
benchmark::DoNotOptimize(table.find(key)); benchmark::DoNotOptimize(table.find(key));
} }
} }
@ -236,11 +237,11 @@ template <typename TableT>
static void BM_emplace_same(benchmark::State &state) { static void BM_emplace_same(benchmark::State &state) {
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
TableT table; TableT table;
constexpr size_t N = 100000; constexpr std::size_t N = 100000;
constexpr size_t BATCH_SIZE = 1024; constexpr std::size_t BATCH_SIZE = 1024;
reserve(table, N); reserve(table, N);
for (size_t i = 0; i < N; i++) { for (std::size_t i = 0; i < N; i++) {
table.emplace(rnd(), i); table.emplace(rnd(), i);
} }
@ -248,7 +249,7 @@ static void BM_emplace_same(benchmark::State &state) {
table[key] = 123; table[key] = 123;
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
for (size_t i = 0; i < BATCH_SIZE; i++) { for (std::size_t i = 0; i < BATCH_SIZE; i++) {
benchmark::DoNotOptimize(table.emplace(key + (i & 15) * 100, 43784932)); benchmark::DoNotOptimize(table.emplace(key + (i & 15) * 100, 43784932));
} }
} }
@ -271,15 +272,15 @@ static void table_remove_if(absl::flat_hash_map<K, V> &table, FunctT &&func) {
template <typename TableT> template <typename TableT>
static void BM_remove_if(benchmark::State &state) { static void BM_remove_if(benchmark::State &state) {
constexpr size_t N = 100000; constexpr std::size_t N = 100000;
constexpr size_t BATCH_SIZE = N; constexpr std::size_t BATCH_SIZE = N;
TableT table; TableT table;
reserve(table, N); reserve(table, N);
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
state.PauseTiming(); state.PauseTiming();
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
for (size_t i = 0; i < N; i++) { for (std::size_t i = 0; i < N; i++) {
table.emplace(rnd(), i); table.emplace(rnd(), i);
} }
state.ResumeTiming(); state.ResumeTiming();
@ -290,13 +291,13 @@ static void BM_remove_if(benchmark::State &state) {
template <typename TableT> template <typename TableT>
static void BM_erase_all_with_begin(benchmark::State &state) { static void BM_erase_all_with_begin(benchmark::State &state) {
constexpr size_t N = 100000; constexpr std::size_t N = 100000;
constexpr size_t BATCH_SIZE = N; constexpr std::size_t BATCH_SIZE = N;
TableT table; TableT table;
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
for (size_t i = 0; i < BATCH_SIZE; i++) { for (std::size_t i = 0; i < BATCH_SIZE; i++) {
table.emplace(rnd() + 1, i); table.emplace(rnd() + 1, i);
} }
while (!table.empty()) { while (!table.empty()) {
@ -307,14 +308,14 @@ static void BM_erase_all_with_begin(benchmark::State &state) {
template <typename TableT> template <typename TableT>
static void BM_cache(benchmark::State &state) { static void BM_cache(benchmark::State &state) {
constexpr size_t N = 1000; constexpr std::size_t N = 1000;
constexpr size_t BATCH_SIZE = 1000000; constexpr std::size_t BATCH_SIZE = 1000000;
TableT table; TableT table;
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
td::VectorQueue<td::uint64> keys; td::VectorQueue<td::uint64> keys;
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
for (size_t i = 0; i < BATCH_SIZE; i++) { for (std::size_t i = 0; i < BATCH_SIZE; i++) {
auto key = rnd() + 1; auto key = rnd() + 1;
keys.push(key); keys.push(key);
table.emplace(key, i); table.emplace(key, i);
@ -327,14 +328,14 @@ static void BM_cache(benchmark::State &state) {
template <typename TableT> template <typename TableT>
static void BM_cache2(benchmark::State &state) { static void BM_cache2(benchmark::State &state) {
constexpr size_t N = 1000; constexpr std::size_t N = 1000;
constexpr size_t BATCH_SIZE = 1000000; constexpr std::size_t BATCH_SIZE = 1000000;
TableT table; TableT table;
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
td::VectorQueue<td::uint64> keys; td::VectorQueue<td::uint64> keys;
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
for (size_t i = 0; i < BATCH_SIZE; i++) { for (std::size_t i = 0; i < BATCH_SIZE; i++) {
auto key = rnd() + 1; auto key = rnd() + 1;
keys.push(key); keys.push(key);
table.emplace(key, i); table.emplace(key, i);
@ -347,20 +348,20 @@ static void BM_cache2(benchmark::State &state) {
template <typename TableT> template <typename TableT>
static void BM_cache3(benchmark::State &state) { static void BM_cache3(benchmark::State &state) {
size_t N = state.range(0); std::size_t N = state.range(0);
constexpr size_t BATCH_SIZE = 1000000; constexpr std::size_t BATCH_SIZE = 1000000;
TableT table; TableT table;
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
td::VectorQueue<td::uint64> keys; td::VectorQueue<td::uint64> keys;
size_t step = 20; std::size_t step = 20;
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
for (size_t i = 0; i < BATCH_SIZE; i += step) { for (std::size_t i = 0; i < BATCH_SIZE; i += step) {
auto key = rnd() + 1; auto key = rnd() + 1;
keys.push(key); keys.push(key);
table.emplace(key, i); table.emplace(key, i);
for (size_t j = 1; j < step; j++) { for (std::size_t j = 1; j < step; j++) {
auto key_to_find = keys.data()[rnd() % keys.size()]; auto key_to_find = keys.data()[rnd() % keys.size()];
benchmark::DoNotOptimize(table.find(key_to_find)); benchmark::DoNotOptimize(table.find(key_to_find));
} }
@ -373,24 +374,24 @@ static void BM_cache3(benchmark::State &state) {
} }
template <typename TableT> template <typename TableT>
static void BM_remove_if_slow(benchmark::State &state) { static void BM_remove_if_slow(benchmark::State &state) {
constexpr size_t N = 5000; constexpr std::size_t N = 5000;
constexpr size_t BATCH_SIZE = 500000; constexpr std::size_t BATCH_SIZE = 500000;
TableT table; TableT table;
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
for (size_t i = 0; i < N; i++) { for (std::size_t i = 0; i < N; i++) {
table.emplace(rnd() + 1, i); table.emplace(rnd() + 1, i);
} }
auto first_key = table.begin()->first; auto first_key = table.begin()->first;
{ {
size_t cnt = 0; std::size_t cnt = 0;
td::table_remove_if(table, [&cnt](auto &) { td::table_remove_if(table, [&cnt, n = N](auto &) {
cnt += 2; cnt += 2;
return cnt <= N; return cnt <= n;
}); });
} }
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
for (size_t i = 0; i < BATCH_SIZE; i++) { for (std::size_t i = 0; i < BATCH_SIZE; i++) {
table.emplace(first_key, i); table.emplace(first_key, i);
table.erase(first_key); table.erase(first_key);
} }
@ -398,16 +399,16 @@ static void BM_remove_if_slow(benchmark::State &state) {
} }
template <typename TableT> template <typename TableT>
static void BM_remove_if_slow_old(benchmark::State &state) { static void BM_remove_if_slow_old(benchmark::State &state) {
constexpr size_t N = 100000; constexpr std::size_t N = 100000;
constexpr size_t BATCH_SIZE = 5000000; constexpr std::size_t BATCH_SIZE = 5000000;
TableT table; TableT table;
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
for (size_t i = 0; i < BATCH_SIZE; i++) { for (std::size_t i = 0; i < BATCH_SIZE; i++) {
table.emplace(rnd() + 1, i); table.emplace(rnd() + 1, i);
if (table.size() > N) { if (table.size() > N) {
size_t cnt = 0; std::size_t cnt = 0;
td::table_remove_if(table, [&cnt, n = N](auto &) { td::table_remove_if(table, [&cnt, n = N](auto &) {
cnt += 2; cnt += 2;
return cnt <= n; return cnt <= n;
@ -421,11 +422,11 @@ template <typename TableT>
static void benchmark_create(td::Slice name) { static void benchmark_create(td::Slice name) {
td::Random::Xorshift128plus rnd(123); td::Random::Xorshift128plus rnd(123);
{ {
constexpr size_t N = 10000000; constexpr std::size_t N = 10000000;
TableT table; TableT table;
reserve(table, N); reserve(table, N);
auto start = td::Timestamp::now(); auto start = td::Timestamp::now();
for (size_t i = 0; i < N; i++) { for (std::size_t i = 0; i < N; i++) {
table.emplace(rnd(), i); table.emplace(rnd(), i);
} }
auto end = td::Timestamp::now(); auto end = td::Timestamp::now();
@ -433,8 +434,8 @@ static void benchmark_create(td::Slice name) {
<< "create " << N << " elements: " << td::format::as_time(end.at() - start.at()); << "create " << N << " elements: " << td::format::as_time(end.at() - start.at());
double res = 0; double res = 0;
td::vector<std::pair<size_t, td::format::Time>> pauses; td::vector<std::pair<std::size_t, td::format::Time>> pauses;
for (size_t i = 0; i < N; i++) { for (std::size_t i = 0; i < N; i++) {
auto emplace_start = td::Timestamp::now(); auto emplace_start = td::Timestamp::now();
table.emplace(rnd(), i); table.emplace(rnd(), i);
auto emplace_end = td::Timestamp::now(); auto emplace_end = td::Timestamp::now();
@ -451,15 +452,15 @@ static void benchmark_create(td::Slice name) {
} }
struct CacheMissNode { struct CacheMissNode {
uint32_t data{}; td::uint32 data{};
char padding[64 - sizeof(data)]; char padding[64 - sizeof(data)];
}; };
class IterateFast { class IterateFast {
public: public:
static __attribute__((noinline)) uint32_t iterate(CacheMissNode *ptr, size_t max_shift) { static td::uint32 iterate(CacheMissNode *ptr, std::size_t max_shift) {
uint32_t res = 1; td::uint32 res = 1;
for (size_t i = 0; i < max_shift; i++) { for (std::size_t i = 0; i < max_shift; i++) {
if (ptr[i].data % max_shift != 0) { if (ptr[i].data % max_shift != 0) {
res *= ptr[i].data; res *= ptr[i].data;
} else { } else {
@ -472,9 +473,9 @@ class IterateFast {
class IterateSlow { class IterateSlow {
public: public:
static __attribute__((noinline)) uint32_t iterate(CacheMissNode *ptr, size_t max_shift) { static td::uint32 iterate(CacheMissNode *ptr, std::size_t max_shift) {
uint32_t res = 1; td::uint32 res = 1;
for (size_t i = 0;; i++) { for (std::size_t i = 0;; i++) {
if (ptr[i].data % max_shift != 0) { if (ptr[i].data % max_shift != 0) {
res *= ptr[i].data; res *= ptr[i].data;
} else { } else {
@ -484,16 +485,16 @@ class IterateSlow {
return res; return res;
} }
}; };
#include <random>
template <class F> template <class F>
void BM_cache_miss(benchmark::State &state) { static void BM_cache_miss(benchmark::State &state) {
uint32_t max_shift = state.range(0); td::uint32 max_shift = state.range(0);
bool flag = state.range(1); bool flag = state.range(1);
std::random_device rd; std::random_device rd;
std::mt19937 rnd(rd()); std::mt19937 rnd(rd());
int N = 50000000; int N = 50000000;
std::vector<CacheMissNode> nodes(N); td::vector<CacheMissNode> nodes(N);
uint32_t i = 0; td::uint32 i = 0;
for (auto &node : nodes) { for (auto &node : nodes) {
if (flag) { if (flag) {
node.data = i++ % max_shift; node.data = i++ % max_shift;
@ -502,8 +503,8 @@ void BM_cache_miss(benchmark::State &state) {
} }
} }
std::vector<int> positions(N); td::vector<int> positions(N);
std::uniform_int_distribution<uint32_t> rnd_pos(0, N - 1000); std::uniform_int_distribution<td::uint32> rnd_pos(0, N - 1000);
for (auto &pos : positions) { for (auto &pos : positions) {
pos = rnd_pos(rnd); pos = rnd_pos(rnd);
if (flag) { if (flag) {
@ -520,7 +521,7 @@ void BM_cache_miss(benchmark::State &state) {
} }
} }
uint64_t equal_mask_slow(uint8_t *bytes, uint8_t needle) { static uint64_t equal_mask_slow(td::uint8 *bytes, td::uint8 needle) {
uint64_t mask = 0; uint64_t mask = 0;
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
mask |= (bytes[i] == needle) << i; mask |= (bytes[i] == needle) << i;
@ -529,19 +530,20 @@ uint64_t equal_mask_slow(uint8_t *bytes, uint8_t needle) {
} }
template <class MaskT> template <class MaskT>
void BM_mask(benchmark::State &state) { static void BM_mask(benchmark::State &state) {
size_t BATCH_SIZE = 1024; std::size_t BATCH_SIZE = 1024;
std::vector<uint8_t> bytes(BATCH_SIZE + 16); td::vector<td::uint8> bytes(BATCH_SIZE + 16);
for (auto &b : bytes) { for (auto &b : bytes) {
b = static_cast<uint8_t>(td::Random::fast(0, 17)); b = static_cast<td::uint8>(td::Random::fast(0, 17));
} }
while (state.KeepRunningBatch(BATCH_SIZE)) { while (state.KeepRunningBatch(BATCH_SIZE)) {
for (size_t i = 0; i < BATCH_SIZE; i++) { for (std::size_t i = 0; i < BATCH_SIZE; i++) {
benchmark::DoNotOptimize(MaskT::equal_mask(bytes.data() + i, 17)); benchmark::DoNotOptimize(MaskT::equal_mask(bytes.data() + i, 17));
} }
} }
} }
BENCHMARK_TEMPLATE(BM_mask, td::MaskPortable); BENCHMARK_TEMPLATE(BM_mask, td::MaskPortable);
#ifdef __aarch64__ #ifdef __aarch64__
BENCHMARK_TEMPLATE(BM_mask, td::MaskNeonFolly); BENCHMARK_TEMPLATE(BM_mask, td::MaskNeonFolly);