Various fixes.
This commit is contained in:
parent
b5cf85d6e2
commit
ae3854d97c
@ -9,6 +9,9 @@
|
||||
#include "td/utils/FlatHashMapChunks.h"
|
||||
#include "td/utils/FlatHashMapLinear.h"
|
||||
|
||||
//#include <unordered_map>
|
||||
//#include <unordered_set>
|
||||
|
||||
namespace td {
|
||||
template <class KeyT, class ValueT, class HashT = std::hash<KeyT>, class EqT = std::equal_to<KeyT>>
|
||||
//using FlatHashMap = FlatHashMapImpl<KeyT, ValueT, HashT, EqT>;
|
||||
|
@ -10,22 +10,22 @@
|
||||
#include "td/utils/bits.h"
|
||||
#include "td/utils/common.h"
|
||||
#include "td/utils/FlatHashMapLinear.h"
|
||||
#include "td/utils/logging.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <initializer_list>
|
||||
#include <iterator>
|
||||
#include <new>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
#if (defined(_MSC_VER) && (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2)))
|
||||
#if defined(__SSE2__) || (TD_MSVC && (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2)))
|
||||
#define TD_SSE2 1
|
||||
#endif
|
||||
|
||||
#ifdef __aarch64__
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#if TD_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
@ -33,12 +33,12 @@
|
||||
namespace td {
|
||||
template <int shift>
|
||||
struct MaskIterator {
|
||||
uint64_t mask;
|
||||
uint64 mask;
|
||||
explicit operator bool() const {
|
||||
return mask != 0;
|
||||
}
|
||||
int pos() const {
|
||||
return td::count_trailing_zeroes64(mask) / shift;
|
||||
return count_trailing_zeroes64(mask) / shift;
|
||||
}
|
||||
void next() {
|
||||
mask &= mask - 1;
|
||||
@ -63,8 +63,8 @@ struct MaskIterator {
|
||||
};
|
||||
|
||||
struct MaskPortable {
|
||||
static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) {
|
||||
uint64_t res = 0;
|
||||
static MaskIterator<1> equal_mask(uint8 *bytes, uint8 needle) {
|
||||
uint64 res = 0;
|
||||
for (int i = 0; i < 16; i++) {
|
||||
res |= (bytes[i] == needle) << i;
|
||||
}
|
||||
@ -74,20 +74,20 @@ struct MaskPortable {
|
||||
|
||||
#ifdef __aarch64__
|
||||
struct MaskNeonFolly {
|
||||
static MaskIterator<4> equal_mask(uint8_t *bytes, uint8_t needle) {
|
||||
static MaskIterator<4> equal_mask(uint8 *bytes, uint8 needle) {
|
||||
uint8x16_t input_mask = vld1q_u8(bytes);
|
||||
auto needle_mask = vdupq_n_u8(needle);
|
||||
auto eq_mask = vceqq_u8(input_mask, needle_mask);
|
||||
// get info from every byte into the bottom half of every uint16_t
|
||||
// get info from every byte into the bottom half of every uint16
|
||||
// by shifting right 4, then round to get it into a 64-bit vector
|
||||
uint8x8_t shifted_eq_mask = vshrn_n_u16(vreinterpretq_u16_u8(eq_mask), 4);
|
||||
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(shifted_eq_mask), 0);
|
||||
uint64 mask = vget_lane_u64(vreinterpret_u64_u8(shifted_eq_mask), 0);
|
||||
return {mask & 0x11111111111111};
|
||||
}
|
||||
};
|
||||
|
||||
struct MaskNeon {
|
||||
static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) {
|
||||
static MaskIterator<1> equal_mask(uint8 *bytes, uint8 needle) {
|
||||
uint8x16_t input_mask = vld1q_u8(bytes);
|
||||
auto needle_mask = vdupq_n_u8(needle);
|
||||
auto eq_mask = vceqq_u8(input_mask, needle_mask);
|
||||
@ -101,11 +101,11 @@ struct MaskNeon {
|
||||
};
|
||||
#elif TD_SSE2
|
||||
struct MaskSse2 {
|
||||
static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) {
|
||||
static MaskIterator<1> equal_mask(uint8 *bytes, uint8 needle) {
|
||||
auto input_mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes));
|
||||
auto needle_mask = _mm_set1_epi8(needle);
|
||||
auto match_mask = _mm_cmpeq_epi8(needle_mask, input_mask);
|
||||
return {static_cast<uint32_t>(_mm_movemask_epi8(match_mask))};
|
||||
return {static_cast<uint32>(_mm_movemask_epi8(match_mask)) & ((1u << 14) - 1)};
|
||||
}
|
||||
};
|
||||
#endif
|
||||
@ -224,7 +224,7 @@ class FlatHashTableChunks {
|
||||
|
||||
FlatHashTableChunks(std::initializer_list<Node> nodes) {
|
||||
reserve(nodes.size());
|
||||
for (auto &node : td::reversed(nodes)) {
|
||||
for (auto &node : reversed(nodes)) {
|
||||
CHECK(!node.empty());
|
||||
if (count(node.first) > 0) {
|
||||
continue;
|
||||
@ -351,7 +351,7 @@ class FlatHashTableChunks {
|
||||
used_nodes_++;
|
||||
return {{node_it, this}, true};
|
||||
}
|
||||
CHECK(chunk.skipped_cnt != std::numeric_limits<uint16_t>::max());
|
||||
CHECK(chunk.skipped_cnt != std::numeric_limits<uint16>::max());
|
||||
chunk.skipped_cnt++;
|
||||
chunk_it.next();
|
||||
}
|
||||
@ -413,7 +413,7 @@ class FlatHashTableChunks {
|
||||
static constexpr int CHUNK_SIZE = 14;
|
||||
static constexpr int MASK = (1 << CHUNK_SIZE) - 1;
|
||||
// 0x0 - empty
|
||||
td::uint8 ctrl[CHUNK_SIZE] = {};
|
||||
uint8 ctrl[CHUNK_SIZE] = {};
|
||||
uint16 skipped_cnt{0};
|
||||
};
|
||||
fixed_vector<Node> nodes_;
|
||||
@ -464,7 +464,7 @@ class FlatHashTableChunks {
|
||||
|
||||
struct HashInfo {
|
||||
size_t chunk_i;
|
||||
uint8_t small_hash;
|
||||
uint8 small_hash;
|
||||
};
|
||||
struct ChunkIt {
|
||||
size_t chunk_i;
|
||||
@ -488,7 +488,7 @@ class FlatHashTableChunks {
|
||||
HashInfo calc_hash(const KeyT &key) {
|
||||
auto h = HashT()(key);
|
||||
// TODO: will be problematic with current hash.
|
||||
return {(h >> 8) % chunks_.size(), uint8_t(0x80 | h)};
|
||||
return {(h >> 8) % chunks_.size(), static_cast<uint8>(0x80 | h)};
|
||||
}
|
||||
|
||||
void resize(size_t new_size) {
|
||||
@ -526,7 +526,7 @@ class FlatHashTableChunks {
|
||||
used_nodes_++;
|
||||
break;
|
||||
}
|
||||
CHECK(chunk.skipped_cnt != std::numeric_limits<uint16_t>::max());
|
||||
CHECK(chunk.skipped_cnt != std::numeric_limits<uint16>::max());
|
||||
chunk.skipped_cnt++;
|
||||
chunk_it.next();
|
||||
}
|
||||
|
@ -9,14 +9,12 @@
|
||||
#include "td/utils/bits.h"
|
||||
#include "td/utils/common.h"
|
||||
#include "td/utils/fixed_vector.h"
|
||||
#include "td/utils/logging.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <initializer_list>
|
||||
#include <iterator>
|
||||
#include <new>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
namespace td {
|
||||
|
@ -1,7 +1,17 @@
|
||||
//
|
||||
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "td/utils/common.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace td {
|
||||
|
||||
template <class T>
|
||||
class fixed_vector {
|
||||
public:
|
||||
@ -20,12 +30,17 @@ class fixed_vector {
|
||||
~fixed_vector() {
|
||||
delete[] ptr_;
|
||||
}
|
||||
|
||||
using iterator = T *;
|
||||
using const_iterator = const T *;
|
||||
|
||||
T &operator[](size_t i) {
|
||||
return ptr_[i];
|
||||
}
|
||||
const T &operator[](size_t i) const {
|
||||
return ptr_[i];
|
||||
}
|
||||
|
||||
T *begin() {
|
||||
return ptr_;
|
||||
}
|
||||
@ -38,14 +53,14 @@ class fixed_vector {
|
||||
const T *end() const {
|
||||
return ptr_ + size_;
|
||||
}
|
||||
|
||||
bool empty() const {
|
||||
return size() == 0;
|
||||
}
|
||||
size_t size() const {
|
||||
return size_;
|
||||
}
|
||||
using iterator = T *;
|
||||
using const_iterator = const T *;
|
||||
|
||||
void swap(fixed_vector<T> &other) {
|
||||
std::swap(ptr_, other.ptr_);
|
||||
std::swap(size_, other.size_);
|
||||
@ -55,4 +70,5 @@ class fixed_vector {
|
||||
T *ptr_{};
|
||||
size_t size_{0};
|
||||
};
|
||||
|
||||
} // namespace td
|
@ -8,6 +8,7 @@
|
||||
#include "td/utils/common.h"
|
||||
#include "td/utils/FlatHashMap.h"
|
||||
#include "td/utils/FlatHashMapChunks.h"
|
||||
#include "td/utils/logging.h"
|
||||
#include "td/utils/Random.h"
|
||||
#include "td/utils/Slice.h"
|
||||
#include "td/utils/tests.h"
|
||||
@ -44,10 +45,10 @@ TEST(FlatHashMapChunks, basic) {
|
||||
TEST(FlatHashMap, probing) {
|
||||
auto test = [](int buckets, int elements) {
|
||||
CHECK(buckets >= elements);
|
||||
std::vector<bool> data(buckets, false);
|
||||
td::vector<bool> data(buckets, false);
|
||||
std::random_device rnd;
|
||||
std::mt19937 mt(rnd());
|
||||
std::uniform_int_distribution<int32_t> d(0, buckets - 1);
|
||||
std::uniform_int_distribution<td::int32> d(0, buckets - 1);
|
||||
for (int i = 0; i < elements; i++) {
|
||||
int pos = d(mt);
|
||||
while (data[pos]) {
|
||||
@ -63,16 +64,16 @@ TEST(FlatHashMap, probing) {
|
||||
for (auto x : data) {
|
||||
if (x) {
|
||||
cur_chain++;
|
||||
max_chain = std::max(max_chain, cur_chain);
|
||||
max_chain = td::max(max_chain, cur_chain);
|
||||
} else {
|
||||
cur_chain = 0;
|
||||
}
|
||||
}
|
||||
LOG(ERROR) << "buckets=" << buckets << " elements=" << elements << " max_chain=" << max_chain;
|
||||
LOG(INFO) << "Buckets=" << buckets << " elements=" << elements << " max_chain=" << max_chain;
|
||||
};
|
||||
test(8192, int(8192 * 0.8));
|
||||
test(8192, int(8192 * 0.6));
|
||||
test(8192, int(8192 * 0.3));
|
||||
test(8192, static_cast<int>(8192 * 0.8));
|
||||
test(8192, static_cast<int>(8192 * 0.6));
|
||||
test(8192, static_cast<int>(8192 * 0.3));
|
||||
}
|
||||
|
||||
TEST(FlatHashSet, TL) {
|
||||
@ -187,7 +188,7 @@ TEST(FlatHashMap, basic) {
|
||||
TEST(FlatHashMap, remove_if_basic) {
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
|
||||
constexpr int TESTS_N = 10000;
|
||||
constexpr int TESTS_N = 1000;
|
||||
constexpr int MAX_TABLE_SIZE = 1000;
|
||||
for (int test_i = 0; test_i < TESTS_N; test_i++) {
|
||||
std::unordered_map<td::uint64, td::uint64> reference;
|
||||
@ -234,7 +235,7 @@ TEST(FlatHashMap, stress_test) {
|
||||
|
||||
td::vector<td::RandomSteps::Step> steps;
|
||||
auto add_step = [&](td::Slice step_name, td::uint32 weight, auto f) {
|
||||
auto g = [&, step_name, f = std::move(f)]() {
|
||||
auto g = [&, step_name, f = std::move(f)] {
|
||||
//LOG(ERROR) << step_name;
|
||||
//ASSERT_EQ(ref.size(), tbl.size());
|
||||
f();
|
||||
@ -328,7 +329,7 @@ TEST(FlatHashMap, stress_test) {
|
||||
});
|
||||
|
||||
td::RandomSteps runner(std::move(steps));
|
||||
for (size_t i = 0; i < 10000000; i++) {
|
||||
for (size_t i = 0; i < 1000000; i++) {
|
||||
runner.step(rnd);
|
||||
}
|
||||
}
|
||||
|
@ -23,16 +23,17 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <folly/container/F14Map.h>
|
||||
#include <map>
|
||||
#include <random>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
template <class TableT>
|
||||
static void reserve(TableT &table, size_t size) {
|
||||
static void reserve(TableT &table, std::size_t size) {
|
||||
table.reserve(size);
|
||||
}
|
||||
|
||||
template <class A, class B>
|
||||
static void reserve(std::map<A, B> &table, size_t size) {
|
||||
static void reserve(std::map<A, B> &table, std::size_t size) {
|
||||
}
|
||||
|
||||
template <class KeyT, class ValueT>
|
||||
@ -132,7 +133,7 @@ class SimpleHashTable {
|
||||
|
||||
ValueT *find(const KeyT &needle) {
|
||||
auto hash = HashT()(needle);
|
||||
size_t i = hash % nodes_.size();
|
||||
std::size_t i = hash % nodes_.size();
|
||||
while (true) {
|
||||
if (nodes_[i].key == needle) {
|
||||
return &nodes_[i].value;
|
||||
@ -185,7 +186,7 @@ static void BM_Get(benchmark::State &state) {
|
||||
td::vector<KeyValue> data;
|
||||
td::vector<Key> keys;
|
||||
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
for (std::size_t i = 0; i < n; i++) {
|
||||
auto key = rnd();
|
||||
auto value = rnd();
|
||||
data.emplace_back(key, value);
|
||||
@ -193,7 +194,7 @@ static void BM_Get(benchmark::State &state) {
|
||||
}
|
||||
TableT table(data.begin(), data.end());
|
||||
|
||||
size_t key_i = 0;
|
||||
std::size_t key_i = 0;
|
||||
td::random_shuffle(td::as_mutable_span(keys), rnd);
|
||||
auto next_key = [&] {
|
||||
key_i++;
|
||||
@ -204,7 +205,7 @@ static void BM_Get(benchmark::State &state) {
|
||||
};
|
||||
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
for (size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
benchmark::DoNotOptimize(table.find(next_key()));
|
||||
}
|
||||
}
|
||||
@ -214,11 +215,11 @@ template <typename TableT>
|
||||
static void BM_find_same(benchmark::State &state) {
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
TableT table;
|
||||
constexpr size_t N = 100000;
|
||||
constexpr size_t BATCH_SIZE = 1024;
|
||||
constexpr std::size_t N = 100000;
|
||||
constexpr std::size_t BATCH_SIZE = 1024;
|
||||
reserve(table, N);
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
for (std::size_t i = 0; i < N; i++) {
|
||||
table.emplace(rnd(), i);
|
||||
}
|
||||
|
||||
@ -226,7 +227,7 @@ static void BM_find_same(benchmark::State &state) {
|
||||
table[key] = 123;
|
||||
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
for (size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
benchmark::DoNotOptimize(table.find(key));
|
||||
}
|
||||
}
|
||||
@ -236,11 +237,11 @@ template <typename TableT>
|
||||
static void BM_emplace_same(benchmark::State &state) {
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
TableT table;
|
||||
constexpr size_t N = 100000;
|
||||
constexpr size_t BATCH_SIZE = 1024;
|
||||
constexpr std::size_t N = 100000;
|
||||
constexpr std::size_t BATCH_SIZE = 1024;
|
||||
reserve(table, N);
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
for (std::size_t i = 0; i < N; i++) {
|
||||
table.emplace(rnd(), i);
|
||||
}
|
||||
|
||||
@ -248,7 +249,7 @@ static void BM_emplace_same(benchmark::State &state) {
|
||||
table[key] = 123;
|
||||
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
for (size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
benchmark::DoNotOptimize(table.emplace(key + (i & 15) * 100, 43784932));
|
||||
}
|
||||
}
|
||||
@ -271,15 +272,15 @@ static void table_remove_if(absl::flat_hash_map<K, V> &table, FunctT &&func) {
|
||||
|
||||
template <typename TableT>
|
||||
static void BM_remove_if(benchmark::State &state) {
|
||||
constexpr size_t N = 100000;
|
||||
constexpr size_t BATCH_SIZE = N;
|
||||
constexpr std::size_t N = 100000;
|
||||
constexpr std::size_t BATCH_SIZE = N;
|
||||
|
||||
TableT table;
|
||||
reserve(table, N);
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
state.PauseTiming();
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
for (std::size_t i = 0; i < N; i++) {
|
||||
table.emplace(rnd(), i);
|
||||
}
|
||||
state.ResumeTiming();
|
||||
@ -290,13 +291,13 @@ static void BM_remove_if(benchmark::State &state) {
|
||||
|
||||
template <typename TableT>
|
||||
static void BM_erase_all_with_begin(benchmark::State &state) {
|
||||
constexpr size_t N = 100000;
|
||||
constexpr size_t BATCH_SIZE = N;
|
||||
constexpr std::size_t N = 100000;
|
||||
constexpr std::size_t BATCH_SIZE = N;
|
||||
|
||||
TableT table;
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
for (size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
table.emplace(rnd() + 1, i);
|
||||
}
|
||||
while (!table.empty()) {
|
||||
@ -307,14 +308,14 @@ static void BM_erase_all_with_begin(benchmark::State &state) {
|
||||
|
||||
template <typename TableT>
|
||||
static void BM_cache(benchmark::State &state) {
|
||||
constexpr size_t N = 1000;
|
||||
constexpr size_t BATCH_SIZE = 1000000;
|
||||
constexpr std::size_t N = 1000;
|
||||
constexpr std::size_t BATCH_SIZE = 1000000;
|
||||
|
||||
TableT table;
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
td::VectorQueue<td::uint64> keys;
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
for (size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
auto key = rnd() + 1;
|
||||
keys.push(key);
|
||||
table.emplace(key, i);
|
||||
@ -327,14 +328,14 @@ static void BM_cache(benchmark::State &state) {
|
||||
|
||||
template <typename TableT>
|
||||
static void BM_cache2(benchmark::State &state) {
|
||||
constexpr size_t N = 1000;
|
||||
constexpr size_t BATCH_SIZE = 1000000;
|
||||
constexpr std::size_t N = 1000;
|
||||
constexpr std::size_t BATCH_SIZE = 1000000;
|
||||
|
||||
TableT table;
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
td::VectorQueue<td::uint64> keys;
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
for (size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
auto key = rnd() + 1;
|
||||
keys.push(key);
|
||||
table.emplace(key, i);
|
||||
@ -347,20 +348,20 @@ static void BM_cache2(benchmark::State &state) {
|
||||
|
||||
template <typename TableT>
|
||||
static void BM_cache3(benchmark::State &state) {
|
||||
size_t N = state.range(0);
|
||||
constexpr size_t BATCH_SIZE = 1000000;
|
||||
std::size_t N = state.range(0);
|
||||
constexpr std::size_t BATCH_SIZE = 1000000;
|
||||
|
||||
TableT table;
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
td::VectorQueue<td::uint64> keys;
|
||||
size_t step = 20;
|
||||
std::size_t step = 20;
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
for (size_t i = 0; i < BATCH_SIZE; i += step) {
|
||||
for (std::size_t i = 0; i < BATCH_SIZE; i += step) {
|
||||
auto key = rnd() + 1;
|
||||
keys.push(key);
|
||||
table.emplace(key, i);
|
||||
|
||||
for (size_t j = 1; j < step; j++) {
|
||||
for (std::size_t j = 1; j < step; j++) {
|
||||
auto key_to_find = keys.data()[rnd() % keys.size()];
|
||||
benchmark::DoNotOptimize(table.find(key_to_find));
|
||||
}
|
||||
@ -373,24 +374,24 @@ static void BM_cache3(benchmark::State &state) {
|
||||
}
|
||||
template <typename TableT>
|
||||
static void BM_remove_if_slow(benchmark::State &state) {
|
||||
constexpr size_t N = 5000;
|
||||
constexpr size_t BATCH_SIZE = 500000;
|
||||
constexpr std::size_t N = 5000;
|
||||
constexpr std::size_t BATCH_SIZE = 500000;
|
||||
|
||||
TableT table;
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
for (std::size_t i = 0; i < N; i++) {
|
||||
table.emplace(rnd() + 1, i);
|
||||
}
|
||||
auto first_key = table.begin()->first;
|
||||
{
|
||||
size_t cnt = 0;
|
||||
td::table_remove_if(table, [&cnt](auto &) {
|
||||
std::size_t cnt = 0;
|
||||
td::table_remove_if(table, [&cnt, n = N](auto &) {
|
||||
cnt += 2;
|
||||
return cnt <= N;
|
||||
return cnt <= n;
|
||||
});
|
||||
}
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
for (size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
table.emplace(first_key, i);
|
||||
table.erase(first_key);
|
||||
}
|
||||
@ -398,16 +399,16 @@ static void BM_remove_if_slow(benchmark::State &state) {
|
||||
}
|
||||
template <typename TableT>
|
||||
static void BM_remove_if_slow_old(benchmark::State &state) {
|
||||
constexpr size_t N = 100000;
|
||||
constexpr size_t BATCH_SIZE = 5000000;
|
||||
constexpr std::size_t N = 100000;
|
||||
constexpr std::size_t BATCH_SIZE = 5000000;
|
||||
|
||||
TableT table;
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
for (size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
table.emplace(rnd() + 1, i);
|
||||
if (table.size() > N) {
|
||||
size_t cnt = 0;
|
||||
std::size_t cnt = 0;
|
||||
td::table_remove_if(table, [&cnt, n = N](auto &) {
|
||||
cnt += 2;
|
||||
return cnt <= n;
|
||||
@ -421,11 +422,11 @@ template <typename TableT>
|
||||
static void benchmark_create(td::Slice name) {
|
||||
td::Random::Xorshift128plus rnd(123);
|
||||
{
|
||||
constexpr size_t N = 10000000;
|
||||
constexpr std::size_t N = 10000000;
|
||||
TableT table;
|
||||
reserve(table, N);
|
||||
auto start = td::Timestamp::now();
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
for (std::size_t i = 0; i < N; i++) {
|
||||
table.emplace(rnd(), i);
|
||||
}
|
||||
auto end = td::Timestamp::now();
|
||||
@ -433,8 +434,8 @@ static void benchmark_create(td::Slice name) {
|
||||
<< "create " << N << " elements: " << td::format::as_time(end.at() - start.at());
|
||||
|
||||
double res = 0;
|
||||
td::vector<std::pair<size_t, td::format::Time>> pauses;
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
td::vector<std::pair<std::size_t, td::format::Time>> pauses;
|
||||
for (std::size_t i = 0; i < N; i++) {
|
||||
auto emplace_start = td::Timestamp::now();
|
||||
table.emplace(rnd(), i);
|
||||
auto emplace_end = td::Timestamp::now();
|
||||
@ -451,15 +452,15 @@ static void benchmark_create(td::Slice name) {
|
||||
}
|
||||
|
||||
struct CacheMissNode {
|
||||
uint32_t data{};
|
||||
td::uint32 data{};
|
||||
char padding[64 - sizeof(data)];
|
||||
};
|
||||
|
||||
class IterateFast {
|
||||
public:
|
||||
static __attribute__((noinline)) uint32_t iterate(CacheMissNode *ptr, size_t max_shift) {
|
||||
uint32_t res = 1;
|
||||
for (size_t i = 0; i < max_shift; i++) {
|
||||
static td::uint32 iterate(CacheMissNode *ptr, std::size_t max_shift) {
|
||||
td::uint32 res = 1;
|
||||
for (std::size_t i = 0; i < max_shift; i++) {
|
||||
if (ptr[i].data % max_shift != 0) {
|
||||
res *= ptr[i].data;
|
||||
} else {
|
||||
@ -472,9 +473,9 @@ class IterateFast {
|
||||
|
||||
class IterateSlow {
|
||||
public:
|
||||
static __attribute__((noinline)) uint32_t iterate(CacheMissNode *ptr, size_t max_shift) {
|
||||
uint32_t res = 1;
|
||||
for (size_t i = 0;; i++) {
|
||||
static td::uint32 iterate(CacheMissNode *ptr, std::size_t max_shift) {
|
||||
td::uint32 res = 1;
|
||||
for (std::size_t i = 0;; i++) {
|
||||
if (ptr[i].data % max_shift != 0) {
|
||||
res *= ptr[i].data;
|
||||
} else {
|
||||
@ -484,16 +485,16 @@ class IterateSlow {
|
||||
return res;
|
||||
}
|
||||
};
|
||||
#include <random>
|
||||
|
||||
template <class F>
|
||||
void BM_cache_miss(benchmark::State &state) {
|
||||
uint32_t max_shift = state.range(0);
|
||||
static void BM_cache_miss(benchmark::State &state) {
|
||||
td::uint32 max_shift = state.range(0);
|
||||
bool flag = state.range(1);
|
||||
std::random_device rd;
|
||||
std::mt19937 rnd(rd());
|
||||
int N = 50000000;
|
||||
std::vector<CacheMissNode> nodes(N);
|
||||
uint32_t i = 0;
|
||||
td::vector<CacheMissNode> nodes(N);
|
||||
td::uint32 i = 0;
|
||||
for (auto &node : nodes) {
|
||||
if (flag) {
|
||||
node.data = i++ % max_shift;
|
||||
@ -502,8 +503,8 @@ void BM_cache_miss(benchmark::State &state) {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> positions(N);
|
||||
std::uniform_int_distribution<uint32_t> rnd_pos(0, N - 1000);
|
||||
td::vector<int> positions(N);
|
||||
std::uniform_int_distribution<td::uint32> rnd_pos(0, N - 1000);
|
||||
for (auto &pos : positions) {
|
||||
pos = rnd_pos(rnd);
|
||||
if (flag) {
|
||||
@ -520,7 +521,7 @@ void BM_cache_miss(benchmark::State &state) {
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t equal_mask_slow(uint8_t *bytes, uint8_t needle) {
|
||||
static uint64_t equal_mask_slow(td::uint8 *bytes, td::uint8 needle) {
|
||||
uint64_t mask = 0;
|
||||
for (int i = 0; i < 16; i++) {
|
||||
mask |= (bytes[i] == needle) << i;
|
||||
@ -529,19 +530,20 @@ uint64_t equal_mask_slow(uint8_t *bytes, uint8_t needle) {
|
||||
}
|
||||
|
||||
template <class MaskT>
|
||||
void BM_mask(benchmark::State &state) {
|
||||
size_t BATCH_SIZE = 1024;
|
||||
std::vector<uint8_t> bytes(BATCH_SIZE + 16);
|
||||
static void BM_mask(benchmark::State &state) {
|
||||
std::size_t BATCH_SIZE = 1024;
|
||||
td::vector<td::uint8> bytes(BATCH_SIZE + 16);
|
||||
for (auto &b : bytes) {
|
||||
b = static_cast<uint8_t>(td::Random::fast(0, 17));
|
||||
b = static_cast<td::uint8>(td::Random::fast(0, 17));
|
||||
}
|
||||
|
||||
while (state.KeepRunningBatch(BATCH_SIZE)) {
|
||||
for (size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
for (std::size_t i = 0; i < BATCH_SIZE; i++) {
|
||||
benchmark::DoNotOptimize(MaskT::equal_mask(bytes.data() + i, 17));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BENCHMARK_TEMPLATE(BM_mask, td::MaskPortable);
|
||||
#ifdef __aarch64__
|
||||
BENCHMARK_TEMPLATE(BM_mask, td::MaskNeonFolly);
|
||||
|
Loading…
Reference in New Issue
Block a user