From ae3854d97c7e983e9560f19ca144b5a89ca5348e Mon Sep 17 00:00:00 2001 From: levlam Date: Fri, 18 Feb 2022 23:04:25 +0300 Subject: [PATCH] Various fixes. --- tdutils/td/utils/FlatHashMap.h | 3 + tdutils/td/utils/FlatHashMapChunks.h | 38 ++++---- tdutils/td/utils/FlatHashMapLinear.h | 2 - tdutils/td/utils/fixed_vector.h | 22 ++++- tdutils/test/HashSet.cpp | 21 +++-- tdutils/test/hashset_benchmark.cpp | 134 ++++++++++++++------------- 6 files changed, 120 insertions(+), 100 deletions(-) diff --git a/tdutils/td/utils/FlatHashMap.h b/tdutils/td/utils/FlatHashMap.h index eeffde721..097d02939 100644 --- a/tdutils/td/utils/FlatHashMap.h +++ b/tdutils/td/utils/FlatHashMap.h @@ -9,6 +9,9 @@ #include "td/utils/FlatHashMapChunks.h" #include "td/utils/FlatHashMapLinear.h" +//#include +//#include + namespace td { template , class EqT = std::equal_to> //using FlatHashMap = FlatHashMapImpl; diff --git a/tdutils/td/utils/FlatHashMapChunks.h b/tdutils/td/utils/FlatHashMapChunks.h index e1e289721..73bed6e18 100644 --- a/tdutils/td/utils/FlatHashMapChunks.h +++ b/tdutils/td/utils/FlatHashMapChunks.h @@ -10,22 +10,22 @@ #include "td/utils/bits.h" #include "td/utils/common.h" #include "td/utils/FlatHashMapLinear.h" -#include "td/utils/logging.h" #include #include #include #include -#include +#include #include -#if (defined(_MSC_VER) && (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2))) +#if defined(__SSE2__) || (TD_MSVC && (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2))) #define TD_SSE2 1 #endif #ifdef __aarch64__ #include #endif + #if TD_SSE2 #include #endif @@ -33,12 +33,12 @@ namespace td { template struct MaskIterator { - uint64_t mask; + uint64 mask; explicit operator bool() const { return mask != 0; } int pos() const { - return td::count_trailing_zeroes64(mask) / shift; + return count_trailing_zeroes64(mask) / shift; } void next() { mask &= mask - 1; @@ -63,8 +63,8 @@ struct MaskIterator { }; struct MaskPortable { - static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) { - uint64_t res = 0; + static MaskIterator<1> equal_mask(uint8 *bytes, uint8 needle) { + uint64 res = 0; for (int i = 0; i < 16; i++) { res |= (bytes[i] == needle) << i; } @@ -74,20 +74,20 @@ struct MaskPortable { #ifdef __aarch64__ struct MaskNeonFolly { - static MaskIterator<4> equal_mask(uint8_t *bytes, uint8_t needle) { + static MaskIterator<4> equal_mask(uint8 *bytes, uint8 needle) { uint8x16_t input_mask = vld1q_u8(bytes); auto needle_mask = vdupq_n_u8(needle); auto eq_mask = vceqq_u8(input_mask, needle_mask); - // get info from every byte into the bottom half of every uint16_t + // get info from every byte into the bottom half of every uint16 // by shifting right 4, then round to get it into a 64-bit vector uint8x8_t shifted_eq_mask = vshrn_n_u16(vreinterpretq_u16_u8(eq_mask), 4); - uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(shifted_eq_mask), 0); + uint64 mask = vget_lane_u64(vreinterpret_u64_u8(shifted_eq_mask), 0); return {mask & 0x11111111111111}; } }; struct MaskNeon { - static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) { + static MaskIterator<1> equal_mask(uint8 *bytes, uint8 needle) { uint8x16_t input_mask = vld1q_u8(bytes); auto needle_mask = vdupq_n_u8(needle); auto eq_mask = vceqq_u8(input_mask, needle_mask); @@ -101,11 +101,11 @@ struct MaskNeon { }; #elif TD_SSE2 struct MaskSse2 { - static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) { + static MaskIterator<1> equal_mask(uint8 *bytes, uint8 needle) { auto input_mask = _mm_loadu_si128(reinterpret_cast(bytes)); auto needle_mask = _mm_set1_epi8(needle); auto match_mask = _mm_cmpeq_epi8(needle_mask, input_mask); - return {static_cast(_mm_movemask_epi8(match_mask))}; + return {static_cast(_mm_movemask_epi8(match_mask)) & ((1u << 14) - 1)}; } }; #endif @@ -224,7 +224,7 @@ class FlatHashTableChunks { FlatHashTableChunks(std::initializer_list nodes) { reserve(nodes.size()); - for (auto &node : td::reversed(nodes)) { + for (auto &node : reversed(nodes)) { CHECK(!node.empty()); if (count(node.first) > 0) { continue; @@ -351,7 +351,7 @@ class FlatHashTableChunks { used_nodes_++; return {{node_it, this}, true}; } - CHECK(chunk.skipped_cnt != std::numeric_limits::max()); + CHECK(chunk.skipped_cnt != std::numeric_limits::max()); chunk.skipped_cnt++; chunk_it.next(); } @@ -413,7 +413,7 @@ class FlatHashTableChunks { static constexpr int CHUNK_SIZE = 14; static constexpr int MASK = (1 << CHUNK_SIZE) - 1; // 0x0 - empty - td::uint8 ctrl[CHUNK_SIZE] = {}; + uint8 ctrl[CHUNK_SIZE] = {}; uint16 skipped_cnt{0}; }; fixed_vector nodes_; @@ -464,7 +464,7 @@ class FlatHashTableChunks { struct HashInfo { size_t chunk_i; - uint8_t small_hash; + uint8 small_hash; }; struct ChunkIt { size_t chunk_i; @@ -488,7 +488,7 @@ class FlatHashTableChunks { HashInfo calc_hash(const KeyT &key) { auto h = HashT()(key); // TODO: will be problematic with current hash. - return {(h >> 8) % chunks_.size(), uint8_t(0x80 | h)}; + return {(h >> 8) % chunks_.size(), static_cast(0x80 | h)}; } void resize(size_t new_size) { @@ -526,7 +526,7 @@ class FlatHashTableChunks { used_nodes_++; break; } - CHECK(chunk.skipped_cnt != std::numeric_limits::max()); + CHECK(chunk.skipped_cnt != std::numeric_limits::max()); chunk.skipped_cnt++; chunk_it.next(); } diff --git a/tdutils/td/utils/FlatHashMapLinear.h b/tdutils/td/utils/FlatHashMapLinear.h index ff7e7b60e..8a2ea9e07 100644 --- a/tdutils/td/utils/FlatHashMapLinear.h +++ b/tdutils/td/utils/FlatHashMapLinear.h @@ -9,14 +9,12 @@ #include "td/utils/bits.h" #include "td/utils/common.h" #include "td/utils/fixed_vector.h" -#include "td/utils/logging.h" #include #include #include #include #include -#include #include namespace td { diff --git a/tdutils/td/utils/fixed_vector.h b/tdutils/td/utils/fixed_vector.h index ccb05192e..0ee720d3c 100644 --- a/tdutils/td/utils/fixed_vector.h +++ b/tdutils/td/utils/fixed_vector.h @@ -1,7 +1,17 @@ +// +// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022 +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// #pragma once + #include "td/utils/common.h" +#include + namespace td { + template class fixed_vector { public: @@ -20,12 +30,17 @@ class fixed_vector { ~fixed_vector() { delete[] ptr_; } + + using iterator = T *; + using const_iterator = const T *; + T &operator[](size_t i) { return ptr_[i]; } const T &operator[](size_t i) const { return ptr_[i]; } + T *begin() { return ptr_; } @@ -38,14 +53,14 @@ class fixed_vector { const T *end() const { return ptr_ + size_; } + bool empty() const { return size() == 0; } size_t size() const { return size_; } - using iterator = T *; - using const_iterator = const T *; + void swap(fixed_vector &other) { std::swap(ptr_, other.ptr_); std::swap(size_, other.size_); @@ -55,4 +70,5 @@ class fixed_vector { T *ptr_{}; size_t size_{0}; }; -} // namespace td \ No newline at end of file + +} // namespace td diff --git a/tdutils/test/HashSet.cpp b/tdutils/test/HashSet.cpp index 5ae2734f1..029a90fa7 100644 --- a/tdutils/test/HashSet.cpp +++ b/tdutils/test/HashSet.cpp @@ -8,6 +8,7 @@ #include "td/utils/common.h" #include "td/utils/FlatHashMap.h" #include "td/utils/FlatHashMapChunks.h" +#include "td/utils/logging.h" #include "td/utils/Random.h" #include "td/utils/Slice.h" #include "td/utils/tests.h" @@ -44,10 +45,10 @@ TEST(FlatHashMapChunks, basic) { TEST(FlatHashMap, probing) { auto test = [](int buckets, int elements) { CHECK(buckets >= elements); - std::vector data(buckets, false); + td::vector data(buckets, false); std::random_device rnd; std::mt19937 mt(rnd()); - std::uniform_int_distribution d(0, buckets - 1); + std::uniform_int_distribution d(0, buckets - 1); for (int i = 0; i < elements; i++) { int pos = d(mt); while (data[pos]) { @@ -63,16 +64,16 @@ TEST(FlatHashMap, probing) { for (auto x : data) { if (x) { cur_chain++; - max_chain = std::max(max_chain, cur_chain); + max_chain = td::max(max_chain, cur_chain); } else { cur_chain = 0; } } - LOG(ERROR) << "buckets=" << buckets << " elements=" << elements << " max_chain=" << max_chain; + LOG(INFO) << "Buckets=" << buckets << " elements=" << elements << " max_chain=" << max_chain; }; - test(8192, int(8192 * 0.8)); - test(8192, int(8192 * 0.6)); - test(8192, int(8192 * 0.3)); + test(8192, static_cast(8192 * 0.8)); + test(8192, static_cast(8192 * 0.6)); + test(8192, static_cast(8192 * 0.3)); } TEST(FlatHashSet, TL) { @@ -187,7 +188,7 @@ TEST(FlatHashMap, basic) { TEST(FlatHashMap, remove_if_basic) { td::Random::Xorshift128plus rnd(123); - constexpr int TESTS_N = 10000; + constexpr int TESTS_N = 1000; constexpr int MAX_TABLE_SIZE = 1000; for (int test_i = 0; test_i < TESTS_N; test_i++) { std::unordered_map reference; @@ -234,7 +235,7 @@ TEST(FlatHashMap, stress_test) { td::vector steps; auto add_step = [&](td::Slice step_name, td::uint32 weight, auto f) { - auto g = [&, step_name, f = std::move(f)]() { + auto g = [&, step_name, f = std::move(f)] { //LOG(ERROR) << step_name; //ASSERT_EQ(ref.size(), tbl.size()); f(); @@ -328,7 +329,7 @@ TEST(FlatHashMap, stress_test) { }); td::RandomSteps runner(std::move(steps)); - for (size_t i = 0; i < 10000000; i++) { + for (size_t i = 0; i < 1000000; i++) { runner.step(rnd); } } diff --git a/tdutils/test/hashset_benchmark.cpp b/tdutils/test/hashset_benchmark.cpp index 507114957..5b95cfb1a 100644 --- a/tdutils/test/hashset_benchmark.cpp +++ b/tdutils/test/hashset_benchmark.cpp @@ -23,16 +23,17 @@ #include #include #include +#include #include #include template -static void reserve(TableT &table, size_t size) { +static void reserve(TableT &table, std::size_t size) { table.reserve(size); } template -static void reserve(std::map &table, size_t size) { +static void reserve(std::map &table, std::size_t size) { } template @@ -132,7 +133,7 @@ class SimpleHashTable { ValueT *find(const KeyT &needle) { auto hash = HashT()(needle); - size_t i = hash % nodes_.size(); + std::size_t i = hash % nodes_.size(); while (true) { if (nodes_[i].key == needle) { return &nodes_[i].value; @@ -185,7 +186,7 @@ static void BM_Get(benchmark::State &state) { td::vector data; td::vector keys; - for (size_t i = 0; i < n; i++) { + for (std::size_t i = 0; i < n; i++) { auto key = rnd(); auto value = rnd(); data.emplace_back(key, value); @@ -193,7 +194,7 @@ static void BM_Get(benchmark::State &state) { } TableT table(data.begin(), data.end()); - size_t key_i = 0; + std::size_t key_i = 0; td::random_shuffle(td::as_mutable_span(keys), rnd); auto next_key = [&] { key_i++; @@ -204,7 +205,7 @@ static void BM_Get(benchmark::State &state) { }; while (state.KeepRunningBatch(BATCH_SIZE)) { - for (size_t i = 0; i < BATCH_SIZE; i++) { + for (std::size_t i = 0; i < BATCH_SIZE; i++) { benchmark::DoNotOptimize(table.find(next_key())); } } @@ -214,11 +215,11 @@ template static void BM_find_same(benchmark::State &state) { td::Random::Xorshift128plus rnd(123); TableT table; - constexpr size_t N = 100000; - constexpr size_t BATCH_SIZE = 1024; + constexpr std::size_t N = 100000; + constexpr std::size_t BATCH_SIZE = 1024; reserve(table, N); - for (size_t i = 0; i < N; i++) { + for (std::size_t i = 0; i < N; i++) { table.emplace(rnd(), i); } @@ -226,7 +227,7 @@ static void BM_find_same(benchmark::State &state) { table[key] = 123; while (state.KeepRunningBatch(BATCH_SIZE)) { - for (size_t i = 0; i < BATCH_SIZE; i++) { + for (std::size_t i = 0; i < BATCH_SIZE; i++) { benchmark::DoNotOptimize(table.find(key)); } } @@ -236,11 +237,11 @@ template static void BM_emplace_same(benchmark::State &state) { td::Random::Xorshift128plus rnd(123); TableT table; - constexpr size_t N = 100000; - constexpr size_t BATCH_SIZE = 1024; + constexpr std::size_t N = 100000; + constexpr std::size_t BATCH_SIZE = 1024; reserve(table, N); - for (size_t i = 0; i < N; i++) { + for (std::size_t i = 0; i < N; i++) { table.emplace(rnd(), i); } @@ -248,7 +249,7 @@ static void BM_emplace_same(benchmark::State &state) { table[key] = 123; while (state.KeepRunningBatch(BATCH_SIZE)) { - for (size_t i = 0; i < BATCH_SIZE; i++) { + for (std::size_t i = 0; i < BATCH_SIZE; i++) { benchmark::DoNotOptimize(table.emplace(key + (i & 15) * 100, 43784932)); } } @@ -271,15 +272,15 @@ static void table_remove_if(absl::flat_hash_map &table, FunctT &&func) { template static void BM_remove_if(benchmark::State &state) { - constexpr size_t N = 100000; - constexpr size_t BATCH_SIZE = N; + constexpr std::size_t N = 100000; + constexpr std::size_t BATCH_SIZE = N; TableT table; reserve(table, N); while (state.KeepRunningBatch(BATCH_SIZE)) { state.PauseTiming(); td::Random::Xorshift128plus rnd(123); - for (size_t i = 0; i < N; i++) { + for (std::size_t i = 0; i < N; i++) { table.emplace(rnd(), i); } state.ResumeTiming(); @@ -290,13 +291,13 @@ static void BM_remove_if(benchmark::State &state) { template static void BM_erase_all_with_begin(benchmark::State &state) { - constexpr size_t N = 100000; - constexpr size_t BATCH_SIZE = N; + constexpr std::size_t N = 100000; + constexpr std::size_t BATCH_SIZE = N; TableT table; td::Random::Xorshift128plus rnd(123); while (state.KeepRunningBatch(BATCH_SIZE)) { - for (size_t i = 0; i < BATCH_SIZE; i++) { + for (std::size_t i = 0; i < BATCH_SIZE; i++) { table.emplace(rnd() + 1, i); } while (!table.empty()) { @@ -307,14 +308,14 @@ static void BM_erase_all_with_begin(benchmark::State &state) { template static void BM_cache(benchmark::State &state) { - constexpr size_t N = 1000; - constexpr size_t BATCH_SIZE = 1000000; + constexpr std::size_t N = 1000; + constexpr std::size_t BATCH_SIZE = 1000000; TableT table; td::Random::Xorshift128plus rnd(123); td::VectorQueue keys; while (state.KeepRunningBatch(BATCH_SIZE)) { - for (size_t i = 0; i < BATCH_SIZE; i++) { + for (std::size_t i = 0; i < BATCH_SIZE; i++) { auto key = rnd() + 1; keys.push(key); table.emplace(key, i); @@ -327,14 +328,14 @@ static void BM_cache(benchmark::State &state) { template static void BM_cache2(benchmark::State &state) { - constexpr size_t N = 1000; - constexpr size_t BATCH_SIZE = 1000000; + constexpr std::size_t N = 1000; + constexpr std::size_t BATCH_SIZE = 1000000; TableT table; td::Random::Xorshift128plus rnd(123); td::VectorQueue keys; while (state.KeepRunningBatch(BATCH_SIZE)) { - for (size_t i = 0; i < BATCH_SIZE; i++) { + for (std::size_t i = 0; i < BATCH_SIZE; i++) { auto key = rnd() + 1; keys.push(key); table.emplace(key, i); @@ -347,20 +348,20 @@ static void BM_cache2(benchmark::State &state) { template static void BM_cache3(benchmark::State &state) { - size_t N = state.range(0); - constexpr size_t BATCH_SIZE = 1000000; + std::size_t N = state.range(0); + constexpr std::size_t BATCH_SIZE = 1000000; TableT table; td::Random::Xorshift128plus rnd(123); td::VectorQueue keys; - size_t step = 20; + std::size_t step = 20; while (state.KeepRunningBatch(BATCH_SIZE)) { - for (size_t i = 0; i < BATCH_SIZE; i += step) { + for (std::size_t i = 0; i < BATCH_SIZE; i += step) { auto key = rnd() + 1; keys.push(key); table.emplace(key, i); - for (size_t j = 1; j < step; j++) { + for (std::size_t j = 1; j < step; j++) { auto key_to_find = keys.data()[rnd() % keys.size()]; benchmark::DoNotOptimize(table.find(key_to_find)); } @@ -373,24 +374,24 @@ static void BM_cache3(benchmark::State &state) { } template static void BM_remove_if_slow(benchmark::State &state) { - constexpr size_t N = 5000; - constexpr size_t BATCH_SIZE = 500000; + constexpr std::size_t N = 5000; + constexpr std::size_t BATCH_SIZE = 500000; TableT table; td::Random::Xorshift128plus rnd(123); - for (size_t i = 0; i < N; i++) { + for (std::size_t i = 0; i < N; i++) { table.emplace(rnd() + 1, i); } auto first_key = table.begin()->first; { - size_t cnt = 0; - td::table_remove_if(table, [&cnt](auto &) { + std::size_t cnt = 0; + td::table_remove_if(table, [&cnt, n = N](auto &) { cnt += 2; - return cnt <= N; + return cnt <= n; }); } while (state.KeepRunningBatch(BATCH_SIZE)) { - for (size_t i = 0; i < BATCH_SIZE; i++) { + for (std::size_t i = 0; i < BATCH_SIZE; i++) { table.emplace(first_key, i); table.erase(first_key); } @@ -398,16 +399,16 @@ static void BM_remove_if_slow(benchmark::State &state) { } template static void BM_remove_if_slow_old(benchmark::State &state) { - constexpr size_t N = 100000; - constexpr size_t BATCH_SIZE = 5000000; + constexpr std::size_t N = 100000; + constexpr std::size_t BATCH_SIZE = 5000000; TableT table; while (state.KeepRunningBatch(BATCH_SIZE)) { td::Random::Xorshift128plus rnd(123); - for (size_t i = 0; i < BATCH_SIZE; i++) { + for (std::size_t i = 0; i < BATCH_SIZE; i++) { table.emplace(rnd() + 1, i); if (table.size() > N) { - size_t cnt = 0; + std::size_t cnt = 0; td::table_remove_if(table, [&cnt, n = N](auto &) { cnt += 2; return cnt <= n; @@ -421,11 +422,11 @@ template static void benchmark_create(td::Slice name) { td::Random::Xorshift128plus rnd(123); { - constexpr size_t N = 10000000; + constexpr std::size_t N = 10000000; TableT table; reserve(table, N); auto start = td::Timestamp::now(); - for (size_t i = 0; i < N; i++) { + for (std::size_t i = 0; i < N; i++) { table.emplace(rnd(), i); } auto end = td::Timestamp::now(); @@ -433,8 +434,8 @@ static void benchmark_create(td::Slice name) { << "create " << N << " elements: " << td::format::as_time(end.at() - start.at()); double res = 0; - td::vector> pauses; - for (size_t i = 0; i < N; i++) { + td::vector> pauses; + for (std::size_t i = 0; i < N; i++) { auto emplace_start = td::Timestamp::now(); table.emplace(rnd(), i); auto emplace_end = td::Timestamp::now(); @@ -451,15 +452,15 @@ static void benchmark_create(td::Slice name) { } struct CacheMissNode { - uint32_t data{}; + td::uint32 data{}; char padding[64 - sizeof(data)]; }; class IterateFast { public: - static __attribute__((noinline)) uint32_t iterate(CacheMissNode *ptr, size_t max_shift) { - uint32_t res = 1; - for (size_t i = 0; i < max_shift; i++) { + static td::uint32 iterate(CacheMissNode *ptr, std::size_t max_shift) { + td::uint32 res = 1; + for (std::size_t i = 0; i < max_shift; i++) { if (ptr[i].data % max_shift != 0) { res *= ptr[i].data; } else { @@ -472,9 +473,9 @@ class IterateFast { class IterateSlow { public: - static __attribute__((noinline)) uint32_t iterate(CacheMissNode *ptr, size_t max_shift) { - uint32_t res = 1; - for (size_t i = 0;; i++) { + static td::uint32 iterate(CacheMissNode *ptr, std::size_t max_shift) { + td::uint32 res = 1; + for (std::size_t i = 0;; i++) { if (ptr[i].data % max_shift != 0) { res *= ptr[i].data; } else { @@ -484,16 +485,16 @@ class IterateSlow { return res; } }; -#include + template -void BM_cache_miss(benchmark::State &state) { - uint32_t max_shift = state.range(0); +static void BM_cache_miss(benchmark::State &state) { + td::uint32 max_shift = state.range(0); bool flag = state.range(1); std::random_device rd; std::mt19937 rnd(rd()); int N = 50000000; - std::vector nodes(N); - uint32_t i = 0; + td::vector nodes(N); + td::uint32 i = 0; for (auto &node : nodes) { if (flag) { node.data = i++ % max_shift; @@ -502,8 +503,8 @@ void BM_cache_miss(benchmark::State &state) { } } - std::vector positions(N); - std::uniform_int_distribution rnd_pos(0, N - 1000); + td::vector positions(N); + std::uniform_int_distribution rnd_pos(0, N - 1000); for (auto &pos : positions) { pos = rnd_pos(rnd); if (flag) { @@ -520,7 +521,7 @@ void BM_cache_miss(benchmark::State &state) { } } -uint64_t equal_mask_slow(uint8_t *bytes, uint8_t needle) { +static uint64_t equal_mask_slow(td::uint8 *bytes, td::uint8 needle) { uint64_t mask = 0; for (int i = 0; i < 16; i++) { mask |= (bytes[i] == needle) << i; @@ -529,19 +530,20 @@ uint64_t equal_mask_slow(uint8_t *bytes, uint8_t needle) { } template -void BM_mask(benchmark::State &state) { - size_t BATCH_SIZE = 1024; - std::vector bytes(BATCH_SIZE + 16); +static void BM_mask(benchmark::State &state) { + std::size_t BATCH_SIZE = 1024; + td::vector bytes(BATCH_SIZE + 16); for (auto &b : bytes) { - b = static_cast(td::Random::fast(0, 17)); + b = static_cast(td::Random::fast(0, 17)); } while (state.KeepRunningBatch(BATCH_SIZE)) { - for (size_t i = 0; i < BATCH_SIZE; i++) { + for (std::size_t i = 0; i < BATCH_SIZE; i++) { benchmark::DoNotOptimize(MaskT::equal_mask(bytes.data() + i, 17)); } } } + BENCHMARK_TEMPLATE(BM_mask, td::MaskPortable); #ifdef __aarch64__ BENCHMARK_TEMPLATE(BM_mask, td::MaskNeonFolly);