FlatHashMap: sse2 support

This commit is contained in:
Arseny Smirnov 2022-02-18 00:47:58 +01:00
parent a356cc7e3d
commit 39934a7f9e

View File

@ -19,6 +19,18 @@
#include <new>
#include <utility>
#if (defined(_MSC_VER) && (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2)))
#define TD_SSE2
#endif
#ifdef __aarch64__
#include <arm_neon.h>
#endif
#if TD_SSE2
#include <emmintrin.h>
#endif
namespace td {
template <int shift>
struct MaskIterator {
uint64_t mask;
@ -61,7 +73,6 @@ struct MaskPortable {
};
#ifdef __aarch64__
#include <arm_neon.h>
struct MaskNeonFolly {
static MaskIterator<4> equal_mask(uint8_t *bytes, uint8_t needle) {
uint8x16_t input_mask = vld1q_u8(bytes);
@ -88,15 +99,25 @@ struct MaskNeon {
return {vaddvq_u16(a_shifted) & ((1u << 14) - 1)};
}
};
#elif TD_SSE2
struct MaskSse2 {
static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) {
auto input_mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes));
auto needle_mask = _mm_set1_epi8(needle);
auto match_mask = _mm_cmpeq_epi8(needle_mask, input_mask);
return MaskIterator<1>(static_cast<uint32_t>(_mm_movemask_epi8(match_mask)));
}
};
#endif
#ifdef __aarch64__
using MaskHelper = MaskNeonFolly;
#elif TD_SSE2
using MaskHelper = MaskSse2;
#else
using MaskHelper = MaskPortable;
#endif
namespace td {
template <class NodeT, class HashT, class EqT>
class FlatHashTableChunks {
public: