FlatHashMap: sse2 support
This commit is contained in:
parent
a356cc7e3d
commit
39934a7f9e
@ -19,6 +19,18 @@
|
||||
#include <new>
|
||||
#include <utility>
|
||||
|
||||
#if (defined(_MSC_VER) && (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2)))
|
||||
#define TD_SSE2
|
||||
#endif
|
||||
|
||||
#ifdef __aarch64__
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#if TD_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace td {
|
||||
template <int shift>
|
||||
struct MaskIterator {
|
||||
uint64_t mask;
|
||||
@ -61,7 +73,6 @@ struct MaskPortable {
|
||||
};
|
||||
|
||||
#ifdef __aarch64__
|
||||
#include <arm_neon.h>
|
||||
struct MaskNeonFolly {
|
||||
static MaskIterator<4> equal_mask(uint8_t *bytes, uint8_t needle) {
|
||||
uint8x16_t input_mask = vld1q_u8(bytes);
|
||||
@ -88,15 +99,25 @@ struct MaskNeon {
|
||||
return {vaddvq_u16(a_shifted) & ((1u << 14) - 1)};
|
||||
}
|
||||
};
|
||||
#elif TD_SSE2
|
||||
struct MaskSse2 {
|
||||
static MaskIterator<1> equal_mask(uint8_t *bytes, uint8_t needle) {
|
||||
auto input_mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes));
|
||||
auto needle_mask = _mm_set1_epi8(needle);
|
||||
auto match_mask = _mm_cmpeq_epi8(needle_mask, input_mask);
|
||||
return MaskIterator<1>(static_cast<uint32_t>(_mm_movemask_epi8(match_mask)));
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef __aarch64__
|
||||
using MaskHelper = MaskNeonFolly;
|
||||
#elif TD_SSE2
|
||||
using MaskHelper = MaskSse2;
|
||||
#else
|
||||
using MaskHelper = MaskPortable;
|
||||
#endif
|
||||
|
||||
namespace td {
|
||||
template <class NodeT, class HashT, class EqT>
|
||||
class FlatHashTableChunks {
|
||||
public:
|
||||
|
Loading…
Reference in New Issue
Block a user