crc32: build a whole special Extend function for SSE 4.2.
Disassembling the Extend function now shows much healthier code: the SSE 4.2 instructions are right there in the body of the function.

Benchmark on Intel(R) Core(TM) i7-3540M CPU @ 3.00GHz:
Before: crc32c: 1.305 micros/op 766260 ops/sec; 2993.2 MB/s (4K per op)
After: crc32c: 0.442 micros/op 2263843 ops/sec; 8843.1 MB/s (4K per op)
This commit is contained in:
parent
284c365b77
commit
56ca75e89e
@ -334,19 +334,8 @@ static bool isSSE42() {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Pointer type for a CRC32 step routine; Choose_CRC32() returns either
// Fast_CRC32 or Slow_CRC32 as this type. Arguments are a pointer to a 64-bit
// value and a pointer to a byte pointer (presumably the running CRC and the
// read cursor -- confirm against Fast_CRC32/Slow_CRC32).
typedef void (*Function)(uint64_t*, uint8_t const**);
|
||||
|
||||
// Selects the CRC32 step routine: Fast_CRC32 when isSSE42() reports true,
// Slow_CRC32 otherwise.
static inline Function Choose_CRC32() {
  if (isSSE42()) {
    return Fast_CRC32;
  }
  return Slow_CRC32;
}
|
||||
|
||||
// Step routine chosen once by Choose_CRC32() when this file-local variable is
// initialized; CRC32() forwards every call through it.
static Function func = Choose_CRC32();
|
||||
|
||||
// Thin dispatcher: hands both arguments, unchanged, to the routine stored in
// `func`.
static inline void CRC32(uint64_t* l, uint8_t const **p) {
  (*func)(l, p);
}
|
||||
|
||||
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
|
||||
template<void (*CRC32)(uint64_t*, uint8_t const**)>
|
||||
uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
|
||||
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
|
||||
const uint8_t *e = p + size;
|
||||
uint64_t l = crc ^ 0xffffffffu;
|
||||
@ -388,5 +377,17 @@ uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
|
||||
return l ^ 0xffffffffu;
|
||||
}
|
||||
|
||||
// Pointer type for a complete Extend implementation; it has the same
// parameter list and return type as Extend(crc, buf, size).
typedef uint32_t (*Function)(uint32_t, const char*, size_t);
|
||||
|
||||
// Selects the Extend implementation: the ExtendImpl instantiation built on
// Fast_CRC32 when isSSE42() reports true, the one built on Slow_CRC32
// otherwise.
static inline Function Choose_Extend() {
  if (isSSE42()) {
    return ExtendImpl<Fast_CRC32>;
  }
  return ExtendImpl<Slow_CRC32>;
}
|
||||
|
||||
// Extend implementation picked once by Choose_Extend() when this variable is
// dynamically initialized. Declared static so the dispatch pointer stays
// local to this translation unit, matching Choose_Extend() itself.
// NOTE(review): before dynamic initialization runs this pointer is null, so
// Extend() must not be called from another translation unit's static
// initializers -- confirm no caller does so.
static Function ChosenExtend = Choose_Extend();
|
||||
|
||||
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
|
||||
return ChosenExtend(crc, buf, size);
|
||||
}
|
||||
|
||||
} // namespace crc32c
|
||||
} // namespace rocksdb
|
||||
|
Loading…
Reference in New Issue
Block a user