crc32: build a whole special Extend function for SSE 4.2.

Disassembling the Extend function shows something that looks
much more healthy now. The SSE 4.2 instructions are right
there in the body of the function.

Intel(R) Core(TM) i7-3540M CPU @ 3.00GHz

Before:

crc32c: 1.305 micros/op 766260 ops/sec; 2993.2 MB/s (4K per op)

After:

crc32c: 0.442 micros/op 2263843 ops/sec; 8843.1 MB/s (4K per op)
This commit is contained in:
Albert Strasheim 2014-04-02 15:15:57 -07:00
parent 284c365b77
commit 56ca75e89e

View File

@ -334,19 +334,8 @@ static bool isSSE42() {
#endif
}
typedef void (*Function)(uint64_t*, uint8_t const**);
static inline Function Choose_CRC32() {
return isSSE42() ? Fast_CRC32 : Slow_CRC32;
}
static Function func = Choose_CRC32();
static inline void CRC32(uint64_t* l, uint8_t const **p) {
func(l, p);
}
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
template<void (*CRC32)(uint64_t*, uint8_t const**)>
uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
const uint8_t *e = p + size;
uint64_t l = crc ^ 0xffffffffu;
@ -388,5 +377,17 @@ uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
return l ^ 0xffffffffu;
}
typedef uint32_t (*Function)(uint32_t, const char*, size_t);
static inline Function Choose_Extend() {
return isSSE42() ? ExtendImpl<Fast_CRC32> : ExtendImpl<Slow_CRC32>;
}
Function ChosenExtend = Choose_Extend();
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
return ChosenExtend(crc, buf, size);
}
} // namespace crc32c
} // namespace rocksdb