specify SSE42 'target' attribute for Fast_CRC32()
Summary: If we enable SSE42 globally when compiling the tree to prepare a portable binary, that binary may run on a CPU without SSE42 instructions even though the GCC on the build host is able to emit SSE42 code; this leads to illegal-instruction errors on machines that do not support SSE42. To solve this problem, crc32 detects the supported instructions at runtime and selects the supported CRC32 implementation according to the result of `cpuid`. But intrinsics like "_mm_crc32_u64()" are not available unless the "target" machine is appropriately specified, either on the command line (e.g. "-msse4.2") or by using the "target" attribute. We could pass "-msse4.2" only when compiling crc32c.cc and allow the compiler to generate the SSE42 instructions there, but we would still be at risk of executing illegal instructions on machines that do not support SSE42 if the compiler emits code that is not guarded by our runtime detection, and we would need to make the change in both the Makefile and the CMake build. Alternatively, we can use GCC's "target" attribute to enable the machine-specific instructions for a particular function. This way we have finer-grained control over the "target" used, and there is no need to change the makefiles, so we do not need to duplicate the change in both make and cmake as the previous approach would require. This problem surfaces when preparing a package for a GNU/Linux distribution, and it only applies to the SSE42 optimization, so using a feature only available on GCC/Clang is not that formidable. Closes https://github.com/facebook/rocksdb/pull/2807 Differential Revision: D5786084 Pulled By: siying fbshipit-source-id: bca5c0f877b8d6fb55f58f8f122254a26422843d
This commit is contained in:
parent
7e19a571e9
commit
ba3c58cab6
@ -336,6 +336,15 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
|
|||||||
table0_[c >> 24];
|
table0_[c >> 24];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_SSE42) && defined(__GNUC__)
|
||||||
|
#if defined(__clang__)
|
||||||
|
#if __has_cpp_attribute(gnu::target)
|
||||||
|
__attribute__ ((target ("sse4.2")))
|
||||||
|
#endif
|
||||||
|
#else // gcc supports this since 4.4
|
||||||
|
__attribute__ ((target ("sse4.2")))
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
|
static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
|
||||||
#ifndef HAVE_SSE42
|
#ifndef HAVE_SSE42
|
||||||
Slow_CRC32(l, p);
|
Slow_CRC32(l, p);
|
||||||
@ -400,8 +409,7 @@ static bool isSSE42() {
|
|||||||
return false;
|
return false;
|
||||||
#elif defined(__GNUC__) && defined(__x86_64__) && !defined(IOS_CROSS_COMPILE)
|
#elif defined(__GNUC__) && defined(__x86_64__) && !defined(IOS_CROSS_COMPILE)
|
||||||
uint32_t c_;
|
uint32_t c_;
|
||||||
uint32_t d_;
|
__asm__("cpuid" : "=c"(c_) : "a"(1) : "ebx", "edx");
|
||||||
__asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx");
|
|
||||||
return c_ & (1U << 20); // copied from CpuId.h in Folly.
|
return c_ & (1U << 20); // copied from CpuId.h in Folly.
|
||||||
#elif defined(_WIN64)
|
#elif defined(_WIN64)
|
||||||
int info[4];
|
int info[4];
|
||||||
@ -474,7 +482,7 @@ std::string IsFastCrc32Supported() {
|
|||||||
return fast_zero_msg;
|
return fast_zero_msg;
|
||||||
}
|
}
|
||||||
|
|
||||||
Function ChosenExtend = Choose_Extend();
|
static Function ChosenExtend = Choose_Extend();
|
||||||
|
|
||||||
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
|
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
|
||||||
return ChosenExtend(crc, buf, size);
|
return ChosenExtend(crc, buf, size);
|
||||||
|
Loading…
Reference in New Issue
Block a user