Avoid popcnt on Windows when unavailable and in portable builds (#9680)
Summary: Fixes https://github.com/facebook/rocksdb/issues/9560. Only use popcnt intrinsic when HAVE_SSE42 is set. Also avoid setting it based on compiler test in portable builds because such test will pass on MSVC even without proper arch flags (ref: https://devblogs.microsoft.com/oldnewthing/20201026-00/?p=104397). Pull Request resolved: https://github.com/facebook/rocksdb/pull/9680 Test Plan: verified the combinations of -DPORTABLE and -DFORCE_SSE42 produce expected compiler flags on Linux. Verified MSVC build using PORTABLE=1 (in CircleCI) does not set HAVE_SSE42. Reviewed By: pdillinger Differential Revision: D34739033 Pulled By: ajkr fbshipit-source-id: d10456f3392945fc3e59430a1777840f7b60b276
This commit is contained in:
parent
fec4403ff1
commit
062396af15
@ -321,6 +321,7 @@ if(NOT MSVC)
|
||||
set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul")
|
||||
endif()
|
||||
|
||||
if (NOT PORTABLE OR FORCE_SSE42)
|
||||
CHECK_CXX_SOURCE_COMPILES("
|
||||
#include <cstdint>
|
||||
#include <nmmintrin.h>
|
||||
@ -339,6 +340,7 @@ if(HAVE_SSE42)
|
||||
elseif(FORCE_SSE42)
|
||||
message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Check if -latomic is required or not
|
||||
if (NOT MSVC)
|
||||
|
22
util/math.h
22
util/math.h
@ -92,18 +92,25 @@ inline int CountTrailingZeroBits(T v) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER) && !defined(_M_X64)
|
||||
// Not all MSVC compile settings will use `BitsSetToOneFallback()`. We include
|
||||
// the following code at coarse granularity for simpler macros. It's important
|
||||
// to exclude at least so our non-MSVC unit test coverage tool doesn't see it.
|
||||
#ifdef _MSC_VER
|
||||
|
||||
namespace detail {
|
||||
|
||||
template <typename T>
|
||||
int BitsSetToOneFallback(T v) {
|
||||
const int kBits = static_cast<int>(sizeof(T)) * 8;
|
||||
static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits");
|
||||
// we static_cast these bit patterns in order to truncate them to the correct
|
||||
// size
|
||||
// size. Warning C4309 dislikes this technique, so disable it here.
|
||||
#pragma warning(disable : 4309)
|
||||
v = static_cast<T>(v - ((v >> 1) & static_cast<T>(0x5555555555555555ull)));
|
||||
v = static_cast<T>((v & static_cast<T>(0x3333333333333333ull)) +
|
||||
((v >> 2) & static_cast<T>(0x3333333333333333ull)));
|
||||
v = static_cast<T>((v + (v >> 4)) & static_cast<T>(0x0F0F0F0F0F0F0F0Full));
|
||||
#pragma warning(default : 4309)
|
||||
for (int shift_bits = 8; shift_bits < kBits; shift_bits <<= 1) {
|
||||
v += static_cast<T>(v >> shift_bits);
|
||||
}
|
||||
@ -113,7 +120,8 @@ int BitsSetToOneFallback(T v) {
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
#endif
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
// Number of bits set to 1. Also known as "population count".
|
||||
template <typename T>
|
||||
@ -126,21 +134,21 @@ inline int BitsSetToOne(T v) {
|
||||
constexpr auto mm = 8 * sizeof(uint32_t) - 1;
|
||||
// The bit mask is to neutralize sign extension on small signed types
|
||||
constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1;
|
||||
#if defined(_M_X64) || defined(_M_IX86)
|
||||
#if defined(HAVE_SSE42) && (defined(_M_X64) || defined(_M_IX86))
|
||||
return static_cast<int>(__popcnt(static_cast<uint32_t>(v) & m));
|
||||
#else
|
||||
return static_cast<int>(detail::BitsSetToOneFallback(v) & m);
|
||||
#endif
|
||||
} else if (sizeof(T) == sizeof(uint32_t)) {
|
||||
#if defined(_M_X64) || defined(_M_IX86)
|
||||
#if defined(HAVE_SSE42) && (defined(_M_X64) || defined(_M_IX86))
|
||||
return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
|
||||
#else
|
||||
return detail::BitsSetToOneFallback(static_cast<uint32_t>(v));
|
||||
#endif
|
||||
} else {
|
||||
#ifdef _M_X64
|
||||
#if defined(HAVE_SSE42) && defined(_M_X64)
|
||||
return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
|
||||
#elif defined(_M_IX86)
|
||||
#elif defined(HAVE_SSE42) && defined(_M_IX86)
|
||||
return static_cast<int>(
|
||||
__popcnt(static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32) +
|
||||
__popcnt(static_cast<uint32_t>(v))));
|
||||
|
Loading…
Reference in New Issue
Block a user