Avoid popcnt on Windows when unavailable and in portable builds (#9680)
Summary: Fixes https://github.com/facebook/rocksdb/issues/9560. Only use popcnt intrinsic when HAVE_SSE42 is set. Also avoid setting it based on compiler test in portable builds because such test will pass on MSVC even without proper arch flags (ref: https://devblogs.microsoft.com/oldnewthing/20201026-00/?p=104397). Pull Request resolved: https://github.com/facebook/rocksdb/pull/9680 Test Plan: verified the combinations of -DPORTABLE and -DFORCE_SSE42 produce expected compiler flags on Linux. Verified MSVC build using PORTABLE=1 (in CircleCI) does not set HAVE_SSE42. Reviewed By: pdillinger Differential Revision: D34739033 Pulled By: ajkr fbshipit-source-id: d10456f3392945fc3e59430a1777840f7b60b276
This commit is contained in:
parent
2e9a9f04d7
commit
1a2781d48c
@ -321,7 +321,8 @@ if(NOT MSVC)
|
|||||||
set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul")
|
set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
CHECK_CXX_SOURCE_COMPILES("
|
if (NOT PORTABLE OR FORCE_SSE42)
|
||||||
|
CHECK_CXX_SOURCE_COMPILES("
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <nmmintrin.h>
|
#include <nmmintrin.h>
|
||||||
#include <wmmintrin.h>
|
#include <wmmintrin.h>
|
||||||
@ -333,11 +334,12 @@ int main() {
|
|||||||
auto d = _mm_cvtsi128_si64(c);
|
auto d = _mm_cvtsi128_si64(c);
|
||||||
}
|
}
|
||||||
" HAVE_SSE42)
|
" HAVE_SSE42)
|
||||||
if(HAVE_SSE42)
|
if(HAVE_SSE42)
|
||||||
add_definitions(-DHAVE_SSE42)
|
add_definitions(-DHAVE_SSE42)
|
||||||
add_definitions(-DHAVE_PCLMUL)
|
add_definitions(-DHAVE_PCLMUL)
|
||||||
elseif(FORCE_SSE42)
|
elseif(FORCE_SSE42)
|
||||||
message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled")
|
message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled")
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Check if -latomic is required or not
|
# Check if -latomic is required or not
|
||||||
|
22
util/math.h
22
util/math.h
@ -92,18 +92,25 @@ inline int CountTrailingZeroBits(T v) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_MSC_VER) && !defined(_M_X64)
|
// Not all MSVC compile settings will use `BitsSetToOneFallback()`. We include
|
||||||
|
// the following code at coarse granularity for simpler macros. It's important
|
||||||
|
// to exclude at least so our non-MSVC unit test coverage tool doesn't see it.
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
int BitsSetToOneFallback(T v) {
|
int BitsSetToOneFallback(T v) {
|
||||||
const int kBits = static_cast<int>(sizeof(T)) * 8;
|
const int kBits = static_cast<int>(sizeof(T)) * 8;
|
||||||
static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits");
|
static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits");
|
||||||
// we static_cast these bit patterns in order to truncate them to the correct
|
// we static_cast these bit patterns in order to truncate them to the correct
|
||||||
// size
|
// size. Warning C4309 dislikes this technique, so disable it here.
|
||||||
|
#pragma warning(disable : 4309)
|
||||||
v = static_cast<T>(v - ((v >> 1) & static_cast<T>(0x5555555555555555ull)));
|
v = static_cast<T>(v - ((v >> 1) & static_cast<T>(0x5555555555555555ull)));
|
||||||
v = static_cast<T>((v & static_cast<T>(0x3333333333333333ull)) +
|
v = static_cast<T>((v & static_cast<T>(0x3333333333333333ull)) +
|
||||||
((v >> 2) & static_cast<T>(0x3333333333333333ull)));
|
((v >> 2) & static_cast<T>(0x3333333333333333ull)));
|
||||||
v = static_cast<T>((v + (v >> 4)) & static_cast<T>(0x0F0F0F0F0F0F0F0Full));
|
v = static_cast<T>((v + (v >> 4)) & static_cast<T>(0x0F0F0F0F0F0F0F0Full));
|
||||||
|
#pragma warning(default : 4309)
|
||||||
for (int shift_bits = 8; shift_bits < kBits; shift_bits <<= 1) {
|
for (int shift_bits = 8; shift_bits < kBits; shift_bits <<= 1) {
|
||||||
v += static_cast<T>(v >> shift_bits);
|
v += static_cast<T>(v >> shift_bits);
|
||||||
}
|
}
|
||||||
@ -113,7 +120,8 @@ int BitsSetToOneFallback(T v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
#endif
|
|
||||||
|
#endif // _MSC_VER
|
||||||
|
|
||||||
// Number of bits set to 1. Also known as "population count".
|
// Number of bits set to 1. Also known as "population count".
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -126,21 +134,21 @@ inline int BitsSetToOne(T v) {
|
|||||||
constexpr auto mm = 8 * sizeof(uint32_t) - 1;
|
constexpr auto mm = 8 * sizeof(uint32_t) - 1;
|
||||||
// The bit mask is to neutralize sign extension on small signed types
|
// The bit mask is to neutralize sign extension on small signed types
|
||||||
constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1;
|
constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1;
|
||||||
#if defined(_M_X64) || defined(_M_IX86)
|
#if defined(HAVE_SSE42) && (defined(_M_X64) || defined(_M_IX86))
|
||||||
return static_cast<int>(__popcnt(static_cast<uint32_t>(v) & m));
|
return static_cast<int>(__popcnt(static_cast<uint32_t>(v) & m));
|
||||||
#else
|
#else
|
||||||
return static_cast<int>(detail::BitsSetToOneFallback(v) & m);
|
return static_cast<int>(detail::BitsSetToOneFallback(v) & m);
|
||||||
#endif
|
#endif
|
||||||
} else if (sizeof(T) == sizeof(uint32_t)) {
|
} else if (sizeof(T) == sizeof(uint32_t)) {
|
||||||
#if defined(_M_X64) || defined(_M_IX86)
|
#if defined(HAVE_SSE42) && (defined(_M_X64) || defined(_M_IX86))
|
||||||
return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
|
return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
|
||||||
#else
|
#else
|
||||||
return detail::BitsSetToOneFallback(static_cast<uint32_t>(v));
|
return detail::BitsSetToOneFallback(static_cast<uint32_t>(v));
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
#ifdef _M_X64
|
#if defined(HAVE_SSE42) && defined(_M_X64)
|
||||||
return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
|
return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
|
||||||
#elif defined(_M_IX86)
|
#elif defined(HAVE_SSE42) && defined(_M_IX86)
|
||||||
return static_cast<int>(
|
return static_cast<int>(
|
||||||
__popcnt(static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32) +
|
__popcnt(static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32) +
|
||||||
__popcnt(static_cast<uint32_t>(v))));
|
__popcnt(static_cast<uint32_t>(v))));
|
||||||
|
Loading…
Reference in New Issue
Block a user