New bit manipulation functions and 128-bit value library (#7338)
Summary: These new functions and 128-bit value bit operations are expected to be used in a forthcoming Bloom filter alternative. No functional changes to production code, just new code only called by unit tests, cosmetic changes to existing headers, and fix an existing function for a yet-unused template instantiation (BitsSetToOne on something signed and smaller than 32 bits). Pull Request resolved: https://github.com/facebook/rocksdb/pull/7338 Test Plan: Unit tests included. Works with and without TEST_UINT128_COMPAT=1 to check compatibility with and without __uint128_t. Also added that parameter to the CircleCI build build-linux-shared_lib-alt_namespace-status_checked. Reviewed By: jay-zhuang Differential Revision: D23494945 Pulled By: pdillinger fbshipit-source-id: 5c0dc419100d9df5d4d9abb153b2855d5aea39e8
This commit is contained in:
parent
a09c3cf13e
commit
c4d8838a2b
@ -31,7 +31,7 @@ jobs:
|
|||||||
- checkout # check out the code in the project directory
|
- checkout # check out the code in the project directory
|
||||||
- run: pyenv global 3.5.2
|
- run: pyenv global 3.5.2
|
||||||
- run: sudo apt-get update -y && sudo apt-get install -y libgflags-dev
|
- run: sudo apt-get update -y && sudo apt-get install -y libgflags-dev
|
||||||
- run: SKIP_FORMAT_BUCK_CHECKS=1 PRINT_PARALLEL_OUTPUTS=1 ASSERT_STATUS_CHECKED=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" GTEST_THROW_ON_FAILURE=0 GTEST_OUTPUT="xml:/tmp/test-results/" make V=1 -j32 all check_some | .circleci/cat_ignore_eagain
|
- run: SKIP_FORMAT_BUCK_CHECKS=1 PRINT_PARALLEL_OUTPUTS=1 ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" GTEST_THROW_ON_FAILURE=0 GTEST_OUTPUT="xml:/tmp/test-results/" make V=1 -j32 all check_some | .circleci/cat_ignore_eagain
|
||||||
- store_test_results:
|
- store_test_results:
|
||||||
path: /tmp/test-results
|
path: /tmp/test-results
|
||||||
|
|
||||||
|
4
Makefile
4
Makefile
@ -410,6 +410,10 @@ ifdef TEST_CACHE_LINE_SIZE
|
|||||||
PLATFORM_CCFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
|
PLATFORM_CCFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
|
||||||
PLATFORM_CXXFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
|
PLATFORM_CXXFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
|
||||||
endif
|
endif
|
||||||
|
ifdef TEST_UINT128_COMPAT
|
||||||
|
PLATFORM_CCFLAGS += -DTEST_UINT128_COMPAT=1
|
||||||
|
PLATFORM_CXXFLAGS += -DTEST_UINT128_COMPAT=1
|
||||||
|
endif
|
||||||
|
|
||||||
# This (the first rule) must depend on "all".
|
# This (the first rule) must depend on "all".
|
||||||
default: all
|
default: all
|
||||||
|
@ -665,11 +665,18 @@ if test "$TRY_SSE_ETC"; then
|
|||||||
# It doesn't even really check that your current CPU is compatible.
|
# It doesn't even really check that your current CPU is compatible.
|
||||||
#
|
#
|
||||||
# SSE4.2 available since nehalem, ca. 2008-2010
|
# SSE4.2 available since nehalem, ca. 2008-2010
|
||||||
|
# Includes POPCNT for BitsSetToOne, BitParity
|
||||||
TRY_SSE42="-msse4.2"
|
TRY_SSE42="-msse4.2"
|
||||||
# PCLMUL available since westmere, ca. 2010-2011
|
# PCLMUL available since westmere, ca. 2010-2011
|
||||||
TRY_PCLMUL="-mpclmul"
|
TRY_PCLMUL="-mpclmul"
|
||||||
# AVX2 available since haswell, ca. 2013-2015
|
# AVX2 available since haswell, ca. 2013-2015
|
||||||
TRY_AVX2="-mavx2"
|
TRY_AVX2="-mavx2"
|
||||||
|
# BMI available since haswell, ca. 2013-2015
|
||||||
|
# Primarily for TZCNT for CountTrailingZeroBits
|
||||||
|
TRY_BMI="-mbmi"
|
||||||
|
# LZCNT available since haswell, ca. 2013-2015
|
||||||
|
# For FloorLog2
|
||||||
|
TRY_LZCNT="-mlzcnt"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o /dev/null 2>/dev/null <<EOF
|
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o /dev/null 2>/dev/null <<EOF
|
||||||
@ -718,6 +725,34 @@ elif test "$USE_SSE"; then
|
|||||||
echo "warning: USE_SSE specified but compiler could not use AVX2 intrinsics, disabling" >&2
|
echo "warning: USE_SSE specified but compiler could not use AVX2 intrinsics, disabling" >&2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_BMI -x c++ - -o /dev/null 2>/dev/null <<EOF
|
||||||
|
#include <cstdint>
|
||||||
|
#include <immintrin.h>
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
(void)argv;
|
||||||
|
return (int)_tzcnt_u64((uint64_t)argc);
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
if [ "$?" = 0 ]; then
|
||||||
|
COMMON_FLAGS="$COMMON_FLAGS $TRY_BMI -DHAVE_BMI"
|
||||||
|
elif test "$USE_SSE"; then
|
||||||
|
echo "warning: USE_SSE specified but compiler could not use BMI intrinsics, disabling" >&2
|
||||||
|
fi
|
||||||
|
|
||||||
|
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_LZCNT -x c++ - -o /dev/null 2>/dev/null <<EOF
|
||||||
|
#include <cstdint>
|
||||||
|
#include <immintrin.h>
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
(void)argv;
|
||||||
|
return (int)_lzcnt_u64((uint64_t)argc);
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
if [ "$?" = 0 ]; then
|
||||||
|
COMMON_FLAGS="$COMMON_FLAGS $TRY_LZCNT -DHAVE_LZCNT"
|
||||||
|
elif test "$USE_SSE"; then
|
||||||
|
echo "warning: USE_SSE specified but compiler could not use LZCNT intrinsics, disabling" >&2
|
||||||
|
fi
|
||||||
|
|
||||||
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
|
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
int main() {
|
int main() {
|
||||||
|
@ -88,12 +88,16 @@ inline uint32_t fastrange32(uint32_t hash, uint32_t range) {
|
|||||||
return static_cast<uint32_t>(product >> 32);
|
return static_cast<uint32_t>(product >> 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef TEST_UINT128_COMPAT
|
||||||
|
#undef HAVE_UINT128_EXTENSION
|
||||||
|
#endif
|
||||||
|
|
||||||
// An alternative to % for mapping a 64-bit hash value to an arbitrary range
|
// An alternative to % for mapping a 64-bit hash value to an arbitrary range
|
||||||
// that fits in size_t. See https://github.com/lemire/fastrange
|
// that fits in size_t. See https://github.com/lemire/fastrange
|
||||||
// We find size_t more convenient than uint64_t for the range, with side
|
// We find size_t more convenient than uint64_t for the range, with side
|
||||||
// benefit of better optimization on 32-bit platforms.
|
// benefit of better optimization on 32-bit platforms.
|
||||||
inline size_t fastrange64(uint64_t hash, size_t range) {
|
inline size_t fastrange64(uint64_t hash, size_t range) {
|
||||||
#if defined(HAVE_UINT128_EXTENSION)
|
#ifdef HAVE_UINT128_EXTENSION
|
||||||
// Can use compiler's 128-bit type. Trust it to do the right thing.
|
// Can use compiler's 128-bit type. Trust it to do the right thing.
|
||||||
__uint128_t wide = __uint128_t{range} * hash;
|
__uint128_t wide = __uint128_t{range} * hash;
|
||||||
return static_cast<size_t>(wide >> 64);
|
return static_cast<size_t>(wide >> 64);
|
||||||
|
@ -7,12 +7,14 @@
|
|||||||
// Use of this source code is governed by a BSD-style license that can be
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "util/hash.h"
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "test_util/testharness.h"
|
#include "test_util/testharness.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/hash.h"
|
#include "util/math128.h"
|
||||||
|
|
||||||
using ROCKSDB_NAMESPACE::EncodeFixed32;
|
using ROCKSDB_NAMESPACE::EncodeFixed32;
|
||||||
using ROCKSDB_NAMESPACE::GetSliceHash64;
|
using ROCKSDB_NAMESPACE::GetSliceHash64;
|
||||||
@ -370,6 +372,159 @@ size_t fastrange64(uint64_t hash, size_t range) {
|
|||||||
return ROCKSDB_NAMESPACE::fastrange64(hash, range);
|
return ROCKSDB_NAMESPACE::fastrange64(hash, range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tests for math.h / math128.h (not worth a separate test binary)
|
||||||
|
using ROCKSDB_NAMESPACE::BitParity;
|
||||||
|
using ROCKSDB_NAMESPACE::BitsSetToOne;
|
||||||
|
using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
|
||||||
|
using ROCKSDB_NAMESPACE::DecodeFixed128;
|
||||||
|
using ROCKSDB_NAMESPACE::EncodeFixed128;
|
||||||
|
using ROCKSDB_NAMESPACE::FloorLog2;
|
||||||
|
using ROCKSDB_NAMESPACE::Lower64of128;
|
||||||
|
using ROCKSDB_NAMESPACE::Multiply64to128;
|
||||||
|
using ROCKSDB_NAMESPACE::Unsigned128;
|
||||||
|
using ROCKSDB_NAMESPACE::Upper64of128;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static void test_BitOps() {
|
||||||
|
// This complex code is to generalize to 128-bit values. Otherwise
|
||||||
|
// we could just use = static_cast<T>(0x5555555555555555ULL);
|
||||||
|
T everyOtherBit = 0;
|
||||||
|
for (unsigned i = 0; i < sizeof(T); ++i) {
|
||||||
|
everyOtherBit = (everyOtherBit << 8) | T{0x55};
|
||||||
|
}
|
||||||
|
|
||||||
|
// This one built using bit operations, as our 128-bit layer
|
||||||
|
// might not implement arithmetic such as subtraction.
|
||||||
|
T vm1 = 0; // "v minus one"
|
||||||
|
|
||||||
|
for (int i = 0; i < int{8 * sizeof(T)}; ++i) {
|
||||||
|
T v = T{1} << i;
|
||||||
|
// If we could directly use arithmetic:
|
||||||
|
// T vm1 = static_cast<T>(v - 1);
|
||||||
|
|
||||||
|
// FloorLog2
|
||||||
|
if (v > 0) {
|
||||||
|
EXPECT_EQ(FloorLog2(v), i);
|
||||||
|
}
|
||||||
|
if (vm1 > 0) {
|
||||||
|
EXPECT_EQ(FloorLog2(vm1), i - 1);
|
||||||
|
EXPECT_EQ(FloorLog2(everyOtherBit & vm1), (i - 1) & ~1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CountTrailingZeroBits
|
||||||
|
if (v != 0) {
|
||||||
|
EXPECT_EQ(CountTrailingZeroBits(v), i);
|
||||||
|
}
|
||||||
|
if (vm1 != 0) {
|
||||||
|
EXPECT_EQ(CountTrailingZeroBits(vm1), 0);
|
||||||
|
}
|
||||||
|
if (i < int{8 * sizeof(T)} - 1) {
|
||||||
|
EXPECT_EQ(CountTrailingZeroBits(~vm1 & everyOtherBit), (i + 1) & ~1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// BitsSetToOne
|
||||||
|
EXPECT_EQ(BitsSetToOne(v), 1);
|
||||||
|
EXPECT_EQ(BitsSetToOne(vm1), i);
|
||||||
|
EXPECT_EQ(BitsSetToOne(vm1 & everyOtherBit), (i + 1) / 2);
|
||||||
|
|
||||||
|
// BitParity
|
||||||
|
EXPECT_EQ(BitParity(v), 1);
|
||||||
|
EXPECT_EQ(BitParity(vm1), i & 1);
|
||||||
|
EXPECT_EQ(BitParity(vm1 & everyOtherBit), ((i + 1) / 2) & 1);
|
||||||
|
|
||||||
|
vm1 = (vm1 << 1) | 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(MathTest, BitOps) {
|
||||||
|
test_BitOps<uint32_t>();
|
||||||
|
test_BitOps<uint64_t>();
|
||||||
|
test_BitOps<uint16_t>();
|
||||||
|
test_BitOps<uint8_t>();
|
||||||
|
test_BitOps<unsigned char>();
|
||||||
|
test_BitOps<unsigned short>();
|
||||||
|
test_BitOps<unsigned int>();
|
||||||
|
test_BitOps<unsigned long>();
|
||||||
|
test_BitOps<unsigned long long>();
|
||||||
|
test_BitOps<char>();
|
||||||
|
test_BitOps<size_t>();
|
||||||
|
test_BitOps<int32_t>();
|
||||||
|
test_BitOps<int64_t>();
|
||||||
|
test_BitOps<int16_t>();
|
||||||
|
test_BitOps<int8_t>();
|
||||||
|
test_BitOps<signed char>();
|
||||||
|
test_BitOps<short>();
|
||||||
|
test_BitOps<int>();
|
||||||
|
test_BitOps<long>();
|
||||||
|
test_BitOps<long long>();
|
||||||
|
test_BitOps<ptrdiff_t>();
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(MathTest, BitOps128) { test_BitOps<Unsigned128>(); }
|
||||||
|
|
||||||
|
TEST(MathTest, Math128) {
|
||||||
|
const Unsigned128 sixteenHexOnes = 0x1111111111111111U;
|
||||||
|
const Unsigned128 thirtyHexOnes = (sixteenHexOnes << 56) | sixteenHexOnes;
|
||||||
|
const Unsigned128 sixteenHexTwos = 0x2222222222222222U;
|
||||||
|
const Unsigned128 thirtyHexTwos = (sixteenHexTwos << 56) | sixteenHexTwos;
|
||||||
|
|
||||||
|
// v will slide from all hex ones to all hex twos
|
||||||
|
Unsigned128 v = thirtyHexOnes;
|
||||||
|
for (int i = 0; i <= 30; ++i) {
|
||||||
|
// Test bitwise operations
|
||||||
|
EXPECT_EQ(BitsSetToOne(v), 30);
|
||||||
|
EXPECT_EQ(BitsSetToOne(~v), 128 - 30);
|
||||||
|
EXPECT_EQ(BitsSetToOne(v & thirtyHexOnes), 30 - i);
|
||||||
|
EXPECT_EQ(BitsSetToOne(v | thirtyHexOnes), 30 + i);
|
||||||
|
EXPECT_EQ(BitsSetToOne(v ^ thirtyHexOnes), 2 * i);
|
||||||
|
EXPECT_EQ(BitsSetToOne(v & thirtyHexTwos), i);
|
||||||
|
EXPECT_EQ(BitsSetToOne(v | thirtyHexTwos), 60 - i);
|
||||||
|
EXPECT_EQ(BitsSetToOne(v ^ thirtyHexTwos), 60 - 2 * i);
|
||||||
|
|
||||||
|
// Test comparisons
|
||||||
|
EXPECT_EQ(v == thirtyHexOnes, i == 0);
|
||||||
|
EXPECT_EQ(v == thirtyHexTwos, i == 30);
|
||||||
|
EXPECT_EQ(v > thirtyHexOnes, i > 0);
|
||||||
|
EXPECT_EQ(v > thirtyHexTwos, false);
|
||||||
|
EXPECT_EQ(v >= thirtyHexOnes, true);
|
||||||
|
EXPECT_EQ(v >= thirtyHexTwos, i == 30);
|
||||||
|
EXPECT_EQ(v < thirtyHexOnes, false);
|
||||||
|
EXPECT_EQ(v < thirtyHexTwos, i < 30);
|
||||||
|
EXPECT_EQ(v <= thirtyHexOnes, i == 0);
|
||||||
|
EXPECT_EQ(v <= thirtyHexTwos, true);
|
||||||
|
|
||||||
|
// Update v, clearing upper-most byte
|
||||||
|
v = ((v << 12) >> 8) | 0x2;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 128; ++i) {
|
||||||
|
// Test shifts
|
||||||
|
Unsigned128 sl = thirtyHexOnes << i;
|
||||||
|
Unsigned128 sr = thirtyHexOnes >> i;
|
||||||
|
EXPECT_EQ(BitsSetToOne(sl), std::min(30, 32 - i / 4));
|
||||||
|
EXPECT_EQ(BitsSetToOne(sr), std::max(0, 30 - (i + 3) / 4));
|
||||||
|
EXPECT_EQ(BitsSetToOne(sl & sr), i % 2 ? 0 : std::max(0, 30 - i / 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test 64x64->128 multiply
|
||||||
|
Unsigned128 product =
|
||||||
|
Multiply64to128(0x1111111111111111U, 0x2222222222222222U);
|
||||||
|
EXPECT_EQ(Lower64of128(product), 2295594818061633090U);
|
||||||
|
EXPECT_EQ(Upper64of128(product), 163971058432973792U);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(MathTest, Coding128) {
|
||||||
|
const char *in = "_1234567890123456";
|
||||||
|
Unsigned128 decoded = DecodeFixed128(in + 1);
|
||||||
|
EXPECT_EQ(Lower64of128(decoded), 4050765991979987505U);
|
||||||
|
EXPECT_EQ(Upper64of128(decoded), 3906085646303834169U);
|
||||||
|
char out[18];
|
||||||
|
out[0] = '_';
|
||||||
|
EncodeFixed128(out + 1, decoded);
|
||||||
|
out[17] = '\0';
|
||||||
|
EXPECT_EQ(std::string(in), std::string(out));
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
::testing::InitGoogleTest(&argc, argv);
|
::testing::InitGoogleTest(&argc, argv);
|
||||||
|
|
||||||
|
102
util/math.h
102
util/math.h
@ -13,24 +13,112 @@
|
|||||||
|
|
||||||
namespace ROCKSDB_NAMESPACE {
|
namespace ROCKSDB_NAMESPACE {
|
||||||
|
|
||||||
|
// Fast implementation of floor(log2(v)). Undefined for 0 or negative
|
||||||
|
// numbers (in case of signed type).
|
||||||
|
template <typename T>
|
||||||
|
inline int FloorLog2(T v) {
|
||||||
|
static_assert(std::is_integral<T>::value, "non-integral type");
|
||||||
|
assert(v > 0);
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
|
||||||
|
unsigned long lz = 0;
|
||||||
|
if (sizeof(T) <= sizeof(uint32_t)) {
|
||||||
|
_BitScanReverse(&lz, static_cast<uint32_t>(v));
|
||||||
|
} else {
|
||||||
|
_BitScanReverse64(&lz, static_cast<uint64_t>(v));
|
||||||
|
}
|
||||||
|
return 63 - static_cast<int>(lz);
|
||||||
|
#else
|
||||||
|
static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
|
||||||
|
if (sizeof(T) <= sizeof(unsigned int)) {
|
||||||
|
int lz = __builtin_clz(static_cast<unsigned int>(v));
|
||||||
|
return int{sizeof(unsigned int)} * 8 - 1 - lz;
|
||||||
|
} else if (sizeof(T) <= sizeof(unsigned long)) {
|
||||||
|
int lz = __builtin_clzl(static_cast<unsigned long>(v));
|
||||||
|
return int{sizeof(unsigned long)} * 8 - 1 - lz;
|
||||||
|
} else {
|
||||||
|
int lz = __builtin_clzll(static_cast<unsigned long long>(v));
|
||||||
|
return int{sizeof(unsigned long long)} * 8 - 1 - lz;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Number of low-order zero bits before the first 1 bit. Undefined for 0.
|
||||||
|
template <typename T>
|
||||||
|
inline int CountTrailingZeroBits(T v) {
|
||||||
|
static_assert(std::is_integral<T>::value, "non-integral type");
|
||||||
|
assert(v != 0);
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
|
||||||
|
unsigned long tz = 0;
|
||||||
|
if (sizeof(T) <= sizeof(uint32_t)) {
|
||||||
|
_BitScanForward(&tz, static_cast<uint32_t>(v));
|
||||||
|
} else {
|
||||||
|
_BitScanForward64(&tz, static_cast<uint64_t>(v));
|
||||||
|
}
|
||||||
|
return static_cast<int>(tz);
|
||||||
|
#else
|
||||||
|
static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
|
||||||
|
if (sizeof(T) <= sizeof(unsigned int)) {
|
||||||
|
return __builtin_ctz(static_cast<unsigned int>(v));
|
||||||
|
} else if (sizeof(T) <= sizeof(unsigned long)) {
|
||||||
|
return __builtin_ctzl(static_cast<unsigned long>(v));
|
||||||
|
} else {
|
||||||
|
return __builtin_ctzll(static_cast<unsigned long long>(v));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Number of bits set to 1. Also known as "population count".
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline int BitsSetToOne(T v) {
|
inline int BitsSetToOne(T v) {
|
||||||
static_assert(std::is_integral<T>::value, "non-integral type");
|
static_assert(std::is_integral<T>::value, "non-integral type");
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
|
static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
|
||||||
if (sizeof(T) > sizeof(uint32_t)) {
|
if (sizeof(T) < sizeof(uint32_t)) {
|
||||||
return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
|
// This bit mask is to avoid a compiler warning on unused path
|
||||||
} else {
|
constexpr auto mm = 8 * sizeof(uint32_t) - 1;
|
||||||
|
// The bit mask is to neutralize sign extension on small signed types
|
||||||
|
constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1;
|
||||||
|
return static_cast<int>(__popcnt(static_cast<uint32_t>(v) & m));
|
||||||
|
} else if (sizeof(T) == sizeof(uint32_t)) {
|
||||||
return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
|
return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
|
||||||
|
} else {
|
||||||
|
return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
|
static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
|
||||||
if (sizeof(T) > sizeof(unsigned long)) {
|
if (sizeof(T) < sizeof(unsigned int)) {
|
||||||
return __builtin_popcountll(static_cast<unsigned long long>(v));
|
// This bit mask is to avoid a compiler warning on unused path
|
||||||
} else if (sizeof(T) > sizeof(unsigned int)) {
|
constexpr auto mm = 8 * sizeof(unsigned int) - 1;
|
||||||
|
// This bit mask is to neutralize sign extension on small signed types
|
||||||
|
constexpr unsigned int m = (1U << ((8 * sizeof(T)) & mm)) - 1;
|
||||||
|
return __builtin_popcount(static_cast<unsigned int>(v) & m);
|
||||||
|
} else if (sizeof(T) == sizeof(unsigned int)) {
|
||||||
|
return __builtin_popcount(static_cast<unsigned int>(v));
|
||||||
|
} else if (sizeof(T) <= sizeof(unsigned long)) {
|
||||||
return __builtin_popcountl(static_cast<unsigned long>(v));
|
return __builtin_popcountl(static_cast<unsigned long>(v));
|
||||||
} else {
|
} else {
|
||||||
return __builtin_popcount(static_cast<unsigned int>(v));
|
return __builtin_popcountll(static_cast<unsigned long long>(v));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline int BitParity(T v) {
|
||||||
|
static_assert(std::is_integral<T>::value, "non-integral type");
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
// bit parity == oddness of popcount
|
||||||
|
return BitsSetToOne(v) & 1;
|
||||||
|
#else
|
||||||
|
static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
|
||||||
|
if (sizeof(T) <= sizeof(unsigned int)) {
|
||||||
|
// On any sane systen, potential sign extension here won't change parity
|
||||||
|
return __builtin_parity(static_cast<unsigned int>(v));
|
||||||
|
} else if (sizeof(T) <= sizeof(unsigned long)) {
|
||||||
|
return __builtin_parityl(static_cast<unsigned long>(v));
|
||||||
|
} else {
|
||||||
|
return __builtin_parityll(static_cast<unsigned long long>(v));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
223
util/math128.h
Normal file
223
util/math128.h
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "util/coding.h"
|
||||||
|
#include "util/math.h"
|
||||||
|
|
||||||
|
#ifdef TEST_UINT128_COMPAT
|
||||||
|
#undef HAVE_UINT128_EXTENSION
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace ROCKSDB_NAMESPACE {
|
||||||
|
|
||||||
|
// Unsigned128 is a 128 bit value supporting (at least) bitwise operators,
|
||||||
|
// shifts, and comparisons. __uint128_t is not always available.
|
||||||
|
|
||||||
|
#ifdef HAVE_UINT128_EXTENSION
|
||||||
|
using Unsigned128 = __uint128_t;
|
||||||
|
#else
|
||||||
|
struct Unsigned128 {
|
||||||
|
uint64_t lo;
|
||||||
|
uint64_t hi;
|
||||||
|
|
||||||
|
inline Unsigned128() {
|
||||||
|
static_assert(sizeof(Unsigned128) == 2 * sizeof(uint64_t),
|
||||||
|
"unexpected overhead in representation");
|
||||||
|
lo = 0;
|
||||||
|
hi = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128(uint64_t lower) {
|
||||||
|
lo = lower;
|
||||||
|
hi = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128(uint64_t lower, uint64_t upper) {
|
||||||
|
lo = lower;
|
||||||
|
hi = upper;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
|
||||||
|
shift &= 127;
|
||||||
|
Unsigned128 rv;
|
||||||
|
if (shift >= 64) {
|
||||||
|
rv.lo = 0;
|
||||||
|
rv.hi = lhs.lo << (shift & 63);
|
||||||
|
} else {
|
||||||
|
uint64_t tmp = lhs.lo;
|
||||||
|
rv.lo = tmp << shift;
|
||||||
|
// Ensure shift==0 shifts away everything. (This avoids another
|
||||||
|
// conditional branch on shift == 0.)
|
||||||
|
tmp = tmp >> 1 >> (63 - shift);
|
||||||
|
rv.hi = tmp | (lhs.hi << shift);
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128& operator<<=(Unsigned128& lhs, unsigned shift) {
|
||||||
|
lhs = lhs << shift;
|
||||||
|
return lhs;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128 operator>>(const Unsigned128& lhs, unsigned shift) {
|
||||||
|
shift &= 127;
|
||||||
|
Unsigned128 rv;
|
||||||
|
if (shift >= 64) {
|
||||||
|
rv.hi = 0;
|
||||||
|
rv.lo = lhs.hi >> (shift & 63);
|
||||||
|
} else {
|
||||||
|
uint64_t tmp = lhs.hi;
|
||||||
|
rv.hi = tmp >> shift;
|
||||||
|
// Ensure shift==0 shifts away everything
|
||||||
|
tmp = tmp << 1 << (63 - shift);
|
||||||
|
rv.lo = tmp | (lhs.lo >> shift);
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128& operator>>=(Unsigned128& lhs, unsigned shift) {
|
||||||
|
lhs = lhs >> shift;
|
||||||
|
return lhs;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128 operator&(const Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
return Unsigned128(lhs.lo & rhs.lo, lhs.hi & rhs.hi);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128& operator&=(Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
lhs = lhs & rhs;
|
||||||
|
return lhs;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128 operator|(const Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
return Unsigned128(lhs.lo | rhs.lo, lhs.hi | rhs.hi);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128& operator|=(Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
lhs = lhs | rhs;
|
||||||
|
return lhs;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128 operator^(const Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
return Unsigned128(lhs.lo ^ rhs.lo, lhs.hi ^ rhs.hi);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128& operator^=(Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
lhs = lhs ^ rhs;
|
||||||
|
return lhs;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128 operator~(const Unsigned128& v) {
|
||||||
|
return Unsigned128(~v.lo, ~v.hi);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator==(const Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
return lhs.lo == rhs.lo && lhs.hi == rhs.hi;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator!=(const Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
return lhs.lo != rhs.lo || lhs.hi != rhs.hi;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator>(const Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo > rhs.lo);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator<(const Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo < rhs.lo);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator>=(const Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo >= rhs.lo);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator<=(const Unsigned128& lhs, const Unsigned128& rhs) {
|
||||||
|
return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo <= rhs.lo);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
inline uint64_t Lower64of128(Unsigned128 v) {
|
||||||
|
#ifdef HAVE_UINT128_EXTENSION
|
||||||
|
return static_cast<uint64_t>(v);
|
||||||
|
#else
|
||||||
|
return v.lo;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
inline uint64_t Upper64of128(Unsigned128 v) {
|
||||||
|
#ifdef HAVE_UINT128_EXTENSION
|
||||||
|
return static_cast<uint64_t>(v >> 64);
|
||||||
|
#else
|
||||||
|
return v.hi;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// This generally compiles down to a single fast instruction on 64-bit.
|
||||||
|
// This doesn't really make sense as operator* because it's not a
|
||||||
|
// general 128x128 multiply and provides more output than 64x64 multiply.
|
||||||
|
inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) {
|
||||||
|
#ifdef HAVE_UINT128_EXTENSION
|
||||||
|
return Unsigned128{a} * Unsigned128{b};
|
||||||
|
#else
|
||||||
|
// Full decomposition
|
||||||
|
// NOTE: GCC seems to fully understand this code as 64-bit x 64-bit
|
||||||
|
// -> 128-bit multiplication and optimize it appropriately.
|
||||||
|
uint64_t tmp = uint64_t{b & 0xffffFFFF} * uint64_t{a & 0xffffFFFF};
|
||||||
|
uint64_t lower = tmp & 0xffffFFFF;
|
||||||
|
tmp >>= 32;
|
||||||
|
tmp += uint64_t{b & 0xffffFFFF} * uint64_t{a >> 32};
|
||||||
|
// Avoid overflow: first add lower 32 of tmp2, and later upper 32
|
||||||
|
uint64_t tmp2 = uint64_t{b >> 32} * uint64_t{a & 0xffffFFFF};
|
||||||
|
tmp += tmp2 & 0xffffFFFF;
|
||||||
|
lower |= tmp << 32;
|
||||||
|
tmp >>= 32;
|
||||||
|
tmp += tmp2 >> 32;
|
||||||
|
tmp += uint64_t{b >> 32} * uint64_t{a >> 32};
|
||||||
|
return Unsigned128(lower, tmp);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline int FloorLog2(Unsigned128 v) {
|
||||||
|
if (Upper64of128(v) == 0) {
|
||||||
|
return FloorLog2(Lower64of128(v));
|
||||||
|
} else {
|
||||||
|
return FloorLog2(Upper64of128(v)) + 64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline int CountTrailingZeroBits(Unsigned128 v) {
|
||||||
|
if (Lower64of128(v) != 0) {
|
||||||
|
return CountTrailingZeroBits(Lower64of128(v));
|
||||||
|
} else {
|
||||||
|
return CountTrailingZeroBits(Upper64of128(v)) + 64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline int BitsSetToOne(Unsigned128 v) {
|
||||||
|
return BitsSetToOne(Lower64of128(v)) + BitsSetToOne(Upper64of128(v));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline int BitParity(Unsigned128 v) {
|
||||||
|
return BitParity(Lower64of128(v)) ^ BitParity(Upper64of128(v));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void EncodeFixed128(char* dst, Unsigned128 value) {
|
||||||
|
EncodeFixed64(dst, Lower64of128(value));
|
||||||
|
EncodeFixed64(dst + 8, Upper64of128(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unsigned128 DecodeFixed128(const char* ptr) {
|
||||||
|
Unsigned128 rv = DecodeFixed64(ptr + 8);
|
||||||
|
return (rv << 64) | DecodeFixed64(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ROCKSDB_NAMESPACE
|
Loading…
Reference in New Issue
Block a user