efd035164b
Summary: Especially after updating to C++17, I don't see a compelling case for *requiring* any folly components in RocksDB. I was able to purge the existing hard dependencies, and it can be quite difficult to strip out non-trivial components from folly for use in RocksDB. (The prospect of doing that on F14 has changed my mind on the best approach here.) But this change creates an optional integration where we can plug in components from folly at compile time, starting here with F14FastMap to replace std::unordered_map when possible (probably no public APIs for example). I have replaced the biggest CPU users of std::unordered_map with compile-time pluggable UnorderedMap which will use F14FastMap when USE_FOLLY is set. USE_FOLLY is always set in the Meta-internal buck build, and a simulation of that is in the Makefile for public CI testing. A full folly build is not needed, but checking out the full folly repo is much simpler for getting the dependency, and anything else we might want to optionally integrate in the future. Some picky details: * I don't think the distributed mutex stuff is actually used, so it was easy to remove. * I implemented an alternative to `folly::constexpr_log2` (which is much easier in C++17 than C++11) so that I could pull out the hard dependencies on `ConstexprMath.h` * I had to add noexcept move constructors/operators to some types to make F14's complainUnlessNothrowMoveAndDestroy check happy, and I added a macro to make that easier in some common cases. * Updated Meta-internal buck build to use folly F14Map (always) No updates to HISTORY.md nor INSTALL.md as this is not (yet?) considered a production integration for open source users. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9546 Test Plan: CircleCI tests updated so that a couple of them use folly. Most internal unit & stress/crash tests updated to use Meta-internal latest folly. (Note: they should probably use buck but they currently use Makefile.) Example performance improvement: when filter partitions are pinned in cache, they are tracked by PartitionedFilterBlockReader::filter_map_ and we can build a test that exercises that heavily. Build DB with ``` TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -benchmarks=fillrandom -num=10000000 -disable_wal=1 -write_buffer_size=30000000 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -partition_index_and_filters ``` and test with (simultaneous runs with & without folly, ~20 times each to see convergence) ``` TEST_TMPDIR=/dev/shm/rocksdb ./db_bench_folly -readonly -use_existing_db -benchmarks=readrandom -num=10000000 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -partition_index_and_filters -duration=40 -pin_l0_filter_and_index_blocks_in_cache ``` Average ops/s no folly: 26229.2 Average ops/s with folly: 26853.3 (+2.4%) Reviewed By: ajkr Differential Revision: D34181736 Pulled By: pdillinger fbshipit-source-id: ffa6ad5104c2880321d8a1aa7187e00ab0d02e94
254 lines
8.8 KiB
C++
254 lines
8.8 KiB
C++
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#pragma once
|
|
|
|
#include <assert.h>
|
|
#ifdef _MSC_VER
|
|
#include <intrin.h>
|
|
#endif
|
|
|
|
#include <cstdint>
|
|
#include <type_traits>
|
|
|
|
#include "rocksdb/rocksdb_namespace.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
// Fast implementation of floor(log2(v)). Undefined for 0 or negative
|
|
// numbers (in case of signed type).
|
|
template <typename T>
|
|
inline int FloorLog2(T v) {
|
|
static_assert(std::is_integral<T>::value, "non-integral type");
|
|
assert(v > 0);
|
|
#ifdef _MSC_VER
|
|
static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
|
|
unsigned long idx = 0;
|
|
if (sizeof(T) <= sizeof(uint32_t)) {
|
|
_BitScanReverse(&idx, static_cast<uint32_t>(v));
|
|
} else {
|
|
#if defined(_M_X64) || defined(_M_ARM64)
|
|
_BitScanReverse64(&idx, static_cast<uint64_t>(v));
|
|
#else
|
|
const auto vh = static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32);
|
|
if (vh != 0) {
|
|
_BitScanReverse(&idx, static_cast<uint32_t>(vh));
|
|
idx += 32;
|
|
} else {
|
|
_BitScanReverse(&idx, static_cast<uint32_t>(v));
|
|
}
|
|
#endif
|
|
}
|
|
return idx;
|
|
#else
|
|
static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
|
|
if (sizeof(T) <= sizeof(unsigned int)) {
|
|
int lz = __builtin_clz(static_cast<unsigned int>(v));
|
|
return int{sizeof(unsigned int)} * 8 - 1 - lz;
|
|
} else if (sizeof(T) <= sizeof(unsigned long)) {
|
|
int lz = __builtin_clzl(static_cast<unsigned long>(v));
|
|
return int{sizeof(unsigned long)} * 8 - 1 - lz;
|
|
} else {
|
|
int lz = __builtin_clzll(static_cast<unsigned long long>(v));
|
|
return int{sizeof(unsigned long long)} * 8 - 1 - lz;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
// Constexpr version of FloorLog2
|
|
template <typename T>
|
|
constexpr int ConstexprFloorLog2(T v) {
|
|
int rv = 0;
|
|
while (v > T{1}) {
|
|
++rv;
|
|
v >>= 1;
|
|
}
|
|
return rv;
|
|
}
|
|
|
|
// Number of low-order zero bits before the first 1 bit. Undefined for 0.
|
|
template <typename T>
|
|
inline int CountTrailingZeroBits(T v) {
|
|
static_assert(std::is_integral<T>::value, "non-integral type");
|
|
assert(v != 0);
|
|
#ifdef _MSC_VER
|
|
static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
|
|
unsigned long tz = 0;
|
|
if (sizeof(T) <= sizeof(uint32_t)) {
|
|
_BitScanForward(&tz, static_cast<uint32_t>(v));
|
|
} else {
|
|
#if defined(_M_X64) || defined(_M_ARM64)
|
|
_BitScanForward64(&tz, static_cast<uint64_t>(v));
|
|
#else
|
|
_BitScanForward(&tz, static_cast<uint32_t>(v));
|
|
if (tz == 0) {
|
|
_BitScanForward(&tz,
|
|
static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32));
|
|
tz += 32;
|
|
}
|
|
#endif
|
|
}
|
|
return static_cast<int>(tz);
|
|
#else
|
|
static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
|
|
if (sizeof(T) <= sizeof(unsigned int)) {
|
|
return __builtin_ctz(static_cast<unsigned int>(v));
|
|
} else if (sizeof(T) <= sizeof(unsigned long)) {
|
|
return __builtin_ctzl(static_cast<unsigned long>(v));
|
|
} else {
|
|
return __builtin_ctzll(static_cast<unsigned long long>(v));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
// Not all MSVC compile settings will use `BitsSetToOneFallback()`. We include
|
|
// the following code at coarse granularity for simpler macros. It's important
|
|
// to exclude at least so our non-MSVC unit test coverage tool doesn't see it.
|
|
#ifdef _MSC_VER
|
|
|
|
namespace detail {
|
|
|
|
template <typename T>
|
|
int BitsSetToOneFallback(T v) {
|
|
const int kBits = static_cast<int>(sizeof(T)) * 8;
|
|
static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits");
|
|
// we static_cast these bit patterns in order to truncate them to the correct
|
|
// size. Warning C4309 dislikes this technique, so disable it here.
|
|
#pragma warning(disable : 4309)
|
|
v = static_cast<T>(v - ((v >> 1) & static_cast<T>(0x5555555555555555ull)));
|
|
v = static_cast<T>((v & static_cast<T>(0x3333333333333333ull)) +
|
|
((v >> 2) & static_cast<T>(0x3333333333333333ull)));
|
|
v = static_cast<T>((v + (v >> 4)) & static_cast<T>(0x0F0F0F0F0F0F0F0Full));
|
|
#pragma warning(default : 4309)
|
|
for (int shift_bits = 8; shift_bits < kBits; shift_bits <<= 1) {
|
|
v += static_cast<T>(v >> shift_bits);
|
|
}
|
|
// we want the bottom "slot" that's big enough to represent a value up to
|
|
// (and including) kBits.
|
|
return static_cast<int>(v & static_cast<T>(kBits | (kBits - 1)));
|
|
}
|
|
|
|
} // namespace detail
|
|
|
|
#endif // _MSC_VER
|
|
|
|
// Number of bits set to 1. Also known as "population count".
|
|
template <typename T>
|
|
inline int BitsSetToOne(T v) {
|
|
static_assert(std::is_integral<T>::value, "non-integral type");
|
|
#ifdef _MSC_VER
|
|
static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
|
|
if (sizeof(T) < sizeof(uint32_t)) {
|
|
// This bit mask is to avoid a compiler warning on unused path
|
|
constexpr auto mm = 8 * sizeof(uint32_t) - 1;
|
|
// The bit mask is to neutralize sign extension on small signed types
|
|
constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1;
|
|
#if defined(HAVE_SSE42) && (defined(_M_X64) || defined(_M_IX86))
|
|
return static_cast<int>(__popcnt(static_cast<uint32_t>(v) & m));
|
|
#else
|
|
return static_cast<int>(detail::BitsSetToOneFallback(v) & m);
|
|
#endif
|
|
} else if (sizeof(T) == sizeof(uint32_t)) {
|
|
#if defined(HAVE_SSE42) && (defined(_M_X64) || defined(_M_IX86))
|
|
return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
|
|
#else
|
|
return detail::BitsSetToOneFallback(static_cast<uint32_t>(v));
|
|
#endif
|
|
} else {
|
|
#if defined(HAVE_SSE42) && defined(_M_X64)
|
|
return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
|
|
#elif defined(HAVE_SSE42) && defined(_M_IX86)
|
|
return static_cast<int>(
|
|
__popcnt(static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32) +
|
|
__popcnt(static_cast<uint32_t>(v))));
|
|
#else
|
|
return detail::BitsSetToOneFallback(static_cast<uint64_t>(v));
|
|
#endif
|
|
}
|
|
#else
|
|
static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
|
|
if (sizeof(T) < sizeof(unsigned int)) {
|
|
// This bit mask is to avoid a compiler warning on unused path
|
|
constexpr auto mm = 8 * sizeof(unsigned int) - 1;
|
|
// This bit mask is to neutralize sign extension on small signed types
|
|
constexpr unsigned int m = (1U << ((8 * sizeof(T)) & mm)) - 1;
|
|
return __builtin_popcount(static_cast<unsigned int>(v) & m);
|
|
} else if (sizeof(T) == sizeof(unsigned int)) {
|
|
return __builtin_popcount(static_cast<unsigned int>(v));
|
|
} else if (sizeof(T) <= sizeof(unsigned long)) {
|
|
return __builtin_popcountl(static_cast<unsigned long>(v));
|
|
} else {
|
|
return __builtin_popcountll(static_cast<unsigned long long>(v));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
template <typename T>
|
|
inline int BitParity(T v) {
|
|
static_assert(std::is_integral<T>::value, "non-integral type");
|
|
#ifdef _MSC_VER
|
|
// bit parity == oddness of popcount
|
|
return BitsSetToOne(v) & 1;
|
|
#else
|
|
static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
|
|
if (sizeof(T) <= sizeof(unsigned int)) {
|
|
// On any sane systen, potential sign extension here won't change parity
|
|
return __builtin_parity(static_cast<unsigned int>(v));
|
|
} else if (sizeof(T) <= sizeof(unsigned long)) {
|
|
return __builtin_parityl(static_cast<unsigned long>(v));
|
|
} else {
|
|
return __builtin_parityll(static_cast<unsigned long long>(v));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
// Swaps between big and little endian. Can be used in combination with the
|
|
// little-endian encoding/decoding functions in coding_lean.h and coding.h to
|
|
// encode/decode big endian.
|
|
template <typename T>
|
|
inline T EndianSwapValue(T v) {
|
|
static_assert(std::is_integral<T>::value, "non-integral type");
|
|
|
|
#ifdef _MSC_VER
|
|
if (sizeof(T) == 2) {
|
|
return static_cast<T>(_byteswap_ushort(static_cast<uint16_t>(v)));
|
|
} else if (sizeof(T) == 4) {
|
|
return static_cast<T>(_byteswap_ulong(static_cast<uint32_t>(v)));
|
|
} else if (sizeof(T) == 8) {
|
|
return static_cast<T>(_byteswap_uint64(static_cast<uint64_t>(v)));
|
|
}
|
|
#else
|
|
if (sizeof(T) == 2) {
|
|
return static_cast<T>(__builtin_bswap16(static_cast<uint16_t>(v)));
|
|
} else if (sizeof(T) == 4) {
|
|
return static_cast<T>(__builtin_bswap32(static_cast<uint32_t>(v)));
|
|
} else if (sizeof(T) == 8) {
|
|
return static_cast<T>(__builtin_bswap64(static_cast<uint64_t>(v)));
|
|
}
|
|
#endif
|
|
// Recognized by clang as bswap, but not by gcc :(
|
|
T ret_val = 0;
|
|
for (std::size_t i = 0; i < sizeof(T); ++i) {
|
|
ret_val |= ((v >> (8 * i)) & 0xff) << (8 * (sizeof(T) - 1 - i));
|
|
}
|
|
return ret_val;
|
|
}
|
|
|
|
// Reverses the order of bits in an integral value
|
|
template <typename T>
|
|
inline T ReverseBits(T v) {
|
|
T r = EndianSwapValue(v);
|
|
const T kHighestByte = T{1} << ((sizeof(T) - 1) * 8);
|
|
const T kEveryByte = kHighestByte | (kHighestByte / 255);
|
|
|
|
r = ((r & (kEveryByte * 0x0f)) << 4) | ((r >> 4) & (kEveryByte * 0x0f));
|
|
r = ((r & (kEveryByte * 0x33)) << 2) | ((r >> 2) & (kEveryByte * 0x33));
|
|
r = ((r & (kEveryByte * 0x55)) << 1) | ((r >> 1) & (kEveryByte * 0x55));
|
|
|
|
return r;
|
|
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|