cross-platform compatibility improvements
Summary: We've had a couple CockroachDB users fail to build RocksDB on exotic platforms, so I figured I'd try my hand at solving these issues upstream. The problems stem from a) `USE_SSE=1` being too aggressive about turning on SSE4.2, even on toolchains that don't support SSE4.2 and b) RocksDB attempting to detect support for thread-local storage based on OS, even though it can vary by compiler on the same OS. See the individual commit messages for details. Regarding SSE support, this PR should change virtually nothing for non-CMake based builds. `make`, `PORTABLE=1 make`, `USE_SSE=1 make`, and `PORTABLE=1 USE_SSE=1 make` function exactly as before, except that SSE support will be automatically disabled when a simple SSE4.2-using test program fails to compile, as it does on OpenBSD. (OpenBSD's ports GCC supports SSE4.2, but its binutils do not, so `__SSE_4_2__` is defined but an SSE4.2-using program will fail to assemble.) A warning is emitted in this case. The CMake build is modified to support the same set of options, except that `USE_SSE` is spelled `FORCE_SSE42` because `USE_SSE` is rather useless now that we can automatically detect SSE support, and I figure changing options in the CMake build is less disruptive than changing the non-CMake build. I've tested these changes on all the platforms I can get my hands on (macOS, Windows MSVC, Windows MinGW, and OpenBSD) and it all works splendidly. Let me know if there's anything you object to—I obviously don't mean to break any of your build pipelines in the process of fixing ours downstream. Closes https://github.com/facebook/rocksdb/pull/2199 Differential Revision: D5054042 Pulled By: yiwu-arbug fbshipit-source-id: 938e1fc665c049c02ae15698e1409155b8e72171
This commit is contained in:
parent
d004333021
commit
11c5d4741a
@ -106,29 +106,56 @@ if(NOT WIN32)
|
||||
string(STRIP "${ROCKSDB_VERSION_MAJOR}" ROCKSDB_VERSION_MAJOR)
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
option(WITH_AVX2 "build with AVX2" ON)
|
||||
if(WITH_AVX2)
|
||||
if(MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2")
|
||||
endif()
|
||||
option(WITH_MD_LIBRARY "build with MD" ON)
|
||||
if(WIN32 AND MSVC)
|
||||
if(WITH_MD_LIBRARY)
|
||||
set(RUNTIME_LIBRARY "MD")
|
||||
else()
|
||||
set(RUNTIME_LIBRARY "MT")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(WITH_MD_LIBRARY "build with MD" ON)
|
||||
if(MSVC)
|
||||
if(WITH_MD_LIBRARY)
|
||||
set(RUNTIME_LIBRARY "MD")
|
||||
else()
|
||||
set(RUNTIME_LIBRARY "MT")
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
option(WITH_SSE42 "build with SSE4.2" ON)
|
||||
if(WITH_SSE42)
|
||||
option(PORTABLE "build a portable binary" OFF)
|
||||
option(FORCE_SSE42 "force building with SSE4.2, even when PORTABLE=ON" OFF)
|
||||
if(PORTABLE)
|
||||
# MSVC does not need a separate compiler flag to enable SSE4.2; if nmmintrin.h
|
||||
# is available, it is available by default.
|
||||
if(FORCE_SSE42 AND NOT MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
|
||||
endif()
|
||||
else()
|
||||
if(MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_REQUIRED_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
include(CheckCXXSourceCompiles)
|
||||
CHECK_CXX_SOURCE_COMPILES("
|
||||
#include <cstdint>
|
||||
#include <nmmintrin.h>
|
||||
int main() {
|
||||
volatile uint32_t x = _mm_crc32_u32(0, 0);
|
||||
}
|
||||
" HAVE_SSE42)
|
||||
if(HAVE_SSE42)
|
||||
add_definitions(-DHAVE_SSE42)
|
||||
elseif(FORCE_SSE42)
|
||||
message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled")
|
||||
endif()
|
||||
|
||||
CHECK_CXX_SOURCE_COMPILES("
|
||||
#if defined(_MSC_VER) && !defined(__thread)
|
||||
#define __thread __declspec(thread)
|
||||
#endif
|
||||
int main() {
|
||||
static __thread int tls;
|
||||
}
|
||||
" HAVE_THREAD_LOCAL)
|
||||
if(HAVE_THREAD_LOCAL)
|
||||
add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL)
|
||||
endif()
|
||||
|
||||
set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc)
|
||||
@ -520,7 +547,7 @@ if(WIN32)
|
||||
set(SYSTEM_LIBS ${SYSTEM_LIBS} Shlwapi.lib Rpcrt4.lib)
|
||||
set(LIBS ${ROCKSDB_STATIC_LIB} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
|
||||
else()
|
||||
set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} rt)
|
||||
set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT})
|
||||
set(LIBS ${ROCKSDB_SHARED_LIB} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
|
||||
|
||||
add_library(${ROCKSDB_SHARED_LIB} SHARED ${SOURCES})
|
||||
|
@ -21,9 +21,11 @@ depend on gflags. You will need to have gflags installed to run `make all`. This
|
||||
use binaries compiled by `make all` in production.
|
||||
|
||||
* By default the binary we produce is optimized for the platform you're compiling on
|
||||
(-march=native or the equivalent). If you want to build a portable binary, add 'PORTABLE=1' before
|
||||
your make commands, like this: `PORTABLE=1 make static_lib`. If you want to build a binary that
|
||||
makes use of SSE4, add 'USE_SSE=1' before your make commands, like this: `USE_SSE=1 make static_lib`.
|
||||
(`-march=native` or the equivalent). SSE4.2 will thus be enabled automatically if your
|
||||
CPU supports it. To print a warning if your CPU does not support SSE4.2, build with
|
||||
`USE_SSE=1 make static_lib` or, if using CMake, `cmake -DFORCE_SSE42=ON`. If you want
|
||||
to build a portable binary, add `PORTABLE=1` before your make commands, like this:
|
||||
`PORTABLE=1 make static_lib`.
|
||||
|
||||
## Dependencies
|
||||
|
||||
|
1
TARGETS
1
TARGETS
@ -11,6 +11,7 @@ rocksdb_compiler_flags = [
|
||||
"-DROCKSDB_MALLOC_USABLE_SIZE",
|
||||
"-DROCKSDB_RANGESYNC_PRESENT",
|
||||
"-DROCKSDB_SCHED_GETCPU_PRESENT",
|
||||
"-DROCKSDB_SUPPORT_THREAD_LOCAL",
|
||||
"-DOS_LINUX",
|
||||
# Flags to enable libs we include
|
||||
"-DSNAPPY",
|
||||
|
@ -442,14 +442,8 @@ if test "$USE_HDFS"; then
|
||||
JAVA_LDFLAGS="$JAVA_LDFLAGS $HDFS_LDFLAGS"
|
||||
fi
|
||||
|
||||
if [ "$TARGET_OS" = FreeBSD -a "$TARGET_ARCHITECTURE" = i386 ]; then
|
||||
# Intel SSE instructions breaks compilation on FreeBSD i386
|
||||
unset USE_SSE
|
||||
fi
|
||||
|
||||
if test "$USE_SSE"; then
|
||||
# if Intel SSE instruction set is supported, set USE_SSE=1
|
||||
COMMON_FLAGS="$COMMON_FLAGS -msse -msse4.2 "
|
||||
COMMON_FLAGS="$COMMON_FLAGS -msse4.2"
|
||||
elif test -z "$PORTABLE"; then
|
||||
if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then
|
||||
# Tune for this POWER processor, treating '+' models as base models
|
||||
@ -462,6 +456,36 @@ elif test -z "$PORTABLE"; then
|
||||
fi
|
||||
fi
|
||||
|
||||
$CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
|
||||
#include <cstdint>
|
||||
#include <nmmintrin.h>
|
||||
int main() {
|
||||
volatile uint32_t x = _mm_crc32_u32(0, 0);
|
||||
}
|
||||
EOF
|
||||
if [ "$?" = 0 ]; then
|
||||
COMMON_FLAGS="$COMMON_FLAGS -DHAVE_SSE42"
|
||||
elif test "$USE_SSE"; then
|
||||
echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling"
|
||||
fi
|
||||
|
||||
# iOS doesn't support thread-local storage, but this check would erroneously
|
||||
# succeed because the cross-compiler flags are added by the Makefile, not this
|
||||
# script.
|
||||
if [ "$PLATFORM" != IOS ]; then
|
||||
$CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
|
||||
#if defined(_MSC_VER) && !defined(__thread)
|
||||
#define __thread __declspec(thread)
|
||||
#endif
|
||||
int main() {
|
||||
static __thread int tls;
|
||||
}
|
||||
EOF
|
||||
if [ "$?" = 0 ]; then
|
||||
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SUPPORT_THREAD_LOCAL"
|
||||
fi
|
||||
fi
|
||||
|
||||
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
|
||||
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
|
||||
#include "port/port.h"
|
||||
#include "rocksdb/perf_level.h"
|
||||
|
||||
// A thread local context for gathering io-stats efficiently and transparently.
|
||||
@ -46,12 +47,8 @@ struct IOStatsContext {
|
||||
uint64_t logger_nanos;
|
||||
};
|
||||
|
||||
#ifndef IOS_CROSS_COMPILE
|
||||
# ifdef _MSC_VER
|
||||
extern __declspec(thread) IOStatsContext iostats_context;
|
||||
# else
|
||||
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
|
||||
extern __thread IOStatsContext iostats_context;
|
||||
# endif
|
||||
#endif // IOS_CROSS_COMPILE
|
||||
#endif
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
|
||||
#include "port/port.h"
|
||||
#include "rocksdb/perf_level.h"
|
||||
|
||||
namespace rocksdb {
|
||||
@ -150,10 +151,8 @@ struct PerfContext {
|
||||
uint64_t env_new_logger_nanos;
|
||||
};
|
||||
|
||||
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
|
||||
#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
|
||||
extern PerfContext perf_context;
|
||||
#elif defined(_MSC_VER)
|
||||
extern __declspec(thread) PerfContext perf_context;
|
||||
#else
|
||||
#if defined(OS_SOLARIS)
|
||||
PerfContext *getPerfContext();
|
||||
|
@ -22,8 +22,7 @@
|
||||
|
||||
#if !defined(ROCKSDB_LITE) && \
|
||||
!defined(NROCKSDB_THREAD_STATUS) && \
|
||||
!defined(OS_MACOSX) && \
|
||||
!defined(IOS_CROSS_COMPILE)
|
||||
defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
|
||||
#define ROCKSDB_USING_THREAD_STATUS
|
||||
#endif
|
||||
|
||||
|
@ -9,13 +9,9 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
#ifndef IOS_CROSS_COMPILE
|
||||
# ifdef _MSC_VER
|
||||
__declspec(thread) IOStatsContext iostats_context;
|
||||
# else
|
||||
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
|
||||
__thread IOStatsContext iostats_context;
|
||||
# endif
|
||||
#endif // IOS_CROSS_COMPILE
|
||||
#endif
|
||||
|
||||
void IOStatsContext::Reset() {
|
||||
thread_pool_id = Env::Priority::TOTAL;
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include "monitoring/perf_step_timer.h"
|
||||
#include "rocksdb/iostats_context.h"
|
||||
|
||||
#ifndef IOS_CROSS_COMPILE
|
||||
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
|
||||
|
||||
// increment a specific counter by the specified value
|
||||
#define IOSTATS_ADD(metric, value) \
|
||||
@ -41,7 +41,7 @@
|
||||
PerfStepTimer iostats_step_timer_ ## metric(&(iostats_context.metric)); \
|
||||
iostats_step_timer_ ## metric.Start();
|
||||
|
||||
#else // IOS_CROSS_COMPILE
|
||||
#else // ROCKSDB_SUPPORT_THREAD_LOCAL
|
||||
|
||||
#define IOSTATS_ADD(metric, value)
|
||||
#define IOSTATS_ADD_IF_POSITIVE(metric, value)
|
||||
@ -53,4 +53,4 @@
|
||||
|
||||
#define IOSTATS_TIMER_GUARD(metric)
|
||||
|
||||
#endif // IOS_CROSS_COMPILE
|
||||
#endif // ROCKSDB_SUPPORT_THREAD_LOCAL
|
||||
|
@ -11,10 +11,8 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
|
||||
#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
|
||||
PerfContext perf_context;
|
||||
#elif defined(_MSC_VER)
|
||||
__declspec(thread) PerfContext perf_context;
|
||||
#else
|
||||
#if defined(OS_SOLARIS)
|
||||
__thread PerfContext perf_context_;
|
||||
@ -24,7 +22,7 @@ namespace rocksdb {
|
||||
#endif
|
||||
|
||||
void PerfContext::Reset() {
|
||||
#if !defined(NPERF_CONTEXT) && !defined(IOS_CROSS_COMPILE)
|
||||
#if !defined(NPERF_CONTEXT) && defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
|
||||
user_key_comparison_count = 0;
|
||||
block_cache_hit_count = 0;
|
||||
block_read_count = 0;
|
||||
@ -98,7 +96,7 @@ void PerfContext::Reset() {
|
||||
}
|
||||
|
||||
std::string PerfContext::ToString(bool exclude_zero_counters) const {
|
||||
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
|
||||
#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
|
||||
return "";
|
||||
#else
|
||||
std::ostringstream ss;
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
|
||||
#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
|
||||
|
||||
#define PERF_TIMER_GUARD(metric)
|
||||
#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition)
|
||||
|
@ -7,16 +7,14 @@
|
||||
//
|
||||
|
||||
#include <assert.h>
|
||||
#include <sstream>
|
||||
#include "monitoring/perf_level_imp.h"
|
||||
#include "port/port.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
#if defined(IOS_CROSS_COMPILE)
|
||||
PerfLevel perf_level = kEnableCount;
|
||||
#else
|
||||
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
|
||||
__thread PerfLevel perf_level = kEnableCount;
|
||||
#else
|
||||
PerfLevel perf_level = kEnableCount;
|
||||
#endif
|
||||
|
||||
void SetPerfLevel(PerfLevel level) {
|
||||
|
@ -11,10 +11,10 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
#if defined(IOS_CROSS_COMPILE)
|
||||
extern PerfLevel perf_level;
|
||||
#else
|
||||
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
|
||||
extern __thread PerfLevel perf_level;
|
||||
#else
|
||||
extern PerfLevel perf_level;
|
||||
#endif
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -15,14 +15,9 @@
|
||||
#include "util/crc32c.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#ifdef __SSE4_2__
|
||||
#ifdef HAVE_SSE42
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
#if defined(_WIN64)
|
||||
#ifdef __AVX2__
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
#include "util/coding.h"
|
||||
|
||||
namespace rocksdb {
|
||||
@ -298,21 +293,12 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) {
|
||||
return DecodeFixed32(reinterpret_cast<const char*>(p));
|
||||
}
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
#ifdef __LP64__
|
||||
#if defined(HAVE_SSE42) && (defined(__LP64__) || defined(_WIN64))
|
||||
static inline uint64_t LE_LOAD64(const uint8_t *p) {
|
||||
return DecodeFixed64(reinterpret_cast<const char*>(p));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_WIN64)
|
||||
#ifdef __AVX2__
|
||||
static inline uint64_t LE_LOAD64(const uint8_t *p) {
|
||||
return DecodeFixed64(reinterpret_cast<const char*>(p));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
|
||||
uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
|
||||
*p += 4;
|
||||
@ -330,8 +316,9 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
|
||||
}
|
||||
|
||||
static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
|
||||
#ifdef __SSE4_2__
|
||||
#ifdef __LP64__
|
||||
#ifndef HAVE_SSE42
|
||||
Slow_CRC32(l, p);
|
||||
#elif defined(__LP64__) || defined(_WIN64)
|
||||
*l = _mm_crc32_u64(*l, LE_LOAD64(*p));
|
||||
*p += 8;
|
||||
#else
|
||||
@ -340,16 +327,6 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
|
||||
*l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
|
||||
*p += 4;
|
||||
#endif
|
||||
#elif defined(_WIN64)
|
||||
#ifdef __AVX2__
|
||||
*l = _mm_crc32_u64(*l, LE_LOAD64(*p));
|
||||
*p += 8;
|
||||
#else
|
||||
Slow_CRC32(l, p);
|
||||
#endif
|
||||
#else
|
||||
Slow_CRC32(l, p);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<void (*CRC32)(uint64_t*, uint8_t const**)>
|
||||
@ -418,9 +395,7 @@ static inline Function Choose_Extend() {
|
||||
}
|
||||
|
||||
bool IsFastCrc32Supported() {
|
||||
#ifdef __SSE4_2__
|
||||
return isSSE42();
|
||||
#elif defined(_WIN64)
|
||||
#if defined(__SSE4_2__) || defined(_WIN64)
|
||||
return isSSE42();
|
||||
#else
|
||||
return false;
|
||||
|
@ -20,14 +20,6 @@
|
||||
#include "util/autovector.h"
|
||||
#include "port/port.h"
|
||||
|
||||
#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
|
||||
# if defined(OS_WIN) || defined(OS_MACOSX) || defined(IOS_CROSS_COMPILE)
|
||||
# define ROCKSDB_SUPPORT_THREAD_LOCAL 0
|
||||
# else
|
||||
# define ROCKSDB_SUPPORT_THREAD_LOCAL 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
// Cleanup function that will be called for a stored thread local
|
||||
|
Loading…
x
Reference in New Issue
Block a user