Enable Fast CRC32 for Win64

Summary:
Currently the fast crc32 path is not enabled on Windows. I am trying to enable it here, hopefully, with the minimum impact to the existing code structure.
Closes https://github.com/facebook/rocksdb/pull/2033

Differential Revision: D4770635

Pulled By: siying

fbshipit-source-id: 676f8b8
This commit is contained in:
Min Wei 2017-03-29 17:34:36 -07:00 committed by Facebook Github Bot
parent f9813b853d
commit 8a8c967460
2 changed files with 36 additions and 2 deletions

View File

@ -42,6 +42,11 @@ endif()
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/modules/")
if(WIN32)
if (DEFINED AVX2)
set(USE_AVX2 ${AVX2})
else ()
set(USE_AVX2 1)
endif ()
include(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty.inc)
else()
option(WITH_JEMALLOC "build with JeMalloc" OFF)
@ -111,7 +116,11 @@ add_library(build_version OBJECT ${BUILD_VERSION_CC})
target_include_directories(build_version PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/util)
if(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue")
if (${USE_AVX2} EQUAL 1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /arch:AVX2 /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue")
else ()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue")
endif ()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W3 /wd4127 /wd4800 /wd4996 /wd4351")
else()
@ -378,7 +387,7 @@ set(SOURCES
util/histogram_windowing.cc
util/instrumented_mutex.cc
util/iostats_context.cc
util/lru_cache.cc
tools/ldb_cmd.cc
tools/ldb_tool.cc

View File

@ -16,6 +16,11 @@
#ifdef __SSE4_2__
#include <nmmintrin.h>
#endif
#if defined(_WIN64)
#ifdef __AVX2__
#include <nmmintrin.h>
#endif
#endif
#include "util/coding.h"
namespace rocksdb {
@ -299,6 +304,13 @@ static inline uint64_t LE_LOAD64(const uint8_t *p) {
#endif
#endif
#if defined(_WIN64)
#ifdef __AVX2__
static inline uint64_t LE_LOAD64(const uint8_t *p) {
return DecodeFixed64(reinterpret_cast<const char*>(p));
}
#endif
#endif
static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
*p += 4;
@ -326,6 +338,13 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
*l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
*p += 4;
#endif
#elif defined(_WIN64)
#ifdef __AVX2__
*l = _mm_crc32_u64(*l, LE_LOAD64(*p));
*p += 8;
#else
Slow_CRC32(l, p);
#endif
#else
Slow_CRC32(l, p);
#endif
@ -381,6 +400,10 @@ static bool isSSE42() {
uint32_t d_;
__asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx");
return c_ & (1U << 20); // copied from CpuId.h in Folly.
#elif defined(_WIN64)
int info[4];
__cpuidex(info, 0x00000001, 0);
return (info[2] & ((int)1 << 20)) != 0;
#else
return false;
#endif
@ -395,6 +418,8 @@ static inline Function Choose_Extend() {
bool IsFastCrc32Supported() {
#ifdef __SSE4_2__
return isSSE42();
#elif defined(_WIN64)
return isSSE42();
#else
return false;
#endif