Compare commits
22 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
75cde85242 | ||
|
c221ba20f2 | ||
|
6c834fbcdb | ||
|
3f557dd7cb | ||
|
c0529b5351 | ||
|
0af1972830 | ||
|
023c98033a | ||
|
dab9465c40 | ||
|
0a5502754e | ||
|
1710638ae2 | ||
|
9199b1da4c | ||
|
61120a3fcf | ||
|
e6feb1dc1d | ||
|
af42561165 | ||
|
4a0ee7570c | ||
|
531712dd87 | ||
|
cf087e4290 | ||
|
ceaa4af2c3 | ||
|
351a14930e | ||
|
bfde4d73b7 | ||
|
8d31cca4f9 | ||
|
486ba33ce1 |
@ -177,7 +177,6 @@ set(SOURCES
|
||||
table/merger.cc
|
||||
table/sst_file_writer.cc
|
||||
table/meta_blocks.cc
|
||||
table/mock_table.cc
|
||||
table/plain_table_builder.cc
|
||||
table/plain_table_factory.cc
|
||||
table/plain_table_index.cc
|
||||
@ -214,7 +213,6 @@ set(SOURCES
|
||||
util/logging.cc
|
||||
util/log_buffer.cc
|
||||
util/memenv.cc
|
||||
util/mock_env.cc
|
||||
util/murmurhash.cc
|
||||
util/mutable_cf_options.cc
|
||||
util/options.cc
|
||||
@ -277,6 +275,8 @@ set(SOURCES
|
||||
# and linked to tests. Add test only code that is not #ifdefed for Release here.
|
||||
set(TESTUTIL_SOURCE
|
||||
db/db_test_util.cc
|
||||
table/mock_table.cc
|
||||
util/mock_env.cc
|
||||
util/thread_status_updater_debug.cc
|
||||
)
|
||||
|
||||
|
@ -1,12 +1,19 @@
|
||||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
|
||||
## 4.5.1 (3/25/2016)
|
||||
### Bug Fixes
|
||||
* Fix failures caused by the destorying order of singleton objects.
|
||||
|
||||
## 4.5.0 (2/5/2016)
|
||||
### Public API Changes
|
||||
* Add a new perf context level between kEnableCount and kEnableTime. Level 2 now doesn't include timers for mutexes.
|
||||
* Add a new perf context level between kEnableCount and kEnableTime. Level 2 now does not include timers for mutexes.
|
||||
* Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll.
|
||||
* DBOptions::delete_scheduler and NewDeleteScheduler() are removed, please use DBOptions::sst_file_manager and NewSstFileManager() instead
|
||||
|
||||
### New Features
|
||||
* ldb tool now supports operations to non-default column families.
|
||||
* Add kPersistedTier to ReadTier. This option allows Get and MultiGet to read only the persited data and skip mem-tables if writes were done with disableWAL = true.
|
||||
* Add DBOptions::sst_file_manager. Use NewSstFileManager() in include/rocksdb/sst_file_manager.h to create a SstFileManager that can be used to track the total size of SST files and control the SST files deletion rate.
|
||||
|
||||
## 4.4.0 (1/14/2016)
|
||||
|
4
Makefile
4
Makefile
@ -192,10 +192,6 @@ default: all
|
||||
WARNING_FLAGS = -W -Wextra -Wall -Wsign-compare -Wshadow \
|
||||
-Wno-unused-parameter
|
||||
|
||||
ifndef DISABLE_WARNING_AS_ERROR
|
||||
WARNING_FLAGS += -Werror
|
||||
endif
|
||||
|
||||
CFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
|
||||
CXXFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers
|
||||
|
||||
|
@ -52,12 +52,7 @@ if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
|
||||
FBCODE_BUILD="true"
|
||||
# If we're compiling with TSAN we need pic build
|
||||
PIC_BUILD=$COMPILE_WITH_TSAN
|
||||
if [ -z "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then
|
||||
source "$PWD/build_tools/fbcode_config.sh"
|
||||
else
|
||||
# we need this to build with MySQL. Don't use for other purposes.
|
||||
source "$PWD/build_tools/fbcode_config4.8.1.sh"
|
||||
fi
|
||||
source "$PWD/build_tools/fbcode_config.sh"
|
||||
fi
|
||||
|
||||
# Delete existing output, if it exists
|
||||
|
@ -1,16 +1,19 @@
|
||||
GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.9.x/centos6-native/1317bc4/
|
||||
CLANG_BASE=/mnt/vol/engshare/fbcode/third-party2/clang/3.7.1/centos6-native/9d9ecb9/
|
||||
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/5710d6a0fb0d12820aac0bffcd7fcb8646e7fff7/4.9.x/gcc-4.9-glibc-2.20/024dbc3
|
||||
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0600c95b31226b5e535614c590677d87c62d8016/2.20/gcc-4.9-glibc-2.20/500e281
|
||||
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/cbf6f1f209e5bd160bdc5d971744e039f36b1566/1.1.3/gcc-4.9-glibc-2.20/e9936bf
|
||||
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/6d39cb54708049f527e713ad19f2aadb9d3667e8/1.2.8/gcc-4.9-glibc-2.20/e9936bf
|
||||
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/2ddd45f0853bfc8bb1c27f0f447236a1a26c338a/1.0.6/gcc-4.9-glibc-2.20/e9936bf
|
||||
LZ4_BASE=/mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf
|
||||
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/69d56740ffb89d8bc81ded8ec428c01a813ea948/0.4.7/gcc-4.9-glibc-2.20/e9936bf
|
||||
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/c7275a4ceae0aca0929e56964a31dafc53c1ee96/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a
|
||||
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/5f0be745ddc0f86f22c8c8bb64b6b1163c93df91/4.0.3/gcc-4.9-glibc-2.20/a6c5e1e
|
||||
NUMA_BASE=/mnt/gvfs/third-party2/numa/ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73/2.0.8/gcc-4.9-glibc-2.20/e9936bf
|
||||
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/121f1a75c4414683aea8c70b761bfaf187f7c1a3/trunk/gcc-4.9-glibc-2.20/12266b1
|
||||
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.9-glibc-2.20/da39a3e
|
||||
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/75670d0d8ef4891fd1ec2a7513ef01cd002c823b/2.25/centos6-native/da39a3e
|
||||
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/af85c56f424cd5edfc2c97588299b44ecdec96bb/3.10.0/gcc-4.9-glibc-2.20/e9936bf
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/7.x/centos7-native/b2ef2b6
|
||||
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/963d9aeda70cc4779885b1277484fe7544a04e3e/9.0.0/platform007/9e92d53/
|
||||
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/7.x/platform007/5620abc
|
||||
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.26/platform007/f259413
|
||||
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d
|
||||
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/platform007/ca4da3d
|
||||
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d
|
||||
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/platform007/ca4da3d
|
||||
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/platform007/15a3614
|
||||
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d
|
||||
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/platform007/c26c002
|
||||
NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/platform007/ca4da3d
|
||||
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/platform007/6f3e0a9
|
||||
TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/platform007/ca4da3d
|
||||
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/fb/platform007/da39a3e
|
||||
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/ab9f09bba370e7066cafd4eb59752db93f2e8312/2.29.1/platform007/15a3614
|
||||
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/platform007/ca4da3d
|
||||
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832
|
||||
|
@ -1,4 +1,4 @@
|
||||
GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.8.1/centos6-native/cc6c9dc/
|
||||
GCC_BASE=/mnt/gvfs/third-party2/gcc/ebc96bc2fb751b5a0300b8d91a95bdf24ac1d88b/4.8.1/centos6-native/cc6c9dc
|
||||
CLANG_BASE=/mnt/vol/engshare/fbcode/third-party2/clang/3.7.1/centos6-native/9d9ecb9/
|
||||
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/5710d6a0fb0d12820aac0bffcd7fcb8646e7fff7/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc
|
||||
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0600c95b31226b5e535614c590677d87c62d8016/2.17/gcc-4.8.1-glibc-2.17/99df8fc
|
||||
|
@ -13,12 +13,12 @@ source "$BASEDIR/dependencies.sh"
|
||||
CFLAGS=""
|
||||
|
||||
# libgcc
|
||||
LIBGCC_INCLUDE="$LIBGCC_BASE/include"
|
||||
LIBGCC_LIBS=" -L $LIBGCC_BASE/libs"
|
||||
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0"
|
||||
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
|
||||
|
||||
# glibc
|
||||
GLIBC_INCLUDE="$GLIBC_BASE/include"
|
||||
GLIBC_LIBS=" -L $GLIB_BASE/lib"
|
||||
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
|
||||
|
||||
# snappy
|
||||
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
|
||||
@ -43,12 +43,16 @@ if test -z $PIC_BUILD; then
|
||||
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
|
||||
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
|
||||
CFLAGS+=" -DLZ4"
|
||||
|
||||
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
|
||||
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
|
||||
CFLAGS+=" -DZSTD"
|
||||
fi
|
||||
|
||||
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
|
||||
if test -z $PIC_BUILD; then
|
||||
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
|
||||
else
|
||||
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
|
||||
fi
|
||||
CFLAGS+=" -DZSTD"
|
||||
|
||||
# location of gflags headers and libraries
|
||||
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
|
||||
if test -z $PIC_BUILD; then
|
||||
@ -56,7 +60,7 @@ if test -z $PIC_BUILD; then
|
||||
else
|
||||
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
|
||||
fi
|
||||
CFLAGS+=" -DGFLAGS=google"
|
||||
CFLAGS+=" -DGFLAGS=gflags"
|
||||
|
||||
# location of jemalloc
|
||||
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
|
||||
@ -72,13 +76,22 @@ if test -z $PIC_BUILD; then
|
||||
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
|
||||
fi
|
||||
|
||||
# location of TBB
|
||||
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
|
||||
if test -z $PIC_BUILD; then
|
||||
TBB_LIBS="$TBB_BASE/lib/libtbb.a"
|
||||
else
|
||||
TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a"
|
||||
fi
|
||||
CFLAGS+=" -DTBB"
|
||||
|
||||
# use Intel SSE support for checksum calculations
|
||||
export USE_SSE=1
|
||||
|
||||
BINUTILS="$BINUTILS_BASE/bin"
|
||||
AR="$BINUTILS/ar"
|
||||
|
||||
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE"
|
||||
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE"
|
||||
|
||||
STDLIBS="-L $GCC_BASE/lib64"
|
||||
|
||||
@ -87,7 +100,7 @@ CLANG_LIB="$CLANG_BASE/lib"
|
||||
CLANG_SRC="$CLANG_BASE/../../src"
|
||||
|
||||
CLANG_ANALYZER="$CLANG_BIN/clang++"
|
||||
CLANG_SCAN_BUILD="$CLANG_SRC/clang/tools/scan-build/scan-build"
|
||||
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build"
|
||||
|
||||
if [ -z "$USE_CLANG" ]; then
|
||||
# gcc
|
||||
@ -95,40 +108,45 @@ if [ -z "$USE_CLANG" ]; then
|
||||
CXX="$GCC_BASE/bin/g++"
|
||||
|
||||
CFLAGS+=" -B$BINUTILS/gold"
|
||||
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
|
||||
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||
JEMALLOC=1
|
||||
else
|
||||
# clang
|
||||
CLANG_INCLUDE="$CLANG_LIB/clang/*/include"
|
||||
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
|
||||
CC="$CLANG_BIN/clang"
|
||||
CXX="$CLANG_BIN/clang++"
|
||||
|
||||
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
|
||||
|
||||
CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x "
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x/x86_64-facebook-linux "
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x "
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux "
|
||||
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
|
||||
CFLAGS+=" -isystem $CLANG_INCLUDE"
|
||||
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
|
||||
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
|
||||
CFLAGS+=" -Wno-expansion-to-defined "
|
||||
CXXFLAGS="-nostdinc++"
|
||||
fi
|
||||
|
||||
CFLAGS+=" $DEPS_INCLUDE"
|
||||
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE"
|
||||
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42"
|
||||
CXXFLAGS+=" $CFLAGS"
|
||||
|
||||
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB"
|
||||
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.9-glibc-2.20/lib/ld.so"
|
||||
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
|
||||
EXEC_LDFLAGS+=" -B$BINUTILS/gold"
|
||||
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so"
|
||||
EXEC_LDFLAGS+=" $LIBUNWIND"
|
||||
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.9-glibc-2.20/lib"
|
||||
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib"
|
||||
# required by libtbb
|
||||
EXEC_LDFLAGS+=" -ldl"
|
||||
|
||||
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
|
||||
|
||||
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS"
|
||||
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS"
|
||||
|
||||
VALGRIND_VER="$VALGRIND_BASE/bin/"
|
||||
|
||||
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD
|
||||
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD
|
||||
|
@ -64,8 +64,8 @@ touch "$OUTPUT"
|
||||
echo "Writing dependencies to $OUTPUT"
|
||||
|
||||
# Compilers locations
|
||||
GCC_BASE=`ls -d1 $TP2_LATEST/gcc/4.9.x/centos6-native/*/ | head -n1`
|
||||
CLANG_BASE=`ls -d1 $TP2_LATEST/clang/3.7.1/centos6-native/*/ | head -n1`
|
||||
GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.9.x/centos6-native/*/`
|
||||
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos6-native/*/`
|
||||
|
||||
log_variable GCC_BASE
|
||||
log_variable CLANG_BASE
|
||||
@ -101,8 +101,8 @@ touch "$OUTPUT"
|
||||
echo "Writing 4.8.1 dependencies to $OUTPUT"
|
||||
|
||||
# Compilers locations
|
||||
GCC_BASE=`ls -d1 $TP2_LATEST/gcc/4.8.1/centos6-native/*/ | head -n1`
|
||||
CLANG_BASE=`ls -d1 $TP2_LATEST/clang/3.7.1/centos6-native/*/ | head -n1`
|
||||
GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.8.1/centos6-native/*/`
|
||||
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos6-native/*/`
|
||||
|
||||
log_variable GCC_BASE
|
||||
log_variable CLANG_BASE
|
||||
|
@ -12,7 +12,9 @@ namespace rocksdb {
|
||||
|
||||
// -- AutoRollLogger
|
||||
Status AutoRollLogger::ResetLogger() {
|
||||
TEST_SYNC_POINT("AutoRollLogger::ResetLogger:BeforeNewLogger");
|
||||
status_ = env_->NewLogger(log_fname_, &logger_);
|
||||
TEST_SYNC_POINT("AutoRollLogger::ResetLogger:AfterNewLogger");
|
||||
|
||||
if (!status_.ok()) {
|
||||
return status_;
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "db/filename.h"
|
||||
#include "port/port.h"
|
||||
#include "port/util_logger.h"
|
||||
#include "util/sync_point.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
@ -53,11 +54,26 @@ class AutoRollLogger : public Logger {
|
||||
return status_;
|
||||
}
|
||||
|
||||
size_t GetLogFileSize() const override { return logger_->GetLogFileSize(); }
|
||||
size_t GetLogFileSize() const override {
|
||||
std::shared_ptr<Logger> logger;
|
||||
{
|
||||
MutexLock l(&mutex_);
|
||||
// pin down the current logger_ instance before releasing the mutex.
|
||||
logger = logger_;
|
||||
}
|
||||
return logger->GetLogFileSize();
|
||||
}
|
||||
|
||||
void Flush() override {
|
||||
if (logger_) {
|
||||
logger_->Flush();
|
||||
std::shared_ptr<Logger> logger;
|
||||
{
|
||||
MutexLock l(&mutex_);
|
||||
// pin down the current logger_ instance before releasing the mutex.
|
||||
logger = logger_;
|
||||
}
|
||||
TEST_SYNC_POINT("AutoRollLogger::Flush:PinnedLogger");
|
||||
if (logger) {
|
||||
logger->Flush();
|
||||
}
|
||||
}
|
||||
|
||||
@ -101,7 +117,7 @@ class AutoRollLogger : public Logger {
|
||||
uint64_t ctime_;
|
||||
uint64_t cached_now_access_count;
|
||||
uint64_t call_NowMicros_every_N_records_;
|
||||
port::Mutex mutex_;
|
||||
mutable port::Mutex mutex_;
|
||||
};
|
||||
|
||||
// Facade to craete logger automatically
|
||||
|
@ -4,6 +4,7 @@
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
@ -11,6 +12,8 @@
|
||||
#include <iterator>
|
||||
#include <algorithm>
|
||||
#include "db/auto_roll_logger.h"
|
||||
#include "port/port.h"
|
||||
#include "util/sync_point.h"
|
||||
#include "util/testharness.h"
|
||||
#include "rocksdb/db.h"
|
||||
#include <sys/stat.h>
|
||||
@ -260,7 +263,60 @@ TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) {
|
||||
auto_roll_logger, options.log_file_time_to_roll,
|
||||
kSampleMessage + ":CreateLoggerFromOptions - both");
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST_F(AutoRollLoggerTest, LogFlushWhileRolling) {
|
||||
DBOptions options;
|
||||
shared_ptr<Logger> logger;
|
||||
|
||||
InitTestDb();
|
||||
options.max_log_file_size = 1024 * 5;
|
||||
ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger));
|
||||
AutoRollLogger* auto_roll_logger =
|
||||
dynamic_cast<AutoRollLogger*>(logger.get());
|
||||
ASSERT_TRUE(auto_roll_logger);
|
||||
std::thread flush_thread;
|
||||
|
||||
rocksdb::SyncPoint::GetInstance()->LoadDependency({
|
||||
// Need to pin the old logger before beginning the roll, as rolling grabs
|
||||
// the mutex, which would prevent us from accessing the old logger.
|
||||
{"AutoRollLogger::Flush:PinnedLogger",
|
||||
"AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit"},
|
||||
// Need to finish the flush thread init before this callback because the
|
||||
// callback accesses flush_thread.get_id() in order to apply certain sync
|
||||
// points only to the flush thread.
|
||||
{"AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit",
|
||||
"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallbackBegin"},
|
||||
// Need to reset logger at this point in Flush() to exercise a race
|
||||
// condition case, which is executing the flush with the pinned (old)
|
||||
// logger after the roll has cut over to a new logger.
|
||||
{"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback1",
|
||||
"AutoRollLogger::ResetLogger:BeforeNewLogger"},
|
||||
{"AutoRollLogger::ResetLogger:AfterNewLogger",
|
||||
"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback2"},
|
||||
});
|
||||
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
||||
"PosixLogger::Flush:BeginCallback", [&](void* arg) {
|
||||
TEST_SYNC_POINT(
|
||||
"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallbackBegin");
|
||||
if (std::this_thread::get_id() == flush_thread.get_id()) {
|
||||
TEST_SYNC_POINT(
|
||||
"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback1");
|
||||
TEST_SYNC_POINT(
|
||||
"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback2");
|
||||
}
|
||||
});
|
||||
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
flush_thread = std::thread([&]() { auto_roll_logger->Flush(); });
|
||||
TEST_SYNC_POINT(
|
||||
"AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit");
|
||||
RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size,
|
||||
kSampleMessage + ":LogFlushWhileRolling");
|
||||
flush_thread.join();
|
||||
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
||||
}
|
||||
|
||||
#endif // OS_WIN
|
||||
|
||||
TEST_F(AutoRollLoggerTest, InfoLogLevel) {
|
||||
InitTestDb();
|
||||
|
@ -135,6 +135,10 @@ Status CheckConcurrentWritesSupported(const ColumnFamilyOptions& cf_options) {
|
||||
"Delete filtering (filter_deletes) is not compatible with concurrent "
|
||||
"memtable writes (allow_concurrent_memtable_writes)");
|
||||
}
|
||||
if (!cf_options.memtable_factory->IsInsertConcurrentlySupported()) {
|
||||
return Status::InvalidArgument(
|
||||
"Memtable doesn't concurrent writes (allow_concurrent_memtable_write)");
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "rocksdb/db.h"
|
||||
@ -142,9 +143,6 @@ TEST_F(CompactFilesTest, ObsoleteFiles) {
|
||||
}
|
||||
|
||||
auto l0_files = collector->GetFlushedFiles();
|
||||
CompactionOptions compact_opt;
|
||||
compact_opt.compression = kNoCompression;
|
||||
compact_opt.output_file_size_limit = kWriteBufferSize * 5;
|
||||
ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1));
|
||||
|
||||
// verify all compaction input files are deleted
|
||||
@ -154,6 +152,62 @@ TEST_F(CompactFilesTest, ObsoleteFiles) {
|
||||
delete db;
|
||||
}
|
||||
|
||||
TEST_F(CompactFilesTest, CapturingPendingFiles) {
|
||||
Options options;
|
||||
options.create_if_missing = true;
|
||||
// Disable RocksDB background compaction.
|
||||
options.compaction_style = kCompactionStyleNone;
|
||||
// Always do full scans for obsolete files (needed to reproduce the issue).
|
||||
options.delete_obsolete_files_period_micros = 0;
|
||||
|
||||
// Add listener.
|
||||
FlushedFileCollector* collector = new FlushedFileCollector();
|
||||
options.listeners.emplace_back(collector);
|
||||
|
||||
DB* db = nullptr;
|
||||
DestroyDB(db_name_, options);
|
||||
Status s = DB::Open(options, db_name_, &db);
|
||||
assert(s.ok());
|
||||
assert(db);
|
||||
|
||||
// Create 5 files.
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
db->Put(WriteOptions(), "key" + ToString(i), "value");
|
||||
db->Flush(FlushOptions());
|
||||
}
|
||||
|
||||
auto l0_files = collector->GetFlushedFiles();
|
||||
EXPECT_EQ(5, l0_files.size());
|
||||
|
||||
rocksdb::SyncPoint::GetInstance()->LoadDependency({
|
||||
{"CompactFilesImpl:2", "CompactFilesTest.CapturingPendingFiles:0"},
|
||||
{"CompactFilesTest.CapturingPendingFiles:1", "CompactFilesImpl:3"},
|
||||
});
|
||||
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
// Start compacting files.
|
||||
std::thread compaction_thread(
|
||||
[&] { EXPECT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1)); });
|
||||
|
||||
// In the meantime flush another file.
|
||||
TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:0");
|
||||
db->Put(WriteOptions(), "key5", "value");
|
||||
db->Flush(FlushOptions());
|
||||
TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:1");
|
||||
|
||||
compaction_thread.join();
|
||||
|
||||
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
||||
|
||||
delete db;
|
||||
|
||||
// Make sure we can reopen the DB.
|
||||
s = DB::Open(options, db_name_, &db);
|
||||
ASSERT_TRUE(s.ok());
|
||||
assert(db);
|
||||
delete db;
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -275,7 +275,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname)
|
||||
db_options_.delete_obsolete_files_period_micros),
|
||||
last_stats_dump_time_microsec_(0),
|
||||
next_job_id_(1),
|
||||
flush_on_destroy_(false),
|
||||
has_unpersisted_data_(false),
|
||||
env_options_(db_options_),
|
||||
#ifndef ROCKSDB_LITE
|
||||
wal_manager_(db_options_, env_options_),
|
||||
@ -322,7 +322,8 @@ void DBImpl::CancelAllBackgroundWork(bool wait) {
|
||||
DBImpl::~DBImpl() {
|
||||
mutex_.Lock();
|
||||
|
||||
if (!shutting_down_.load(std::memory_order_acquire) && flush_on_destroy_) {
|
||||
if (!shutting_down_.load(std::memory_order_acquire) &&
|
||||
has_unpersisted_data_) {
|
||||
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
||||
if (!cfd->IsDropped() && !cfd->mem()->IsEmpty()) {
|
||||
cfd->Ref();
|
||||
@ -1813,6 +1814,9 @@ Status DBImpl::CompactFilesImpl(
|
||||
std::vector<SequenceNumber> snapshot_seqs =
|
||||
snapshots_.GetAll(&earliest_write_conflict_snapshot);
|
||||
|
||||
auto pending_outputs_inserted_elem =
|
||||
CaptureCurrentFileNumberInPendingOutputs();
|
||||
|
||||
assert(is_snapshot_supported_ || snapshots_.empty());
|
||||
CompactionJob compaction_job(
|
||||
job_context->job_id, c.get(), db_options_, env_options_, versions_.get(),
|
||||
@ -1840,6 +1844,8 @@ Status DBImpl::CompactFilesImpl(
|
||||
TEST_SYNC_POINT("CompactFilesImpl:0");
|
||||
TEST_SYNC_POINT("CompactFilesImpl:1");
|
||||
compaction_job.Run();
|
||||
TEST_SYNC_POINT("CompactFilesImpl:2");
|
||||
TEST_SYNC_POINT("CompactFilesImpl:3");
|
||||
mutex_.Lock();
|
||||
|
||||
Status status = compaction_job.Install(*c->mutable_cf_options(), &mutex_);
|
||||
@ -1848,7 +1854,8 @@ Status DBImpl::CompactFilesImpl(
|
||||
c->column_family_data(), job_context, *c->mutable_cf_options());
|
||||
}
|
||||
c->ReleaseCompactionFiles(s);
|
||||
c.reset();
|
||||
|
||||
ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
|
||||
|
||||
if (status.ok()) {
|
||||
// Done
|
||||
@ -1864,6 +1871,8 @@ Status DBImpl::CompactFilesImpl(
|
||||
}
|
||||
}
|
||||
|
||||
c.reset();
|
||||
|
||||
bg_compaction_scheduled_--;
|
||||
if (bg_compaction_scheduled_ == 0) {
|
||||
bg_cv_.SignalAll();
|
||||
@ -3306,13 +3315,19 @@ Status DBImpl::GetImpl(const ReadOptions& read_options,
|
||||
LookupKey lkey(key, snapshot);
|
||||
PERF_TIMER_STOP(get_snapshot_time);
|
||||
|
||||
if (sv->mem->Get(lkey, value, &s, &merge_context)) {
|
||||
// Done
|
||||
RecordTick(stats_, MEMTABLE_HIT);
|
||||
} else if (sv->imm->Get(lkey, value, &s, &merge_context)) {
|
||||
// Done
|
||||
RecordTick(stats_, MEMTABLE_HIT);
|
||||
} else {
|
||||
bool skip_memtable =
|
||||
(read_options.read_tier == kPersistedTier && has_unpersisted_data_);
|
||||
bool done = false;
|
||||
if (!skip_memtable) {
|
||||
if (sv->mem->Get(lkey, value, &s, &merge_context)) {
|
||||
done = true;
|
||||
RecordTick(stats_, MEMTABLE_HIT);
|
||||
} else if (sv->imm->Get(lkey, value, &s, &merge_context)) {
|
||||
done = true;
|
||||
RecordTick(stats_, MEMTABLE_HIT);
|
||||
}
|
||||
}
|
||||
if (!done) {
|
||||
PERF_TIMER_GUARD(get_from_output_files_time);
|
||||
sv->current->Get(read_options, lkey, value, &s, &merge_context,
|
||||
value_found);
|
||||
@ -3397,14 +3412,23 @@ std::vector<Status> DBImpl::MultiGet(
|
||||
assert(mgd_iter != multiget_cf_data.end());
|
||||
auto mgd = mgd_iter->second;
|
||||
auto super_version = mgd->super_version;
|
||||
if (super_version->mem->Get(lkey, value, &s, &merge_context)) {
|
||||
// Done
|
||||
} else if (super_version->imm->Get(lkey, value, &s, &merge_context)) {
|
||||
// Done
|
||||
} else {
|
||||
bool skip_memtable =
|
||||
(read_options.read_tier == kPersistedTier && has_unpersisted_data_);
|
||||
bool done = false;
|
||||
if (!skip_memtable) {
|
||||
if (super_version->mem->Get(lkey, value, &s, &merge_context)) {
|
||||
done = true;
|
||||
// TODO(?): RecordTick(stats_, MEMTABLE_HIT)?
|
||||
} else if (super_version->imm->Get(lkey, value, &s, &merge_context)) {
|
||||
done = true;
|
||||
// TODO(?): RecordTick(stats_, MEMTABLE_HIT)?
|
||||
}
|
||||
}
|
||||
if (!done) {
|
||||
PERF_TIMER_GUARD(get_from_output_files_time);
|
||||
super_version->current->Get(read_options, lkey, value, &s,
|
||||
&merge_context);
|
||||
// TODO(?): RecordTick(stats_, MEMTABLE_MISS)?
|
||||
}
|
||||
|
||||
if (s.ok()) {
|
||||
@ -3843,6 +3867,10 @@ bool DBImpl::KeyMayExist(const ReadOptions& read_options,
|
||||
|
||||
Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
|
||||
ColumnFamilyHandle* column_family) {
|
||||
if (read_options.read_tier == kPersistedTier) {
|
||||
return NewErrorIterator(Status::NotSupported(
|
||||
"ReadTier::kPersistedData is not yet supported in iterators."));
|
||||
}
|
||||
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
|
||||
auto cfd = cfh->cfd();
|
||||
|
||||
@ -3949,6 +3977,10 @@ Status DBImpl::NewIterators(
|
||||
const ReadOptions& read_options,
|
||||
const std::vector<ColumnFamilyHandle*>& column_families,
|
||||
std::vector<Iterator*>* iterators) {
|
||||
if (read_options.read_tier == kPersistedTier) {
|
||||
return Status::NotSupported(
|
||||
"ReadTier::kPersistedData is not yet supported in iterators.");
|
||||
}
|
||||
iterators->clear();
|
||||
iterators->reserve(column_families.size());
|
||||
XFUNC_TEST("", "managed_new", managed_new1, xf_manage_new,
|
||||
@ -4328,7 +4360,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
|
||||
PERF_TIMER_STOP(write_pre_and_post_process_time);
|
||||
|
||||
if (write_options.disableWAL) {
|
||||
flush_on_destroy_ = true;
|
||||
has_unpersisted_data_ = true;
|
||||
}
|
||||
|
||||
uint64_t log_size = 0;
|
||||
|
@ -822,7 +822,10 @@ class DBImpl : public DB {
|
||||
// they're unique
|
||||
std::atomic<int> next_job_id_;
|
||||
|
||||
bool flush_on_destroy_; // Used when disableWAL is true.
|
||||
// A flag indicating whether the current rocksdb database has any
|
||||
// data that is not yet persisted into either WAL or SST file.
|
||||
// Used when disableWAL is true.
|
||||
bool has_unpersisted_data_;
|
||||
|
||||
static const int KEEP_LOG_FILE_NUM = 1000;
|
||||
// MSVC version 1800 still does not have constexpr for ::max()
|
||||
|
152
db/db_test.cc
152
db/db_test.cc
@ -521,6 +521,135 @@ TEST_F(DBTest, PutSingleDeleteGet) {
|
||||
kSkipUniversalCompaction | kSkipMergePut));
|
||||
}
|
||||
|
||||
TEST_F(DBTest, ReadFromPersistedTier) {
|
||||
do {
|
||||
Random rnd(301);
|
||||
Options options = CurrentOptions();
|
||||
for (int disableWAL = 0; disableWAL <= 1; ++disableWAL) {
|
||||
CreateAndReopenWithCF({"pikachu"}, options);
|
||||
WriteOptions wopt;
|
||||
wopt.disableWAL = (disableWAL == 1);
|
||||
// 1st round: put but not flush
|
||||
ASSERT_OK(db_->Put(wopt, handles_[1], "foo", "first"));
|
||||
ASSERT_OK(db_->Put(wopt, handles_[1], "bar", "one"));
|
||||
ASSERT_EQ("first", Get(1, "foo"));
|
||||
ASSERT_EQ("one", Get(1, "bar"));
|
||||
|
||||
// Read directly from persited data.
|
||||
ReadOptions ropt;
|
||||
ropt.read_tier = kPersistedTier;
|
||||
std::string value;
|
||||
if (wopt.disableWAL) {
|
||||
// as data has not yet being flushed, we expect not found.
|
||||
ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
|
||||
ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
|
||||
} else {
|
||||
ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
|
||||
ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
|
||||
}
|
||||
|
||||
// Multiget
|
||||
std::vector<ColumnFamilyHandle*> multiget_cfs;
|
||||
multiget_cfs.push_back(handles_[1]);
|
||||
multiget_cfs.push_back(handles_[1]);
|
||||
std::vector<Slice> multiget_keys;
|
||||
multiget_keys.push_back("foo");
|
||||
multiget_keys.push_back("bar");
|
||||
std::vector<std::string> multiget_values;
|
||||
auto statuses =
|
||||
db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
|
||||
if (wopt.disableWAL) {
|
||||
ASSERT_TRUE(statuses[0].IsNotFound());
|
||||
ASSERT_TRUE(statuses[1].IsNotFound());
|
||||
} else {
|
||||
ASSERT_OK(statuses[0]);
|
||||
ASSERT_OK(statuses[1]);
|
||||
}
|
||||
|
||||
// 2nd round: flush and put a new value in memtable.
|
||||
ASSERT_OK(Flush(1));
|
||||
ASSERT_OK(db_->Put(wopt, handles_[1], "rocksdb", "hello"));
|
||||
|
||||
// once the data has been flushed, we are able to get the
|
||||
// data when kPersistedTier is used.
|
||||
ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).ok());
|
||||
ASSERT_EQ(value, "first");
|
||||
ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
|
||||
ASSERT_EQ(value, "one");
|
||||
if (wopt.disableWAL) {
|
||||
ASSERT_TRUE(
|
||||
db_->Get(ropt, handles_[1], "rocksdb", &value).IsNotFound());
|
||||
} else {
|
||||
ASSERT_OK(db_->Get(ropt, handles_[1], "rocksdb", &value));
|
||||
ASSERT_EQ(value, "hello");
|
||||
}
|
||||
|
||||
// Expect same result in multiget
|
||||
multiget_cfs.push_back(handles_[1]);
|
||||
multiget_keys.push_back("rocksdb");
|
||||
statuses =
|
||||
db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
|
||||
ASSERT_TRUE(statuses[0].ok());
|
||||
ASSERT_EQ("first", multiget_values[0]);
|
||||
ASSERT_TRUE(statuses[1].ok());
|
||||
ASSERT_EQ("one", multiget_values[1]);
|
||||
if (wopt.disableWAL) {
|
||||
ASSERT_TRUE(statuses[2].IsNotFound());
|
||||
} else {
|
||||
ASSERT_OK(statuses[2]);
|
||||
}
|
||||
|
||||
// 3rd round: delete and flush
|
||||
ASSERT_OK(db_->Delete(wopt, handles_[1], "foo"));
|
||||
Flush(1);
|
||||
ASSERT_OK(db_->Delete(wopt, handles_[1], "bar"));
|
||||
|
||||
ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
|
||||
if (wopt.disableWAL) {
|
||||
// Still expect finding the value as its delete has not yet being
|
||||
// flushed.
|
||||
ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
|
||||
ASSERT_EQ(value, "one");
|
||||
} else {
|
||||
ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
|
||||
}
|
||||
ASSERT_TRUE(db_->Get(ropt, handles_[1], "rocksdb", &value).ok());
|
||||
ASSERT_EQ(value, "hello");
|
||||
|
||||
statuses =
|
||||
db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
|
||||
ASSERT_TRUE(statuses[0].IsNotFound());
|
||||
if (wopt.disableWAL) {
|
||||
ASSERT_TRUE(statuses[1].ok());
|
||||
ASSERT_EQ("one", multiget_values[1]);
|
||||
} else {
|
||||
ASSERT_TRUE(statuses[1].IsNotFound());
|
||||
}
|
||||
ASSERT_TRUE(statuses[2].ok());
|
||||
ASSERT_EQ("hello", multiget_values[2]);
|
||||
if (wopt.disableWAL == 0) {
|
||||
DestroyAndReopen(options);
|
||||
}
|
||||
}
|
||||
} while (ChangeOptions(kSkipHashCuckoo));
|
||||
}
|
||||
|
||||
TEST_F(DBTest, PersistedTierOnIterator) {
|
||||
// The test needs to be changed if kPersistedTier is supported in iterator.
|
||||
Options options = CurrentOptions();
|
||||
CreateAndReopenWithCF({"pikachu"}, options);
|
||||
ReadOptions ropt;
|
||||
ropt.read_tier = kPersistedTier;
|
||||
|
||||
auto* iter = db_->NewIterator(ropt, handles_[1]);
|
||||
ASSERT_TRUE(iter->status().IsNotSupported());
|
||||
delete iter;
|
||||
|
||||
std::vector<Iterator*> iters;
|
||||
ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
|
||||
Close();
|
||||
}
|
||||
|
||||
TEST_F(DBTest, SingleDeleteFlush) {
|
||||
// Test to check whether flushing preserves a single delete hidden
|
||||
// behind a put.
|
||||
@ -5272,6 +5401,7 @@ class DBTestRandomized : public DBTest,
|
||||
option_configs.push_back(option_config);
|
||||
}
|
||||
}
|
||||
option_configs.push_back(kBlockBasedTableWithIndexRestartInterval);
|
||||
return option_configs;
|
||||
}
|
||||
};
|
||||
@ -5737,6 +5867,28 @@ TEST_F(DBTest, TableOptionsSanitizeTest) {
|
||||
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
|
||||
ASSERT_OK(TryReopen(options));
|
||||
}
|
||||
|
||||
TEST_F(DBTest, ConcurrentMemtableNotSupported) {
|
||||
Options options = CurrentOptions();
|
||||
options.allow_concurrent_memtable_write = true;
|
||||
options.soft_pending_compaction_bytes_limit = 0;
|
||||
options.hard_pending_compaction_bytes_limit = 100;
|
||||
options.create_if_missing = true;
|
||||
|
||||
DestroyDB(dbname_, options);
|
||||
options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4));
|
||||
ASSERT_NOK(TryReopen(options));
|
||||
|
||||
options.memtable_factory.reset(new SkipListFactory);
|
||||
ASSERT_OK(TryReopen(options));
|
||||
|
||||
ColumnFamilyOptions cf_options(options);
|
||||
cf_options.memtable_factory.reset(
|
||||
NewHashLinkListRepFactory(4, 0, 3, true, 4));
|
||||
ColumnFamilyHandle* handle;
|
||||
ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle));
|
||||
}
|
||||
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
TEST_F(DBTest, SanitizeNumThreads) {
|
||||
|
@ -339,6 +339,10 @@ Options DBTestBase::CurrentOptions(
|
||||
options.prefix_extractor.reset(NewNoopTransform());
|
||||
break;
|
||||
}
|
||||
case kBlockBasedTableWithIndexRestartInterval: {
|
||||
table_options.index_block_restart_interval = 8;
|
||||
break;
|
||||
}
|
||||
case kOptimizeFiltersForHits: {
|
||||
options.optimize_filters_for_hits = true;
|
||||
set_block_based_table_factory = true;
|
||||
|
@ -529,6 +529,7 @@ class DBTestBase : public testing::Test {
|
||||
kEnd = 31,
|
||||
kLevelSubcompactions = 31,
|
||||
kUniversalSubcompactions = 32,
|
||||
kBlockBasedTableWithIndexRestartInterval = 33,
|
||||
};
|
||||
int option_config_;
|
||||
|
||||
|
@ -273,7 +273,7 @@ Status TableCache::Get(const ReadOptions& options,
|
||||
if (handle != nullptr) {
|
||||
ReleaseHandle(handle);
|
||||
}
|
||||
} else if (options.read_tier && s.IsIncomplete()) {
|
||||
} else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) {
|
||||
// Couldn't find Table in cache but treat as kFound if no_io set
|
||||
get_context->MarkKeyMayExist();
|
||||
return Status::OK();
|
||||
|
@ -188,10 +188,6 @@ class MemTableRep {
|
||||
// Default: true
|
||||
virtual bool IsSnapshotSupported() const { return true; }
|
||||
|
||||
// Return true if the current MemTableRep supports concurrent inserts
|
||||
// Default: false
|
||||
virtual bool IsInsertConcurrentlySupported() const { return false; }
|
||||
|
||||
protected:
|
||||
// When *key is an internal key concatenated with the value, returns the
|
||||
// user key.
|
||||
@ -210,6 +206,10 @@ class MemTableRepFactory {
|
||||
const SliceTransform*,
|
||||
Logger* logger) = 0;
|
||||
virtual const char* Name() const = 0;
|
||||
|
||||
// Return true if the current MemTableRep supports concurrent inserts
|
||||
// Default: false
|
||||
virtual bool IsInsertConcurrentlySupported() const { return false; }
|
||||
};
|
||||
|
||||
// This uses a skip list to store keys. It is the default.
|
||||
@ -229,6 +229,8 @@ class SkipListFactory : public MemTableRepFactory {
|
||||
Logger* logger) override;
|
||||
virtual const char* Name() const override { return "SkipListFactory"; }
|
||||
|
||||
bool IsInsertConcurrentlySupported() const override { return true; }
|
||||
|
||||
private:
|
||||
const size_t lookahead_;
|
||||
};
|
||||
|
@ -1294,8 +1294,12 @@ struct Options : public DBOptions, public ColumnFamilyOptions {
|
||||
// the block cache. It will not page in data from the OS cache or data that
|
||||
// resides in storage.
|
||||
enum ReadTier {
|
||||
kReadAllTier = 0x0, // data in memtable, block cache, OS cache or storage
|
||||
kBlockCacheTier = 0x1 // data in memtable or block cache
|
||||
kReadAllTier = 0x0, // data in memtable, block cache, OS cache or storage
|
||||
kBlockCacheTier = 0x1, // data in memtable or block cache
|
||||
kPersistedTier = 0x2 // persisted data. When WAL is disabled, this option
|
||||
// will skip data in memtable.
|
||||
// Note that this ReadTier currently only supports
|
||||
// Get and MultiGet and does not support iterators.
|
||||
};
|
||||
|
||||
// Options that control read operations
|
||||
|
@ -120,6 +120,9 @@ struct BlockBasedTableOptions {
|
||||
// value will be silently overwritten with 1.
|
||||
int block_restart_interval = 16;
|
||||
|
||||
// Same as block_restart_interval but used for the index block.
|
||||
int index_block_restart_interval = 1;
|
||||
|
||||
// Use delta encoding to compress keys in blocks.
|
||||
// Iterator::PinData() requires this option to be disabled.
|
||||
//
|
||||
|
@ -5,8 +5,8 @@
|
||||
#pragma once
|
||||
|
||||
#define ROCKSDB_MAJOR 4
|
||||
#define ROCKSDB_MINOR 4
|
||||
#define ROCKSDB_PATCH 0
|
||||
#define ROCKSDB_MINOR 5
|
||||
#define ROCKSDB_PATCH 1
|
||||
|
||||
// Do not use these. We made the mistake of declaring macros starting with
|
||||
// double underscore. Now we have to live with our choice. We'll deprecate these
|
||||
|
@ -25,8 +25,6 @@ public:
|
||||
transform_(transform), lookahead_(lookahead) {
|
||||
}
|
||||
|
||||
virtual bool IsInsertConcurrentlySupported() const override { return true; }
|
||||
|
||||
virtual KeyHandle Allocate(const size_t len, char** buf) override {
|
||||
*buf = skip_list_.AllocateKey(len);
|
||||
return static_cast<KeyHandle>(*buf);
|
||||
|
@ -113,15 +113,17 @@ class IndexBuilder {
|
||||
//
|
||||
// Optimizations:
|
||||
// 1. Made block's `block_restart_interval` to be 1, which will avoid linear
|
||||
// search when doing index lookup.
|
||||
// search when doing index lookup (can be disabled by setting
|
||||
// index_block_restart_interval).
|
||||
// 2. Shorten the key length for index block. Other than honestly using the
|
||||
// last key in the data block as the index key, we instead find a shortest
|
||||
// substitute key that serves the same function.
|
||||
class ShortenedIndexBuilder : public IndexBuilder {
|
||||
public:
|
||||
explicit ShortenedIndexBuilder(const Comparator* comparator)
|
||||
explicit ShortenedIndexBuilder(const Comparator* comparator,
|
||||
int index_block_restart_interval)
|
||||
: IndexBuilder(comparator),
|
||||
index_block_builder_(1 /* block_restart_interval == 1 */) {}
|
||||
index_block_builder_(index_block_restart_interval) {}
|
||||
|
||||
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
||||
const Slice* first_key_in_next_block,
|
||||
@ -178,9 +180,10 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
||||
class HashIndexBuilder : public IndexBuilder {
|
||||
public:
|
||||
explicit HashIndexBuilder(const Comparator* comparator,
|
||||
const SliceTransform* hash_key_extractor)
|
||||
const SliceTransform* hash_key_extractor,
|
||||
int index_block_restart_interval)
|
||||
: IndexBuilder(comparator),
|
||||
primary_index_builder_(comparator),
|
||||
primary_index_builder_(comparator, index_block_restart_interval),
|
||||
hash_key_extractor_(hash_key_extractor) {}
|
||||
|
||||
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
||||
@ -266,13 +269,16 @@ namespace {
|
||||
|
||||
// Create a index builder based on its type.
|
||||
IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator,
|
||||
const SliceTransform* prefix_extractor) {
|
||||
const SliceTransform* prefix_extractor,
|
||||
int index_block_restart_interval) {
|
||||
switch (type) {
|
||||
case BlockBasedTableOptions::kBinarySearch: {
|
||||
return new ShortenedIndexBuilder(comparator);
|
||||
return new ShortenedIndexBuilder(comparator,
|
||||
index_block_restart_interval);
|
||||
}
|
||||
case BlockBasedTableOptions::kHashSearch: {
|
||||
return new HashIndexBuilder(comparator, prefix_extractor);
|
||||
return new HashIndexBuilder(comparator, prefix_extractor,
|
||||
index_block_restart_interval);
|
||||
}
|
||||
default: {
|
||||
assert(!"Do not recognize the index type ");
|
||||
@ -484,9 +490,10 @@ struct BlockBasedTableBuilder::Rep {
|
||||
data_block(table_options.block_restart_interval,
|
||||
table_options.use_delta_encoding),
|
||||
internal_prefix_transform(_ioptions.prefix_extractor),
|
||||
index_builder(CreateIndexBuilder(table_options.index_type,
|
||||
&internal_comparator,
|
||||
&this->internal_prefix_transform)),
|
||||
index_builder(
|
||||
CreateIndexBuilder(table_options.index_type, &internal_comparator,
|
||||
&this->internal_prefix_transform,
|
||||
table_options.index_block_restart_interval)),
|
||||
compression_type(_compression_type),
|
||||
compression_opts(_compression_opts),
|
||||
filter_block(skip_filters ? nullptr : CreateFilterBlockBuilder(
|
||||
|
@ -42,6 +42,9 @@ BlockBasedTableFactory::BlockBasedTableFactory(
|
||||
if (table_options_.block_restart_interval < 1) {
|
||||
table_options_.block_restart_interval = 1;
|
||||
}
|
||||
if (table_options_.index_block_restart_interval < 1) {
|
||||
table_options_.index_block_restart_interval = 1;
|
||||
}
|
||||
}
|
||||
|
||||
Status BlockBasedTableFactory::NewTableReader(
|
||||
@ -150,6 +153,9 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const {
|
||||
snprintf(buffer, kBufferSize, " block_restart_interval: %d\n",
|
||||
table_options_.block_restart_interval);
|
||||
ret.append(buffer);
|
||||
snprintf(buffer, kBufferSize, " index_block_restart_interval: %d\n",
|
||||
table_options_.index_block_restart_interval);
|
||||
ret.append(buffer);
|
||||
snprintf(buffer, kBufferSize, " filter_policy: %s\n",
|
||||
table_options_.filter_policy == nullptr ?
|
||||
"nullptr" : table_options_.filter_policy->Name());
|
||||
|
@ -1255,7 +1255,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
BlockIter biter;
|
||||
NewDataBlockIterator(rep_, read_options, iiter.value(), &biter);
|
||||
|
||||
if (read_options.read_tier && biter.status().IsIncomplete()) {
|
||||
if (read_options.read_tier == kBlockCacheTier &&
|
||||
biter.status().IsIncomplete()) {
|
||||
// couldn't get block from block_cache
|
||||
// Update Saver.state to Found because we are only looking for whether
|
||||
// we can guarantee the key is not there when "no_io" is set
|
||||
|
@ -637,6 +637,7 @@ class HarnessTest : public testing::Test {
|
||||
new FlushBlockBySizePolicyFactory());
|
||||
table_options_.block_size = 256;
|
||||
table_options_.block_restart_interval = args.restart_interval;
|
||||
table_options_.index_block_restart_interval = args.restart_interval;
|
||||
table_options_.format_version = args.format_version;
|
||||
options_.table_factory.reset(
|
||||
new BlockBasedTableFactory(table_options_));
|
||||
@ -2282,6 +2283,67 @@ TEST_F(HarnessTest, FooterTests) {
|
||||
}
|
||||
}
|
||||
|
||||
class IndexBlockRestartIntervalTest
|
||||
: public BlockBasedTableTest,
|
||||
public ::testing::WithParamInterface<int> {
|
||||
public:
|
||||
static std::vector<int> GetRestartValues() { return {-1, 0, 1, 8, 16, 32}; }
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest,
|
||||
::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues()));
|
||||
|
||||
TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) {
|
||||
const int kKeysInTable = 10000;
|
||||
const int kKeySize = 100;
|
||||
const int kValSize = 500;
|
||||
|
||||
int index_block_restart_interval = GetParam();
|
||||
|
||||
Options options;
|
||||
BlockBasedTableOptions table_options;
|
||||
table_options.block_size = 64; // small block size to get big index block
|
||||
table_options.index_block_restart_interval = index_block_restart_interval;
|
||||
options.table_factory.reset(new BlockBasedTableFactory(table_options));
|
||||
|
||||
TableConstructor c(BytewiseComparator());
|
||||
static Random rnd(301);
|
||||
for (int i = 0; i < kKeysInTable; i++) {
|
||||
InternalKey k(RandomString(&rnd, kKeySize), 0, kTypeValue);
|
||||
c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
|
||||
}
|
||||
|
||||
std::vector<std::string> keys;
|
||||
stl_wrappers::KVMap kvmap;
|
||||
std::unique_ptr<InternalKeyComparator> comparator(
|
||||
new InternalKeyComparator(BytewiseComparator()));
|
||||
const ImmutableCFOptions ioptions(options);
|
||||
c.Finish(options, ioptions, table_options, *comparator, &keys, &kvmap);
|
||||
auto reader = c.GetTableReader();
|
||||
|
||||
std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(ReadOptions()));
|
||||
|
||||
// Test point lookup
|
||||
for (auto& kv : kvmap) {
|
||||
db_iter->Seek(kv.first);
|
||||
|
||||
ASSERT_TRUE(db_iter->Valid());
|
||||
ASSERT_OK(db_iter->status());
|
||||
ASSERT_EQ(db_iter->key(), kv.first);
|
||||
ASSERT_EQ(db_iter->value(), kv.second);
|
||||
}
|
||||
|
||||
// Test iterating
|
||||
auto kv_iter = kvmap.begin();
|
||||
for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
|
||||
ASSERT_EQ(db_iter->key(), kv_iter->first);
|
||||
ASSERT_EQ(db_iter->value(), kv_iter->second);
|
||||
kv_iter++;
|
||||
}
|
||||
ASSERT_EQ(kv_iter, kvmap.end());
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -109,6 +109,10 @@ function summarize_result {
|
||||
test_name=$2
|
||||
bench_name=$3
|
||||
|
||||
# Note that this function assumes that the benchmark executes long enough so
|
||||
# that "Compaction Stats" is written to stdout at least once. If it won't
|
||||
# happen then empty output from grep when searching for "Sum" will cause
|
||||
# syntax errors.
|
||||
uptime=$( grep ^Uptime\(secs $test_out | tail -1 | awk '{ printf "%.0f", $2 }' )
|
||||
stall_time=$( grep "^Cumulative stall" $test_out | tail -1 | awk '{ print $3 }' )
|
||||
stall_pct=$( grep "^Cumulative stall" $test_out| tail -1 | awk '{ print $5 }' )
|
||||
@ -159,8 +163,22 @@ function run_bulkload {
|
||||
}
|
||||
|
||||
function run_fillseq {
|
||||
# This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the
|
||||
# client can discover where to restart a load after a crash. I think this is a good way to load.
|
||||
# This runs with a vector memtable. WAL can be either disabled or enabled
|
||||
# depending on the input parameter (1 for disabled, 0 for enabled). The main
|
||||
# benefit behind disabling WAL is to make loading faster. It is still crash
|
||||
# safe and the client can discover where to restart a load after a crash. I
|
||||
# think this is a good way to load.
|
||||
|
||||
# Make sure that we'll have unique names for all the files so that data won't
|
||||
# be overwritten.
|
||||
if [ $1 == 1 ]; then
|
||||
log_file_name=$output_dir/benchmark_fillseq.wal_disabled.v${value_size}.log
|
||||
test_name=fillseq.wal_disabled.v${value_size}
|
||||
else
|
||||
log_file_name=$output_dir/benchmark_fillseq.wal_enabled.v${value_size}.log
|
||||
test_name=fillseq.wal_enabled.v${value_size}
|
||||
fi
|
||||
|
||||
echo "Loading $num_keys keys sequentially"
|
||||
cmd="./db_bench --benchmarks=fillseq \
|
||||
--use_existing_db=0 \
|
||||
@ -169,12 +187,14 @@ function run_fillseq {
|
||||
--min_level_to_compress=0 \
|
||||
--threads=1 \
|
||||
--memtablerep=vector \
|
||||
--disable_wal=1 \
|
||||
--disable_wal=$1 \
|
||||
--seed=$( date +%s ) \
|
||||
2>&1 | tee -a $output_dir/benchmark_fillseq.v${value_size}.log"
|
||||
echo $cmd | tee $output_dir/benchmark_fillseq.v${value_size}.log
|
||||
2>&1 | tee -a $log_file_name"
|
||||
echo $cmd | tee $log_file_name
|
||||
eval $cmd
|
||||
summarize_result $output_dir/benchmark_fillseq.v${value_size}.log fillseq.v${value_size} fillseq
|
||||
|
||||
# The constant "fillseq" which we pass to db_bench is the benchmark name.
|
||||
summarize_result $log_file_name $test_name fillseq
|
||||
}
|
||||
|
||||
function run_change {
|
||||
@ -310,8 +330,10 @@ for job in ${jobs[@]}; do
|
||||
start=$(now)
|
||||
if [ $job = bulkload ]; then
|
||||
run_bulkload
|
||||
elif [ $job = fillseq ]; then
|
||||
run_fillseq
|
||||
elif [ $job = fillseq_disable_wal ]; then
|
||||
run_fillseq 1
|
||||
elif [ $job = fillseq_enable_wal ]; then
|
||||
run_fillseq 0
|
||||
elif [ $job = overwrite ]; then
|
||||
run_change overwrite
|
||||
elif [ $job = updaterandom ]; then
|
||||
|
@ -24,6 +24,7 @@ default_params = {
|
||||
"disable_data_sync": 0,
|
||||
"disable_wal": 0,
|
||||
"filter_deletes": lambda: random.randint(0, 1),
|
||||
"allow_concurrent_memtable_write": 0,
|
||||
"iterpercent": 10,
|
||||
"max_background_compactions": 20,
|
||||
"max_bytes_for_level_base": 10485760,
|
||||
@ -85,6 +86,7 @@ simple_default_params = {
|
||||
"disable_data_sync": 0,
|
||||
"disable_wal": 0,
|
||||
"filter_deletes": lambda: random.randint(0, 1),
|
||||
"allow_concurrent_memtable_write": lambda: random.randint(0, 1),
|
||||
"iterpercent": 10,
|
||||
"max_background_compactions": 1,
|
||||
"max_bytes_for_level_base": 67108864,
|
||||
@ -126,6 +128,15 @@ whitebox_simple_default_params = {
|
||||
}
|
||||
|
||||
|
||||
def finalize_and_sanitize(src_params):
|
||||
dest_params = dict([(k, v() if callable(v) else v)
|
||||
for (k, v) in src_params.items()])
|
||||
# --allow_concurrent_memtable_write with --filter_deletes is not supported.
|
||||
if dest_params.get("allow_concurrent_memtable_write", 1) == 1:
|
||||
dest_params["filter_deletes"] = 0
|
||||
return dest_params
|
||||
|
||||
|
||||
def gen_cmd_params(args):
|
||||
params = {}
|
||||
|
||||
@ -151,8 +162,8 @@ def gen_cmd_params(args):
|
||||
|
||||
def gen_cmd(params):
|
||||
cmd = './db_stress ' + ' '.join(
|
||||
'--{0}={1}'.format(k, v() if callable(v) else v)
|
||||
for k, v in params.items()
|
||||
'--{0}={1}'.format(k, v)
|
||||
for k, v in finalize_and_sanitize(params).items()
|
||||
if k not in set(['test_type', 'simple', 'duration', 'interval'])
|
||||
and v is not None)
|
||||
return cmd
|
||||
|
@ -230,6 +230,13 @@ DEFINE_int64(cache_size, 2LL * KB * KB * KB,
|
||||
DEFINE_uint64(subcompactions, 1,
|
||||
"Maximum number of subcompactions to divide L0-L1 compactions "
|
||||
"into.");
|
||||
|
||||
DEFINE_bool(allow_concurrent_memtable_write, true,
|
||||
"Allow multi-writers to update mem tables in parallel.");
|
||||
|
||||
DEFINE_bool(enable_write_thread_adaptive_yield, true,
|
||||
"Use a yielding spin loop for brief writer thread waits.");
|
||||
|
||||
static const bool FLAGS_subcompactions_dummy __attribute__((unused)) =
|
||||
RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range);
|
||||
|
||||
@ -1997,6 +2004,10 @@ class StressTest {
|
||||
options_.filter_deletes = FLAGS_filter_deletes;
|
||||
options_.inplace_update_support = FLAGS_in_place_update;
|
||||
options_.max_subcompactions = static_cast<uint32_t>(FLAGS_subcompactions);
|
||||
options_.allow_concurrent_memtable_write =
|
||||
FLAGS_allow_concurrent_memtable_write;
|
||||
options_.enable_write_thread_adaptive_yield =
|
||||
FLAGS_enable_write_thread_adaptive_yield;
|
||||
if ((FLAGS_prefix_size == 0) == (FLAGS_rep_factory == kHashSkipList)) {
|
||||
fprintf(stderr,
|
||||
"prefix_size should be non-zero iff memtablerep == prefix_hash\n");
|
||||
|
@ -137,10 +137,17 @@ if [[ $do_setup != 0 ]]; then
|
||||
# Test 2a: sequential fill with large values to get peak ingest
|
||||
# adjust NUM_KEYS given the use of larger values
|
||||
env $ARGS BLOCK_SIZE=$((1 * M)) VALUE_SIZE=$((32 * K)) NUM_KEYS=$(( num_keys / 64 )) \
|
||||
./tools/benchmark.sh fillseq
|
||||
./tools/benchmark.sh fillseq_disable_wal
|
||||
|
||||
# Test 2b: sequential fill with the configured value size
|
||||
env $ARGS ./tools/benchmark.sh fillseq
|
||||
env $ARGS ./tools/benchmark.sh fillseq_disable_wal
|
||||
|
||||
# Test 2c: same as 2a, but with WAL being enabled.
|
||||
env $ARGS BLOCK_SIZE=$((1 * M)) VALUE_SIZE=$((32 * K)) NUM_KEYS=$(( num_keys / 64 )) \
|
||||
./tools/benchmark.sh fillseq_enable_wal
|
||||
|
||||
# Test 2d: same as 2b, but with WAL being enabled.
|
||||
env $ARGS ./tools/benchmark.sh fillseq_enable_wal
|
||||
|
||||
# Test 3: single-threaded overwrite
|
||||
env $ARGS NUM_THREADS=1 DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh overwrite
|
||||
@ -263,9 +270,13 @@ if [[ $skip_low_pri_tests != 1 ]]; then
|
||||
grep bulkload $output_dir/report.txt >> $output_dir/report2.txt
|
||||
fi
|
||||
|
||||
echo fillseq >> $output_dir/report2.txt
|
||||
echo fillseq_wal_disabled >> $output_dir/report2.txt
|
||||
head -1 $output_dir/report.txt >> $output_dir/report2.txt
|
||||
grep fillseq $output_dir/report.txt >> $output_dir/report2.txt
|
||||
grep fillseq.wal_disabled $output_dir/report.txt >> $output_dir/report2.txt
|
||||
|
||||
echo fillseq_wal_enabled >> $output_dir/report2.txt
|
||||
head -1 $output_dir/report.txt >> $output_dir/report2.txt
|
||||
grep fillseq.wal_enabled $output_dir/report.txt >> $output_dir/report2.txt
|
||||
|
||||
echo overwrite sync=0 >> $output_dir/report2.txt
|
||||
head -1 $output_dir/report.txt >> $output_dir/report2.txt
|
||||
|
@ -132,6 +132,7 @@ class PosixEnv : public Env {
|
||||
// All threads must be joined before the deletion of
|
||||
// thread_status_updater_.
|
||||
delete thread_status_updater_;
|
||||
TEST_SYNC_POINT("PosixEnv::~PosixEnv():End");
|
||||
}
|
||||
|
||||
void SetFD_CLOEXEC(int fd, const EnvOptions* options) {
|
||||
|
@ -4,6 +4,7 @@
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include <math.h>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include "rocksdb/options.h"
|
||||
|
||||
|
@ -486,6 +486,9 @@ static std::unordered_map<std::string,
|
||||
{"block_restart_interval",
|
||||
{offsetof(struct BlockBasedTableOptions, block_restart_interval),
|
||||
OptionType::kInt, OptionVerificationType::kNormal}},
|
||||
{"index_block_restart_interval",
|
||||
{offsetof(struct BlockBasedTableOptions, index_block_restart_interval),
|
||||
OptionType::kInt, OptionVerificationType::kNormal}},
|
||||
{"filter_policy",
|
||||
{offsetof(struct BlockBasedTableOptions, filter_policy),
|
||||
OptionType::kFilterPolicy, OptionVerificationType::kByName}},
|
||||
@ -493,7 +496,7 @@ static std::unordered_map<std::string,
|
||||
{offsetof(struct BlockBasedTableOptions, whole_key_filtering),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal}},
|
||||
{"skip_table_builder_flush",
|
||||
{offsetof(struct BlockBasedTableOptions, skip_table_builder_flush),
|
||||
{offsetof(struct BlockBasedTableOptions, skip_table_builder_flush),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal}},
|
||||
{"format_version",
|
||||
{offsetof(struct BlockBasedTableOptions, format_version),
|
||||
|
@ -1575,7 +1575,8 @@ TEST_F(OptionsParserTest, BlockBasedTableOptionsAllFieldsSettable) {
|
||||
"cache_index_and_filter_blocks=1;index_type=kHashSearch;"
|
||||
"checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"
|
||||
"block_cache=1M;block_cache_compressed=1k;block_size=1024;"
|
||||
"block_size_deviation=8;block_restart_interval=4;"
|
||||
"block_size_deviation=8;block_restart_interval=4; "
|
||||
"index_block_restart_interval=4;"
|
||||
"filter_policy=bloomfilter:4:true;whole_key_filtering=1;"
|
||||
"skip_table_builder_flush=1;format_version=1;"
|
||||
"hash_index_allow_collision=false;",
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
#include "rocksdb/env.h"
|
||||
#include "util/iostats_context_imp.h"
|
||||
#include "util/sync_point.h"
|
||||
#include <atomic>
|
||||
|
||||
namespace rocksdb {
|
||||
@ -56,6 +57,7 @@ class PosixLogger : public Logger {
|
||||
fclose(file_);
|
||||
}
|
||||
virtual void Flush() override {
|
||||
TEST_SYNC_POINT_CALLBACK("PosixLogger::Flush:BeginCallback", nullptr);
|
||||
if (flush_pending_) {
|
||||
flush_pending_ = false;
|
||||
fflush(file_);
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
|
@ -200,6 +200,7 @@ BlockBasedTableOptions RandomBlockBasedTableOptions(Random* rnd) {
|
||||
opt.block_size = rnd->Uniform(10000000);
|
||||
opt.block_size_deviation = rnd->Uniform(100);
|
||||
opt.block_restart_interval = rnd->Uniform(100);
|
||||
opt.index_block_restart_interval = rnd->Uniform(100);
|
||||
opt.whole_key_filtering = rnd->Uniform(2);
|
||||
|
||||
return opt;
|
||||
|
@ -104,7 +104,6 @@ PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit;
|
||||
|
||||
void ThreadLocalPtr::InitSingletons() {
|
||||
ThreadLocalPtr::StaticMeta::InitSingletons();
|
||||
ThreadLocalPtr::Instance();
|
||||
}
|
||||
|
||||
ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() {
|
||||
@ -113,30 +112,46 @@ ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() {
|
||||
// when the function is first call. As a result, we can properly
|
||||
// control their construction order by properly preparing their
|
||||
// first function call.
|
||||
static ThreadLocalPtr::StaticMeta inst;
|
||||
return &inst;
|
||||
//
|
||||
// Note that here we decide to make "inst" a static pointer w/o deleting
|
||||
// it at the end instead of a static variable. This is to avoid the following
|
||||
// destruction order desester happens when a child thread using ThreadLocalPtr
|
||||
// dies AFTER the main thread dies: When a child thread happens to use
|
||||
// ThreadLocalPtr, it will try to delete its thread-local data on its
|
||||
// OnThreadExit when the child thread dies. However, OnThreadExit depends
|
||||
// on the following variable. As a result, if the main thread dies before any
|
||||
// child thread happen to use ThreadLocalPtr dies, then the destruction of
|
||||
// the following variable will go first, then OnThreadExit, therefore causing
|
||||
// invalid access.
|
||||
//
|
||||
// The above problem can be solved by using thread_local to store tls_ instead
|
||||
// of using __thread. The major difference between thread_local and __thread
|
||||
// is that thread_local supports dynamic construction and destruction of
|
||||
// non-primitive typed variables. As a result, we can guarantee the
|
||||
// desturction order even when the main thread dies before any child threads.
|
||||
// However, thread_local requires gcc 4.8 and is not supported in all the
|
||||
// compilers that accepts -std=c++11 (e.g., the default clang on Mac), while
|
||||
// the current RocksDB still accept gcc 4.7.
|
||||
static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta();
|
||||
return inst;
|
||||
}
|
||||
|
||||
void ThreadLocalPtr::StaticMeta::InitSingletons() { Mutex(); }
|
||||
|
||||
port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() {
|
||||
// Here we prefer function static variable instead of global
|
||||
// static variable as function static variable is initialized
|
||||
// when the function is first call. As a result, we can properly
|
||||
// control their construction order by properly preparing their
|
||||
// first function call.
|
||||
static port::Mutex mutex;
|
||||
return &mutex;
|
||||
}
|
||||
port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; }
|
||||
|
||||
void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) {
|
||||
auto* tls = static_cast<ThreadData*>(ptr);
|
||||
assert(tls != nullptr);
|
||||
|
||||
auto* inst = Instance();
|
||||
// Use the cached StaticMeta::Instance() instead of directly calling
|
||||
// the variable inside StaticMeta::Instance() might already go out of
|
||||
// scope here in case this OnThreadExit is called after the main thread
|
||||
// dies.
|
||||
auto* inst = tls->inst;
|
||||
pthread_setspecific(inst->pthread_key_, nullptr);
|
||||
|
||||
MutexLock l(Mutex());
|
||||
MutexLock l(inst->MemberMutex());
|
||||
inst->RemoveThreadData(tls);
|
||||
// Unref stored pointers of current thread from all instances
|
||||
uint32_t id = 0;
|
||||
@ -154,7 +169,7 @@ void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) {
|
||||
delete tls;
|
||||
}
|
||||
|
||||
ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0) {
|
||||
ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0), head_(this) {
|
||||
if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) {
|
||||
abort();
|
||||
}
|
||||
@ -221,7 +236,7 @@ ThreadLocalPtr::ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
|
||||
|
||||
if (UNLIKELY(tls_ == nullptr)) {
|
||||
auto* inst = Instance();
|
||||
tls_ = new ThreadData();
|
||||
tls_ = new ThreadData(inst);
|
||||
{
|
||||
// Register it in the global chain, needs to be done before thread exit
|
||||
// handler registration
|
||||
|
@ -10,6 +10,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
@ -79,6 +80,8 @@ class ThreadLocalPtr {
|
||||
std::atomic<void*> ptr;
|
||||
};
|
||||
|
||||
class StaticMeta;
|
||||
|
||||
// This is the structure that is declared as "thread_local" storage.
|
||||
// The vector keep list of atomic pointer for all instances for "current"
|
||||
// thread. The vector is indexed by an Id that is unique in process and
|
||||
@ -95,10 +98,11 @@ class ThreadLocalPtr {
|
||||
// | thread 3 | void* | void* | void* | <- ThreadData
|
||||
// ---------------------------------------------------
|
||||
struct ThreadData {
|
||||
ThreadData() : entries() {}
|
||||
explicit ThreadData(StaticMeta* _inst) : entries(), inst(_inst) {}
|
||||
std::vector<Entry> entries;
|
||||
ThreadData* next;
|
||||
ThreadData* prev;
|
||||
StaticMeta* inst;
|
||||
};
|
||||
|
||||
class StaticMeta {
|
||||
@ -139,6 +143,31 @@ class ThreadLocalPtr {
|
||||
// initialized will be no-op.
|
||||
static void InitSingletons();
|
||||
|
||||
// protect inst, next_instance_id_, free_instance_ids_, head_,
|
||||
// ThreadData.entries
|
||||
//
|
||||
// Note that here we prefer function static variable instead of the usual
|
||||
// global static variable. The reason is that c++ destruction order of
|
||||
// static variables in the reverse order of their construction order.
|
||||
// However, C++ does not guarantee any construction order when global
|
||||
// static variables are defined in different files, while the function
|
||||
// static variables are initialized when their function are first called.
|
||||
// As a result, the construction order of the function static variables
|
||||
// can be controlled by properly invoke their first function calls in
|
||||
// the right order.
|
||||
//
|
||||
// For instance, the following function contains a function static
|
||||
// variable. We place a dummy function call of this inside
|
||||
// Env::Default() to ensure the construction order of the construction
|
||||
// order.
|
||||
static port::Mutex* Mutex();
|
||||
|
||||
// Returns the member mutex of the current StaticMeta. In general,
|
||||
// Mutex() should be used instead of this one. However, in case where
|
||||
// the static variable inside Instance() goes out of scope, MemberMutex()
|
||||
// should be used. One example is OnThreadExit() function.
|
||||
port::Mutex* MemberMutex() { return &mutex_; }
|
||||
|
||||
private:
|
||||
// Get UnrefHandler for id with acquiring mutex
|
||||
// REQUIRES: mutex locked
|
||||
@ -169,24 +198,9 @@ class ThreadLocalPtr {
|
||||
|
||||
std::unordered_map<uint32_t, UnrefHandler> handler_map_;
|
||||
|
||||
// protect inst, next_instance_id_, free_instance_ids_, head_,
|
||||
// ThreadData.entries
|
||||
//
|
||||
// Note that here we prefer function static variable instead of the usual
|
||||
// global static variable. The reason is that c++ destruction order of
|
||||
// static variables in the reverse order of their construction order.
|
||||
// However, C++ does not guarantee any construction order when global
|
||||
// static variables are defined in different files, while the function
|
||||
// static variables are initialized when their function are first called.
|
||||
// As a result, the construction order of the function static variables
|
||||
// can be controlled by properly invoke their first function calls in
|
||||
// the right order.
|
||||
//
|
||||
// For instance, the following function contains a function static
|
||||
// variable. We place a dummy function call of this inside
|
||||
// Env::Default() to ensure the construction order of the construction
|
||||
// order.
|
||||
static port::Mutex* Mutex();
|
||||
// The private mutex. Developers should always use Mutex() instead of
|
||||
// using this variable directly.
|
||||
port::Mutex mutex_;
|
||||
#if ROCKSDB_SUPPORT_THREAD_LOCAL
|
||||
// Thread local storage
|
||||
static __thread ThreadData* tls_;
|
||||
|
@ -3,14 +3,17 @@
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include <pthread.h>
|
||||
#include <atomic>
|
||||
#include <string>
|
||||
|
||||
#include "rocksdb/env.h"
|
||||
#include "port/port.h"
|
||||
#include "util/autovector.h"
|
||||
#include "util/thread_local.h"
|
||||
#include "util/sync_point.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
#include "util/thread_local.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
@ -467,6 +470,37 @@ TEST_F(ThreadLocalTest, CompareAndSwap) {
|
||||
ASSERT_EQ(tls.Get(), reinterpret_cast<void*>(3));
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
void* AccessThreadLocal(void* arg) {
|
||||
TEST_SYNC_POINT("AccessThreadLocal:Start");
|
||||
ThreadLocalPtr tlp;
|
||||
tlp.Reset(new std::string("hello RocksDB"));
|
||||
TEST_SYNC_POINT("AccessThreadLocal:End");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// The following test is disabled as it requires manual steps to run it
|
||||
// correctly.
|
||||
//
|
||||
// Currently we have no way to acess SyncPoint w/o ASAN error when the
|
||||
// child thread dies after the main thread dies. So if you manually enable
|
||||
// this test and only see an ASAN error on SyncPoint, it means you pass the
|
||||
// test.
|
||||
TEST_F(ThreadLocalTest, DISABLED_MainThreadDiesFirst) {
|
||||
rocksdb::SyncPoint::GetInstance()->LoadDependency(
|
||||
{{"AccessThreadLocal:Start", "MainThreadDiesFirst:End"},
|
||||
{"PosixEnv::~PosixEnv():End", "AccessThreadLocal:End"}});
|
||||
|
||||
// Triggers the initialization of singletons.
|
||||
Env::Default();
|
||||
pthread_t t;
|
||||
pthread_create(&t, nullptr, &AccessThreadLocal, nullptr);
|
||||
TEST_SYNC_POINT("MainThreadDiesFirst:End");
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
Loading…
Reference in New Issue
Block a user