Compare commits
21 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
52a2cc5a6d | ||
|
d60acee274 | ||
|
84b27f480f | ||
|
92bf1f5773 | ||
|
6672cd0770 | ||
|
1d0562e7c8 | ||
|
3f96ed111e | ||
|
0645e8b42c | ||
|
6504c31cb6 | ||
|
53a86bf61e | ||
|
8a5ec0ec6f | ||
|
b7dbbdf783 | ||
|
ec64b24a0c | ||
|
2182bf2828 | ||
|
2137377f0e | ||
|
a9f71f1b6e | ||
|
1f9508953f | ||
|
39d156b120 | ||
|
cb08423712 | ||
|
d9a7d8a769 | ||
|
5407b7c6d9 |
@ -1,9 +1,9 @@
|
||||
## Unreleased
|
||||
# RocksDB default options change log
|
||||
## 4.8.0 (5/2/2016)
|
||||
* options.max_open_files changes from 5000 to -1. This improves performance, but users need to set the file descriptor limit high enough and watch memory usage for index and bloom filters.
|
||||
* options.base_background_compactions changes from max_background_compactions to 1. When users set a higher max_background_compactions but the write throughput is not high, writes to disk are less spiky.
|
||||
* options.wal_recovery_mode changes from kTolerateCorruptedTailRecords to kPointInTimeRecovery. This avoids some false positives when the file system or hardware reorders the writes for file data and metadata.
|
||||
|
||||
# RocksDB default options change log
|
||||
## 4.7.0 (4/8/2016)
|
||||
* options.write_buffer_size changes from 4MB to 64MB.
|
||||
* options.target_file_size_base changes from 2MB to 64MB.
|
||||
|
@ -1,10 +1,12 @@
|
||||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
## 4.8.0 (5/2/2016)
|
||||
### Public API Change
|
||||
* Allow preset compression dictionary for improved compression of block-based tables. This is supported for zlib, zstd, and lz4. The compression dictionary's size is configurable via CompressionOptions::max_dict_bytes.
|
||||
* Delete deprecated classes for creating backups (BackupableDB) and restoring from backups (RestoreBackupableDB). Now, BackupEngine should be used for creating backups, and BackupEngineReadOnly should be used for restorations. For more details, see https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F
|
||||
* Expose estimate of per-level compression ratio via DB property: "rocksdb.compression-ratio-at-levelN".
|
||||
* Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will also be called in the failure case. Users can check the creation status via TableFileCreationInfo::status.
|
||||
### New Features
|
||||
* Add ReadOptions::readahead_size. If non-zero, NewIterator will create a new table reader which performs reads of the given size.
|
||||
|
||||
## 4.7.0 (4/8/2016)
|
||||
### Public API Change
|
||||
|
4
Makefile
4
Makefile
@ -203,10 +203,6 @@ default: all
|
||||
WARNING_FLAGS = -W -Wextra -Wall -Wsign-compare -Wshadow \
|
||||
-Wno-unused-parameter
|
||||
|
||||
ifndef DISABLE_WARNING_AS_ERROR
|
||||
WARNING_FLAGS += -Werror
|
||||
endif
|
||||
|
||||
CFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
|
||||
CXXFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers
|
||||
|
||||
|
@ -52,12 +52,7 @@ if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
|
||||
FBCODE_BUILD="true"
|
||||
# If we're compiling with TSAN we need pic build
|
||||
PIC_BUILD=$COMPILE_WITH_TSAN
|
||||
if [ -z "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then
|
||||
source "$PWD/build_tools/fbcode_config.sh"
|
||||
else
|
||||
# we need this to build with MySQL. Don't use for other purposes.
|
||||
source "$PWD/build_tools/fbcode_config4.8.1.sh"
|
||||
fi
|
||||
source "$PWD/build_tools/fbcode_config.sh"
|
||||
fi
|
||||
|
||||
# Delete existing output, if it exists
|
||||
|
@ -1,16 +1,19 @@
|
||||
GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.9.x/centos6-native/1317bc4/
|
||||
CLANG_BASE=/mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/9d9ecb9/
|
||||
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/f97108c12512b3b0789ac4515d836bdb1eae1142/4.9.x/gcc-4.9-glibc-2.20/024dbc3
|
||||
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/f5484f168c0e4d19823d41df052c5870c6e575a4/2.20/gcc-4.9-glibc-2.20/500e281
|
||||
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/cbf6f1f209e5bd160bdc5d971744e039f36b1566/1.1.3/gcc-4.9-glibc-2.20/e9936bf
|
||||
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/6d39cb54708049f527e713ad19f2aadb9d3667e8/1.2.8/gcc-4.9-glibc-2.20/e9936bf
|
||||
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/2ddd45f0853bfc8bb1c27f0f447236a1a26c338a/1.0.6/gcc-4.9-glibc-2.20/e9936bf
|
||||
LZ4_BASE=/mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf
|
||||
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/cb6c4880fcb4fee471574ba6af63a3882155a16a/0.5.1/gcc-4.9-glibc-2.20/e9936bf
|
||||
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/c7275a4ceae0aca0929e56964a31dafc53c1ee96/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a
|
||||
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/d9acac8a9a2c9378eb696e22ffa8bd0e97d9206b/master/gcc-4.9-glibc-2.20/a6c5e1e
|
||||
NUMA_BASE=/mnt/gvfs/third-party2/numa/ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73/2.0.8/gcc-4.9-glibc-2.20/e9936bf
|
||||
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/41bfa3759ce52c071f5fd547ec9ecd2522929f0a/trunk/gcc-4.9-glibc-2.20/12266b1
|
||||
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.9-glibc-2.20/da39a3e
|
||||
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/71454c53fffcb716a0beb9a90047aff7fb5c984a/2.26/centos6-native/da39a3e
|
||||
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/af85c56f424cd5edfc2c97588299b44ecdec96bb/3.10.0/gcc-4.9-glibc-2.20/e9936bf
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/7.x/centos7-native/b2ef2b6
|
||||
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/963d9aeda70cc4779885b1277484fe7544a04e3e/9.0.0/platform007/9e92d53/
|
||||
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/7.x/platform007/5620abc
|
||||
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.26/platform007/f259413
|
||||
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d
|
||||
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/platform007/ca4da3d
|
||||
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d
|
||||
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/platform007/ca4da3d
|
||||
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/platform007/15a3614
|
||||
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d
|
||||
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/platform007/c26c002
|
||||
NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/platform007/ca4da3d
|
||||
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/platform007/6f3e0a9
|
||||
TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/platform007/ca4da3d
|
||||
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/fb/platform007/da39a3e
|
||||
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/ab9f09bba370e7066cafd4eb59752db93f2e8312/2.29.1/platform007/15a3614
|
||||
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/platform007/ca4da3d
|
||||
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832
|
||||
|
@ -1,4 +1,4 @@
|
||||
GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.8.1/centos6-native/cc6c9dc/
|
||||
GCC_BASE=/mnt/gvfs/third-party2/gcc/ebc96bc2fb751b5a0300b8d91a95bdf24ac1d88b/4.8.1/centos6-native/cc6c9dc
|
||||
CLANG_BASE=/mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/9d9ecb9/
|
||||
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/f97108c12512b3b0789ac4515d836bdb1eae1142/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc
|
||||
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/f5484f168c0e4d19823d41df052c5870c6e575a4/2.17/gcc-4.8.1-glibc-2.17/99df8fc
|
||||
|
@ -13,8 +13,8 @@ source "$BASEDIR/dependencies.sh"
|
||||
CFLAGS=""
|
||||
|
||||
# libgcc
|
||||
LIBGCC_INCLUDE="$LIBGCC_BASE/include"
|
||||
LIBGCC_LIBS=" -L $LIBGCC_BASE/libs"
|
||||
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0"
|
||||
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
|
||||
|
||||
# glibc
|
||||
GLIBC_INCLUDE="$GLIBC_BASE/include"
|
||||
@ -43,12 +43,16 @@ if test -z $PIC_BUILD; then
|
||||
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
|
||||
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
|
||||
CFLAGS+=" -DLZ4"
|
||||
|
||||
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
|
||||
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
|
||||
CFLAGS+=" -DZSTD"
|
||||
fi
|
||||
|
||||
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
|
||||
if test -z $PIC_BUILD; then
|
||||
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
|
||||
else
|
||||
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
|
||||
fi
|
||||
CFLAGS+=" -DZSTD"
|
||||
|
||||
# location of gflags headers and libraries
|
||||
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
|
||||
if test -z $PIC_BUILD; then
|
||||
@ -56,7 +60,7 @@ if test -z $PIC_BUILD; then
|
||||
else
|
||||
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
|
||||
fi
|
||||
CFLAGS+=" -DGFLAGS=google"
|
||||
CFLAGS+=" -DGFLAGS=gflags"
|
||||
|
||||
# location of jemalloc
|
||||
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
|
||||
@ -72,13 +76,22 @@ if test -z $PIC_BUILD; then
|
||||
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
|
||||
fi
|
||||
|
||||
# location of TBB
|
||||
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
|
||||
if test -z $PIC_BUILD; then
|
||||
TBB_LIBS="$TBB_BASE/lib/libtbb.a"
|
||||
else
|
||||
TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a"
|
||||
fi
|
||||
CFLAGS+=" -DTBB"
|
||||
|
||||
# use Intel SSE support for checksum calculations
|
||||
export USE_SSE=1
|
||||
|
||||
BINUTILS="$BINUTILS_BASE/bin"
|
||||
AR="$BINUTILS/ar"
|
||||
|
||||
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE"
|
||||
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE"
|
||||
|
||||
STDLIBS="-L $GCC_BASE/lib64"
|
||||
|
||||
@ -87,7 +100,7 @@ CLANG_LIB="$CLANG_BASE/lib"
|
||||
CLANG_SRC="$CLANG_BASE/../../src"
|
||||
|
||||
CLANG_ANALYZER="$CLANG_BIN/clang++"
|
||||
CLANG_SCAN_BUILD="$CLANG_SRC/clang/tools/scan-build/scan-build"
|
||||
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build"
|
||||
|
||||
if [ -z "$USE_CLANG" ]; then
|
||||
# gcc
|
||||
@ -95,40 +108,44 @@ if [ -z "$USE_CLANG" ]; then
|
||||
CXX="$GCC_BASE/bin/g++"
|
||||
|
||||
CFLAGS+=" -B$BINUTILS/gold"
|
||||
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
|
||||
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||
JEMALLOC=1
|
||||
else
|
||||
# clang
|
||||
CLANG_INCLUDE="$CLANG_LIB/clang/*/include"
|
||||
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
|
||||
CC="$CLANG_BIN/clang"
|
||||
CXX="$CLANG_BIN/clang++"
|
||||
|
||||
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
|
||||
|
||||
CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x "
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x/x86_64-facebook-linux "
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x "
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux "
|
||||
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
|
||||
CFLAGS+=" -isystem $CLANG_INCLUDE"
|
||||
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
|
||||
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
|
||||
CFLAGS+=" -Wno-expansion-to-defined "
|
||||
CXXFLAGS="-nostdinc++"
|
||||
fi
|
||||
|
||||
CFLAGS+=" $DEPS_INCLUDE"
|
||||
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE"
|
||||
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42"
|
||||
CXXFLAGS+=" $CFLAGS"
|
||||
|
||||
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB"
|
||||
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.9-glibc-2.20/lib/ld.so"
|
||||
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
|
||||
EXEC_LDFLAGS+=" -B$BINUTILS/gold"
|
||||
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so"
|
||||
EXEC_LDFLAGS+=" $LIBUNWIND"
|
||||
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.9-glibc-2.20/lib"
|
||||
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib"
|
||||
# required by libtbb
|
||||
EXEC_LDFLAGS+=" -ldl"
|
||||
|
||||
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
|
||||
|
||||
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS"
|
||||
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS"
|
||||
|
||||
VALGRIND_VER="$VALGRIND_BASE/bin/"
|
||||
|
||||
|
@ -64,7 +64,7 @@ touch "$OUTPUT"
|
||||
echo "Writing dependencies to $OUTPUT"
|
||||
|
||||
# Compilers locations
|
||||
GCC_BASE=`ls -d1 $TP2_LATEST/gcc/4.9.x/centos6-native/*/ | head -n1`
|
||||
GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.9.x/centos6-native/*/`
|
||||
CLANG_BASE=`ls -d1 /mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/*/ | head -n1`
|
||||
|
||||
log_variable GCC_BASE
|
||||
@ -101,7 +101,7 @@ touch "$OUTPUT"
|
||||
echo "Writing 4.8.1 dependencies to $OUTPUT"
|
||||
|
||||
# Compilers locations
|
||||
GCC_BASE=`ls -d1 $TP2_LATEST/gcc/4.8.1/centos6-native/*/ | head -n1`
|
||||
GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.8.1/centos6-native/*/`
|
||||
CLANG_BASE=`ls -d1 /mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/*/ | head -n1`
|
||||
|
||||
log_variable GCC_BASE
|
||||
|
@ -149,7 +149,7 @@ ColumnFamilyOptions SanitizeOptions(const DBOptions& db_options,
|
||||
result.comparator = icmp;
|
||||
size_t clamp_max = std::conditional<
|
||||
sizeof(size_t) == 4, std::integral_constant<size_t, 0xffffffff>,
|
||||
std::integral_constant<size_t, 64ull << 30>>::type::value;
|
||||
std::integral_constant<uint64_t, 64ull << 30>>::type::value;
|
||||
ClipToRange(&result.write_buffer_size, ((size_t)64) << 10, clamp_max);
|
||||
// if user sets arena_block_size, we trust user to use this value. Otherwise,
|
||||
// calculate a proper value from write_buffer_size;
|
||||
|
@ -209,7 +209,7 @@ class ColumnFamilyData {
|
||||
const ImmutableCFOptions* ioptions() const { return &ioptions_; }
|
||||
// REQUIRES: DB mutex held
|
||||
// This returns the MutableCFOptions used by current SuperVersion
|
||||
// You shoul use this API to reference MutableCFOptions most of the time.
|
||||
// You should use this API to reference MutableCFOptions most of the time.
|
||||
const MutableCFOptions* GetCurrentMutableCFOptions() const {
|
||||
return &(super_version_->mutable_cf_options);
|
||||
}
|
||||
|
@ -669,6 +669,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
||||
}
|
||||
|
||||
ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
|
||||
const MutableCFOptions* mutable_cf_options =
|
||||
sub_compact->compaction->mutable_cf_options();
|
||||
|
||||
// To build compression dictionary, we sample the first output file, assuming
|
||||
// it'll reach the maximum length, and then use the dictionary for compressing
|
||||
@ -680,9 +682,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
||||
cfd->ioptions()->compression_opts.max_dict_bytes > 0) {
|
||||
const size_t kMaxSamples =
|
||||
cfd->ioptions()->compression_opts.max_dict_bytes >> kSampleLenShift;
|
||||
const size_t kOutFileLen =
|
||||
cfd->GetCurrentMutableCFOptions()->MaxFileSizeForLevel(
|
||||
compact_->compaction->output_level());
|
||||
const size_t kOutFileLen = mutable_cf_options->MaxFileSizeForLevel(
|
||||
compact_->compaction->output_level());
|
||||
if (kOutFileLen != port::kMaxSizet) {
|
||||
const size_t kOutFileNumSamples = kOutFileLen >> kSampleLenShift;
|
||||
Random64 generator{versions_->NewFileNumber()};
|
||||
|
@ -160,6 +160,10 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
|
||||
|
||||
ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
|
||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
|
||||
|
||||
ro.total_order_seek = true;
|
||||
ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound());
|
||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
|
||||
}
|
||||
|
||||
TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
|
||||
|
@ -131,9 +131,8 @@ class DBIter: public Iterator {
|
||||
}
|
||||
}
|
||||
virtual ~DBIter() {
|
||||
if (pin_thru_lifetime_) {
|
||||
pinned_iters_mgr_.ReleasePinnedIterators();
|
||||
}
|
||||
// Release pinned data if any
|
||||
pinned_iters_mgr_.ReleasePinnedIterators();
|
||||
RecordTick(statistics_, NO_ITERATORS, -1);
|
||||
local_stats_.BumpGlobalStatistics(statistics_);
|
||||
if (!arena_mode_) {
|
||||
@ -154,8 +153,13 @@ class DBIter: public Iterator {
|
||||
}
|
||||
virtual Slice value() const override {
|
||||
assert(valid_);
|
||||
return (direction_ == kForward && !current_entry_is_merged_) ?
|
||||
iter_->value() : saved_value_;
|
||||
if (current_entry_is_merged_) {
|
||||
return saved_value_;
|
||||
} else if (direction_ == kReverse) {
|
||||
return pinned_value_;
|
||||
} else {
|
||||
return iter_->value();
|
||||
}
|
||||
}
|
||||
virtual Status status() const override {
|
||||
if (status_.ok()) {
|
||||
@ -206,6 +210,21 @@ class DBIter: public Iterator {
|
||||
bool ParseKey(ParsedInternalKey* key);
|
||||
void MergeValuesNewToOld();
|
||||
|
||||
// Temporarily pin the blocks that we encounter until ReleaseTempPinnedData()
|
||||
// is called
|
||||
void TempPinData() {
|
||||
if (!pin_thru_lifetime_) {
|
||||
pinned_iters_mgr_.StartPinning();
|
||||
}
|
||||
}
|
||||
|
||||
// Release blocks pinned by TempPinData()
|
||||
void ReleaseTempPinnedData() {
|
||||
if (!pin_thru_lifetime_) {
|
||||
pinned_iters_mgr_.ReleasePinnedIterators();
|
||||
}
|
||||
}
|
||||
|
||||
inline void ClearSavedValue() {
|
||||
if (saved_value_.capacity() > 1048576) {
|
||||
std::string empty;
|
||||
@ -227,6 +246,7 @@ class DBIter: public Iterator {
|
||||
Status status_;
|
||||
IterKey saved_key_;
|
||||
std::string saved_value_;
|
||||
Slice pinned_value_;
|
||||
Direction direction_;
|
||||
bool valid_;
|
||||
bool current_entry_is_merged_;
|
||||
@ -266,6 +286,8 @@ void DBIter::Next() {
|
||||
assert(valid_);
|
||||
|
||||
if (direction_ == kReverse) {
|
||||
// We only pin blocks when doing kReverse
|
||||
ReleaseTempPinnedData();
|
||||
FindNextUserKey();
|
||||
direction_ = kForward;
|
||||
if (!iter_->Valid()) {
|
||||
@ -347,21 +369,24 @@ void DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
|
||||
case kTypeSingleDeletion:
|
||||
// Arrange to skip all upcoming entries for this key since
|
||||
// they are hidden by this deletion.
|
||||
saved_key_.SetKey(ikey.user_key,
|
||||
!iter_->IsKeyPinned() /* copy */);
|
||||
saved_key_.SetKey(
|
||||
ikey.user_key,
|
||||
!iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
|
||||
skipping = true;
|
||||
num_skipped = 0;
|
||||
PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
|
||||
break;
|
||||
case kTypeValue:
|
||||
valid_ = true;
|
||||
saved_key_.SetKey(ikey.user_key,
|
||||
!iter_->IsKeyPinned() /* copy */);
|
||||
saved_key_.SetKey(
|
||||
ikey.user_key,
|
||||
!iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
|
||||
return;
|
||||
case kTypeMerge:
|
||||
// By now, we are sure the current ikey is going to yield a value
|
||||
saved_key_.SetKey(ikey.user_key,
|
||||
!iter_->IsKeyPinned() /* copy */);
|
||||
saved_key_.SetKey(
|
||||
ikey.user_key,
|
||||
!iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
|
||||
current_entry_is_merged_ = true;
|
||||
valid_ = true;
|
||||
MergeValuesNewToOld(); // Go to a different state machine
|
||||
@ -472,6 +497,7 @@ void DBIter::Prev() {
|
||||
if (direction_ == kForward) {
|
||||
ReverseToBackward();
|
||||
}
|
||||
ReleaseTempPinnedData();
|
||||
PrevInternal();
|
||||
if (statistics_ != nullptr) {
|
||||
local_stats_.prev_count_++;
|
||||
@ -524,7 +550,7 @@ void DBIter::PrevInternal() {
|
||||
|
||||
while (iter_->Valid()) {
|
||||
saved_key_.SetKey(ExtractUserKey(iter_->key()),
|
||||
!iter_->IsKeyPinned() /* copy */);
|
||||
!iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
|
||||
if (FindValueForCurrentKey()) {
|
||||
valid_ = true;
|
||||
if (!iter_->Valid()) {
|
||||
@ -555,6 +581,7 @@ void DBIter::PrevInternal() {
|
||||
bool DBIter::FindValueForCurrentKey() {
|
||||
assert(iter_->Valid());
|
||||
merge_context_.Clear();
|
||||
current_entry_is_merged_ = false;
|
||||
// last entry before merge (could be kTypeDeletion, kTypeSingleDeletion or
|
||||
// kTypeValue)
|
||||
ValueType last_not_merge_type = kTypeDeletion;
|
||||
@ -575,7 +602,9 @@ bool DBIter::FindValueForCurrentKey() {
|
||||
switch (last_key_entry_type) {
|
||||
case kTypeValue:
|
||||
merge_context_.Clear();
|
||||
saved_value_ = iter_->value().ToString();
|
||||
ReleaseTempPinnedData();
|
||||
TempPinData();
|
||||
pinned_value_ = iter_->value();
|
||||
last_not_merge_type = kTypeValue;
|
||||
break;
|
||||
case kTypeDeletion:
|
||||
@ -605,6 +634,7 @@ bool DBIter::FindValueForCurrentKey() {
|
||||
valid_ = false;
|
||||
return false;
|
||||
case kTypeMerge:
|
||||
current_entry_is_merged_ = true;
|
||||
if (last_not_merge_type == kTypeDeletion) {
|
||||
StopWatchNano timer(env_, statistics_ != nullptr);
|
||||
PERF_TIMER_GUARD(merge_operator_time_nanos);
|
||||
@ -615,12 +645,10 @@ bool DBIter::FindValueForCurrentKey() {
|
||||
timer.ElapsedNanos());
|
||||
} else {
|
||||
assert(last_not_merge_type == kTypeValue);
|
||||
std::string last_put_value = saved_value_;
|
||||
Slice temp_slice(last_put_value);
|
||||
{
|
||||
StopWatchNano timer(env_, statistics_ != nullptr);
|
||||
PERF_TIMER_GUARD(merge_operator_time_nanos);
|
||||
user_merge_operator_->FullMerge(saved_key_.GetKey(), &temp_slice,
|
||||
user_merge_operator_->FullMerge(saved_key_.GetKey(), &pinned_value_,
|
||||
merge_context_.GetOperands(),
|
||||
&saved_value_, logger_);
|
||||
RecordTick(statistics_, MERGE_OPERATION_TOTAL_TIME,
|
||||
@ -655,7 +683,9 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
||||
if (ikey.type == kTypeValue || ikey.type == kTypeDeletion ||
|
||||
ikey.type == kTypeSingleDeletion) {
|
||||
if (ikey.type == kTypeValue) {
|
||||
saved_value_ = iter_->value().ToString();
|
||||
ReleaseTempPinnedData();
|
||||
TempPinData();
|
||||
pinned_value_ = iter_->value();
|
||||
valid_ = true;
|
||||
return true;
|
||||
}
|
||||
@ -665,6 +695,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
||||
|
||||
// kTypeMerge. We need to collect all kTypeMerge values and save them
|
||||
// in operands
|
||||
current_entry_is_merged_ = true;
|
||||
merge_context_.Clear();
|
||||
while (iter_->Valid() &&
|
||||
user_comparator_->Equal(ikey.user_key, saved_key_.GetKey()) &&
|
||||
@ -767,6 +798,7 @@ void DBIter::FindParseableKey(ParsedInternalKey* ikey, Direction direction) {
|
||||
|
||||
void DBIter::Seek(const Slice& target) {
|
||||
StopWatch sw(env_, statistics_, DB_SEEK);
|
||||
ReleaseTempPinnedData();
|
||||
saved_key_.Clear();
|
||||
// now saved_key_ is used to store the internal key.
|
||||
saved_key_.SetInternalKey(target, sequence_);
|
||||
@ -809,6 +841,7 @@ void DBIter::SeekToFirst() {
|
||||
max_skip_ = std::numeric_limits<uint64_t>::max();
|
||||
}
|
||||
direction_ = kForward;
|
||||
ReleaseTempPinnedData();
|
||||
ClearSavedValue();
|
||||
|
||||
{
|
||||
@ -841,6 +874,7 @@ void DBIter::SeekToLast() {
|
||||
max_skip_ = std::numeric_limits<uint64_t>::max();
|
||||
}
|
||||
direction_ = kReverse;
|
||||
ReleaseTempPinnedData();
|
||||
ClearSavedValue();
|
||||
|
||||
{
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "db/db_test_util.h"
|
||||
#include "port/stack_trace.h"
|
||||
#include "rocksdb/iostats_context.h"
|
||||
#include "rocksdb/perf_context.h"
|
||||
|
||||
namespace rocksdb {
|
||||
@ -1228,6 +1229,221 @@ TEST_F(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
|
||||
delete iter;
|
||||
}
|
||||
|
||||
TEST_F(DBIteratorTest, IterPrevKeyCrossingBlocks) {
|
||||
Options options = CurrentOptions();
|
||||
BlockBasedTableOptions table_options;
|
||||
table_options.block_size = 1; // every block will contain one entry
|
||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||
options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
|
||||
options.disable_auto_compactions = true;
|
||||
options.max_sequential_skip_in_iterations = 8;
|
||||
|
||||
DestroyAndReopen(options);
|
||||
|
||||
// Putting such deletes will force DBIter::Prev() to fallback to a Seek
|
||||
for (int file_num = 0; file_num < 10; file_num++) {
|
||||
ASSERT_OK(Delete("key4"));
|
||||
ASSERT_OK(Flush());
|
||||
}
|
||||
|
||||
// First File containing 5 blocks of puts
|
||||
ASSERT_OK(Put("key1", "val1.0"));
|
||||
ASSERT_OK(Put("key2", "val2.0"));
|
||||
ASSERT_OK(Put("key3", "val3.0"));
|
||||
ASSERT_OK(Put("key4", "val4.0"));
|
||||
ASSERT_OK(Put("key5", "val5.0"));
|
||||
ASSERT_OK(Flush());
|
||||
|
||||
// Second file containing 9 blocks of merge operands
|
||||
ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1"));
|
||||
ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2"));
|
||||
|
||||
ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1"));
|
||||
ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2"));
|
||||
ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3"));
|
||||
|
||||
ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1"));
|
||||
ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.2"));
|
||||
ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3"));
|
||||
ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4"));
|
||||
ASSERT_OK(Flush());
|
||||
|
||||
{
|
||||
ReadOptions ro;
|
||||
ro.fill_cache = false;
|
||||
Iterator* iter = db_->NewIterator(ro);
|
||||
|
||||
iter->SeekToLast();
|
||||
ASSERT_EQ(iter->key().ToString(), "key5");
|
||||
ASSERT_EQ(iter->value().ToString(), "val5.0");
|
||||
|
||||
iter->Prev();
|
||||
ASSERT_EQ(iter->key().ToString(), "key4");
|
||||
ASSERT_EQ(iter->value().ToString(), "val4.0");
|
||||
|
||||
iter->Prev();
|
||||
ASSERT_EQ(iter->key().ToString(), "key3");
|
||||
ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
|
||||
|
||||
iter->Prev();
|
||||
ASSERT_EQ(iter->key().ToString(), "key2");
|
||||
ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
|
||||
|
||||
iter->Prev();
|
||||
ASSERT_EQ(iter->key().ToString(), "key1");
|
||||
ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2");
|
||||
|
||||
delete iter;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) {
|
||||
Options options = CurrentOptions();
|
||||
options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
|
||||
options.disable_auto_compactions = true;
|
||||
options.level0_slowdown_writes_trigger = (1 << 30);
|
||||
options.level0_stop_writes_trigger = (1 << 30);
|
||||
options.max_sequential_skip_in_iterations = 8;
|
||||
DestroyAndReopen(options);
|
||||
|
||||
const int kNumKeys = 500;
|
||||
// Small number of merge operands to make sure that DBIter::Prev() doesn't
|
||||
// fall back to Seek()
|
||||
const int kNumMergeOperands = 3;
|
||||
// Use a value size that will make sure that every block contains 1 key
|
||||
const int kValSize =
|
||||
static_cast<int>(BlockBasedTableOptions().block_size) * 4;
|
||||
// Percentage of keys that won't get merge operations
|
||||
const int kNoMergeOpPercentage = 20;
|
||||
// Percentage of keys that will be deleted
|
||||
const int kDeletePercentage = 10;
|
||||
|
||||
// For half of the key range we will write multiple deletes first to
|
||||
// force DBIter::Prev() to fall back to Seek()
|
||||
for (int file_num = 0; file_num < 10; file_num++) {
|
||||
for (int i = 0; i < kNumKeys; i += 2) {
|
||||
ASSERT_OK(Delete(Key(i)));
|
||||
}
|
||||
ASSERT_OK(Flush());
|
||||
}
|
||||
|
||||
Random rnd(301);
|
||||
std::map<std::string, std::string> true_data;
|
||||
std::string gen_key;
|
||||
std::string gen_val;
|
||||
|
||||
for (int i = 0; i < kNumKeys; i++) {
|
||||
gen_key = Key(i);
|
||||
gen_val = RandomString(&rnd, kValSize);
|
||||
|
||||
ASSERT_OK(Put(gen_key, gen_val));
|
||||
true_data[gen_key] = gen_val;
|
||||
}
|
||||
ASSERT_OK(Flush());
|
||||
|
||||
// Separate values and merge operands in different file so that we
|
||||
// make sure that we dont merge them while flushing but actually
|
||||
// merge them in the read path
|
||||
for (int i = 0; i < kNumKeys; i++) {
|
||||
if (rnd.OneIn(static_cast<int>(100.0 / kNoMergeOpPercentage))) {
|
||||
// Dont give merge operations for some keys
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int j = 0; j < kNumMergeOperands; j++) {
|
||||
gen_key = Key(i);
|
||||
gen_val = RandomString(&rnd, kValSize);
|
||||
|
||||
ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val));
|
||||
true_data[gen_key] += "," + gen_val;
|
||||
}
|
||||
}
|
||||
ASSERT_OK(Flush());
|
||||
|
||||
for (int i = 0; i < kNumKeys; i++) {
|
||||
if (rnd.OneIn(static_cast<int>(100.0 / kDeletePercentage))) {
|
||||
gen_key = Key(i);
|
||||
|
||||
ASSERT_OK(Delete(gen_key));
|
||||
true_data.erase(gen_key);
|
||||
}
|
||||
}
|
||||
ASSERT_OK(Flush());
|
||||
|
||||
{
|
||||
ReadOptions ro;
|
||||
ro.fill_cache = false;
|
||||
Iterator* iter = db_->NewIterator(ro);
|
||||
auto data_iter = true_data.rbegin();
|
||||
|
||||
for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
|
||||
ASSERT_EQ(iter->key().ToString(), data_iter->first);
|
||||
ASSERT_EQ(iter->value().ToString(), data_iter->second);
|
||||
data_iter++;
|
||||
}
|
||||
ASSERT_EQ(data_iter, true_data.rend());
|
||||
|
||||
delete iter;
|
||||
}
|
||||
|
||||
{
|
||||
ReadOptions ro;
|
||||
ro.fill_cache = false;
|
||||
Iterator* iter = db_->NewIterator(ro);
|
||||
auto data_iter = true_data.rbegin();
|
||||
|
||||
int entries_right = 0;
|
||||
std::string seek_key;
|
||||
for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
|
||||
// Verify key/value of current position
|
||||
ASSERT_EQ(iter->key().ToString(), data_iter->first);
|
||||
ASSERT_EQ(iter->value().ToString(), data_iter->second);
|
||||
|
||||
bool restore_position_with_seek = rnd.Uniform(2);
|
||||
if (restore_position_with_seek) {
|
||||
seek_key = iter->key().ToString();
|
||||
}
|
||||
|
||||
// Do some Next() operations, then restore the iterator to its original position
|
||||
int next_count =
|
||||
entries_right > 0 ? rnd.Uniform(std::min(entries_right, 10)) : 0;
|
||||
for (int i = 0; i < next_count; i++) {
|
||||
iter->Next();
|
||||
data_iter--;
|
||||
|
||||
ASSERT_EQ(iter->key().ToString(), data_iter->first);
|
||||
ASSERT_EQ(iter->value().ToString(), data_iter->second);
|
||||
}
|
||||
|
||||
if (restore_position_with_seek) {
|
||||
// Restore original position using Seek()
|
||||
iter->Seek(seek_key);
|
||||
for (int i = 0; i < next_count; i++) {
|
||||
data_iter++;
|
||||
}
|
||||
|
||||
ASSERT_EQ(iter->key().ToString(), data_iter->first);
|
||||
ASSERT_EQ(iter->value().ToString(), data_iter->second);
|
||||
} else {
|
||||
// Restore original position using Prev()
|
||||
for (int i = 0; i < next_count; i++) {
|
||||
iter->Prev();
|
||||
data_iter++;
|
||||
|
||||
ASSERT_EQ(iter->key().ToString(), data_iter->first);
|
||||
ASSERT_EQ(iter->value().ToString(), data_iter->second);
|
||||
}
|
||||
}
|
||||
|
||||
entries_right++;
|
||||
data_iter++;
|
||||
}
|
||||
ASSERT_EQ(data_iter, true_data.rend());
|
||||
|
||||
delete iter;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(DBIteratorTest, IteratorWithLocalStatistics) {
|
||||
Options options = CurrentOptions();
|
||||
options.statistics = rocksdb::CreateDBStatistics();
|
||||
@ -1310,6 +1526,76 @@ TEST_F(DBIteratorTest, IteratorWithLocalStatistics) {
|
||||
ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), total_bytes);
|
||||
}
|
||||
|
||||
TEST_F(DBIteratorTest, ReadAhead) {
|
||||
Options options;
|
||||
env_->count_random_reads_ = true;
|
||||
options.env = env_;
|
||||
options.disable_auto_compactions = true;
|
||||
options.write_buffer_size = 4 << 20;
|
||||
options.statistics = rocksdb::CreateDBStatistics();
|
||||
BlockBasedTableOptions table_options;
|
||||
table_options.block_size = 1024;
|
||||
table_options.no_block_cache = true;
|
||||
options.table_factory.reset(new BlockBasedTableFactory(table_options));
|
||||
Reopen(options);
|
||||
|
||||
std::string value(1024, 'a');
|
||||
for (int i = 0; i < 100; i++) {
|
||||
Put(Key(i), value);
|
||||
}
|
||||
ASSERT_OK(Flush());
|
||||
MoveFilesToLevel(2);
|
||||
|
||||
for (int i = 0; i < 100; i++) {
|
||||
Put(Key(i), value);
|
||||
}
|
||||
ASSERT_OK(Flush());
|
||||
MoveFilesToLevel(1);
|
||||
|
||||
for (int i = 0; i < 100; i++) {
|
||||
Put(Key(i), value);
|
||||
}
|
||||
ASSERT_OK(Flush());
|
||||
#ifndef ROCKSDB_LITE
|
||||
ASSERT_EQ("1,1,1", FilesPerLevel());
|
||||
#endif // !ROCKSDB_LITE
|
||||
|
||||
env_->random_read_bytes_counter_ = 0;
|
||||
options.statistics->setTickerCount(NO_FILE_OPENS, 0);
|
||||
ReadOptions read_options;
|
||||
auto* iter = db_->NewIterator(read_options);
|
||||
iter->SeekToFirst();
|
||||
int64_t num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
|
||||
size_t bytes_read = env_->random_read_bytes_counter_;
|
||||
delete iter;
|
||||
|
||||
env_->random_read_bytes_counter_ = 0;
|
||||
options.statistics->setTickerCount(NO_FILE_OPENS, 0);
|
||||
read_options.readahead_size = 1024 * 10;
|
||||
iter = db_->NewIterator(read_options);
|
||||
iter->SeekToFirst();
|
||||
int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS);
|
||||
size_t bytes_read_readahead = env_->random_read_bytes_counter_;
|
||||
delete iter;
|
||||
ASSERT_EQ(num_file_opens + 3, num_file_opens_readahead);
|
||||
ASSERT_GT(bytes_read_readahead, bytes_read);
|
||||
ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3);
|
||||
|
||||
// Verify correctness.
|
||||
iter = db_->NewIterator(read_options);
|
||||
int count = 0;
|
||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||
ASSERT_EQ(value, iter->value());
|
||||
count++;
|
||||
}
|
||||
ASSERT_EQ(100, count);
|
||||
for (int i = 0; i < 100; i++) {
|
||||
iter->Seek(Key(i));
|
||||
ASSERT_EQ(value, iter->value());
|
||||
}
|
||||
delete iter;
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -358,23 +358,30 @@ class SpecialEnv : public EnvWrapper {
|
||||
class CountingFile : public RandomAccessFile {
|
||||
public:
|
||||
CountingFile(unique_ptr<RandomAccessFile>&& target,
|
||||
anon::AtomicCounter* counter)
|
||||
: target_(std::move(target)), counter_(counter) {}
|
||||
anon::AtomicCounter* counter,
|
||||
std::atomic<size_t>* bytes_read)
|
||||
: target_(std::move(target)),
|
||||
counter_(counter),
|
||||
bytes_read_(bytes_read) {}
|
||||
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const override {
|
||||
counter_->Increment();
|
||||
return target_->Read(offset, n, result, scratch);
|
||||
Status s = target_->Read(offset, n, result, scratch);
|
||||
*bytes_read_ += result->size();
|
||||
return s;
|
||||
}
|
||||
|
||||
private:
|
||||
unique_ptr<RandomAccessFile> target_;
|
||||
anon::AtomicCounter* counter_;
|
||||
std::atomic<size_t>* bytes_read_;
|
||||
};
|
||||
|
||||
Status s = target()->NewRandomAccessFile(f, r, soptions);
|
||||
random_file_open_counter_++;
|
||||
if (s.ok() && count_random_reads_) {
|
||||
r->reset(new CountingFile(std::move(*r), &random_read_counter_));
|
||||
r->reset(new CountingFile(std::move(*r), &random_read_counter_,
|
||||
&random_read_bytes_counter_));
|
||||
}
|
||||
return s;
|
||||
}
|
||||
@ -464,6 +471,7 @@ class SpecialEnv : public EnvWrapper {
|
||||
|
||||
bool count_random_reads_;
|
||||
anon::AtomicCounter random_read_counter_;
|
||||
std::atomic<size_t> random_read_bytes_counter_;
|
||||
std::atomic<int> random_file_open_counter_;
|
||||
|
||||
bool count_sequential_reads_;
|
||||
|
@ -87,15 +87,16 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) {
|
||||
Status TableCache::GetTableReader(
|
||||
const EnvOptions& env_options,
|
||||
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
|
||||
bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
|
||||
unique_ptr<TableReader>* table_reader, bool skip_filters, int level) {
|
||||
bool sequential_mode, size_t readahead, bool record_read_stats,
|
||||
HistogramImpl* file_read_hist, unique_ptr<TableReader>* table_reader,
|
||||
bool skip_filters, int level) {
|
||||
std::string fname =
|
||||
TableFileName(ioptions_.db_paths, fd.GetNumber(), fd.GetPathId());
|
||||
unique_ptr<RandomAccessFile> file;
|
||||
Status s = ioptions_.env->NewRandomAccessFile(fname, &file, env_options);
|
||||
if (sequential_mode && ioptions_.compaction_readahead_size > 0) {
|
||||
file = NewReadaheadRandomAccessFile(std::move(file),
|
||||
ioptions_.compaction_readahead_size);
|
||||
|
||||
if (readahead > 0) {
|
||||
file = NewReadaheadRandomAccessFile(std::move(file), readahead);
|
||||
}
|
||||
RecordTick(ioptions_.statistics, NO_FILE_OPENS);
|
||||
if (s.ok()) {
|
||||
@ -143,8 +144,9 @@ Status TableCache::FindTable(const EnvOptions& env_options,
|
||||
}
|
||||
unique_ptr<TableReader> table_reader;
|
||||
s = GetTableReader(env_options, internal_comparator, fd,
|
||||
false /* sequential mode */, record_read_stats,
|
||||
file_read_hist, &table_reader, skip_filters, level);
|
||||
false /* sequential mode */, 0 /* readahead */,
|
||||
record_read_stats, file_read_hist, &table_reader,
|
||||
skip_filters, level);
|
||||
if (!s.ok()) {
|
||||
assert(table_reader == nullptr);
|
||||
RecordTick(ioptions_.statistics, NO_FILE_ERRORS);
|
||||
@ -175,13 +177,24 @@ InternalIterator* TableCache::NewIterator(
|
||||
|
||||
TableReader* table_reader = nullptr;
|
||||
Cache::Handle* handle = nullptr;
|
||||
bool create_new_table_reader =
|
||||
(for_compaction && ioptions_.new_table_reader_for_compaction_inputs);
|
||||
|
||||
size_t readahead = 0;
|
||||
bool create_new_table_reader = false;
|
||||
if (for_compaction) {
|
||||
if (ioptions_.new_table_reader_for_compaction_inputs) {
|
||||
readahead = ioptions_.compaction_readahead_size;
|
||||
create_new_table_reader = true;
|
||||
}
|
||||
} else {
|
||||
readahead = options.readahead_size;
|
||||
create_new_table_reader = readahead > 0;
|
||||
}
|
||||
|
||||
if (create_new_table_reader) {
|
||||
unique_ptr<TableReader> table_reader_unique_ptr;
|
||||
Status s = GetTableReader(
|
||||
env_options, icomparator, fd, /* sequential mode */ true,
|
||||
/* record stats */ false, nullptr, &table_reader_unique_ptr,
|
||||
env_options, icomparator, fd, true /* sequential_mode */, readahead,
|
||||
!for_compaction /* record stats */, nullptr, &table_reader_unique_ptr,
|
||||
false /* skip_filters */, level);
|
||||
if (!s.ok()) {
|
||||
return NewErrorInternalIterator(s, arena);
|
||||
|
@ -111,7 +111,8 @@ class TableCache {
|
||||
Status GetTableReader(const EnvOptions& env_options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileDescriptor& fd, bool sequential_mode,
|
||||
bool record_read_stats, HistogramImpl* file_read_hist,
|
||||
size_t readahead, bool record_read_stats,
|
||||
HistogramImpl* file_read_hist,
|
||||
unique_ptr<TableReader>* table_reader,
|
||||
bool skip_filters = false, int level = -1);
|
||||
|
||||
|
@ -824,6 +824,10 @@ struct DBOptions {
|
||||
|
||||
// Some functions that make it easier to optimize RocksDB
|
||||
|
||||
// Use this if your DB is very small (like under 1GB) and you don't want to
|
||||
// spend lots of memory for memtables.
|
||||
DBOptions* OptimizeForSmallDb();
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
// By default, RocksDB uses only one background thread for flush and
|
||||
// compaction. Calling this function will set it up such that total of
|
||||
@ -898,7 +902,7 @@ struct DBOptions {
|
||||
|
||||
// If max_open_files is -1, DB will open all files on DB::Open(). You can
|
||||
// use this option to increase the number of threads used to open the files.
|
||||
// Default: 1
|
||||
// Default: 16
|
||||
int max_file_opening_threads;
|
||||
|
||||
// Once write-ahead logs exceed this size, we will start forcing the flush of
|
||||
@ -1343,6 +1347,8 @@ struct Options : public DBOptions, public ColumnFamilyOptions {
|
||||
|
||||
void DumpCFOptions(Logger* log) const;
|
||||
|
||||
// Some functions that make it easier to optimize RocksDB
|
||||
|
||||
// Set appropriate parameters for bulk loading.
|
||||
// The reason that this is a function that returns "this" instead of a
|
||||
// constructor is to enable chaining of multiple similar calls in the future.
|
||||
@ -1352,6 +1358,10 @@ struct Options : public DBOptions, public ColumnFamilyOptions {
|
||||
// It's recommended to manually call CompactRange(NULL, NULL) before reading
|
||||
// from the database, because otherwise the read can be very slow.
|
||||
Options* PrepareForBulkLoad();
|
||||
|
||||
// Use this if your DB is very small (like under 1GB) and you don't want to
|
||||
// spend lots of memory for memtables.
|
||||
Options* OptimizeForSmallDb();
|
||||
};
|
||||
|
||||
//
|
||||
@ -1448,6 +1458,9 @@ struct ReadOptions {
|
||||
// Enable a total order seek regardless of index format (e.g. hash index)
|
||||
// used in the table. Some table formats (e.g. plain table) may not support
|
||||
// this option.
|
||||
// If true when calling Get(), we also skip prefix bloom when reading from
|
||||
// block based table. It provides a way to read existing data after
|
||||
// changing implementation of prefix extractor.
|
||||
bool total_order_seek;
|
||||
|
||||
// Enforce that the iterator only iterates over the same prefix as the seek.
|
||||
@ -1466,6 +1479,12 @@ struct ReadOptions {
|
||||
// Default: false
|
||||
bool pin_data;
|
||||
|
||||
// If non-zero, NewIterator will create a new table reader which
|
||||
// performs reads of the given size. Using a large size (> 2MB) can
|
||||
// improve the performance of forward iteration on spinning disks.
|
||||
// Default: 0
|
||||
size_t readahead_size;
|
||||
|
||||
ReadOptions();
|
||||
ReadOptions(bool cksum, bool cache);
|
||||
};
|
||||
|
@ -5,7 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#define ROCKSDB_MAJOR 4
|
||||
#define ROCKSDB_MINOR 7
|
||||
#define ROCKSDB_MINOR 8
|
||||
#define ROCKSDB_PATCH 0
|
||||
|
||||
// Do not use these. We made the mistake of declaring macros starting with
|
||||
|
@ -262,8 +262,11 @@ class WinMmapFile : public WritableFile {
|
||||
// page size or SSD page size
|
||||
const size_t
|
||||
allocation_granularity_; // View must start at such a granularity
|
||||
size_t mapping_size_; // We want file mapping to be of a specific size
|
||||
// because then the file is expandable
|
||||
|
||||
size_t reserved_size_; // Preallocated size
|
||||
|
||||
size_t mapping_size_; // The max size of the mapping object
|
||||
// we want to guess the final file size to minimize the remapping
|
||||
size_t view_size_; // How much memory to map into a view at a time
|
||||
|
||||
char* mapped_begin_; // Must begin at the file offset that is aligned with
|
||||
@ -283,15 +286,6 @@ class WinMmapFile : public WritableFile {
|
||||
return ftruncate(filename_, hFile_, toSize);
|
||||
}
|
||||
|
||||
// Can only truncate or reserve to a sector size aligned if
|
||||
// used on files that are opened with Unbuffered I/O
|
||||
// Normally it does not present a problem since in memory mapped files
|
||||
// we do not disable buffering
|
||||
Status ReserveFileSpace(uint64_t toSize) {
|
||||
IOSTATS_TIMER_GUARD(allocate_nanos);
|
||||
return fallocate(filename_, hFile_, toSize);
|
||||
}
|
||||
|
||||
Status UnmapCurrentRegion() {
|
||||
Status status;
|
||||
|
||||
@ -301,82 +295,57 @@ class WinMmapFile : public WritableFile {
|
||||
"Failed to unmap file view: " + filename_, GetLastError());
|
||||
}
|
||||
|
||||
// UnmapView automatically sends data to disk but not the metadata
|
||||
// which is good and provides some equivalent of fdatasync() on Linux
|
||||
// therefore, we do not need a separate flag for metadata
|
||||
pending_sync_ = false;
|
||||
mapped_begin_ = nullptr;
|
||||
mapped_end_ = nullptr;
|
||||
dst_ = nullptr;
|
||||
last_sync_ = nullptr;
|
||||
|
||||
// Move on to the next portion of the file
|
||||
file_offset_ += view_size_;
|
||||
|
||||
// Increase the amount we map the next time, but capped at 1MB
|
||||
view_size_ *= 2;
|
||||
view_size_ = std::min(view_size_, c_OneMB);
|
||||
// UnmapView automatically sends data to disk but not the metadata
|
||||
// which is good and provides some equivalent of fdatasync() on Linux
|
||||
// therefore, we do not need a separate flag for metadata
|
||||
mapped_begin_ = nullptr;
|
||||
mapped_end_ = nullptr;
|
||||
dst_ = nullptr;
|
||||
|
||||
last_sync_ = nullptr;
|
||||
pending_sync_ = false;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
Status MapNewRegion() {
|
||||
|
||||
Status status;
|
||||
|
||||
assert(mapped_begin_ == nullptr);
|
||||
|
||||
size_t minMappingSize = file_offset_ + view_size_;
|
||||
size_t minDiskSize = file_offset_ + view_size_;
|
||||
|
||||
// Check if we need to create a new mapping since we want to write beyond
|
||||
// the current one
|
||||
// If the mapping view is now too short
|
||||
// CreateFileMapping will extend the size of the file automatically if the
|
||||
// mapping size is greater than
|
||||
// the current length of the file, which reserves the space and makes
|
||||
// writing faster, except, windows can not map an empty file.
|
||||
// Thus the first time around we must actually extend the file ourselves
|
||||
if (hMap_ == NULL || minMappingSize > mapping_size_) {
|
||||
if (NULL == hMap_) {
|
||||
// Creating mapping for the first time so reserve the space on disk
|
||||
status = ReserveFileSpace(minMappingSize);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
if (minDiskSize > reserved_size_) {
|
||||
status = Allocate(file_offset_, view_size_);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
if (hMap_) {
|
||||
// Need to remap
|
||||
if (hMap_ == NULL || reserved_size_ > mapping_size_) {
|
||||
|
||||
if (hMap_ != NULL) {
|
||||
// Unmap the previous one
|
||||
BOOL ret = ::CloseHandle(hMap_);
|
||||
assert(ret);
|
||||
hMap_ = NULL;
|
||||
}
|
||||
|
||||
// Calculate the new mapping size which will hopefully reserve space for
|
||||
// several consecutive sliding views
|
||||
// Query preallocation block size if set
|
||||
size_t preallocationBlockSize = 0;
|
||||
size_t lastAllocatedBlockSize = 0; // Not used
|
||||
GetPreallocationStatus(&preallocationBlockSize, &lastAllocatedBlockSize);
|
||||
|
||||
if (preallocationBlockSize) {
|
||||
preallocationBlockSize =
|
||||
Roundup(preallocationBlockSize, allocation_granularity_);
|
||||
} else {
|
||||
preallocationBlockSize = 2 * view_size_;
|
||||
}
|
||||
|
||||
mapping_size_ += preallocationBlockSize;
|
||||
|
||||
ULARGE_INTEGER mappingSize;
|
||||
mappingSize.QuadPart = mapping_size_;
|
||||
mappingSize.QuadPart = reserved_size_;
|
||||
|
||||
hMap_ = CreateFileMappingA(
|
||||
hFile_,
|
||||
NULL, // Security attributes
|
||||
PAGE_READWRITE, // There is not a write only mode for mapping
|
||||
mappingSize.HighPart, // Enable mapping the whole file but the actual
|
||||
// amount mapped is determined by MapViewOfFile
|
||||
// amount mapped is determined by MapViewOfFile
|
||||
mappingSize.LowPart,
|
||||
NULL); // Mapping name
|
||||
|
||||
@ -385,6 +354,8 @@ class WinMmapFile : public WritableFile {
|
||||
"WindowsMmapFile failed to create file mapping for: " + filename_,
|
||||
GetLastError());
|
||||
}
|
||||
|
||||
mapping_size_ = reserved_size_;
|
||||
}
|
||||
|
||||
ULARGE_INTEGER offset;
|
||||
@ -416,6 +387,7 @@ class WinMmapFile : public WritableFile {
|
||||
hMap_(NULL),
|
||||
page_size_(page_size),
|
||||
allocation_granularity_(allocation_granularity),
|
||||
reserved_size_(0),
|
||||
mapping_size_(0),
|
||||
view_size_(0),
|
||||
mapped_begin_(nullptr),
|
||||
@ -435,25 +407,10 @@ class WinMmapFile : public WritableFile {
|
||||
// Only for memory mapped writes
|
||||
assert(options.use_mmap_writes);
|
||||
|
||||
// Make sure buffering is not disabled. It is ignored for mapping
|
||||
// purposes but also imposes restriction on moving file position
|
||||
// it is not a problem so much with reserving space since it is probably a
|
||||
// factor
|
||||
// of allocation_granularity but we also want to truncate the file in
|
||||
// Close() at
|
||||
// arbitrary position so we do not have to fill this with zeros.
|
||||
assert(options.use_os_buffer);
|
||||
|
||||
// View size must be both the multiple of allocation_granularity AND the
|
||||
// page size
|
||||
if ((allocation_granularity_ % page_size_) == 0) {
|
||||
view_size_ = 2 * allocation_granularity;
|
||||
} else if ((page_size_ % allocation_granularity_) == 0) {
|
||||
view_size_ = 2 * page_size_;
|
||||
} else {
|
||||
// we can multiply them together
|
||||
assert(false);
|
||||
}
|
||||
// page size and the granularity is usually a multiple of a page size.
|
||||
const size_t viewSize = 32 * 1024; // 32Kb similar to the Windows File Cache in buffered mode
|
||||
view_size_ = Roundup(viewSize, allocation_granularity_);
|
||||
}
|
||||
|
||||
~WinMmapFile() {
|
||||
@ -479,14 +436,20 @@ class WinMmapFile : public WritableFile {
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
} else {
|
||||
size_t n = std::min(left, avail);
|
||||
memcpy(dst_, src, n);
|
||||
dst_ += n;
|
||||
src += n;
|
||||
left -= n;
|
||||
pending_sync_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
size_t n = std::min(left, avail);
|
||||
memcpy(dst_, src, n);
|
||||
dst_ += n;
|
||||
src += n;
|
||||
left -= n;
|
||||
pending_sync_ = true;
|
||||
// Now make sure that the last partial page is padded with zeros if needed
|
||||
size_t bytesToPad = Roundup(size_t(dst_), page_size_) - size_t(dst_);
|
||||
if (bytesToPad > 0) {
|
||||
memset(dst_, 0, bytesToPad);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
@ -508,7 +471,13 @@ class WinMmapFile : public WritableFile {
|
||||
// which we use does not write zeros and it is good.
|
||||
uint64_t targetSize = GetFileSize();
|
||||
|
||||
s = UnmapCurrentRegion();
|
||||
if (mapped_begin_ != nullptr) {
|
||||
// Sync before unmapping to make sure everything
|
||||
// is on disk and there is no lazy writing
|
||||
// so we are deterministic with the tests
|
||||
Sync();
|
||||
s = UnmapCurrentRegion();
|
||||
}
|
||||
|
||||
if (NULL != hMap_) {
|
||||
BOOL ret = ::CloseHandle(hMap_);
|
||||
@ -521,15 +490,18 @@ class WinMmapFile : public WritableFile {
|
||||
hMap_ = NULL;
|
||||
}
|
||||
|
||||
TruncateFile(targetSize);
|
||||
if (hFile_ != NULL) {
|
||||
|
||||
BOOL ret = ::CloseHandle(hFile_);
|
||||
hFile_ = NULL;
|
||||
TruncateFile(targetSize);
|
||||
|
||||
if (!ret && s.ok()) {
|
||||
auto lastError = GetLastError();
|
||||
s = IOErrorFromWindowsError(
|
||||
"Failed to close file map handle: " + filename_, lastError);
|
||||
BOOL ret = ::CloseHandle(hFile_);
|
||||
hFile_ = NULL;
|
||||
|
||||
if (!ret && s.ok()) {
|
||||
auto lastError = GetLastError();
|
||||
s = IOErrorFromWindowsError(
|
||||
"Failed to close file map handle: " + filename_, lastError);
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
@ -542,7 +514,7 @@ class WinMmapFile : public WritableFile {
|
||||
Status s;
|
||||
|
||||
// Some writes occurred since last sync
|
||||
if (pending_sync_) {
|
||||
if (dst_ > last_sync_) {
|
||||
assert(mapped_begin_);
|
||||
assert(dst_);
|
||||
assert(dst_ > mapped_begin_);
|
||||
@ -552,16 +524,15 @@ class WinMmapFile : public WritableFile {
|
||||
TruncateToPageBoundary(page_size_, last_sync_ - mapped_begin_);
|
||||
size_t page_end =
|
||||
TruncateToPageBoundary(page_size_, dst_ - mapped_begin_ - 1);
|
||||
last_sync_ = dst_;
|
||||
|
||||
// Flush only the amount that is a multiple of pages
|
||||
if (!::FlushViewOfFile(mapped_begin_ + page_begin,
|
||||
(page_end - page_begin) + page_size_)) {
|
||||
(page_end - page_begin) + page_size_)) {
|
||||
s = IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_,
|
||||
GetLastError());
|
||||
} else {
|
||||
last_sync_ = dst_;
|
||||
}
|
||||
|
||||
pending_sync_ = false;
|
||||
}
|
||||
|
||||
return s;
|
||||
@ -571,19 +542,15 @@ class WinMmapFile : public WritableFile {
|
||||
* Flush data as well as metadata to stable storage.
|
||||
*/
|
||||
virtual Status Fsync() override {
|
||||
Status s;
|
||||
|
||||
// Flush metadata if pending
|
||||
const bool pending = pending_sync_;
|
||||
|
||||
s = Sync();
|
||||
Status s = Sync();
|
||||
|
||||
// Flush metadata
|
||||
if (s.ok() && pending) {
|
||||
if (s.ok() && pending_sync_) {
|
||||
if (!::FlushFileBuffers(hFile_)) {
|
||||
s = IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_,
|
||||
GetLastError());
|
||||
}
|
||||
pending_sync_ = false;
|
||||
}
|
||||
|
||||
return s;
|
||||
@ -604,7 +571,24 @@ class WinMmapFile : public WritableFile {
|
||||
}
|
||||
|
||||
virtual Status Allocate(uint64_t offset, uint64_t len) override {
|
||||
return Status::OK();
|
||||
Status status;
|
||||
TEST_KILL_RANDOM("WinMmapFile::Allocate", rocksdb_kill_odds);
|
||||
|
||||
// Make sure that we reserve an aligned amount of space
|
||||
// since the reservation block size is driven outside so we want
|
||||
// to check if we are ok with reservation here
|
||||
size_t spaceToReserve = Roundup(offset + len, view_size_);
|
||||
// Nothing to do
|
||||
if (spaceToReserve <= reserved_size_) {
|
||||
return status;
|
||||
}
|
||||
|
||||
IOSTATS_TIMER_GUARD(allocate_nanos);
|
||||
status = fallocate(filename_, hFile_, spaceToReserve);
|
||||
if (status.ok()) {
|
||||
reserved_size_ = spaceToReserve;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1332,7 +1332,8 @@ InternalIterator* BlockBasedTable::NewIterator(const ReadOptions& read_options,
|
||||
NewIndexIterator(read_options), arena);
|
||||
}
|
||||
|
||||
bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter,
|
||||
bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options,
|
||||
FilterBlockReader* filter,
|
||||
const Slice& internal_key) const {
|
||||
if (filter == nullptr || filter->IsBlockBased()) {
|
||||
return true;
|
||||
@ -1341,7 +1342,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter,
|
||||
if (!filter->KeyMayMatch(user_key)) {
|
||||
return false;
|
||||
}
|
||||
if (rep_->ioptions.prefix_extractor &&
|
||||
if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor &&
|
||||
rep_->ioptions.prefix_extractor->InDomain(user_key) &&
|
||||
!filter->PrefixMayMatch(
|
||||
rep_->ioptions.prefix_extractor->Transform(user_key))) {
|
||||
@ -1361,7 +1362,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
|
||||
// First check the full filter
|
||||
// If full filter not useful, Then go into each block
|
||||
if (!FullFilterKeyMayMatch(filter, key)) {
|
||||
if (!FullFilterKeyMayMatch(read_options, filter, key)) {
|
||||
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
|
||||
} else {
|
||||
BlockIter iiter;
|
||||
|
@ -207,7 +207,8 @@ class BlockBasedTable : public TableReader {
|
||||
IndexReader** index_reader,
|
||||
InternalIterator* preloaded_meta_index_iter = nullptr);
|
||||
|
||||
bool FullFilterKeyMayMatch(FilterBlockReader* filter,
|
||||
bool FullFilterKeyMayMatch(const ReadOptions& read_options,
|
||||
FilterBlockReader* filter,
|
||||
const Slice& user_key) const;
|
||||
|
||||
// Read the meta block from sst.
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "table/plain_table_factory.h"
|
||||
#include "tools/ldb_cmd.h"
|
||||
#include "util/random.h"
|
||||
#include "util/compression.h"
|
||||
|
||||
#include "port/port.h"
|
||||
|
||||
@ -194,15 +195,19 @@ int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
|
||||
};
|
||||
|
||||
for (auto& i : compressions) {
|
||||
CompressionOptions compress_opt;
|
||||
std::string column_family_name;
|
||||
TableBuilderOptions tb_opts(imoptions, ikc, &block_based_table_factories,
|
||||
i.first, compress_opt,
|
||||
nullptr /* compression_dict */,
|
||||
false /* skip_filters */, column_family_name);
|
||||
uint64_t file_size = CalculateCompressedTableSize(tb_opts, block_size);
|
||||
fprintf(stdout, "Compression: %s", i.second);
|
||||
fprintf(stdout, " Size: %" PRIu64 "\n", file_size);
|
||||
if (CompressionTypeSupported(i.first)) {
|
||||
CompressionOptions compress_opt;
|
||||
std::string column_family_name;
|
||||
TableBuilderOptions tb_opts(imoptions, ikc, &block_based_table_factories,
|
||||
i.first, compress_opt,
|
||||
nullptr /* compression_dict */,
|
||||
false /* skip_filters */, column_family_name);
|
||||
uint64_t file_size = CalculateCompressedTableSize(tb_opts, block_size);
|
||||
fprintf(stdout, "Compression: %s", i.second);
|
||||
fprintf(stdout, " Size: %" PRIu64 "\n", file_size);
|
||||
} else {
|
||||
fprintf(stdout, "Unsupported compression type: %s.\n", i.second);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -668,6 +668,12 @@ Options::PrepareForBulkLoad()
|
||||
return this;
|
||||
}
|
||||
|
||||
Options* Options::OptimizeForSmallDb() {
|
||||
ColumnFamilyOptions::OptimizeForSmallDb();
|
||||
DBOptions::OptimizeForSmallDb();
|
||||
return this;
|
||||
}
|
||||
|
||||
Options* Options::OldDefaults(int rocksdb_major_version,
|
||||
int rocksdb_minor_version) {
|
||||
ColumnFamilyOptions::OldDefaults(rocksdb_major_version,
|
||||
@ -705,6 +711,12 @@ ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults(
|
||||
}
|
||||
|
||||
// Optimization functions
|
||||
DBOptions* DBOptions::OptimizeForSmallDb() {
|
||||
max_file_opening_threads = 1;
|
||||
max_open_files = 5000;
|
||||
return this;
|
||||
}
|
||||
|
||||
ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForSmallDb() {
|
||||
write_buffer_size = 2 << 20;
|
||||
target_file_size_base = 2 * 1048576;
|
||||
@ -794,7 +806,8 @@ ReadOptions::ReadOptions()
|
||||
managed(false),
|
||||
total_order_seek(false),
|
||||
prefix_same_as_start(false),
|
||||
pin_data(false) {
|
||||
pin_data(false),
|
||||
readahead_size(0) {
|
||||
XFUNC_TEST("", "managed_options", managed_options, xf_manage_options,
|
||||
reinterpret_cast<ReadOptions*>(this));
|
||||
}
|
||||
@ -809,7 +822,8 @@ ReadOptions::ReadOptions(bool cksum, bool cache)
|
||||
managed(false),
|
||||
total_order_seek(false),
|
||||
prefix_same_as_start(false),
|
||||
pin_data(false) {
|
||||
pin_data(false),
|
||||
readahead_size(0) {
|
||||
XFUNC_TEST("", "managed_options", managed_options, xf_manage_options,
|
||||
reinterpret_cast<ReadOptions*>(this));
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include <math.h>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include "rocksdb/options.h"
|
||||
|
||||
|
@ -806,20 +806,23 @@ Status ParseColumnFamilyOption(const std::string& name,
|
||||
new_options->compression_opts.level =
|
||||
ParseInt(value.substr(start, end - start));
|
||||
start = end + 1;
|
||||
end = value.find(':', start);
|
||||
if (end == std::string::npos) {
|
||||
return Status::InvalidArgument(
|
||||
"unable to parse the specified CF option " + name);
|
||||
}
|
||||
new_options->compression_opts.strategy =
|
||||
ParseInt(value.substr(start, value.size() - start));
|
||||
start = end + 1;
|
||||
if (start >= value.size()) {
|
||||
return Status::InvalidArgument(
|
||||
"unable to parse the specified CF option " + name);
|
||||
}
|
||||
new_options->compression_opts.max_dict_bytes =
|
||||
end = value.find(':', start);
|
||||
new_options->compression_opts.strategy =
|
||||
ParseInt(value.substr(start, value.size() - start));
|
||||
// max_dict_bytes is optional for backwards compatibility
|
||||
if (end != std::string::npos) {
|
||||
start = end + 1;
|
||||
if (start >= value.size()) {
|
||||
return Status::InvalidArgument(
|
||||
"unable to parse the specified CF option " + name);
|
||||
}
|
||||
new_options->compression_opts.max_dict_bytes =
|
||||
ParseInt(value.substr(start, value.size() - start));
|
||||
}
|
||||
} else if (name == "compaction_options_fifo") {
|
||||
new_options->compaction_options_fifo.max_table_files_size =
|
||||
ParseUint64(value);
|
||||
|
@ -600,9 +600,14 @@ TEST_F(OptionsTest, GetOptionsFromStringTest) {
|
||||
base_options,
|
||||
"write_buffer_size=10;max_write_buffer_number=16;"
|
||||
"block_based_table_factory={block_cache=1M;block_size=4;};"
|
||||
"create_if_missing=true;max_open_files=1;rate_limiter_bytes_per_sec=1024",
|
||||
"compression_opts=4:5:6;create_if_missing=true;max_open_files=1;"
|
||||
"rate_limiter_bytes_per_sec=1024",
|
||||
&new_options));
|
||||
|
||||
ASSERT_EQ(new_options.compression_opts.window_bits, 4);
|
||||
ASSERT_EQ(new_options.compression_opts.level, 5);
|
||||
ASSERT_EQ(new_options.compression_opts.strategy, 6);
|
||||
ASSERT_EQ(new_options.compression_opts.max_dict_bytes, 0);
|
||||
ASSERT_EQ(new_options.write_buffer_size, 10U);
|
||||
ASSERT_EQ(new_options.max_write_buffer_number, 16);
|
||||
BlockBasedTableOptions new_block_based_table_options =
|
||||
@ -1315,9 +1320,10 @@ TEST_F(OptionsParserTest, DifferentDefault) {
|
||||
old_default_cf_opts.compaction_pri);
|
||||
}
|
||||
|
||||
ColumnFamilyOptions cf_small_opts;
|
||||
cf_small_opts.OptimizeForSmallDb();
|
||||
ASSERT_EQ(2 << 20, cf_small_opts.write_buffer_size);
|
||||
Options small_opts;
|
||||
small_opts.OptimizeForSmallDb();
|
||||
ASSERT_EQ(2 << 20, small_opts.write_buffer_size);
|
||||
ASSERT_EQ(5000, small_opts.max_open_files);
|
||||
}
|
||||
|
||||
class OptionsSanityCheckTest : public OptionsParserTest {
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
|
@ -10,6 +10,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
Loading…
Reference in New Issue
Block a user