Compare commits
40 Commits

b8d5f909f7
777c0011bf
7e1f37eb4f
f50476a021
c97db5282c
0d4c848325
02028f9c19
0db501141a
6c47f8f3d9
c00296e9fa
39340abf23
abbd5a4b93
4bd3bc5c4f
c1d8c1e7dc
5e01c5a312
316f30ce02
17d42cc84a
cb2d7fc7bc
2c7422aa65
a0a9829f7d
086bae0599
4842d1ea5f
e28e2fff13
0b6123068a
ee0cf2f473
3b66d4b617
421be4af42
9810a21baa
f9fc1f483a
600ca9a439
976212ede4
516faa0fa0
9897e42813
f132197a93
87f1325b2b
b79a965f7c
ad7491e11e
c57b91f844
decf7e52b0
be91970d59
@@ -48,6 +48,7 @@ option(WITH_JEMALLOC "build with JeMalloc" OFF)
option(WITH_SNAPPY "build with SNAPPY" OFF)
option(WITH_LZ4 "build with lz4" OFF)
option(WITH_ZLIB "build with zlib" OFF)
+option(WITH_ZSTD "build with zstd" OFF)
if(MSVC)
  # Defaults currently different for GFLAGS.
  # We will address find_package work a little later
@@ -108,7 +109,6 @@ else()
    list(APPEND THIRDPARTY_LIBS ${LZ4_LIBRARIES})
  endif()

-option(WITH_ZSTD "build with zstd" OFF)
if(WITH_ZSTD)
  find_package(zstd REQUIRED)
  add_definitions(-DZSTD)
@@ -307,14 +307,6 @@ if(DISABLE_STALL_NOTIF)
  add_definitions(-DROCKSDB_DISABLE_STALL_NOTIFICATION)
endif()

-# Used to run CI build and tests so we can run faster
-set(OPTIMIZE_DEBUG_DEFAULT 0) # Debug build is unoptimized by default use -DOPTDBG=1 to optimize
-
-if(DEFINED OPTDBG)
-  set(OPTIMIZE_DEBUG ${OPTDBG})
-else()
-  set(OPTIMIZE_DEBUG ${OPTIMIZE_DEBUG_DEFAULT})
-endif()
-
if(DEFINED USE_RTTI)
  if(USE_RTTI)
@@ -342,8 +334,10 @@ else()
  endif()
endif()

+# Used to run CI build and tests so we can run faster
+option(OPTDBG "Build optimized debug build with MSVC" OFF)
if(MSVC)
-  if((${OPTIMIZE_DEBUG} EQUAL 1))
+  if(OPTDBG)
    message(STATUS "Debug optimization is enabled")
    set(CMAKE_CXX_FLAGS_DEBUG "/Oxt /${RUNTIME_LIBRARY}d")
  else()
@@ -620,7 +614,6 @@ set(SOURCES
  utilities/blob_db/blob_log_reader.cc
  utilities/blob_db/blob_log_writer.cc
  utilities/blob_db/blob_log_format.cc
  utilities/blob_db/ttl_extractor.cc
  utilities/cassandra/cassandra_compaction_filter.cc
  utilities/cassandra/format.cc
  utilities/cassandra/merge_operator.cc
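The two OPTDBG hunks above replace a cached helper variable with a first-class CMake option. A minimal configure invocation exercising the new flags could look like the following sketch (the flag names OPTDBG and WITH_ZSTD come from the diff; the generator choice is an assumption):

    # Optimized debug build under MSVC, with zstd compression enabled.
    cmake -G "Visual Studio 15 2017 Win64" -DOPTDBG=ON -DWITH_ZSTD=ON ..
    cmake --build . --config Debug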
59 HISTORY.md
@@ -1,25 +1,68 @@
# Rocksdb Change Log
## Unreleased
## 5.15.10 (9/13/2018)
### Bug Fixes
* Fix RocksDB Java build and tests.

## 5.15.9 (9/4/2018)
### Bug Fixes
* Fix compilation errors on OS X clang due to '-Wsuggest-override'.

## 5.15.8 (8/31/2018)
### Bug Fixes
* Further avoid creating empty SSTs and subsequently deleting them during compaction.

## 5.15.7 (8/24/2018)
### Bug Fixes
* Avoid creating empty SSTs and subsequently deleting them in certain cases during compaction.

## 5.15.6 (8/21/2018)
### Public API Change
* The merge operands are passed to `MergeOperator::ShouldMerge` in the reversed order relative to how they were merged (passed to FullMerge or FullMergeV2), for performance reasons.

## 5.15.5 (8/16/2018)
### Bug Fixes
* Fix VerifyChecksum() API not preserving options.

## 5.15.4 (8/11/2018)
### Bug Fixes
* Fix a bug caused by not generating OnTableFileCreated() notification for a 0-byte SST.

## 5.15.3 (8/10/2018)
### Bug Fixes
* Fix a bug in misreporting the estimated partition index size in properties block.

## 5.15.2 (8/9/2018)
### Bug Fixes
* Return correct usable_size for BlockContents.

## 5.15.1 (8/1/2018)
### Bug Fixes
* Prevent dereferencing invalid STL iterators when there are range tombstones in ingested files.

## 5.15.0 (7/17/2018)
### Public API Change
* For users of `Statistics` objects created via `CreateDBStatistics()`, the format of the string returned by its `ToString()` method has changed.
* With LRUCache, when high_pri_pool_ratio > 0, a midpoint insertion strategy is enabled that puts low-pri items at the tail of the low-pri list (the midpoint) when they are first inserted into the cache. This makes entries that never get hit age out faster, improving cache efficiency when a large background scan is present.
* For bottommost_compression, a compatible CompressionOptions is added via `bottommost_compression_opts`. To keep backward compatibility, a new boolean `enabled` is added to CompressionOptions. compression_opts will always be used regardless of the value of `enabled`; bottommost_compression_opts will only be used when the user sets `enabled=true`, otherwise compression_opts is used for bottommost_compression by default.
* The "rocksdb.num.entries" table property no longer counts range deletion tombstones as entries.
* Remove managed iterator. ReadOptions.managed is no longer effective.
* For bottommost_compression, a compatible CompressionOptions is added via `bottommost_compression_opts`. To keep backward compatibility, a new boolean `enabled` is added to CompressionOptions. compression_opts will always be used regardless of the value of `enabled`; bottommost_compression_opts will only be used when the user sets `enabled=true`, otherwise compression_opts is used for bottommost_compression by default (see the sketch after this list).
* With LRUCache, when high_pri_pool_ratio > 0, a midpoint insertion strategy is enabled that puts low-pri items at the tail of the low-pri list (the midpoint) when they are first inserted into the cache. This makes entries that never get hit age out faster, improving cache efficiency when a large background scan is present.
* For users of `Statistics` objects created via `CreateDBStatistics()`, the format of the string returned by its `ToString()` method has changed.
* The "rocksdb.num.entries" table property no longer counts range deletion tombstones as entries.
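A minimal sketch of how the `bottommost_compression_opts` addition might be configured (RocksDB 5.15+ headers assumed; the codec choices are illustrative):

    #include "rocksdb/options.h"

    rocksdb::Options options;
    options.compression = rocksdb::kLZ4Compression;   // non-bottommost levels
    options.bottommost_compression = rocksdb::kZSTD;  // bottommost level only
    options.bottommost_compression_opts.max_dict_bytes = 16 * 1024;
    // Without enabled = true, compression_opts (not these options) would
    // still apply to the bottommost level.
    options.bottommost_compression_opts.enabled = true;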

### New Features
* Changes the format of index blocks by storing the key in their raw form rather than converting them to InternalKey. This saves 8 bytes per index key. The feature is backward compatible but not forward compatible. It is disabled by default unless format_version 3 or above is used.
* Avoid memcpy when reading mmap files with OpenReadOnly and max_open_files==-1.
* Support dynamically changing `ColumnFamilyOptions::ttl` via `SetOptions()`.
* Add a new table property, "rocksdb.num.range-deletions", which counts the number of range deletion tombstones in the table.
* Improve the performance of iterators doing long range scans by using readahead, when using direct IO.
* pin_top_level_index_and_filter (default true) in BlockBasedTableOptions can be used in combination with cache_index_and_filter_blocks to prefetch and pin the top-level index of partitioned index and filter blocks in cache. It has no impact when cache_index_and_filter_blocks is false (see the sketch after this list).
* Avoid memcpy when reading mmap files with OpenReadOnly and max_open_files==-1.
* Support dynamically changing `ColumnFamilyOptions::ttl` via `SetOptions()`.

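A configuration sketch for the pin_top_level_index_and_filter feature noted above (the option names come from the changelog; the surrounding setup is illustrative):

    #include "rocksdb/options.h"
    #include "rocksdb/table.h"

    rocksdb::Options options;
    rocksdb::BlockBasedTableOptions table_options;
    table_options.index_type = rocksdb::BlockBasedTableOptions::kTwoLevelIndexSearch;
    table_options.partition_filters = true;
    table_options.cache_index_and_filter_blocks = true;
    // New in 5.15: prefetch and pin the top-level index of partitioned
    // index/filter blocks; a no-op when the line above is false.
    table_options.pin_top_level_index_and_filter = true;
    options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));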
### Bug Fixes
-* fix deadlock with enable_pipelined_write=true and max_successive_merges > 0
+* Fix deadlock with enable_pipelined_write=true and max_successive_merges > 0
* Check conflict at output level in CompactFiles.
* Fix corruption in non-iterator reads when mmap is used for file reads.
* Fix a bug with prefix search in partition filters where a shared prefix would be ignored in later partitions. The bug could report an existent key as missing. It could be triggered if prefix_extractor is set and partition filters are enabled.
* Change default value of `bytes_max_delete_chunk` to 0 in NewSstFileManager() as it doesn't work well with checkpoints.
* Fix a bug caused by not copying the block trailer with compressed SST file, direct IO, prefetcher and no compressed block cache.
* Fix writes getting stuck indefinitely when enable_pipelined_write=true. The issue has existed since pipelined write was introduced in 5.5.0.

## 5.14.0 (5/16/2018)
### Public API Change

4 Makefile
@@ -1600,7 +1600,7 @@ ZLIB_SHA256 ?= c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1
ZLIB_DOWNLOAD_BASE ?= http://zlib.net
BZIP2_VER ?= 1.0.6
BZIP2_SHA256 ?= a2848f34fcd5d6cf47def00461fcb528a0484d8edef8208d6d2e2909dc61d9cd
-BZIP2_DOWNLOAD_BASE ?= http://www.bzip.org
+BZIP2_DOWNLOAD_BASE ?= https://web.archive.org/web/20180624184835/http://www.bzip.org
SNAPPY_VER ?= 1.1.4
SNAPPY_SHA256 ?= 134bfe122fd25599bb807bb8130e7ba6d9bdb851e0b16efcb83ac4f5d0b70057
SNAPPY_DOWNLOAD_BASE ?= https://github.com/google/snappy/releases/download
@@ -1829,7 +1829,7 @@ $(java_libobjects): jl/%.o: %.cc
rocksdbjava: $(java_all_libobjects)
	$(AM_V_GEN)cd java;$(MAKE) javalib;
	$(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB)
-	$(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. $(JAVA_INCLUDE) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(JNI_NATIVE_SOURCES) $(java_libobjects) $(JAVA_LDFLAGS) $(COVERAGEFLAGS)
+	$(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. $(JAVA_INCLUDE) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(JNI_NATIVE_SOURCES) $(java_all_libobjects) $(JAVA_LDFLAGS) $(COVERAGEFLAGS)
	$(AM_V_at)cd java;jar -cf target/$(ROCKSDB_JAR) HISTORY*.md
	$(AM_V_at)cd java/target;jar -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB)
	$(AM_V_at)cd java/target/classes;jar -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class
1 TARGETS
@@ -235,7 +235,6 @@ cpp_library(
  "utilities/blob_db/blob_log_format.cc",
  "utilities/blob_db/blob_log_reader.cc",
  "utilities/blob_db/blob_log_writer.cc",
  "utilities/blob_db/ttl_extractor.cc",
  "utilities/cassandra/cassandra_compaction_filter.cc",
  "utilities/cassandra/format.cc",
  "utilities/cassandra/merge_operator.cc",
@@ -53,11 +53,13 @@ if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
  FBCODE_BUILD="true"
  # If we're compiling with TSAN we need pic build
  PIC_BUILD=$COMPILE_WITH_TSAN
-  if [ -z "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then
-    source "$PWD/build_tools/fbcode_config.sh"
-  else
+  if [ -n "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then
    # we need this to build with MySQL. Don't use for other purposes.
    source "$PWD/build_tools/fbcode_config4.8.1.sh"
+  elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_5xx" ]; then
+    source "$PWD/build_tools/fbcode_config.sh"
+  else
+    source "$PWD/build_tools/fbcode_config_platform007.sh"
  fi
fi

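With this change the platform007 toolchain becomes the fbcode default, while the older toolchains stay reachable through the environment variables tested above. A usage sketch (the variable names come from the diff; `static_lib` as the target is just an example):

    make static_lib                                   # default: fbcode_config_platform007.sh
    ROCKSDB_FBCODE_BUILD_WITH_5xx=1 make static_lib   # previous gcc-5.x toolchain
    ROCKSDB_FBCODE_BUILD_WITH_481=1 make static_lib   # gcc-4.8.1, MySQL builds only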
18 build_tools/dependencies_platform007.sh (new file)
@@ -0,0 +1,18 @@
GCC_BASE=/mnt/gvfs/third-party2/gcc/6e8e715624fd15256a7970073387793dfcf79b46/7.x/centos7-native/b2ef2b6
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/ef37e1faa1c29782abfac1ae65a291b9b7966f6d/stable/centos7-native/c9f9104
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/c67031f0f739ac61575a061518d6ef5038f99f90/7.x/platform007/5620abc
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/60d6f124a78798b73944f5ba87c2306ae3460153/2.26/platform007/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/22c2d65676fb7c23cfa797c4f6937f38b026f3cf/1.2.8/platform007/ca4da3d
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d
LZ4_BASE=/mnt/gvfs/third-party2/lz4/907b498203d297947f3bb70b9466f47e100f1873/r131/platform007/ca4da3d
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/3ee276cbacfad3074e3f07bf826ac47f06970f4e/1.3.5/platform007/15a3614
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/9c910d36d6235cc40e8ff559358f1833452300ca/master/platform007/5b0f53e
NUMA_BASE=/mnt/gvfs/third-party2/numa/9cbf2460284c669ed19c3ccb200a71f7dd7e53c7/2.0.11/platform007/ca4da3d
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/bf3d7497fe4e6d007354f0adffa16ce3003f8338/1.3/platform007/6f3e0a9
TBB_BASE=/mnt/gvfs/third-party2/tbb/ff4e0b093534704d8abab678a4fd7f5ea7b094c7/2018_U5/platform007/ca4da3d
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/b5c4a61a5c483ba24722005ae07895971a2ac707/fb/platform007/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/92ff90349e2f43ea0a8246d8b1cf17b6869013e3/2.29.1/centos7-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/f3f697a28122e6bcd513273dd9c1ff23852fc59f/3.13.0/platform007/ca4da3d
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832
157 build_tools/fbcode_config_platform007.sh (new file)
@@ -0,0 +1,157 @@
#!/bin/sh
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ and clang compilers and also
# uses jemalloc
# Environment variables that change the behavior of this script:
# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included


BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_platform007.sh"

CFLAGS=""

# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"

# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"

# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
  SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
  SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"

if test -z $PIC_BUILD; then
  # location of zlib headers and libraries
  ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
  ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
  CFLAGS+=" -DZLIB"

  # location of bzip headers and libraries
  BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
  BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
  CFLAGS+=" -DBZIP2"

  LZ4_INCLUDE=" -I $LZ4_BASE/include/"
  LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
  CFLAGS+=" -DLZ4"
fi

ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
  ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
else
  ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD"

# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
if test -z $PIC_BUILD; then
  GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
else
  GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
fi
CFLAGS+=" -DGFLAGS=gflags"

# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a"

if test -z $PIC_BUILD; then
  # location of numa
  NUMA_INCLUDE=" -I $NUMA_BASE/include/"
  NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
  CFLAGS+=" -DNUMA"

  # location of libunwind
  LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
fi

# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
if test -z $PIC_BUILD; then
  TBB_LIBS="$TBB_BASE/lib/libtbb.a"
else
  TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a"
fi
CFLAGS+=" -DTBB"

# use Intel SSE support for checksum calculations
export USE_SSE=1
export PORTABLE=1

BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"

DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE"

STDLIBS="-L $GCC_BASE/lib64"

CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"

CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build"

if [ -z "$USE_CLANG" ]; then
  # gcc
  CC="$GCC_BASE/bin/gcc"
  CXX="$GCC_BASE/bin/g++"

  CFLAGS+=" -B$BINUTILS/gold"
  CFLAGS+=" -isystem $LIBGCC_INCLUDE"
  CFLAGS+=" -isystem $GLIBC_INCLUDE"
  JEMALLOC=1
else
  # clang
  CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
  CC="$CLANG_BIN/clang"
  CXX="$CLANG_BIN/clang++"

  KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"

  CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
  CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x "
  CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux "
  CFLAGS+=" -isystem $GLIBC_INCLUDE"
  CFLAGS+=" -isystem $LIBGCC_INCLUDE"
  CFLAGS+=" -isystem $CLANG_INCLUDE"
  CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
  CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
  CFLAGS+=" -Wno-expansion-to-defined "
  CXXFLAGS="-nostdinc++"
fi

CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42"
CXXFLAGS+=" $CFLAGS"

EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
EXEC_LDFLAGS+=" -B$BINUTILS/gold"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"

PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"

EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS"

VALGRIND_VER="$VALGRIND_BASE/bin/"

# lua not supported because it's on track for deprecation, I think
LUA_PATH=
LUA_LIB=

export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
@@ -85,8 +85,9 @@ NON_SHM="TMPD=/tmp/rocksdb_test_tmp"
GCC_481="ROCKSDB_FBCODE_BUILD_WITH_481=1"
ASAN="COMPILE_WITH_ASAN=1"
CLANG="USE_CLANG=1"
LITE="OPT=\"-DROCKSDB_LITE -g\""
-TSAN="COMPILE_WITH_TSAN=1"
+# in gcc-5 there are known problems with TSAN like https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71090.
+# using platform007 gives us gcc-8 or higher which has that bug fixed.
+TSAN="ROCKSDB_FBCODE_BUILD_WITH_PLATFORM007=1 COMPILE_WITH_TSAN=1"
UBSAN="COMPILE_WITH_UBSAN=1"
DISABLE_JEMALLOC="DISABLE_JEMALLOC=1"
HTTP_PROXY="https_proxy=http://fwdproxy.29.prn1:8080 http_proxy=http://fwdproxy.29.prn1:8080 ftp_proxy=http://fwdproxy.29.prn1:8080"
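Assuming ROCKSDB_FBCODE_BUILD_WITH_PLATFORM007 is honored by the build scripts the way this hunk implies, a TSAN run would be invoked along these lines (the target is illustrative):

    COMPILE_WITH_TSAN=1 ROCKSDB_FBCODE_BUILD_WITH_PLATFORM007=1 make db_stress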
@@ -53,6 +53,45 @@ function get_lib_base()
  log_variable $__res_var
}

+###########################################################
+#                platform007 dependencies                 #
+###########################################################
+
+OUTPUT="$BASEDIR/dependencies_platform007.sh"
+
+rm -f "$OUTPUT"
+touch "$OUTPUT"
+
+echo "Writing dependencies to $OUTPUT"
+
+# Compilers locations
+GCC_BASE=`readlink -f $TP2_LATEST/gcc/7.x/centos7-native/*/`
+CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/`
+
+log_variable GCC_BASE
+log_variable CLANG_BASE
+
+# Libraries locations
+get_lib_base libgcc     7.x    platform007
+get_lib_base glibc      2.26   platform007
+get_lib_base snappy     LATEST platform007
+get_lib_base zlib       LATEST platform007
+get_lib_base bzip2      LATEST platform007
+get_lib_base lz4        LATEST platform007
+get_lib_base zstd       LATEST platform007
+get_lib_base gflags     LATEST platform007
+get_lib_base jemalloc   LATEST platform007
+get_lib_base numa       LATEST platform007
+get_lib_base libunwind  LATEST platform007
+get_lib_base tbb        LATEST platform007
+
+get_lib_base kernel-headers fb     platform007
+get_lib_base binutils       LATEST centos7-native
+get_lib_base valgrind       LATEST platform007
+get_lib_base lua            5.3.4  platform007
+
+git diff $OUTPUT
+
###########################################################
#                   5.x dependencies                      #
###########################################################
@@ -230,11 +230,9 @@ Status BuildTable(
  }

  // Output to event logger and fire events.
-  if (!s.ok() || meta->fd.GetFileSize() > 0) {
-    EventHelpers::LogAndNotifyTableFileCreationFinished(
-        event_logger, ioptions.listeners, dbname, column_family_name, fname,
-        job_id, meta->fd, tp, reason, s);
-  }
+  EventHelpers::LogAndNotifyTableFileCreationFinished(
+      event_logger, ioptions.listeners, dbname, column_family_name, fname,
+      job_id, meta->fd, tp, reason, s);

  return s;
}
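With the condition removed, table-file-creation events now fire even for failed or zero-byte creations. A hypothetical listener illustrating how callers can distinguish those cases (EventListener and TableFileCreationInfo are real RocksDB types; the class itself is invented):

    #include "rocksdb/listener.h"

    class CreationLogger : public rocksdb::EventListener {
     public:
      void OnTableFileCreated(const rocksdb::TableFileCreationInfo& info) override {
        if (!info.status.ok() || info.file_size == 0) {
          // Previously this callback could be skipped entirely for such files.
        }
      }
    };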
@@ -1075,7 +1075,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
  }

  if (status.ok() && sub_compact->builder == nullptr &&
-      sub_compact->outputs.size() == 0) {
+      sub_compact->outputs.size() == 0 &&
+      !range_del_agg->IsEmpty()) {
    // handle subcompaction containing only range deletions
    status = OpenCompactionOutputFile(sub_compact);
  }
@@ -116,6 +116,22 @@ private:
  std::vector<std::atomic<int>> compaction_completed_;
};

+class SstStatsCollector : public EventListener {
+ public:
+  SstStatsCollector() : num_ssts_creation_started_(0) {}
+
+  void OnTableFileCreationStarted(const TableFileCreationBriefInfo& /* info */) override {
+    ++num_ssts_creation_started_;
+  }
+
+  int num_ssts_creation_started() {
+    return num_ssts_creation_started_;
+  }
+
+ private:
+  std::atomic<int> num_ssts_creation_started_;
+};
+
static const int kCDTValueSize = 1000;
static const int kCDTKeysPerBuffer = 4;
static const int kCDTNumLevels = 8;
@@ -3816,6 +3832,30 @@ TEST_F(DBCompactionTest, CompactFilesOutputRangeConflict) {
  bg_thread.join();
}

+TEST_F(DBCompactionTest, CompactionHasEmptyOutput) {
+  Options options = CurrentOptions();
+  SstStatsCollector* collector = new SstStatsCollector();
+  options.level0_file_num_compaction_trigger = 2;
+  options.listeners.emplace_back(collector);
+  Reopen(options);
+
+  // Make sure the L0 files overlap to prevent trivial move.
+  ASSERT_OK(Put("a", "val"));
+  ASSERT_OK(Put("b", "val"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Delete("a"));
+  ASSERT_OK(Delete("b"));
+  ASSERT_OK(Flush());
+
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
+  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
+
+  // Expect one file creation to start for each flush, and zero for compaction
+  // since no keys are written.
+  ASSERT_EQ(2, collector->num_ssts_creation_started());
+}
+
INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam,
                        ::testing::Values(std::make_tuple(1, true),
                                          std::make_tuple(1, false),
@@ -2912,6 +2912,7 @@ Status DBImpl::IngestExternalFile(
  status = ingestion_job.Prepare(external_files, super_version);
+  CleanupSuperVersion(super_version);
  if (!status.ok()) {
    InstrumentedMutexLock l(&mutex_);
    ReleaseFileNumberFromPendingOutputs(pending_output_elem);
    return status;
  }
@@ -3006,8 +3007,6 @@

Status DBImpl::VerifyChecksum() {
  Status s;
-  Options options;
-  EnvOptions env_options;
  std::vector<ColumnFamilyData*> cfd_list;
  {
    InstrumentedMutexLock l(&mutex_);
@@ -3025,13 +3024,19 @@ Status DBImpl::VerifyChecksum() {
  for (auto& sv : sv_list) {
    VersionStorageInfo* vstorage = sv->current->storage_info();
    ColumnFamilyData* cfd = sv->current->cfd();
+    Options opts;
+    {
+      InstrumentedMutexLock l(&mutex_);
+      opts = Options(BuildDBOptions(immutable_db_options_,
+                     mutable_db_options_), cfd->GetLatestCFOptions());
+    }
    for (int i = 0; i < vstorage->num_non_empty_levels() && s.ok(); i++) {
      for (size_t j = 0; j < vstorage->LevelFilesBrief(i).num_files && s.ok();
           j++) {
        const auto& fd = vstorage->LevelFilesBrief(i).files[j].fd;
        std::string fname = TableFileName(cfd->ioptions()->cf_paths,
                                          fd.GetNumber(), fd.GetPathId());
-        s = rocksdb::VerifySstFileChecksum(options, env_options, fname);
+        s = rocksdb::VerifySstFileChecksum(opts, env_options_, fname);
      }
    }
    if (!s.ok()) {
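Usage is unchanged by the fix; the difference is that the verification pass now runs with the DB's own options instead of default-constructed ones. A minimal sketch (db is assumed to be an open rocksdb::DB*):

    #include <cstdio>

    rocksdb::Status s = db->VerifyChecksum();
    if (!s.ok()) {
      fprintf(stderr, "checksum verification failed: %s\n", s.ToString().c_str());
    }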
@@ -2413,6 +2413,30 @@ TEST_P(DBIteratorTest, NonBlockingIterationBugRepro) {
  EXPECT_TRUE(iter->status().IsIncomplete());
}

+TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
+  Put("a", "");
+  Put("b", "");
+  Flush();
+
+  ReadOptions ropt;
+  Slice ub = "b";
+  ropt.iterate_upper_bound = &ub;
+
+  std::unique_ptr<Iterator> it(dbfull()->NewIterator(ropt));
+  it->SeekForPrev("a");
+  ASSERT_TRUE(it->Valid());
+  ASSERT_OK(it->status());
+  ASSERT_EQ("a", it->key().ToString());
+  it->Next();
+  ASSERT_FALSE(it->Valid());
+  ASSERT_OK(it->status());
+  it->SeekForPrev("a");
+  ASSERT_OK(it->status());
+
+  ASSERT_TRUE(it->Valid());
+  ASSERT_EQ("a", it->key().ToString());
+}
+
INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
                        testing::Values(true, false));

@@ -1822,7 +1822,7 @@ TEST_P(DBTestUniversalCompaction, FinalSortedRunCompactFilesConflict) {
       "CompactFilesImpl:1"}});
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

-  std::thread compact_files_thread([&]() {
+  port::Thread compact_files_thread([&]() {
    ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), default_cfh,
                                     {first_sst_filename}, num_levels_ - 1));
  });
@@ -697,7 +697,7 @@ static bool SaveValue(void* arg, const char* entry) {
        *(s->merge_in_progress) = true;
        merge_context->PushOperand(
            v, s->inplace_update_support == false /* operand_pinned */);
-        if (merge_operator->ShouldMerge(merge_context->GetOperands())) {
+        if (merge_operator->ShouldMerge(merge_context->GetOperandsDirectionBackward())) {
          *(s->status) = MergeHelper::TimedFullMerge(
              merge_operator, s->key->user_key(), nullptr,
              merge_context->GetOperands(), s->value, s->logger, s->statistics,
@@ -74,8 +74,13 @@ class MergeContext {
    return (*operand_list_)[index];
  }

  // Return all the operands.
  // Same as GetOperandsDirectionForward
  const std::vector<Slice>& GetOperands() {
    return GetOperandsDirectionForward();
  }

  // Return all the operands in the order as they were merged (passed to FullMerge or FullMergeV2)
  const std::vector<Slice>& GetOperandsDirectionForward() {
    if (!operand_list_) {
      return empty_operand_list;
    }
@@ -84,6 +89,16 @@ class MergeContext {
    return *operand_list_;
  }

+  // Return all the operands in the reversed order relative to how they were merged (passed to FullMerge or FullMergeV2)
+  const std::vector<Slice>& GetOperandsDirectionBackward() {
+    if (!operand_list_) {
+      return empty_operand_list;
+    }
+
+    SetDirectionBackward();
+    return *operand_list_;
+  }
+
 private:
  void Initialize() {
    if (!operand_list_) {
@@ -50,7 +50,7 @@ class UncollapsedRangeDelMap : public RangeDelMap {
      : rep_(TombstoneStartKeyComparator(ucmp)), ucmp_(ucmp) {}

  bool ShouldDelete(const ParsedInternalKey& parsed,
-                    RangeDelPositioningMode mode) {
+                    RangeDelPositioningMode mode) override {
    (void)mode;
    assert(mode == RangeDelPositioningMode::kFullScan);
    for (const auto& tombstone : rep_) {
@@ -65,7 +65,7 @@ class UncollapsedRangeDelMap : public RangeDelMap {
    return false;
  }

-  bool IsRangeOverlapped(const Slice& start, const Slice& end) {
+  bool IsRangeOverlapped(const Slice& start, const Slice& end) override {
    for (const auto& tombstone : rep_) {
      if (ucmp_->Compare(start, tombstone.end_key_) < 0 &&
          ucmp_->Compare(tombstone.start_key_, end) <= 0 &&
@@ -76,13 +76,13 @@ class UncollapsedRangeDelMap : public RangeDelMap {
    return false;
  }

-  void AddTombstone(RangeTombstone tombstone) { rep_.emplace(tombstone); }
+  void AddTombstone(RangeTombstone tombstone) override { rep_.emplace(tombstone); }

-  size_t Size() const { return rep_.size(); }
+  size_t Size() const override { return rep_.size(); }

-  void InvalidatePosition() {} // no-op
+  void InvalidatePosition() override {} // no-op

-  std::unique_ptr<RangeDelIterator> NewIterator() {
+  std::unique_ptr<RangeDelIterator> NewIterator() override {
    return std::unique_ptr<RangeDelIterator>(new Iterator(this->rep_));
  }
};
@@ -155,6 +155,9 @@ class CollapsedRangeDelMap : public RangeDelMap {
  }

  RangeTombstone Tombstone() const override {
+    assert(Valid());
+    assert(std::next(iter_) != rep_.end());
+    assert(iter_->second != 0);
    RangeTombstone tombstone;
    tombstone.start_key_ = iter_->first;
    tombstone.end_key_ = std::next(iter_)->first;
@@ -173,7 +176,7 @@ class CollapsedRangeDelMap : public RangeDelMap {
  }

  bool ShouldDelete(const ParsedInternalKey& parsed,
-                    RangeDelPositioningMode mode) {
+                    RangeDelPositioningMode mode) override {
    if (iter_ == rep_.end() &&
        (mode == RangeDelPositioningMode::kForwardTraversal ||
         mode == RangeDelPositioningMode::kBackwardTraversal)) {
@@ -224,15 +227,15 @@ class CollapsedRangeDelMap : public RangeDelMap {
    return parsed.sequence < iter_->second;
  }

-  bool IsRangeOverlapped(const Slice&, const Slice&) {
+  bool IsRangeOverlapped(const Slice&, const Slice&) override {
    // Unimplemented because the only client of this method, file ingestion,
    // uses uncollapsed maps.
    fprintf(stderr, "CollapsedRangeDelMap::IsRangeOverlapped unimplemented");
    abort();
  }

-  void AddTombstone(RangeTombstone t) {
-    if (ucmp_->Compare(t.start_key_, t.end_key_) >= 0) {
+  void AddTombstone(RangeTombstone t) override {
+    if (ucmp_->Compare(t.start_key_, t.end_key_) >= 0 || t.seq_ == 0) {
      // The tombstone covers no keys. Nothing to do.
      return;
    }
@@ -341,11 +344,11 @@ class CollapsedRangeDelMap : public RangeDelMap {
    }
  }

-  size_t Size() const { return rep_.size() - 1; }
+  size_t Size() const override { return rep_.empty() ? 0 : rep_.size() - 1; }

-  void InvalidatePosition() { iter_ = rep_.end(); }
+  void InvalidatePosition() override { iter_ = rep_.end(); }

-  std::unique_ptr<RangeDelIterator> NewIterator() {
+  std::unique_ptr<RangeDelIterator> NewIterator() override {
    return std::unique_ptr<RangeDelIterator>(new Iterator(this->rep_));
  }
};
@@ -20,7 +20,7 @@ class DisableGCSnapshotChecker : public SnapshotChecker {
 public:
  virtual ~DisableGCSnapshotChecker() {}
  virtual bool IsInSnapshot(SequenceNumber /*sequence*/,
-                            SequenceNumber /*snapshot_sequence*/) const {
+                            SequenceNumber /*snapshot_sequence*/) const override {
    // By returning false, we prevent all the values from being GCed
    return false;
  }
@@ -56,6 +56,9 @@ class SnapshotList {
    count_ = 0;
  }

+  // No copy-construct.
+  SnapshotList(const SnapshotList&) = delete;
+
  bool empty() const { return list_.next_ == &list_; }
  SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; }
  SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; }
@@ -2994,25 +2994,31 @@ Status VersionSet::ProcessManifestWrites(
      delete first_writer.cfd;
    }
  } else {
-    uint64_t max_log_number_in_batch = 0;
+    // Each version in versions corresponds to a column family.
+    // For each column family, update its log number indicating that logs
+    // with number smaller than this should be ignored.
+    for (const auto version : versions) {
+      uint64_t max_log_number_in_batch = 0;
+      uint32_t cf_id = version->cfd_->GetID();
+      for (const auto& e : batch_edits) {
+        if (e->has_log_number_ && e->column_family_ == cf_id) {
+          max_log_number_in_batch =
+              std::max(max_log_number_in_batch, e->log_number_);
+        }
+      }
+      if (max_log_number_in_batch != 0) {
+        assert(version->cfd_->GetLogNumber() <= max_log_number_in_batch);
+        version->cfd_->SetLogNumber(max_log_number_in_batch);
+      }
+    }

    uint64_t last_min_log_number_to_keep = 0;
    for (auto& e : batch_edits) {
-      if (e->has_log_number_) {
-        max_log_number_in_batch =
-            std::max(max_log_number_in_batch, e->log_number_);
-      }
      if (e->has_min_log_number_to_keep_) {
        last_min_log_number_to_keep =
            std::max(last_min_log_number_to_keep, e->min_log_number_to_keep_);
      }
    }
-    if (max_log_number_in_batch != 0) {
-      for (int i = 0; i < static_cast<int>(versions.size()); ++i) {
-        ColumnFamilyData* cfd = versions[i]->cfd_;
-        assert(cfd->GetLogNumber() <= max_log_number_in_batch);
-        cfd->SetLogNumber(max_log_number_in_batch);
-      }
-    }

    if (last_min_log_number_to_keep != 0) {
      // Should only be set in 2PC mode.
@@ -263,6 +263,17 @@ void WriteThread::CreateMissingNewerLinks(Writer* head) {
  }
}

+WriteThread::Writer* WriteThread::FindNextLeader(Writer* from,
+                                                 Writer* boundary) {
+  assert(from != nullptr && from != boundary);
+  Writer* current = from;
+  while (current->link_older != boundary) {
+    current = current->link_older;
+    assert(current != nullptr);
+  }
+  return current;
+}
+
void WriteThread::CompleteLeader(WriteGroup& write_group) {
  assert(write_group.size > 0);
  Writer* leader = write_group.leader;
@@ -558,21 +569,49 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
  if (!leader->ShouldWriteToMemtable()) {
    CompleteLeader(write_group);
  }

+  Writer* next_leader = nullptr;
+
+  // Look for the next leader before we call LinkGroup. If there are no
+  // pending writers, place a dummy writer at the tail of the queue
+  // so we know the boundary of the current write group.
+  Writer dummy;
+  Writer* expected = last_writer;
+  bool has_dummy = newest_writer_.compare_exchange_strong(expected, &dummy);
+  if (!has_dummy) {
+    // We found at least one pending writer when we inserted the dummy. We
+    // search for the next leader from there.
+    next_leader = FindNextLeader(expected, last_writer);
+    assert(next_leader != nullptr && next_leader != last_writer);
+  }
+
  // Link the remaining writers of the group to the memtable writer list.
+  //
+  // We have to link our group to the memtable writer queue before waking up
+  // the next leader or setting newest_writer_ to null, otherwise the next
+  // leader can run ahead of us and link to the memtable writer queue before
+  // we do.
  if (write_group.size > 0) {
    if (LinkGroup(write_group, &newest_memtable_writer_)) {
      // The leader can now be different from current writer.
      SetState(write_group.leader, STATE_MEMTABLE_WRITER_LEADER);
    }
  }
-  // Reset newest_writer_ and wake up the next leader.
-  Writer* newest_writer = last_writer;
-  if (!newest_writer_.compare_exchange_strong(newest_writer, nullptr)) {
-    Writer* next_leader = newest_writer;
-    while (next_leader->link_older != last_writer) {
-      next_leader = next_leader->link_older;
-      assert(next_leader != nullptr);

+  // If we have inserted the dummy in the queue, remove it now and check
+  // whether there are pending writers that joined the queue since we
+  // inserted the dummy. If so, look for the next leader again.
+  if (has_dummy) {
+    assert(next_leader == nullptr);
+    expected = &dummy;
+    bool has_pending_writer =
+        !newest_writer_.compare_exchange_strong(expected, nullptr);
+    if (has_pending_writer) {
+      next_leader = FindNextLeader(expected, &dummy);
+      assert(next_leader != nullptr && next_leader != &dummy);
+    }
+  }

  if (next_leader != nullptr) {
    next_leader->link_older = nullptr;
    SetState(next_leader, STATE_GROUP_LEADER);
  }
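A standalone sketch of the dummy-writer pattern introduced above, reduced to the essentials (these are not the actual RocksDB types; queue linking and state transitions are elided):

    #include <atomic>
    #include <cassert>

    struct Writer { Writer* link_older = nullptr; };
    std::atomic<Writer*> newest_writer{nullptr};

    // Walk from the newest writer toward older ones until the writer just
    // after `boundary` is found; that writer becomes the next leader.
    Writer* FindNextLeader(Writer* from, Writer* boundary) {
      Writer* current = from;
      while (current->link_older != boundary) {
        current = current->link_older;
        assert(current != nullptr);
      }
      return current;
    }

    // Called by the finished group's leader; last_writer is the group's tail.
    Writer* BoundGroupAndFindNextLeader(Writer* last_writer) {
      Writer dummy;
      Writer* expected = last_writer;
      // If nothing was enqueued after the group, park the dummy as the new
      // tail so the group's boundary stays known while it links elsewhere.
      if (newest_writer.compare_exchange_strong(expected, &dummy)) {
        expected = &dummy;
        // Remove the dummy; failure means a writer raced in after it.
        if (newest_writer.compare_exchange_strong(expected, nullptr)) {
          return nullptr;  // queue is empty, no next leader yet
        }
        return FindNextLeader(expected, &dummy);
      }
      return FindNextLeader(expected, last_writer);
    }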
@@ -392,6 +392,10 @@ class WriteThread {
  // concurrently with itself.
  void CreateMissingNewerLinks(Writer* head);

+  // Starting from a pending writer, follow link_older to search for the next
+  // leader, until we hit the boundary.
+  Writer* FindNextLeader(Writer* pending_writer, Writer* boundary);
+
  // Set the leader in write_group to completed state and remove it from the
  // write group.
  void CompleteLeader(WriteGroup& write_group);
@@ -195,6 +195,11 @@ class MergeOperator {
  // during a point lookup, thereby helping in limiting the number of levels to
  // read from.
  // Doesn't help with iterators.
+  //
+  // Note: the merge operands are passed to this function in the reversed order
+  // relative to how they were merged (passed to FullMerge or FullMergeV2)
+  // for performance reasons, see also:
+  // https://github.com/facebook/rocksdb/issues/3865
  virtual bool ShouldMerge(const std::vector<Slice>& /*operands*/) const {
    return false;
  }
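A hypothetical operator exploiting the documented (reversed) order — a sketch, not code from the repository; the last-write-wins semantics are invented for illustration:

    #include "rocksdb/merge_operator.h"
    #include "rocksdb/slice.h"

    class LastWriteWinsOperator : public rocksdb::MergeOperator {
     public:
      const char* Name() const override { return "LastWriteWinsOperator"; }

      bool FullMergeV2(const MergeOperationInput& merge_in,
                       MergeOperationOutput* merge_out) const override {
        // Here the operand list is in forward (oldest-first) merge order.
        merge_out->new_value = merge_in.operand_list.back().ToString();
        return true;
      }

      bool ShouldMerge(const std::vector<rocksdb::Slice>& operands) const override {
        // operands[0] is the newest operand under the reversed ordering, and
        // for this operator any single operand fully determines the value,
        // so the lookup can stop without visiting older levels.
        return !operands.empty();
      }
    };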
@@ -210,12 +210,20 @@ class LDBCommand {
  bool ParseStringOption(const std::map<std::string, std::string>& options,
                         const std::string& option, std::string* value);

+  /**
+   * Returns the value of the specified option as a boolean.
+   * default_val is used if the option is not found in options.
+   * Throws an exception if the value of the option is not
+   * "true" or "false" (case insensitive).
+   */
+  bool ParseBooleanOption(const std::map<std::string, std::string>& options,
+                          const std::string& option, bool default_val);

  Options options_;
  std::vector<ColumnFamilyDescriptor> column_families_;
  LDBOptions ldb_options_;

 private:
  friend class WALDumperCommand;
  /**
   * Interpret command line options and flags to determine if the key
   * should be input/output in hex.
@@ -230,15 +238,6 @@ class LDBCommand {
  bool IsValueHex(const std::map<std::string, std::string>& options,
                  const std::vector<std::string>& flags);

-  /**
-   * Returns the value of the specified option as a boolean.
-   * default_val is used if the option is not found in options.
-   * Throws an exception if the value of the option is not
-   * "true" or "false" (case insensitive).
-   */
-  bool ParseBooleanOption(const std::map<std::string, std::string>& options,
-                          const std::string& option, bool default_val);
-
  /**
   * Converts val to a boolean.
   * val must be either true or false (case insensitive).
@@ -5,8 +5,8 @@
#pragma once

#define ROCKSDB_MAJOR 5
-#define ROCKSDB_MINOR 14
-#define ROCKSDB_PATCH 0
+#define ROCKSDB_MINOR 15
+#define ROCKSDB_PATCH 10

// Do not use these. We made the mistake of declaring macros starting with
// double underscore. Now we have to live with our choice. We'll deprecate these
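Downstream code can gate on the bumped macros at compile time, e.g.:

    #include <rocksdb/version.h>

    #if ROCKSDB_MAJOR > 5 || (ROCKSDB_MAJOR == 5 && ROCKSDB_MINOR >= 15)
    // Safe to reference 5.15 additions such as bottommost_compression_opts.
    #endif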
@@ -3,6 +3,8 @@ cmake_minimum_required(VERSION 3.4)
set(JNI_NATIVE_SOURCES
  rocksjni/backupablejni.cc
  rocksjni/backupenginejni.cc
  rocksjni/cassandra_compactionfilterjni.cc
  rocksjni/cassandra_value_operator.cc
  rocksjni/checkpoint.cc
  rocksjni/clock_cache.cc
  rocksjni/columnfamilyhandle.cc
@@ -25,14 +27,14 @@ set(JNI_NATIVE_SOURCES
  rocksjni/memtablejni.cc
  rocksjni/merge_operator.cc
  rocksjni/native_comparator_wrapper_test.cc
  rocksjni/optimistic_transaction_db.cc
  rocksjni/optimistic_transaction_options.cc
  rocksjni/options.cc
  rocksjni/options_util.cc
  rocksjni/ratelimiterjni.cc
  rocksjni/remove_emptyvalue_compactionfilterjni.cc
  rocksjni/rocks_callback_object.cc
  rocksjni/cassandra_compactionfilterjni.cc
  rocksjni/cassandra_value_operator.cc
  rocksjni/restorejni.cc
  rocksjni/rocks_callback_object.cc
  rocksjni/rocksdb_exception_test.cc
  rocksjni/rocksjni.cc
  rocksjni/slice.cc
@@ -42,28 +44,33 @@ set(JNI_NATIVE_SOURCES
  rocksjni/statistics.cc
  rocksjni/statisticsjni.cc
  rocksjni/table.cc
  rocksjni/transaction.cc
  rocksjni/transaction_db.cc
  rocksjni/transaction_db_options.cc
  rocksjni/transaction_log.cc
  rocksjni/transaction_notifier.cc
  rocksjni/transaction_notifier_jnicallback.cc
  rocksjni/transaction_options.cc
  rocksjni/ttl.cc
  rocksjni/write_batch.cc
  rocksjni/writebatchhandlerjnicallback.cc
  rocksjni/write_batch_test.cc
  rocksjni/write_batch_with_index.cc
  rocksjni/writebatchhandlerjnicallback.cc
)

set(NATIVE_JAVA_CLASSES
  org.rocksdb.AbstractCompactionFilter
  org.rocksdb.AbstractCompactionFilterFactory
  org.rocksdb.AbstractComparator
  org.rocksdb.AbstractImmutableNativeReference
  org.rocksdb.AbstractNativeReference
  org.rocksdb.AbstractRocksIterator
  org.rocksdb.AbstractSlice
  org.rocksdb.AbstractWriteBatch
  org.rocksdb.AbstractTransactionNotifier
  org.rocksdb.BackupableDBOptions
  org.rocksdb.BackupEngine
  org.rocksdb.BackupEngineTest
  org.rocksdb.BlockBasedTableConfig
  org.rocksdb.BloomFilter
  org.rocksdb.Cache
  org.rocksdb.CassandraCompactionFilter
  org.rocksdb.CassandraValueMergeOperator
  org.rocksdb.Checkpoint
@@ -88,10 +95,10 @@ set(NATIVE_JAVA_CLASSES
  org.rocksdb.Logger
  org.rocksdb.LRUCache
  org.rocksdb.MemTableConfig
  org.rocksdb.MergeOperator
  org.rocksdb.NativeComparatorWrapper
  org.rocksdb.NativeComparatorWrapperTest.NativeStringComparatorWrapper
  org.rocksdb.NativeLibraryLoader
  org.rocksdb.OptimisticTransactionDB
  org.rocksdb.OptimisticTransactionOptions
  org.rocksdb.Options
  org.rocksdb.OptionsUtil
  org.rocksdb.PlainTableConfig
@@ -101,7 +108,6 @@ set(NATIVE_JAVA_CLASSES
  org.rocksdb.RestoreOptions
  org.rocksdb.RocksCallbackObject
  org.rocksdb.RocksDB
  org.rocksdb.RocksDBExceptionTest
  org.rocksdb.RocksEnv
  org.rocksdb.RocksIterator
  org.rocksdb.RocksIteratorInterface
@@ -111,24 +117,29 @@ set(NATIVE_JAVA_CLASSES
  org.rocksdb.SkipListMemTableConfig
  org.rocksdb.Slice
  org.rocksdb.Snapshot
  org.rocksdb.SnapshotTest
  org.rocksdb.SstFileManager
  org.rocksdb.SstFileWriter
  org.rocksdb.Statistics
  org.rocksdb.StringAppendOperator
  org.rocksdb.TableFormatConfig
  org.rocksdb.Transaction
  org.rocksdb.TransactionDB
  org.rocksdb.TransactionDBOptions
  org.rocksdb.TransactionLogIterator
  org.rocksdb.TransactionOptions
  org.rocksdb.TtlDB
  org.rocksdb.VectorMemTableConfig
  org.rocksdb.WBWIRocksIterator
  org.rocksdb.WriteBatch
  org.rocksdb.WriteBatch.Handler
  org.rocksdb.WriteBatchTest
  org.rocksdb.WriteBatchTestInternalHelper
  org.rocksdb.WriteBatchInterface
  org.rocksdb.WriteBatchWithIndex
  org.rocksdb.WriteOptions
  org.rocksdb.util.CapturingWriteBatchHandler
  org.rocksdb.util.WriteBatchGetter
  org.rocksdb.NativeComparatorWrapperTest
  org.rocksdb.RocksDBExceptionTest
  org.rocksdb.SnapshotTest
  org.rocksdb.WriteBatchTest
  org.rocksdb.WriteBatchTestInternalHelper
)

include(FindJava)
@@ -150,13 +161,14 @@ set(JAVA_TESTCLASSPATH ${JAVA_JUNIT_JAR} ${JAVA_HAMCR_JAR} ${JAVA_MOCKITO_JAR} $
add_jar(
  rocksdbjni_classes
  SOURCES
  src/main/java/org/rocksdb/AbstractCompactionFilter.java
  src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java
  src/main/java/org/rocksdb/AbstractCompactionFilter.java
  src/main/java/org/rocksdb/AbstractComparator.java
  src/main/java/org/rocksdb/AbstractImmutableNativeReference.java
  src/main/java/org/rocksdb/AbstractNativeReference.java
  src/main/java/org/rocksdb/AbstractRocksIterator.java
  src/main/java/org/rocksdb/AbstractSlice.java
  src/main/java/org/rocksdb/AbstractTransactionNotifier.java
  src/main/java/org/rocksdb/AbstractWriteBatch.java
  src/main/java/org/rocksdb/AccessHint.java
  src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java
@@ -175,8 +187,8 @@ add_jar(
  src/main/java/org/rocksdb/ClockCache.java
  src/main/java/org/rocksdb/ColumnFamilyDescriptor.java
  src/main/java/org/rocksdb/ColumnFamilyHandle.java
  src/main/java/org/rocksdb/ColumnFamilyOptions.java
  src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java
  src/main/java/org/rocksdb/ColumnFamilyOptions.java
  src/main/java/org/rocksdb/CompactionOptionsFIFO.java
  src/main/java/org/rocksdb/CompactionOptionsUniversal.java
  src/main/java/org/rocksdb/CompactionPriority.java
@@ -187,8 +199,8 @@ add_jar(
  src/main/java/org/rocksdb/ComparatorType.java
  src/main/java/org/rocksdb/CompressionOptions.java
  src/main/java/org/rocksdb/CompressionType.java
  src/main/java/org/rocksdb/DBOptions.java
  src/main/java/org/rocksdb/DBOptionsInterface.java
  src/main/java/org/rocksdb/DBOptions.java
  src/main/java/org/rocksdb/DbPath.java
  src/main/java/org/rocksdb/DirectComparator.java
  src/main/java/org/rocksdb/DirectSlice.java
@@ -209,10 +221,12 @@ add_jar(
  src/main/java/org/rocksdb/LRUCache.java
  src/main/java/org/rocksdb/MemTableConfig.java
  src/main/java/org/rocksdb/MergeOperator.java
  src/main/java/org/rocksdb/MutableColumnFamilyOptions.java
  src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java
  src/main/java/org/rocksdb/MutableColumnFamilyOptions.java
  src/main/java/org/rocksdb/NativeComparatorWrapper.java
  src/main/java/org/rocksdb/NativeLibraryLoader.java
  src/main/java/org/rocksdb/OptimisticTransactionDB.java
  src/main/java/org/rocksdb/OptimisticTransactionOptions.java
  src/main/java/org/rocksdb/Options.java
  src/main/java/org/rocksdb/OptionsUtil.java
  src/main/java/org/rocksdb/PlainTableConfig.java
@@ -223,11 +237,11 @@ add_jar(
  src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java
  src/main/java/org/rocksdb/RestoreOptions.java
  src/main/java/org/rocksdb/RocksCallbackObject.java
  src/main/java/org/rocksdb/RocksDB.java
  src/main/java/org/rocksdb/RocksDBException.java
  src/main/java/org/rocksdb/RocksDB.java
  src/main/java/org/rocksdb/RocksEnv.java
  src/main/java/org/rocksdb/RocksIterator.java
  src/main/java/org/rocksdb/RocksIteratorInterface.java
  src/main/java/org/rocksdb/RocksIterator.java
  src/main/java/org/rocksdb/RocksMemEnv.java
  src/main/java/org/rocksdb/RocksMutableObject.java
  src/main/java/org/rocksdb/RocksObject.java
@@ -236,22 +250,36 @@ add_jar(
  src/main/java/org/rocksdb/Snapshot.java
  src/main/java/org/rocksdb/SstFileManager.java
  src/main/java/org/rocksdb/SstFileWriter.java
  src/main/java/org/rocksdb/StatisticsCollectorCallback.java
  src/main/java/org/rocksdb/StatisticsCollector.java
  src/main/java/org/rocksdb/Statistics.java
  src/main/java/org/rocksdb/StatsCollectorInput.java
  src/main/java/org/rocksdb/StatsLevel.java
  src/main/java/org/rocksdb/Status.java
  src/main/java/org/rocksdb/StringAppendOperator.java
  src/main/java/org/rocksdb/TableFormatConfig.java
  src/main/java/org/rocksdb/TickerType.java
  src/main/java/org/rocksdb/TransactionalDB.java
  src/main/java/org/rocksdb/TransactionalOptions.java
  src/main/java/org/rocksdb/TransactionDB.java
  src/main/java/org/rocksdb/TransactionDBOptions.java
  src/main/java/org/rocksdb/Transaction.java
  src/main/java/org/rocksdb/TransactionLogIterator.java
  src/main/java/org/rocksdb/TransactionOptions.java
  src/main/java/org/rocksdb/TtlDB.java
  src/main/java/org/rocksdb/util/Environment.java
  src/main/java/org/rocksdb/TxnDBWritePolicy.java
  src/main/java/org/rocksdb/VectorMemTableConfig.java
  src/main/java/org/rocksdb/WALRecoveryMode.java
  src/main/java/org/rocksdb/WBWIRocksIterator.java
  src/main/java/org/rocksdb/WriteBatch.java
  src/main/java/org/rocksdb/WriteBatchInterface.java
  src/main/java/org/rocksdb/WriteBatch.java
  src/main/java/org/rocksdb/WriteBatchWithIndex.java
  src/main/java/org/rocksdb/WriteOptions.java
  src/main/java/org/rocksdb/util/BytewiseComparator.java
  src/main/java/org/rocksdb/util/DirectBytewiseComparator.java
  src/main/java/org/rocksdb/util/Environment.java
  src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java
  src/main/java/org/rocksdb/util/SizeUnit.java
  src/test/java/org/rocksdb/BackupEngineTest.java
  src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java
  src/test/java/org/rocksdb/NativeComparatorWrapperTest.java
@ -16,6 +16,11 @@
|
||||
|
||||
using namespace std::placeholders;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4503) // identifier' : decorated name length exceeded, name was truncated
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Class: org_rocksdb_Transaction
|
||||
* Method: setSnapshot
|
||||
@ -264,15 +269,14 @@ typedef std::function<std::vector<rocksdb::Status>(
|
||||
std::vector<std::string>*)>
|
||||
FnMultiGet;
|
||||
|
||||
void free_key_parts(
|
||||
void free_parts(
|
||||
JNIEnv* env,
|
||||
std::vector<std::tuple<jbyteArray, jbyte*, jobject>> key_parts_to_free) {
|
||||
for (std::vector<std::tuple<jbyteArray, jbyte*, jobject>>::size_type i = 0;
|
||||
i < key_parts_to_free.size(); i++) {
|
||||
std::vector<std::tuple<jbyteArray, jbyte*, jobject>> &parts_to_free) {
|
||||
for (auto &value : parts_to_free) {
|
||||
jobject jk;
|
||||
jbyteArray jk_ba;
|
||||
jbyte* jk_val;
|
||||
std::tie(jk_ba, jk_val, jk) = key_parts_to_free[i];
|
||||
std::tie(jk_ba, jk_val, jk) = value;
|
||||
env->ReleaseByteArrayElements(jk_ba, jk_val, JNI_ABORT);
|
||||
env->DeleteLocalRef(jk);
|
||||
}
|
||||
@ -295,7 +299,7 @@ jobjectArray txn_multi_get_helper(JNIEnv* env, const FnMultiGet& fn_multi_get,
|
||||
const jobject jk = env->GetObjectArrayElement(jkey_parts, i);
|
||||
if (env->ExceptionCheck()) {
|
||||
// exception thrown: ArrayIndexOutOfBoundsException
|
||||
free_key_parts(env, key_parts_to_free);
|
||||
free_parts(env, key_parts_to_free);
|
||||
return nullptr;
|
||||
}
|
||||
jbyteArray jk_ba = reinterpret_cast<jbyteArray>(jk);
|
||||
@ -303,14 +307,14 @@ jobjectArray txn_multi_get_helper(JNIEnv* env, const FnMultiGet& fn_multi_get,
|
||||
if (env->EnsureLocalCapacity(len_key) != 0) {
|
||||
// out of memory
|
||||
env->DeleteLocalRef(jk);
|
||||
free_key_parts(env, key_parts_to_free);
|
||||
free_parts(env, key_parts_to_free);
|
||||
return nullptr;
|
||||
}
|
||||
jbyte* jk_val = env->GetByteArrayElements(jk_ba, nullptr);
|
||||
if (jk_val == nullptr) {
|
||||
// exception thrown: OutOfMemoryError
|
||||
env->DeleteLocalRef(jk);
|
||||
free_key_parts(env, key_parts_to_free);
|
||||
free_parts(env, key_parts_to_free);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -327,7 +331,7 @@ jobjectArray txn_multi_get_helper(JNIEnv* env, const FnMultiGet& fn_multi_get,
|
||||
fn_multi_get(*read_options, key_parts, &value_parts);
|
||||
|
||||
// free up allocated byte arrays
|
||||
free_key_parts(env, key_parts_to_free);
|
||||
free_parts(env, key_parts_to_free);
|
||||
|
||||
// prepare the results
|
||||
const jclass jcls_ba = env->FindClass("[B");
|
||||
@ -600,28 +604,6 @@ typedef std::function<rocksdb::Status(const rocksdb::SliceParts&,
|
||||
const rocksdb::SliceParts&)>
|
||||
FnWriteKVParts;
|
||||
|
||||
void free_key_value_parts(
|
||||
JNIEnv* env, const int32_t len,
|
||||
std::tuple<jbyteArray, jbyte*, jobject> jkey_parts_to_free[],
|
||||
std::tuple<jbyteArray, jbyte*, jobject> jvalue_parts_to_free[]) {
|
||||
for (int32_t i = len - 1; i >= 0; --i) {
|
||||
jbyteArray jba_value_part;
|
||||
jbyte* jvalue_part;
|
||||
jobject jobj_value_part;
|
||||
std::tie(jba_value_part, jvalue_part, jobj_value_part) =
|
||||
jvalue_parts_to_free[i];
|
||||
env->ReleaseByteArrayElements(jba_value_part, jvalue_part, JNI_ABORT);
|
||||
env->DeleteLocalRef(jobj_value_part);
|
||||
|
||||
jbyteArray jba_key_part;
|
||||
jbyte* jkey_part;
|
||||
jobject jobj_key_part;
|
||||
std::tie(jba_key_part, jkey_part, jobj_key_part) = jkey_parts_to_free[i];
|
||||
env->ReleaseByteArrayElements(jba_key_part, jkey_part, JNI_ABORT);
|
||||
env->DeleteLocalRef(jobj_key_part);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(AR) consider refactoring to share this between here and rocksjni.cc
|
||||
void txn_write_kv_parts_helper(JNIEnv* env,
|
||||
const FnWriteKVParts& fn_write_kv_parts,
|
||||
@@ -629,29 +611,29 @@ void txn_write_kv_parts_helper(JNIEnv* env,
                                const jint& jkey_parts_len,
                                const jobjectArray& jvalue_parts,
                                const jint& jvalue_parts_len) {
 #ifndef DEBUG
   (void) jvalue_parts_len;
 #else
   assert(jkey_parts_len == jvalue_parts_len);
 #endif

-  rocksdb::Slice key_parts[jkey_parts_len];
-  rocksdb::Slice value_parts[jvalue_parts_len];
-  std::tuple<jbyteArray, jbyte*, jobject> jkey_parts_to_free[jkey_parts_len];
-  std::tuple<jbyteArray, jbyte*, jobject>
-      jvalue_parts_to_free[jvalue_parts_len];
+  auto key_parts = std::vector<rocksdb::Slice>();
+  auto value_parts = std::vector<rocksdb::Slice>();
+  auto jparts_to_free = std::vector<std::tuple<jbyteArray, jbyte*, jobject>>();

   // convert java key_parts/value_parts byte[][] to Slice(s)
   for (jsize i = 0; i < jkey_parts_len; ++i) {
     const jobject jobj_key_part = env->GetObjectArrayElement(jkey_parts, i);
     if (env->ExceptionCheck()) {
       // exception thrown: ArrayIndexOutOfBoundsException
-      free_key_value_parts(env, jkey_parts_len, jkey_parts_to_free,
-                           jvalue_parts_to_free);
+      free_parts(env, jparts_to_free);
       return;
     }
     const jobject jobj_value_part = env->GetObjectArrayElement(jvalue_parts, i);
     if (env->ExceptionCheck()) {
       // exception thrown: ArrayIndexOutOfBoundsException
       env->DeleteLocalRef(jobj_key_part);
-      free_key_value_parts(env, jkey_parts_len, jkey_parts_to_free,
-                           jvalue_parts_to_free);
+      free_parts(env, jparts_to_free);
       return;
     }
@@ -661,8 +643,7 @@ void txn_write_kv_parts_helper(JNIEnv* env,
       // out of memory
       env->DeleteLocalRef(jobj_value_part);
       env->DeleteLocalRef(jobj_key_part);
-      free_key_value_parts(env, jkey_parts_len, jkey_parts_to_free,
-                           jvalue_parts_to_free);
+      free_parts(env, jparts_to_free);
       return;
     }
     jbyte* jkey_part = env->GetByteArrayElements(jba_key_part, nullptr);
@@ -670,8 +651,7 @@ void txn_write_kv_parts_helper(JNIEnv* env,
       // exception thrown: OutOfMemoryError
       env->DeleteLocalRef(jobj_value_part);
       env->DeleteLocalRef(jobj_key_part);
-      free_key_value_parts(env, jkey_parts_len, jkey_parts_to_free,
-                           jvalue_parts_to_free);
+      free_parts(env, jparts_to_free);
       return;
     }
@@ -682,8 +662,7 @@ void txn_write_kv_parts_helper(JNIEnv* env,
       // out of memory
       env->DeleteLocalRef(jobj_value_part);
       env->DeleteLocalRef(jobj_key_part);
-      free_key_value_parts(env, jkey_parts_len, jkey_parts_to_free,
-                           jvalue_parts_to_free);
+      free_parts(env, jparts_to_free);
       return;
     }
     jbyte* jvalue_part = env->GetByteArrayElements(jba_value_part, nullptr);
@@ -692,30 +671,28 @@ void txn_write_kv_parts_helper(JNIEnv* env,
       env->ReleaseByteArrayElements(jba_value_part, jvalue_part, JNI_ABORT);
       env->DeleteLocalRef(jobj_value_part);
       env->DeleteLocalRef(jobj_key_part);
-      free_key_value_parts(env, jkey_parts_len, jkey_parts_to_free,
-                           jvalue_parts_to_free);
+      free_parts(env, jparts_to_free);
       return;
     }

-    jkey_parts_to_free[i] = std::tuple<jbyteArray, jbyte*, jobject>(
-        jba_key_part, jkey_part, jobj_key_part);
-    jvalue_parts_to_free[i] = std::tuple<jbyteArray, jbyte*, jobject>(
-        jba_value_part, jvalue_part, jobj_value_part);
+    jparts_to_free.push_back(std::make_tuple(
+        jba_key_part, jkey_part, jobj_key_part));
+    jparts_to_free.push_back(std::make_tuple(
+        jba_value_part, jvalue_part, jobj_value_part));

-    key_parts[i] =
-        rocksdb::Slice(reinterpret_cast<char*>(jkey_part), jkey_part_len);
-    value_parts[i] =
-        rocksdb::Slice(reinterpret_cast<char*>(jvalue_part), jvalue_part_len);
+    key_parts.push_back(
+        rocksdb::Slice(reinterpret_cast<char*>(jkey_part), jkey_part_len));
+    value_parts.push_back(
+        rocksdb::Slice(reinterpret_cast<char*>(jvalue_part), jvalue_part_len));
   }

   // call the write_multi function
-  rocksdb::Status s =
-      fn_write_kv_parts(rocksdb::SliceParts(key_parts, jkey_parts_len),
-                        rocksdb::SliceParts(value_parts, jvalue_parts_len));
+  rocksdb::Status s = fn_write_kv_parts(
+      rocksdb::SliceParts(key_parts.data(), (int)key_parts.size()),
+      rocksdb::SliceParts(value_parts.data(), (int)value_parts.size()));

   // cleanup temporary memory
-  free_key_value_parts(env, jkey_parts_len, jkey_parts_to_free,
-                       jvalue_parts_to_free);
+  free_parts(env, jparts_to_free);

   // return
   if (s.ok()) {
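The two per-type cleanup helpers are collapsed here into a single `free_parts` call that walks one vector of (array, pinned pointer, local ref) tuples. The helper itself is outside this excerpt; a minimal sketch of what it presumably looks like, assuming it mirrors the deleted `free_key_value_parts` logic:

```cpp
// Hypothetical reconstruction of free_parts (not shown in this diff):
// release every pinned byte[] with JNI_ABORT (no write-back) and drop
// the local reference that kept the array alive.
#include <jni.h>
#include <tuple>
#include <vector>

void free_parts(
    JNIEnv* env,
    std::vector<std::tuple<jbyteArray, jbyte*, jobject>>& parts_to_free) {
  for (auto& part : parts_to_free) {
    jbyteArray jba;
    jbyte* jb;
    jobject jobj;
    std::tie(jba, jb, jobj) = part;
    env->ReleaseByteArrayElements(jba, jb, JNI_ABORT);
    env->DeleteLocalRef(jobj);
  }
}
```

Because the vector only ever contains fully pinned entries, every early-return path can hand the same container to one helper instead of tracking a separate count per array.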
@@ -857,33 +834,22 @@ void Java_org_rocksdb_Transaction_delete__J_3BI(JNIEnv* env, jobject /*jobj*/,
 typedef std::function<rocksdb::Status(const rocksdb::SliceParts&)>
     FnWriteKParts;

-void free_key_parts(
-    JNIEnv* env, const int32_t len,
-    std::tuple<jbyteArray, jbyte*, jobject> jkey_parts_to_free[]) {
-  for (int32_t i = len - 1; i >= 0; --i) {
-    jbyteArray jba_key_part;
-    jbyte* jkey;
-    jobject jobj_key_part;
-    std::tie(jba_key_part, jkey, jobj_key_part) = jkey_parts_to_free[i];
-    env->ReleaseByteArrayElements(jba_key_part, jkey, JNI_ABORT);
-    env->DeleteLocalRef(jobj_key_part);
-  }
-}

 // TODO(AR) consider refactoring to share this between here and rocksjni.cc
 void txn_write_k_parts_helper(JNIEnv* env,
                               const FnWriteKParts& fn_write_k_parts,
                               const jobjectArray& jkey_parts,
                               const jint& jkey_parts_len) {
-  rocksdb::Slice key_parts[jkey_parts_len];
-  std::tuple<jbyteArray, jbyte*, jobject> jkey_parts_to_free[jkey_parts_len];
+  std::vector<rocksdb::Slice> key_parts;
+  std::vector<std::tuple<jbyteArray, jbyte*, jobject>> jkey_parts_to_free;

   // convert java key_parts byte[][] to Slice(s)
   for (jint i = 0; i < jkey_parts_len; ++i) {
     const jobject jobj_key_part = env->GetObjectArrayElement(jkey_parts, i);
     if (env->ExceptionCheck()) {
       // exception thrown: ArrayIndexOutOfBoundsException
-      free_key_parts(env, jkey_parts_len, jkey_parts_to_free);
+      free_parts(env, jkey_parts_to_free);
       return;
     }
@@ -892,30 +858,29 @@ void txn_write_k_parts_helper(JNIEnv* env,
     if (env->EnsureLocalCapacity(jkey_part_len) != 0) {
       // out of memory
       env->DeleteLocalRef(jobj_key_part);
-      free_key_parts(env, jkey_parts_len, jkey_parts_to_free);
+      free_parts(env, jkey_parts_to_free);
       return;
     }
     jbyte* jkey_part = env->GetByteArrayElements(jba_key_part, nullptr);
     if (jkey_part == nullptr) {
       // exception thrown: OutOfMemoryError
       env->DeleteLocalRef(jobj_key_part);
-      free_key_parts(env, jkey_parts_len, jkey_parts_to_free);
+      free_parts(env, jkey_parts_to_free);
       return;
     }

-    jkey_parts_to_free[i] = std::tuple<jbyteArray, jbyte*, jobject>(
-        jba_key_part, jkey_part, jobj_key_part);
+    jkey_parts_to_free.push_back(std::tuple<jbyteArray, jbyte*, jobject>(
+        jba_key_part, jkey_part, jobj_key_part));

-    key_parts[i] =
-        rocksdb::Slice(reinterpret_cast<char*>(jkey_part), jkey_part_len);
+    key_parts.push_back(rocksdb::Slice(reinterpret_cast<char*>(jkey_part), jkey_part_len));
   }

   // call the write_multi function
   rocksdb::Status s =
-      fn_write_k_parts(rocksdb::SliceParts(key_parts, jkey_parts_len));
+      fn_write_k_parts(rocksdb::SliceParts(key_parts.data(), (int)key_parts.size()));

   // cleanup temporary memory
-  free_key_parts(env, jkey_parts_len, jkey_parts_to_free);
+  free_parts(env, jkey_parts_to_free);

   // return
   if (s.ok()) {
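The replaced stack arrays (`rocksdb::Slice key_parts[jkey_parts_len]`) are variable-length arrays: a GCC/Clang extension, not standard C++, and MSVC rejects them outright. `std::vector` is the portable equivalent with the same indexing syntax. A small self-contained illustration, assuming nothing beyond standard C++:

```cpp
#include <vector>

void portable(int n) {
  // int vla[n];            // VLA: compiler extension, not valid C++;
  //                        // fails to compile under MSVC
  std::vector<int> v(n);    // portable: size chosen at runtime
  if (n > 0) {
    v[0] = 42;              // same indexing syntax the array version used
  }
}
```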
@@ -1582,7 +1547,7 @@ jbyte Java_org_rocksdb_Transaction_getState(JNIEnv* /*env*/, jobject /*jobj*/,
   }

   assert(false);
-  return 0xFF;
+  return static_cast<jbyte>(-1);
 }

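The `0xFF` literal is an `int` with value 255, which does not fit in JNI's signed 8-bit `jbyte`; the cast makes the intended all-bits-set sentinel explicit. A tiny illustration (the `jbyte` alias below is a stand-in for the JNI typedef, not the real header):

```cpp
#include <cstdint>

using jbyte = int8_t;  // stand-in for JNI's signed 8-bit jbyte

jbyte state_old() {
  return 0xFF;  // int 255 converted to a signed byte: relies on
                // implementation-defined narrowing to land on -1
}

jbyte state_new() {
  return static_cast<jbyte>(-1);  // same bit pattern, intent explicit
}
```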
/*

@@ -269,7 +269,7 @@ jlongArray Java_org_rocksdb_TransactionDB_getAllPreparedTransactions(
   assert(size < UINT32_MAX);  // does it fit in a jint?

   const jsize len = static_cast<jsize>(size);
-  jlong tmp[len];
+  std::vector<jlong> tmp(len);
   for (jsize i = 0; i < len; ++i) {
     tmp[i] = reinterpret_cast<jlong>(txns[i]);
   }
@@ -279,7 +279,7 @@ jlongArray Java_org_rocksdb_TransactionDB_getAllPreparedTransactions(
     // exception thrown: OutOfMemoryError
     return nullptr;
   }
-  env->SetLongArrayRegion(jtxns, 0, len, tmp);
+  env->SetLongArrayRegion(jtxns, 0, len, tmp.data());
   if (env->ExceptionCheck()) {
     // exception thrown: ArrayIndexOutOfBoundsException
     env->DeleteLocalRef(jtxns);

@@ -306,7 +306,11 @@ rocksdb::Status WriteBatchHandlerJniCallback::PutBlobIndexCF(uint32_t column_fam
 }

 rocksdb::Status WriteBatchHandlerJniCallback::MarkBeginPrepare(bool unprepare) {
+#ifndef DEBUG
+  (void) unprepare;
+#else
   assert(!unprepare);
+#endif
   m_env->CallVoidMethod(m_jcallback_obj, m_jMarkBeginPrepareMethodId);

   // check for Exception, in-particular RocksDBException
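When assertions compile out, `unprepare` would become an unused parameter and trip warnings-as-errors builds; the `(void)` cast is the conventional silencer. A minimal sketch of the same pattern (note this codebase keys assertions off its own `DEBUG` macro rather than the standard `NDEBUG`):

```cpp
#include <cassert>

void check_flag(bool ok) {
#ifndef DEBUG
  (void)ok;     // discard explicitly so -Wunused-parameter stays quiet
#else
  assert(ok);   // only evaluated in debug builds
#endif
  // ... rest of the function uses no other state ...
}
```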
@ -688,7 +688,7 @@ public abstract class AbstractTransactionTest {
|
||||
final long preStartTxnTime = System.currentTimeMillis();
|
||||
try(final DBContainer dbContainer = startDb();
|
||||
final Transaction txn = dbContainer.beginTransaction()) {
|
||||
Thread.sleep(1);
|
||||
Thread.sleep(2);
|
||||
|
||||
final long txnElapsedTime = txn.getElapsedTime();
|
||||
assertThat(txnElapsedTime).isLessThan(System.currentTimeMillis()
|
||||
|
@@ -37,10 +37,14 @@ ZSTD_customMem GetJeZstdAllocationOverrides() {
 // Global operators to be replaced by a linker when this file is
 // a part of the build

+namespace rocksdb {
+namespace port {
+void* jemalloc_aligned_alloc(size_t size, size_t alignment) ROCKSDB_NOEXCEPT {
+  return je_aligned_alloc(alignment, size);
+}
+void jemalloc_aligned_free(void* p) ROCKSDB_NOEXCEPT { je_free(p); }
+}  // port
+}  // rocksdb

 void* operator new(size_t size) {
   void* p = je_malloc(size);
src.mk
@@ -163,7 +163,6 @@ LIB_SOURCES = \
   utilities/blob_db/blob_log_format.cc \
   utilities/blob_db/blob_log_reader.cc \
   utilities/blob_db/blob_log_writer.cc \
-  utilities/blob_db/ttl_extractor.cc \
   utilities/cassandra/cassandra_compaction_filter.cc \
   utilities/cassandra/format.cc \
   utilities/cassandra/merge_operator.cc \
@@ -37,7 +37,6 @@
 #include "table/filter_block.h"
 #include "table/format.h"
 #include "table/full_filter_block.h"
-#include "table/meta_blocks.h"
 #include "table/table_builder.h"

 #include "util/string_util.h"
@@ -668,6 +667,172 @@ Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents,
   return Status::OK();
 }

+void BlockBasedTableBuilder::WriteFilterBlock(
+    MetaIndexBuilder* meta_index_builder) {
+  BlockHandle filter_block_handle;
+  bool empty_filter_block = (rep_->filter_builder == nullptr ||
+                             rep_->filter_builder->NumAdded() == 0);
+  if (ok() && !empty_filter_block) {
+    Status s = Status::Incomplete();
+    while (ok() && s.IsIncomplete()) {
+      Slice filter_content = rep_->filter_builder->Finish(filter_block_handle, &s);
+      assert(s.ok() || s.IsIncomplete());
+      rep_->props.filter_size += filter_content.size();
+      WriteRawBlock(filter_content, kNoCompression, &filter_block_handle);
+    }
+  }
+  if (ok() && !empty_filter_block) {
+    // Add mapping from "<filter_block_prefix>.Name" to location
+    // of filter data.
+    std::string key;
+    if (rep_->filter_builder->IsBlockBased()) {
+      key = BlockBasedTable::kFilterBlockPrefix;
+    } else {
+      key = rep_->table_options.partition_filters
+                ? BlockBasedTable::kPartitionedFilterBlockPrefix
+                : BlockBasedTable::kFullFilterBlockPrefix;
+    }
+    key.append(rep_->table_options.filter_policy->Name());
+    meta_index_builder->Add(key, filter_block_handle);
+  }
+}

+void BlockBasedTableBuilder::WriteIndexBlock(
+    MetaIndexBuilder* meta_index_builder, BlockHandle* index_block_handle) {
+  IndexBuilder::IndexBlocks index_blocks;
+  auto index_builder_status = rep_->index_builder->Finish(&index_blocks);
+  if (index_builder_status.IsIncomplete()) {
+    // When we have more than one index partition then meta_blocks are not
+    // supported for the index. Currently meta_blocks are used only by
+    // HashIndexBuilder which is not multi-partition.
+    assert(index_blocks.meta_blocks.empty());
+  } else if (ok() && !index_builder_status.ok()) {
+    rep_->status = index_builder_status;
+  }
+  if (ok()) {
+    for (const auto& item : index_blocks.meta_blocks) {
+      BlockHandle block_handle;
+      WriteBlock(item.second, &block_handle, false /* is_data_block */);
+      if (!ok()) {
+        break;
+      }
+      meta_index_builder->Add(item.first, block_handle);
+    }
+  }
+  if (ok()) {
+    if (rep_->table_options.enable_index_compression) {
+      WriteBlock(index_blocks.index_block_contents, index_block_handle, false);
+    } else {
+      WriteRawBlock(index_blocks.index_block_contents, kNoCompression,
+                    index_block_handle);
+    }
+  }
+  // If there are more index partitions, finish them and write them out
+  Status s = index_builder_status;
+  while (ok() && s.IsIncomplete()) {
+    s = rep_->index_builder->Finish(&index_blocks, *index_block_handle);
+    if (!s.ok() && !s.IsIncomplete()) {
+      rep_->status = s;
+      return;
+    }
+    if (rep_->table_options.enable_index_compression) {
+      WriteBlock(index_blocks.index_block_contents, index_block_handle, false);
+    } else {
+      WriteRawBlock(index_blocks.index_block_contents, kNoCompression,
+                    index_block_handle);
+    }
+    // The last index_block_handle will be for the partition index block
+  }
+}

+void BlockBasedTableBuilder::WritePropertiesBlock(
+    MetaIndexBuilder* meta_index_builder) {
+  BlockHandle properties_block_handle;
+  if (ok()) {
+    PropertyBlockBuilder property_block_builder;
+    rep_->props.column_family_id = rep_->column_family_id;
+    rep_->props.column_family_name = rep_->column_family_name;
+    rep_->props.filter_policy_name = rep_->table_options.filter_policy != nullptr
+        ? rep_->table_options.filter_policy->Name()
+        : "";
+    rep_->props.index_size =
+        rep_->index_builder->IndexSize() + kBlockTrailerSize;
+    rep_->props.comparator_name = rep_->ioptions.user_comparator != nullptr
+        ? rep_->ioptions.user_comparator->Name()
+        : "nullptr";
+    rep_->props.merge_operator_name = rep_->ioptions.merge_operator != nullptr
+        ? rep_->ioptions.merge_operator->Name()
+        : "nullptr";
+    rep_->props.compression_name =
+        CompressionTypeToString(rep_->compression_ctx.type());
+    rep_->props.prefix_extractor_name = rep_->moptions.prefix_extractor != nullptr
+        ? rep_->moptions.prefix_extractor->Name()
+        : "nullptr";
+
+    std::string property_collectors_names = "[";
+    for (size_t i = 0;
+         i < rep_->ioptions.table_properties_collector_factories.size(); ++i) {
+      if (i != 0) {
+        property_collectors_names += ",";
+      }
+      property_collectors_names +=
+          rep_->ioptions.table_properties_collector_factories[i]->Name();
+    }
+    property_collectors_names += "]";
+    rep_->props.property_collectors_names = property_collectors_names;
+    if (rep_->table_options.index_type ==
+        BlockBasedTableOptions::kTwoLevelIndexSearch) {
+      assert(rep_->p_index_builder_ != nullptr);
+      rep_->props.index_partitions = rep_->p_index_builder_->NumPartitions();
+      rep_->props.top_level_index_size =
+          rep_->p_index_builder_->TopLevelIndexSize(rep_->offset);
+    }
+    rep_->props.index_key_is_user_key =
+        !rep_->index_builder->seperator_is_key_plus_seq();
+    rep_->props.creation_time = rep_->creation_time;
+    rep_->props.oldest_key_time = rep_->oldest_key_time;
+
+    // Add basic properties
+    property_block_builder.AddTableProperty(rep_->props);
+
+    // Add user collected properties
+    NotifyCollectTableCollectorsOnFinish(rep_->table_properties_collectors,
+                                         rep_->ioptions.info_log,
+                                         &property_block_builder);
+
+    WriteRawBlock(property_block_builder.Finish(), kNoCompression,
+                  &properties_block_handle);
+  }
+  if (ok()) {
+    meta_index_builder->Add(kPropertiesBlock, properties_block_handle);
+  }
+}

+void BlockBasedTableBuilder::WriteCompressionDictBlock(
+    MetaIndexBuilder* meta_index_builder) {
+  if (rep_->compression_dict && rep_->compression_dict->size()) {
+    BlockHandle compression_dict_block_handle;
+    if (ok()) {
+      WriteRawBlock(*rep_->compression_dict, kNoCompression,
+                    &compression_dict_block_handle);
+    }
+    if (ok()) {
+      meta_index_builder->Add(kCompressionDictBlock,
+                              compression_dict_block_handle);
+    }
+  }
+}

+void BlockBasedTableBuilder::WriteRangeDelBlock(
+    MetaIndexBuilder* meta_index_builder) {
+  if (ok() && !rep_->range_del_block.empty()) {
+    BlockHandle range_del_block_handle;
+    WriteRawBlock(rep_->range_del_block.Finish(), kNoCompression,
+                  &range_del_block_handle);
+    meta_index_builder->Add(kRangeDelBlock, range_del_block_handle);
+  }
+}

 Status BlockBasedTableBuilder::Finish() {
   Rep* r = rep_;
   bool empty_data_block = r->data_block.empty();
@@ -676,175 +841,30 @@ Status BlockBasedTableBuilder::Finish() {
   r->closed = true;

   // To make sure properties block is able to keep the accurate size of index
-  // block, we will finish writing all index entries here and flush them
-  // to storage after metaindex block is written.
+  // block, we will finish writing all index entries first.
   if (ok() && !empty_data_block) {
     r->index_builder->AddIndexEntry(
         &r->last_key, nullptr /* no next data block */, r->pending_handle);
   }

-  BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle,
-      compression_dict_block_handle, range_del_block_handle;
-
-  // Write filter block
-  bool empty_filter_block = (r->filter_builder == nullptr ||
-                             r->filter_builder->NumAdded() == 0);
-  if (ok() && !empty_filter_block) {
-    Status s = Status::Incomplete();
-    while (s.IsIncomplete()) {
-      Slice filter_content = r->filter_builder->Finish(filter_block_handle, &s);
-      assert(s.ok() || s.IsIncomplete());
-      r->props.filter_size += filter_content.size();
-      WriteRawBlock(filter_content, kNoCompression, &filter_block_handle);
-    }
-  }
-
-  IndexBuilder::IndexBlocks index_blocks;
-  auto index_builder_status = r->index_builder->Finish(&index_blocks);
-  if (index_builder_status.IsIncomplete()) {
-    // We we have more than one index partition then meta_blocks are not
-    // supported for the index. Currently meta_blocks are used only by
-    // HashIndexBuilder which is not multi-partition.
-    assert(index_blocks.meta_blocks.empty());
-  } else if (!index_builder_status.ok()) {
-    return index_builder_status;
-  }
-
   // Write meta blocks and metaindex block with the following order.
   //    1. [meta block: filter]
-  //    2. [meta block: properties]
+  //    2. [meta block: index]
   //    3. [meta block: compression dictionary]
   //    4. [meta block: range deletion tombstone]
-  //    5. [metaindex block]
+  //    5. [meta block: properties]
+  //    6. [metaindex block]
+  // write meta blocks
+  BlockHandle metaindex_block_handle, index_block_handle;
   MetaIndexBuilder meta_index_builder;
-  for (const auto& item : index_blocks.meta_blocks) {
-    BlockHandle block_handle;
-    WriteBlock(item.second, &block_handle, false /* is_data_block */);
-    meta_index_builder.Add(item.first, block_handle);
-  }
-
-  if (ok()) {
-    if (!empty_filter_block) {
-      // Add mapping from "<filter_block_prefix>.Name" to location
-      // of filter data.
-      std::string key;
-      if (r->filter_builder->IsBlockBased()) {
-        key = BlockBasedTable::kFilterBlockPrefix;
-      } else {
-        key = r->table_options.partition_filters
-                  ? BlockBasedTable::kPartitionedFilterBlockPrefix
-                  : BlockBasedTable::kFullFilterBlockPrefix;
-      }
-      key.append(r->table_options.filter_policy->Name());
-      meta_index_builder.Add(key, filter_block_handle);
-    }
-
-    // Write properties and compression dictionary blocks.
-    {
-      PropertyBlockBuilder property_block_builder;
-      r->props.column_family_id = r->column_family_id;
-      r->props.column_family_name = r->column_family_name;
-      r->props.filter_policy_name = r->table_options.filter_policy != nullptr ?
-          r->table_options.filter_policy->Name() : "";
-      r->props.index_size =
-          r->index_builder->EstimatedSize() + kBlockTrailerSize;
-      r->props.comparator_name = r->ioptions.user_comparator != nullptr
-                                     ? r->ioptions.user_comparator->Name()
-                                     : "nullptr";
-      r->props.merge_operator_name = r->ioptions.merge_operator != nullptr
-                                         ? r->ioptions.merge_operator->Name()
-                                         : "nullptr";
-      r->props.compression_name =
-          CompressionTypeToString(r->compression_ctx.type());
-      r->props.prefix_extractor_name =
-          r->moptions.prefix_extractor != nullptr
-              ? r->moptions.prefix_extractor->Name()
-              : "nullptr";
-
-      std::string property_collectors_names = "[";
-      for (size_t i = 0;
-           i < r->ioptions.table_properties_collector_factories.size(); ++i) {
-        if (i != 0) {
-          property_collectors_names += ",";
-        }
-        property_collectors_names +=
-            r->ioptions.table_properties_collector_factories[i]->Name();
-      }
-      property_collectors_names += "]";
-      r->props.property_collectors_names = property_collectors_names;
-      if (r->table_options.index_type ==
-          BlockBasedTableOptions::kTwoLevelIndexSearch) {
-        assert(r->p_index_builder_ != nullptr);
-        r->props.index_partitions = r->p_index_builder_->NumPartitions();
-        r->props.top_level_index_size =
-            r->p_index_builder_->EstimateTopLevelIndexSize(r->offset);
-      }
-      r->props.index_key_is_user_key =
-          !r->index_builder->seperator_is_key_plus_seq();
-      r->props.creation_time = r->creation_time;
-      r->props.oldest_key_time = r->oldest_key_time;
-
-      // Add basic properties
-      property_block_builder.AddTableProperty(r->props);
-
-      // Add use collected properties
-      NotifyCollectTableCollectorsOnFinish(r->table_properties_collectors,
-                                           r->ioptions.info_log,
-                                           &property_block_builder);
-
-      BlockHandle properties_block_handle;
-      WriteRawBlock(
-          property_block_builder.Finish(),
-          kNoCompression,
-          &properties_block_handle
-      );
-      meta_index_builder.Add(kPropertiesBlock, properties_block_handle);
-
-      // Write compression dictionary block
-      if (r->compression_dict && r->compression_dict->size()) {
-        WriteRawBlock(*r->compression_dict, kNoCompression,
-                      &compression_dict_block_handle);
-        meta_index_builder.Add(kCompressionDictBlock,
-                               compression_dict_block_handle);
-      }
-    }  // end of properties/compression dictionary block writing
-
-    if (ok() && !r->range_del_block.empty()) {
-      WriteRawBlock(r->range_del_block.Finish(), kNoCompression,
-                    &range_del_block_handle);
-      meta_index_builder.Add(kRangeDelBlock, range_del_block_handle);
-    }  // range deletion tombstone meta block
-  }    // meta blocks
-
-  // Write index block
+  WriteFilterBlock(&meta_index_builder);
+  WriteIndexBlock(&meta_index_builder, &index_block_handle);
+  WriteCompressionDictBlock(&meta_index_builder);
+  WriteRangeDelBlock(&meta_index_builder);
+  WritePropertiesBlock(&meta_index_builder);
   if (ok()) {
     // flush the meta index block
     WriteRawBlock(meta_index_builder.Finish(), kNoCompression,
                   &metaindex_block_handle);
-
-    if (r->table_options.enable_index_compression) {
-      WriteBlock(index_blocks.index_block_contents, &index_block_handle, false);
-    } else {
-      WriteRawBlock(index_blocks.index_block_contents, kNoCompression,
-                    &index_block_handle);
-    }
-    // If there are more index partitions, finish them and write them out
-    Status& s = index_builder_status;
-    while (s.IsIncomplete()) {
-      s = r->index_builder->Finish(&index_blocks, index_block_handle);
-      if (!s.ok() && !s.IsIncomplete()) {
-        return s;
-      }
-      if (r->table_options.enable_index_compression) {
-        WriteBlock(index_blocks.index_block_contents, &index_block_handle,
-                   false);
-      } else {
-        WriteRawBlock(index_blocks.index_block_contents, kNoCompression,
-                      &index_block_handle);
-      }
-      // The last index_block_handle will be for the partition index block
-    }
   }

   // Write footer

@@ -18,6 +18,7 @@
 #include "rocksdb/listener.h"
 #include "rocksdb/options.h"
 #include "rocksdb/status.h"
+#include "table/meta_blocks.h"
 #include "table/table_builder.h"
 #include "util/compression.h"

@@ -106,6 +107,14 @@ class BlockBasedTableBuilder : public TableBuilder {
   Status InsertBlockInCache(const Slice& block_contents,
                             const CompressionType type,
                             const BlockHandle* handle);

+  void WriteFilterBlock(MetaIndexBuilder* meta_index_builder);
+  void WriteIndexBlock(MetaIndexBuilder* meta_index_builder,
+                       BlockHandle* index_block_handle);
+  void WritePropertiesBlock(MetaIndexBuilder* meta_index_builder);
+  void WriteCompressionDictBlock(MetaIndexBuilder* meta_index_builder);
+  void WriteRangeDelBlock(MetaIndexBuilder* meta_index_builder);

   struct Rep;
   class BlockBasedTablePropertiesCollectorFactory;
   class BlockBasedTablePropertiesCollector;

@@ -738,8 +738,17 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,

   std::unique_ptr<FilePrefetchBuffer> prefetch_buffer;

-  // Before read footer, readahead backwards to prefetch data
-  const size_t kTailPrefetchSize = 512 * 1024;
+  // prefetch both index and filters, down to all partitions
+  const bool prefetch_all = prefetch_index_and_filter_in_cache || level == 0;
+  const bool preload_all = !table_options.cache_index_and_filter_blocks;
+  // Before reading the footer, readahead backwards to prefetch data. Do more
+  // readahead if we're going to read index/filter.
+  // TODO: This may incorrectly select small readahead in case partitioned
+  // index/filter is enabled and top-level partition pinning is enabled. That's
+  // because we need to issue readahead before we read the properties, at which
+  // point we don't yet know the index type.
+  const size_t kTailPrefetchSize =
+      prefetch_all || preload_all ? 512 * 1024 : 4 * 1024;
   size_t prefetch_off;
   size_t prefetch_len;
   if (file_size < kTailPrefetchSize) {
@@ -945,8 +954,6 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
   bool need_upper_bound_check =
       PrefixExtractorChanged(rep->table_properties.get(), prefix_extractor);

-  // prefetch both index and filters, down to all partitions
-  const bool prefetch_all = prefetch_index_and_filter_in_cache || level == 0;
   BlockBasedTableOptions::IndexType index_type = new_table->UpdateIndexType();
   // prefetch the first level of index
   const bool prefetch_index =
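The new logic picks the large 512 KB tail readahead only when index/filter blocks will actually be read up front, and a small 4 KB window (roughly footer plus metaindex) otherwise. The hunk truncates just before the window is clamped to the file; a sketch of that computation, under the assumption that it reads the last `kTailPrefetchSize` bytes ending at EOF:

```cpp
#include <cstddef>
#include <cstdint>

// Compute a tail window of up to tail_prefetch_size bytes ending at EOF.
void TailWindow(uint64_t file_size, size_t tail_prefetch_size,
                size_t* prefetch_off, size_t* prefetch_len) {
  if (file_size < tail_prefetch_size) {
    // The file is shorter than the window: read all of it.
    *prefetch_off = 0;
    *prefetch_len = static_cast<size_t>(file_size);
  } else {
    *prefetch_off = static_cast<size_t>(file_size - tail_prefetch_size);
    *prefetch_len = tail_prefetch_size;
  }
}
```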
@@ -1944,6 +1951,7 @@ bool BlockBasedTable::PrefixMayMatch(

 template <class TBlockIter>
 void BlockBasedTableIterator<TBlockIter>::Seek(const Slice& target) {
+  is_out_of_bound_ = false;
   if (!CheckPrefixMayMatch(target)) {
     ResetDataIter();
     return;
@@ -1973,6 +1981,7 @@ void BlockBasedTableIterator<TBlockIter>::Seek(const Slice& target) {

 template <class TBlockIter>
 void BlockBasedTableIterator<TBlockIter>::SeekForPrev(const Slice& target) {
+  is_out_of_bound_ = false;
   if (!CheckPrefixMayMatch(target)) {
     ResetDataIter();
     return;
@@ -2015,6 +2024,7 @@ void BlockBasedTableIterator<TBlockIter>::SeekForPrev(const Slice& target) {

 template <class TBlockIter>
 void BlockBasedTableIterator<TBlockIter>::SeekToFirst() {
+  is_out_of_bound_ = false;
   SavePrevIndexValue();
   index_iter_->SeekToFirst();
   if (!index_iter_->Valid()) {
@@ -2028,6 +2038,7 @@ void BlockBasedTableIterator<TBlockIter>::SeekToFirst() {

 template <class TBlockIter>
 void BlockBasedTableIterator<TBlockIter>::SeekToLast() {
+  is_out_of_bound_ = false;
   SavePrevIndexValue();
   index_iter_->SeekToLast();
   if (!index_iter_->Valid()) {
@@ -2106,7 +2117,7 @@ void BlockBasedTableIterator<TBlockIter>::InitDataBlock() {

 template <class TBlockIter>
 void BlockBasedTableIterator<TBlockIter>::FindKeyForward() {
-  is_out_of_bound_ = false;
+  assert(!is_out_of_bound_);
   // TODO the while loop inherits from two-level-iterator. We don't know
   // whether a block can be empty so it can be replaced by an "if".
   while (!block_iter_.Valid()) {
@@ -2146,6 +2157,7 @@ void BlockBasedTableIterator<TBlockIter>::FindKeyForward() {

 template <class TBlockIter>
 void BlockBasedTableIterator<TBlockIter>::FindKeyBackward() {
+  assert(!is_out_of_bound_);
   while (!block_iter_.Valid()) {
     if (!block_iter_.status().ok()) {
       return;

@@ -212,7 +212,7 @@ struct BlockContents {
 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
       return malloc_usable_size(allocation.get());
 #else
-      return sizeof(*allocation.get());
+      return data.size();
 #endif  // ROCKSDB_MALLOC_USABLE_SIZE
     } else {
       return 0;  // no extra memory is occupied by the data
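The replaced expression was a genuine bug: `allocation` holds a `char` buffer, so `sizeof(*allocation.get())` is `sizeof(char)`, i.e. always 1, no matter how large the allocation is; `data.size()` reports the real usage. A self-contained demonstration of the pitfall:

```cpp
#include <cstdio>
#include <memory>

int main() {
  std::unique_ptr<char[]> allocation(new char[4096]);
  // sizeof inspects the static type (char), not the allocation size:
  std::printf("%zu\n", sizeof(*allocation.get()));  // prints 1, not 4096
  return 0;
}
```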
@@ -200,7 +200,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
         merge_context_->PushOperand(value, false);
       }
       if (merge_operator_ != nullptr &&
-          merge_operator_->ShouldMerge(merge_context_->GetOperands())) {
+          merge_operator_->ShouldMerge(merge_context_->GetOperandsDirectionBackward())) {
         state_ = kFound;
         if (LIKELY(pinnable_val_ != nullptr)) {
           Status merge_status = MergeHelper::TimedFullMerge(
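`GetOperandsDirectionBackward` hands `ShouldMerge` the operands newest-first, so an operator that can short-circuit on a self-contained record sees it at index 0 instead of scanning the whole list. A hedged sketch of such a check, with illustrative types only (`kFullValueTag` is an invented convention for this example, not a RocksDB API):

```cpp
#include <string>
#include <vector>

// Illustrative: decide whether enough operands have accumulated to merge.
// Operands arrive newest-first, so a self-contained ("full") record near
// the front means older operands can never change the merged result.
bool ShouldMergeNewestFirst(const std::vector<std::string>& operands) {
  const char kFullValueTag = 'F';  // invented tag for this sketch
  return !operands.empty() && !operands.front().empty() &&
         operands.front()[0] == kFullValueTag;
}
```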
@@ -70,6 +70,12 @@ PartitionedIndexBuilder::PartitionedIndexBuilder(
           table_opt.format_version),
       sub_index_builder_(nullptr),
       table_opt_(table_opt),
+      // We start with false. After each partition we revise the value based on
+      // what the sub_index_builder has decided. If the feature is disabled
+      // entirely, this will be set to true after switching the first
+      // sub_index_builder. Otherwise, it could be set to true even if one of
+      // the sub_index_builders could not safely exclude seq from the keys;
+      // then it will be enforced on all sub_index_builders on ::Finish.
       seperator_is_key_plus_seq_(false) {}

 PartitionedIndexBuilder::~PartitionedIndexBuilder() {
@@ -83,7 +89,11 @@ void PartitionedIndexBuilder::MakeNewSubIndexBuilder() {
       table_opt_.format_version);
   flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
       table_opt_.metadata_block_size, table_opt_.block_size_deviation,
-      sub_index_builder_->index_block_builder_));
+      // Note: this is sub-optimal since sub_index_builder_ could later reset
+      // seperator_is_key_plus_seq_ but the probability of that is low.
+      sub_index_builder_->seperator_is_key_plus_seq_
+          ? sub_index_builder_->index_block_builder_
+          : sub_index_builder_->index_block_builder_without_seq_));
   partition_cut_requested_ = false;
 }

@@ -143,6 +153,9 @@ void PartitionedIndexBuilder::AddIndexEntry(

 Status PartitionedIndexBuilder::Finish(
     IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) {
+  if (partition_cnt_ == 0) {
+    partition_cnt_ = entries_.size();
+  }
   // It must be set to null after last key is added
   assert(sub_index_builder_ == nullptr);
   if (finishing_indexes == true) {
@@ -164,6 +177,8 @@ Status PartitionedIndexBuilder::Finish(
       index_blocks->index_block_contents =
           index_block_builder_without_seq_.Finish();
     }
+    top_level_index_size_ = index_blocks->index_block_contents.size();
+    index_size_ += top_level_index_size_;
     return Status::OK();
   } else {
     // Finish the next partition index in line and Incomplete() to indicate we
@@ -172,45 +187,13 @@ Status PartitionedIndexBuilder::Finish(
     // Apply the policy to all sub-indexes
     entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_;
     auto s = entry.value->Finish(index_blocks);
+    index_size_ += index_blocks->index_block_contents.size();
     finishing_indexes = true;
     return s.ok() ? Status::Incomplete() : s;
   }
 }

-// Estimate size excluding the top-level index
-// It is assumed that this method is called before writing index partition
-// starts
-size_t PartitionedIndexBuilder::EstimatedSize() const {
-  size_t total = 0;
-  for (auto it = entries_.begin(); it != entries_.end(); ++it) {
-    total += it->value->EstimatedSize();
-  }
-  total +=
-      sub_index_builder_ == nullptr ? 0 : sub_index_builder_->EstimatedSize();
-  return total;
-}
-
-// Since when this method is called we do not know the index block offsets yet,
-// the top-level index does not exist. Hence we estimate the block offsets and
-// create a temporary top-level index.
-size_t PartitionedIndexBuilder::EstimateTopLevelIndexSize(
-    uint64_t offset) const {
-  BlockBuilder tmp_builder(
-      table_opt_.index_block_restart_interval);  // tmp top-level index builder
-  for (auto it = entries_.begin(); it != entries_.end(); ++it) {
-    std::string tmp_handle_encoding;
-    uint64_t size = it->value->EstimatedSize();
-    BlockHandle tmp_block_handle(offset, size);
-    tmp_block_handle.EncodeTo(&tmp_handle_encoding);
-    tmp_builder.Add(
-        seperator_is_key_plus_seq_ ? it->key : ExtractUserKey(it->key),
-        tmp_handle_encoding);
-    offset += size;
-  }
-  return tmp_builder.CurrentSizeEstimate();
-}
-
 size_t PartitionedIndexBuilder::NumPartitions() const {
-  return entries_.size();
+  return partition_cnt_;
 }
 }  // namespace rocksdb
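The partitioned builder's `Finish` is a pull protocol: each call emits one partition and returns `Status::Incomplete()` until only the top-level index remains, with the caller feeding back the handle of the block it just wrote. A condensed, compilable sketch of the caller's side with minimal stand-in types (the real ones live in `table/index_builder.h`; `WritePartition` is a hypothetical stand-in for the block-writing step):

```cpp
#include <string>

// Minimal stand-ins so the protocol shape compiles in isolation.
struct Status {
  bool incomplete = false;
  static Status OK() { return {}; }
  static Status Incomplete() { return {true}; }
  bool IsIncomplete() const { return incomplete; }
};
struct BlockHandle {};
struct IndexBlocks { std::string index_block_contents; };

struct PartitionedBuilder {
  int remaining = 3;  // pretend there are three partitions
  Status Finish(IndexBlocks* out, const BlockHandle& /*last*/ = {}) {
    out->index_block_contents = "partition";
    return --remaining > 0 ? Status::Incomplete() : Status::OK();
  }
};

BlockHandle WritePartition(const IndexBlocks&) { return {}; }  // stand-in

int main() {
  PartitionedBuilder builder;
  IndexBlocks blocks;
  Status s = builder.Finish(&blocks);        // emits the first partition
  while (s.IsIncomplete()) {
    BlockHandle h = WritePartition(blocks);  // persist the emitted partition
    s = builder.Finish(&blocks, h);          // feed handle back, get the next
  }
  // On OK, blocks holds the top-level index contents.
  return 0;
}
```

This is also why `IndexSize()` replaces `EstimatedSize()` above: once every partition has been finished and written, the exact size is known and no estimate is needed.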
@@ -96,13 +96,15 @@ class IndexBuilder {
   virtual Status Finish(IndexBlocks* index_blocks,
                         const BlockHandle& last_partition_block_handle) = 0;

-  // Get the estimated size for index block.
-  virtual size_t EstimatedSize() const = 0;
+  // Get the size for index block. Must be called after ::Finish.
+  virtual size_t IndexSize() const = 0;

   virtual bool seperator_is_key_plus_seq() { return true; }

  protected:
   const InternalKeyComparator* comparator_;
+  // Set after ::Finish is called
+  size_t index_size_ = 0;
 };

 // This index builder builds space-efficient index block.
@@ -162,15 +164,12 @@ class ShortenedIndexBuilder : public IndexBuilder {
       index_blocks->index_block_contents =
           index_block_builder_without_seq_.Finish();
     }
+    index_size_ = index_blocks->index_block_contents.size();
     return Status::OK();
   }

-  virtual size_t EstimatedSize() const override {
-    if (seperator_is_key_plus_seq_) {
-      return index_block_builder_.CurrentSizeEstimate();
-    } else {
-      return index_block_builder_without_seq_.CurrentSizeEstimate();
-    }
+  virtual size_t IndexSize() const override {
+    return index_size_;
   }

   virtual bool seperator_is_key_plus_seq() override {
@@ -272,8 +271,8 @@ class HashIndexBuilder : public IndexBuilder {
     return Status::OK();
   }

-  virtual size_t EstimatedSize() const override {
-    return primary_index_builder_.EstimatedSize() + prefix_block_.size() +
+  virtual size_t IndexSize() const override {
+    return primary_index_builder_.IndexSize() + prefix_block_.size() +
            prefix_meta_block_.size();
   }

@@ -338,8 +337,12 @@ class PartitionedIndexBuilder : public IndexBuilder {
       IndexBlocks* index_blocks,
       const BlockHandle& last_partition_block_handle) override;

-  virtual size_t EstimatedSize() const override;
-  size_t EstimateTopLevelIndexSize(uint64_t) const;
+  virtual size_t IndexSize() const override {
+    return index_size_;
+  }
+  size_t TopLevelIndexSize(uint64_t) const {
+    return top_level_index_size_;
+  }
   size_t NumPartitions() const;

   inline bool ShouldCutFilterBlock() {
@@ -362,6 +365,11 @@ class PartitionedIndexBuilder : public IndexBuilder {
   }

  private:
+  // Set after ::Finish is called
+  size_t top_level_index_size_ = 0;
+  // Set after ::Finish is called
+  size_t partition_cnt_ = 0;

   void MakeNewSubIndexBuilder();

   struct Entry {

@@ -352,19 +352,19 @@ class TableConstructor: public Constructor {
     file_writer_->Flush();
     EXPECT_TRUE(s.ok()) << s.ToString();

-    EXPECT_EQ(GetSink()->contents().size(), builder->FileSize());
+    EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize());

     // Open the table
     uniq_id_ = cur_uniq_id_++;
     file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
-        GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
+        TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
     const bool kSkipFilters = true;
     const bool kImmortal = true;
     return ioptions.table_factory->NewTableReader(
         TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
                            internal_comparator, !kSkipFilters, !kImmortal,
                            level_),
-        std::move(file_reader_), GetSink()->contents().size(), &table_reader_);
+        std::move(file_reader_), TEST_GetSink()->contents().size(), &table_reader_);
   }

   virtual InternalIterator* NewIterator(
@@ -390,11 +390,11 @@ class TableConstructor: public Constructor {
   virtual Status Reopen(const ImmutableCFOptions& ioptions,
                         const MutableCFOptions& moptions) {
     file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
-        GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
+        TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
     return ioptions.table_factory->NewTableReader(
         TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
                            *last_internal_key_),
-        std::move(file_reader_), GetSink()->contents().size(), &table_reader_);
+        std::move(file_reader_), TEST_GetSink()->contents().size(), &table_reader_);
   }

   virtual TableReader* GetTableReader() {
@@ -409,6 +409,10 @@ class TableConstructor: public Constructor {

   bool ConvertToInternalKey() { return convert_to_internal_key_; }

+  test::StringSink* TEST_GetSink() {
+    return static_cast<test::StringSink*>(file_writer_->writable_file());
+  }

  private:
   void Reset() {
     uniq_id_ = 0;
@@ -417,10 +421,6 @@ class TableConstructor: public Constructor {
     file_reader_.reset();
   }

-  test::StringSink* GetSink() {
-    return static_cast<test::StringSink*>(file_writer_->writable_file());
-  }

   uint64_t uniq_id_;
   unique_ptr<WritableFileWriter> file_writer_;
   unique_ptr<RandomAccessFileReader> file_reader_;
@@ -3494,6 +3494,86 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
   }
 }

+TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
+  // The properties meta-block should come at the end since we always need to
+  // read it when opening a file, unlike index/filter/other meta-blocks, which
+  // are sometimes read depending on the user's configuration. This ordering
+  // allows us to do a small readahead on the end of the file to read properties
+  // and meta-index blocks with one I/O.
+  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
+  c.Add("a1", "val1");
+  c.Add("b2", "val2");
+  c.Add("c3", "val3");
+  c.Add("d4", "val4");
+  c.Add("e5", "val5");
+  c.Add("f6", "val6");
+  c.Add("g7", "val7");
+  c.Add("h8", "val8");
+  c.Add("j9", "val9");
+
+  // write an SST file
+  Options options;
+  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
+  table_options.filter_policy.reset(NewBloomFilterPolicy(
+      8 /* bits_per_key */, false /* use_block_based_filter */));
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  ImmutableCFOptions ioptions(options);
+  MutableCFOptions moptions(options);
+  std::vector<std::string> keys;
+  stl_wrappers::KVMap kvmap;
+  c.Finish(options, ioptions, moptions, table_options,
+           GetPlainInternalComparator(options.comparator), &keys, &kvmap);
+
+  // get file reader
+  test::StringSink* table_sink = c.TEST_GetSink();
+  std::unique_ptr<RandomAccessFileReader> table_reader{
+      test::GetRandomAccessFileReader(
+          new test::StringSource(table_sink->contents(), 0 /* unique_id */,
+                                 false /* allow_mmap_reads */))};
+  size_t table_size = table_sink->contents().size();
+
+  // read footer
+  Footer footer;
+  ASSERT_OK(ReadFooterFromFile(table_reader.get(),
+                               nullptr /* prefetch_buffer */, table_size,
+                               &footer, kBlockBasedTableMagicNumber));
+
+  // read metaindex
+  auto metaindex_handle = footer.metaindex_handle();
+  BlockContents metaindex_contents;
+  Slice compression_dict;
+  PersistentCacheOptions pcache_opts;
+  BlockFetcher block_fetcher(
+      table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(),
+      metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
+      compression_dict, pcache_opts);
+  ASSERT_OK(block_fetcher.ReadBlockContents());
+  Block metaindex_block(std::move(metaindex_contents),
+                        kDisableGlobalSequenceNumber);
+
+  // verify properties block comes last
+  std::unique_ptr<InternalIterator> metaindex_iter{
+      metaindex_block.NewIterator<DataBlockIter>(options.comparator,
+                                                 options.comparator)};
+  uint64_t max_offset = 0;
+  std::string key_at_max_offset;
+  for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid();
+       metaindex_iter->Next()) {
+    BlockHandle handle;
+    Slice value = metaindex_iter->value();
+    ASSERT_OK(handle.DecodeFrom(&value));
+    if (handle.offset() > max_offset) {
+      max_offset = handle.offset();
+      key_at_max_offset = metaindex_iter->key().ToString();
+    }
+  }
+  ASSERT_EQ(kPropertiesBlock, key_at_max_offset);
+  // index handle is stored in footer rather than metaindex block, so need
+  // separate logic to verify it comes before properties block.
+  ASSERT_GT(max_offset, footer.index_handle().offset());
+  c.ResetTableReader();
+}

 TEST_P(BlockBasedTableTest, BadOptions) {
   rocksdb::Options options;
   options.compression = kNoCompression;

tools/ldb_cmd.cc
@@ -242,6 +242,14 @@ LDBCommand* LDBCommand::SelectCommand(const ParsedParams& parsed_params) {
   } else if (parsed_params.cmd == RestoreCommand::Name()) {
     return new RestoreCommand(parsed_params.cmd_params,
                               parsed_params.option_map, parsed_params.flags);
+  } else if (parsed_params.cmd == WriteExternalSstFilesCommand::Name()) {
+    return new WriteExternalSstFilesCommand(parsed_params.cmd_params,
+                                            parsed_params.option_map,
+                                            parsed_params.flags);
+  } else if (parsed_params.cmd == IngestExternalSstFilesCommand::Name()) {
+    return new IngestExternalSstFilesCommand(parsed_params.cmd_params,
+                                             parsed_params.option_map,
+                                             parsed_params.flags);
   }
   return nullptr;
 }
@@ -2939,5 +2947,180 @@ void DBFileDumperCommand::DoCommand() {
   }
 }

+void WriteExternalSstFilesCommand::Help(std::string& ret) {
+  ret.append(" ");
+  ret.append(WriteExternalSstFilesCommand::Name());
+  ret.append(" <output_sst_path>");
+  ret.append("\n");
+}

+WriteExternalSstFilesCommand::WriteExternalSstFilesCommand(
+    const std::vector<std::string>& params,
+    const std::map<std::string, std::string>& options,
+    const std::vector<std::string>& flags)
+    : LDBCommand(
+          options, flags, false /* is_read_only */,
+          BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM,
+                               ARG_TO, ARG_CREATE_IF_MISSING})) {
+  create_if_missing_ =
+      IsFlagPresent(flags, ARG_CREATE_IF_MISSING) ||
+      ParseBooleanOption(options, ARG_CREATE_IF_MISSING, false);
+  if (params.size() != 1) {
+    exec_state_ = LDBCommandExecuteResult::Failed(
+        "output SST file path must be specified");
+  } else {
+    output_sst_path_ = params.at(0);
+  }
+}

+void WriteExternalSstFilesCommand::DoCommand() {
+  if (!db_) {
+    assert(GetExecuteState().IsFailed());
+    return;
+  }
+  ColumnFamilyHandle* cfh = GetCfHandle();
+  SstFileWriter sst_file_writer(EnvOptions(), db_->GetOptions(), cfh);
+  Status status = sst_file_writer.Open(output_sst_path_);
+  if (!status.ok()) {
+    exec_state_ = LDBCommandExecuteResult::Failed("failed to open SST file: " +
+                                                  status.ToString());
+    return;
+  }

+  int bad_lines = 0;
+  std::string line;
+  std::ifstream ifs_stdin("/dev/stdin");
+  std::istream* istream_p = ifs_stdin.is_open() ? &ifs_stdin : &std::cin;
+  while (getline(*istream_p, line, '\n')) {
+    std::string key;
+    std::string value;
+    if (ParseKeyValue(line, &key, &value, is_key_hex_, is_value_hex_)) {
+      status = sst_file_writer.Put(key, value);
+      if (!status.ok()) {
+        exec_state_ = LDBCommandExecuteResult::Failed(
+            "failed to write record to file: " + status.ToString());
+        return;
+      }
+    } else if (0 == line.find("Keys in range:")) {
+      // ignore this line
+    } else if (0 == line.find("Created bg thread 0x")) {
+      // ignore this line
+    } else {
+      bad_lines++;
+    }
+  }

+  status = sst_file_writer.Finish();
+  if (!status.ok()) {
+    exec_state_ = LDBCommandExecuteResult::Failed(
+        "Failed to finish writing to file: " + status.ToString());
+    return;
+  }

+  if (bad_lines > 0) {
+    fprintf(stderr, "Warning: %d bad lines ignored.\n", bad_lines);
+  }
+  exec_state_ = LDBCommandExecuteResult::Succeed(
+      "external SST file written to " + output_sst_path_);
+}

+Options WriteExternalSstFilesCommand::PrepareOptionsForOpenDB() {
+  Options opt = LDBCommand::PrepareOptionsForOpenDB();
+  opt.create_if_missing = create_if_missing_;
+  return opt;
+}

+const std::string IngestExternalSstFilesCommand::ARG_MOVE_FILES = "move_files";
+const std::string IngestExternalSstFilesCommand::ARG_SNAPSHOT_CONSISTENCY =
+    "snapshot_consistency";
+const std::string IngestExternalSstFilesCommand::ARG_ALLOW_GLOBAL_SEQNO =
+    "allow_global_seqno";
+const std::string IngestExternalSstFilesCommand::ARG_ALLOW_BLOCKING_FLUSH =
+    "allow_blocking_flush";
+const std::string IngestExternalSstFilesCommand::ARG_INGEST_BEHIND =
+    "ingest_behind";

+void IngestExternalSstFilesCommand::Help(std::string& ret) {
+  ret.append(" ");
+  ret.append(IngestExternalSstFilesCommand::Name());
+  ret.append(" <input_sst_path>");
+  ret.append(" [--" + ARG_MOVE_FILES + "] ");
+  ret.append(" [--" + ARG_SNAPSHOT_CONSISTENCY + "] ");
+  ret.append(" [--" + ARG_ALLOW_GLOBAL_SEQNO + "] ");
+  ret.append(" [--" + ARG_ALLOW_BLOCKING_FLUSH + "] ");
+  ret.append(" [--" + ARG_INGEST_BEHIND + "] ");
+  ret.append("\n");
+}

+IngestExternalSstFilesCommand::IngestExternalSstFilesCommand(
+    const std::vector<std::string>& params,
+    const std::map<std::string, std::string>& options,
+    const std::vector<std::string>& flags)
+    : LDBCommand(
+          options, flags, false /* is_read_only */,
+          BuildCmdLineOptions({ARG_MOVE_FILES, ARG_SNAPSHOT_CONSISTENCY,
+                               ARG_ALLOW_GLOBAL_SEQNO, ARG_CREATE_IF_MISSING,
+                               ARG_ALLOW_BLOCKING_FLUSH, ARG_INGEST_BEHIND})),
+      move_files_(false),
+      snapshot_consistency_(true),
+      allow_global_seqno_(true),
+      allow_blocking_flush_(true),
+      ingest_behind_(false) {
+  create_if_missing_ =
+      IsFlagPresent(flags, ARG_CREATE_IF_MISSING) ||
+      ParseBooleanOption(options, ARG_CREATE_IF_MISSING, false);
+  move_files_ = IsFlagPresent(flags, ARG_MOVE_FILES) ||
+                ParseBooleanOption(options, ARG_MOVE_FILES, false);
+  snapshot_consistency_ =
+      IsFlagPresent(flags, ARG_SNAPSHOT_CONSISTENCY) ||
+      ParseBooleanOption(options, ARG_SNAPSHOT_CONSISTENCY, true);
+  allow_global_seqno_ =
+      IsFlagPresent(flags, ARG_ALLOW_GLOBAL_SEQNO) ||
+      ParseBooleanOption(options, ARG_ALLOW_GLOBAL_SEQNO, true);
+  allow_blocking_flush_ =
+      IsFlagPresent(flags, ARG_ALLOW_BLOCKING_FLUSH) ||
+      ParseBooleanOption(options, ARG_ALLOW_BLOCKING_FLUSH, true);
+  ingest_behind_ = IsFlagPresent(flags, ARG_INGEST_BEHIND) ||
+                   ParseBooleanOption(options, ARG_INGEST_BEHIND, false);

+  if (params.size() != 1) {
+    exec_state_ =
+        LDBCommandExecuteResult::Failed("input SST path must be specified");
+  } else {
+    input_sst_path_ = params.at(0);
+  }
+}

+void IngestExternalSstFilesCommand::DoCommand() {
+  if (!db_) {
+    assert(GetExecuteState().IsFailed());
+    return;
+  }
+  if (GetExecuteState().IsFailed()) {
+    return;
+  }
+  ColumnFamilyHandle* cfh = GetCfHandle();
+  IngestExternalFileOptions ifo;
+  ifo.move_files = move_files_;
+  ifo.snapshot_consistency = snapshot_consistency_;
+  ifo.allow_global_seqno = allow_global_seqno_;
+  ifo.allow_blocking_flush = allow_blocking_flush_;
+  ifo.ingest_behind = ingest_behind_;
+  Status status = db_->IngestExternalFile(cfh, {input_sst_path_}, ifo);
+  if (!status.ok()) {
+    exec_state_ = LDBCommandExecuteResult::Failed(
+        "failed to ingest external SST: " + status.ToString());
+  } else {
+    exec_state_ =
+        LDBCommandExecuteResult::Succeed("external SST files ingested");
+  }
+}

+Options IngestExternalSstFilesCommand::PrepareOptionsForOpenDB() {
+  Options opt = LDBCommand::PrepareOptionsForOpenDB();
+  opt.create_if_missing = create_if_missing_;
+  return opt;
+}

 }  // namespace rocksdb
 #endif  // ROCKSDB_LITE

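The two new `ldb` commands wrap the same write-then-ingest flow that is available directly through the public API. For reference, a minimal sketch of that flow in C++ (the paths and option values here are illustrative, not from the diff):

```cpp
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/sst_file_writer.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;

  // Write an external SST file (keys must be added in sorted order).
  rocksdb::SstFileWriter writer(rocksdb::EnvOptions(), options);
  rocksdb::Status s = writer.Open("/tmp/extern_data1.sst");
  if (!s.ok()) return 1;
  writer.Put("x1", "y10");
  writer.Put("x2", "y20");
  s = writer.Finish();
  if (!s.ok()) return 1;

  // Ingest it into a database.
  rocksdb::DB* db = nullptr;
  s = rocksdb::DB::Open(options, "/tmp/db1", &db);
  if (!s.ok()) return 1;
  rocksdb::IngestExternalFileOptions ifo;  // allow_global_seqno is on by default
  s = db->IngestExternalFile({"/tmp/extern_data1.sst"}, ifo);
  delete db;
  return s.ok() ? 0 : 1;
}
```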
@@ -522,4 +522,55 @@ class RestoreCommand : public BackupableCommand {
   static void Help(std::string& ret);
 };

+class WriteExternalSstFilesCommand : public LDBCommand {
+ public:
+  static std::string Name() { return "write_extern_sst"; }
+  WriteExternalSstFilesCommand(
+      const std::vector<std::string>& params,
+      const std::map<std::string, std::string>& options,
+      const std::vector<std::string>& flags);

+  virtual void DoCommand() override;

+  virtual bool NoDBOpen() override { return false; }

+  virtual Options PrepareOptionsForOpenDB() override;

+  static void Help(std::string& ret);

+ private:
+  std::string output_sst_path_;
+};

+class IngestExternalSstFilesCommand : public LDBCommand {
+ public:
+  static std::string Name() { return "ingest_extern_sst"; }
+  IngestExternalSstFilesCommand(
+      const std::vector<std::string>& params,
+      const std::map<std::string, std::string>& options,
+      const std::vector<std::string>& flags);

+  virtual void DoCommand() override;

+  virtual bool NoDBOpen() override { return false; }

+  virtual Options PrepareOptionsForOpenDB() override;

+  static void Help(std::string& ret);

+ private:
+  std::string input_sst_path_;
+  bool move_files_;
+  bool snapshot_consistency_;
+  bool allow_global_seqno_;
+  bool allow_blocking_flush_;
+  bool ingest_behind_;

+  static const std::string ARG_MOVE_FILES;
+  static const std::string ARG_SNAPSHOT_CONSISTENCY;
+  static const std::string ARG_ALLOW_GLOBAL_SEQNO;
+  static const std::string ARG_ALLOW_BLOCKING_FLUSH;
+  static const std::string ARG_INGEST_BEHIND;
+};

 }  // namespace rocksdb

@@ -76,7 +76,7 @@ class LDBTestCase(unittest.TestCase):

            my_check_output("./ldb %s >/dev/null 2>&1 |grep -v \"Created bg \
thread\"" % params, shell=True)
-        except Exception, e:
+        except Exception:
            return
        self.fail(
            "Exception should have been raised for command with params: %s" %
@@ -146,6 +146,14 @@ class LDBTestCase(unittest.TestCase):
    def loadDb(self, params, dumpFile):
        return 0 == run_err_null("cat %s | ./ldb load %s" % (dumpFile, params))

+    def writeExternSst(self, params, inputDumpFile, outputSst):
+        return 0 == run_err_null("cat %s | ./ldb write_extern_sst %s %s"
+                                 % (inputDumpFile, outputSst, params))
+
+    def ingestExternSst(self, params, inputSst):
+        return 0 == run_err_null("./ldb ingest_extern_sst %s %s"
+                                 % (inputSst, params))

    def testStringBatchPut(self):
        print "Running testStringBatchPut..."
        self.assertRunOK("batchput x1 y1 --create_if_missing", "OK")
@@ -547,5 +555,38 @@ class LDBTestCase(unittest.TestCase):
        # non-existing column family.
        self.assertRunFAIL("get cf3_1 --column_family=four")

+    def testIngestExternalSst(self):
+        print "Running testIngestExternalSst..."
+
+        # Dump, load, write external sst and ingest it in another db
+        dbPath = os.path.join(self.TMP_DIR, "db1")
+        self.assertRunOK(
+            "batchput --db=%s --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4"
+            % dbPath,
+            "OK")
+        self.assertRunOK("scan --db=%s" % dbPath,
+                         "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
+        dumpFilePath = os.path.join(self.TMP_DIR, "dump1")
+        with open(dumpFilePath, 'w') as f:
+            f.write("x1 ==> y10\nx2 ==> y20\nx3 ==> y30\nx4 ==> y40")
+        externSstPath = os.path.join(self.TMP_DIR, "extern_data1.sst")
+        self.assertTrue(self.writeExternSst("--create_if_missing --db=%s"
+                                            % dbPath,
+                                            dumpFilePath,
+                                            externSstPath))
+        # cannot ingest if allow_global_seqno is false
+        self.assertFalse(
+            self.ingestExternSst(
+                "--create_if_missing --allow_global_seqno=false --db=%s"
+                % dbPath,
+                externSstPath))
+        self.assertTrue(
+            self.ingestExternSst(
+                "--create_if_missing --allow_global_seqno --db=%s"
+                % dbPath,
+                externSstPath))
+        self.assertRunOKFull("scan --db=%s" % dbPath,
+                             "x1 : y10\nx2 : y20\nx3 : y30\nx4 : y40")

if __name__ == "__main__":
    unittest.main()

@@ -88,6 +88,8 @@ void LDBCommandRunner::PrintHelp(const LDBOptions& ldb_options,
   BackupCommand::Help(ret);
   RestoreCommand::Help(ret);
   CheckPointCommand::Help(ret);
+  WriteExternalSstFilesCommand::Help(ret);
+  IngestExternalSstFilesCommand::Help(ret);

   fprintf(stderr, "%s\n", ret.c_str());
 }

@@ -18,9 +18,17 @@ namespace rocksdb {
 __thread size_t ConcurrentArena::tls_cpuid = 0;
 #endif

+namespace {
+// If the shard block size is too large, in the worst case, every core
+// allocates a block without populating it. If the shard block size is
+// 1MB, 64 cores will quickly allocate 64MB, and may quickly trigger a
+// flush. Cap the size instead.
+const size_t kMaxShardBlockSize = size_t{128 * 1024};
+}  // namespace
+
 ConcurrentArena::ConcurrentArena(size_t block_size, AllocTracker* tracker,
                                  size_t huge_page_size)
-    : shard_block_size_(block_size / 8),
+    : shard_block_size_(std::min(kMaxShardBlockSize, block_size / 8)),
       shards_(),
       arena_(block_size, tracker, huge_page_size) {
   Fixup();
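To make the cap concrete, here is the arithmetic from the new comment as a runnable sketch (the 8 MB block size and 64-core figures are illustrative):

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>

int main() {
  const size_t kMaxShardBlockSize = size_t{128 * 1024};
  const size_t block_size = 8 * 1024 * 1024;  // an 8 MB arena block

  const size_t uncapped = block_size / 8;                              // 1 MB per shard
  const size_t capped = std::min(kMaxShardBlockSize, block_size / 8);  // 128 KB per shard

  // Worst case on a 64-core machine: every core allocates one shard block
  // before any of them is populated.
  std::printf("uncapped worst case: %zu MB\n", 64 * uncapped / (1024 * 1024));  // 64 MB
  std::printf("capped worst case:   %zu MB\n", 64 * capped / (1024 * 1024));    // 8 MB
  return 0;
}
```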
@@ -9,15 +9,29 @@

 #include "rocksdb/status.h"
 #include <stdio.h>
+#ifdef OS_WIN
+#include <string.h>
+#endif
 #include <cstring>
 #include "port/port.h"

 namespace rocksdb {

 const char* Status::CopyState(const char* state) {
+#ifdef OS_WIN
+  const size_t cch =
+      std::strlen(state) + 1;  // +1 for the null terminator
+  char* result = new char[cch];
+  errno_t ret;
+  ret = strncpy_s(result, cch, state, cch - 1);
+  result[cch - 1] = '\0';
+  assert(ret == 0);
+  return result;
+#else
   const size_t cch =
       std::strlen(state) + 1;  // +1 for the null terminator
   return std::strncpy(new char[cch], state, cch);
+#endif
 }

 Status::Status(Code _code, SubCode _subcode, const Slice& msg, const Slice& msg2)
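The Windows branch copies at most cch - 1 bytes and terminates explicitly, since plain strncpy does not guarantee a null terminator when the source is truncated. A portable sketch of that pattern, using a hypothetical helper name (not RocksDB code):

```cpp
#include <cassert>
#include <cstddef>
#include <cstring>

// Copy at most dst_size - 1 bytes and always null-terminate.
char* CopyCString(const char* src, char* dst, size_t dst_size) {
  assert(dst_size > 0);
  std::strncpy(dst, src, dst_size - 1);  // may stop early, never overruns
  dst[dst_size - 1] = '\0';              // guarantee termination
  return dst;
}

int main() {
  char buf[8];
  CopyCString("Corruption: checksum mismatch", buf, sizeof(buf));
  assert(buf[7] == '\0');  // terminated even though the source was truncated
  return 0;
}
```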
@@ -247,7 +247,7 @@ class RandomRWStringSink : public RandomRWFile {
  public:
   explicit RandomRWStringSink(StringSink* ss) : ss_(ss) {}

-  Status Write(uint64_t offset, const Slice& data) {
+  Status Write(uint64_t offset, const Slice& data) override {
     if (offset + data.size() > ss_->contents_.size()) {
       ss_->contents_.resize(offset + data.size(), '\0');
     }
@@ -258,7 +258,7 @@ class RandomRWStringSink : public RandomRWFile {
   }

   Status Read(uint64_t offset, size_t n, Slice* result,
-              char* /*scratch*/) const {
+              char* /*scratch*/) const override {
     *result = Slice(nullptr, 0);
     if (offset < ss_->contents_.size()) {
       size_t str_res_sz =
@@ -268,11 +268,11 @@ class RandomRWStringSink : public RandomRWFile {
     return Status::OK();
   }

-  Status Flush() { return Status::OK(); }
+  Status Flush() override { return Status::OK(); }

-  Status Sync() { return Status::OK(); }
+  Status Sync() override { return Status::OK(); }

-  Status Close() { return Status::OK(); }
+  Status Close() override { return Status::OK(); }

   const std::string& contents() const { return ss_->contents(); }
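These `override` annotations (here and in the env_mirror and persistent-cache hunks below) turn silent signature drift into a compile error. A standalone illustration, unrelated to any RocksDB type:

```cpp
struct Base {
  virtual int Read(int offset) const { return offset; }
  virtual ~Base() = default;
};

struct Derived : Base {
  // With override, dropping `const` would no longer silently introduce a
  // brand-new virtual function; the compiler rejects it instead.
  int Read(int offset) const override { return offset + 1; }

  // int Read(int offset) override { return 0; }  // would fail to compile:
  // "marked 'override' but does not override"
};

int main() {
  Derived d;
  const Base& b = d;
  return b.Read(1) == 2 ? 0 : 1;  // dispatches to Derived::Read
}
```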
@@ -185,7 +185,7 @@ void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) {
   // We decided to punt on PROCESS_EXIT
   if (DLL_THREAD_DETACH == reason) {
     if (thread_local_key != pthread_key_t(-1) && thread_local_inclass_routine != nullptr) {
-      void* tls = pthread_getspecific(thread_local_key);
+      void* tls = TlsGetValue(thread_local_key);
       if (tls != nullptr) {
         thread_local_inclass_routine(tls);
       }
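TlsGetValue reads a raw Win32 TLS slot; the fix has the detach callback read the slot directly rather than going through the port layer's pthread_getspecific emulation. For reference, a self-contained sketch of the Win32 slot API (values are illustrative, and the program is a no-op off Windows):

```cpp
#ifdef _WIN32
#include <windows.h>
#include <cassert>

int main() {
  DWORD key = TlsAlloc();    // allocate a TLS slot
  assert(key != TLS_OUT_OF_INDEXES);

  int value = 42;
  TlsSetValue(key, &value);  // per-thread, pointer-sized payload
  void* tls = TlsGetValue(key);  // what WinOnThreadExit reads above
  assert(tls == &value);

  TlsFree(key);
  return 0;
}
#else
int main() { return 0; }
#endif
```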
@@ -87,9 +87,6 @@ void BlobDBOptions::Dump(Logger* log) const {
   ROCKS_LOG_HEADER(
       log, " BlobDBOptions.blob_file_size: %" PRIu64,
       blob_file_size);
-  ROCKS_LOG_HEADER(
-      log, " BlobDBOptions.ttl_extractor: %p",
-      ttl_extractor.get());
   ROCKS_LOG_HEADER(
       log, " BlobDBOptions.compression: %d",
       static_cast<int>(compression));
@@ -18,8 +18,6 @@ namespace rocksdb {

 namespace blob_db {

-class TTLExtractor;
-
 // A wrapped database which puts values of KV pairs in a separate log
 // and stores the location in the log in the underlying DB.
 // It lacks lots of important functionalities, e.g. DB restarts,
@@ -67,11 +65,6 @@ struct BlobDBOptions {
   // after it exceeds that size
   uint64_t blob_file_size = 256 * 1024 * 1024;

-  // Instead of setting TTL explicitly by calling PutWithTTL or PutUntil,
-  // applications can set a TTLExtractor which can extract TTL from key-value
-  // pairs.
-  std::shared_ptr<TTLExtractor> ttl_extractor = nullptr;
-
   // what compression to use for blobs
   CompressionType compression = kNoCompression;

@@ -151,6 +144,15 @@ class BlobDB : public StackableDB {
                      ColumnFamilyHandle* column_family, const Slice& key,
                      PinnableSlice* value) override = 0;

+  // Get value and expiration.
+  virtual Status Get(const ReadOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     PinnableSlice* value, uint64_t* expiration) = 0;
+  virtual Status Get(const ReadOptions& options, const Slice& key,
+                     PinnableSlice* value, uint64_t* expiration) {
+    return Get(options, DefaultColumnFamily(), key, value, expiration);
+  }
+
   using rocksdb::StackableDB::MultiGet;
   virtual std::vector<Status> MultiGet(
       const ReadOptions& options,
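Callers can now read a key's absolute expiry alongside its value. A minimal usage sketch, assuming `kNoExpiration` is visible to the caller as it is in the GetExpiration test further down (the Example function and key are illustrative):

```cpp
#include "rocksdb/utilities/blob_db/blob_db.h"

void Example(rocksdb::blob_db::BlobDB* blob_db) {
  rocksdb::PinnableSlice value;
  uint64_t expiration = 0;
  rocksdb::Status s =
      blob_db->Get(rocksdb::ReadOptions(), "key2", &value, &expiration);
  if (s.ok() && expiration != rocksdb::blob_db::kNoExpiration) {
    // Key was written with PutWithTTL/PutUntil; `expiration` holds its
    // absolute expiry time in seconds since the epoch.
  }
}
```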
@@ -188,7 +190,6 @@ class BlobDB : public StackableDB {

   virtual Status Write(const WriteOptions& opts,
                        WriteBatch* updates) override = 0;
-
   using rocksdb::StackableDB::NewIterator;
   virtual Iterator* NewIterator(const ReadOptions& options) override = 0;
   virtual Iterator* NewIterator(const ReadOptions& options,
@@ -228,33 +229,6 @@ class BlobDB : public StackableDB {
 Status DestroyBlobDB(const std::string& dbname, const Options& options,
                      const BlobDBOptions& bdb_options);

-// TTLExtractor allows applications to extract TTL from key-value pairs.
-// This is useful for applications using Put or WriteBatch to write keys that
-// don't intend to migrate to PutWithTTL or PutUntil.
-//
-// Applications can implement either ExtractTTL or ExtractExpiration. If both
-// are implemented, ExtractExpiration will take precedence.
-class TTLExtractor {
- public:
-  // Extract TTL from key-value pair.
-  // Return true if the key has TTL, false otherwise. If key has TTL,
-  // TTL is passed back through ttl. The method can optionally modify the
-  // value, pass the result back through new_value, and also set
-  // value_changed to true.
-  virtual bool ExtractTTL(const Slice& key, const Slice& value, uint64_t* ttl,
-                          std::string* new_value, bool* value_changed);
-
-  // Extract expiration time from key-value pair.
-  // Return true if the key has expiration time, false otherwise. If key has
-  // expiration time, it is passed back through expiration. The method can
-  // optionally modify the value, pass the result back through new_value,
-  // and also set value_changed to true.
-  virtual bool ExtractExpiration(const Slice& key, const Slice& value,
-                                 uint64_t now, uint64_t* expiration,
-                                 std::string* new_value, bool* value_changed);
-
-  virtual ~TTLExtractor() = default;
-};
-
 }  // namespace blob_db
 }  // namespace rocksdb
 #endif  // ROCKSDB_LITE
@@ -79,7 +79,6 @@ BlobDBImpl::BlobDBImpl(const std::string& dbname,
       dbname_(dbname),
       db_impl_(nullptr),
       env_(db_options.env),
-      ttl_extractor_(blob_db_options.ttl_extractor.get()),
       bdb_options_(blob_db_options),
       db_options_(db_options),
       cf_options_(cf_options),
@@ -213,8 +212,8 @@ void BlobDBImpl::StartBackgroundTasks() {
   tqueue_.add(kSanityCheckPeriodMillisecs,
               std::bind(&BlobDBImpl::SanityCheck, this, std::placeholders::_1));
   tqueue_.add(
-      kCheckSeqFilesPeriodMillisecs,
-      std::bind(&BlobDBImpl::CheckSeqFiles, this, std::placeholders::_1));
+      kEvictExpiredFilesPeriodMillisecs,
+      std::bind(&BlobDBImpl::EvictExpiredFiles, this, std::placeholders::_1));
 }

 Status BlobDBImpl::GetAllBlobFiles(std::set<uint64_t>* file_numbers) {
@@ -563,12 +562,8 @@ class BlobDBImpl::BlobInserter : public WriteBatch::Handler {
       return Status::NotSupported(
           "Blob DB doesn't support non-default column family.");
     }
-    std::string new_value;
-    Slice value_slice;
-    uint64_t expiration =
-        blob_db_impl_->ExtractExpiration(key, value, &value_slice, &new_value);
-    Status s = blob_db_impl_->PutBlobValue(options_, key, value_slice,
-                                           expiration, &batch_);
+    Status s = blob_db_impl_->PutBlobValue(options_, key, value,
+                                           kNoExpiration, &batch_);
     return s;
   }

@@ -661,10 +656,7 @@ void BlobDBImpl::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {

 Status BlobDBImpl::Put(const WriteOptions& options, const Slice& key,
                        const Slice& value) {
-  std::string new_value;
-  Slice value_slice;
-  uint64_t expiration = ExtractExpiration(key, value, &value_slice, &new_value);
-  return PutUntil(options, key, value_slice, expiration);
+  return PutUntil(options, key, value, kNoExpiration);
 }

 Status BlobDBImpl::PutWithTTL(const WriteOptions& options,
@@ -786,20 +778,6 @@ Slice BlobDBImpl::GetCompressedSlice(const Slice& raw,
   return *compression_output;
 }

-uint64_t BlobDBImpl::ExtractExpiration(const Slice& key, const Slice& value,
-                                       Slice* value_slice,
-                                       std::string* new_value) {
-  uint64_t expiration = kNoExpiration;
-  bool has_expiration = false;
-  bool value_changed = false;
-  if (ttl_extractor_ != nullptr) {
-    has_expiration = ttl_extractor_->ExtractExpiration(
-        key, value, EpochNow(), &expiration, new_value, &value_changed);
-  }
-  *value_slice = value_changed ? Slice(*new_value) : value;
-  return has_expiration ? expiration : kNoExpiration;
-}
-
 void BlobDBImpl::GetCompactionContext(BlobCompactionContext* context) {
   ReadLock l(&mutex_);

@@ -990,7 +968,7 @@ bool BlobDBImpl::SetSnapshotIfNeeded(ReadOptions* read_options) {
 }

 Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
-                                PinnableSlice* value) {
+                                PinnableSlice* value, uint64_t* expiration) {
   assert(value != nullptr);
   BlobIndex blob_index;
   Status s = blob_index.DecodeFrom(index_entry);
@@ -1000,6 +978,13 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
   if (blob_index.HasTTL() && blob_index.expiration() <= EpochNow()) {
     return Status::NotFound("Key expired");
   }
+  if (expiration != nullptr) {
+    if (blob_index.HasTTL()) {
+      *expiration = blob_index.expiration();
+    } else {
+      *expiration = kNoExpiration;
+    }
+  }
   if (blob_index.IsInlined()) {
     // TODO(yiwu): If index_entry is a PinnableSlice, we can also pin the same
     // memory buffer to avoid extra copy.
@@ -1135,14 +1120,20 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
 Status BlobDBImpl::Get(const ReadOptions& read_options,
                        ColumnFamilyHandle* column_family, const Slice& key,
                        PinnableSlice* value) {
+  return Get(read_options, column_family, key, value, nullptr /*expiration*/);
+}
+
+Status BlobDBImpl::Get(const ReadOptions& read_options,
+                       ColumnFamilyHandle* column_family, const Slice& key,
+                       PinnableSlice* value, uint64_t* expiration) {
   StopWatch get_sw(env_, statistics_, BLOB_DB_GET_MICROS);
   RecordTick(statistics_, BLOB_DB_NUM_GET);
-  return GetImpl(read_options, column_family, key, value);
+  return GetImpl(read_options, column_family, key, value, expiration);
 }

 Status BlobDBImpl::GetImpl(const ReadOptions& read_options,
                            ColumnFamilyHandle* column_family, const Slice& key,
-                           PinnableSlice* value) {
+                           PinnableSlice* value, uint64_t* expiration) {
   if (column_family != DefaultColumnFamily()) {
     return Status::NotSupported(
         "Blob DB doesn't support non-default column family.");
@@ -1160,10 +1151,13 @@ Status BlobDBImpl::GetImpl(const ReadOptions& read_options,
                 &is_blob_index);
   TEST_SYNC_POINT("BlobDBImpl::Get:AfterIndexEntryGet:1");
   TEST_SYNC_POINT("BlobDBImpl::Get:AfterIndexEntryGet:2");
+  if (expiration != nullptr) {
+    *expiration = kNoExpiration;
+  }
   if (s.ok() && is_blob_index) {
     std::string index_entry = value->ToString();
     value->Reset();
-    s = GetBlobValue(key, index_entry, value);
+    s = GetBlobValue(key, index_entry, value, expiration);
   }
   if (snapshot_created) {
     db_->ReleaseSnapshot(ro.snapshot);
@@ -1276,11 +1270,11 @@ bool BlobDBImpl::VisibleToActiveSnapshot(
   // [earliest_sequence, obsolete_sequence). But doing so will make the
   // implementation more complicated.
   SequenceNumber obsolete_sequence = bfile->GetObsoleteSequence();
-  SequenceNumber oldest_snapshot = 0;
+  SequenceNumber oldest_snapshot = kMaxSequenceNumber;
   {
     // Need to lock DBImpl mutex before access snapshot list.
     InstrumentedMutexLock l(db_impl_->mutex());
-    auto snapshots = db_impl_->snapshots();
+    auto& snapshots = db_impl_->snapshots();
     if (!snapshots.empty()) {
       oldest_snapshot = snapshots.oldest()->GetSequenceNumber();
     }
@@ -1288,29 +1282,38 @@ bool BlobDBImpl::VisibleToActiveSnapshot(
   return oldest_snapshot < obsolete_sequence;
 }

-std::pair<bool, int64_t> BlobDBImpl::CheckSeqFiles(bool aborted) {
-  if (aborted) return std::make_pair(false, -1);
+std::pair<bool, int64_t> BlobDBImpl::EvictExpiredFiles(bool aborted) {
+  if (aborted) {
+    return std::make_pair(false, -1);
+  }

   std::vector<std::shared_ptr<BlobFile>> process_files;
+  uint64_t now = EpochNow();
   {
-    uint64_t epoch_now = EpochNow();
-
     ReadLock rl(&mutex_);
-    for (auto bfile : open_ttl_files_) {
-      {
-        ReadLock lockbfile_r(&bfile->mutex_);
-
-        if (bfile->expiration_range_.second > epoch_now) {
-          continue;
-        }
-        process_files.push_back(bfile);
+    for (auto p : blob_files_) {
+      auto& blob_file = p.second;
+      ReadLock file_lock(&blob_file->mutex_);
+      if (blob_file->HasTTL() && !blob_file->Obsolete() &&
+          blob_file->GetExpirationRange().second <= now) {
+        process_files.push_back(blob_file);
       }
     }
   }

-  MutexLock l(&write_mutex_);
-  for (auto bfile : process_files) {
-    CloseBlobFile(bfile);
+  SequenceNumber seq = GetLatestSequenceNumber();
+  {
+    MutexLock l(&write_mutex_);
+    for (auto& blob_file : process_files) {
+      WriteLock file_lock(&blob_file->mutex_);
+      if (!blob_file->Immutable()) {
+        CloseBlobFile(blob_file, false /*need_lock*/);
+      }
+      // Need to double check if the file is obsolete.
+      if (!blob_file->Obsolete()) {
+        ObsoleteBlobFile(blob_file, seq, true /*update_size*/);
+      }
+    }
   }

   return std::make_pair(true, -1);
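The rewrite scans all of blob_files_ under the shared registry lock, then re-checks each candidate under the write mutex before closing and obsoleting it, since another thread may have raced in between. A generic C++17 sketch of that collect-then-recheck pattern (the File type and locks here are illustrative, not BlobDB's):

```cpp
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <vector>

struct File {
  bool expired = false;
  bool obsolete = false;
};

std::shared_mutex registry_mutex;  // guards the file registry
std::mutex write_mutex;            // serializes state changes
std::vector<std::shared_ptr<File>> files;

void EvictExpired() {
  // Phase 1: collect candidates while holding only the shared (read) lock.
  std::vector<std::shared_ptr<File>> candidates;
  {
    std::shared_lock<std::shared_mutex> rl(registry_mutex);
    for (const auto& f : files) {
      if (f->expired && !f->obsolete) candidates.push_back(f);
    }
  }
  // Phase 2: mutate under the exclusive lock, re-checking each candidate
  // because another thread may have obsoleted it between the two phases.
  std::lock_guard<std::mutex> wl(write_mutex);
  for (const auto& f : candidates) {
    if (!f->obsolete) f->obsolete = true;
  }
}
```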
@@ -1587,8 +1590,8 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
     }

     // We don't add the file to open_ttl_files_ or open_non_ttl_files_, to
-    // avoid user writes writing to the file, and avoid CheckSeqFiles closing
-    // the file by mistake.
+    // avoid user writes writing to the file, and avoid
+    // EvictExpiredFiles closing the file by mistake.
     WriteLock wl(&mutex_);
     blob_files_.insert(std::make_pair(newfile->BlobFileNumber(), newfile));
   }
@@ -1857,6 +1860,10 @@ Status BlobDBImpl::TEST_GCFileAndUpdateLSM(std::shared_ptr<BlobFile>& bfile,

 void BlobDBImpl::TEST_RunGC() { RunGC(false /*abort*/); }

+void BlobDBImpl::TEST_EvictExpiredFiles() {
+  EvictExpiredFiles(false /*abort*/);
+}
+
 uint64_t BlobDBImpl::TEST_live_sst_size() { return live_sst_size_.load(); }
 #endif  // !NDEBUG
@@ -115,8 +115,8 @@ class BlobDBImpl : public BlobDB {
   // how often to schedule deletion of obsolete files
   static constexpr uint32_t kDeleteObsoleteFilesPeriodMillisecs = 10 * 1000;

-  // how often to schedule check seq files period
-  static constexpr uint32_t kCheckSeqFilesPeriodMillisecs = 10 * 1000;
+  // how often to schedule expired files eviction.
+  static constexpr uint32_t kEvictExpiredFilesPeriodMillisecs = 10 * 1000;

   // when should oldest file be evicted:
   // on reaching 90% of blob_dir_size
@@ -130,6 +130,10 @@ class BlobDBImpl : public BlobDB {
   Status Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family,
              const Slice& key, PinnableSlice* value) override;

+  Status Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family,
+             const Slice& key, PinnableSlice* value,
+             uint64_t* expiration) override;
+
   using BlobDB::NewIterator;
   virtual Iterator* NewIterator(const ReadOptions& read_options) override;

@@ -200,6 +204,8 @@ class BlobDBImpl : public BlobDB {

   void TEST_RunGC();

+  void TEST_EvictExpiredFiles();
+
   void TEST_DeleteObsoleteFiles();

   uint64_t TEST_live_sst_size();
@@ -215,10 +221,10 @@ class BlobDBImpl : public BlobDB {

   Status GetImpl(const ReadOptions& read_options,
                  ColumnFamilyHandle* column_family, const Slice& key,
-                 PinnableSlice* value);
+                 PinnableSlice* value, uint64_t* expiration = nullptr);

   Status GetBlobValue(const Slice& key, const Slice& index_entry,
-                      PinnableSlice* value);
+                      PinnableSlice* value, uint64_t* expiration = nullptr);

   Slice GetCompressedSlice(const Slice& raw,
                            std::string* compression_output) const;
@@ -235,9 +241,6 @@ class BlobDBImpl : public BlobDB {
   void ObsoleteBlobFile(std::shared_ptr<BlobFile> blob_file,
                         SequenceNumber obsolete_seq, bool update_size);

-  uint64_t ExtractExpiration(const Slice& key, const Slice& value,
-                             Slice* value_slice, std::string* new_value);
-
   Status PutBlobValue(const WriteOptions& options, const Slice& key,
                       const Slice& value, uint64_t expiration,
                       WriteBatch* batch);
@@ -269,7 +272,7 @@ class BlobDBImpl : public BlobDB {

   // periodically check whether open blob files and their TTLs have expired;
   // if expired, close the sequential writer and make the file immutable
-  std::pair<bool, int64_t> CheckSeqFiles(bool aborted);
+  std::pair<bool, int64_t> EvictExpiredFiles(bool aborted);

   // if the number of open files approaches the ULIMIT, this
   // task will close random readers, which are kept around for
@@ -337,7 +340,6 @@ class BlobDBImpl : public BlobDB {
   // the base DB
   DBImpl* db_impl_;
   Env* env_;
-  TTLExtractor* ttl_extractor_;

   // the options that govern the behavior of Blob Storage
   BlobDBOptions bdb_options_;
@@ -235,7 +235,6 @@ class BlobDBTest : public testing::Test {

   const std::string dbname_;
   std::unique_ptr<MockTimeEnv> mock_env_;
-  std::shared_ptr<TTLExtractor> ttl_extractor_;
   BlobDB *blob_db_;
 };  // class BlobDBTest

@@ -312,191 +311,6 @@ TEST_F(BlobDBTest, PutUntil) {
   VerifyDB(data);
 }

-TEST_F(BlobDBTest, TTLExtrator_NoTTL) {
-  // The default ttl extractor return no ttl for every key.
-  ttl_extractor_.reset(new TTLExtractor());
-  Random rnd(301);
-  Options options;
-  options.env = mock_env_.get();
-  BlobDBOptions bdb_options;
-  bdb_options.ttl_range_secs = 1000;
-  bdb_options.min_blob_size = 0;
-  bdb_options.blob_file_size = 256 * 1000 * 1000;
-  bdb_options.ttl_extractor = ttl_extractor_;
-  bdb_options.disable_background_tasks = true;
-  Open(bdb_options, options);
-  std::map<std::string, std::string> data;
-  mock_env_->set_current_time(0);
-  for (size_t i = 0; i < 100; i++) {
-    PutRandom("key" + ToString(i), &rnd, &data);
-  }
-  // very far in the future..
-  mock_env_->set_current_time(std::numeric_limits<uint64_t>::max() / 1000000 -
-                              10);
-  auto *bdb_impl = static_cast<BlobDBImpl *>(blob_db_);
-  auto blob_files = bdb_impl->TEST_GetBlobFiles();
-  ASSERT_EQ(1, blob_files.size());
-  ASSERT_FALSE(blob_files[0]->HasTTL());
-  ASSERT_OK(bdb_impl->TEST_CloseBlobFile(blob_files[0]));
-  GCStats gc_stats;
-  ASSERT_OK(bdb_impl->TEST_GCFileAndUpdateLSM(blob_files[0], &gc_stats));
-  ASSERT_EQ(0, gc_stats.num_keys_expired);
-  ASSERT_EQ(100, gc_stats.num_keys_relocated);
-  VerifyDB(data);
-}
-
-TEST_F(BlobDBTest, TTLExtractor_ExtractTTL) {
-  Random rnd(301);
-  class TestTTLExtractor : public TTLExtractor {
-   public:
-    explicit TestTTLExtractor(Random *r) : rnd(r) {}
-
-    virtual bool ExtractTTL(const Slice &key, const Slice &value, uint64_t *ttl,
-                            std::string * /*new_value*/,
-                            bool * /*value_changed*/) override {
-      *ttl = rnd->Next() % 100;
-      if (*ttl > 50) {
-        data[key.ToString()] = value.ToString();
-      }
-      return true;
-    }
-
-    Random *rnd;
-    std::map<std::string, std::string> data;
-  };
-  ttl_extractor_.reset(new TestTTLExtractor(&rnd));
-  Options options;
-  options.env = mock_env_.get();
-  BlobDBOptions bdb_options;
-  bdb_options.ttl_range_secs = 1000;
-  bdb_options.min_blob_size = 0;
-  bdb_options.blob_file_size = 256 * 1000 * 1000;
-  bdb_options.ttl_extractor = ttl_extractor_;
-  bdb_options.disable_background_tasks = true;
-  Open(bdb_options, options);
-  mock_env_->set_current_time(50);
-  for (size_t i = 0; i < 100; i++) {
-    PutRandom("key" + ToString(i), &rnd);
-  }
-  mock_env_->set_current_time(100);
-  auto *bdb_impl = static_cast<BlobDBImpl *>(blob_db_);
-  auto blob_files = bdb_impl->TEST_GetBlobFiles();
-  ASSERT_EQ(1, blob_files.size());
-  ASSERT_TRUE(blob_files[0]->HasTTL());
-  ASSERT_OK(bdb_impl->TEST_CloseBlobFile(blob_files[0]));
-  GCStats gc_stats;
-  ASSERT_OK(bdb_impl->TEST_GCFileAndUpdateLSM(blob_files[0], &gc_stats));
-  auto &data = static_cast<TestTTLExtractor *>(ttl_extractor_.get())->data;
-  ASSERT_EQ(100 - data.size(), gc_stats.num_keys_expired);
-  ASSERT_EQ(data.size(), gc_stats.num_keys_relocated);
-  VerifyDB(data);
-}
-
-TEST_F(BlobDBTest, TTLExtractor_ExtractExpiration) {
-  Random rnd(301);
-  class TestTTLExtractor : public TTLExtractor {
-   public:
-    explicit TestTTLExtractor(Random *r) : rnd(r) {}
-
-    virtual bool ExtractExpiration(const Slice &key, const Slice &value,
-                                   uint64_t /*now*/, uint64_t *expiration,
-                                   std::string * /*new_value*/,
-                                   bool * /*value_changed*/) override {
-      *expiration = rnd->Next() % 100 + 50;
-      if (*expiration > 100) {
-        data[key.ToString()] = value.ToString();
-      }
-      return true;
-    }
-
-    Random *rnd;
-    std::map<std::string, std::string> data;
-  };
-  ttl_extractor_.reset(new TestTTLExtractor(&rnd));
-  Options options;
-  options.env = mock_env_.get();
-  BlobDBOptions bdb_options;
-  bdb_options.ttl_range_secs = 1000;
-  bdb_options.min_blob_size = 0;
-  bdb_options.blob_file_size = 256 * 1000 * 1000;
-  bdb_options.ttl_extractor = ttl_extractor_;
-  bdb_options.disable_background_tasks = true;
-  Open(bdb_options, options);
-  mock_env_->set_current_time(50);
-  for (size_t i = 0; i < 100; i++) {
-    PutRandom("key" + ToString(i), &rnd);
-  }
-  mock_env_->set_current_time(100);
-  auto *bdb_impl = static_cast<BlobDBImpl *>(blob_db_);
-  auto blob_files = bdb_impl->TEST_GetBlobFiles();
-  ASSERT_EQ(1, blob_files.size());
-  ASSERT_TRUE(blob_files[0]->HasTTL());
-  ASSERT_OK(bdb_impl->TEST_CloseBlobFile(blob_files[0]));
-  GCStats gc_stats;
-  ASSERT_OK(bdb_impl->TEST_GCFileAndUpdateLSM(blob_files[0], &gc_stats));
-  auto &data = static_cast<TestTTLExtractor *>(ttl_extractor_.get())->data;
-  ASSERT_EQ(100 - data.size(), gc_stats.num_keys_expired);
-  ASSERT_EQ(data.size(), gc_stats.num_keys_relocated);
-  VerifyDB(data);
-}
-
-TEST_F(BlobDBTest, TTLExtractor_ChangeValue) {
-  class TestTTLExtractor : public TTLExtractor {
-   public:
-    const Slice kTTLSuffix = Slice("ttl:");
-
-    bool ExtractTTL(const Slice & /*key*/, const Slice &value, uint64_t *ttl,
-                    std::string *new_value, bool *value_changed) override {
-      if (value.size() < 12) {
-        return false;
-      }
-      const char *p = value.data() + value.size() - 12;
-      if (kTTLSuffix != Slice(p, 4)) {
-        return false;
-      }
-      *ttl = DecodeFixed64(p + 4);
-      *new_value = Slice(value.data(), value.size() - 12).ToString();
-      *value_changed = true;
-      return true;
-    }
-  };
-  Random rnd(301);
-  Options options;
-  options.env = mock_env_.get();
-  BlobDBOptions bdb_options;
-  bdb_options.ttl_range_secs = 1000;
-  bdb_options.min_blob_size = 0;
-  bdb_options.blob_file_size = 256 * 1000 * 1000;
-  bdb_options.ttl_extractor = std::make_shared<TestTTLExtractor>();
-  bdb_options.disable_background_tasks = true;
-  Open(bdb_options, options);
-  std::map<std::string, std::string> data;
-  mock_env_->set_current_time(50);
-  for (size_t i = 0; i < 100; i++) {
-    int len = rnd.Next() % kMaxBlobSize + 1;
-    std::string key = "key" + ToString(i);
-    std::string value = test::RandomHumanReadableString(&rnd, len);
-    uint64_t ttl = rnd.Next() % 100;
-    std::string value_ttl = value + "ttl:";
-    PutFixed64(&value_ttl, ttl);
-    ASSERT_OK(blob_db_->Put(WriteOptions(), Slice(key), Slice(value_ttl)));
-    if (ttl > 50) {
-      data[key] = value;
-    }
-  }
-  mock_env_->set_current_time(100);
-  auto *bdb_impl = static_cast<BlobDBImpl *>(blob_db_);
-  auto blob_files = bdb_impl->TEST_GetBlobFiles();
-  ASSERT_EQ(1, blob_files.size());
-  ASSERT_TRUE(blob_files[0]->HasTTL());
-  ASSERT_OK(bdb_impl->TEST_CloseBlobFile(blob_files[0]));
-  GCStats gc_stats;
-  ASSERT_OK(bdb_impl->TEST_GCFileAndUpdateLSM(blob_files[0], &gc_stats));
-  ASSERT_EQ(100 - data.size(), gc_stats.num_keys_expired);
-  ASSERT_EQ(data.size(), gc_stats.num_keys_relocated);
-  VerifyDB(data);
-}
-
 TEST_F(BlobDBTest, StackableDBGet) {
   Random rnd(301);
   BlobDBOptions bdb_options;
@@ -520,6 +334,25 @@ TEST_F(BlobDBTest, StackableDBGet) {
   }
 }

+TEST_F(BlobDBTest, GetExpiration) {
+  Options options;
+  options.env = mock_env_.get();
+  BlobDBOptions bdb_options;
+  bdb_options.disable_background_tasks = true;
+  mock_env_->set_current_time(100);
+  Open(bdb_options, options);
+  Put("key1", "value1");
+  PutWithTTL("key2", "value2", 200);
+  PinnableSlice value;
+  uint64_t expiration;
+  ASSERT_OK(blob_db_->Get(ReadOptions(), "key1", &value, &expiration));
+  ASSERT_EQ("value1", value.ToString());
+  ASSERT_EQ(kNoExpiration, expiration);
+  ASSERT_OK(blob_db_->Get(ReadOptions(), "key2", &value, &expiration));
+  ASSERT_EQ("value2", value.ToString());
+  ASSERT_EQ(300 /* = 100 + 200 */, expiration);
+}
+
 TEST_F(BlobDBTest, WriteBatch) {
   Random rnd(301);
   BlobDBOptions bdb_options;
@@ -1548,6 +1381,36 @@ TEST_F(BlobDBTest, FilterForFIFOEviction) {
   VerifyDB(data_after_compact);
 }

+// File should be evicted after expiration.
+TEST_F(BlobDBTest, EvictExpiredFile) {
+  BlobDBOptions bdb_options;
+  bdb_options.ttl_range_secs = 100;
+  bdb_options.min_blob_size = 0;
+  bdb_options.disable_background_tasks = true;
+  Options options;
+  options.env = mock_env_.get();
+  Open(bdb_options, options);
+  mock_env_->set_current_time(50);
+  std::map<std::string, std::string> data;
+  ASSERT_OK(PutWithTTL("foo", "bar", 100, &data));
+  auto blob_files = blob_db_impl()->TEST_GetBlobFiles();
+  ASSERT_EQ(1, blob_files.size());
+  auto blob_file = blob_files[0];
+  ASSERT_FALSE(blob_file->Immutable());
+  ASSERT_FALSE(blob_file->Obsolete());
+  VerifyDB(data);
+  mock_env_->set_current_time(250);
+  // The key should be expired now.
+  blob_db_impl()->TEST_EvictExpiredFiles();
+  ASSERT_EQ(1, blob_db_impl()->TEST_GetBlobFiles().size());
+  ASSERT_EQ(1, blob_db_impl()->TEST_GetObsoleteFiles().size());
+  ASSERT_TRUE(blob_file->Immutable());
+  ASSERT_TRUE(blob_file->Obsolete());
+  blob_db_impl()->TEST_DeleteObsoleteFiles();
+  ASSERT_EQ(0, blob_db_impl()->TEST_GetBlobFiles().size());
+  ASSERT_EQ(0, blob_db_impl()->TEST_GetObsoleteFiles().size());
+}
+
 }  // namespace blob_db
 }  // namespace rocksdb

@@ -1,34 +0,0 @@
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-#ifndef ROCKSDB_LITE
-
-#include "utilities/blob_db/blob_db.h"
-#include "util/coding.h"
-
-namespace rocksdb {
-namespace blob_db {
-
-bool TTLExtractor::ExtractTTL(const Slice& /*key*/, const Slice& /*value*/,
-                              uint64_t* /*ttl*/, std::string* /*new_value*/,
-                              bool* /*value_changed*/) {
-  return false;
-}
-
-bool TTLExtractor::ExtractExpiration(const Slice& key, const Slice& value,
-                                     uint64_t now, uint64_t* expiration,
-                                     std::string* new_value,
-                                     bool* value_changed) {
-  uint64_t ttl;
-  bool has_ttl = ExtractTTL(key, value, &ttl, new_value, value_changed);
-  if (has_ttl) {
-    *expiration = now + ttl;
-  }
-  return has_ttl;
-}
-
-}  // namespace blob_db
-}  // namespace rocksdb
-
-#endif  // ROCKSDB_LITE
@@ -20,7 +20,7 @@ class SequentialFileMirror : public SequentialFile {
   std::string fname;
   explicit SequentialFileMirror(std::string f) : fname(f) {}

-  Status Read(size_t n, Slice* result, char* scratch) {
+  Status Read(size_t n, Slice* result, char* scratch) override {
     Slice aslice;
     Status as = a_->Read(n, &aslice, scratch);
     if (as == Status::OK()) {
@@ -44,13 +44,13 @@ class SequentialFileMirror : public SequentialFile {
     return as;
   }

-  Status Skip(uint64_t n) {
+  Status Skip(uint64_t n) override {
     Status as = a_->Skip(n);
     Status bs = b_->Skip(n);
     assert(as == bs);
     return as;
   }
-  Status InvalidateCache(size_t offset, size_t length) {
+  Status InvalidateCache(size_t offset, size_t length) override {
     Status as = a_->InvalidateCache(offset, length);
     Status bs = b_->InvalidateCache(offset, length);
     assert(as == bs);
@@ -64,7 +64,7 @@ class RandomAccessFileMirror : public RandomAccessFile {
   std::string fname;
   explicit RandomAccessFileMirror(std::string f) : fname(f) {}

-  Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const {
+  Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override {
     Status as = a_->Read(offset, n, result, scratch);
     if (as == Status::OK()) {
       char* bscratch = new char[n];
@@ -86,7 +86,7 @@ class RandomAccessFileMirror : public RandomAccessFile {
     return as;
   }

-  size_t GetUniqueId(char* id, size_t max_size) const {
+  size_t GetUniqueId(char* id, size_t max_size) const override {
     // NOTE: not verified
     return a_->GetUniqueId(id, max_size);
   }
@@ -251,20 +251,20 @@ class PersistentCacheTier : public PersistentCache {
   // Print stats to string recursively
   virtual std::string PrintStats();

-  virtual PersistentCache::StatsType Stats();
+  virtual PersistentCache::StatsType Stats() override;

   // Insert to page cache
   virtual Status Insert(const Slice& page_key, const char* data,
-                        const size_t size) = 0;
+                        const size_t size) override = 0;

   // Lookup page cache by page identifier
   virtual Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
-                        size_t* size) = 0;
+                        size_t* size) override = 0;

   // Does it store compressed data?
-  virtual bool IsCompressed() = 0;
+  virtual bool IsCompressed() override = 0;

-  virtual std::string GetPrintableOptions() const = 0;
+  virtual std::string GetPrintableOptions() const override = 0;

   // Return a reference to next tier
   virtual Tier& next_tier() { return next_tier_; }