Compare commits
15 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
4be3b1c3e5 | ||
|
f54a9a6e7f | ||
|
eb84ad3151 | ||
|
50c552c5bc | ||
|
5b74443720 | ||
|
073cdbaa49 | ||
|
e52d6ad227 | ||
|
25ea289819 | ||
|
4dd267d497 | ||
|
32be357ef7 | ||
|
3eb6956728 | ||
|
18ba3e2edd | ||
|
1c91104f43 | ||
|
e47027d92d | ||
|
e5fd8bdb83 |
22
HISTORY.md
22
HISTORY.md
@ -1,14 +1,26 @@
|
||||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
|
||||
## 5.2.0 (02/08/2017)
|
||||
### Public API Change
|
||||
* NewLRUCache() will determine number of shard bits automatically based on capacity, if the user doesn't pass one. This also impacts the default block cache when the user doesn't explict provide one.
|
||||
* Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files.
|
||||
* (Experimental) Two-level indexing that partition the index and creates a 2nd level index on the partitions. The feature can be enabled by setting kTwoLevelIndexSearch as IndexType and configuring index_per_partition.
|
||||
|
||||
### New Features
|
||||
* Added new overloaded function GetApproximateSizes that allows to specify if memtable stats should be computed only without computing SST files' stats approximations.
|
||||
* Added new function GetApproximateMemTableStats that approximates both number of records and size of memtables.
|
||||
|
||||
### Bug Fixes
|
||||
* RangeSync() should work if ROCKSDB_FALLOCATE_PRESENT is not set
|
||||
* Fix wrong results in a data race case in Get()
|
||||
* Some fixes related to 2PC.
|
||||
* Fix several bugs in Direct I/O supports.
|
||||
|
||||
## 5.1.0 (01/13/2017)
|
||||
* Support dynamically change `delete_obsolete_files_period_micros` option via SetDBOptions().
|
||||
* Added EventListener::OnExternalFileIngested which will be called when IngestExternalFile() add a file successfully.
|
||||
* BackupEngine::Open and BackupEngineReadOnly::Open now always return error statuses matching those of the backup Env.
|
||||
* Added new overloaded function GetApproximateSizes that allows to specify if memtable stats should be computed only without computing SST files' stats approximations.
|
||||
* NewLRUCache() will determine number of shard bits automatically based on capacity, if the user doesn't pass one. This also impacts the default block cache when the user doesn't explict provide one.
|
||||
* Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files.
|
||||
* Added new function GetApproximateMemTableStats that approximates both number of records and size of memtables.
|
||||
* (Experimental) Two-level indexing that partition the index and creates a 2nd level index on the partitions. The feature can be enabled by setting kTwoLevelIndexSearch as IndexType and configuring index_per_partition.
|
||||
|
||||
### Bug Fixes
|
||||
* Fix the bug that if 2PC is enabled, checkpoints may loss some recent transactions.
|
||||
|
5
Makefile
5
Makefile
@ -223,11 +223,6 @@ default: all
|
||||
WARNING_FLAGS = -W -Wextra -Wall -Wsign-compare -Wshadow \
|
||||
-Wno-unused-parameter
|
||||
|
||||
ifndef DISABLE_WARNING_AS_ERROR
|
||||
WARNING_FLAGS += -Werror
|
||||
endif
|
||||
|
||||
|
||||
ifdef LUA_PATH
|
||||
|
||||
ifndef LUA_INCLUDE
|
||||
|
@ -51,12 +51,7 @@ if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
|
||||
FBCODE_BUILD="true"
|
||||
# If we're compiling with TSAN we need pic build
|
||||
PIC_BUILD=$COMPILE_WITH_TSAN
|
||||
if [ -z "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then
|
||||
source "$PWD/build_tools/fbcode_config.sh"
|
||||
else
|
||||
# we need this to build with MySQL. Don't use for other purposes.
|
||||
source "$PWD/build_tools/fbcode_config4.8.1.sh"
|
||||
fi
|
||||
source "$PWD/build_tools/fbcode_config.sh"
|
||||
fi
|
||||
|
||||
# Delete existing output, if it exists
|
||||
|
@ -1,18 +1,19 @@
|
||||
GCC_BASE=/mnt/gvfs/third-party2/gcc/cf7d14c625ce30bae1a4661c2319c5a283e4dd22/4.9.x/centos6-native/108cf83
|
||||
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/8598c375b0e94e1448182eb3df034704144a838d/stable/centos6-native/3f16ddd
|
||||
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/d6e0a7da6faba45f5e5b1638f9edd7afc2f34e7d/4.9.x/gcc-4.9-glibc-2.20/024dbc3
|
||||
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/d282e6e8f3d20f4e40a516834847bdc038e07973/2.20/gcc-4.9-glibc-2.20/500e281
|
||||
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/8c38a4c1e52b4c2cc8a9cdc31b9c947ed7dbfcb4/1.1.3/gcc-4.9-glibc-2.20/e9936bf
|
||||
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/0882df3713c7a84f15abe368dc004581f20b39d7/1.2.8/gcc-5-glibc-2.23/9bc6787
|
||||
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/740325875f6729f42d28deaa2147b0854f3a347e/1.0.6/gcc-5-glibc-2.23/9bc6787
|
||||
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0e790b441e2d9acd68d51e1d2e028f88c6a79ddf/r131/gcc-5-glibc-2.23/9bc6787
|
||||
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/9455f75ff7f4831dc9fda02a6a0f8c68922fad8f/1.0.0/gcc-5-glibc-2.23/9bc6787
|
||||
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/f001a51b2854957676d07306ef3abf67186b5c8b/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a
|
||||
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/fc8a13ca1fffa4d0765c716c5a0b49f0c107518f/master/gcc-5-glibc-2.23/1c32b4b
|
||||
NUMA_BASE=/mnt/gvfs/third-party2/numa/17c514c4d102a25ca15f4558be564eeed76f4b6a/2.0.8/gcc-5-glibc-2.23/9bc6787
|
||||
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/ad576de2a1ea560c4d3434304f0fc4e079bede42/trunk/gcc-5-glibc-2.23/b1847cb
|
||||
TBB_BASE=/mnt/gvfs/third-party2/tbb/9d9a554877d0c5bef330fe818ab7178806dd316a/4.0_update2/gcc-4.9-glibc-2.20/e9936bf
|
||||
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/7c111ff27e0c466235163f00f280a9d617c3d2ec/4.0.9-36_fbk5_2933_gd092e3f/gcc-5-glibc-2.23/da39a3e
|
||||
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/b7fd454c4b10c6a81015d4524ed06cdeab558490/2.26/centos6-native/da39a3e
|
||||
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d7f4d4d86674a57668e3a96f76f0e17dd0eb8765/3.10.0/gcc-4.9-glibc-2.20/e9936bf
|
||||
LUA_BASE=/mnt/gvfs/third-party2/lua/61e4abf5813bbc39bc4f548757ccfcadde175a48/5.2.3/gcc-4.9-glibc-2.20/690f0d7
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/7.x/centos7-native/b2ef2b6
|
||||
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/963d9aeda70cc4779885b1277484fe7544a04e3e/9.0.0/platform007/9e92d53/
|
||||
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/7.x/platform007/5620abc
|
||||
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.26/platform007/f259413
|
||||
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d
|
||||
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/platform007/ca4da3d
|
||||
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d
|
||||
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/platform007/ca4da3d
|
||||
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/platform007/15a3614
|
||||
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d
|
||||
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/platform007/c26c002
|
||||
NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/platform007/ca4da3d
|
||||
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/platform007/6f3e0a9
|
||||
TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/platform007/ca4da3d
|
||||
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/fb/platform007/da39a3e
|
||||
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/ab9f09bba370e7066cafd4eb59752db93f2e8312/2.29.1/platform007/15a3614
|
||||
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/platform007/ca4da3d
|
||||
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832
|
||||
|
@ -13,7 +13,7 @@ source "$BASEDIR/dependencies.sh"
|
||||
CFLAGS=""
|
||||
|
||||
# libgcc
|
||||
LIBGCC_INCLUDE="$LIBGCC_BASE/include"
|
||||
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0"
|
||||
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
|
||||
|
||||
# glibc
|
||||
@ -43,12 +43,16 @@ if test -z $PIC_BUILD; then
|
||||
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
|
||||
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
|
||||
CFLAGS+=" -DLZ4"
|
||||
|
||||
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
|
||||
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
|
||||
CFLAGS+=" -DZSTD"
|
||||
fi
|
||||
|
||||
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
|
||||
if test -z $PIC_BUILD; then
|
||||
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
|
||||
else
|
||||
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
|
||||
fi
|
||||
CFLAGS+=" -DZSTD"
|
||||
|
||||
# location of gflags headers and libraries
|
||||
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
|
||||
if test -z $PIC_BUILD; then
|
||||
@ -56,7 +60,7 @@ if test -z $PIC_BUILD; then
|
||||
else
|
||||
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
|
||||
fi
|
||||
CFLAGS+=" -DGFLAGS=google"
|
||||
CFLAGS+=" -DGFLAGS=gflags"
|
||||
|
||||
# location of jemalloc
|
||||
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
|
||||
@ -104,8 +108,8 @@ if [ -z "$USE_CLANG" ]; then
|
||||
CXX="$GCC_BASE/bin/g++"
|
||||
|
||||
CFLAGS+=" -B$BINUTILS/gold"
|
||||
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
|
||||
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||
JEMALLOC=1
|
||||
else
|
||||
# clang
|
||||
@ -116,8 +120,8 @@ else
|
||||
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
|
||||
|
||||
CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x "
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x/x86_64-facebook-linux "
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x "
|
||||
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux "
|
||||
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
|
||||
CFLAGS+=" -isystem $CLANG_INCLUDE"
|
||||
@ -128,13 +132,14 @@ else
|
||||
fi
|
||||
|
||||
CFLAGS+=" $DEPS_INCLUDE"
|
||||
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE"
|
||||
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42"
|
||||
CXXFLAGS+=" $CFLAGS"
|
||||
|
||||
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
|
||||
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.9-glibc-2.20/lib/ld.so"
|
||||
EXEC_LDFLAGS+=" -B$BINUTILS/gold"
|
||||
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so"
|
||||
EXEC_LDFLAGS+=" $LIBUNWIND"
|
||||
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.9-glibc-2.20/lib"
|
||||
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib"
|
||||
# required by libtbb
|
||||
EXEC_LDFLAGS+=" -ldl"
|
||||
|
||||
@ -144,12 +149,4 @@ EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GF
|
||||
|
||||
VALGRIND_VER="$VALGRIND_BASE/bin/"
|
||||
|
||||
LUA_PATH="$LUA_BASE"
|
||||
|
||||
if test -z $PIC_BUILD; then
|
||||
LUA_LIB=" $LUA_PATH/lib/liblua.a"
|
||||
else
|
||||
LUA_LIB=" $LUA_PATH/lib/liblua_pic.a"
|
||||
fi
|
||||
|
||||
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
|
||||
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD
|
||||
|
@ -341,6 +341,7 @@ void DBIter::Next() {
|
||||
//
|
||||
// NOTE: In between, saved_key_ can point to a user key that has
|
||||
// a delete marker or a sequence number higher than sequence_
|
||||
// saved_key_ MUST have a proper user_key before calling this function
|
||||
//
|
||||
// The prefix_check parameter controls whether we check the iterated
|
||||
// keys against the prefix of the seeked key. Set to false when
|
||||
@ -946,7 +947,6 @@ void DBIter::Seek(const Slice& target) {
|
||||
StopWatch sw(env_, statistics_, DB_SEEK);
|
||||
ReleaseTempPinnedData();
|
||||
saved_key_.Clear();
|
||||
// now saved_key is used to store internal key.
|
||||
saved_key_.SetInternalKey(target, sequence_);
|
||||
|
||||
{
|
||||
@ -961,6 +961,9 @@ void DBIter::Seek(const Slice& target) {
|
||||
}
|
||||
direction_ = kForward;
|
||||
ClearSavedValue();
|
||||
// convert the InternalKey to UserKey in saved_key_ before
|
||||
// passed to FindNextUserEntry
|
||||
saved_key_.Reserve(saved_key_.Size() - 8);
|
||||
FindNextUserEntry(false /* not skipping */, prefix_same_as_start_);
|
||||
if (!valid_) {
|
||||
prefix_start_key_.clear();
|
||||
@ -1039,6 +1042,8 @@ void DBIter::SeekToFirst() {
|
||||
|
||||
RecordTick(statistics_, NUMBER_DB_SEEK);
|
||||
if (iter_->Valid()) {
|
||||
saved_key_.SetKey(ExtractUserKey(iter_->key()),
|
||||
!iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
|
||||
FindNextUserEntry(false /* not skipping */, false /* no prefix check */);
|
||||
if (statistics_ != nullptr) {
|
||||
if (valid_) {
|
||||
|
@ -1897,6 +1897,66 @@ TEST_F(DBIteratorTest, DBIterator12) {
|
||||
ASSERT_FALSE(db_iter->Valid());
|
||||
}
|
||||
|
||||
TEST_F(DBIteratorTest, DBIterator13) {
|
||||
Options options;
|
||||
options.merge_operator = nullptr;
|
||||
|
||||
std::string key;
|
||||
key.resize(9);
|
||||
key.assign(9, static_cast<char>(0));
|
||||
key[0] = 'b';
|
||||
|
||||
TestIterator* internal_iter = new TestIterator(BytewiseComparator());
|
||||
internal_iter->AddPut(key, "0");
|
||||
internal_iter->AddPut(key, "1");
|
||||
internal_iter->AddPut(key, "2");
|
||||
internal_iter->AddPut(key, "3");
|
||||
internal_iter->AddPut(key, "4");
|
||||
internal_iter->AddPut(key, "5");
|
||||
internal_iter->AddPut(key, "6");
|
||||
internal_iter->AddPut(key, "7");
|
||||
internal_iter->AddPut(key, "8");
|
||||
internal_iter->Finish();
|
||||
|
||||
std::unique_ptr<Iterator> db_iter(
|
||||
NewDBIterator(env_, ImmutableCFOptions(options), BytewiseComparator(),
|
||||
internal_iter, 2, 3, 0));
|
||||
db_iter->Seek("b");
|
||||
ASSERT_TRUE(db_iter->Valid());
|
||||
ASSERT_EQ(db_iter->key().ToString(), key);
|
||||
ASSERT_EQ(db_iter->value().ToString(), "2");
|
||||
}
|
||||
|
||||
TEST_F(DBIteratorTest, DBIterator14) {
|
||||
Options options;
|
||||
options.merge_operator = nullptr;
|
||||
|
||||
std::string key("b");
|
||||
TestIterator* internal_iter = new TestIterator(BytewiseComparator());
|
||||
internal_iter->AddPut("b", "0");
|
||||
internal_iter->AddPut("b", "1");
|
||||
internal_iter->AddPut("b", "2");
|
||||
internal_iter->AddPut("b", "3");
|
||||
internal_iter->AddPut("a", "4");
|
||||
internal_iter->AddPut("a", "5");
|
||||
internal_iter->AddPut("a", "6");
|
||||
internal_iter->AddPut("c", "7");
|
||||
internal_iter->AddPut("c", "8");
|
||||
internal_iter->AddPut("c", "9");
|
||||
internal_iter->Finish();
|
||||
|
||||
std::unique_ptr<Iterator> db_iter(
|
||||
NewDBIterator(env_, ImmutableCFOptions(options), BytewiseComparator(),
|
||||
internal_iter, 4, 1, 0));
|
||||
db_iter->Seek("b");
|
||||
ASSERT_TRUE(db_iter->Valid());
|
||||
ASSERT_EQ(db_iter->key().ToString(), "b");
|
||||
ASSERT_EQ(db_iter->value().ToString(), "3");
|
||||
db_iter->SeekToFirst();
|
||||
ASSERT_EQ(db_iter->key().ToString(), "a");
|
||||
ASSERT_EQ(db_iter->value().ToString(), "4");
|
||||
}
|
||||
|
||||
class DBIterWithMergeIterTest : public testing::Test {
|
||||
public:
|
||||
DBIterWithMergeIterTest()
|
||||
|
@ -2277,6 +2277,27 @@ TEST_F(DBTest2, GetRaceFlush2) {
|
||||
t1.join();
|
||||
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
||||
}
|
||||
|
||||
TEST_F(DBTest2, DirectIO) {
|
||||
if (!IsDirectIOSupported()) {
|
||||
return;
|
||||
}
|
||||
Options options = CurrentOptions();
|
||||
options.use_direct_reads = options.use_direct_writes = true;
|
||||
options.allow_mmap_reads = options.allow_mmap_writes = false;
|
||||
DestroyAndReopen(options);
|
||||
|
||||
ASSERT_OK(Put(Key(0), "a"));
|
||||
ASSERT_OK(Put(Key(5), "a"));
|
||||
ASSERT_OK(Flush());
|
||||
|
||||
ASSERT_OK(Put(Key(10), "a"));
|
||||
ASSERT_OK(Put(Key(15), "a"));
|
||||
ASSERT_OK(Flush());
|
||||
|
||||
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
||||
Reopen(options);
|
||||
}
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -231,6 +231,20 @@ class SpecialEnv : public EnvWrapper {
|
||||
return base_->Append(data);
|
||||
}
|
||||
}
|
||||
Status PositionedAppend(const Slice& data, uint64_t offset) override {
|
||||
if (env_->table_write_callback_) {
|
||||
(*env_->table_write_callback_)();
|
||||
}
|
||||
if (env_->drop_writes_.load(std::memory_order_acquire)) {
|
||||
// Drop writes on the floor
|
||||
return Status::OK();
|
||||
} else if (env_->no_space_.load(std::memory_order_acquire)) {
|
||||
return Status::NoSpace("No space left on device");
|
||||
} else {
|
||||
env_->bytes_written_ += data.size();
|
||||
return base_->PositionedAppend(data, offset);
|
||||
}
|
||||
}
|
||||
Status Truncate(uint64_t size) override { return base_->Truncate(size); }
|
||||
Status Close() override {
|
||||
// SyncPoint is not supported in Released Windows Mode.
|
||||
@ -257,6 +271,9 @@ class SpecialEnv : public EnvWrapper {
|
||||
Env::IOPriority GetIOPriority() override {
|
||||
return base_->GetIOPriority();
|
||||
}
|
||||
bool use_direct_io() const override {
|
||||
return base_->use_direct_io();
|
||||
}
|
||||
};
|
||||
class ManifestFile : public WritableFile {
|
||||
public:
|
||||
@ -358,7 +375,14 @@ class SpecialEnv : public EnvWrapper {
|
||||
return Status::IOError("simulated write error");
|
||||
}
|
||||
|
||||
Status s = target()->NewWritableFile(f, r, soptions);
|
||||
EnvOptions optimized = soptions;
|
||||
if (strstr(f.c_str(), "MANIFEST") != nullptr ||
|
||||
strstr(f.c_str(), "log") != nullptr) {
|
||||
optimized.use_mmap_writes = false;
|
||||
optimized.use_direct_writes = false;
|
||||
}
|
||||
|
||||
Status s = target()->NewWritableFile(f, r, optimized);
|
||||
if (s.ok()) {
|
||||
if (strstr(f.c_str(), ".sst") != nullptr) {
|
||||
r->reset(new SSTableFile(this, std::move(*r)));
|
||||
|
@ -5,8 +5,8 @@
|
||||
#pragma once
|
||||
|
||||
#define ROCKSDB_MAJOR 5
|
||||
#define ROCKSDB_MINOR 0
|
||||
#define ROCKSDB_PATCH 0
|
||||
#define ROCKSDB_MINOR 2
|
||||
#define ROCKSDB_PATCH 1
|
||||
|
||||
// Do not use these. We made the mistake of declaring macros starting with
|
||||
// double underscore. Now we have to live with our choice. We'll deprecate these
|
||||
|
@ -468,13 +468,15 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||
ReadaheadRandomAccessFile(std::unique_ptr<RandomAccessFile>&& file,
|
||||
size_t readahead_size)
|
||||
: file_(std::move(file)),
|
||||
readahead_size_(readahead_size),
|
||||
alignment_(file_->GetRequiredBufferAlignment()),
|
||||
readahead_size_(Roundup(readahead_size, alignment_)),
|
||||
forward_calls_(file_->ShouldForwardRawRequest()),
|
||||
buffer_(),
|
||||
buffer_offset_(0),
|
||||
buffer_len_(0) {
|
||||
if (!forward_calls_) {
|
||||
buffer_.reset(new char[readahead_size_]);
|
||||
buffer_.Alignment(alignment_);
|
||||
buffer_.AllocateNewBuffer(readahead_size_ + alignment_);
|
||||
} else if (readahead_size_ > 0) {
|
||||
file_->EnableReadAhead();
|
||||
}
|
||||
@ -500,31 +502,45 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||
|
||||
std::unique_lock<std::mutex> lk(lock_);
|
||||
|
||||
size_t copied = 0;
|
||||
// if offset between [buffer_offset_, buffer_offset_ + buffer_len>
|
||||
if (offset >= buffer_offset_ && offset < buffer_len_ + buffer_offset_) {
|
||||
uint64_t offset_in_buffer = offset - buffer_offset_;
|
||||
copied = std::min(buffer_len_ - static_cast<size_t>(offset_in_buffer), n);
|
||||
memcpy(scratch, buffer_.get() + offset_in_buffer, copied);
|
||||
if (copied == n) {
|
||||
// fully cached
|
||||
*result = Slice(scratch, n);
|
||||
return Status::OK();
|
||||
}
|
||||
size_t cached_len = 0;
|
||||
// Check if there is a cache hit, means that [offset, offset + n) is either
|
||||
// complitely or partially in the buffer
|
||||
// If it's completely cached, including end of file case when offset + n is
|
||||
// greater than EOF, return
|
||||
if (TryReadFromCache_(offset, n, &cached_len, scratch) &&
|
||||
(cached_len == n ||
|
||||
// End of file
|
||||
buffer_len_ < readahead_size_ + alignment_)) {
|
||||
*result = Slice(scratch, cached_len);
|
||||
return Status::OK();
|
||||
}
|
||||
size_t advanced_offset = offset + cached_len;
|
||||
// In the case of cache hit advanced_offset is already aligned, means that
|
||||
// chunk_offset equals to advanced_offset
|
||||
size_t chunk_offset = TruncateToPageBoundary(alignment_, advanced_offset);
|
||||
Slice readahead_result;
|
||||
Status s = file_->Read(offset + copied, readahead_size_, &readahead_result,
|
||||
buffer_.get());
|
||||
Status s = file_->Read(chunk_offset, readahead_size_ + alignment_,
|
||||
&readahead_result, buffer_.BufferStart());
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
// In the case of cache miss, i.e. when cached_len equals 0, an offset can
|
||||
// exceed the file end position, so the following check is required
|
||||
if (advanced_offset < chunk_offset + readahead_result.size()) {
|
||||
// In the case of cache miss, the first chunk_padding bytes in buffer_ are
|
||||
// stored for alignment only and must be skipped
|
||||
size_t chunk_padding = advanced_offset - chunk_offset;
|
||||
auto remaining_len =
|
||||
std::min(readahead_result.size() - chunk_padding, n - cached_len);
|
||||
memcpy(scratch + cached_len, readahead_result.data() + chunk_padding,
|
||||
remaining_len);
|
||||
*result = Slice(scratch, cached_len + remaining_len);
|
||||
} else {
|
||||
*result = Slice(scratch, cached_len);
|
||||
}
|
||||
|
||||
auto left_to_copy = std::min(readahead_result.size(), n - copied);
|
||||
memcpy(scratch + copied, readahead_result.data(), left_to_copy);
|
||||
*result = Slice(scratch, copied + left_to_copy);
|
||||
|
||||
if (readahead_result.data() == buffer_.get()) {
|
||||
buffer_offset_ = offset + copied;
|
||||
if (readahead_result.data() == buffer_.BufferStart()) {
|
||||
buffer_offset_ = chunk_offset;
|
||||
buffer_len_ = readahead_result.size();
|
||||
} else {
|
||||
buffer_len_ = 0;
|
||||
@ -543,13 +559,31 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||
return file_->InvalidateCache(offset, length);
|
||||
}
|
||||
|
||||
virtual bool use_direct_io() const override {
|
||||
return file_->use_direct_io();
|
||||
}
|
||||
|
||||
private:
|
||||
bool TryReadFromCache_(uint64_t offset, size_t n, size_t* cached_len,
|
||||
char* scratch) const {
|
||||
if (offset < buffer_offset_ || offset >= buffer_offset_ + buffer_len_) {
|
||||
*cached_len = 0;
|
||||
return false;
|
||||
}
|
||||
uint64_t offset_in_buffer = offset - buffer_offset_;
|
||||
*cached_len =
|
||||
std::min(buffer_len_ - static_cast<size_t>(offset_in_buffer), n);
|
||||
memcpy(scratch, buffer_.BufferStart() + offset_in_buffer, *cached_len);
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<RandomAccessFile> file_;
|
||||
const size_t alignment_;
|
||||
size_t readahead_size_;
|
||||
const bool forward_calls_;
|
||||
|
||||
mutable std::mutex lock_;
|
||||
mutable std::unique_ptr<char[]> buffer_;
|
||||
mutable AlignedBuffer buffer_;
|
||||
mutable uint64_t buffer_offset_;
|
||||
mutable size_t buffer_len_;
|
||||
};
|
||||
|
@ -3,10 +3,12 @@
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
#include <vector>
|
||||
#include "util/file_reader_writer.h"
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include "util/random.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
@ -125,6 +127,108 @@ TEST_F(WritableFileWriterTest, AppendStatusReturn) {
|
||||
ASSERT_NOK(writer->Append(std::string(2 * kMb, 'b')));
|
||||
}
|
||||
|
||||
class ReadaheadRandomAccessFileTest
|
||||
: public testing::Test,
|
||||
public testing::WithParamInterface<size_t> {
|
||||
public:
|
||||
static std::vector<size_t> GetReadaheadSizeList() {
|
||||
return {1lu << 12, 1lu << 16};
|
||||
}
|
||||
virtual void SetUp() override {
|
||||
readahead_size_ = GetParam();
|
||||
scratch_.reset(new char[2 * readahead_size_]);
|
||||
ResetSourceStr();
|
||||
}
|
||||
ReadaheadRandomAccessFileTest() : control_contents_() {}
|
||||
std::string Read(uint64_t offset, size_t n) {
|
||||
Slice result;
|
||||
test_read_holder_->Read(offset, n, &result, scratch_.get());
|
||||
return std::string(result.data(), result.size());
|
||||
}
|
||||
void ResetSourceStr(const std::string& str = "") {
|
||||
auto write_holder = std::unique_ptr<WritableFileWriter>(
|
||||
test::GetWritableFileWriter(new test::StringSink(&control_contents_)));
|
||||
write_holder->Append(Slice(str));
|
||||
write_holder->Flush();
|
||||
auto read_holder = std::unique_ptr<RandomAccessFile>(
|
||||
new test::StringSource(control_contents_));
|
||||
test_read_holder_ =
|
||||
NewReadaheadRandomAccessFile(std::move(read_holder), readahead_size_);
|
||||
}
|
||||
size_t GetReadaheadSize() const { return readahead_size_; }
|
||||
|
||||
private:
|
||||
size_t readahead_size_;
|
||||
Slice control_contents_;
|
||||
std::unique_ptr<RandomAccessFile> test_read_holder_;
|
||||
std::unique_ptr<char[]> scratch_;
|
||||
};
|
||||
|
||||
TEST_P(ReadaheadRandomAccessFileTest, EmptySourceStrTest) {
|
||||
ASSERT_EQ("", Read(0, 1));
|
||||
ASSERT_EQ("", Read(0, 0));
|
||||
ASSERT_EQ("", Read(13, 13));
|
||||
}
|
||||
|
||||
TEST_P(ReadaheadRandomAccessFileTest, SourceStrLenLessThanReadaheadSizeTest) {
|
||||
std::string str = "abcdefghijklmnopqrs";
|
||||
ResetSourceStr(str);
|
||||
ASSERT_EQ(str.substr(3, 4), Read(3, 4));
|
||||
ASSERT_EQ(str.substr(0, 3), Read(0, 3));
|
||||
ASSERT_EQ(str, Read(0, str.size()));
|
||||
ASSERT_EQ(str.substr(7, std::min(static_cast<int>(str.size()) - 7, 30)),
|
||||
Read(7, 30));
|
||||
ASSERT_EQ("", Read(100, 100));
|
||||
}
|
||||
|
||||
TEST_P(ReadaheadRandomAccessFileTest,
|
||||
SourceStrLenCanBeGreaterThanReadaheadSizeTest) {
|
||||
Random rng(42);
|
||||
for (int k = 0; k < 100; ++k) {
|
||||
size_t strLen = k * GetReadaheadSize() +
|
||||
rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||
std::string str =
|
||||
test::RandomHumanReadableString(&rng, static_cast<int>(strLen));
|
||||
ResetSourceStr(str);
|
||||
for (int test = 1; test <= 100; ++test) {
|
||||
size_t offset = rng.Uniform(static_cast<int>(strLen));
|
||||
size_t n = rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||
ASSERT_EQ(str.substr(offset, std::min(n, str.size() - offset)),
|
||||
Read(offset, n));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(ReadaheadRandomAccessFileTest, NExceedReadaheadTest) {
|
||||
Random rng(7);
|
||||
size_t strLen = 4 * GetReadaheadSize() +
|
||||
rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||
std::string str =
|
||||
test::RandomHumanReadableString(&rng, static_cast<int>(strLen));
|
||||
ResetSourceStr(str);
|
||||
for (int test = 1; test <= 100; ++test) {
|
||||
size_t offset = rng.Uniform(static_cast<int>(strLen));
|
||||
size_t n =
|
||||
GetReadaheadSize() + rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||
ASSERT_EQ(str.substr(offset, std::min(n, str.size() - offset)),
|
||||
Read(offset, n));
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
EmptySourceStrTest, ReadaheadRandomAccessFileTest,
|
||||
::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList()));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SourceStrLenLessThanReadaheadSizeTest, ReadaheadRandomAccessFileTest,
|
||||
::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList()));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SourceStrLenCanBeGreaterThanReadaheadSizeTest,
|
||||
ReadaheadRandomAccessFileTest,
|
||||
::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList()));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NExceedReadaheadTest, ReadaheadRandomAccessFileTest,
|
||||
::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList()));
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -656,6 +656,17 @@ Status PosixWritableFile::PositionedAppend(const Slice& data, uint64_t offset) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PosixWritableFile::Truncate(uint64_t size) {
|
||||
Status s;
|
||||
int r = ftruncate(fd_, size);
|
||||
if (r < 0) {
|
||||
s = IOError(filename_, errno);
|
||||
} else {
|
||||
filesize_ = size;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
Status PosixWritableFile::Close() {
|
||||
Status s;
|
||||
|
||||
|
@ -93,9 +93,9 @@ class PosixWritableFile : public WritableFile {
|
||||
const EnvOptions& options);
|
||||
virtual ~PosixWritableFile();
|
||||
|
||||
// Means Close() will properly take care of truncate
|
||||
// and it does not need any additional information
|
||||
virtual Status Truncate(uint64_t size) override { return Status::OK(); }
|
||||
// Need to implement this so the file is truncated correctly
|
||||
// with direct I/O
|
||||
virtual Status Truncate(uint64_t size) override;
|
||||
virtual Status Close() override;
|
||||
virtual Status Append(const Slice& data) override;
|
||||
virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
|
||||
|
@ -536,6 +536,10 @@ bool ParseOptionHelper(char* opt_address, const OptionType& opt_type,
|
||||
return ParseEnum<CompactionStyle>(
|
||||
compaction_style_string_map, value,
|
||||
reinterpret_cast<CompactionStyle*>(opt_address));
|
||||
case OptionType::kCompactionPri:
|
||||
return ParseEnum<CompactionPri>(
|
||||
compaction_pri_string_map, value,
|
||||
reinterpret_cast<CompactionPri*>(opt_address));
|
||||
case OptionType::kCompressionType:
|
||||
return ParseEnum<CompressionType>(
|
||||
compression_type_string_map, value,
|
||||
@ -617,6 +621,10 @@ bool SerializeSingleOptionHelper(const char* opt_address,
|
||||
return SerializeEnum<CompactionStyle>(
|
||||
compaction_style_string_map,
|
||||
*(reinterpret_cast<const CompactionStyle*>(opt_address)), value);
|
||||
case OptionType::kCompactionPri:
|
||||
return SerializeEnum<CompactionPri>(
|
||||
compaction_pri_string_map,
|
||||
*(reinterpret_cast<const CompactionPri*>(opt_address)), value);
|
||||
case OptionType::kCompressionType:
|
||||
return SerializeEnum<CompressionType>(
|
||||
compression_type_string_map,
|
||||
|
@ -106,6 +106,7 @@ enum class OptionType {
|
||||
kString,
|
||||
kDouble,
|
||||
kCompactionStyle,
|
||||
kCompactionPri,
|
||||
kSliceTransform,
|
||||
kCompressionType,
|
||||
kVectorCompressionType,
|
||||
@ -573,8 +574,10 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
|
||||
OptionType::kMergeOperator, OptionVerificationType::kByName, false, 0}},
|
||||
{"compaction_style",
|
||||
{offsetof(struct ColumnFamilyOptions, compaction_style),
|
||||
OptionType::kCompactionStyle, OptionVerificationType::kNormal, false,
|
||||
0}}};
|
||||
OptionType::kCompactionStyle, OptionVerificationType::kNormal, false, 0}},
|
||||
{"compaction_pri",
|
||||
{offsetof(struct ColumnFamilyOptions, compaction_pri),
|
||||
OptionType::kCompactionPri, OptionVerificationType::kNormal, false, 0}}};
|
||||
|
||||
static std::unordered_map<std::string, OptionTypeInfo>
|
||||
block_based_table_type_info = {
|
||||
@ -706,6 +709,13 @@ static std::unordered_map<std::string, CompactionStyle>
|
||||
{"kCompactionStyleFIFO", kCompactionStyleFIFO},
|
||||
{"kCompactionStyleNone", kCompactionStyleNone}};
|
||||
|
||||
static std::unordered_map<std::string, CompactionPri>
|
||||
compaction_pri_string_map = {
|
||||
{"kByCompensatedSize", kByCompensatedSize},
|
||||
{"kOldestLargestSeqFirst", kOldestLargestSeqFirst},
|
||||
{"kOldestSmallestSeqFirst", kOldestSmallestSeqFirst},
|
||||
{"kMinOverlappingRatio", kMinOverlappingRatio}};
|
||||
|
||||
static std::unordered_map<std::string,
|
||||
WALRecoveryMode> wal_recovery_mode_string_map = {
|
||||
{"kTolerateCorruptedTailRecords",
|
||||
|
@ -542,6 +542,9 @@ bool AreEqualOptions(
|
||||
case OptionType::kCompactionStyle:
|
||||
return (*reinterpret_cast<const CompactionStyle*>(offset1) ==
|
||||
*reinterpret_cast<const CompactionStyle*>(offset2));
|
||||
case OptionType::kCompactionPri:
|
||||
return (*reinterpret_cast<const CompactionPri*>(offset1) ==
|
||||
*reinterpret_cast<const CompactionPri*>(offset2));
|
||||
case OptionType::kCompressionType:
|
||||
return (*reinterpret_cast<const CompressionType*>(offset1) ==
|
||||
*reinterpret_cast<const CompressionType*>(offset2));
|
||||
|
@ -373,7 +373,6 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
|
||||
// Following options are not settable through
|
||||
// GetColumnFamilyOptionsFromString():
|
||||
options->rate_limit_delay_max_milliseconds = 33;
|
||||
options->compaction_pri = CompactionPri::kOldestSmallestSeqFirst;
|
||||
options->compaction_options_universal = CompactionOptionsUniversal();
|
||||
options->compression_opts = CompressionOptions();
|
||||
options->hard_rate_limit = 0;
|
||||
@ -433,6 +432,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
|
||||
"level_compaction_dynamic_level_bytes=false;"
|
||||
"inplace_update_support=false;"
|
||||
"compaction_style=kCompactionStyleFIFO;"
|
||||
"compaction_pri=kMinOverlappingRatio;"
|
||||
"purge_redundant_kvs_while_flush=true;"
|
||||
"hard_pending_compaction_bytes_limit=0;"
|
||||
"disable_auto_compactions=false;"
|
||||
|
@ -77,6 +77,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
|
||||
{"arena_block_size", "22"},
|
||||
{"disable_auto_compactions", "true"},
|
||||
{"compaction_style", "kCompactionStyleLevel"},
|
||||
{"compaction_pri", "kOldestSmallestSeqFirst"},
|
||||
{"verify_checksums_in_compaction", "false"},
|
||||
{"compaction_options_fifo", "23"},
|
||||
{"max_sequential_skip_in_iterations", "24"},
|
||||
@ -176,6 +177,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
|
||||
ASSERT_EQ(new_cf_opt.disable_auto_compactions, true);
|
||||
ASSERT_EQ(new_cf_opt.compaction_style, kCompactionStyleLevel);
|
||||
ASSERT_EQ(new_cf_opt.verify_checksums_in_compaction, false);
|
||||
ASSERT_EQ(new_cf_opt.compaction_pri, kOldestSmallestSeqFirst);
|
||||
ASSERT_EQ(new_cf_opt.compaction_options_fifo.max_table_files_size,
|
||||
static_cast<uint64_t>(23));
|
||||
ASSERT_EQ(new_cf_opt.max_sequential_skip_in_iterations,
|
||||
|
Loading…
Reference in New Issue
Block a user