From 55b37efa158560fc724dbb4ca1cbca61e6a0a491 Mon Sep 17 00:00:00 2001 From: Warren Falk Date: Thu, 10 Dec 2015 20:26:11 -0500 Subject: [PATCH 001/195] fix a compile error on 32-bit --- util/crc32c.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/util/crc32c.cc b/util/crc32c.cc index b8d281a27..315263363 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -315,8 +315,15 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) { static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) { #ifdef __SSE4_2__ +#ifdef __LP64__ *l = _mm_crc32_u64(*l, LE_LOAD64(*p)); *p += 8; +#else + *l = _mm_crc32_u32(*l, LE_LOAD32(*p)); + *p += 4; + *l = _mm_crc32_u32(*l, LE_LOAD32(*p)); + *p += 4; +#endif #else Slow_CRC32(l, p); #endif From 601f1306a1eabea7807c0ad0600f8a12bc3eb717 Mon Sep 17 00:00:00 2001 From: Warren Falk Date: Thu, 7 Jan 2016 13:12:15 -0500 Subject: [PATCH 002/195] fix shorten-64-to-32 warning in crc32c --- util/crc32c.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/crc32c.cc b/util/crc32c.cc index 315263363..b32836177 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -319,9 +319,9 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) { *l = _mm_crc32_u64(*l, LE_LOAD64(*p)); *p += 8; #else - *l = _mm_crc32_u32(*l, LE_LOAD32(*p)); + *l = _mm_crc32_u32((unsigned int)*l, LE_LOAD32(*p)); *p += 4; - *l = _mm_crc32_u32(*l, LE_LOAD32(*p)); + *l = _mm_crc32_u32((unsigned int)*l, LE_LOAD32(*p)); *p += 4; #endif #else From 2f01e10fa99227d778e83674ed55c9bf051af19f Mon Sep 17 00:00:00 2001 From: Warren Falk Date: Thu, 7 Jan 2016 13:22:09 -0500 Subject: [PATCH 003/195] use static_cast in crc32c instead of c-style cast --- util/crc32c.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/crc32c.cc b/util/crc32c.cc index b32836177..b96292ec8 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -319,9 +319,9 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) { *l = 
_mm_crc32_u64(*l, LE_LOAD64(*p)); *p += 8; #else - *l = _mm_crc32_u32((unsigned int)*l, LE_LOAD32(*p)); + *l = _mm_crc32_u32(static_cast(*l), LE_LOAD32(*p)); *p += 4; - *l = _mm_crc32_u32((unsigned int)*l, LE_LOAD32(*p)); + *l = _mm_crc32_u32(static_cast(*l), LE_LOAD32(*p)); *p += 4; #endif #else From 94d9df24820df62f70be24c348c6fb8c755457dc Mon Sep 17 00:00:00 2001 From: Warren Falk Date: Thu, 7 Jan 2016 13:27:20 -0500 Subject: [PATCH 004/195] fix an unused function compiler warning in crc32c in 32-bit mode --- util/crc32c.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/util/crc32c.cc b/util/crc32c.cc index b96292ec8..2b237b48e 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -292,10 +292,12 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) { } #ifdef __SSE4_2__ +#ifdef __LP64__ static inline uint64_t LE_LOAD64(const uint8_t *p) { return DecodeFixed64(reinterpret_cast(p)); } #endif +#endif static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) { uint32_t c = static_cast(*l ^ LE_LOAD32(*p)); From b73fbbaf64b531b163d1bf0d0f4445b2a86b87de Mon Sep 17 00:00:00 2001 From: Alexander Fenster Date: Mon, 11 Jan 2016 10:51:42 -0800 Subject: [PATCH 005/195] added --no_value option to ldb scan to dump key only --- tools/ldb_cmd.cc | 35 ++++++++++++++++++++++++----------- tools/ldb_cmd.h | 4 +++- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 7ec4690d0..b03a865e5 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -60,6 +60,7 @@ const string LDBCommand::ARG_DB_WRITE_BUFFER_SIZE = "db_write_buffer_size"; const string LDBCommand::ARG_WRITE_BUFFER_SIZE = "write_buffer_size"; const string LDBCommand::ARG_FILE_SIZE = "file_size"; const string LDBCommand::ARG_CREATE_IF_MISSING = "create_if_missing"; +const string LDBCommand::ARG_NO_VALUE = "no_value"; const char* LDBCommand::DELIM = " ==> "; @@ -1743,12 +1744,13 @@ Options BatchPutCommand::PrepareOptionsForOpenDB() { ScanCommand::ScanCommand(const 
vector& params, const map& options, const vector& flags) : LDBCommand(options, flags, true, - BuildCmdLineOptions({ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_TO, + BuildCmdLineOptions({ARG_TTL, ARG_NO_VALUE, ARG_HEX, ARG_KEY_HEX, ARG_TO, ARG_VALUE_HEX, ARG_FROM, ARG_TIMESTAMP, ARG_MAX_KEYS, ARG_TTL_START, ARG_TTL_END})), start_key_specified_(false), end_key_specified_(false), - max_keys_scanned_(-1) { + max_keys_scanned_(-1), + no_value_(false) { map::const_iterator itr = options.find(ARG_FROM); if (itr != options.end()) { @@ -1767,6 +1769,11 @@ ScanCommand::ScanCommand(const vector& params, end_key_specified_ = true; } + vector::const_iterator vitr = std::find(flags.begin(), flags.end(), ARG_NO_VALUE); + if (vitr != flags.end()) { + no_value_ = true; + } + itr = options.find(ARG_MAX_KEYS); if (itr != options.end()) { try { @@ -1794,6 +1801,7 @@ void ScanCommand::Help(string& ret) { ret.append(" [--" + ARG_MAX_KEYS + "=q] "); ret.append(" [--" + ARG_TTL_START + "=:- is inclusive]"); ret.append(" [--" + ARG_TTL_END + "=:- is exclusive]"); + ret.append(" [--" + ARG_NO_VALUE + "]"); ret.append("\n"); } @@ -1850,16 +1858,21 @@ void ScanCommand::DoCommand() { key_slice = formatted_key; } - std::string formatted_value; - if (is_value_hex_) { - formatted_value = "0x" + val_slice.ToString(true /* hex */); - val_slice = formatted_value; + if (no_value_) { + fprintf(stdout, "%.*s\n", + static_cast(key_slice.size()), key_slice.data()); + } else { + Slice val_slice = it->value(); + std::string formatted_value; + if (is_value_hex_) { + formatted_value = "0x" + val_slice.ToString(true /* hex */); + val_slice = formatted_value; + } + fprintf(stdout, "%.*s : %.*s\n", + static_cast(key_slice.size()), key_slice.data(), + static_cast(val_slice.size()), val_slice.data()); } - - fprintf(stdout, "%.*s : %.*s\n", - static_cast(key_slice.size()), key_slice.data(), - static_cast(val_slice.size()), val_slice.data()); - + num_keys_scanned++; if (max_keys_scanned_ >= 0 && num_keys_scanned >= 
max_keys_scanned_) { break; diff --git a/tools/ldb_cmd.h b/tools/ldb_cmd.h index 0c048e794..2c1c5e4b1 100644 --- a/tools/ldb_cmd.h +++ b/tools/ldb_cmd.h @@ -60,6 +60,7 @@ public: static const string ARG_WRITE_BUFFER_SIZE; static const string ARG_FILE_SIZE; static const string ARG_CREATE_IF_MISSING; + static const string ARG_NO_VALUE; static LDBCommand* InitFromCmdLineArgs( const vector& args, @@ -377,7 +378,7 @@ private: */ bool StringToBool(string val) { std::transform(val.begin(), val.end(), val.begin(), - [](char ch) -> char { return ::tolower(ch); }); + [](char ch) -> char { return (char)::tolower(ch); }); if (val == "true") { return true; @@ -709,6 +710,7 @@ private: bool start_key_specified_; bool end_key_specified_; int max_keys_scanned_; + bool no_value_; }; class DeleteCommand : public LDBCommand { From e16438bb86f42eb58e3a7495ddd28055a58824e0 Mon Sep 17 00:00:00 2001 From: Alexander Fenster Date: Mon, 11 Jan 2016 11:23:33 -0800 Subject: [PATCH 006/195] fixing build warning --- tools/ldb_cmd.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index b03a865e5..bec67a377 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -1847,7 +1847,6 @@ void ScanCommand::DoCommand() { } Slice key_slice = it->key(); - Slice val_slice = it->value(); std::string formatted_key; if (is_key_hex_) { From b54d4dd435abfb5be05c865c10639817c899502a Mon Sep 17 00:00:00 2001 From: sdong Date: Tue, 12 Jan 2016 18:20:06 -0800 Subject: [PATCH 007/195] tools/sst_dump_tool_imp.h not to depend on "util/testutil.h" Summary: util/testutil.h doesn't seem to be used in tools/sst_dump_tool_imp.h. Remove it. Also move some other include to tools/sst_dump_tool.cc instead. Test Plan: Build with GCC, CLANG and with GCC 4.81 and 4.9. 
Reviewers: yuslepukhin, yhchiang, rven, anthony, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D52791 --- tools/sst_dump_tool.cc | 23 +++++++++++++++++++++++ tools/sst_dump_tool_imp.h | 25 +------------------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index 23a33fc1a..b348da38b 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -12,6 +12,29 @@ #endif #include +#include +#include +#include + +#include "db/memtable.h" +#include "db/write_batch_internal.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/immutable_options.h" +#include "rocksdb/iterator.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/status.h" +#include "rocksdb/table_properties.h" +#include "table/block.h" +#include "table/block_based_table_builder.h" +#include "table/block_based_table_factory.h" +#include "table/block_builder.h" +#include "table/format.h" +#include "table/meta_blocks.h" +#include "table/plain_table_factory.h" +#include "tools/ldb_cmd.h" +#include "util/random.h" + #include "port/port.h" namespace rocksdb { diff --git a/tools/sst_dump_tool_imp.h b/tools/sst_dump_tool_imp.h index dd65d3b10..44c74f909 100644 --- a/tools/sst_dump_tool_imp.h +++ b/tools/sst_dump_tool_imp.h @@ -7,33 +7,10 @@ #include "rocksdb/sst_dump_tool.h" -#include -#include +#include #include -#include - #include "db/dbformat.h" -#include "db/memtable.h" -#include "db/write_batch_internal.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/immutable_options.h" -#include "rocksdb/iterator.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/status.h" -#include "rocksdb/table_properties.h" -#include "table/block.h" -#include "table/block_based_table_builder.h" -#include "table/block_based_table_factory.h" -#include "table/block_builder.h" -#include "table/format.h" -#include 
"table/meta_blocks.h" -#include "table/plain_table_factory.h" -#include "tools/ldb_cmd.h" #include "util/file_reader_writer.h" -#include "util/random.h" -#include "util/testharness.h" -#include "util/testutil.h" namespace rocksdb { From ac50fd3a71288b22160f849efb4650d814f3847e Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 13 Jan 2016 14:51:58 -0800 Subject: [PATCH 008/195] Align statistics Use Yield macro to make it a little more portable between platforms. --- port/port_posix.h | 2 ++ port/win/port_win.h | 2 +- util/statistics.h | 8 +++++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/port/port_posix.h b/port/port_posix.h index efcd1aa8e..74c42f31b 100644 --- a/port/port_posix.h +++ b/port/port_posix.h @@ -16,6 +16,8 @@ // in fact, we could use that one #define ROCKSDB_PRIszt "zu" +#define __declspec(S) + #define ROCKSDB_NOEXCEPT noexcept #undef PLATFORM_IS_LITTLE_ENDIAN diff --git a/port/win/port_win.h b/port/win/port_win.h index 9ee7d96be..d901c40bf 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -245,7 +245,7 @@ extern void InitOnce(OnceType* once, void (*initializer)()); static inline void AsmVolatilePause() { #if defined(_M_IX86) || defined(_M_X64) - ::_mm_pause(); + YieldProcessor(); #endif // it would be nice to get "wfe" on ARM here } diff --git a/util/statistics.h b/util/statistics.h index 55914f59e..b42fb43a5 100644 --- a/util/statistics.h +++ b/util/statistics.h @@ -61,7 +61,13 @@ class StatisticsImpl : public Statistics { char padding[64 - sizeof(std::atomic_uint_fast64_t)]; }; - Ticker tickers_[INTERNAL_TICKER_ENUM_MAX] __attribute__((aligned(64))); + static_assert(sizeof(Ticker) == 64, "Expecting to fit into 64 bytes"); + + // Attributes expand to nothing depending on the platform + __declspec(align(64)) + Ticker tickers_[INTERNAL_TICKER_ENUM_MAX] + __attribute__((aligned(64))); + __declspec(align(64)) HistogramImpl histograms_[INTERNAL_HISTOGRAM_ENUM_MAX] __attribute__((aligned(64))); }; From 
f7ebc2f34679f803242548a9a5ddff296f2b41c7 Mon Sep 17 00:00:00 2001 From: agiardullo Date: Thu, 14 Jan 2016 16:55:13 -0800 Subject: [PATCH 009/195] Update HISTORY.mc for 4.4.0 Summary: Prepare to cut release Test Plan: no code change. Reviewers: yhchiang Reviewed By: yhchiang Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D52827 --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 2400411d4..f721b7c1c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,6 @@ # Rocksdb Change Log -## Unreleased +## 4.4.0 (1/14/2016) ### Public API Changes * Change names in CompactionPri and add a new one. * Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit. From aec10f734b52bb4d9dbf06a99663c6aaa35961a6 Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Thu, 14 Jan 2016 22:47:15 -0800 Subject: [PATCH 010/195] Guard falloc.h inclusion to avoid build breaks Summary: Depending on the order of include paths and versions of various headers we may end up in a situation where we'll encounter a build break caused by redefinition of constants. gcc-4.9-glibc-2.20 header update to include/bits/fcntl-linux.h introduced the definitions of FALLOC_FL_* constants. However, linux/falloc.h from kernel-headers also has FALLOC_FL_* constants defined. Therefore during the compilation we'll get "previously defined" errors. 
Test Plan: Both in the environment where the build break manifests (to make sure that the change fixed the problem) and in the environment where everything builds fine (to make sure that there are no regressions): make clean make -j 32 Reviewers: sdong, igor Reviewed By: igor Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D52821 --- util/posix_logger.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/util/posix_logger.h b/util/posix_logger.h index 55cb34a86..1c7d39866 100644 --- a/util/posix_logger.h +++ b/util/posix_logger.h @@ -16,9 +16,13 @@ #include "port/sys_time.h" #include #include + #ifdef OS_LINUX +#ifndef FALLOC_FL_KEEP_SIZE #include #endif +#endif + #include "rocksdb/env.h" #include "util/iostats_context_imp.h" #include From 83e1de92af37ee0ae053161de5138204dc54444a Mon Sep 17 00:00:00 2001 From: agiardullo Date: Thu, 14 Jan 2016 19:04:06 -0800 Subject: [PATCH 011/195] move internal build to use zstd 0.4.5 Summary: update internal build scripts to use zstd 0.4.5. 
Test Plan: built and ran tests with and without ROCKSDB_FBCODE_BUILD_WITH_481 Reviewers: yhchiang, rven, kradhakrishnan, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D52833 --- build_tools/fbcode_config.sh | 10 +++++----- build_tools/fbcode_config4.8.1.sh | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/build_tools/fbcode_config.sh b/build_tools/fbcode_config.sh index 1394e4e64..48327c2e3 100644 --- a/build_tools/fbcode_config.sh +++ b/build_tools/fbcode_config.sh @@ -43,9 +43,9 @@ if test -z $PIC_BUILD; then LZ4_LIBS=" /mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf/lib/liblz4.a" CFLAGS+=" -DLZ4" - ZSTD_REV=810b81b4705def5243e998b54701f3c504e4009e - ZSTD_INCLUDE=" -I /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.2/gcc-4.8.1-glibc-2.17/c3f970a/include" - ZSTD_LIBS=" /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.2/gcc-4.8.1-glibc-2.17/c3f970a/lib/libzstd.a" + ZSTD_REV=d4ac2c5f9be76d57a6cbd3eb1011e97574a56cde + ZSTD_INCLUDE=" -I /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.5/gcc-4.8.1-glibc-2.17/c3f970a/include" + ZSTD_LIBS=" /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.5/gcc-4.8.1-glibc-2.17/c3f970a/lib/libzstd.a" CFLAGS+=" -DZSTD" fi @@ -92,12 +92,12 @@ if [ -z "$USE_CLANG" ]; then # gcc CC="$GCC_BASE/bin/gcc" CXX="$GCC_BASE/bin/g++" - + CFLAGS+=" -B$BINUTILS/gold" CFLAGS+=" -isystem $GLIBC_INCLUDE" CFLAGS+=" -isystem $LIBGCC_INCLUDE" else - # clang + # clang CLANG_INCLUDE="$CLANG_BASE/gcc-4.9-glibc-2.20/74c386f/lib/clang/dev/include/" CC="$CLANG_BIN/clang" CXX="$CLANG_BIN/clang++" diff --git a/build_tools/fbcode_config4.8.1.sh b/build_tools/fbcode_config4.8.1.sh index 0a4adc96f..31bc091f4 100644 --- a/build_tools/fbcode_config4.8.1.sh +++ b/build_tools/fbcode_config4.8.1.sh @@ -33,9 +33,9 @@ LZ4_REV=6858fac689e0f92e584224d91bdb0e39f6c8320d LZ4_INCLUDE=" -I /mnt/gvfs/third-party2/lz4/$LZ4_REV/r131/gcc-4.8.1-glibc-2.17/c3f970a/include" 
LZ4_LIBS=" /mnt/gvfs/third-party2/lz4/$LZ4_REV/r131/gcc-4.8.1-glibc-2.17/c3f970a/lib/liblz4.a" -ZSTD_REV=810b81b4705def5243e998b54701f3c504e4009e -ZSTD_INCLUDE=" -I /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.2/gcc-4.8.1-glibc-2.17/c3f970a/include" -ZSTD_LIBS=" /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.2/gcc-4.8.1-glibc-2.17/c3f970a/lib/libzstd.a" +ZSTD_REV=d4ac2c5f9be76d57a6cbd3eb1011e97574a56cde +ZSTD_INCLUDE=" -I /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.5/gcc-4.8.1-glibc-2.17/c3f970a/include" +ZSTD_LIBS=" /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.5/gcc-4.8.1-glibc-2.17/c3f970a/lib/libzstd.a" # location of gflags headers and libraries GFLAGS_REV=c7275a4ceae0aca0929e56964a31dafc53c1ee96 @@ -71,12 +71,12 @@ if [ -z "$USE_CLANG" ]; then # gcc CC="$GCC_BASE/bin/gcc" CXX="$GCC_BASE/bin/g++" - + CFLAGS="-B$BINUTILS/gold -m64 -mtune=generic" CFLAGS+=" -isystem $GLIBC_INCLUDE" CFLAGS+=" -isystem $LIBGCC_INCLUDE" else - # clang + # clang CLANG_BASE="/mnt/gvfs/third-party2/clang/ab054e9a490a8fd4537c0b6ec56e5c91c0f81c91/3.7" CLANG_INCLUDE="$CLANG_BASE/gcc-4.8.1-glibc-2.17/ee9b060/lib/clang/3.7/include" CC="$CLANG_BASE/centos6-native/b2feaee/bin/clang" From 2e9fae3f2a00e38dfef58f7778d5321d5bd07bfd Mon Sep 17 00:00:00 2001 From: Selva Sarangan Date: Mon, 18 Jan 2016 15:24:52 -0800 Subject: [PATCH 012/195] Add Rakuten Marketing to USERS.md --- USERS.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/USERS.md b/USERS.md index c76b0221f..7123790ac 100644 --- a/USERS.md +++ b/USERS.md @@ -45,4 +45,7 @@ Airbnb is using RocksDB as a storage engine for their personalized search servic Pinterest's Object Retrieval System uses RocksDB for storage: https://www.youtube.com/watch?v=MtFEVEs_2Vo ## Smyte -[Smyte](https://www.smyte.com/) uses RocksDB as the storage layer for their core key-value storage, high-performance counters and time-windowed HyperLogLog services. 
\ No newline at end of file +[Smyte](https://www.smyte.com/) uses RocksDB as the storage layer for their core key-value storage, high-performance counters and time-windowed HyperLogLog services. + +## Rakuten Marketing +[Rakuten Marketing](https://marketing.rakuten.com/) uses RocksDB as the disk cache layer for the real-time bidding service in their Performance DSP. \ No newline at end of file From d78c6b28c452acad0666561c4daeac689042f7b3 Mon Sep 17 00:00:00 2001 From: David Bernard Date: Tue, 19 Jan 2016 04:45:21 +0000 Subject: [PATCH 013/195] Changes for build on solaris Makefile adjust paths for solaris build Makefile enable _GLIBCXX_USE_C99 so that std::to_string is available db_compaction_test.cc Initialise a variable to avoid a compilation error db_impl.cc Include db_test.cc Include Environment.java recognise solaris envrionment options_bulder.cc Make log unambiguous geodb_impl.cc Make log and floor unambiguous --- Makefile | 18 +++++++++++++++--- db/db_compaction_test.cc | 2 +- db/db_impl.cc | 1 + db/db_test.cc | 1 + .../java/org/rocksdb/util/Environment.java | 10 ++++++++-- util/options_builder.cc | 2 ++ utilities/geodb/geodb_impl.cc | 3 +++ 7 files changed, 31 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 396b8e201..507f0948f 100644 --- a/Makefile +++ b/Makefile @@ -143,6 +143,9 @@ else OPT += -DNDEBUG endif +ifeq ($(PLATFORM), OS_SOLARIS) + PLATFORM_CXXFLAGS += -D _GLIBCXX_USE_C99 +endif ifneq ($(filter -DROCKSDB_LITE,$(OPT)),) # found CFLAGS += -fno-exceptions @@ -1036,7 +1039,11 @@ install: install-static # --------------------------------------------------------------------------- JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux -ARCH := $(shell getconf LONG_BIT) +ifeq ($(PLATFORM), OS_SOLARIS) + ARCH := $(shell isainfo -b) +else + ARCH := $(shell getconf LONG_BIT) +endif ROCKSDBJNILIB = librocksdbjni-linux$(ARCH).so ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux$(ARCH).jar 
ROCKSDB_JAR_ALL = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH).jar @@ -1044,14 +1051,19 @@ ROCKSDB_JAVADOCS_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PA ROCKSDB_SOURCES_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-sources.jar ifeq ($(PLATFORM), OS_MACOSX) -ROCKSDBJNILIB = librocksdbjni-osx.jnilib -ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-osx.jar + ROCKSDBJNILIB = librocksdbjni-osx.jnilib + ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-osx.jar ifneq ("$(wildcard $(JAVA_HOME)/include/darwin)","") JAVA_INCLUDE = -I$(JAVA_HOME)/include -I $(JAVA_HOME)/include/darwin else JAVA_INCLUDE = -I/System/Library/Frameworks/JavaVM.framework/Headers/ endif endif +ifeq ($(PLATFORM), OS_SOLARIS) + ROCKSDBJNILIB = librocksdbjni-solaris$(ARCH).so + ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-solaris$(ARCH).jar + JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/solaris +endif libz.a: -rm -rf zlib-1.2.8 diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index e2925ba07..d29b50e7f 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -1898,7 +1898,7 @@ TEST_P(DBCompactionTestWithParam, DISABLED_CompactFilesOnLevelCompaction) { std::set overlapping_file_names; std::vector compaction_input_file_names; for (int f = 0; f < file_picked; ++f) { - int level; + int level = 0; auto file_meta = PickFileRandomly(cf_meta, &rnd, &level); compaction_input_file_names.push_back(file_meta->name); GetOverlappingFileNumbersForLevelCompaction( diff --git a/db/db_impl.cc b/db/db_impl.cc index 77dc9457d..fd667aa04 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -15,6 +15,7 @@ #include #include +#include #include #include diff --git a/db/db_test.cc b/db/db_test.cc index 34b94cfdf..442658b44 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -20,6 +20,7 @@ #ifndef OS_WIN #include #endif +#include #include 
"db/filename.h" #include "db/dbformat.h" diff --git a/java/src/main/java/org/rocksdb/util/Environment.java b/java/src/main/java/org/rocksdb/util/Environment.java index f65b92a0e..306eae093 100644 --- a/java/src/main/java/org/rocksdb/util/Environment.java +++ b/java/src/main/java/org/rocksdb/util/Environment.java @@ -18,6 +18,10 @@ public class Environment { OS.contains("aix")); } + public static boolean isSolaris() { + return OS.contains("sunos"); + } + public static boolean is64Bit() { return (ARCH.indexOf("64") > 0); } @@ -36,7 +40,9 @@ public class Environment { return String.format("%sjni-linux%s", name, arch); } else if (isMac()) { return String.format("%sjni-osx", name); - } + } else if (isSolaris()) { + return String.format("%sjni-solaris%d", name, is64Bit() ? 64 : 32); + } throw new UnsupportedOperationException(); } @@ -45,7 +51,7 @@ public class Environment { } private static String appendLibOsSuffix(final String libraryFileName, final boolean shared) { - if (isUnix()) { + if (isUnix() || isSolaris()) { return libraryFileName + ".so"; } else if (isMac()) { return libraryFileName + (shared ? ".dylib" : ".jnilib"); diff --git a/util/options_builder.cc b/util/options_builder.cc index 67fd268a9..72ea7a46d 100644 --- a/util/options_builder.cc +++ b/util/options_builder.cc @@ -7,6 +7,8 @@ #include #include "rocksdb/options.h" +using std::log; + namespace rocksdb { namespace { diff --git a/utilities/geodb/geodb_impl.cc b/utilities/geodb/geodb_impl.cc index b30dd6333..e134dd62c 100644 --- a/utilities/geodb/geodb_impl.cc +++ b/utilities/geodb/geodb_impl.cc @@ -19,6 +19,9 @@ #include "util/coding.h" #include "util/string_util.h" +using std::log; +using std::floor; + // // There are two types of keys. The first type of key-values // maps a geo location to the set of object ids and their values. 
From eaa563756860fd752f40d0bc17e9e58a99aba483 Mon Sep 17 00:00:00 2001 From: David Bernard Date: Tue, 19 Jan 2016 04:59:39 +0000 Subject: [PATCH 014/195] Change notification email for travis --- .travis.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index b6fa63c5d..c73983d58 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,6 +38,4 @@ script: notifications: email: - - leveldb@fb.com - webhooks: - - https://buildtimetrend.herokuapp.com/travis + - davidbernard@optushome.com.au From 3f12e16f27f32ad6e245ba54c2c09e42c45b3337 Mon Sep 17 00:00:00 2001 From: David Bernard Date: Tue, 19 Jan 2016 06:17:31 +0000 Subject: [PATCH 015/195] Make alloca.h optional --- db/db_impl.cc | 2 ++ db/db_test.cc | 2 ++ 2 files changed, 4 insertions(+) diff --git a/db/db_impl.cc b/db/db_impl.cc index fd667aa04..8f9c0168e 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -15,7 +15,9 @@ #include #include +#ifdef OS_SOLARIS #include +#endif #include #include diff --git a/db/db_test.cc b/db/db_test.cc index 442658b44..e692caedb 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -20,7 +20,9 @@ #ifndef OS_WIN #include #endif +#ifdef OS_SOLARIS #include +#endif #include "db/filename.h" #include "db/dbformat.h" From f423f05dcd1dce0a5c6a8db170434629c9705b3a Mon Sep 17 00:00:00 2001 From: bcbrock Date: Tue, 19 Jan 2016 09:08:19 -0600 Subject: [PATCH 016/195] Simple changes to support builds for ppc64[le] consistent with X86 These simple changes are required to allow builds on ppc64[le] systems consistent with X86. The Makefile now recognizes both ppc64 and ppc64le, and in the absence of PORTABLE=1, the code will be built analogously to the X86 -march=native. Note that although GCC supports -mcpu=native -mtune=native on POWER, it doesn't work correctly on all systems. This is why we need to get the actual machine model from the AUX vector. 
--- INSTALL.md | 2 +- Makefile | 2 +- build_tools/build_detect_platform | 8 +++++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index bff75155f..3669bf1cf 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -21,7 +21,7 @@ depend on gflags. You will need to have gflags installed to run `make all`. This use binaries compiled by `make all` in production. * By default the binary we produce is optimized for the platform you're compiling on -(-march=native). If you want to build a portable binary, add 'PORTABLE=1' before +(-march=native or the equivalent). If you want to build a portable binary, add 'PORTABLE=1' before your make commands, like this: `PORTABLE=1 make static_lib` ## Dependencies diff --git a/Makefile b/Makefile index 396b8e201..4dba4f6d2 100644 --- a/Makefile +++ b/Makefile @@ -84,7 +84,7 @@ endif # compile with -O2 if debug level is not 2 ifneq ($(DEBUG_LEVEL), 2) OPT += -O2 -fno-omit-frame-pointer -ifneq ($(MACHINE),ppc64) # ppc64 doesn't support -momit-leaf-frame-pointer +ifeq (,$(findstring ppc64,$(MACHINE))) # ppc64[le] doesn't support -momit-leaf-frame-pointer OPT += -momit-leaf-frame-pointer endif endif diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index fc099a540..80905266f 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -359,7 +359,13 @@ if test "$USE_SSE"; then # if Intel SSE instruction set is supported, set USE_SSE=1 COMMON_FLAGS="$COMMON_FLAGS -msse -msse4.2 " elif test -z "$PORTABLE"; then - COMMON_FLAGS="$COMMON_FLAGS -march=native " + if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then + # Tune for this POWER processor, treating '+' models as base models + POWER=`LD_SHOW_AUXV=1 /bin/true | grep AT_PLATFORM | grep -E -o power[0-9]+` + COMMON_FLAGS="$COMMON_FLAGS -mcpu=$POWER -mtune=$POWER " + else + COMMON_FLAGS="$COMMON_FLAGS -march=native " + fi fi PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS" From 
34704d5c7bfb1dbcfeb4986bc8eb42f1f78481a0 Mon Sep 17 00:00:00 2001 From: Mike Kolupaev Date: Tue, 19 Jan 2016 11:46:52 -0800 Subject: [PATCH 017/195] [easy] Fixed a crash in LogAndApply() when CF creation failed Summary: That line used to dereference `column_family_data`, which is nullptr if we're creating a column family. Test Plan: `make -j check` Reviewers: sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D52881 --- db/version_set.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index ba62177a7..193f1076f 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -758,7 +758,7 @@ uint64_t VersionStorageInfo::GetEstimatedActiveKeys() const { if (current_num_samples_ < file_count) { // casting to avoid overflowing - return + return static_cast( (est * static_cast(file_count) / current_num_samples_) ); @@ -2282,7 +2282,8 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, Log(InfoLogLevel::ERROR_LEVEL, db_options_->info_log, "Error in committing version %lu to [%s]", (unsigned long)v->GetVersionNumber(), - column_family_data->GetName().c_str()); + column_family_data ? column_family_data->GetName().c_str() + : ""); delete v; if (new_descriptor_log) { Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log, From 12809b44b4318d51e989a573b422a132630ed2a0 Mon Sep 17 00:00:00 2001 From: David Bernard Date: Tue, 19 Jan 2016 22:46:39 +0000 Subject: [PATCH 018/195] Revert "Change notification email for travis" This reverts commit eaa563756860fd752f40d0bc17e9e58a99aba483. 
--- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c73983d58..b6fa63c5d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,4 +38,6 @@ script: notifications: email: - - davidbernard@optushome.com.au + - leveldb@fb.com + webhooks: + - https://buildtimetrend.herokuapp.com/travis From df7c2f3b59e87e6f2f57dec255ae33fb6fbf27ad Mon Sep 17 00:00:00 2001 From: David Bernard Date: Wed, 20 Jan 2016 00:27:29 +0000 Subject: [PATCH 019/195] As per google coding standard replace "using" in option_builder.cc and geodb_impl.cc --- util/options_builder.cc | 8 +++----- utilities/geodb/geodb_impl.cc | 16 +++++++--------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/util/options_builder.cc b/util/options_builder.cc index 72ea7a46d..1b480f22b 100644 --- a/util/options_builder.cc +++ b/util/options_builder.cc @@ -7,8 +7,6 @@ #include #include "rocksdb/options.h" -using std::log; - namespace rocksdb { namespace { @@ -28,7 +26,7 @@ CompactionStyle PickCompactionStyle(size_t write_buffer_size, // Otherwise, calculate a score based on threshold and expected value of // two styles, weighing reads 4X important than writes. 
int expected_levels = static_cast(ceil( - ::log(target_db_size / write_buffer_size) / ::log(kBytesForLevelMultiplier))); + std::log(target_db_size / write_buffer_size) / std::log(kBytesForLevelMultiplier))); int expected_max_files_universal = static_cast(ceil(log2(target_db_size / write_buffer_size))); @@ -119,8 +117,8 @@ void OptimizeForLevel(int read_amplification_threshold, int write_amplification_threshold, uint64_t target_db_size, Options* options) { int expected_levels_one_level0_file = - static_cast(ceil(::log(target_db_size / options->write_buffer_size) / - ::log(kBytesForLevelMultiplier))); + static_cast(ceil(std::log(target_db_size / options->write_buffer_size) / + std::log(kBytesForLevelMultiplier))); int level0_stop_writes_trigger = read_amplification_threshold - expected_levels_one_level0_file; diff --git a/utilities/geodb/geodb_impl.cc b/utilities/geodb/geodb_impl.cc index e134dd62c..7fb76f36b 100644 --- a/utilities/geodb/geodb_impl.cc +++ b/utilities/geodb/geodb_impl.cc @@ -19,8 +19,6 @@ #include "util/coding.h" #include "util/string_util.h" -using std::log; -using std::floor; // // There are two types of keys. 
The first type of key-values @@ -355,8 +353,8 @@ Status GeoDBImpl::searchQuadIds(const GeoPosition& position, Pixel bottomRight = PositionToPixel(bottomRightPos, Detail); // how many level of details to look for - int numberOfTilesAtMaxDepth = static_cast(floor((bottomRight.x - topLeft.x) / 256)); - int zoomLevelsToRise = static_cast(floor(::log(numberOfTilesAtMaxDepth) / ::log(2))); + int numberOfTilesAtMaxDepth = static_cast(std::floor((bottomRight.x - topLeft.x) / 256)); + int zoomLevelsToRise = static_cast(std::floor(std::log(numberOfTilesAtMaxDepth) / std::log(2))); zoomLevelsToRise++; int levels = std::max(0, Detail - zoomLevelsToRise); @@ -393,10 +391,10 @@ GeoDBImpl::Pixel GeoDBImpl::PositionToPixel(const GeoPosition& pos, double latitude = clip(pos.latitude, MinLatitude, MaxLatitude); double x = (pos.longitude + 180) / 360; double sinLatitude = sin(latitude * PI / 180); - double y = 0.5 - ::log((1 + sinLatitude) / (1 - sinLatitude)) / (4 * PI); + double y = 0.5 - std::log((1 + sinLatitude) / (1 - sinLatitude)) / (4 * PI); double mapSize = MapSize(levelOfDetail); - double X = floor(clip(x * mapSize + 0.5, 0, mapSize - 1)); - double Y = floor(clip(y * mapSize + 0.5, 0, mapSize - 1)); + double X = std::floor(clip(x * mapSize + 0.5, 0, mapSize - 1)); + double Y = std::floor(clip(y * mapSize + 0.5, 0, mapSize - 1)); return Pixel((unsigned int)X, (unsigned int)Y); } @@ -411,8 +409,8 @@ GeoPosition GeoDBImpl::PixelToPosition(const Pixel& pixel, int levelOfDetail) { // Converts a Pixel to a Tile GeoDBImpl::Tile GeoDBImpl::PixelToTile(const Pixel& pixel) { - unsigned int tileX = static_cast(floor(pixel.x / 256)); - unsigned int tileY = static_cast(floor(pixel.y / 256)); + unsigned int tileX = static_cast(std::floor(pixel.x / 256)); + unsigned int tileY = static_cast(std::floor(pixel.y / 256)); return Tile(tileX, tileY); } From fdbff42391212155243aa91eea2f9b9faa3646b6 Mon Sep 17 00:00:00 2001 From: sdong Date: Tue, 19 Jan 2016 14:40:54 -0800 Subject: [PATCH 020/195] 
Crash test to make kill decision for every kill point Summary: In crash test, when coming to each kill point, we start a random class using seed as current second. With this approach, for every second, the random number used is the same. However, in each second, there are multiple kill points with different frequency. It makes it hard to reason about chance of kill point to trigger. With this commit, we use thread local random seed to generate the random number, so that it will take different values per second, hoping it makes chances of killing much easier to reason about. Also significantly reduce the kill odd to make sure time before kiling is similar as before. Test Plan: Run white box crash test and see the killing happens as expected and the run time time before killing reasonable. Reviewers: kradhakrishnan, IslamAbdelRahman, rven, yhchiang, andrewkr, anthony Reviewed By: anthony Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D52971 --- tools/db_crashtest.py | 6 +++--- util/sync_point.cc | 10 ++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 348ed1215..90fde4a30 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -236,7 +236,7 @@ def whitebox_crash_main(args): total_check_mode = 4 check_mode = 0 - kill_random_test = 97 + kill_random_test = 888887 kill_mode = 0 while time.time() < exit_time: @@ -255,13 +255,13 @@ def whitebox_crash_main(args): }) elif kill_mode == 1: additional_opts.update({ - "kill_random_test": (kill_random_test / 2 + 1), + "kill_random_test": (kill_random_test / 10 + 1), "kill_prefix_blacklist": "WritableFileWriter::Append," + "WritableFileWriter::WriteBuffered", }) elif kill_mode == 2: additional_opts.update({ - "kill_random_test": (kill_random_test / 4 + 1), + "kill_random_test": (kill_random_test / 5000 + 1), "kill_prefix_blacklist": "WritableFileWriter::Append," "WritableFileWriter::WriteBuffered," 
"PosixMmapFile::Allocate,WritableFileWriter::Flush", diff --git a/util/sync_point.cc b/util/sync_point.cc index 11c42f100..147e3e47a 100644 --- a/util/sync_point.cc +++ b/util/sync_point.cc @@ -21,16 +21,14 @@ void TestKillRandom(std::string kill_point, int odds, } } - time_t curtime = time(nullptr); - Random r((uint32_t)curtime); - assert(odds > 0); if (odds % 7 == 0) { - // class Rarndom uses multiplier 16807, which is 7^5. If odds are - // multiplier of 7, the first random value might have limited values. + // class Random uses multiplier 16807, which is 7^5. If odds are + // multiplier of 7, there might be limited values generated. odds++; } - bool crash = r.OneIn(odds); + auto* r = Random::GetTLSInstance(); + bool crash = r->OneIn(odds); if (crash) { port::Crash(srcfile, srcline); } From 94918ae84bec80c9aca7b03e14b2d0f2ddb76123 Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 15 Jan 2016 14:13:08 -0800 Subject: [PATCH 021/195] db_bench: explicitly clear buffer in compress benchmark Summary: It is reported that in compress benchmark in db_bench, zlib will cause an OOM. The suggestd fix was to clear the buffer. Test Plan: Build and run compress benchmark. 
Reviewers: IslamAbdelRahman, yhchiang, rven, andrewkr, kradhakrishnan, anthony Reviewed By: anthony Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D52857 --- db/db_bench.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/db/db_bench.cc b/db/db_bench.cc index a07798b41..a0d767ea6 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -2262,6 +2262,7 @@ class Benchmark { // Compress 1G while (ok && bytes < int64_t(1) << 30) { + compressed.clear(); ok = CompressSlice(input, &compressed); produced += compressed.size(); bytes += input.size(); From eceb5cb1b7d1c75b0767451a999dccb27339ce4f Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 20 Jan 2016 15:17:52 -0800 Subject: [PATCH 022/195] Split db_test.cc (part 1: properties) Summary: Moved all the tests that verify property correctness into a separate file. The goal is to reduce compile time and complexity of db_test. I didn't add parallelism for db_properties_test, even though these tests were parallelized in db_test, since the file is small enough that it won't matter. Some of these moves may be controversial since it's hard to say whether the test is "verifying property correctness," or "using properties to verify rocksdb's correctness." I'm interested in any opinions. 
Test Plan: ran db_properties_test, also waiting on "make commit-prereq -j32" Reviewers: yhchiang, IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D52995 --- CMakeLists.txt | 9 +- Makefile | 4 + db/db_properties_test.cc | 1173 ++++++++++++++++++++++++++++++++++++++ db/db_test.cc | 1143 ------------------------------------- db/db_test_util.h | 1 - 5 files changed, 1182 insertions(+), 1148 deletions(-) create mode 100644 db/db_properties_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index a005d26d1..208267254 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -312,17 +312,18 @@ set(TESTS db/comparator_db_test.cc db/corruption_test.cc db/cuckoo_table_db_test.cc - db/db_iter_test.cc - db/db_test.cc db/db_compaction_filter_test.cc db/db_compaction_test.cc db/db_dynamic_level_test.cc db/db_inplace_update_test.cc + db/db_iter_test.cc db/db_log_iter_test.cc + db/db_properties_test.cc + db/db_table_properties_test.cc + db/db_tailing_iter_test.cc + db/db_test.cc db/db_universal_compaction_test.cc db/db_wal_test.cc - db/db_tailing_iter_test.cc - db/db_table_properties_test.cc db/dbformat_test.cc db/deletefile_test.cc db/fault_injection_test.cc diff --git a/Makefile b/Makefile index 2c8dbd589..24e6a5d28 100644 --- a/Makefile +++ b/Makefile @@ -251,6 +251,7 @@ TESTS = \ db_tailing_iter_test \ db_universal_compaction_test \ db_wal_test \ + db_properties_test \ db_table_properties_test \ block_hash_index_test \ autovector_test \ @@ -772,6 +773,9 @@ db_universal_compaction_test: db/db_universal_compaction_test.o db/db_test_util. 
db_wal_test: db/db_wal_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) +db_properties_test: db/db_properties_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + db_table_properties_test: db/db_table_properties_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc new file mode 100644 index 000000000..ed8c561cc --- /dev/null +++ b/db/db_properties_test.cc @@ -0,0 +1,1173 @@ +// Copyright (c) 2016, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include + +#include +#include + +#include "db/db_test_util.h" +#include "port/stack_trace.h" +#include "rocksdb/options.h" +#include "rocksdb/perf_context.h" +#include "rocksdb/perf_level.h" +#include "rocksdb/table.h" +#include "util/random.h" + +namespace rocksdb { + +class DBPropertiesTest : public DBTestBase { + public: + DBPropertiesTest() : DBTestBase("/db_properties_test") {} +}; + +#ifndef ROCKSDB_LITE +TEST_F(DBPropertiesTest, Empty) { + do { + Options options; + options.env = env_; + options.write_buffer_size = 100000; // Small write buffer + options = CurrentOptions(options); + CreateAndReopenWithCF({"pikachu"}, options); + + std::string num; + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ("0", num); + + ASSERT_OK(Put(1, "foo", "v1")); + ASSERT_EQ("v1", Get(1, "foo")); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ("1", num); + + // Block sync 
calls + env_->delay_sstable_sync_.store(true, std::memory_order_release); + Put(1, "k1", std::string(100000, 'x')); // Fill memtable + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ("2", num); + + Put(1, "k2", std::string(100000, 'y')); // Trigger compaction + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ("1", num); + + ASSERT_EQ("v1", Get(1, "foo")); + // Release sync calls + env_->delay_sstable_sync_.store(false, std::memory_order_release); + + ASSERT_OK(db_->DisableFileDeletions()); + ASSERT_TRUE( + dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); + ASSERT_EQ("1", num); + + ASSERT_OK(db_->DisableFileDeletions()); + ASSERT_TRUE( + dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); + ASSERT_EQ("2", num); + + ASSERT_OK(db_->DisableFileDeletions()); + ASSERT_TRUE( + dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); + ASSERT_EQ("3", num); + + ASSERT_OK(db_->EnableFileDeletions(false)); + ASSERT_TRUE( + dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); + ASSERT_EQ("2", num); + + ASSERT_OK(db_->EnableFileDeletions()); + ASSERT_TRUE( + dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); + ASSERT_EQ("0", num); + } while (ChangeOptions()); +} + +TEST_F(DBPropertiesTest, GetAggregatedIntPropertyTest) { + const int kKeySize = 100; + const int kValueSize = 500; + const int kKeyNum = 100; + + Options options; + options.env = env_; + options.create_if_missing = true; + options.write_buffer_size = (kKeySize + kValueSize) * kKeyNum / 10; + // Make them never flush + options.min_write_buffer_number_to_merge = 1000; + options.max_write_buffer_number = 1000; + options = CurrentOptions(options); + CreateAndReopenWithCF({"one", "two", "three", "four"}, options); + + Random rnd(301); + for (auto* handle : handles_) { + for (int i = 0; i < kKeyNum; ++i) { + 
db_->Put(WriteOptions(), handle, RandomString(&rnd, kKeySize), + RandomString(&rnd, kValueSize)); + } + } + + uint64_t manual_sum = 0; + uint64_t api_sum = 0; + uint64_t value = 0; + for (auto* handle : handles_) { + ASSERT_TRUE( + db_->GetIntProperty(handle, DB::Properties::kSizeAllMemTables, &value)); + manual_sum += value; + } + ASSERT_TRUE(db_->GetAggregatedIntProperty(DB::Properties::kSizeAllMemTables, + &api_sum)); + ASSERT_GT(manual_sum, 0); + ASSERT_EQ(manual_sum, api_sum); + + ASSERT_FALSE(db_->GetAggregatedIntProperty(DB::Properties::kDBStats, &value)); + + uint64_t before_flush_trm; + uint64_t after_flush_trm; + for (auto* handle : handles_) { + ASSERT_TRUE(db_->GetAggregatedIntProperty( + DB::Properties::kEstimateTableReadersMem, &before_flush_trm)); + + // Issue flush and expect larger memory usage of table readers. + db_->Flush(FlushOptions(), handle); + + ASSERT_TRUE(db_->GetAggregatedIntProperty( + DB::Properties::kEstimateTableReadersMem, &after_flush_trm)); + ASSERT_GT(after_flush_trm, before_flush_trm); + } +} + +namespace { +void ResetTableProperties(TableProperties* tp) { + tp->data_size = 0; + tp->index_size = 0; + tp->filter_size = 0; + tp->raw_key_size = 0; + tp->raw_value_size = 0; + tp->num_data_blocks = 0; + tp->num_entries = 0; +} + +void ParseTablePropertiesString(std::string tp_string, TableProperties* tp) { + double dummy_double; + std::replace(tp_string.begin(), tp_string.end(), ';', ' '); + std::replace(tp_string.begin(), tp_string.end(), '=', ' '); + ResetTableProperties(tp); + + sscanf(tp_string.c_str(), + "# data blocks %" SCNu64 " # entries %" SCNu64 " raw key size %" SCNu64 + " raw average key size %lf " + " raw value size %" SCNu64 + " raw average value size %lf " + " data block size %" SCNu64 " index block size %" SCNu64 + " filter block size %" SCNu64, + &tp->num_data_blocks, &tp->num_entries, &tp->raw_key_size, + &dummy_double, &tp->raw_value_size, &dummy_double, &tp->data_size, + &tp->index_size, &tp->filter_size); +} + 
+void VerifySimilar(uint64_t a, uint64_t b, double bias) { + ASSERT_EQ(a == 0U, b == 0U); + if (a == 0) { + return; + } + double dbl_a = static_cast(a); + double dbl_b = static_cast(b); + if (dbl_a > dbl_b) { + ASSERT_LT(static_cast(dbl_a - dbl_b) / (dbl_a + dbl_b), bias); + } else { + ASSERT_LT(static_cast(dbl_b - dbl_a) / (dbl_a + dbl_b), bias); + } +} + +void VerifyTableProperties(const TableProperties& base_tp, + const TableProperties& new_tp, + double filter_size_bias = 0.1, + double index_size_bias = 0.1, + double data_size_bias = 0.1, + double num_data_blocks_bias = 0.05) { + VerifySimilar(base_tp.data_size, new_tp.data_size, data_size_bias); + VerifySimilar(base_tp.index_size, new_tp.index_size, index_size_bias); + VerifySimilar(base_tp.filter_size, new_tp.filter_size, filter_size_bias); + VerifySimilar(base_tp.num_data_blocks, new_tp.num_data_blocks, + num_data_blocks_bias); + ASSERT_EQ(base_tp.raw_key_size, new_tp.raw_key_size); + ASSERT_EQ(base_tp.raw_value_size, new_tp.raw_value_size); + ASSERT_EQ(base_tp.num_entries, new_tp.num_entries); +} + +void GetExpectedTableProperties(TableProperties* expected_tp, + const int kKeySize, const int kValueSize, + const int kKeysPerTable, const int kTableCount, + const int kBloomBitsPerKey, + const size_t kBlockSize) { + const int kKeyCount = kTableCount * kKeysPerTable; + const int kAvgSuccessorSize = kKeySize / 2; + const int kEncodingSavePerKey = kKeySize / 4; + expected_tp->raw_key_size = kKeyCount * (kKeySize + 8); + expected_tp->raw_value_size = kKeyCount * kValueSize; + expected_tp->num_entries = kKeyCount; + expected_tp->num_data_blocks = + kTableCount * + (kKeysPerTable * (kKeySize - kEncodingSavePerKey + kValueSize)) / + kBlockSize; + expected_tp->data_size = + kTableCount * (kKeysPerTable * (kKeySize + 8 + kValueSize)); + expected_tp->index_size = + expected_tp->num_data_blocks * (kAvgSuccessorSize + 12); + expected_tp->filter_size = + kTableCount * (kKeysPerTable * kBloomBitsPerKey / 8); +} +} // 
anonymous namespace + +TEST_F(DBPropertiesTest, AggregatedTableProperties) { + for (int kTableCount = 40; kTableCount <= 100; kTableCount += 30) { + const int kKeysPerTable = 100; + const int kKeySize = 80; + const int kValueSize = 200; + const int kBloomBitsPerKey = 20; + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 8; + options.compression = kNoCompression; + options.create_if_missing = true; + + BlockBasedTableOptions table_options; + table_options.filter_policy.reset( + NewBloomFilterPolicy(kBloomBitsPerKey, false)); + table_options.block_size = 1024; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + DestroyAndReopen(options); + + Random rnd(5632); + for (int table = 1; table <= kTableCount; ++table) { + for (int i = 0; i < kKeysPerTable; ++i) { + db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), + RandomString(&rnd, kValueSize)); + } + db_->Flush(FlushOptions()); + } + std::string property; + db_->GetProperty(DB::Properties::kAggregatedTableProperties, &property); + + TableProperties expected_tp; + GetExpectedTableProperties(&expected_tp, kKeySize, kValueSize, + kKeysPerTable, kTableCount, kBloomBitsPerKey, + table_options.block_size); + + TableProperties output_tp; + ParseTablePropertiesString(property, &output_tp); + + VerifyTableProperties(expected_tp, output_tp); + } +} + +TEST_F(DBPropertiesTest, ReadLatencyHistogramByLevel) { + Options options = CurrentOptions(); + options.write_buffer_size = 110 << 10; + options.level0_file_num_compaction_trigger = 6; + options.num_levels = 4; + options.compression = kNoCompression; + options.max_bytes_for_level_base = 4500 << 10; + options.target_file_size_base = 98 << 10; + options.max_write_buffer_number = 2; + options.statistics = rocksdb::CreateDBStatistics(); + options.max_open_files = 100; + + BlockBasedTableOptions table_options; + table_options.no_block_cache = true; + + DestroyAndReopen(options); + int key_index = 0; + Random rnd(301); + 
for (int num = 0; num < 8; num++) { + Put("foo", "bar"); + GenerateNewFile(&rnd, &key_index); + dbfull()->TEST_WaitForCompact(); + } + dbfull()->TEST_WaitForCompact(); + + std::string prop; + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); + + // Get() after flushes, See latency histogram tracked. + for (int key = 0; key < key_index; key++) { + Get(Key(key)); + } + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); + ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); + ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); + ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); + + // Reopen and issue Get(). See thee latency tracked + Reopen(options); + dbfull()->TEST_WaitForCompact(); + for (int key = 0; key < key_index; key++) { + Get(Key(key)); + } + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); + ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); + ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); + ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); + + // Reopen and issue iterating. 
See thee latency tracked + Reopen(options); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); + ASSERT_EQ(std::string::npos, prop.find("** Level 0 read latency histogram")); + ASSERT_EQ(std::string::npos, prop.find("** Level 1 read latency histogram")); + ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); + { + unique_ptr iter(db_->NewIterator(ReadOptions())); + for (iter->Seek(Key(0)); iter->Valid(); iter->Next()) { + } + } + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); + ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); + ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); + ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); + + // options.max_open_files preloads table readers. + options.max_open_files = -1; + Reopen(options); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); + ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); + ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); + ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); + for (int key = 0; key < key_index; key++) { + Get(Key(key)); + } + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); + ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); + ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); + ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); +} + +TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { + const int kTableCount = 100; + const int kKeysPerTable = 10; + const int kKeySize = 50; + const int kValueSize = 400; + const int kMaxLevel = 7; + const int kBloomBitsPerKey = 20; + Random rnd(301); + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 8; + options.compression = kNoCompression; + options.create_if_missing = true; + 
options.level0_file_num_compaction_trigger = 2; + options.target_file_size_base = 8192; + options.max_bytes_for_level_base = 10000; + options.max_bytes_for_level_multiplier = 2; + // This ensures there no compaction happening when we call GetProperty(). + options.disable_auto_compactions = true; + + BlockBasedTableOptions table_options; + table_options.filter_policy.reset( + NewBloomFilterPolicy(kBloomBitsPerKey, false)); + table_options.block_size = 1024; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + DestroyAndReopen(options); + + std::string level_tp_strings[kMaxLevel]; + std::string tp_string; + TableProperties level_tps[kMaxLevel]; + TableProperties tp, sum_tp, expected_tp; + for (int table = 1; table <= kTableCount; ++table) { + for (int i = 0; i < kKeysPerTable; ++i) { + db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), + RandomString(&rnd, kValueSize)); + } + db_->Flush(FlushOptions()); + db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ResetTableProperties(&sum_tp); + for (int level = 0; level < kMaxLevel; ++level) { + db_->GetProperty( + DB::Properties::kAggregatedTablePropertiesAtLevel + ToString(level), + &level_tp_strings[level]); + ParseTablePropertiesString(level_tp_strings[level], &level_tps[level]); + sum_tp.data_size += level_tps[level].data_size; + sum_tp.index_size += level_tps[level].index_size; + sum_tp.filter_size += level_tps[level].filter_size; + sum_tp.raw_key_size += level_tps[level].raw_key_size; + sum_tp.raw_value_size += level_tps[level].raw_value_size; + sum_tp.num_data_blocks += level_tps[level].num_data_blocks; + sum_tp.num_entries += level_tps[level].num_entries; + } + db_->GetProperty(DB::Properties::kAggregatedTableProperties, &tp_string); + ParseTablePropertiesString(tp_string, &tp); + ASSERT_EQ(sum_tp.data_size, tp.data_size); + ASSERT_EQ(sum_tp.index_size, tp.index_size); + ASSERT_EQ(sum_tp.filter_size, tp.filter_size); + ASSERT_EQ(sum_tp.raw_key_size, tp.raw_key_size); + 
ASSERT_EQ(sum_tp.raw_value_size, tp.raw_value_size); + ASSERT_EQ(sum_tp.num_data_blocks, tp.num_data_blocks); + ASSERT_EQ(sum_tp.num_entries, tp.num_entries); + if (table > 3) { + GetExpectedTableProperties(&expected_tp, kKeySize, kValueSize, + kKeysPerTable, table, kBloomBitsPerKey, + table_options.block_size); + // Gives larger bias here as index block size, filter block size, + // and data block size become much harder to estimate in this test. + VerifyTableProperties(tp, expected_tp, 0.5, 0.4, 0.4, 0.25); + } + } +} + +TEST_F(DBPropertiesTest, NumImmutableMemTable) { + do { + Options options = CurrentOptions(); + WriteOptions writeOpt = WriteOptions(); + writeOpt.disableWAL = true; + options.max_write_buffer_number = 4; + options.min_write_buffer_number_to_merge = 3; + options.max_write_buffer_number_to_maintain = 0; + options.write_buffer_size = 1000000; + CreateAndReopenWithCF({"pikachu"}, options); + + std::string big_value(1000000 * 2, 'x'); + std::string num; + SetPerfLevel(kEnableTime); + ASSERT_TRUE(GetPerfLevel() == kEnableTime); + + ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k1", big_value)); + ASSERT_TRUE(dbfull()->GetProperty(handles_[1], + "rocksdb.num-immutable-mem-table", &num)); + ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ(num, "1"); + perf_context.Reset(); + Get(1, "k1"); + ASSERT_EQ(1, static_cast(perf_context.get_from_memtable_count)); + + ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value)); + ASSERT_TRUE(dbfull()->GetProperty(handles_[1], + "rocksdb.num-immutable-mem-table", &num)); + ASSERT_EQ(num, "1"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ(num, "1"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-imm-mem-tables", &num)); + ASSERT_EQ(num, "1"); + + perf_context.Reset(); + Get(1, "k1"); + ASSERT_EQ(2, 
static_cast(perf_context.get_from_memtable_count)); + perf_context.Reset(); + Get(1, "k2"); + ASSERT_EQ(1, static_cast(perf_context.get_from_memtable_count)); + + ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k3", big_value)); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.cur-size-active-mem-table", &num)); + ASSERT_TRUE(dbfull()->GetProperty(handles_[1], + "rocksdb.num-immutable-mem-table", &num)); + ASSERT_EQ(num, "2"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ(num, "1"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-imm-mem-tables", &num)); + ASSERT_EQ(num, "2"); + perf_context.Reset(); + Get(1, "k2"); + ASSERT_EQ(2, static_cast(perf_context.get_from_memtable_count)); + perf_context.Reset(); + Get(1, "k3"); + ASSERT_EQ(1, static_cast(perf_context.get_from_memtable_count)); + perf_context.Reset(); + Get(1, "k1"); + ASSERT_EQ(3, static_cast(perf_context.get_from_memtable_count)); + + ASSERT_OK(Flush(1)); + ASSERT_TRUE(dbfull()->GetProperty(handles_[1], + "rocksdb.num-immutable-mem-table", &num)); + ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.cur-size-active-mem-table", &num)); + // "192" is the size of the metadata of an empty skiplist, this would + // break if we change the default skiplist implementation + ASSERT_EQ(num, "192"); + + uint64_t int_num; + uint64_t base_total_size; + ASSERT_TRUE(dbfull()->GetIntProperty( + handles_[1], "rocksdb.estimate-num-keys", &base_total_size)); + + ASSERT_OK(dbfull()->Delete(writeOpt, handles_[1], "k2")); + ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k3", "")); + ASSERT_OK(dbfull()->Delete(writeOpt, handles_[1], "k3")); + ASSERT_TRUE(dbfull()->GetIntProperty( + handles_[1], "rocksdb.num-deletes-active-mem-table", &int_num)); + ASSERT_EQ(int_num, 2U); + ASSERT_TRUE(dbfull()->GetIntProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &int_num)); + 
ASSERT_EQ(int_num, 3U); + + ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value)); + ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value)); + ASSERT_TRUE(dbfull()->GetIntProperty( + handles_[1], "rocksdb.num-entries-imm-mem-tables", &int_num)); + ASSERT_EQ(int_num, 4U); + ASSERT_TRUE(dbfull()->GetIntProperty( + handles_[1], "rocksdb.num-deletes-imm-mem-tables", &int_num)); + ASSERT_EQ(int_num, 2U); + + ASSERT_TRUE(dbfull()->GetIntProperty( + handles_[1], "rocksdb.estimate-num-keys", &int_num)); + ASSERT_EQ(int_num, base_total_size + 1); + + SetPerfLevel(kDisable); + ASSERT_TRUE(GetPerfLevel() == kDisable); + } while (ChangeCompactOptions()); +} + +TEST_F(DBPropertiesTest, GetProperty) { + // Set sizes to both background thread pool to be 1 and block them. + env_->SetBackgroundThreads(1, Env::HIGH); + env_->SetBackgroundThreads(1, Env::LOW); + test::SleepingBackgroundTask sleeping_task_low; + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + test::SleepingBackgroundTask sleeping_task_high; + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, + &sleeping_task_high, Env::Priority::HIGH); + + Options options = CurrentOptions(); + WriteOptions writeOpt = WriteOptions(); + writeOpt.disableWAL = true; + options.compaction_style = kCompactionStyleUniversal; + options.level0_file_num_compaction_trigger = 1; + options.compaction_options_universal.size_ratio = 50; + options.max_background_compactions = 1; + options.max_background_flushes = 1; + options.max_write_buffer_number = 10; + options.min_write_buffer_number_to_merge = 1; + options.max_write_buffer_number_to_maintain = 0; + options.write_buffer_size = 1000000; + Reopen(options); + + std::string big_value(1000000 * 2, 'x'); + std::string num; + uint64_t int_num; + SetPerfLevel(kEnableTime); + + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); + ASSERT_EQ(int_num, 0U); + ASSERT_TRUE( + 
dbfull()->GetIntProperty("rocksdb.estimate-live-data-size", &int_num)); + ASSERT_EQ(int_num, 0U); + + ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value)); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); + ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); + ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); + ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); + ASSERT_EQ(num, "1"); + perf_context.Reset(); + + ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value)); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); + ASSERT_EQ(num, "1"); + ASSERT_OK(dbfull()->Delete(writeOpt, "k-non-existing")); + ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value)); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); + ASSERT_EQ(num, "2"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); + ASSERT_EQ(num, "1"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); + ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); + ASSERT_EQ(num, "2"); + // Verify the same set of properties through GetIntProperty + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.num-immutable-mem-table", &int_num)); + ASSERT_EQ(int_num, 2U); + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.mem-table-flush-pending", &int_num)); + ASSERT_EQ(int_num, 1U); + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.compaction-pending", &int_num)); + ASSERT_EQ(int_num, 0U); + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &int_num)); + ASSERT_EQ(int_num, 2U); + + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); + ASSERT_EQ(int_num, 0U); + + sleeping_task_high.WakeUp(); + sleeping_task_high.WaitUntilDone(); + dbfull()->TEST_WaitForFlushMemTable(); + + 
ASSERT_OK(dbfull()->Put(writeOpt, "k4", big_value)); + ASSERT_OK(dbfull()->Put(writeOpt, "k5", big_value)); + dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); + ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); + ASSERT_EQ(num, "1"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); + ASSERT_EQ(num, "4"); + + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); + ASSERT_GT(int_num, 0U); + + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilDone(); + + // Wait for compaction to be done. This is important because otherwise RocksDB + // might schedule a compaction when reopening the database, failing assertion + // (A) as a result. + dbfull()->TEST_WaitForCompact(); + options.max_open_files = 10; + Reopen(options); + // After reopening, no table reader is loaded, so no memory for table readers + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); + ASSERT_EQ(int_num, 0U); // (A) + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &int_num)); + ASSERT_GT(int_num, 0U); + + // After reading a key, at least one table reader is loaded. 
+ Get("k5"); + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); + ASSERT_GT(int_num, 0U); + + // Test rocksdb.num-live-versions + { + options.level0_file_num_compaction_trigger = 20; + Reopen(options); + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); + ASSERT_EQ(int_num, 1U); + + // Use an iterator to hold current version + std::unique_ptr iter1(dbfull()->NewIterator(ReadOptions())); + + ASSERT_OK(dbfull()->Put(writeOpt, "k6", big_value)); + Flush(); + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); + ASSERT_EQ(int_num, 2U); + + // Use an iterator to hold current version + std::unique_ptr iter2(dbfull()->NewIterator(ReadOptions())); + + ASSERT_OK(dbfull()->Put(writeOpt, "k7", big_value)); + Flush(); + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); + ASSERT_EQ(int_num, 3U); + + iter2.reset(); + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); + ASSERT_EQ(int_num, 2U); + + iter1.reset(); + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); + ASSERT_EQ(int_num, 1U); + } +} + +TEST_F(DBPropertiesTest, ApproximateMemoryUsage) { + const int kNumRounds = 10; + // TODO(noetzli) kFlushesPerRound does not really correlate with how many + // flushes happen. + const int kFlushesPerRound = 10; + const int kWritesPerFlush = 10; + const int kKeySize = 100; + const int kValueSize = 1000; + Options options; + options.write_buffer_size = 1000; // small write buffer + options.min_write_buffer_number_to_merge = 4; + options.compression = kNoCompression; + options.create_if_missing = true; + options = CurrentOptions(options); + DestroyAndReopen(options); + + Random rnd(301); + + std::vector iters; + + uint64_t active_mem; + uint64_t unflushed_mem; + uint64_t all_mem; + uint64_t prev_all_mem; + + // Phase 0. 
Verify that the initial values of all these properties are the same
+  // as we have no mem-tables.
+  dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem);
+  dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem);
+  dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem);
+  ASSERT_EQ(all_mem, active_mem);
+  ASSERT_EQ(all_mem, unflushed_mem);
+
+  // Phase 1. Simply issue Put() and expect "cur-size-all-mem-tables" equals to
+  // "size-all-mem-tables"
+  for (int r = 0; r < kNumRounds; ++r) {
+    for (int f = 0; f < kFlushesPerRound; ++f) {
+      for (int w = 0; w < kWritesPerFlush; ++w) {
+        Put(RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize));
+      }
+    }
+    // Make sure that there is no flush between getting the two properties.
+    dbfull()->TEST_WaitForFlushMemTable();
+    dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem);
+    dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem);
+    // in the no-iterator case, these two numbers should be the same.
+    ASSERT_EQ(unflushed_mem, all_mem);
+  }
+  prev_all_mem = all_mem;
+
+  // Phase 2. Keep issuing Put() but also create new iterators. This time we
+  // expect "size-all-mem-tables" > "cur-size-all-mem-tables".
+  for (int r = 0; r < kNumRounds; ++r) {
+    iters.push_back(db_->NewIterator(ReadOptions()));
+    for (int f = 0; f < kFlushesPerRound; ++f) {
+      for (int w = 0; w < kWritesPerFlush; ++w) {
+        Put(RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize));
+      }
+    }
+    // Force flush to prevent flush from happening between getting the
+    // properties or after getting the properties and before the new round.
+    Flush();
+
+    // In the second round, add iterators.
+ dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); + dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); + dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); + ASSERT_GT(all_mem, active_mem); + ASSERT_GT(all_mem, unflushed_mem); + ASSERT_GT(all_mem, prev_all_mem); + prev_all_mem = all_mem; + } + + // Phase 3. Delete iterators and expect "size-all-mem-tables" shrinks + // whenever we release an iterator. + for (auto* iter : iters) { + delete iter; + dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); + // Expect the size shrinking + ASSERT_LT(all_mem, prev_all_mem); + prev_all_mem = all_mem; + } + + // Expect all these three counters to be the same. + dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); + dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); + dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); + ASSERT_EQ(active_mem, unflushed_mem); + ASSERT_EQ(unflushed_mem, all_mem); + + // Phase 5. Reopen, and expect all these three counters to be the same again. + Reopen(options); + dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); + dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); + dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); + ASSERT_EQ(active_mem, unflushed_mem); + ASSERT_EQ(unflushed_mem, all_mem); +} + +TEST_F(DBPropertiesTest, EstimatePendingCompBytes) { + // Set sizes to both background thread pool to be 1 and block them. 
+ env_->SetBackgroundThreads(1, Env::HIGH); + env_->SetBackgroundThreads(1, Env::LOW); + test::SleepingBackgroundTask sleeping_task_low; + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + + Options options = CurrentOptions(); + WriteOptions writeOpt = WriteOptions(); + writeOpt.disableWAL = true; + options.compaction_style = kCompactionStyleLevel; + options.level0_file_num_compaction_trigger = 2; + options.max_background_compactions = 1; + options.max_background_flushes = 1; + options.max_write_buffer_number = 10; + options.min_write_buffer_number_to_merge = 1; + options.max_write_buffer_number_to_maintain = 0; + options.write_buffer_size = 1000000; + Reopen(options); + + std::string big_value(1000000 * 2, 'x'); + std::string num; + uint64_t int_num; + + ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value)); + Flush(); + ASSERT_TRUE(dbfull()->GetIntProperty( + "rocksdb.estimate-pending-compaction-bytes", &int_num)); + ASSERT_EQ(int_num, 0U); + + ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value)); + Flush(); + ASSERT_TRUE(dbfull()->GetIntProperty( + "rocksdb.estimate-pending-compaction-bytes", &int_num)); + ASSERT_EQ(int_num, 0U); + + ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value)); + Flush(); + ASSERT_TRUE(dbfull()->GetIntProperty( + "rocksdb.estimate-pending-compaction-bytes", &int_num)); + ASSERT_GT(int_num, 0U); + + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilDone(); + + dbfull()->TEST_WaitForCompact(); + ASSERT_TRUE(dbfull()->GetIntProperty( + "rocksdb.estimate-pending-compaction-bytes", &int_num)); + ASSERT_EQ(int_num, 0U); +} +#endif // ROCKSDB_LITE + +class CountingUserTblPropCollector : public TablePropertiesCollector { + public: + const char* Name() const override { return "CountingUserTblPropCollector"; } + + Status Finish(UserCollectedProperties* properties) override { + std::string encoded; + PutVarint32(&encoded, count_); + *properties = UserCollectedProperties{ + 
{"CountingUserTblPropCollector", message_}, {"Count", encoded}, + }; + return Status::OK(); + } + + Status AddUserKey(const Slice& user_key, const Slice& value, EntryType type, + SequenceNumber seq, uint64_t file_size) override { + ++count_; + return Status::OK(); + } + + virtual UserCollectedProperties GetReadableProperties() const override { + return UserCollectedProperties{}; + } + + private: + std::string message_ = "Rocksdb"; + uint32_t count_ = 0; +}; + +class CountingUserTblPropCollectorFactory + : public TablePropertiesCollectorFactory { + public: + explicit CountingUserTblPropCollectorFactory( + uint32_t expected_column_family_id) + : expected_column_family_id_(expected_column_family_id), + num_created_(0) {} + virtual TablePropertiesCollector* CreateTablePropertiesCollector( + TablePropertiesCollectorFactory::Context context) override { + EXPECT_EQ(expected_column_family_id_, context.column_family_id); + num_created_++; + return new CountingUserTblPropCollector(); + } + const char* Name() const override { + return "CountingUserTblPropCollectorFactory"; + } + void set_expected_column_family_id(uint32_t v) { + expected_column_family_id_ = v; + } + uint32_t expected_column_family_id_; + uint32_t num_created_; +}; + +class CountingDeleteTabPropCollector : public TablePropertiesCollector { + public: + const char* Name() const override { return "CountingDeleteTabPropCollector"; } + + Status AddUserKey(const Slice& user_key, const Slice& value, EntryType type, + SequenceNumber seq, uint64_t file_size) override { + if (type == kEntryDelete) { + num_deletes_++; + } + return Status::OK(); + } + + bool NeedCompact() const override { return num_deletes_ > 10; } + + UserCollectedProperties GetReadableProperties() const override { + return UserCollectedProperties{}; + } + + Status Finish(UserCollectedProperties* properties) override { + *properties = + UserCollectedProperties{{"num_delete", ToString(num_deletes_)}}; + return Status::OK(); + } + + private: + uint32_t 
num_deletes_ = 0; +}; + +class CountingDeleteTabPropCollectorFactory + : public TablePropertiesCollectorFactory { + public: + virtual TablePropertiesCollector* CreateTablePropertiesCollector( + TablePropertiesCollectorFactory::Context context) override { + return new CountingDeleteTabPropCollector(); + } + const char* Name() const override { + return "CountingDeleteTabPropCollectorFactory"; + } +}; + +#ifndef ROCKSDB_LITE +TEST_F(DBPropertiesTest, GetUserDefinedTableProperties) { + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = (1 << 30); + options.max_background_flushes = 0; + options.table_properties_collector_factories.resize(1); + std::shared_ptr collector_factory = + std::make_shared(0); + options.table_properties_collector_factories[0] = collector_factory; + Reopen(options); + // Create 4 tables + for (int table = 0; table < 4; ++table) { + for (int i = 0; i < 10 + table; ++i) { + db_->Put(WriteOptions(), ToString(table * 100 + i), "val"); + } + db_->Flush(FlushOptions()); + } + + TablePropertiesCollection props; + ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); + ASSERT_EQ(4U, props.size()); + uint32_t sum = 0; + for (const auto& item : props) { + auto& user_collected = item.second->user_collected_properties; + ASSERT_TRUE(user_collected.find("CountingUserTblPropCollector") != + user_collected.end()); + ASSERT_EQ(user_collected.at("CountingUserTblPropCollector"), "Rocksdb"); + ASSERT_TRUE(user_collected.find("Count") != user_collected.end()); + Slice key(user_collected.at("Count")); + uint32_t count; + ASSERT_TRUE(GetVarint32(&key, &count)); + sum += count; + } + ASSERT_EQ(10u + 11u + 12u + 13u, sum); + + ASSERT_GT(collector_factory->num_created_, 0U); + collector_factory->num_created_ = 0; + dbfull()->TEST_CompactRange(0, nullptr, nullptr); + ASSERT_GT(collector_factory->num_created_, 0U); +} +#endif // ROCKSDB_LITE + +TEST_F(DBPropertiesTest, UserDefinedTablePropertiesContext) { + Options options = CurrentOptions(); + 
options.level0_file_num_compaction_trigger = 3;
+  options.max_background_flushes = 0;
+  options.table_properties_collector_factories.resize(1);
+  std::shared_ptr<CountingUserTblPropCollectorFactory> collector_factory =
+      std::make_shared<CountingUserTblPropCollectorFactory>(1);
+  options.table_properties_collector_factories[0] = collector_factory;
+  CreateAndReopenWithCF({"pikachu"}, options);
+  // Create 2 files
+  for (int table = 0; table < 2; ++table) {
+    for (int i = 0; i < 10 + table; ++i) {
+      Put(1, ToString(table * 100 + i), "val");
+    }
+    Flush(1);
+  }
+  ASSERT_GT(collector_factory->num_created_, 0U);
+
+  collector_factory->num_created_ = 0;
+  // Trigger automatic compactions.
+  for (int table = 0; table < 3; ++table) {
+    for (int i = 0; i < 10 + table; ++i) {
+      Put(1, ToString(table * 100 + i), "val");
+    }
+    Flush(1);
+    dbfull()->TEST_WaitForCompact();
+  }
+  ASSERT_GT(collector_factory->num_created_, 0U);
+
+  collector_factory->num_created_ = 0;
+  dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
+  ASSERT_GT(collector_factory->num_created_, 0U);
+
+  // Come back to write to default column family
+  collector_factory->num_created_ = 0;
+  collector_factory->set_expected_column_family_id(0);  // default CF
+  // Create 4 tables in default column family
+  for (int table = 0; table < 2; ++table) {
+    for (int i = 0; i < 10 + table; ++i) {
+      Put(ToString(table * 100 + i), "val");
+    }
+    Flush();
+  }
+  ASSERT_GT(collector_factory->num_created_, 0U);
+
+  collector_factory->num_created_ = 0;
+  // Trigger automatic compactions.
+ for (int table = 0; table < 3; ++table) { + for (int i = 0; i < 10 + table; ++i) { + Put(ToString(table * 100 + i), "val"); + } + Flush(); + dbfull()->TEST_WaitForCompact(); + } + ASSERT_GT(collector_factory->num_created_, 0U); + + collector_factory->num_created_ = 0; + dbfull()->TEST_CompactRange(0, nullptr, nullptr); + ASSERT_GT(collector_factory->num_created_, 0U); +} + +#ifndef ROCKSDB_LITE +TEST_F(DBPropertiesTest, TablePropertiesNeedCompactTest) { + Random rnd(301); + + Options options; + options.create_if_missing = true; + options.write_buffer_size = 4096; + options.max_write_buffer_number = 8; + options.level0_file_num_compaction_trigger = 2; + options.level0_slowdown_writes_trigger = 2; + options.level0_stop_writes_trigger = 4; + options.target_file_size_base = 2048; + options.max_bytes_for_level_base = 10240; + options.max_bytes_for_level_multiplier = 4; + options.soft_pending_compaction_bytes_limit = 1024 * 1024; + options.num_levels = 8; + + std::shared_ptr collector_factory = + std::make_shared(); + options.table_properties_collector_factories.resize(1); + options.table_properties_collector_factories[0] = collector_factory; + + DestroyAndReopen(options); + + const int kMaxKey = 1000; + for (int i = 0; i < kMaxKey; i++) { + ASSERT_OK(Put(Key(i), RandomString(&rnd, 102))); + ASSERT_OK(Put(Key(kMaxKey + i), RandomString(&rnd, 102))); + } + Flush(); + dbfull()->TEST_WaitForCompact(); + if (NumTableFilesAtLevel(0) == 1) { + // Clear Level 0 so that when later flush a file with deletions, + // we don't trigger an organic compaction. 
+ ASSERT_OK(Put(Key(0), "")); + ASSERT_OK(Put(Key(kMaxKey * 2), "")); + Flush(); + dbfull()->TEST_WaitForCompact(); + } + ASSERT_EQ(NumTableFilesAtLevel(0), 0); + + { + int c = 0; + std::unique_ptr iter(db_->NewIterator(ReadOptions())); + iter->Seek(Key(kMaxKey - 100)); + while (iter->Valid() && iter->key().compare(Key(kMaxKey + 100)) < 0) { + iter->Next(); + ++c; + } + ASSERT_EQ(c, 200); + } + + Delete(Key(0)); + for (int i = kMaxKey - 100; i < kMaxKey + 100; i++) { + Delete(Key(i)); + } + Delete(Key(kMaxKey * 2)); + + Flush(); + dbfull()->TEST_WaitForCompact(); + + { + SetPerfLevel(kEnableCount); + perf_context.Reset(); + int c = 0; + std::unique_ptr iter(db_->NewIterator(ReadOptions())); + iter->Seek(Key(kMaxKey - 100)); + while (iter->Valid() && iter->key().compare(Key(kMaxKey + 100)) < 0) { + iter->Next(); + } + ASSERT_EQ(c, 0); + ASSERT_LT(perf_context.internal_delete_skipped_count, 30u); + ASSERT_LT(perf_context.internal_key_skipped_count, 30u); + SetPerfLevel(kDisable); + } +} + +TEST_F(DBPropertiesTest, NeedCompactHintPersistentTest) { + Random rnd(301); + + Options options; + options.create_if_missing = true; + options.max_write_buffer_number = 8; + options.level0_file_num_compaction_trigger = 10; + options.level0_slowdown_writes_trigger = 10; + options.level0_stop_writes_trigger = 10; + options.disable_auto_compactions = true; + + std::shared_ptr collector_factory = + std::make_shared(); + options.table_properties_collector_factories.resize(1); + options.table_properties_collector_factories[0] = collector_factory; + + DestroyAndReopen(options); + + const int kMaxKey = 100; + for (int i = 0; i < kMaxKey; i++) { + ASSERT_OK(Put(Key(i), "")); + } + Flush(); + dbfull()->TEST_WaitForFlushMemTable(); + + for (int i = 1; i < kMaxKey - 1; i++) { + Delete(Key(i)); + } + Flush(); + dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_EQ(NumTableFilesAtLevel(0), 2); + + // Restart the DB. 
Although number of files didn't reach + // options.level0_file_num_compaction_trigger, compaction should + // still be triggered because of the need-compaction hint. + options.disable_auto_compactions = false; + Reopen(options); + dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(NumTableFilesAtLevel(0), 0); + { + SetPerfLevel(kEnableCount); + perf_context.Reset(); + int c = 0; + std::unique_ptr iter(db_->NewIterator(ReadOptions())); + for (iter->Seek(Key(0)); iter->Valid(); iter->Next()) { + c++; + } + ASSERT_EQ(c, 2); + ASSERT_EQ(perf_context.internal_delete_skipped_count, 0); + // We iterate every key twice. Is it a bug? + ASSERT_LE(perf_context.internal_key_skipped_count, 2); + SetPerfLevel(kDisable); + } +} +#endif // ROCKSDB_LITE +} // namespace rocksdb + +int main(int argc, char** argv) { + rocksdb::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/db/db_test.cc b/db/db_test.cc index e692caedb..cb27ba0d5 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -121,69 +121,6 @@ class DBTestWithParam uint32_t max_subcompactions_; bool exclusive_manual_compaction_; }; -#ifndef ROCKSDB_LITE -TEST_F(DBTest, Empty) { - do { - Options options; - options.env = env_; - options.write_buffer_size = 100000; // Small write buffer - options = CurrentOptions(options); - CreateAndReopenWithCF({"pikachu"}, options); - - std::string num; - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ("0", num); - - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ("1", num); - - // Block sync calls - env_->delay_sstable_sync_.store(true, std::memory_order_release); - Put(1, "k1", std::string(100000, 'x')); // Fill memtable - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ("2", num); - - 
Put(1, "k2", std::string(100000, 'y')); // Trigger compaction - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ("1", num); - - ASSERT_EQ("v1", Get(1, "foo")); - // Release sync calls - env_->delay_sstable_sync_.store(false, std::memory_order_release); - - ASSERT_OK(db_->DisableFileDeletions()); - ASSERT_TRUE( - dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); - ASSERT_EQ("1", num); - - ASSERT_OK(db_->DisableFileDeletions()); - ASSERT_TRUE( - dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); - ASSERT_EQ("2", num); - - ASSERT_OK(db_->DisableFileDeletions()); - ASSERT_TRUE( - dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); - ASSERT_EQ("3", num); - - ASSERT_OK(db_->EnableFileDeletions(false)); - ASSERT_TRUE( - dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); - ASSERT_EQ("2", num); - - ASSERT_OK(db_->EnableFileDeletions()); - ASSERT_TRUE( - dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); - ASSERT_EQ("0", num); - } while (ChangeOptions()); -} -#endif // ROCKSDB_LITE TEST_F(DBTest, WriteEmptyBatch) { Options options; @@ -207,59 +144,6 @@ TEST_F(DBTest, WriteEmptyBatch) { } #ifndef ROCKSDB_LITE -TEST_F(DBTest, GetAggregatedIntPropertyTest) { - const int kKeySize = 100; - const int kValueSize = 500; - const int kKeyNum = 100; - - Options options; - options.env = env_; - options.create_if_missing = true; - options.write_buffer_size = (kKeySize + kValueSize) * kKeyNum / 10; - // Make them never flush - options.min_write_buffer_number_to_merge = 1000; - options.max_write_buffer_number = 1000; - options = CurrentOptions(options); - CreateAndReopenWithCF({"one", "two", "three", "four"}, options); - - Random rnd(301); - for (auto* handle : handles_) { - for (int i = 0; i < kKeyNum; ++i) { - db_->Put(WriteOptions(), handle, RandomString(&rnd, kKeySize), - RandomString(&rnd, kValueSize)); - } - } - - uint64_t manual_sum = 0; - 
uint64_t api_sum = 0; - uint64_t value = 0; - for (auto* handle : handles_) { - ASSERT_TRUE( - db_->GetIntProperty(handle, DB::Properties::kSizeAllMemTables, &value)); - manual_sum += value; - } - ASSERT_TRUE(db_->GetAggregatedIntProperty(DB::Properties::kSizeAllMemTables, - &api_sum)); - ASSERT_GT(manual_sum, 0); - ASSERT_EQ(manual_sum, api_sum); - - ASSERT_FALSE(db_->GetAggregatedIntProperty(DB::Properties::kDBStats, &value)); - - uint64_t before_flush_trm; - uint64_t after_flush_trm; - for (auto* handle : handles_) { - ASSERT_TRUE(db_->GetAggregatedIntProperty( - DB::Properties::kEstimateTableReadersMem, &before_flush_trm)); - - // Issue flush and expect larger memory usage of table readers. - db_->Flush(FlushOptions(), handle); - - ASSERT_TRUE(db_->GetAggregatedIntProperty( - DB::Properties::kEstimateTableReadersMem, &after_flush_trm)); - ASSERT_GT(after_flush_trm, before_flush_trm); - } -} - TEST_F(DBTest, ReadOnlyDB) { ASSERT_OK(Put("foo", "v1")); ASSERT_OK(Put("bar", "v2")); @@ -481,442 +365,6 @@ TEST_F(DBTest, ParanoidFileChecks) { TestGetTickerCount(options, BLOCK_CACHE_ADD)); } -namespace { -void ResetTableProperties(TableProperties* tp) { - tp->data_size = 0; - tp->index_size = 0; - tp->filter_size = 0; - tp->raw_key_size = 0; - tp->raw_value_size = 0; - tp->num_data_blocks = 0; - tp->num_entries = 0; -} - -void ParseTablePropertiesString(std::string tp_string, TableProperties* tp) { - double dummy_double; - std::replace(tp_string.begin(), tp_string.end(), ';', ' '); - std::replace(tp_string.begin(), tp_string.end(), '=', ' '); - ResetTableProperties(tp); - - sscanf(tp_string.c_str(), "# data blocks %" SCNu64 - " # entries %" SCNu64 - " raw key size %" SCNu64 - " raw average key size %lf " - " raw value size %" SCNu64 - " raw average value size %lf " - " data block size %" SCNu64 - " index block size %" SCNu64 - " filter block size %" SCNu64, - &tp->num_data_blocks, &tp->num_entries, &tp->raw_key_size, - &dummy_double, &tp->raw_value_size, &dummy_double, 
&tp->data_size, - &tp->index_size, &tp->filter_size); -} - -void VerifySimilar(uint64_t a, uint64_t b, double bias) { - ASSERT_EQ(a == 0U, b == 0U); - if (a == 0) { - return; - } - double dbl_a = static_cast(a); - double dbl_b = static_cast(b); - if (dbl_a > dbl_b) { - ASSERT_LT(static_cast(dbl_a - dbl_b) / (dbl_a + dbl_b), bias); - } else { - ASSERT_LT(static_cast(dbl_b - dbl_a) / (dbl_a + dbl_b), bias); - } -} - -void VerifyTableProperties(const TableProperties& base_tp, - const TableProperties& new_tp, - double filter_size_bias = 0.1, - double index_size_bias = 0.1, - double data_size_bias = 0.1, - double num_data_blocks_bias = 0.05) { - VerifySimilar(base_tp.data_size, new_tp.data_size, data_size_bias); - VerifySimilar(base_tp.index_size, new_tp.index_size, index_size_bias); - VerifySimilar(base_tp.filter_size, new_tp.filter_size, filter_size_bias); - VerifySimilar(base_tp.num_data_blocks, new_tp.num_data_blocks, - num_data_blocks_bias); - ASSERT_EQ(base_tp.raw_key_size, new_tp.raw_key_size); - ASSERT_EQ(base_tp.raw_value_size, new_tp.raw_value_size); - ASSERT_EQ(base_tp.num_entries, new_tp.num_entries); -} - -void GetExpectedTableProperties(TableProperties* expected_tp, - const int kKeySize, const int kValueSize, - const int kKeysPerTable, const int kTableCount, - const int kBloomBitsPerKey, - const size_t kBlockSize) { - const int kKeyCount = kTableCount * kKeysPerTable; - const int kAvgSuccessorSize = kKeySize / 2; - const int kEncodingSavePerKey = kKeySize / 4; - expected_tp->raw_key_size = kKeyCount * (kKeySize + 8); - expected_tp->raw_value_size = kKeyCount * kValueSize; - expected_tp->num_entries = kKeyCount; - expected_tp->num_data_blocks = - kTableCount * - (kKeysPerTable * (kKeySize - kEncodingSavePerKey + kValueSize)) / - kBlockSize; - expected_tp->data_size = - kTableCount * (kKeysPerTable * (kKeySize + 8 + kValueSize)); - expected_tp->index_size = - expected_tp->num_data_blocks * (kAvgSuccessorSize + 12); - expected_tp->filter_size = - kTableCount 
* (kKeysPerTable * kBloomBitsPerKey / 8); -} -} // namespace - -TEST_F(DBTest, AggregatedTableProperties) { - for (int kTableCount = 40; kTableCount <= 100; kTableCount += 30) { - const int kKeysPerTable = 100; - const int kKeySize = 80; - const int kValueSize = 200; - const int kBloomBitsPerKey = 20; - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 8; - options.compression = kNoCompression; - options.create_if_missing = true; - - BlockBasedTableOptions table_options; - table_options.filter_policy.reset( - NewBloomFilterPolicy(kBloomBitsPerKey, false)); - table_options.block_size = 1024; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - - Random rnd(5632); - for (int table = 1; table <= kTableCount; ++table) { - for (int i = 0; i < kKeysPerTable; ++i) { - db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), - RandomString(&rnd, kValueSize)); - } - db_->Flush(FlushOptions()); - } - std::string property; - db_->GetProperty(DB::Properties::kAggregatedTableProperties, &property); - - TableProperties expected_tp; - GetExpectedTableProperties(&expected_tp, kKeySize, kValueSize, - kKeysPerTable, kTableCount, kBloomBitsPerKey, - table_options.block_size); - - TableProperties output_tp; - ParseTablePropertiesString(property, &output_tp); - - VerifyTableProperties(expected_tp, output_tp); - } -} - -TEST_F(DBTest, ReadLatencyHistogramByLevel) { - Options options = CurrentOptions(); - options.write_buffer_size = 110 << 10; - options.level0_file_num_compaction_trigger = 6; - options.num_levels = 4; - options.compression = kNoCompression; - options.max_bytes_for_level_base = 4500 << 10; - options.target_file_size_base = 98 << 10; - options.max_write_buffer_number = 2; - options.statistics = rocksdb::CreateDBStatistics(); - options.max_open_files = 100; - - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - - DestroyAndReopen(options); - int key_index = 0; 
- Random rnd(301); - for (int num = 0; num < 8; num++) { - Put("foo", "bar"); - GenerateNewFile(&rnd, &key_index); - dbfull()->TEST_WaitForCompact(); - } - dbfull()->TEST_WaitForCompact(); - - std::string prop; - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); - - // Get() after flushes, See latency histogram tracked. - for (int key = 0; key < key_index; key++) { - Get(Key(key)); - } - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - - // Reopen and issue Get(). See thee latency tracked - Reopen(options); - dbfull()->TEST_WaitForCompact(); - for (int key = 0; key < key_index; key++) { - Get(Key(key)); - } - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - - // Reopen and issue iterating. 
See thee latency tracked - Reopen(options); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); - ASSERT_EQ(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - { - unique_ptr iter(db_->NewIterator(ReadOptions())); - for (iter->Seek(Key(0)); iter->Valid(); iter->Next()) { - } - } - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - - // options.max_open_files preloads table readers. - options.max_open_files = -1; - Reopen(options); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - for (int key = 0; key < key_index; key++) { - Get(Key(key)); - } - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); -} - -TEST_F(DBTest, AggregatedTablePropertiesAtLevel) { - const int kTableCount = 100; - const int kKeysPerTable = 10; - const int kKeySize = 50; - const int kValueSize = 400; - const int kMaxLevel = 7; - const int kBloomBitsPerKey = 20; - Random rnd(301); - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 8; - options.compression = kNoCompression; - options.create_if_missing = true; - 
options.level0_file_num_compaction_trigger = 2; - options.target_file_size_base = 8192; - options.max_bytes_for_level_base = 10000; - options.max_bytes_for_level_multiplier = 2; - // This ensures there no compaction happening when we call GetProperty(). - options.disable_auto_compactions = true; - - BlockBasedTableOptions table_options; - table_options.filter_policy.reset( - NewBloomFilterPolicy(kBloomBitsPerKey, false)); - table_options.block_size = 1024; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - - std::string level_tp_strings[kMaxLevel]; - std::string tp_string; - TableProperties level_tps[kMaxLevel]; - TableProperties tp, sum_tp, expected_tp; - for (int table = 1; table <= kTableCount; ++table) { - for (int i = 0; i < kKeysPerTable; ++i) { - db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), - RandomString(&rnd, kValueSize)); - } - db_->Flush(FlushOptions()); - db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); - ResetTableProperties(&sum_tp); - for (int level = 0; level < kMaxLevel; ++level) { - db_->GetProperty( - DB::Properties::kAggregatedTablePropertiesAtLevel + ToString(level), - &level_tp_strings[level]); - ParseTablePropertiesString(level_tp_strings[level], &level_tps[level]); - sum_tp.data_size += level_tps[level].data_size; - sum_tp.index_size += level_tps[level].index_size; - sum_tp.filter_size += level_tps[level].filter_size; - sum_tp.raw_key_size += level_tps[level].raw_key_size; - sum_tp.raw_value_size += level_tps[level].raw_value_size; - sum_tp.num_data_blocks += level_tps[level].num_data_blocks; - sum_tp.num_entries += level_tps[level].num_entries; - } - db_->GetProperty(DB::Properties::kAggregatedTableProperties, &tp_string); - ParseTablePropertiesString(tp_string, &tp); - ASSERT_EQ(sum_tp.data_size, tp.data_size); - ASSERT_EQ(sum_tp.index_size, tp.index_size); - ASSERT_EQ(sum_tp.filter_size, tp.filter_size); - ASSERT_EQ(sum_tp.raw_key_size, tp.raw_key_size); - 
ASSERT_EQ(sum_tp.raw_value_size, tp.raw_value_size); - ASSERT_EQ(sum_tp.num_data_blocks, tp.num_data_blocks); - ASSERT_EQ(sum_tp.num_entries, tp.num_entries); - if (table > 3) { - GetExpectedTableProperties(&expected_tp, kKeySize, kValueSize, - kKeysPerTable, table, kBloomBitsPerKey, - table_options.block_size); - // Gives larger bias here as index block size, filter block size, - // and data block size become much harder to estimate in this test. - VerifyTableProperties(tp, expected_tp, 0.5, 0.4, 0.4, 0.25); - } - } -} -#endif // ROCKSDB_LITE - -class CoutingUserTblPropCollector : public TablePropertiesCollector { - public: - const char* Name() const override { return "CoutingUserTblPropCollector"; } - - Status Finish(UserCollectedProperties* properties) override { - std::string encoded; - PutVarint32(&encoded, count_); - *properties = UserCollectedProperties{ - {"CoutingUserTblPropCollector", message_}, {"Count", encoded}, - }; - return Status::OK(); - } - - Status AddUserKey(const Slice& user_key, const Slice& value, EntryType type, - SequenceNumber seq, uint64_t file_size) override { - ++count_; - return Status::OK(); - } - - virtual UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{}; - } - - private: - std::string message_ = "Rocksdb"; - uint32_t count_ = 0; -}; - -class CoutingUserTblPropCollectorFactory - : public TablePropertiesCollectorFactory { - public: - explicit CoutingUserTblPropCollectorFactory( - uint32_t expected_column_family_id) - : expected_column_family_id_(expected_column_family_id), - num_created_(0) {} - virtual TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context context) override { - EXPECT_EQ(expected_column_family_id_, context.column_family_id); - num_created_++; - return new CoutingUserTblPropCollector(); - } - const char* Name() const override { - return "CoutingUserTblPropCollectorFactory"; - } - void set_expected_column_family_id(uint32_t 
v) { - expected_column_family_id_ = v; - } - uint32_t expected_column_family_id_; - uint32_t num_created_; -}; - -#ifndef ROCKSDB_LITE -TEST_F(DBTest, GetUserDefinedTableProperties) { - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = (1<<30); - options.max_background_flushes = 0; - options.table_properties_collector_factories.resize(1); - std::shared_ptr collector_factory = - std::make_shared(0); - options.table_properties_collector_factories[0] = collector_factory; - Reopen(options); - // Create 4 tables - for (int table = 0; table < 4; ++table) { - for (int i = 0; i < 10 + table; ++i) { - db_->Put(WriteOptions(), ToString(table * 100 + i), "val"); - } - db_->Flush(FlushOptions()); - } - - TablePropertiesCollection props; - ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); - ASSERT_EQ(4U, props.size()); - uint32_t sum = 0; - for (const auto& item : props) { - auto& user_collected = item.second->user_collected_properties; - ASSERT_TRUE(user_collected.find("CoutingUserTblPropCollector") != - user_collected.end()); - ASSERT_EQ(user_collected.at("CoutingUserTblPropCollector"), "Rocksdb"); - ASSERT_TRUE(user_collected.find("Count") != user_collected.end()); - Slice key(user_collected.at("Count")); - uint32_t count; - ASSERT_TRUE(GetVarint32(&key, &count)); - sum += count; - } - ASSERT_EQ(10u + 11u + 12u + 13u, sum); - - ASSERT_GT(collector_factory->num_created_, 0U); - collector_factory->num_created_ = 0; - dbfull()->TEST_CompactRange(0, nullptr, nullptr); - ASSERT_GT(collector_factory->num_created_, 0U); -} -#endif // ROCKSDB_LITE - -TEST_F(DBTest, UserDefinedTablePropertiesContext) { - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 3; - options.max_background_flushes = 0; - options.table_properties_collector_factories.resize(1); - std::shared_ptr collector_factory = - std::make_shared(1); - options.table_properties_collector_factories[0] = collector_factory, - CreateAndReopenWithCF({"pikachu"}, 
options); - // Create 2 files - for (int table = 0; table < 2; ++table) { - for (int i = 0; i < 10 + table; ++i) { - Put(1, ToString(table * 100 + i), "val"); - } - Flush(1); - } - ASSERT_GT(collector_factory->num_created_, 0U); - - collector_factory->num_created_ = 0; - // Trigger automatic compactions. - for (int table = 0; table < 3; ++table) { - for (int i = 0; i < 10 + table; ++i) { - Put(1, ToString(table * 100 + i), "val"); - } - Flush(1); - dbfull()->TEST_WaitForCompact(); - } - ASSERT_GT(collector_factory->num_created_, 0U); - - collector_factory->num_created_ = 0; - dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); - ASSERT_GT(collector_factory->num_created_, 0U); - - // Come back to write to default column family - collector_factory->num_created_ = 0; - collector_factory->set_expected_column_family_id(0); // default CF - // Create 4 tables in default column family - for (int table = 0; table < 2; ++table) { - for (int i = 0; i < 10 + table; ++i) { - Put(ToString(table * 100 + i), "val"); - } - Flush(); - } - ASSERT_GT(collector_factory->num_created_, 0U); - - collector_factory->num_created_ = 0; - // Trigger automatic compactions. 
- for (int table = 0; table < 3; ++table) { - for (int i = 0; i < 10 + table; ++i) { - Put(ToString(table * 100 + i), "val"); - } - Flush(); - dbfull()->TEST_WaitForCompact(); - } - ASSERT_GT(collector_factory->num_created_, 0U); - - collector_factory->num_created_ = 0; - dbfull()->TEST_CompactRange(0, nullptr, nullptr); - ASSERT_GT(collector_factory->num_created_, 0U); -} - -#ifndef ROCKSDB_LITE TEST_F(DBTest, LevelLimitReopen) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); @@ -2315,118 +1763,6 @@ TEST_F(DBTest, FlushMultipleMemtable) { } while (ChangeCompactOptions()); } -#ifndef ROCKSDB_LITE -TEST_F(DBTest, NumImmutableMemTable) { - do { - Options options = CurrentOptions(); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 3; - options.max_write_buffer_number_to_maintain = 0; - options.write_buffer_size = 1000000; - CreateAndReopenWithCF({"pikachu"}, options); - - std::string big_value(1000000 * 2, 'x'); - std::string num; - SetPerfLevel(kEnableTime);; - ASSERT_TRUE(GetPerfLevel() == kEnableTime); - - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k1", big_value)); - ASSERT_TRUE(dbfull()->GetProperty(handles_[1], - "rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ(num, "1"); - perf_context.Reset(); - Get(1, "k1"); - ASSERT_EQ(1, (int) perf_context.get_from_memtable_count); - - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value)); - ASSERT_TRUE(dbfull()->GetProperty(handles_[1], - "rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "1"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ(num, "1"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-imm-mem-tables", &num)); - ASSERT_EQ(num, "1"); 
- - perf_context.Reset(); - Get(1, "k1"); - ASSERT_EQ(2, (int) perf_context.get_from_memtable_count); - perf_context.Reset(); - Get(1, "k2"); - ASSERT_EQ(1, (int) perf_context.get_from_memtable_count); - - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k3", big_value)); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.cur-size-active-mem-table", &num)); - ASSERT_TRUE(dbfull()->GetProperty(handles_[1], - "rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "2"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ(num, "1"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-imm-mem-tables", &num)); - ASSERT_EQ(num, "2"); - perf_context.Reset(); - Get(1, "k2"); - ASSERT_EQ(2, (int) perf_context.get_from_memtable_count); - perf_context.Reset(); - Get(1, "k3"); - ASSERT_EQ(1, (int) perf_context.get_from_memtable_count); - perf_context.Reset(); - Get(1, "k1"); - ASSERT_EQ(3, (int) perf_context.get_from_memtable_count); - - ASSERT_OK(Flush(1)); - ASSERT_TRUE(dbfull()->GetProperty(handles_[1], - "rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.cur-size-active-mem-table", &num)); - // "192" is the size of the metadata of an empty skiplist, this would - // break if we change the default skiplist implementation - ASSERT_EQ(num, "192"); - - uint64_t int_num; - uint64_t base_total_size; - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.estimate-num-keys", &base_total_size)); - - ASSERT_OK(dbfull()->Delete(writeOpt, handles_[1], "k2")); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k3", "")); - ASSERT_OK(dbfull()->Delete(writeOpt, handles_[1], "k3")); - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.num-deletes-active-mem-table", &int_num)); - ASSERT_EQ(int_num, 2U); - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", 
&int_num)); - ASSERT_EQ(int_num, 3U); - - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value)); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value)); - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.num-entries-imm-mem-tables", &int_num)); - ASSERT_EQ(int_num, 4U); - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.num-deletes-imm-mem-tables", &int_num)); - ASSERT_EQ(int_num, 2U); - - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.estimate-num-keys", &int_num)); - ASSERT_EQ(int_num, base_total_size + 1); - - SetPerfLevel(kDisable); - ASSERT_TRUE(GetPerfLevel() == kDisable); - } while (ChangeCompactOptions()); -} -#endif // ROCKSDB_LITE - TEST_F(DBTest, FlushEmptyColumnFamily) { // Block flush thread and disable compaction thread env_->SetBackgroundThreads(1, Env::HIGH); @@ -2471,314 +1807,6 @@ TEST_F(DBTest, FlushEmptyColumnFamily) { sleeping_task_low.WaitUntilDone(); } -#ifndef ROCKSDB_LITE -TEST_F(DBTest, GetProperty) { - // Set sizes to both background thread pool to be 1 and block them. 
- env_->SetBackgroundThreads(1, Env::HIGH); - env_->SetBackgroundThreads(1, Env::LOW); - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - test::SleepingBackgroundTask sleeping_task_high; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_task_high, Env::Priority::HIGH); - - Options options = CurrentOptions(); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - options.compaction_style = kCompactionStyleUniversal; - options.level0_file_num_compaction_trigger = 1; - options.compaction_options_universal.size_ratio = 50; - options.max_background_compactions = 1; - options.max_background_flushes = 1; - options.max_write_buffer_number = 10; - options.min_write_buffer_number_to_merge = 1; - options.max_write_buffer_number_to_maintain = 0; - options.write_buffer_size = 1000000; - Reopen(options); - - std::string big_value(1000000 * 2, 'x'); - std::string num; - uint64_t int_num; - SetPerfLevel(kEnableTime); - - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_EQ(int_num, 0U); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-live-data-size", &int_num)); - ASSERT_EQ(int_num, 0U); - - ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value)); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); - ASSERT_EQ(num, "1"); - perf_context.Reset(); - - ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value)); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "1"); - ASSERT_OK(dbfull()->Delete(writeOpt, "k-non-existing")); - 
ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value)); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "2"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); - ASSERT_EQ(num, "1"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); - ASSERT_EQ(num, "2"); - // Verify the same set of properties through GetIntProperty - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-immutable-mem-table", &int_num)); - ASSERT_EQ(int_num, 2U); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.mem-table-flush-pending", &int_num)); - ASSERT_EQ(int_num, 1U); - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.compaction-pending", &int_num)); - ASSERT_EQ(int_num, 0U); - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &int_num)); - ASSERT_EQ(int_num, 2U); - - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_EQ(int_num, 0U); - - sleeping_task_high.WakeUp(); - sleeping_task_high.WaitUntilDone(); - dbfull()->TEST_WaitForFlushMemTable(); - - ASSERT_OK(dbfull()->Put(writeOpt, "k4", big_value)); - ASSERT_OK(dbfull()->Put(writeOpt, "k5", big_value)); - dbfull()->TEST_WaitForFlushMemTable(); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); - ASSERT_EQ(num, "1"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); - ASSERT_EQ(num, "4"); - - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_GT(int_num, 0U); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - - // Wait for compaction to be done. 
This is important because otherwise RocksDB - // might schedule a compaction when reopening the database, failing assertion - // (A) as a result. - dbfull()->TEST_WaitForCompact(); - options.max_open_files = 10; - Reopen(options); - // After reopening, no table reader is loaded, so no memory for table readers - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_EQ(int_num, 0U); // (A) - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &int_num)); - ASSERT_GT(int_num, 0U); - - // After reading a key, at least one table reader is loaded. - Get("k5"); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_GT(int_num, 0U); - - // Test rocksdb.num-live-versions - { - options.level0_file_num_compaction_trigger = 20; - Reopen(options); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); - ASSERT_EQ(int_num, 1U); - - // Use an iterator to hold current version - std::unique_ptr iter1(dbfull()->NewIterator(ReadOptions())); - - ASSERT_OK(dbfull()->Put(writeOpt, "k6", big_value)); - Flush(); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); - ASSERT_EQ(int_num, 2U); - - // Use an iterator to hold current version - std::unique_ptr iter2(dbfull()->NewIterator(ReadOptions())); - - ASSERT_OK(dbfull()->Put(writeOpt, "k7", big_value)); - Flush(); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); - ASSERT_EQ(int_num, 3U); - - iter2.reset(); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); - ASSERT_EQ(int_num, 2U); - - iter1.reset(); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); - ASSERT_EQ(int_num, 1U); - } -} - -TEST_F(DBTest, ApproximateMemoryUsage) { - const int kNumRounds = 10; - // TODO(noetzli) kFlushesPerRound does not really correlate with how many - // flushes happen. 
- const int kFlushesPerRound = 10; - const int kWritesPerFlush = 10; - const int kKeySize = 100; - const int kValueSize = 1000; - Options options; - options.write_buffer_size = 1000; // small write buffer - options.min_write_buffer_number_to_merge = 4; - options.compression = kNoCompression; - options.create_if_missing = true; - options = CurrentOptions(options); - DestroyAndReopen(options); - - Random rnd(301); - - std::vector iters; - - uint64_t active_mem; - uint64_t unflushed_mem; - uint64_t all_mem; - uint64_t prev_all_mem; - - // Phase 0. The verify the initial value of all these properties are the same - // as we have no mem-tables. - dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); - dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - ASSERT_EQ(all_mem, active_mem); - ASSERT_EQ(all_mem, unflushed_mem); - - // Phase 1. Simply issue Put() and expect "cur-size-all-mem-tables" equals to - // "size-all-mem-tables" - for (int r = 0; r < kNumRounds; ++r) { - for (int f = 0; f < kFlushesPerRound; ++f) { - for (int w = 0; w < kWritesPerFlush; ++w) { - Put(RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize)); - } - } - // Make sure that there is no flush between getting the two properties. - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - // in no iterator case, these two number should be the same. - ASSERT_EQ(unflushed_mem, all_mem); - } - prev_all_mem = all_mem; - - // Phase 2. Keep issuing Put() but also create new iterators. This time we - // expect "size-all-mem-tables" > "cur-size-all-mem-tables". 
- for (int r = 0; r < kNumRounds; ++r) { - iters.push_back(db_->NewIterator(ReadOptions())); - for (int f = 0; f < kFlushesPerRound; ++f) { - for (int w = 0; w < kWritesPerFlush; ++w) { - Put(RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize)); - } - } - // Force flush to prevent flush from happening between getting the - // properties or after getting the properties and before the new round. - Flush(); - - // In the second round, add iterators. - dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); - dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - ASSERT_GT(all_mem, active_mem); - ASSERT_GT(all_mem, unflushed_mem); - ASSERT_GT(all_mem, prev_all_mem); - prev_all_mem = all_mem; - } - - // Phase 3. Delete iterators and expect "size-all-mem-tables" shrinks - // whenever we release an iterator. - for (auto* iter : iters) { - delete iter; - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - // Expect the size shrinking - ASSERT_LT(all_mem, prev_all_mem); - prev_all_mem = all_mem; - } - - // Expect all these three counters to be the same. - dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); - dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - ASSERT_EQ(active_mem, unflushed_mem); - ASSERT_EQ(unflushed_mem, all_mem); - - // Phase 5. Reopen, and expect all these three counters to be the same again. 
- Reopen(options); - dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); - dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - ASSERT_EQ(active_mem, unflushed_mem); - ASSERT_EQ(unflushed_mem, all_mem); -} - -TEST_F(DBTest, EstimatePendingCompBytes) { - // Set sizes to both background thread pool to be 1 and block them. - env_->SetBackgroundThreads(1, Env::HIGH); - env_->SetBackgroundThreads(1, Env::LOW); - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - Options options = CurrentOptions(); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - options.compaction_style = kCompactionStyleLevel; - options.level0_file_num_compaction_trigger = 2; - options.max_background_compactions = 1; - options.max_background_flushes = 1; - options.max_write_buffer_number = 10; - options.min_write_buffer_number_to_merge = 1; - options.max_write_buffer_number_to_maintain = 0; - options.write_buffer_size = 1000000; - Reopen(options); - - std::string big_value(1000000 * 2, 'x'); - std::string num; - uint64_t int_num; - - ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value)); - Flush(); - ASSERT_TRUE(dbfull()->GetIntProperty( - "rocksdb.estimate-pending-compaction-bytes", &int_num)); - ASSERT_EQ(int_num, 0U); - - ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value)); - Flush(); - ASSERT_TRUE(dbfull()->GetIntProperty( - "rocksdb.estimate-pending-compaction-bytes", &int_num)); - ASSERT_EQ(int_num, 0U); - - ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value)); - Flush(); - ASSERT_TRUE(dbfull()->GetIntProperty( - "rocksdb.estimate-pending-compaction-bytes", &int_num)); - ASSERT_GT(int_num, 0U); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - - dbfull()->TEST_WaitForCompact(); - ASSERT_TRUE(dbfull()->GetIntProperty( - 
"rocksdb.estimate-pending-compaction-bytes", &int_num)); - ASSERT_EQ(int_num, 0U); -} -#endif // ROCKSDB_LITE - TEST_F(DBTest, FLUSH) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); @@ -8556,178 +7584,7 @@ TEST_F(DBTest, EmptyCompactedDB) { } #endif // ROCKSDB_LITE -class CountingDeleteTabPropCollector : public TablePropertiesCollector { - public: - const char* Name() const override { return "CountingDeleteTabPropCollector"; } - - Status AddUserKey(const Slice& user_key, const Slice& value, EntryType type, - SequenceNumber seq, uint64_t file_size) override { - if (type == kEntryDelete) { - num_deletes_++; - } - return Status::OK(); - } - - bool NeedCompact() const override { return num_deletes_ > 10; } - - UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{}; - } - - Status Finish(UserCollectedProperties* properties) override { - *properties = - UserCollectedProperties{{"num_delete", ToString(num_deletes_)}}; - return Status::OK(); - } - - private: - uint32_t num_deletes_ = 0; -}; - -class CountingDeleteTabPropCollectorFactory - : public TablePropertiesCollectorFactory { - public: - virtual TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context context) override { - return new CountingDeleteTabPropCollector(); - } - const char* Name() const override { - return "CountingDeleteTabPropCollectorFactory"; - } -}; - #ifndef ROCKSDB_LITE -TEST_F(DBTest, TablePropertiesNeedCompactTest) { - Random rnd(301); - - Options options; - options.create_if_missing = true; - options.write_buffer_size = 4096; - options.max_write_buffer_number = 8; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 4; - options.target_file_size_base = 2048; - options.max_bytes_for_level_base = 10240; - options.max_bytes_for_level_multiplier = 4; - options.soft_pending_compaction_bytes_limit = 1024 * 1024; - 
options.num_levels = 8; - - std::shared_ptr collector_factory = - std::make_shared(); - options.table_properties_collector_factories.resize(1); - options.table_properties_collector_factories[0] = collector_factory; - - DestroyAndReopen(options); - - const int kMaxKey = 1000; - for (int i = 0; i < kMaxKey; i++) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, 102))); - ASSERT_OK(Put(Key(kMaxKey + i), RandomString(&rnd, 102))); - } - Flush(); - dbfull()->TEST_WaitForCompact(); - if (NumTableFilesAtLevel(0) == 1) { - // Clear Level 0 so that when later flush a file with deletions, - // we don't trigger an organic compaction. - ASSERT_OK(Put(Key(0), "")); - ASSERT_OK(Put(Key(kMaxKey * 2), "")); - Flush(); - dbfull()->TEST_WaitForCompact(); - } - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - - { - int c = 0; - std::unique_ptr iter(db_->NewIterator(ReadOptions())); - iter->Seek(Key(kMaxKey - 100)); - while (iter->Valid() && iter->key().compare(Key(kMaxKey + 100)) < 0) { - iter->Next(); - ++c; - } - ASSERT_EQ(c, 200); - } - - Delete(Key(0)); - for (int i = kMaxKey - 100; i < kMaxKey + 100; i++) { - Delete(Key(i)); - } - Delete(Key(kMaxKey * 2)); - - Flush(); - dbfull()->TEST_WaitForCompact(); - - { - SetPerfLevel(kEnableCount); - perf_context.Reset(); - int c = 0; - std::unique_ptr iter(db_->NewIterator(ReadOptions())); - iter->Seek(Key(kMaxKey - 100)); - while (iter->Valid() && iter->key().compare(Key(kMaxKey + 100)) < 0) { - iter->Next(); - } - ASSERT_EQ(c, 0); - ASSERT_LT(perf_context.internal_delete_skipped_count, 30u); - ASSERT_LT(perf_context.internal_key_skipped_count, 30u); - SetPerfLevel(kDisable); - } -} - -TEST_F(DBTest, NeedCompactHintPersistentTest) { - Random rnd(301); - - Options options; - options.create_if_missing = true; - options.max_write_buffer_number = 8; - options.level0_file_num_compaction_trigger = 10; - options.level0_slowdown_writes_trigger = 10; - options.level0_stop_writes_trigger = 10; - options.disable_auto_compactions = true; - - std::shared_ptr 
collector_factory = - std::make_shared(); - options.table_properties_collector_factories.resize(1); - options.table_properties_collector_factories[0] = collector_factory; - - DestroyAndReopen(options); - - const int kMaxKey = 100; - for (int i = 0; i < kMaxKey; i++) { - ASSERT_OK(Put(Key(i), "")); - } - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); - - for (int i = 1; i < kMaxKey - 1; i++) { - Delete(Key(i)); - } - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); - ASSERT_EQ(NumTableFilesAtLevel(0), 2); - - // Restart the DB. Although number of files didn't reach - // options.level0_file_num_compaction_trigger, compaction should - // still be triggered because of the need-compaction hint. - options.disable_auto_compactions = false; - Reopen(options); - dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - { - SetPerfLevel(kEnableCount); - perf_context.Reset(); - int c = 0; - std::unique_ptr iter(db_->NewIterator(ReadOptions())); - for (iter->Seek(Key(0)); iter->Valid(); iter->Next()) { - c++; - } - ASSERT_EQ(c, 2); - ASSERT_EQ(perf_context.internal_delete_skipped_count, 0); - // We iterate every key twice. Is it a bug? 
- ASSERT_LE(perf_context.internal_key_skipped_count, 2); - SetPerfLevel(kDisable); - } -} - TEST_F(DBTest, SuggestCompactRangeTest) { class CompactionFilterFactoryGetContext : public CompactionFilterFactory { public: diff --git a/db/db_test_util.h b/db/db_test_util.h index c13ebbda2..b93994afc 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -27,7 +27,6 @@ #include #include "db/db_impl.h" -#include "db/db_test_util.h" #include "db/dbformat.h" #include "db/filename.h" #include "memtable/hash_linklist_rep.h" From 202be23e4618da7a59b222a14e0414a7132c1aa2 Mon Sep 17 00:00:00 2001 From: sdong Date: Tue, 19 Jan 2016 13:10:06 -0800 Subject: [PATCH 023/195] Add test that verifies all options in BlockBasedTableOptions is settable through GetBlockBasedTableOptionsFromString() Summary: Add a test OptionsParserTest.BlockBasedTableOptionsAdded, which will fail if a new option is added to BlockBasedTableOptions but is not settable through GetBlockBasedTableOptionsFromString(). Test Plan: Run the test. Also manually remove and add options and make sure it fails. Reviewers: anthony, IslamAbdelRahman, kradhakrishnan, rven, yhchiang, andrewkr Reviewed By: andrewkr Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D52953 --- util/options_test.cc | 107 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/util/options_test.cc b/util/options_test.cc index e4b572533..c60fe48b9 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -12,6 +12,7 @@ #endif #include +#include #include #include @@ -1474,6 +1475,112 @@ TEST_F(OptionsParserTest, EscapeOptionString) { "Escape \\# and"); } +// Only run OptionsParserTest.BlockBasedTableOptionsAdded on limited platforms +// as it depends on behavior of compilers. +#ifdef OS_LINUX +const char kSpecialChar = 'R'; +// Items in the form of . Need to be in ascending order +// and not overlapping. Need to updated if new pointer-option is added. 
+const std::vector> kBbtoBlacklist = { + {offsetof(struct BlockBasedTableOptions, flush_block_policy_factory), + sizeof(std::shared_ptr)}, + {offsetof(struct BlockBasedTableOptions, block_cache), + sizeof(std::shared_ptr)}, + {offsetof(struct BlockBasedTableOptions, block_cache_compressed), + sizeof(std::shared_ptr)}, + {offsetof(struct BlockBasedTableOptions, filter_policy), + sizeof(std::shared_ptr)}, +}; + +void FillWithSpecialChar(char* start_ptr) { + int offset = 0; + for (auto& pair : kBbtoBlacklist) { + std::memset(start_ptr + offset, kSpecialChar, pair.first - offset); + offset = pair.first + pair.second; + } + std::memset(start_ptr + offset, kSpecialChar, + sizeof(BlockBasedTableOptions) - offset); +} + +int NumUnsetBytes(char* start_ptr) { + int total_unset_bytes_base = 0; + + int offset = 0; + for (auto& pair : kBbtoBlacklist) { + for (char* ptr = start_ptr + offset; ptr < start_ptr + pair.first; ptr++) { + if (*ptr == kSpecialChar) { + total_unset_bytes_base++; + } + offset = pair.first + pair.second; + } + } + for (char* ptr = start_ptr + offset; + ptr < start_ptr + sizeof(BlockBasedTableOptions); ptr++) { + if (*ptr == kSpecialChar) { + total_unset_bytes_base++; + } + } + return total_unset_bytes_base; +} + +TEST_F(OptionsParserTest, BlockBasedTableOptionsAllFieldsSettable) { + // In this test, we catch a new option of BlockBasedTableOptions that is not + // settable through GetBlockBasedTableOptionsFromString(). + // We count padding bytes of the option struct, and assert it to be the same + // as unset bytes of an option struct initialized by + // GetBlockBasedTableOptionsFromString(). + + char* bbto_ptr = new char[sizeof(BlockBasedTableOptions)]; + + // Count padding bytes by setting all bytes in the memory to a special char, + // copy a well constructed struct to this memory and see how many special + // bytes left. 
+ BlockBasedTableOptions* bbto = new (bbto_ptr) BlockBasedTableOptions(); + FillWithSpecialChar(bbto_ptr); + // It based on the behavior of compiler that padding bytes are not changed + // when copying the struct. It's prone to failure when compiler behavior + // changes. We verify there is unset bytes to detect the case. + *bbto = BlockBasedTableOptions(); + int unset_bytes_base = NumUnsetBytes(bbto_ptr); + ASSERT_GT(unset_bytes_base, 0); + bbto->~BlockBasedTableOptions(); + + // Construct the base option passed into + // GetBlockBasedTableOptionsFromString(). + FillWithSpecialChar(bbto_ptr); + // This option is not setable: + bbto->use_delta_encoding = true; + + char* new_bbto_ptr = new char[sizeof(BlockBasedTableOptions)]; + BlockBasedTableOptions* new_bbto = + new (new_bbto_ptr) BlockBasedTableOptions(); + FillWithSpecialChar(new_bbto_ptr); + + // Need to update the option string if a new option is added. + GetBlockBasedTableOptionsFromString( + *bbto, + "cache_index_and_filter_blocks=1;index_type=kHashSearch;" + "checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;" + "block_cache=1M;block_cache_compressed=1k;block_size=1024;" + "block_size_deviation=8;block_restart_interval=4;" + "filter_policy=bloomfilter:4:true;whole_key_filtering=1;" + "skip_table_builder_flush=1;format_version=1;" + "hash_index_allow_collision=false;", + new_bbto); + + ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_bbto_ptr)); + + ASSERT_TRUE(new_bbto->block_cache.get() != nullptr); + ASSERT_TRUE(new_bbto->block_cache_compressed.get() != nullptr); + ASSERT_TRUE(new_bbto->filter_policy.get() != nullptr); + + bbto->~BlockBasedTableOptions(); + new_bbto->~BlockBasedTableOptions(); + + delete[] bbto_ptr; + delete[] new_bbto_ptr; +} +#endif // OS_LINUX #endif // !ROCKSDB_LITE } // namespace rocksdb From a300d9928e69cfe2f4352d0452b1d9c999eb60e0 Mon Sep 17 00:00:00 2001 From: krad Date: Tue, 19 Jan 2016 15:33:20 -0800 Subject: [PATCH 024/195] Added sandcastle pre-commit Test Plan: 
Lately we have been breaking our builds too often. This changes adds the capability to schedule tests in sandcastle for every diff created. This will help us increase the pre-commit testing bar. This patch will dispatch signals to sandcastle to start running tests on the diff. The test failures are reported to the user via email. The user can also manually check the progress of test in sandcastle via the URL provided. Reviewers: sdong, rven Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53001 --- .../config/FacebookArcanistConfiguration.php | 103 +++++++++++++++++- 1 file changed, 101 insertions(+), 2 deletions(-) diff --git a/arcanist_util/config/FacebookArcanistConfiguration.php b/arcanist_util/config/FacebookArcanistConfiguration.php index c3454903b..e9f4b0828 100644 --- a/arcanist_util/config/FacebookArcanistConfiguration.php +++ b/arcanist_util/config/FacebookArcanistConfiguration.php @@ -10,13 +10,112 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { ArcanistBaseWorkflow $workflow, $error_code) { if ($command == 'diff' && !$workflow->isRawDiffSource()) { - $this->maybePushToJenkins($workflow); + $this->startTestsInJenkins($workflow); + $this->startTestsInSandcastle($workflow); } } + ////////////////////////////////////////////////////////////////////// + /* Run tests in sandcastle */ + function getSteps($diffID, $username) { + $arcrc_content = exec("cat ~/.arcrc | base64 -w0"); + + $setup = array( + "name" => "Setup arcrc", + "shell" => "echo " . $arcrc_content . " | base64 --decode > ~/.arcrc", + "user" => "root" + ); + + $fix_permission = array( + "name" => "Fix environment", + "shell" => "chmod 600 ~/.arcrc", + "user" => "root" + ); + + $fix_git_ignore = array( + "name" => "Fix git ignore", + "shell" => "echo fbcode >> .git/info/exclude", + "user" => "root" + ); + + $patch = array( + "name" => "Patch " . $diffID, + "shell" => "HTTPS_PROXY=fwdproxy:8080 arc --arcrc-file ~/.arcrc " + . "patch D" . 
$diffID . " || rm -f ~/.arcrc", + "user" => "root" + ); + + $cleanup = array( + "name" => "Arc cleanup", + "shell" => "rm -f ~/.arcrc", + "user" => "root" + ); + + $steps[] = $setup; + $steps[] = $fix_permission; + $steps[] = $fix_git_ignore; + $steps[] = $patch; + $steps[] = $cleanup; + + $tests = array( + "unit", "clang_unit", "tsan", "asan", "valgrind" + ); + + foreach ($tests as $test) { + $run_test = array( + "name" => "Run " . $test, + "shell" => "EMAIL=" . $username . "@fb.com " + . "./build_tools/rocksdb-lego-determinator " . $test, + "user" => "root", + "determinator" => true + ); + + $steps[] = $run_test; + } + + return $steps; + } + + function startTestsInSandcastle($workflow) { + $diffID = $workflow->getDiffId(); + $username = exec("whoami"); + + if ($diffID == null || $username == null) { + return; + } + + $arg = array( + "name" => "RocksDB diff D" . $diffID . "testing for " . $username, + "steps" => $this->getSteps($diffID, $username) + ); + + $url = 'https://interngraph.intern.facebook.com/sandcastle/generate?' + .'command=SandcastleUniversalCommand' + .'&vcs=rocksdb-git&revision=origin%2Fmaster&type=lego' + .'&user=krad&alias=ci-util' + .'&command-args=' . urlencode(json_encode($arg)); + + $cmd = 'https_proxy= HTTPS_PROXY= curl -s -k -F app=659387027470559 ' + . '-F token=AeO_3f2Ya3TujjnxGD4 "' . $url . '"'; + + echo "\n====================================================== \n"; + echo "Scheduling sandcastle job for D" . $diffID . " for " . $username; + echo "\n"; + echo "Please follow the URL for details on the job. \n"; + echo "An email will be sent to " . $username . "@fb.com on failure. 
\n"; + echo "\n"; + echo "Job details: \n"; + + $output = shell_exec($cmd); + + echo $output; + + echo "\n====================================================== \n"; + } + ////////////////////////////////////////////////////////////////////// /* Send off builds to jenkins */ - function maybePushToJenkins($workflow) { + function startTestsInJenkins($workflow) { $diffID = $workflow->getDiffID(); if ($diffID === null) { return; From 2c2b72218d1693c55a528893daeb792b0a87c194 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 21 Jan 2016 10:52:38 -0800 Subject: [PATCH 025/195] Disable OptionsParserTest.BlockBasedTableOptionsAllFieldsSettable under CLANG Summary: OptionsParserTest.BlockBasedTableOptionsAllFieldsSettable is failiong under CLANG. Disable the test to unblock the build. Test Plan: Run it both of CLANG and GCC Reviewers: kradhakrishnan, rven, andrewkr, anthony, yhchiang, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53157 --- util/options_test.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/util/options_test.cc b/util/options_test.cc index c60fe48b9..19e48b350 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -1478,6 +1478,7 @@ TEST_F(OptionsParserTest, EscapeOptionString) { // Only run OptionsParserTest.BlockBasedTableOptionsAdded on limited platforms // as it depends on behavior of compilers. #ifdef OS_LINUX +#ifndef __clang__ const char kSpecialChar = 'R'; // Items in the form of . Need to be in ascending order // and not overlapping. Need to updated if new pointer-option is added. 
@@ -1493,7 +1494,7 @@ const std::vector> kBbtoBlacklist = { }; void FillWithSpecialChar(char* start_ptr) { - int offset = 0; + size_t offset = 0; for (auto& pair : kBbtoBlacklist) { std::memset(start_ptr + offset, kSpecialChar, pair.first - offset); offset = pair.first + pair.second; @@ -1505,7 +1506,7 @@ void FillWithSpecialChar(char* start_ptr) { int NumUnsetBytes(char* start_ptr) { int total_unset_bytes_base = 0; - int offset = 0; + size_t offset = 0; for (auto& pair : kBbtoBlacklist) { for (char* ptr = start_ptr + offset; ptr < start_ptr + pair.first; ptr++) { if (*ptr == kSpecialChar) { @@ -1580,6 +1581,7 @@ TEST_F(OptionsParserTest, BlockBasedTableOptionsAllFieldsSettable) { delete[] bbto_ptr; delete[] new_bbto_ptr; } +#endif // !__clang__ #endif // OS_LINUX #endif // !ROCKSDB_LITE From 29289333d086271d0cadeae83e025fc094b512cd Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 21 Jan 2016 10:59:36 -0800 Subject: [PATCH 026/195] Add named constants for remaining properties Summary: There were just these two properties that didn't have any named constant. 
Test Plan: build and below test $ ./db_properties_test --gtest_filter=DBPropertiesTest.NumImmutableMemTable Reviewers: yhchiang, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53103 --- db/db_properties_test.cc | 8 +++++++- db/internal_stats.cc | 3 +++ include/rocksdb/db.h | 5 +++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index ed8c561cc..50e08c45f 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -428,7 +428,7 @@ TEST_F(DBPropertiesTest, NumImmutableMemTable) { writeOpt.disableWAL = true; options.max_write_buffer_number = 4; options.min_write_buffer_number_to_merge = 3; - options.max_write_buffer_number_to_maintain = 0; + options.max_write_buffer_number_to_maintain = 4; options.write_buffer_size = 1000000; CreateAndReopenWithCF({"pikachu"}, options); @@ -441,6 +441,9 @@ TEST_F(DBPropertiesTest, NumImmutableMemTable) { ASSERT_TRUE(dbfull()->GetProperty(handles_[1], "rocksdb.num-immutable-mem-table", &num)); ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], DB::Properties::kNumImmutableMemTableFlushed, &num)); + ASSERT_EQ(num, "0"); ASSERT_TRUE(dbfull()->GetProperty( handles_[1], "rocksdb.num-entries-active-mem-table", &num)); ASSERT_EQ(num, "1"); @@ -492,6 +495,9 @@ TEST_F(DBPropertiesTest, NumImmutableMemTable) { ASSERT_TRUE(dbfull()->GetProperty(handles_[1], "rocksdb.num-immutable-mem-table", &num)); ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], DB::Properties::kNumImmutableMemTableFlushed, &num)); + ASSERT_EQ(num, "3"); ASSERT_TRUE(dbfull()->GetProperty( handles_[1], "rocksdb.cur-size-active-mem-table", &num)); // "192" is the size of the metadata of an empty skiplist, this would diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 74aac3649..33839d17e 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -136,8 +136,11 @@ const std::string 
DB::Properties::kStats = rocksdb_prefix + allstats; const std::string DB::Properties::kSSTables = rocksdb_prefix + sstables; const std::string DB::Properties::kCFStats = rocksdb_prefix + cfstats; const std::string DB::Properties::kDBStats = rocksdb_prefix + dbstats; +const std::string DB::Properties::kLevelStats = rocksdb_prefix + levelstats; const std::string DB::Properties::kNumImmutableMemTable = rocksdb_prefix + num_immutable_mem_table; +const std::string DB::Properties::kNumImmutableMemTableFlushed = + rocksdb_prefix + num_immutable_mem_table_flushed; const std::string DB::Properties::kMemTableFlushPending = rocksdb_prefix + mem_table_flush_pending; const std::string DB::Properties::kCompactionPending = diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 9bc0993c5..a94496a65 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -343,11 +343,14 @@ class DB { // of the sstables that make up the db contents. // "rocksdb.cfstats" // "rocksdb.dbstats" + // "rocksdb.levelstats" // "rocksdb.num-immutable-mem-table" // "rocksdb.mem-table-flush-pending" + // "rocksdb.num-immutable-mem-table-flushed" // "rocksdb.compaction-pending" - 1 if at least one compaction is pending // "rocksdb.background-errors" - accumulated number of background errors // "rocksdb.cur-size-active-mem-table" + // "rocksdb.cur-size-all-mem-tables" // "rocksdb.size-all-mem-tables" // "rocksdb.num-entries-active-mem-table" // "rocksdb.num-entries-imm-mem-tables" @@ -386,7 +389,9 @@ class DB { static const std::string kSSTables; static const std::string kCFStats; static const std::string kDBStats; + static const std::string kLevelStats; static const std::string kNumImmutableMemTable; + static const std::string kNumImmutableMemTableFlushed; static const std::string kMemTableFlushPending; static const std::string kNumRunningFlushes; static const std::string kCompactionPending; From bb2888738cc6573179c8b3c321ec8cf0dd508b34 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 21 
Jan 2016 11:38:15 -0800 Subject: [PATCH 027/195] Cleanup property-related variable names Summary: I noticed these names were quite confusing while updating GetProperty documentation. Test Plan: running "make commit-prereq -j32" Reviewers: yhchiang, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53073 --- db/internal_stats.cc | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 33839d17e..8da179d84 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -99,9 +99,8 @@ static const std::string compaction_pending = "compaction-pending"; static const std::string background_errors = "background-errors"; static const std::string cur_size_active_mem_table = "cur-size-active-mem-table"; -static const std::string cur_size_unflushed_mem_tables = - "cur-size-all-mem-tables"; -static const std::string cur_size_all_mem_tables = "size-all-mem-tables"; +static const std::string cur_size_all_mem_tables = "cur-size-all-mem-tables"; +static const std::string size_all_mem_tables = "size-all-mem-tables"; static const std::string num_entries_active_mem_table = "num-entries-active-mem-table"; static const std::string num_entries_imm_mem_tables = @@ -154,9 +153,9 @@ const std::string DB::Properties::kBackgroundErrors = const std::string DB::Properties::kCurSizeActiveMemTable = rocksdb_prefix + cur_size_active_mem_table; const std::string DB::Properties::kCurSizeAllMemTables = - rocksdb_prefix + cur_size_unflushed_mem_tables; -const std::string DB::Properties::kSizeAllMemTables = rocksdb_prefix + cur_size_all_mem_tables; +const std::string DB::Properties::kSizeAllMemTables = + rocksdb_prefix + size_all_mem_tables; const std::string DB::Properties::kNumEntriesActiveMemTable = rocksdb_prefix + num_entries_active_mem_table; const std::string DB::Properties::kNumEntriesImmMemTables = @@ -232,9 +231,9 @@ DBPropertyType GetPropertyType(const Slice& 
property, bool* is_int_property, return kBackgroundErrors; } else if (in == cur_size_active_mem_table) { return kCurSizeActiveMemTable; - } else if (in == cur_size_unflushed_mem_tables) { - return kCurSizeAllMemTables; } else if (in == cur_size_all_mem_tables) { + return kCurSizeAllMemTables; + } else if (in == size_all_mem_tables) { return kSizeAllMemTables; } else if (in == num_entries_active_mem_table) { return kNumEntriesInMutableMemtable; From b0a15e7fb99b9610dc8e2ee97d961b346b12269b Mon Sep 17 00:00:00 2001 From: krad Date: Tue, 12 Jan 2016 16:41:48 -0800 Subject: [PATCH 028/195] Mechanism to run CI jobs on local branch via commit_prereq Summary: This patch provides a mechanism to run pre commit tests on the local branch before committing. This can help prevent frequent build breaks. The tests can be run in parallel by specifying the J=<..> environment variable. Test Plan: Run manually Reviewers: sdong rven tec CC: leveldb@ Task ID: #9689218 Blame Rev: --- Makefile | 7 +- build_tools/precommit_checker.py | 198 +++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+), 4 deletions(-) create mode 100755 build_tools/precommit_checker.py diff --git a/Makefile b/Makefile index 24e6a5d28..8d2b9c41b 100644 --- a/Makefile +++ b/Makefile @@ -1161,11 +1161,10 @@ jtest: rocksdbjava jdb_bench: cd java;$(MAKE) db_bench; -commit-prereq: - $(MAKE) clean && $(MAKE) all check; +commit_prereq: build_tools/rocksdb-lego-determinator \ + build_tools/precommit_checker.py $(MAKE) clean && $(MAKE) jclean && $(MAKE) rocksdbjava; - $(MAKE) clean && USE_CLANG=1 $(MAKE) all; - $(MAKE) clean && OPT=-DROCKSDB_LITE $(MAKE) static_lib; + build_tools/precommit_checker.py unit uint_481 clang_unit tsan asan lite xfunc: for xftest in $(XFUNC_TESTS); do \ diff --git a/build_tools/precommit_checker.py b/build_tools/precommit_checker.py new file mode 100755 index 000000000..ceb5cb4ab --- /dev/null +++ b/build_tools/precommit_checker.py @@ -0,0 +1,198 @@ 
+#!/usr/local/fbcode/gcc-4.8.1-glibc-2.17-fb/bin/python2.7 + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import argparse +import commands +import subprocess +import sys +import re +import os +import time + +# +# Simple logger +# + + +class Log: + + LOG_FILE = "/tmp/precommit-check.log" + + def __init__(self): + self.filename = Log.LOG_FILE + self.f = open(self.filename, 'w+', 0) + + def caption(self, str): + line = "\n##### %s #####\n" % str + if self.f: + self.f.write("%s \n" % line) + else: + print(line) + + def error(self, str): + data = "\n\n##### ERROR ##### %s" % str + if self.f: + self.f.write("%s \n" % data) + else: + print(data) + + def log(self, str): + if self.f: + self.f.write("%s \n" % str) + else: + print(str) + +# +# Shell Environment +# + + +class Env(object): + + def __init__(self, tests): + self.tests = tests + self.log = Log() + + def shell(self, cmd, path=os.getcwd()): + if path: + os.chdir(path) + + self.log.log("==== shell session ===========================") + self.log.log("%s> %s" % (path, cmd)) + status = subprocess.call("cd %s; %s" % (path, cmd), shell=True, + stdout=self.log.f, stderr=self.log.f) + self.log.log("status = %s" % status) + self.log.log("============================================== \n\n") + return status + + def GetOutput(self, cmd, path=os.getcwd()): + if path: + os.chdir(path) + + self.log.log("==== shell session ===========================") + self.log.log("%s> %s" % (path, cmd)) + status, out = commands.getstatusoutput(cmd) + self.log.log("status = %s" % status) + self.log.log("out = %s" % out) + self.log.log("============================================== \n\n") + return status, out + +# +# Pre-commit checker +# + + +class PreCommitChecker(Env): + + def __init__(self, tests): + Env.__init__(self, tests) + + # + # Get commands for a given job from the determinator file + # + def get_commands(self, test): + 
status, out = self.GetOutput( + "build_tools/rocksdb-lego-determinator %s" % test, ".") + return status, out + + # + # Run a specific CI job + # + def run_test(self, test): + self.log.caption("Running test %s locally" % test) + + # get commands for the CI job determinator + status, cmds = self.get_commands(test) + if status != 0: + self.log.error("Error getting commands for test %s" % test) + return False + + # Parse the JSON to extract the commands to run + cmds = re.findall("'shell':'([^\']*)'", cmds) + + if len(cmds) == 0: + self.log.log("No commands found") + return False + + # Run commands + for cmd in cmds: + # Replace J=<..> with the local environment variable + if "J" in os.environ: + cmd = cmd.replace("J=1", "J=%s" % os.environ["J"]) + cmd = cmd.replace("make ", "make -j%s " % os.environ["J"]) + # Run the command + status = self.shell(cmd, ".") + if status != 0: + self.log.error("Error running command %s for test %s" + % (cmd, test)) + return False + + return True + + # + # Run specified CI jobs + # + def run_tests(self): + if not self.tests: + self.log.error("Invalid args. 
Please provide tests") + return False + + self.print_separator() + self.print_row("TEST", "RESULT") + self.print_separator() + + for test in self.tests: + start_time = time.time() + self.print_test(test) + result = self.run_test(test) + elapsed_min = (time.time() - start_time) / 60 + if not result: + self.log.error("Error running test %s" % test) + self.print_result("FAIL (%dm)" % elapsed_min) + return False + self.print_result("PASS (%dm)" % elapsed_min) + + self.print_separator() + return True + + # + # Print a line + # + def print_separator(self): + print("".ljust(60, "-")) + + # + # Print two colums + # + def print_row(self, c0, c1): + print("%s%s" % (c0.ljust(40), c1.ljust(20))) + + def print_test(self, test): + print(test.ljust(40), end="") + sys.stdout.flush() + + def print_result(self, result): + print(result.ljust(20)) + +# +# Main +# +parser = argparse.ArgumentParser(description='RocksDB pre-commit checker.') + +# +parser.add_argument('test', nargs='+', + help='CI test(s) to run. e.g: unit punit asan tsan') + +print("Please follow log %s" % Log.LOG_FILE) + +args = parser.parse_args() +checker = PreCommitChecker(args.test) + +if not checker.run_tests(): + print("Error running tests. Please check log file %s" % Log.LOG_FILE) + sys.exit(1) + +sys.exit(0) From bcd4ccbc33fbc6e17c73eb996b8f45909e880411 Mon Sep 17 00:00:00 2001 From: agiardullo Date: Tue, 19 Jan 2016 17:24:58 -0800 Subject: [PATCH 029/195] Revert D7809 Summary: Revert the functionaility of D7809 (but I'm keeping the logging and test code). We decided it was dangerous to ignore sync failures based on attempting to read the data written. The read does not tell us whether the data was synced. Test Plan: There was no test for the particular functionaility that was reverted. Keeping the test code from D7809 that tests whether we set the DB to be readonly when paranoid checks are enabled. 
Reviewers: rven, yhchiang, kradhakrishnan, IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D52989 --- db/version_set.cc | 60 ----------------------------------------------- db/version_set.h | 3 --- 2 files changed, 63 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 193f1076f..519805396 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2193,27 +2193,6 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, if (!s.ok()) { Log(InfoLogLevel::ERROR_LEVEL, db_options_->info_log, "MANIFEST write: %s\n", s.ToString().c_str()); - bool all_records_in = true; - for (auto& e : batch_edits) { - std::string record; - if (!e->EncodeTo(&record)) { - s = Status::Corruption( - "Unable to Encode VersionEdit:" + e->DebugString(true)); - all_records_in = false; - break; - } - if (!ManifestContains(pending_manifest_file_number_, record)) { - all_records_in = false; - break; - } - } - if (all_records_in) { - Log(InfoLogLevel::WARN_LEVEL, db_options_->info_log, - "MANIFEST contains log record despite error; advancing to new " - "version to prevent mismatch between in-memory and logged state" - " If paranoid is set, then the db is now in readonly mode."); - s = Status::OK(); - } } } @@ -3067,45 +3046,6 @@ Status VersionSet::WriteSnapshot(log::Writer* log) { return Status::OK(); } -// Opens the mainfest file and reads all records -// till it finds the record we are looking for. 
-bool VersionSet::ManifestContains(uint64_t manifest_file_num, - const std::string& record) const { - std::string fname = DescriptorFileName(dbname_, manifest_file_num); - Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log, - "ManifestContains: checking %s\n", fname.c_str()); - - unique_ptr file_reader; - Status s; - { - unique_ptr file; - s = env_->NewSequentialFile(fname, &file, env_options_); - if (!s.ok()) { - Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log, - "ManifestContains: %s\n", s.ToString().c_str()); - Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log, - "ManifestContains: is unable to reopen the manifest file %s", - fname.c_str()); - return false; - } - file_reader.reset(new SequentialFileReader(std::move(file))); - } - log::Reader reader(NULL, std::move(file_reader), nullptr, - true /*checksum*/, 0, 0); - Slice r; - std::string scratch; - bool result = false; - while (reader.ReadRecord(&r, &scratch)) { - if (r == Slice(record)) { - result = true; - break; - } - } - Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log, - "ManifestContains: result = %d\n", result ? 1 : 0); - return result; -} - // TODO(aekmekji): in CompactionJob::GenSubcompactionBoundaries(), this // function is called repeatedly with consecutive pairs of slices. 
For example // if the slice list is [a, b, c, d] this function is called with arguments diff --git a/db/version_set.h b/db/version_set.h index 2d9d93f6f..097109fd4 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -731,9 +731,6 @@ class VersionSet { void AppendVersion(ColumnFamilyData* column_family_data, Version* v); - bool ManifestContains(uint64_t manifest_file_number, - const std::string& record) const; - ColumnFamilyData* CreateColumnFamily(const ColumnFamilyOptions& cf_options, VersionEdit* edit); From 8019aa9b55c90ae2b89889d6260674f4c8fd19ff Mon Sep 17 00:00:00 2001 From: agiardullo Date: Thu, 21 Jan 2016 17:28:01 -0800 Subject: [PATCH 030/195] improve test for manifest write failure Summary: Improve testing per discussion in D52989 Test Plan: ran test Reviewers: sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53211 --- db/db_test.cc | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/db/db_test.cc b/db/db_test.cc index cb27ba0d5..1143eef1b 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -3289,6 +3289,7 @@ TEST_F(DBTest, ManifestWriteError) { options.env = env_; options.create_if_missing = true; options.error_if_exists = false; + options.paranoid_checks = true; DestroyAndReopen(options); ASSERT_OK(Put("foo", "bar")); ASSERT_EQ("bar", Get("foo")); @@ -3305,10 +3306,33 @@ TEST_F(DBTest, ManifestWriteError) { dbfull()->TEST_CompactRange(last, nullptr, nullptr); // Should fail ASSERT_EQ("bar", Get("foo")); + error_type->store(false, std::memory_order_release); + + // Since paranoid_checks=true, writes should fail + ASSERT_NOK(Put("foo2", "bar2")); + + // Recovery: should not lose data + ASSERT_EQ("bar", Get("foo")); + + // Try again with paranoid_checks=false + Close(); + options.paranoid_checks = false; + Reopen(options); + + // Merging compaction (will fail) + error_type->store(true, std::memory_order_release); + dbfull()->TEST_CompactRange(last, nullptr, nullptr); // 
Should fail + ASSERT_EQ("bar", Get("foo")); + // Recovery: should not lose data error_type->store(false, std::memory_order_release); Reopen(options); ASSERT_EQ("bar", Get("foo")); + + // Since paranoid_checks=false, writes should succeed + ASSERT_OK(Put("foo2", "bar2")); + ASSERT_EQ("bar", Get("foo")); + ASSERT_EQ("bar2", Get("foo2")); } } #endif // ROCKSDB_LITE From 538eec0661e6e66bc54fc189295c342a84ea7f54 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Fri, 22 Jan 2016 11:56:32 -0800 Subject: [PATCH 031/195] Update fbcode_config.sh to use latest versions automatically Summary: Update fbcode_config.sh so that It try to use the latest version for dependencies that we are using, after updating the code these libraries where updated ``` Snappy: 1.0.3 => 1.1.3 GFLAGS: 1.6 => 2.1.1 JEMALLOC: 3.6.0 => 4.0.3 ``` I have also updated clang from 3.7 to 3.7.1 ``` Clang 3.7 => 3.7.1 ``` Another change is that we use the same tp2 directory as fbcode, so we dont need to keep changing commit hash every time we need to change a version of a compiler or a library Test Plan: make check -j64 USE_CLANG=1 make check -j64 DISABLE_JEMALLOC=1 OPT=-g make all valgrind_check -j32 (make sure it's running) Reviewers: yhchiang, anthony, rven, kradhakrishnan, andrewkr, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53037 --- build_tools/dependencies.sh | 13 +++++ build_tools/fbcode_config.sh | 69 +++++++++++++------------- build_tools/update_dependencies.sh | 79 ++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 35 deletions(-) create mode 100644 build_tools/dependencies.sh create mode 100755 build_tools/update_dependencies.sh diff --git a/build_tools/dependencies.sh b/build_tools/dependencies.sh new file mode 100644 index 000000000..a7e590904 --- /dev/null +++ b/build_tools/dependencies.sh @@ -0,0 +1,13 @@ +GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.9.x/centos6-native/* 
+CLANG_BASE=/mnt/vol/engshare/fbcode/third-party2/clang/3.7.1 +LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/5710d6a0fb0d12820aac0bffcd7fcb8646e7fff7/4.9.x/gcc-4.9-glibc-2.20/024dbc3 +GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0600c95b31226b5e535614c590677d87c62d8016/2.20/gcc-4.9-glibc-2.20/500e281 +SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/cbf6f1f209e5bd160bdc5d971744e039f36b1566/1.1.3/gcc-4.9-glibc-2.20/e9936bf +ZLIB_BASE=/mnt/gvfs/third-party2/zlib/6d39cb54708049f527e713ad19f2aadb9d3667e8/1.2.8/gcc-4.9-glibc-2.20/e9936bf +BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/2ddd45f0853bfc8bb1c27f0f447236a1a26c338a/1.0.6/gcc-4.9-glibc-2.20/e9936bf +LZ4_BASE=/mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf +ZSTD_BASE=/mnt/gvfs/third-party2/zstd/d4ac2c5f9be76d57a6cbd3eb1011e97574a56cde/0.4.5/gcc-4.9-glibc-2.20/e9936bf +GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/c7275a4ceae0aca0929e56964a31dafc53c1ee96/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a +JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/5f0be745ddc0f86f22c8c8bb64b6b1163c93df91/4.0.3/gcc-4.9-glibc-2.20/a6c5e1e +NUMA_BASE=/mnt/gvfs/third-party2/numa/ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73/2.0.8/gcc-4.9-glibc-2.20/e9936bf +LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/121f1a75c4414683aea8c70b761bfaf187f7c1a3/trunk/gcc-4.9-glibc-2.20/12266b1 diff --git a/build_tools/fbcode_config.sh b/build_tools/fbcode_config.sh index 48327c2e3..8cc0b53ff 100644 --- a/build_tools/fbcode_config.sh +++ b/build_tools/fbcode_config.sh @@ -6,85 +6,84 @@ # Environment variables that change the behavior of this script: # PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. 
libraries that don't have pic variant will not be included + +source "$PWD/build_tools/dependencies.sh" + +TP2_LATEST="/mnt/vol/engshare/fbcode/third-party2" + CFLAGS="" -# location of libgcc -LIBGCC_BASE="/mnt/gvfs/third-party2/libgcc/0473c80518a10d6efcbe24c5eeca3fb4ec9b519c/4.9.x/gcc-4.9-glibc-2.20/e1a7e4e" +# libgcc LIBGCC_INCLUDE="$LIBGCC_BASE/include" LIBGCC_LIBS=" -L $LIBGCC_BASE/libs" -# location of glibc -GLIBC_REV=7397bed99280af5d9543439cdb7d018af7542720 -GLIBC_INCLUDE="/mnt/gvfs/third-party2/glibc/$GLIBC_REV/2.20/gcc-4.9-glibc-2.20/99df8fc/include" -GLIBC_LIBS=" -L /mnt/gvfs/third-party2/glibc/$GLIBC_REV/2.20/gcc-4.9-glibc-2.20/99df8fc/lib" - -SNAPPY_INCLUDE=" -I /mnt/gvfs/third-party2/snappy/b0f269b3ca47770121aa159b99e1d8d2ab260e1f/1.0.3/gcc-4.9-glibc-2.20/c32916f/include/" +# glibc +GLIBC_INCLUDE="$GLIBC_BASE/include" +GLIBC_LIBS=" -L $GLIB_BASE/lib" +# snappy +SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/" if test -z $PIC_BUILD; then - SNAPPY_LIBS=" /mnt/gvfs/third-party2/snappy/b0f269b3ca47770121aa159b99e1d8d2ab260e1f/1.0.3/gcc-4.9-glibc-2.20/c32916f/lib/libsnappy.a" + SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a" else - SNAPPY_LIBS=" /mnt/gvfs/third-party2/snappy/b0f269b3ca47770121aa159b99e1d8d2ab260e1f/1.0.3/gcc-4.9-glibc-2.20/c32916f/lib/libsnappy_pic.a" + SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a" fi - CFLAGS+=" -DSNAPPY" if test -z $PIC_BUILD; then # location of zlib headers and libraries - ZLIB_INCLUDE=" -I /mnt/gvfs/third-party2/zlib/feb983d9667f4cf5e9da07ce75abc824764b67a1/1.2.8/gcc-4.9-glibc-2.20/4230243/include/" - ZLIB_LIBS=" /mnt/gvfs/third-party2/zlib/feb983d9667f4cf5e9da07ce75abc824764b67a1/1.2.8/gcc-4.9-glibc-2.20/4230243/lib/libz.a" + ZLIB_INCLUDE=" -I $ZLIB_BASE/include/" + ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a" CFLAGS+=" -DZLIB" # location of bzip headers and libraries - BZIP_INCLUDE=" -I /mnt/gvfs/third-party2/bzip2/af004cceebb2dfd173ca29933ea5915e727aad2f/1.0.6/gcc-4.9-glibc-2.20/4230243/include/" - BZIP_LIBS=" 
/mnt/gvfs/third-party2/bzip2/af004cceebb2dfd173ca29933ea5915e727aad2f/1.0.6/gcc-4.9-glibc-2.20/4230243/lib/libbz2.a" + BZIP_INCLUDE=" -I $BZIP2_BASE/include/" + BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a" CFLAGS+=" -DBZIP2" - LZ4_INCLUDE=" -I /mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf/include/" - LZ4_LIBS=" /mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf/lib/liblz4.a" + LZ4_INCLUDE=" -I $LZ4_BASE/include/" + LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a" CFLAGS+=" -DLZ4" - ZSTD_REV=d4ac2c5f9be76d57a6cbd3eb1011e97574a56cde - ZSTD_INCLUDE=" -I /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.5/gcc-4.8.1-glibc-2.17/c3f970a/include" - ZSTD_LIBS=" /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.5/gcc-4.8.1-glibc-2.17/c3f970a/lib/libzstd.a" + ZSTD_INCLUDE=" -I $ZSTD_BASE/include/" + ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a" CFLAGS+=" -DZSTD" fi # location of gflags headers and libraries -GFLAGS_INCLUDE=" -I /mnt/gvfs/third-party2/gflags/0fa60e2b88de3e469db6c482d6e6dac72f5d65f9/1.6/gcc-4.9-glibc-2.20/4230243/include/" +GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/" if test -z $PIC_BUILD; then - GFLAGS_LIBS=" /mnt/gvfs/third-party2/gflags/0fa60e2b88de3e469db6c482d6e6dac72f5d65f9/1.6/gcc-4.9-glibc-2.20/4230243/lib/libgflags.a" + GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a" else - GFLAGS_LIBS=" /mnt/gvfs/third-party2/gflags/0fa60e2b88de3e469db6c482d6e6dac72f5d65f9/1.6/gcc-4.9-glibc-2.20/4230243/lib/libgflags_pic.a" + GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a" fi CFLAGS+=" -DGFLAGS=google" # location of jemalloc -JEMALLOC_INCLUDE=" -I /mnt/gvfs/third-party2/jemalloc/bcd68e5e419efa4e61b9486d6854564d6d75a0b5/3.6.0/gcc-4.9-glibc-2.20/2aafc78/include/" -JEMALLOC_LIB=" /mnt/gvfs/third-party2/jemalloc/bcd68e5e419efa4e61b9486d6854564d6d75a0b5/3.6.0/gcc-4.9-glibc-2.20/2aafc78/lib/libjemalloc.a" +JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/" +JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a" if test -z 
$PIC_BUILD; then # location of numa - NUMA_INCLUDE=" -I /mnt/gvfs/third-party2/numa/bbefc39ecbf31d0ca184168eb613ef8d397790ee/2.0.8/gcc-4.9-glibc-2.20/4230243/include/" - NUMA_LIB=" /mnt/gvfs/third-party2/numa/bbefc39ecbf31d0ca184168eb613ef8d397790ee/2.0.8/gcc-4.9-glibc-2.20/4230243/lib/libnuma.a" + NUMA_INCLUDE=" -I $NUMA_BASE/include/" + NUMA_LIB=" $NUMA_BASE/lib/libnuma.a" CFLAGS+=" -DNUMA" # location of libunwind - LIBUNWIND="/mnt/gvfs/third-party2/libunwind/1de3b75e0afedfe5585b231bbb340ec7a1542335/1.1/gcc-4.9-glibc-2.20/34235e8/lib/libunwind.a" + LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a" fi # use Intel SSE support for checksum calculations export USE_SSE=1 -BINUTILS="/mnt/gvfs/third-party2/binutils/0b6ad0c88ddd903333a48ae8bff134efac468e4a/2.25/centos6-native/da39a3e/bin" +BINUTILS="$TP2_LATEST/binutils/2.25/centos6-native/*/bin" AR="$BINUTILS/ar" DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE" -GCC_BASE="/mnt/gvfs/third-party2/gcc/1c67a0b88f64d4d9ced0382d141c76aaa7d62fba/4.9.x/centos6-native/1317bc4" STDLIBS="-L $GCC_BASE/lib64" -CLANG_BASE="/mnt/gvfs/third-party2/clang/d81444dd214df3d2466734de45bb264a0486acc3/dev" -CLANG_BIN="$CLANG_BASE/centos6-native/af4b1a0/bin" +CLANG_BIN="$CLANG_BASE/centos6-native/*/bin" CLANG_ANALYZER="$CLANG_BIN/clang++" CLANG_SCAN_BUILD="$CLANG_BASE/src/clang/tools/scan-build/scan-build" @@ -98,11 +97,11 @@ if [ -z "$USE_CLANG" ]; then CFLAGS+=" -isystem $LIBGCC_INCLUDE" else # clang - CLANG_INCLUDE="$CLANG_BASE/gcc-4.9-glibc-2.20/74c386f/lib/clang/dev/include/" + CLANG_INCLUDE="$CLANG_BASE/centos6-native/*/lib/clang/*/include" CC="$CLANG_BIN/clang" CXX="$CLANG_BIN/clang++" - KERNEL_HEADERS_INCLUDE="/mnt/gvfs/third-party2/kernel-headers/ffd14f660a43c4b92717986b1bba66722ef089d0/3.2.18_70_fbk11_00129_gc8882d0/gcc-4.9-glibc-2.20/da39a3e/include" + KERNEL_HEADERS_INCLUDE="$TP2_LATEST/kernel-headers/3.2.18_70_fbk11_00129_gc8882d0/gcc-4.9-glibc-2.20/*/include" CFLAGS+=" 
-B$BINUTILS/gold -nostdinc -nostdlib" CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x " @@ -128,6 +127,6 @@ PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++" EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS" -VALGRIND_VER="/mnt/gvfs/third-party2/valgrind/6c45ef049cbf11c2df593addb712cd891049e737/3.10.0/gcc-4.9-glibc-2.20/4230243/bin/" +VALGRIND_VER="$TP2_LATEST/valgrind/3.10.0/gcc-4.9-glibc-2.20/*/bin/" export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD diff --git a/build_tools/update_dependencies.sh b/build_tools/update_dependencies.sh new file mode 100755 index 000000000..6a9c662a2 --- /dev/null +++ b/build_tools/update_dependencies.sh @@ -0,0 +1,79 @@ +#!/bin/sh +# +# Update dependencies.sh file with the latest avaliable versions + +BASEDIR=$(dirname $0) +OUTPUT="$BASEDIR/dependencies.sh" + +rm -f "$OUTPUT" +touch "$OUTPUT" + +function log_variable() +{ + echo "$1=${!1}" >> "$OUTPUT" +} + + +TP2_LATEST="/mnt/vol/engshare/fbcode/third-party2" +## $1 => lib name +## $2 => lib version (if not provided, will try to pick latest) +## $3 => platform (if not provided, will try to pick latest gcc) +## +## get_lib_base will set a variable named ${LIB_NAME}_BASE to the lib location +function get_lib_base() +{ + local lib_name=$1 + local lib_version=$2 + local lib_platform=$3 + + local result="$TP2_LATEST/$lib_name/" + + # Lib Version + if [ -z "$lib_version" ]; then + # version is not provided, use latest + result=`ls -dr1v $result/*/ | head -n1` + else + result="$result/$lib_version/" + fi + + # Lib Platform + if [ -z "$lib_platform" ]; then + # platform is not provided, use latest gcc + result=`ls -dr1v $result/gcc-*[^fb]/ | head -n1` + else + result="$result/$lib_platform/" + fi + + result="$result/*/" + + # lib_name => LIB_NAME_BASE + local __res_var=${lib_name^^}"_BASE" + # LIB_NAME_BASE=$result + eval 
$__res_var=`readlink -f $result` + + log_variable $__res_var +} + +echo "Writing dependencies to $OUTPUT" + +# Compilers locations +GCC_BASE="$TP2_LATEST/gcc/4.9.x/centos6-native/*" +CLANG_BASE="$TP2_LATEST/clang/3.7.1" + +log_variable GCC_BASE +log_variable CLANG_BASE + +# Libraries locations +get_lib_base libgcc +get_lib_base glibc 2.20 gcc-4.9-glibc-2.20 +get_lib_base snappy +get_lib_base zlib +get_lib_base bzip2 +get_lib_base lz4 +get_lib_base zstd +get_lib_base gflags +get_lib_base jemalloc +get_lib_base numa +get_lib_base libunwind + +git diff $OUTPUT From f57596b0cb6d0c4503521941357d7d762846a67f Mon Sep 17 00:00:00 2001 From: krad Date: Thu, 21 Jan 2016 13:06:14 -0800 Subject: [PATCH 032/195] Improvements to pre-commit Summary: - UI is enhanced to lists the tests, status and the results - We are using the same pre-commit tool as the make equivalent - No more emails to user on failure - Dropped valgrind from the list since it can be a time hogger (and can hurt scheduling for others) - Patching bug fix - Made the jobs run in parallel in sandcastle Test Plan: Manual test Reviewers: sdong, rven, igor Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53217 Making parallel requests to sandcastle Test Plan: Run manual tests Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53259 --- .../config/FacebookArcanistConfiguration.php | 98 ++++++++++++++----- 1 file changed, 74 insertions(+), 24 deletions(-) diff --git a/arcanist_util/config/FacebookArcanistConfiguration.php b/arcanist_util/config/FacebookArcanistConfiguration.php index e9f4b0828..c8834de47 100644 --- a/arcanist_util/config/FacebookArcanistConfiguration.php +++ b/arcanist_util/config/FacebookArcanistConfiguration.php @@ -17,7 +17,33 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { ////////////////////////////////////////////////////////////////////// /* Run tests in sandcastle */ - function getSteps($diffID, $username) { + 
function postURL($diffID, $url) { + $cmd = 'echo \'{"diff_id": "' . $diffID . '", ' + . '"name":"click here for sandcastle tests for D' . $diffID . '", ' + . '"link":"' . $url . '"}\' | ' + . 'http_proxy=fwdproxy.any.facebook.com:8080 ' + . 'https_proxy=fwdproxy.any.facebook.com:8080 arc call-conduit ' + . 'differential.updateunitresults'; + + shell_exec($cmd); + } + + function updateTestCommand($diffID, $test, $status) { + $cmd = 'echo \'{"diff_id": "' . $diffID . '", ' + . '"name":"' . $test . '", ' + . '"result":"' . $status . '"}\' | ' + . 'http_proxy=fwdproxy.any.facebook.com:8080 ' + . 'https_proxy=fwdproxy.any.facebook.com:8080 arc call-conduit ' + . 'differential.updateunitresults'; + return $cmd; + } + + function updateTest($diffID, $test) { + shell_exec($this->updateTestCommand($diffID, $test, "waiting")); + } + + + function getSteps($diffID, $username, $test) { $arcrc_content = exec("cat ~/.arcrc | base64 -w0"); $setup = array( @@ -41,7 +67,7 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { $patch = array( "name" => "Patch " . $diffID, "shell" => "HTTPS_PROXY=fwdproxy:8080 arc --arcrc-file ~/.arcrc " - . "patch D" . $diffID . " || rm -f ~/.arcrc", + . "patch --diff " . $diffID, "user" => "root" ); @@ -55,23 +81,24 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { $steps[] = $fix_permission; $steps[] = $fix_git_ignore; $steps[] = $patch; - $steps[] = $cleanup; - $tests = array( - "unit", "clang_unit", "tsan", "asan", "valgrind" + $this->updateTest($diffID, $test); + $cmd = $this->updateTestCommand($diffID, $test, "running") . ";" + . "(./build_tools/precommit_checker.py " . $test + . "&& " + . $this->updateTestCommand($diffID, $test, "pass") . ")" + . "|| " . $this->updateTestCommand($diffID, $test, "fail") + . "; cat /tmp/precommit-check.log" + . "; for f in `ls t/log-*`; do echo \$f; cat \$f; done"; + + $run_test = array( + "name" => "Run " . 
$test, + "shell" => $cmd, + "user" => "root", ); - foreach ($tests as $test) { - $run_test = array( - "name" => "Run " . $test, - "shell" => "EMAIL=" . $username . "@fb.com " - . "./build_tools/rocksdb-lego-determinator " . $test, - "user" => "root", - "determinator" => true - ); - - $steps[] = $run_test; - } + $steps[] = $run_test; + $steps[] = $cleanup; return $steps; } @@ -84,16 +111,36 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { return; } - $arg = array( - "name" => "RocksDB diff D" . $diffID . "testing for " . $username, - "steps" => $this->getSteps($diffID, $username) + $tests = array( + "unit", "unit_481", "clang_unit", "tsan", "asan", "lite" + ); + + foreach ($tests as $test) { + $arg[] = array( + "name" => "RocksDB diff " . $diffID . " test " . $test, + "steps" => $this->getSteps($diffID, $username, $test) + ); + } + + $arg_encoded = base64_encode(json_encode($arg)); + + $command = array( + "name" => "Run diff " . $diffID . "for user " . $username, + "steps" => array() + ); + + $command["steps"][] = array( + "name" => "Generate determinator", + "shell" => "echo " . $arg_encoded . " | base64 --decode", + "determinator" => true, + "user" => "root" ); $url = 'https://interngraph.intern.facebook.com/sandcastle/generate?' .'command=SandcastleUniversalCommand' .'&vcs=rocksdb-git&revision=origin%2Fmaster&type=lego' - .'&user=krad&alias=ci-util' - .'&command-args=' . urlencode(json_encode($arg)); + .'&user=krad&alias=rocksdb-precommit' + .'&command-args=' . urlencode(json_encode($command)); $cmd = 'https_proxy= HTTPS_PROXY= curl -s -k -F app=659387027470559 ' . '-F token=AeO_3f2Ya3TujjnxGD4 "' . $url . '"'; @@ -104,13 +151,16 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { echo "Please follow the URL for details on the job. \n"; echo "An email will be sent to " . $username . "@fb.com on failure. 
\n"; echo "\n"; - echo "Job details: \n"; $output = shell_exec($cmd); - echo $output; + preg_match('/url": "(.+)"/', $output, $sandcastle_url); - echo "\n====================================================== \n"; + echo "url: " . $sandcastle_url[1] . "\n"; + + $this->postURL($diffID, $sandcastle_url[1]); + + echo "====================================================== \n"; } ////////////////////////////////////////////////////////////////////// From f1ed1701058a0832076574f6a514df227897213d Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 20 Jan 2016 18:15:36 -0800 Subject: [PATCH 033/195] Add tests to make sure new DB or ColumnFamily options are settable through string Summary: Add a test to fail if someone adds a DB options. Test Plan: Run the test, run the test with valgrind. Add an option to DB option in the middle or in the end and make sure it fails. Reviewers: yhchiang, anthony, IslamAbdelRahman, kradhakrishnan, rven, andrewkr Reviewed By: andrewkr Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53097 --- util/options_test.cc | 380 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 350 insertions(+), 30 deletions(-) diff --git a/util/options_test.cc b/util/options_test.cc index 19e48b350..09ecbea03 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -1475,39 +1475,28 @@ TEST_F(OptionsParserTest, EscapeOptionString) { "Escape \\# and"); } -// Only run OptionsParserTest.BlockBasedTableOptionsAdded on limited platforms -// as it depends on behavior of compilers. +// Only run the tests to verify new fields in options are settable through +// string on limited platforms as it depends on behavior of compilers. #ifdef OS_LINUX #ifndef __clang__ const char kSpecialChar = 'R'; -// Items in the form of . Need to be in ascending order -// and not overlapping. Need to updated if new pointer-option is added. 
-const std::vector> kBbtoBlacklist = { - {offsetof(struct BlockBasedTableOptions, flush_block_policy_factory), - sizeof(std::shared_ptr)}, - {offsetof(struct BlockBasedTableOptions, block_cache), - sizeof(std::shared_ptr)}, - {offsetof(struct BlockBasedTableOptions, block_cache_compressed), - sizeof(std::shared_ptr)}, - {offsetof(struct BlockBasedTableOptions, filter_policy), - sizeof(std::shared_ptr)}, -}; +typedef std::vector> OffsetGap; -void FillWithSpecialChar(char* start_ptr) { +void FillWithSpecialChar(char* start_ptr, size_t total_size, + const OffsetGap& blacklist) { size_t offset = 0; - for (auto& pair : kBbtoBlacklist) { + for (auto& pair : blacklist) { std::memset(start_ptr + offset, kSpecialChar, pair.first - offset); offset = pair.first + pair.second; } - std::memset(start_ptr + offset, kSpecialChar, - sizeof(BlockBasedTableOptions) - offset); + std::memset(start_ptr + offset, kSpecialChar, total_size - offset); } -int NumUnsetBytes(char* start_ptr) { +int NumUnsetBytes(char* start_ptr, size_t total_size, + const OffsetGap& blacklist) { int total_unset_bytes_base = 0; - size_t offset = 0; - for (auto& pair : kBbtoBlacklist) { + for (auto& pair : blacklist) { for (char* ptr = start_ptr + offset; ptr < start_ptr + pair.first; ptr++) { if (*ptr == kSpecialChar) { total_unset_bytes_base++; @@ -1515,8 +1504,7 @@ int NumUnsetBytes(char* start_ptr) { offset = pair.first + pair.second; } } - for (char* ptr = start_ptr + offset; - ptr < start_ptr + sizeof(BlockBasedTableOptions); ptr++) { + for (char* ptr = start_ptr + offset; ptr < start_ptr + total_size; ptr++) { if (*ptr == kSpecialChar) { total_unset_bytes_base++; } @@ -1524,7 +1512,28 @@ int NumUnsetBytes(char* start_ptr) { return total_unset_bytes_base; } +// If the test fails, likely a new option is added to BlockBasedTableOptions +// but it cannot be set through GetBlockBasedTableOptionsFromString(), or the +// test is not updated accordingly. 
+// After adding an option, we need to make sure it is settable by +// GetBlockBasedTableOptionsFromString() and add the option to the input string +// passed to the GetBlockBasedTableOptionsFromString() in this test. +// If it is a complicated type, you also need to add the field to +// kBbtoBlacklist, and maybe add customized verification for it. TEST_F(OptionsParserTest, BlockBasedTableOptionsAllFieldsSettable) { + // Items in the form of . Need to be in ascending order + // and not overlapping. Need to updated if new pointer-option is added. + const OffsetGap kBbtoBlacklist = { + {offsetof(struct BlockBasedTableOptions, flush_block_policy_factory), + sizeof(std::shared_ptr)}, + {offsetof(struct BlockBasedTableOptions, block_cache), + sizeof(std::shared_ptr)}, + {offsetof(struct BlockBasedTableOptions, block_cache_compressed), + sizeof(std::shared_ptr)}, + {offsetof(struct BlockBasedTableOptions, filter_policy), + sizeof(std::shared_ptr)}, + }; + // In this test, we catch a new option of BlockBasedTableOptions that is not // settable through GetBlockBasedTableOptionsFromString(). // We count padding bytes of the option struct, and assert it to be the same @@ -1537,28 +1546,31 @@ TEST_F(OptionsParserTest, BlockBasedTableOptionsAllFieldsSettable) { // copy a well constructed struct to this memory and see how many special // bytes left. BlockBasedTableOptions* bbto = new (bbto_ptr) BlockBasedTableOptions(); - FillWithSpecialChar(bbto_ptr); + FillWithSpecialChar(bbto_ptr, sizeof(BlockBasedTableOptions), kBbtoBlacklist); // It based on the behavior of compiler that padding bytes are not changed // when copying the struct. It's prone to failure when compiler behavior // changes. We verify there is unset bytes to detect the case. 
*bbto = BlockBasedTableOptions(); - int unset_bytes_base = NumUnsetBytes(bbto_ptr); + int unset_bytes_base = + NumUnsetBytes(bbto_ptr, sizeof(BlockBasedTableOptions), kBbtoBlacklist); ASSERT_GT(unset_bytes_base, 0); bbto->~BlockBasedTableOptions(); // Construct the base option passed into // GetBlockBasedTableOptionsFromString(). - FillWithSpecialChar(bbto_ptr); + bbto = new (bbto_ptr) BlockBasedTableOptions(); + FillWithSpecialChar(bbto_ptr, sizeof(BlockBasedTableOptions), kBbtoBlacklist); // This option is not setable: bbto->use_delta_encoding = true; char* new_bbto_ptr = new char[sizeof(BlockBasedTableOptions)]; BlockBasedTableOptions* new_bbto = new (new_bbto_ptr) BlockBasedTableOptions(); - FillWithSpecialChar(new_bbto_ptr); + FillWithSpecialChar(new_bbto_ptr, sizeof(BlockBasedTableOptions), + kBbtoBlacklist); // Need to update the option string if a new option is added. - GetBlockBasedTableOptionsFromString( + ASSERT_OK(GetBlockBasedTableOptionsFromString( *bbto, "cache_index_and_filter_blocks=1;index_type=kHashSearch;" "checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;" @@ -1567,9 +1579,11 @@ TEST_F(OptionsParserTest, BlockBasedTableOptionsAllFieldsSettable) { "filter_policy=bloomfilter:4:true;whole_key_filtering=1;" "skip_table_builder_flush=1;format_version=1;" "hash_index_allow_collision=false;", - new_bbto); + new_bbto)); - ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_bbto_ptr)); + ASSERT_EQ(unset_bytes_base, + NumUnsetBytes(new_bbto_ptr, sizeof(BlockBasedTableOptions), + kBbtoBlacklist)); ASSERT_TRUE(new_bbto->block_cache.get() != nullptr); ASSERT_TRUE(new_bbto->block_cache_compressed.get() != nullptr); @@ -1581,6 +1595,312 @@ TEST_F(OptionsParserTest, BlockBasedTableOptionsAllFieldsSettable) { delete[] bbto_ptr; delete[] new_bbto_ptr; } + +// If the test fails, likely a new option is added to DBOptions +// but it cannot be set through GetDBOptionsFromString(), or the test is not +// updated accordingly. 
+// After adding an option, we need to make sure it is settable by +// GetDBOptionsFromString() and add the option to the input string passed to +// DBOptionsFromString()in this test. +// If it is a complicated type, you also need to add the field to +// kDBOptionsBlacklist, and maybe add customized verification for it. +TEST_F(OptionsParserTest, DBOptionsAllFieldsSettable) { + const OffsetGap kDBOptionsBlacklist = { + {offsetof(struct DBOptions, env), sizeof(Env*)}, + {offsetof(struct DBOptions, rate_limiter), + sizeof(std::shared_ptr)}, + {offsetof(struct DBOptions, delete_scheduler), + sizeof(std::shared_ptr)}, + {offsetof(struct DBOptions, info_log), sizeof(std::shared_ptr)}, + {offsetof(struct DBOptions, statistics), + sizeof(std::shared_ptr)}, + {offsetof(struct DBOptions, db_paths), sizeof(std::vector)}, + {offsetof(struct DBOptions, db_log_dir), sizeof(std::string)}, + {offsetof(struct DBOptions, wal_dir), sizeof(std::string)}, + {offsetof(struct DBOptions, listeners), + sizeof(std::vector>)}, + {offsetof(struct DBOptions, row_cache), sizeof(std::shared_ptr)}, + {offsetof(struct DBOptions, wal_filter), sizeof(const WalFilter*)}, + }; + + char* options_ptr = new char[sizeof(DBOptions)]; + + // Count padding bytes by setting all bytes in the memory to a special char, + // copy a well constructed struct to this memory and see how many special + // bytes left. + DBOptions* options = new (options_ptr) DBOptions(); + FillWithSpecialChar(options_ptr, sizeof(DBOptions), kDBOptionsBlacklist); + // It based on the behavior of compiler that padding bytes are not changed + // when copying the struct. It's prone to failure when compiler behavior + // changes. We verify there is unset bytes to detect the case. 
+ *options = DBOptions(); + int unset_bytes_base = + NumUnsetBytes(options_ptr, sizeof(DBOptions), kDBOptionsBlacklist); + ASSERT_GT(unset_bytes_base, 0); + options->~DBOptions(); + + options = new (options_ptr) DBOptions(); + FillWithSpecialChar(options_ptr, sizeof(DBOptions), kDBOptionsBlacklist); + + // Following options are not settable through GetDBOptionsFromString(): + options->fail_if_options_file_error = false; + options->allow_concurrent_memtable_write = false; + options->wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; + options->enable_write_thread_adaptive_yield = true; + options->write_thread_slow_yield_usec = true; + options->write_thread_max_yield_usec = 1000u; + options->access_hint_on_compaction_start = DBOptions::AccessHint::NONE; + options->info_log_level = InfoLogLevel::DEBUG_LEVEL; + + char* new_options_ptr = new char[sizeof(DBOptions)]; + DBOptions* new_options = new (new_options_ptr) DBOptions(); + FillWithSpecialChar(new_options_ptr, sizeof(DBOptions), kDBOptionsBlacklist); + + // Need to update the option string if a new option is added. 
+ ASSERT_OK( + GetDBOptionsFromString(*options, + "wal_bytes_per_sync=4295048118;" + "delete_obsolete_files_period_micros=4294967758;" + "WAL_ttl_seconds=4295008036;" + "WAL_size_limit_MB=4295036161;" + "wal_dir=path/to/wal_dir;" + "db_write_buffer_size=2587;" + "max_subcompactions=64330;" + "table_cache_numshardbits=28;" + "max_open_files=72;" + "max_file_opening_threads=35;" + "max_background_compactions=33;" + "use_fsync=true;" + "use_adaptive_mutex=false;" + "max_total_wal_size=4295005604;" + "compaction_readahead_size=0;" + "new_table_reader_for_compaction_inputs=false;" + "keep_log_file_num=4890;" + "skip_stats_update_on_db_open=false;" + "max_manifest_file_size=4295009941;" + "db_log_dir=path/to/db_log_dir;" + "skip_log_error_on_recovery=true;" + "writable_file_max_buffer_size=1048576;" + "paranoid_checks=true;" + "is_fd_close_on_exec=false;" + "bytes_per_sync=4295013613;" + "enable_thread_tracking=false;" + "disable_data_sync=false;" + "recycle_log_file_num=0;" + "disableDataSync=false;" + "create_missing_column_families=true;" + "log_file_time_to_roll=3097;" + "max_background_flushes=35;" + "create_if_missing=false;" + "error_if_exists=true;" + "allow_os_buffer=false;" + "delayed_write_rate=4294976214;" + "manifest_preallocation_size=1222;" + "allow_mmap_writes=false;" + "stats_dump_period_sec=70127;" + "allow_fallocate=true;" + "allow_mmap_reads=false;" + "max_log_file_size=4607;" + "random_access_max_buffer_size=1048576;" + "advise_random_on_open=true;" + "wal_bytes_per_sync=4295048118;" + "delete_obsolete_files_period_micros=4294967758;" + "WAL_ttl_seconds=4295008036;" + "WAL_size_limit_MB=4295036161;" + "wal_dir=path/to/wal_dir;" + "db_write_buffer_size=2587;" + "max_subcompactions=64330;" + "table_cache_numshardbits=28;" + "max_open_files=72;" + "max_file_opening_threads=35;" + "max_background_compactions=33;" + "use_fsync=true;" + "use_adaptive_mutex=false;" + "max_total_wal_size=4295005604;" + "compaction_readahead_size=0;" + 
"new_table_reader_for_compaction_inputs=false;" + "keep_log_file_num=4890;" + "skip_stats_update_on_db_open=false;" + "max_manifest_file_size=4295009941;" + "db_log_dir=path/to/db_log_dir;" + "skip_log_error_on_recovery=true;" + "writable_file_max_buffer_size=1048576;" + "paranoid_checks=true;" + "is_fd_close_on_exec=false;" + "bytes_per_sync=4295013613;" + "enable_thread_tracking=false;" + "disable_data_sync=false;" + "recycle_log_file_num=0;" + "disableDataSync=false;" + "create_missing_column_families=true;" + "log_file_time_to_roll=3097;" + "max_background_flushes=35;" + "create_if_missing=false;" + "error_if_exists=true;" + "allow_os_buffer=false;" + "delayed_write_rate=4294976214;" + "manifest_preallocation_size=1222;" + "allow_mmap_writes=false;" + "stats_dump_period_sec=70127;" + "allow_fallocate=true;" + "allow_mmap_reads=false;" + "max_log_file_size=4607;" + "random_access_max_buffer_size=1048576;" + "advise_random_on_open=true;", + new_options)); + + ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions), + kDBOptionsBlacklist)); + + options->~DBOptions(); + new_options->~DBOptions(); + + delete[] options_ptr; + delete[] new_options_ptr; +} + +// If the test fails, likely a new option is added to ColumnFamilyOptions +// but it cannot be set through GetColumnFamilyOptionsFromString(), or the +// test is not updated accordingly. +// After adding an option, we need to make sure it is settable by +// GetColumnFamilyOptionsFromString() and add the option to the input +// string passed to GetColumnFamilyOptionsFromString()in this test. +// If it is a complicated type, you also need to add the field to +// kColumnFamilyOptionsBlacklist, and maybe add customized verification +// for it. 
+TEST_F(OptionsParserTest, ColumnFamilyOptionsAllFieldsSettable) { + const OffsetGap kColumnFamilyOptionsBlacklist = { + {offsetof(struct ColumnFamilyOptions, comparator), sizeof(Comparator*)}, + {offsetof(struct ColumnFamilyOptions, merge_operator), + sizeof(std::shared_ptr)}, + {offsetof(struct ColumnFamilyOptions, compaction_filter), + sizeof(const CompactionFilter*)}, + {offsetof(struct ColumnFamilyOptions, compaction_filter_factory), + sizeof(std::shared_ptr)}, + {offsetof(struct ColumnFamilyOptions, compression_per_level), + sizeof(std::vector)}, + {offsetof(struct ColumnFamilyOptions, prefix_extractor), + sizeof(std::shared_ptr)}, + {offsetof(struct ColumnFamilyOptions, + max_bytes_for_level_multiplier_additional), + sizeof(std::vector)}, + {offsetof(struct ColumnFamilyOptions, memtable_factory), + sizeof(std::shared_ptr)}, + {offsetof(struct ColumnFamilyOptions, table_factory), + sizeof(std::shared_ptr)}, + {offsetof(struct ColumnFamilyOptions, + table_properties_collector_factories), + sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)}, + {offsetof(struct ColumnFamilyOptions, inplace_callback), + sizeof(UpdateStatus (*)(char*, uint32_t*, Slice, std::string*))}, + }; + + char* options_ptr = new char[sizeof(ColumnFamilyOptions)]; + + // Count padding bytes by setting all bytes in the memory to a special char, + // copy a well constructed struct to this memory and see how many special + // bytes left. + ColumnFamilyOptions* options = new (options_ptr) ColumnFamilyOptions(); + FillWithSpecialChar(options_ptr, sizeof(ColumnFamilyOptions), + kColumnFamilyOptionsBlacklist); + // It based on the behavior of compiler that padding bytes are not changed + // when copying the struct. It's prone to failure when compiler behavior + // changes. We verify there is unset bytes to detect the case. + *options = ColumnFamilyOptions(); + + // Deprecatd option which is not initialized. 
Need to set it to avoid + // Valgrind error + options->max_mem_compaction_level = 0; + + int unset_bytes_base = NumUnsetBytes(options_ptr, sizeof(ColumnFamilyOptions), + kColumnFamilyOptionsBlacklist); + ASSERT_GT(unset_bytes_base, 0); + options->~ColumnFamilyOptions(); + + options = new (options_ptr) ColumnFamilyOptions(); + FillWithSpecialChar(options_ptr, sizeof(ColumnFamilyOptions), + kColumnFamilyOptionsBlacklist); + + // Following options are not settable through + // GetColumnFamilyOptionsFromString(): + options->rate_limit_delay_max_milliseconds = 33; + options->compaction_pri = CompactionPri::kOldestSmallestSeqFirst; + options->compaction_options_universal = CompactionOptionsUniversal(); + options->compression_opts = CompressionOptions(); + options->hard_rate_limit = 0; + options->soft_rate_limit = 0; + options->compaction_options_fifo = CompactionOptionsFIFO(); + options->max_mem_compaction_level = 0; + + char* new_options_ptr = new char[sizeof(ColumnFamilyOptions)]; + ColumnFamilyOptions* new_options = + new (new_options_ptr) ColumnFamilyOptions(); + FillWithSpecialChar(new_options_ptr, sizeof(ColumnFamilyOptions), + kColumnFamilyOptionsBlacklist); + + // Need to update the option string if a new option is added. 
+ ASSERT_OK(GetColumnFamilyOptionsFromString( + *options, + "compaction_filter_factory=mpudlojcujCompactionFilterFactory;" + "table_factory=PlainTable;" + "prefix_extractor=rocksdb.CappedPrefix.13;" + "comparator=leveldb.BytewiseComparator;" + "compression_per_level=kBZip2Compression:kBZip2Compression:" + "kBZip2Compression:kNoCompression:kZlibCompression:kBZip2Compression:" + "kSnappyCompression;" + "max_bytes_for_level_base=986;" + "bloom_locality=8016;" + "target_file_size_base=4294976376;" + "memtable_prefix_bloom_huge_page_tlb_size=2557;" + "max_successive_merges=5497;" + "max_sequential_skip_in_iterations=4294971408;" + "arena_block_size=1893;" + "target_file_size_multiplier=35;" + "source_compaction_factor=54;" + "min_write_buffer_number_to_merge=9;" + "max_write_buffer_number=84;" + "write_buffer_size=1653;" + "max_grandparent_overlap_factor=64;" + "max_bytes_for_level_multiplier=60;" + "memtable_factory=SkipListFactory;" + "compression=kNoCompression;" + "min_partial_merge_operands=7576;" + "level0_stop_writes_trigger=33;" + "num_levels=99;" + "level0_slowdown_writes_trigger=22;" + "level0_file_num_compaction_trigger=14;" + "expanded_compaction_factor=34;" + "compaction_filter=urxcqstuwnCompactionFilter;" + "soft_rate_limit=530.615385;" + "soft_pending_compaction_bytes_limit=0;" + "max_write_buffer_number_to_maintain=84;" + "verify_checksums_in_compaction=false;" + "merge_operator=aabcxehazrMergeOperator;" + "memtable_prefix_bloom_bits=4642;" + "paranoid_file_checks=true;" + "inplace_update_num_locks=7429;" + "optimize_filters_for_hits=false;" + "level_compaction_dynamic_level_bytes=false;" + "inplace_update_support=false;" + "compaction_style=kCompactionStyleFIFO;" + "memtable_prefix_bloom_probes=2511;" + "purge_redundant_kvs_while_flush=true;" + "filter_deletes=false;" + "hard_pending_compaction_bytes_limit=0;" + "disable_auto_compactions=false;" + "compaction_measure_io_stats=true;", + new_options)); + + ASSERT_EQ(unset_bytes_base, + 
NumUnsetBytes(new_options_ptr, sizeof(ColumnFamilyOptions), + kColumnFamilyOptionsBlacklist)); + + options->~ColumnFamilyOptions(); + new_options->~ColumnFamilyOptions(); + + delete[] options_ptr; + delete[] new_options_ptr; +} #endif // !__clang__ #endif // OS_LINUX #endif // !ROCKSDB_LITE From f53c95f81bcdf4c552186c55fc1242cf09552333 Mon Sep 17 00:00:00 2001 From: krad Date: Fri, 22 Jan 2016 16:27:48 -0800 Subject: [PATCH 034/195] Cosmetic fixes and comments for the reader Summary: Cosmetic fixes and some comments for the script. It is one big hack and hopefully the comments will make it easy to maintain. Test Plan: Run manual tests Reviewers: sdong, rven Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53277 --- .../config/FacebookArcanistConfiguration.php | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/arcanist_util/config/FacebookArcanistConfiguration.php b/arcanist_util/config/FacebookArcanistConfiguration.php index c8834de47..c4785bc00 100644 --- a/arcanist_util/config/FacebookArcanistConfiguration.php +++ b/arcanist_util/config/FacebookArcanistConfiguration.php @@ -24,7 +24,6 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { . 'http_proxy=fwdproxy.any.facebook.com:8080 ' . 'https_proxy=fwdproxy.any.facebook.com:8080 arc call-conduit ' . 'differential.updateunitresults'; - shell_exec($cmd); } @@ -42,28 +41,33 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { shell_exec($this->updateTestCommand($diffID, $test, "waiting")); } - function getSteps($diffID, $username, $test) { $arcrc_content = exec("cat ~/.arcrc | base64 -w0"); + // Sandcastle machines don't have arc setup. We copy the user certificate + // and authenticate using that in sandcastle $setup = array( "name" => "Setup arcrc", "shell" => "echo " . $arcrc_content . 
" | base64 --decode > ~/.arcrc", "user" => "root" ); + // arc demands certain permission on its config $fix_permission = array( "name" => "Fix environment", "shell" => "chmod 600 ~/.arcrc", "user" => "root" ); + // fbcode is a sub-repo. We cannot patch until we add it to ignore otherwise + // git thinks it is uncommited change $fix_git_ignore = array( "name" => "Fix git ignore", "shell" => "echo fbcode >> .git/info/exclude", "user" => "root" ); + // Patch the code (keep your fingures crossed) $patch = array( "name" => "Patch " . $diffID, "shell" => "HTTPS_PROXY=fwdproxy:8080 arc --arcrc-file ~/.arcrc " @@ -71,17 +75,20 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { "user" => "root" ); + // Clean up the user arc config we are using $cleanup = array( "name" => "Arc cleanup", "shell" => "rm -f ~/.arcrc", "user" => "root" ); + // Construct the steps in the order of execution $steps[] = $setup; $steps[] = $fix_permission; $steps[] = $fix_git_ignore; $steps[] = $patch; + // Run the actual command $this->updateTest($diffID, $test); $cmd = $this->updateTestCommand($diffID, $test, "running") . ";" . "(./build_tools/precommit_checker.py " . $test @@ -104,17 +111,22 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { } function startTestsInSandcastle($workflow) { + // extract information we need from workflow or CLI $diffID = $workflow->getDiffId(); $username = exec("whoami"); if ($diffID == null || $username == null) { + // there is no diff and we can't extract username + // we cannot schedule sandcasstle job return; } + // list of tests we want to run in sandcastle $tests = array( "unit", "unit_481", "clang_unit", "tsan", "asan", "lite" ); + // construct a job definition for each test and add it to the master plan foreach ($tests as $test) { $arg[] = array( "name" => "RocksDB diff " . $diffID . " test " . 
$test, @@ -122,6 +134,10 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { ); } + // we cannot submit the parallel execution master plan to sandcastle + // we need supply the job plan as a determinator + // so we construct a small job that will spit out the master job plan + // which sandcastle will parse and execute $arg_encoded = base64_encode(json_encode($arg)); $command = array( @@ -136,6 +152,7 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { "user" => "root" ); + // submit to sandcastle $url = 'https://interngraph.intern.facebook.com/sandcastle/generate?' .'command=SandcastleUniversalCommand' .'&vcs=rocksdb-git&revision=origin%2Fmaster&type=lego' @@ -145,22 +162,15 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { $cmd = 'https_proxy= HTTPS_PROXY= curl -s -k -F app=659387027470559 ' . '-F token=AeO_3f2Ya3TujjnxGD4 "' . $url . '"'; - echo "\n====================================================== \n"; - echo "Scheduling sandcastle job for D" . $diffID . " for " . $username; - echo "\n"; - echo "Please follow the URL for details on the job. \n"; - echo "An email will be sent to " . $username . "@fb.com on failure. \n"; - echo "\n"; - $output = shell_exec($cmd); + // extract sandcastle URL from the response preg_match('/url": "(.+)"/', $output, $sandcastle_url); - echo "url: " . $sandcastle_url[1] . "\n"; + echo "\nSandcastle URL: " . $sandcastle_url[1] . 
"\n"; + // Ask phabricator to display it on the diff UI $this->postURL($diffID, $sandcastle_url[1]); - - echo "====================================================== \n"; } ////////////////////////////////////////////////////////////////////// @@ -180,5 +190,4 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { ."buildWithParameters?token=AUTH&DIFF_ID=$diffID"; system("curl --noproxy '*' \"$url\" > /dev/null 2>&1"); } - } From 2fbc59a34830734baa111704b3fa2296b09f21d5 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Mon, 25 Jan 2016 13:47:07 -0800 Subject: [PATCH 035/195] Disallow SstFileWriter from creating empty sst files Summary: SstFileWriter may create an sst file with no entries Right now this will fail when being ingested using DB::AddFile() saying that the keys are corrupted Test Plan: make check Reviewers: yhchiang, rven, anthony, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D52815 --- db/db_impl.cc | 3 +++ db/db_test.cc | 6 ++++++ table/sst_file_writer.cc | 3 +++ 3 files changed, 12 insertions(+) diff --git a/db/db_impl.cc b/db/db_impl.cc index 8f9c0168e..dcc90e31c 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3502,6 +3502,9 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family, auto cfh = reinterpret_cast(column_family); ColumnFamilyData* cfd = cfh->cfd(); + if (file_info->num_entries == 0) { + return Status::InvalidArgument("File contain no entries"); + } if (file_info->version != 1) { return Status::InvalidArgument("Generated table version is not supported"); } diff --git a/db/db_test.cc b/db/db_test.cc index 1143eef1b..6bef10ca9 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -8801,6 +8801,12 @@ TEST_F(DBTest, AddExternalSstFile) { ASSERT_EQ(file5_info.smallest_key, Key(400)); ASSERT_EQ(file5_info.largest_key, Key(499)); + // Cannot create an empty sst file + std::string file_empty = sst_files_folder + "file_empty.sst"; + ExternalSstFileInfo file_empty_info; + s = 
sst_file_writer.Finish(&file_empty_info); + ASSERT_NOK(s); + DestroyAndReopen(options); // Add file using file path s = db_->AddFile(file1); diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index 1c21a25f7..d13adbe08 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -163,6 +163,9 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) { if (!r->builder) { return Status::InvalidArgument("File is not opened"); } + if (r->file_info.num_entries == 0) { + return Status::InvalidArgument("Cannot create sst file with no entries"); + } Status s = r->builder->Finish(); if (s.ok()) { From 40911e0b32c7453f8ef69892482fc3801903667b Mon Sep 17 00:00:00 2001 From: Venkatesh Radhakrishnan Date: Mon, 25 Jan 2016 14:04:27 -0800 Subject: [PATCH 036/195] Run unit tests in parallel to find failing tests Summary: Added make targets parallel_test and parallel_dbtest to run tests in parallel. Each test is run 32 times in parallel. There is a timeout to catch hangs. The test continues after a failure and reports non-zero status on failure Test Plan: Run the two make targets Reviewers: anthony, yhchiang, IslamAbdelRahman, kradhakrishnan, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53079 --- Makefile | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8d2b9c41b..2eeb2f1c5 100644 --- a/Makefile +++ b/Makefile @@ -464,7 +464,7 @@ test_names = \ -e '/^(\s*)(\S+)/; !$$1 and do {$$p=$$2; break};' \ -e 'print qq! $$p$$2!' -ifeq ($(MAKECMDGOALS),check) +ifneq (,$(filter check parallel_check,$(MAKECMDGOALS)),) # Use /dev/shm if it has the sticky bit set (otherwise, /tmp), # and create a randomly-named rocksdb.XXXX directory therein. # We'll use that directory in the "make check" rules. 
@@ -620,6 +620,48 @@ valgrind_check: $(TESTS) fi; \ done + +ifneq ($(PAR_TEST),) +parloop: + ret_bad=0; \ + for t in $(PAR_TEST); do \ + echo "===== Running $$t in parallel $(NUM_PAR)";\ + if [ $(db_test) -eq 1 ]; then \ + seq $(J) | v="$$t" parallel --gnu 's=$(TMPD)/rdb-{}; export TEST_TMPDIR=$$s;' \ + 'timeout 2m ./db_test --gtest_filter=$$v >> $$s/log-{} 2>1'; \ + else\ + seq $(J) | v="./$$t" parallel --gnu 's=$(TMPD)/rdb-{};' \ + 'export TEST_TMPDIR=$$s; timeout 10m $$v >> $$s/log-{} 2>1'; \ + fi; \ + ret_code=$$?; \ + if [ $$ret_code -ne 0 ]; then \ + ret_bad=$$ret_code; \ + echo $$t exited with $$ret_code; \ + fi; \ + done; \ + exit $$ret_bad; +endif + +all_tests:=$(shell $(test_names)) + +parallel_check: $(TESTS) + $(AM_V_GEN)if test "$(J)" > 1 \ + && (parallel --gnu --help 2>/dev/null) | \ + grep -q 'GNU Parallel'; \ + then \ + echo Running in parallel $(J); \ + else \ + echo "Need to have GNU Parallel and J > 1"; exit 1; \ + fi; \ + ret_bad=0; \ + echo $(J);\ + echo Test Dir: $(TMPD); \ + seq $(J) | parallel --gnu 's=$(TMPD)/rdb-{}; rm -rf $$s; mkdir $$s'; \ + $(MAKE) PAR_TEST="$(all_tests)" TMPD=$(TMPD) \ + J=$(J) db_test=1 parloop; \ + $(MAKE) PAR_TEST="$(filter-out db_test, $(TESTS))" \ + TMPD=$(TMPD) J=$(J) db_test=0 parloop; + analyze: clean $(CLANG_SCAN_BUILD) --use-analyzer=$(CLANG_ANALYZER) \ --use-c++=$(CXX) --use-cc=$(CC) --status-bugs \ From 3e9209a0787087d6ab37b109311774badff292a0 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 25 Jan 2016 14:04:55 -0800 Subject: [PATCH 037/195] Updated GetProperty documentation Summary: As titled. Also added the kBaseLevel string, which was missing earlier. 
Test Plan: built Reviewers: yhchiang, anthony, rven, kradhakrishnan, IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53271 --- db/internal_stats.cc | 1 + include/rocksdb/db.h | 167 +++++++++++++++++++++++++++++-------------- 2 files changed, 113 insertions(+), 55 deletions(-) diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 8da179d84..ebd8cd020 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -180,6 +180,7 @@ const std::string DB::Properties::kEstimateLiveDataSize = rocksdb_prefix + estimate_live_data_size; const std::string DB::Properties::kTotalSstFilesSize = rocksdb_prefix + total_sst_files_size; +const std::string DB::Properties::kBaseLevel = rocksdb_prefix + base_level; const std::string DB::Properties::kEstimatePendingCompactionBytes = rocksdb_prefix + estimate_pending_comp_bytes; const std::string DB::Properties::kAggregatedTableProperties = diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index a94496a65..d916db348 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -327,97 +327,154 @@ class DB { // use "snapshot" after this call. virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0; - // DB implementations can export properties about their state - // via this method. If "property" is a valid property understood by this - // DB implementation, fills "*value" with its current value and returns - // true. Otherwise returns false. - // - // - // Valid property names include: - // - // "rocksdb.num-files-at-level" - return the number of files at level , - // where is an ASCII representation of a level number (e.g. "0"). - // "rocksdb.stats" - returns a multi-line string that describes statistics - // about the internal operation of the DB. - // "rocksdb.sstables" - returns a multi-line string that describes all - // of the sstables that make up the db contents. 
- // "rocksdb.cfstats" - // "rocksdb.dbstats" - // "rocksdb.levelstats" - // "rocksdb.num-immutable-mem-table" - // "rocksdb.mem-table-flush-pending" - // "rocksdb.num-immutable-mem-table-flushed" - // "rocksdb.compaction-pending" - 1 if at least one compaction is pending - // "rocksdb.background-errors" - accumulated number of background errors - // "rocksdb.cur-size-active-mem-table" - // "rocksdb.cur-size-all-mem-tables" - // "rocksdb.size-all-mem-tables" - // "rocksdb.num-entries-active-mem-table" - // "rocksdb.num-entries-imm-mem-tables" - // "rocksdb.num-deletes-active-mem-table" - // "rocksdb.num-deletes-imm-mem-tables" - // "rocksdb.estimate-num-keys" - estimated keys in the column family - // "rocksdb.estimate-table-readers-mem" - estimated memory used for reding - // SST tables, that is not counted as a part of block cache. - // "rocksdb.is-file-deletions-enabled" - // "rocksdb.num-snapshots" - // "rocksdb.oldest-snapshot-time" - // "rocksdb.num-live-versions" - `version` is an internal data structure. - // See version_set.h for details. More live versions often mean more SST - // files are held from being deleted, by iterators or unfinished - // compactions. - // "rocksdb.estimate-live-data-size" - // "rocksdb.total-sst-files-size" - total size of all used sst files, this - // may slow down online queries if there are too many files. - // "rocksdb.base-level" - // "rocksdb.estimate-pending-compaction-bytes" - estimated total number of - // bytes compaction needs to rewrite the data to get all levels down - // to under target size. Not valid for other compactions than - // level-based. - // "rocksdb.aggregated-table-properties" - returns a string representation - // of the aggregated table properties of the target column family. - // "rocksdb.aggregated-table-properties-at-level", same as the previous - // one but only returns the aggregated table properties of the specified - // level "N" at the target column family. 
- // "rocksdb.num-running-compactions" - the number of currently running - // compacitons. - // "rocksdb.num-running-flushes" - the number of currently running flushes. #ifndef ROCKSDB_LITE + // Contains all valid property arguments for GetProperty(). struct Properties { + // "rocksdb.num-files-at-level" - returns string containing the number + // of files at level , where is an ASCII representation of a + // level number (e.g., "0"). static const std::string kNumFilesAtLevelPrefix; + + // "rocksdb.stats" - returns a multi-line string containing the data + // described by kCFStats followed by the data described by kDBStats. static const std::string kStats; + + // "rocksdb.sstables" - returns a multi-line string summarizing current + // SST files. static const std::string kSSTables; + + // "rocksdb.cfstats" - returns a multi-line string with general column + // family stats per-level over db's lifetime ("L"), aggregated over + // db's lifetime ("Sum"), and aggregated over the interval since the + // last retrieval ("Int"). static const std::string kCFStats; + + // "rocksdb.dbstats" - returns a multi-line string with general database + // stats, both cumulative (over the db's lifetime) and interval (since + // the last retrieval of kDBStats). static const std::string kDBStats; + + // "rocksdb.levelstats" - returns multi-line string containing the number + // of files per level and total size of each level (MB). static const std::string kLevelStats; + + // "rocksdb.num-immutable-mem-table" - returns number of immutable + // memtables that have not yet been flushed. static const std::string kNumImmutableMemTable; + + // "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable + // memtables that have already been flushed. static const std::string kNumImmutableMemTableFlushed; + + // "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is + // pending; otherwise, returns 0. 
static const std::string kMemTableFlushPending; + + // "rocksdb.num-running-flushes" - returns the number of currently running + // flushes. static const std::string kNumRunningFlushes; + + // "rocksdb.compaction-pending" - returns 1 if at least one compaction is + // pending; otherwise, returns 0. static const std::string kCompactionPending; + + // "rocksdb.num-running-compactions" - returns the number of currently + // running compactions. static const std::string kNumRunningCompactions; + + // "rocksdb.background-errors" - returns accumulated number of background + // errors. static const std::string kBackgroundErrors; + + // "rocksdb.cur-size-active-mem-table" - returns approximate size of active + // memtable (bytes). static const std::string kCurSizeActiveMemTable; + + // "rocksdb.cur-size-all-mem-tables" - returns approximate size of active + // and unflushed immutable memtables (bytes). static const std::string kCurSizeAllMemTables; + + // "rocksdb.size-all-mem-tables" - returns approximate size of active, + // unflushed immutable, and pinned immutable memtables (bytes). static const std::string kSizeAllMemTables; + + // "rocksdb.num-entries-active-mem-table" - returns total number of entries + // in the active memtable. static const std::string kNumEntriesActiveMemTable; + + // "rocksdb.num-entries-imm-mem-tables" - returns total number of entries + // in the unflushed immutable memtables. static const std::string kNumEntriesImmMemTables; + + // "rocksdb.num-deletes-active-mem-table" - returns total number of delete + // entries in the active memtable. static const std::string kNumDeletesActiveMemTable; + + // "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete + // entries in the unflushed immutable memtables. static const std::string kNumDeletesImmMemTables; + + // "rocksdb.estimate-num-keys" - returns estimated number of total keys in + // the active and unflushed immutable memtables. 
static const std::string kEstimateNumKeys; + + // "rocksdb.estimate-table-readers-mem" - returns estimated memory used for + // reading SST tables, excluding memory used in block cache (e.g., + // filter and index blocks). static const std::string kEstimateTableReadersMem; + + // "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete + // files is enabled; otherwise, returns a non-zero number. static const std::string kIsFileDeletionsEnabled; + + // "rocksdb.num-snapshots" - returns number of unreleased snapshots of the + // database. static const std::string kNumSnapshots; + + // "rocksdb.oldest-snapshot-time" - returns number representing unix + // timestamp of oldest unreleased snapshot. static const std::string kOldestSnapshotTime; + + // "rocksdb.num-live-versions" - returns number of live versions. `Version` + // is an internal data structure. See version_set.h for details. More + // live versions often mean more SST files are held from being deleted, + // by iterators or unfinished compactions. static const std::string kNumLiveVersions; + + // "rocksdb.estimate-live-data-size" - returns an estimate of the amount of + // live data in bytes. static const std::string kEstimateLiveDataSize; + + // "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST + // files. + // WARNING: may slow down online queries if there are too many files. static const std::string kTotalSstFilesSize; + + // "rocksdb.base-level" - returns number of level to which L0 data will be + // compacted. + static const std::string kBaseLevel; + + // "rocksdb.estimate-pending-compaction-bytes" - returns estimated total + // number of bytes compaction needs to rewrite to get all levels down + // to under target size. Not valid for other compactions than level- + // based. 
static const std::string kEstimatePendingCompactionBytes; + + // "rocksdb.aggregated-table-properties" - returns a string representation + // of the aggregated table properties of the target column family. static const std::string kAggregatedTableProperties; + + // "rocksdb.aggregated-table-properties-at-level", same as the previous + // one but only returns the aggregated table properties of the + // specified level "N" at the target column family. static const std::string kAggregatedTablePropertiesAtLevel; }; #endif /* ROCKSDB_LITE */ + // DB implementations can export properties about their state via this method. + // If "property" is a valid property understood by this DB implementation (see + // Properties struct above for valid options), fills "*value" with its current + // value and returns true. Otherwise, returns false. virtual bool GetProperty(ColumnFamilyHandle* column_family, const Slice& property, std::string* value) = 0; virtual bool GetProperty(const Slice& property, std::string* value) { From f7ef1a613276641af93f02f74f17cf66174d8126 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Mon, 25 Jan 2016 14:35:04 -0800 Subject: [PATCH 038/195] Include rest of dependencies in dependencies.sh Summary: This diff - Include the rest of the dependencies (kernel-headers, binutils, valgrind) in dependencies.sh - update zstd to 0.4.7 - It also fixes a problem in clang scan build Test Plan: make check USE_CLANG=1 make check USE_CLANG=1 make analyze coverage_test.sh Reviewers: sdong, yhchiang, rven, andrewkr, kradhakrishnan Reviewed By: kradhakrishnan Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53301 --- build_tools/dependencies.sh | 9 ++++-- build_tools/fbcode_config.sh | 20 +++++++------ build_tools/update_dependencies.sh | 45 +++++++++++++++++------------- 3 files changed, 43 insertions(+), 31 deletions(-) diff --git a/build_tools/dependencies.sh b/build_tools/dependencies.sh index a7e590904..a7d9a6531 100644 ---
a/build_tools/dependencies.sh +++ b/build_tools/dependencies.sh @@ -1,13 +1,16 @@ -GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.9.x/centos6-native/* -CLANG_BASE=/mnt/vol/engshare/fbcode/third-party2/clang/3.7.1 +GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.9.x/centos6-native/1317bc4/ +CLANG_BASE=/mnt/vol/engshare/fbcode/third-party2/clang/3.7.1/centos6-native/9d9ecb9/ LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/5710d6a0fb0d12820aac0bffcd7fcb8646e7fff7/4.9.x/gcc-4.9-glibc-2.20/024dbc3 GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0600c95b31226b5e535614c590677d87c62d8016/2.20/gcc-4.9-glibc-2.20/500e281 SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/cbf6f1f209e5bd160bdc5d971744e039f36b1566/1.1.3/gcc-4.9-glibc-2.20/e9936bf ZLIB_BASE=/mnt/gvfs/third-party2/zlib/6d39cb54708049f527e713ad19f2aadb9d3667e8/1.2.8/gcc-4.9-glibc-2.20/e9936bf BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/2ddd45f0853bfc8bb1c27f0f447236a1a26c338a/1.0.6/gcc-4.9-glibc-2.20/e9936bf LZ4_BASE=/mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf -ZSTD_BASE=/mnt/gvfs/third-party2/zstd/d4ac2c5f9be76d57a6cbd3eb1011e97574a56cde/0.4.5/gcc-4.9-glibc-2.20/e9936bf +ZSTD_BASE=/mnt/gvfs/third-party2/zstd/69d56740ffb89d8bc81ded8ec428c01a813ea948/0.4.7/gcc-4.9-glibc-2.20/e9936bf GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/c7275a4ceae0aca0929e56964a31dafc53c1ee96/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/5f0be745ddc0f86f22c8c8bb64b6b1163c93df91/4.0.3/gcc-4.9-glibc-2.20/a6c5e1e NUMA_BASE=/mnt/gvfs/third-party2/numa/ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73/2.0.8/gcc-4.9-glibc-2.20/e9936bf LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/121f1a75c4414683aea8c70b761bfaf187f7c1a3/trunk/gcc-4.9-glibc-2.20/12266b1 +KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.9-glibc-2.20/da39a3e 
+BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/75670d0d8ef4891fd1ec2a7513ef01cd002c823b/2.25/centos6-native/da39a3e +VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/af85c56f424cd5edfc2c97588299b44ecdec96bb/3.10.0/gcc-4.9-glibc-2.20/e9936bf diff --git a/build_tools/fbcode_config.sh b/build_tools/fbcode_config.sh index 8cc0b53ff..9e1c613ec 100644 --- a/build_tools/fbcode_config.sh +++ b/build_tools/fbcode_config.sh @@ -7,9 +7,8 @@ # PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included -source "$PWD/build_tools/dependencies.sh" - -TP2_LATEST="/mnt/vol/engshare/fbcode/third-party2" +BASEDIR=`dirname $BASH_SOURCE` +source "$BASEDIR/dependencies.sh" CFLAGS="" @@ -76,16 +75,19 @@ fi # use Intel SSE support for checksum calculations export USE_SSE=1 -BINUTILS="$TP2_LATEST/binutils/2.25/centos6-native/*/bin" +BINUTILS="$BINUTILS_BASE/bin" AR="$BINUTILS/ar" DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE" STDLIBS="-L $GCC_BASE/lib64" -CLANG_BIN="$CLANG_BASE/centos6-native/*/bin" +CLANG_BIN="$CLANG_BASE/bin" +CLANG_LIB="$CLANG_BASE/lib" +CLANG_SRC="$CLANG_BASE/../../src" + CLANG_ANALYZER="$CLANG_BIN/clang++" -CLANG_SCAN_BUILD="$CLANG_BASE/src/clang/tools/scan-build/scan-build" +CLANG_SCAN_BUILD="$CLANG_SRC/clang/tools/scan-build/scan-build" if [ -z "$USE_CLANG" ]; then # gcc @@ -97,11 +99,11 @@ if [ -z "$USE_CLANG" ]; then CFLAGS+=" -isystem $LIBGCC_INCLUDE" else # clang - CLANG_INCLUDE="$CLANG_BASE/centos6-native/*/lib/clang/*/include" + CLANG_INCLUDE="$CLANG_LIB/clang/*/include" CC="$CLANG_BIN/clang" CXX="$CLANG_BIN/clang++" - KERNEL_HEADERS_INCLUDE="$TP2_LATEST/kernel-headers/3.2.18_70_fbk11_00129_gc8882d0/gcc-4.9-glibc-2.20/*/include" + KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include" CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib" CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x " @@ -127,6 +129,6 @@ 
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++" EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS" -VALGRIND_VER="$TP2_LATEST/valgrind/3.10.0/gcc-4.9-glibc-2.20/*/bin/" +VALGRIND_VER="$VALGRIND_BASE/bin/" export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD diff --git a/build_tools/update_dependencies.sh b/build_tools/update_dependencies.sh index 6a9c662a2..63074c372 100755 --- a/build_tools/update_dependencies.sh +++ b/build_tools/update_dependencies.sh @@ -3,10 +3,7 @@ # Update dependencies.sh file with the latest avaliable versions BASEDIR=$(dirname $0) -OUTPUT="$BASEDIR/dependencies.sh" - -rm -f "$OUTPUT" -touch "$OUTPUT" +OUTPUT="" function log_variable() { @@ -29,7 +26,7 @@ function get_lib_base() local result="$TP2_LATEST/$lib_name/" # Lib Version - if [ -z "$lib_version" ]; then + if [ -z "$lib_version" ] || [ "$lib_version" = "LATEST" ]; then # version is not provided, use latest result=`ls -dr1v $result/*/ | head -n1` else @@ -44,36 +41,46 @@ function get_lib_base() result="$result/$lib_platform/" fi - result="$result/*/" + result=`ls -1d $result/*/ | head -n1` # lib_name => LIB_NAME_BASE local __res_var=${lib_name^^}"_BASE" + __res_var=`echo $__res_var | tr - _` # LIB_NAME_BASE=$result eval $__res_var=`readlink -f $result` log_variable $__res_var } +OUTPUT="$BASEDIR/dependencies.sh" + +rm -f "$OUTPUT" +touch "$OUTPUT" + echo "Writing dependencies to $OUTPUT" # Compilers locations -GCC_BASE="$TP2_LATEST/gcc/4.9.x/centos6-native/*" -CLANG_BASE="$TP2_LATEST/clang/3.7.1" +GCC_BASE=`ls -d1 $TP2_LATEST/gcc/4.9.x/centos6-native/*/ | head -n1` +CLANG_BASE=`ls -d1 $TP2_LATEST/clang/3.7.1/centos6-native/*/ | head -n1` log_variable GCC_BASE log_variable CLANG_BASE # Libraries locations -get_lib_base libgcc -get_lib_base glibc 2.20 gcc-4.9-glibc-2.20 -get_lib_base snappy -get_lib_base zlib -get_lib_base bzip2 
-get_lib_base lz4 -get_lib_base zstd -get_lib_base gflags -get_lib_base jemalloc -get_lib_base numa -get_lib_base libunwind +get_lib_base libgcc 4.9.x +get_lib_base glibc 2.20 +get_lib_base snappy LATEST +get_lib_base zlib LATEST +get_lib_base bzip2 LATEST +get_lib_base lz4 LATEST +get_lib_base zstd LATEST +get_lib_base gflags LATEST +get_lib_base jemalloc LATEST +get_lib_base numa LATEST +get_lib_base libunwind LATEST + +get_lib_base kernel-headers LATEST +get_lib_base binutils LATEST centos6-native +get_lib_base valgrind LATEST git diff $OUTPUT From fb9811ee9bdb8582779ad713c750fc9ba0f42587 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 21 Jan 2016 13:55:56 -0800 Subject: [PATCH 039/195] Add a perf context level that doesn't measure time for mutex operations Summary: Timing mutex operations can impact scalability of the system. Add a new perf context level that can measure time counters except for mutex. Test Plan: Add a new unit test case to make sure it is not set. Reviewers: IslamAbdelRahman, rven, kradhakrishnan, yhchiang, anthony Reviewed By: anthony Subscribers: MarkCallaghan, leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53199 --- HISTORY.md | 4 ++++ db/perf_context_test.cc | 15 +++++++++------ include/rocksdb/perf_level.h | 8 +++++--- util/instrumented_mutex.cc | 12 ++++++------ util/perf_context_imp.h | 8 ++++---- util/perf_step_timer.h | 12 ++++++------ 6 files changed, 34 insertions(+), 25 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index f721b7c1c..4cc2bac27 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # Rocksdb Change Log +## Unreleased +### Public API Changes +* Add a new perf context level between kEnableCount and kEnableTime. Level 2 now doesn't include timers for mutexes. + ## 4.4.0 (1/14/2016) ### Public API Changes * Change names in CompactionPri and add a new one. 
diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index 00065dc92..72b52f6e8 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -543,28 +543,31 @@ TEST_F(PerfContextTest, SeekKeyComparison) { } TEST_F(PerfContextTest, DBMutexLockCounter) { - SetPerfLevel(kEnableTime); int stats_code[] = {0, static_cast(DB_MUTEX_WAIT_MICROS)}; - for (int c = 0; c < 2; ++c) { + for (PerfLevel perf_level : + {PerfLevel::kEnableTimeExceptForMutex, PerfLevel::kEnableTime}) { + for (int c = 0; c < 2; ++c) { InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]); mutex.Lock(); std::thread child_thread([&] { - SetPerfLevel(kEnableTime); + SetPerfLevel(perf_level); perf_context.Reset(); ASSERT_EQ(perf_context.db_mutex_lock_nanos, 0); mutex.Lock(); mutex.Unlock(); - if (stats_code[c] == DB_MUTEX_WAIT_MICROS) { + if (perf_level == PerfLevel::kEnableTimeExceptForMutex || + stats_code[c] != DB_MUTEX_WAIT_MICROS) { + ASSERT_EQ(perf_context.db_mutex_lock_nanos, 0); + } else { // increment the counter only when it's a DB Mutex ASSERT_GT(perf_context.db_mutex_lock_nanos, 0); - } else { - ASSERT_EQ(perf_context.db_mutex_lock_nanos, 0); } }); Env::Default()->SleepForMicroseconds(100); mutex.Unlock(); child_thread.join(); } + } } TEST_F(PerfContextTest, FalseDBMutexWait) { diff --git a/include/rocksdb/perf_level.h b/include/rocksdb/perf_level.h index fee8ce1c4..cd7480097 100644 --- a/include/rocksdb/perf_level.h +++ b/include/rocksdb/perf_level.h @@ -14,9 +14,11 @@ namespace rocksdb { // How much perf stats to collect. Affects perf_context and iostats_context. 
enum PerfLevel { - kDisable = 0, // disable perf stats - kEnableCount = 1, // enable only count stats - kEnableTime = 2 // enable time stats too + kDisable = 0, // disable perf stats + kEnableCount = 1, // enable only count stats + kEnableTimeExceptForMutex = 2, // Other than count stats, also enable time + // stats except for mutexes + kEnableTime = 3 // enable count and time stats }; // set the perf stats level for current thread diff --git a/util/instrumented_mutex.cc b/util/instrumented_mutex.cc index bfb989a1d..e5c6527be 100644 --- a/util/instrumented_mutex.cc +++ b/util/instrumented_mutex.cc @@ -9,8 +9,8 @@ namespace rocksdb { void InstrumentedMutex::Lock() { - PERF_CONDITIONAL_TIMER_GUARD(db_mutex_lock_nanos, - stats_code_ == DB_MUTEX_WAIT_MICROS); + PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(db_mutex_lock_nanos, + stats_code_ == DB_MUTEX_WAIT_MICROS); uint64_t wait_time_micros = 0; if (env_ != nullptr && stats_ != nullptr) { { @@ -31,8 +31,8 @@ void InstrumentedMutex::LockInternal() { } void InstrumentedCondVar::Wait() { - PERF_CONDITIONAL_TIMER_GUARD(db_condition_wait_nanos, - stats_code_ == DB_MUTEX_WAIT_MICROS); + PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(db_condition_wait_nanos, + stats_code_ == DB_MUTEX_WAIT_MICROS); uint64_t wait_time_micros = 0; if (env_ != nullptr && stats_ != nullptr) { { @@ -53,8 +53,8 @@ void InstrumentedCondVar::WaitInternal() { } bool InstrumentedCondVar::TimedWait(uint64_t abs_time_us) { - PERF_CONDITIONAL_TIMER_GUARD(db_condition_wait_nanos, - stats_code_ == DB_MUTEX_WAIT_MICROS); + PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(db_condition_wait_nanos, + stats_code_ == DB_MUTEX_WAIT_MICROS); uint64_t wait_time_micros = 0; bool result = false; if (env_ != nullptr && stats_ != nullptr) { diff --git a/util/perf_context_imp.h b/util/perf_context_imp.h index a5c4c39d9..d28b55179 100644 --- a/util/perf_context_imp.h +++ b/util/perf_context_imp.h @@ -33,10 +33,10 @@ namespace rocksdb { PerfStepTimer perf_step_timer_ ## 
metric(&(perf_context.metric)); \ perf_step_timer_ ## metric.Start(); -#define PERF_CONDITIONAL_TIMER_GUARD(metric, condition) \ - PerfStepTimer perf_step_timer_##metric(&(perf_context.metric)); \ - if ((condition)) { \ - perf_step_timer_##metric.Start(); \ +#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition) \ + PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), true); \ + if ((condition)) { \ + perf_step_timer_##metric.Start(); \ } // Update metric with time elapsed since last START. start time is reset diff --git a/util/perf_step_timer.h b/util/perf_step_timer.h index 950258345..631cd317c 100644 --- a/util/perf_step_timer.h +++ b/util/perf_step_timer.h @@ -12,12 +12,12 @@ namespace rocksdb { class PerfStepTimer { public: - PerfStepTimer(uint64_t* metric) - : enabled_(perf_level >= PerfLevel::kEnableTime), - env_(enabled_ ? Env::Default() : nullptr), - start_(0), - metric_(metric) { - } + explicit PerfStepTimer(uint64_t* metric, bool for_mutex = false) + : enabled_(perf_level >= PerfLevel::kEnableTime || + (!for_mutex && perf_level >= kEnableTimeExceptForMutex)), + env_(enabled_ ? Env::Default() : nullptr), + start_(0), + metric_(metric) {} ~PerfStepTimer() { Stop(); From da33dfe1884772cdd6e1c46d7ee0c892d1cdc7d2 Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 22 Jan 2016 11:02:43 -0800 Subject: [PATCH 040/195] Parameterize DBTest.Randomized Summary: Break down DBTest.Randomized to multiple gtest tests based on config type Test Plan: Run the test and all tests. 
Make sure configurations are correctly set Reviewers: yhchiang, IslamAbdelRahman, rven, kradhakrishnan, andrewkr, anthony Reviewed By: anthony Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53247 --- db/db_test.cc | 47 ++++++++++++++++---------- db/db_test_util.cc | 83 ++++++++++++++++++++++++---------------------- db/db_test_util.h | 2 ++ 3 files changed, 75 insertions(+), 57 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index 6bef10ca9..6b0f5b2d5 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -5097,12 +5097,37 @@ static bool CompareIterators(int step, return ok; } -TEST_F(DBTest, Randomized) { +class DBTestRandomized : public DBTest, + public ::testing::WithParamInterface { + public: + virtual void SetUp() override { option_config_ = GetParam(); } + + static std::vector GenerateOptionConfigs() { + std::vector option_configs; + // skip cuckoo hash as it does not support snapshot. + for (int option_config = kDefault; option_config < kEnd; ++option_config) { + if (!ShouldSkipOptions(option_config, kSkipDeletesFilterFirst | + kSkipNoSeekToLast | + kSkipHashCuckoo)) { + option_configs.push_back(option_config); + } + } + return option_configs; + } +}; + +INSTANTIATE_TEST_CASE_P( + DBTestRandomized, DBTestRandomized, + ::testing::ValuesIn(DBTestRandomized::GenerateOptionConfigs())); + +TEST_P(DBTestRandomized, Randomized) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; - Random rnd(test::RandomSeed()); - do { - ModelDB model(CurrentOptions(options_override)); + Options options = CurrentOptions(options_override); + DestroyAndReopen(options); + + Random rnd(test::RandomSeed() + GetParam()); + ModelDB model(options); const int N = 10000; const Snapshot* model_snap = nullptr; const Snapshot* db_snap = nullptr; @@ -5127,13 +5152,10 @@ TEST_F(DBTest, Randomized) { : rnd.Uniform(8)); ASSERT_OK(model.Put(WriteOptions(), k, v)); ASSERT_OK(db_->Put(WriteOptions(), k, v)); - } else if (p < 
90) { // Delete k = RandomKey(&rnd, minimum); ASSERT_OK(model.Delete(WriteOptions(), k)); ASSERT_OK(db_->Delete(WriteOptions(), k)); - - } else { // Multi-element batch WriteBatch b; const int num = rnd.Uniform(8); @@ -5171,26 +5193,15 @@ TEST_F(DBTest, Randomized) { if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); - - auto options = CurrentOptions(options_override); Reopen(options); ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); model_snap = model.GetSnapshot(); db_snap = db_->GetSnapshot(); } - - if ((step % 2000) == 0) { - fprintf(stderr, - "DBTest.Randomized, option ID: %d, step: %d out of %d\n", - option_config_, step, N); - } } if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); - // skip cuckoo hash as it does not support snapshot. - } while (ChangeOptions(kSkipDeletesFilterFirst | kSkipNoSeekToLast | - kSkipHashCuckoo)); } TEST_F(DBTest, MultiGetSimple) { diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 0d342cc52..39a7a364f 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -73,63 +73,68 @@ DBTestBase::~DBTestBase() { delete env_; } -// Switch to a fresh database with the next option configuration to -// test. Return false if there are no more configurations to test. 
-bool DBTestBase::ChangeOptions(int skip_mask) { - for (option_config_++; option_config_ < kEnd; option_config_++) { +bool DBTestBase::ShouldSkipOptions(int option_config, int skip_mask) { #ifdef ROCKSDB_LITE // These options are not supported in ROCKSDB_LITE - if (option_config_ == kHashSkipList || - option_config_ == kPlainTableFirstBytePrefix || - option_config_ == kPlainTableCappedPrefix || - option_config_ == kPlainTableCappedPrefixNonMmap || - option_config_ == kPlainTableAllBytesPrefix || - option_config_ == kVectorRep || option_config_ == kHashLinkList || - option_config_ == kHashCuckoo || - option_config_ == kUniversalCompaction || - option_config_ == kUniversalCompactionMultiLevel || - option_config_ == kUniversalSubcompactions || - option_config_ == kFIFOCompaction) { - continue; + if (option_config == kHashSkipList || + option_config == kPlainTableFirstBytePrefix || + option_config == kPlainTableCappedPrefix || + option_config == kPlainTableCappedPrefixNonMmap || + option_config == kPlainTableAllBytesPrefix || + option_config == kVectorRep || option_config == kHashLinkList || + option_config == kHashCuckoo || option_config == kUniversalCompaction || + option_config == kUniversalCompactionMultiLevel || + option_config == kUniversalSubcompactions || + option_config == kFIFOCompaction) { + return true; } #endif if ((skip_mask & kSkipDeletesFilterFirst) && - option_config_ == kDeletesFilterFirst) { - continue; + option_config == kDeletesFilterFirst) { + return true; } if ((skip_mask & kSkipUniversalCompaction) && - (option_config_ == kUniversalCompaction || - option_config_ == kUniversalCompactionMultiLevel)) { - continue; + (option_config == kUniversalCompaction || + option_config == kUniversalCompactionMultiLevel)) { + return true; } - if ((skip_mask & kSkipMergePut) && option_config_ == kMergePut) { - continue; + if ((skip_mask & kSkipMergePut) && option_config == kMergePut) { + return true; } if ((skip_mask & kSkipNoSeekToLast) && - (option_config_ == 
kHashLinkList || option_config_ == kHashSkipList)) { - continue; + (option_config == kHashLinkList || option_config == kHashSkipList)) { + return true; } if ((skip_mask & kSkipPlainTable) && - (option_config_ == kPlainTableAllBytesPrefix || - option_config_ == kPlainTableFirstBytePrefix || - option_config_ == kPlainTableCappedPrefix || - option_config_ == kPlainTableCappedPrefixNonMmap)) { - continue; + (option_config == kPlainTableAllBytesPrefix || + option_config == kPlainTableFirstBytePrefix || + option_config == kPlainTableCappedPrefix || + option_config == kPlainTableCappedPrefixNonMmap)) { + return true; } if ((skip_mask & kSkipHashIndex) && - (option_config_ == kBlockBasedTableWithPrefixHashIndex || - option_config_ == kBlockBasedTableWithWholeKeyHashIndex)) { - continue; + (option_config == kBlockBasedTableWithPrefixHashIndex || + option_config == kBlockBasedTableWithWholeKeyHashIndex)) { + return true; } - if ((skip_mask & kSkipHashCuckoo) && (option_config_ == kHashCuckoo)) { - continue; + if ((skip_mask & kSkipHashCuckoo) && (option_config == kHashCuckoo)) { + return true; } - if ((skip_mask & kSkipFIFOCompaction) && - option_config_ == kFIFOCompaction) { - continue; + if ((skip_mask & kSkipFIFOCompaction) && option_config == kFIFOCompaction) { + return true; } - if ((skip_mask & kSkipMmapReads) && option_config_ == kWalDirAndMmapReads) { + if ((skip_mask & kSkipMmapReads) && option_config == kWalDirAndMmapReads) { + return true; + } + return false; +} + +// Switch to a fresh database with the next option configuration to +// test. Return false if there are no more configurations to test. 
+bool DBTestBase::ChangeOptions(int skip_mask) { + for (option_config_++; option_config_ < kEnd; option_config_++) { + if (ShouldSkipOptions(option_config_, skip_mask)) { continue; } break; diff --git a/db/db_test_util.h b/db/db_test_util.h index b93994afc..ebf105250 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -572,6 +572,8 @@ class DBTestBase : public testing::Test { return std::string(buf); } + static bool ShouldSkipOptions(int option_config, int skip_mask = kNoSkip); + // Switch to a fresh database with the next option configuration to // test. Return false if there are no more configurations to test. bool ChangeOptions(int skip_mask = kNoSkip); From 38e1d7fea348f9d416736af9cf2b766989edcf51 Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 22 Jan 2016 15:46:32 -0800 Subject: [PATCH 041/195] ldb to support --column_family option Summary: Add an option --column_family option, so that users can query or update specific column family. Also add an create column family parameter to make unit test easier. Still need to add unit tests. Test Plan: Will add a test case in ldb python test. Reviewers: yhchiang, rven, andrewkr, IslamAbdelRahman, kradhakrishnan, anthony Reviewed By: anthony Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53265 --- HISTORY.md | 4 +- include/rocksdb/ldb_tool.h | 8 +- tools/ldb_cmd.cc | 123 +++++++++++++++++++++++-------- tools/ldb_cmd.h | 141 +++++++++++++++++++++++++++++++----- tools/ldb_test.py | 31 +++++++- tools/ldb_tool.cc | 20 +++-- tools/reduce_levels_test.cc | 4 +- 7 files changed, 269 insertions(+), 62 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 4cc2bac27..13fc5e158 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,9 +1,11 @@ # Rocksdb Change Log - ## Unreleased ### Public API Changes * Add a new perf context level between kEnableCount and kEnableTime. Level 2 now doesn't include timers for mutexes. 
+### New Features +* ldb tool now supports operations to non-default column families. + ## 4.4.0 (1/14/2016) ### Public API Changes * Change names in CompactionPri and add a new one. diff --git a/include/rocksdb/ldb_tool.h b/include/rocksdb/ldb_tool.h index 1b1c64b06..cb8188be0 100644 --- a/include/rocksdb/ldb_tool.h +++ b/include/rocksdb/ldb_tool.h @@ -5,6 +5,8 @@ #ifndef ROCKSDB_LITE #pragma once #include +#include +#include "rocksdb/db.h" #include "rocksdb/options.h" namespace rocksdb { @@ -28,8 +30,10 @@ struct LDBOptions { class LDBTool { public: - void Run(int argc, char** argv, Options db_options= Options(), - const LDBOptions& ldb_options = LDBOptions()); + void Run( + int argc, char** argv, Options db_options = Options(), + const LDBOptions& ldb_options = LDBOptions(), + const std::vector* column_families = nullptr); }; } // namespace rocksdb diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 7ec4690d0..326c64b30 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -44,6 +44,7 @@ const string LDBCommand::ARG_PATH = "path"; const string LDBCommand::ARG_HEX = "hex"; const string LDBCommand::ARG_KEY_HEX = "key_hex"; const string LDBCommand::ARG_VALUE_HEX = "value_hex"; +const string LDBCommand::ARG_CF_NAME = "column_family"; const string LDBCommand::ARG_TTL = "ttl"; const string LDBCommand::ARG_TTL_START = "start_time"; const string LDBCommand::ARG_TTL_END = "end_time"; @@ -72,16 +73,14 @@ void DumpSstFile(std::string filename, bool output_hex, bool show_properties); }; LDBCommand* LDBCommand::InitFromCmdLineArgs( - int argc, - char** argv, - const Options& options, - const LDBOptions& ldb_options -) { + int argc, char** argv, const Options& options, + const LDBOptions& ldb_options, + const std::vector* column_families) { vector args; for (int i = 1; i < argc; i++) { args.push_back(argv[i]); } - return InitFromCmdLineArgs(args, options, ldb_options); + return InitFromCmdLineArgs(args, options, ldb_options, column_families); } /** @@ -95,10 +94,9 @@ 
LDBCommand* LDBCommand::InitFromCmdLineArgs( * Returns nullptr if the command-line cannot be parsed. */ LDBCommand* LDBCommand::InitFromCmdLineArgs( - const vector& args, - const Options& options, - const LDBOptions& ldb_options -) { + const vector& args, const Options& options, + const LDBOptions& ldb_options, + const std::vector* column_families) { // --x=y command line arguments are added as x->y map entries. map option_map; @@ -184,6 +182,8 @@ LDBCommand* LDBCommand::SelectCommand( return new ManifestDumpCommand(cmdParams, option_map, flags); } else if (cmd == ListColumnFamiliesCommand::Name()) { return new ListColumnFamiliesCommand(cmdParams, option_map, flags); + } else if (cmd == CreateColumnFamilyCommand::Name()) { + return new CreateColumnFamilyCommand(cmdParams, option_map, flags); } else if (cmd == DBFileDumperCommand::Name()) { return new DBFileDumperCommand(cmdParams, option_map, flags); } else if (cmd == InternalDumpCommand::Name()) { @@ -450,6 +450,10 @@ void CompactorCommand::Help(string& ret) { } void CompactorCommand::DoCommand() { + if (!db_) { + assert(GetExecuteState().IsFailed()); + return; + } Slice* begin = nullptr; Slice* end = nullptr; @@ -513,6 +517,7 @@ Options DBLoaderCommand::PrepareOptionsForOpenDB() { void DBLoaderCommand::DoCommand() { if (!db_) { + assert(GetExecuteState().IsFailed()); return; } @@ -527,7 +532,7 @@ void DBLoaderCommand::DoCommand() { string key; string value; if (ParseKeyValue(line, &key, &value, is_key_hex_, is_value_hex_)) { - db_->Put(write_options, Slice(key), Slice(value)); + db_->Put(write_options, GetCfHandle(), Slice(key), Slice(value)); } else if (0 == line.find("Keys in range:")) { // ignore this line } else if (0 == line.find("Created bg thread 0x")) { @@ -541,7 +546,7 @@ void DBLoaderCommand::DoCommand() { cout << "Warning: " << bad_lines << " bad lines ignored." 
<< endl; } if (compact_) { - db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + db_->CompactRange(CompactRangeOptions(), GetCfHandle(), nullptr, nullptr); } } @@ -696,6 +701,38 @@ void ListColumnFamiliesCommand::DoCommand() { } } +void CreateColumnFamilyCommand::Help(string& ret) { + ret.append(" "); + ret.append(CreateColumnFamilyCommand::Name()); + ret.append(" --db= "); + ret.append("\n"); +} + +CreateColumnFamilyCommand::CreateColumnFamilyCommand( + const vector& params, const map& options, + const vector& flags) + : LDBCommand(options, flags, true, {ARG_DB}) { + if (params.size() != 1) { + exec_state_ = LDBCommandExecuteResult::Failed( + "new column family name must be specified"); + } else { + new_cf_name_ = params[0]; + } +} + +void CreateColumnFamilyCommand::DoCommand() { + ColumnFamilyHandle* new_cf_handle; + Status st = db_->CreateColumnFamily(options_, new_cf_name_, &new_cf_handle); + if (st.ok()) { + fprintf(stdout, "OK\n"); + } else { + exec_state_ = LDBCommandExecuteResult::Failed( + "Fail to create new column family: " + st.ToString()); + } + delete new_cf_handle; + CloseDB(); +} + // ---------------------------------------------------------------------------- namespace { @@ -800,12 +837,13 @@ void InternalDumpCommand::Help(string& ret) { void InternalDumpCommand::DoCommand() { if (!db_) { + assert(GetExecuteState().IsFailed()); return; } if (print_stats_) { string stats; - if (db_->GetProperty("rocksdb.stats", &stats)) { + if (db_->GetProperty(GetCfHandle(), "rocksdb.stats", &stats)) { fprintf(stdout, "%s\n", stats.c_str()); } } @@ -1050,7 +1088,7 @@ void DBDumperCommand::DoDumpCommand() { } // Setup key iterator - Iterator* iter = db_->NewIterator(ReadOptions()); + Iterator* iter = db_->NewIterator(ReadOptions(), GetCfHandle()); Status st = iter->status(); if (!st.ok()) { exec_state_ = @@ -1285,7 +1323,7 @@ void ReduceDBLevelsCommand::DoCommand() { } // Compact the whole DB to put all files to the highest level. 
fprintf(stdout, "Compacting the db...\n"); - db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + db_->CompactRange(CompactRangeOptions(), GetCfHandle(), nullptr, nullptr); CloseDB(); EnvOptions soptions; @@ -1377,8 +1415,9 @@ void ChangeCompactionStyleCommand::DoCommand() { // print db stats before we have made any change std::string property; std::string files_per_level; - for (int i = 0; i < db_->NumberLevels(); i++) { - db_->GetProperty("rocksdb.num-files-at-level" + NumberToString(i), + for (int i = 0; i < db_->NumberLevels(GetCfHandle()); i++) { + db_->GetProperty(GetCfHandle(), + "rocksdb.num-files-at-level" + NumberToString(i), &property); // format print string @@ -1393,13 +1432,14 @@ void ChangeCompactionStyleCommand::DoCommand() { CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 0; - db_->CompactRange(compact_options, nullptr, nullptr); + db_->CompactRange(compact_options, GetCfHandle(), nullptr, nullptr); // verify compaction result files_per_level = ""; int num_files = 0; for (int i = 0; i < db_->NumberLevels(); i++) { - db_->GetProperty("rocksdb.num-files-at-level" + NumberToString(i), + db_->GetProperty(GetCfHandle(), + "rocksdb.num-files-at-level" + NumberToString(i), &property); // format print string @@ -1622,8 +1662,12 @@ void GetCommand::Help(string& ret) { } void GetCommand::DoCommand() { + if (!db_) { + assert(GetExecuteState().IsFailed()); + return; + } string value; - Status st = db_->Get(ReadOptions(), key_, &value); + Status st = db_->Get(ReadOptions(), GetCfHandle(), key_, &value); if (st.ok()) { fprintf(stdout, "%s\n", (is_value_hex_ ? 
StringToHex(value) : value).c_str()); @@ -1670,11 +1714,14 @@ void ApproxSizeCommand::Help(string& ret) { } void ApproxSizeCommand::DoCommand() { - + if (!db_) { + assert(GetExecuteState().IsFailed()); + return; + } Range ranges[1]; ranges[0] = Range(start_key_, end_key_); uint64_t sizes[1]; - db_->GetApproximateSizes(ranges, 1, sizes); + db_->GetApproximateSizes(GetCfHandle(), ranges, 1, sizes); fprintf(stdout, "%lu\n", (unsigned long)sizes[0]); /* Weird that GetApproximateSizes() returns void, although documentation * says that it returns a Status object. @@ -1718,11 +1765,15 @@ void BatchPutCommand::Help(string& ret) { } void BatchPutCommand::DoCommand() { + if (!db_) { + assert(GetExecuteState().IsFailed()); + return; + } WriteBatch batch; for (vector>::const_iterator itr = key_values_.begin(); itr != key_values_.end(); ++itr) { - batch.Put(itr->first, itr->second); + batch.Put(GetCfHandle(), itr->first, itr->second); } Status st = db_->Write(WriteOptions(), &batch); if (st.ok()) { @@ -1798,9 +1849,13 @@ void ScanCommand::Help(string& ret) { } void ScanCommand::DoCommand() { + if (!db_) { + assert(GetExecuteState().IsFailed()); + return; + } int num_keys_scanned = 0; - Iterator* it = db_->NewIterator(ReadOptions()); + Iterator* it = db_->NewIterator(ReadOptions(), GetCfHandle()); if (start_key_specified_) { it->Seek(start_key_); } else { @@ -1896,7 +1951,11 @@ void DeleteCommand::Help(string& ret) { } void DeleteCommand::DoCommand() { - Status st = db_->Delete(WriteOptions(), key_); + if (!db_) { + assert(GetExecuteState().IsFailed()); + return; + } + Status st = db_->Delete(WriteOptions(), GetCfHandle(), key_); if (st.ok()) { fprintf(stdout, "OK\n"); } else { @@ -1937,7 +1996,11 @@ void PutCommand::Help(string& ret) { } void PutCommand::DoCommand() { - Status st = db_->Put(WriteOptions(), key_, value_); + if (!db_) { + assert(GetExecuteState().IsFailed()); + return; + } + Status st = db_->Put(WriteOptions(), GetCfHandle(), key_, value_); if (st.ok()) { 
fprintf(stdout, "OK\n"); } else { @@ -1978,6 +2041,7 @@ void DBQuerierCommand::Help(string& ret) { void DBQuerierCommand::DoCommand() { if (!db_) { + assert(GetExecuteState().IsFailed()); return; } @@ -2011,17 +2075,17 @@ void DBQuerierCommand::DoCommand() { "delete \n"); } else if (cmd == DELETE_CMD && tokens.size() == 2) { key = (is_key_hex_ ? HexToString(tokens[1]) : tokens[1]); - db_->Delete(write_options, Slice(key)); + db_->Delete(write_options, GetCfHandle(), Slice(key)); fprintf(stdout, "Successfully deleted %s\n", tokens[1].c_str()); } else if (cmd == PUT_CMD && tokens.size() == 3) { key = (is_key_hex_ ? HexToString(tokens[1]) : tokens[1]); value = (is_value_hex_ ? HexToString(tokens[2]) : tokens[2]); - db_->Put(write_options, Slice(key), Slice(value)); + db_->Put(write_options, GetCfHandle(), Slice(key), Slice(value)); fprintf(stdout, "Successfully put %s %s\n", tokens[1].c_str(), tokens[2].c_str()); } else if (cmd == GET_CMD && tokens.size() == 2) { key = (is_key_hex_ ? HexToString(tokens[1]) : tokens[1]); - if (db_->Get(read_options, Slice(key), &value).ok()) { + if (db_->Get(read_options, GetCfHandle(), Slice(key), &value).ok()) { fprintf(stdout, "%s\n", PrintKeyValue(key, value, is_key_hex_, is_value_hex_).c_str()); } else { @@ -2125,6 +2189,7 @@ void DBFileDumperCommand::Help(string& ret) { void DBFileDumperCommand::DoCommand() { if (!db_) { + assert(GetExecuteState().IsFailed()); return; } Status s; diff --git a/tools/ldb_cmd.h b/tools/ldb_cmd.h index 0c048e794..fdc8fbba5 100644 --- a/tools/ldb_cmd.h +++ b/tools/ldb_cmd.h @@ -44,6 +44,7 @@ public: static const string ARG_HEX; static const string ARG_KEY_HEX; static const string ARG_VALUE_HEX; + static const string ARG_CF_NAME; static const string ARG_TTL; static const string ARG_TTL_START; static const string ARG_TTL_END; @@ -62,17 +63,14 @@ public: static const string ARG_CREATE_IF_MISSING; static LDBCommand* InitFromCmdLineArgs( - const vector& args, - const Options& options, - const LDBOptions& 
ldb_options - ); + const vector& args, const Options& options, + const LDBOptions& ldb_options, + const std::vector* column_families); static LDBCommand* InitFromCmdLineArgs( - int argc, - char** argv, - const Options& options, - const LDBOptions& ldb_options - ); + int argc, char** argv, const Options& options, + const LDBOptions& ldb_options, + const std::vector* column_families); bool ValidateCmdLineOptions(); @@ -82,6 +80,15 @@ public: options_ = options; } + virtual void SetColumnFamilies( + const std::vector* column_families) { + if (column_families != nullptr) { + column_families_ = *column_families; + } else { + column_families_.clear(); + } + } + void SetLDBOptions(const LDBOptions& ldb_options) { ldb_options_ = ldb_options; } @@ -90,10 +97,7 @@ public: return false; } - virtual ~LDBCommand() { - delete db_; - db_ = nullptr; - } + virtual ~LDBCommand() { CloseDB(); } /* Run the command, and return the execute result. */ void Run() { @@ -181,8 +185,10 @@ protected: LDBCommandExecuteResult exec_state_; string db_path_; + string column_family_name_; DB* db_; DBWithTTL* db_ttl_; + std::map cf_handles_; /** * true implies that this command can work if the db is opened in read-only @@ -235,6 +241,13 @@ protected: db_path_ = itr->second; } + itr = options.find(ARG_CF_NAME); + if (itr != options.end()) { + column_family_name_ = itr->second; + } else { + column_family_name_ = kDefaultColumnFamilyName; + } + is_key_hex_ = IsKeyHex(options, flags); is_value_hex_ = IsValueHex(options, flags); is_db_ttl_ = IsFlagPresent(flags, ARG_TTL); @@ -248,21 +261,75 @@ protected: } // Open the DB. 
Status st; + std::vector handles_opened; if (is_db_ttl_) { + // ldb doesn't yet support TTL DB with multiple column families + if (!column_family_name_.empty() || !column_families_.empty()) { + exec_state_ = LDBCommandExecuteResult::Failed( + "ldb doesn't support TTL DB with multiple column families"); + } if (is_read_only_) { st = DBWithTTL::Open(opt, db_path_, &db_ttl_, 0, true); } else { st = DBWithTTL::Open(opt, db_path_, &db_ttl_); } db_ = db_ttl_; - } else if (is_read_only_) { - st = DB::OpenForReadOnly(opt, db_path_, &db_); } else { - st = DB::Open(opt, db_path_, &db_); + if (column_families_.empty()) { + // Try to figure out column family lists + std::vector cf_list; + st = DB::ListColumnFamilies(DBOptions(), db_path_, &cf_list); + // There is possible the DB doesn't exist yet, for "create if not + // "existing case". The failure is ignored here. We rely on DB::Open() + // to give us the correct error message for problem with opening + // existing DB. + if (st.ok() && cf_list.size() > 1) { + // Ignore single column family DB. 
+ for (auto cf_name : cf_list) { + column_families_.emplace_back(cf_name, opt); + } + } + } + if (is_read_only_) { + if (column_families_.empty()) { + st = DB::OpenForReadOnly(opt, db_path_, &db_); + } else { + st = DB::OpenForReadOnly(opt, db_path_, column_families_, + &handles_opened, &db_); + } + } else { + if (column_families_.empty()) { + st = DB::Open(opt, db_path_, &db_); + } else { + st = DB::Open(opt, db_path_, column_families_, &handles_opened, &db_); + } + } } if (!st.ok()) { string msg = st.ToString(); exec_state_ = LDBCommandExecuteResult::Failed(msg); + } else if (!handles_opened.empty()) { + assert(handles_opened.size() == column_families_.size()); + bool found_cf_name = false; + for (size_t i = 0; i < handles_opened.size(); i++) { + cf_handles_[column_families_[i].name] = handles_opened[i]; + if (column_family_name_ == column_families_[i].name) { + found_cf_name = true; + } + } + if (!found_cf_name) { + exec_state_ = LDBCommandExecuteResult::Failed( + "Non-existing column family " + column_family_name_); + CloseDB(); + } + } else { + // We successfully opened DB in single column family mode. 
+ assert(column_families_.empty()); + if (column_family_name_ != kDefaultColumnFamilyName) { + exec_state_ = LDBCommandExecuteResult::Failed( + "Non-existing column family " + column_family_name_); + CloseDB(); + } } options_ = opt; @@ -270,11 +337,27 @@ protected: void CloseDB () { if (db_ != nullptr) { + for (auto& pair : cf_handles_) { + delete pair.second; + } delete db_; db_ = nullptr; } } + ColumnFamilyHandle* GetCfHandle() { + if (!cf_handles_.empty()) { + auto it = cf_handles_.find(column_family_name_); + if (it == cf_handles_.end()) { + exec_state_ = LDBCommandExecuteResult::Failed( + "Cannot find column family " + column_family_name_); + } else { + return it->second; + } + } + return db_->DefaultColumnFamily(); + } + static string PrintKeyValue(const string& key, const string& value, bool is_key_hex, bool is_value_hex) { string result; @@ -310,10 +393,10 @@ protected: * passed in. */ static vector BuildCmdLineOptions(vector options) { - vector ret = {ARG_DB, ARG_BLOOM_BITS, - ARG_BLOCK_SIZE, ARG_AUTO_COMPACTION, - ARG_COMPRESSION_TYPE, ARG_WRITE_BUFFER_SIZE, - ARG_FILE_SIZE, ARG_FIX_PREFIX_LEN}; + vector ret = {ARG_DB, ARG_BLOOM_BITS, ARG_BLOCK_SIZE, + ARG_AUTO_COMPACTION, ARG_COMPRESSION_TYPE, + ARG_WRITE_BUFFER_SIZE, ARG_FILE_SIZE, + ARG_FIX_PREFIX_LEN, ARG_CF_NAME}; ret.insert(ret.end(), options.begin(), options.end()); return ret; } @@ -325,6 +408,7 @@ protected: const string& option, string* value); Options options_; + std::vector column_families_; LDBOptions ldb_options_; private: @@ -568,6 +652,23 @@ class ListColumnFamiliesCommand : public LDBCommand { string dbname_; }; +class CreateColumnFamilyCommand : public LDBCommand { + public: + static string Name() { return "create_column_family"; } + + CreateColumnFamilyCommand(const vector& params, + const map& options, + const vector& flags); + + static void Help(string& ret); + virtual void DoCommand() override; + + virtual bool NoDBOpen() override { return false; } + + private: + string new_cf_name_; 
+}; + class ReduceDBLevelsCommand : public LDBCommand { public: static string Name() { return "reduce_levels"; } diff --git a/tools/ldb_test.py b/tools/ldb_test.py index 471232419..f4899587d 100644 --- a/tools/ldb_test.py +++ b/tools/ldb_test.py @@ -503,7 +503,36 @@ class LDBTestCase(unittest.TestCase): # Test on empty path. self.assertRunFAILFull(cmd % "") - + def testColumnFamilies(self): + print "Running testColumnFamilies..." + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put cf1_1 1 --create_if_missing", "OK") + self.assertRunOK("put cf1_2 2 --create_if_missing", "OK") + self.assertRunOK("put cf1_3 3", "OK") + # Given non-default column family to single CF DB. + self.assertRunFAIL("get cf1_1 --column_family=two") + self.assertRunOK("create_column_family two", "OK") + self.assertRunOK("put cf2_1 1 --create_if_missing --column_family=two", + "OK") + self.assertRunOK("put cf2_2 2 --create_if_missing --column_family=two", + "OK") + self.assertRunOK("delete cf1_2", "OK") + self.assertRunOK("create_column_family three", "OK") + self.assertRunOK("delete cf2_2 --column_family=two", "OK") + self.assertRunOK( + "put cf3_1 3 --create_if_missing --column_family=three", + "OK") + self.assertRunOK("get cf1_1 --column_family=default", "1") + self.assertRunOK("dump --column_family=two", + "cf2_1 ==> 1\nKeys in range: 1") + self.assertRunOK("dump", + "cf1_1 ==> 1\ncf1_3 ==> 3\nKeys in range: 2") + self.assertRunOK("get cf2_1 --column_family=two", + "1") + self.assertRunOK("get cf3_1 --column_family=three", + "3") + # non-existing column family. 
+ self.assertRunFAIL("get cf3_1 --column_family=four") if __name__ == "__main__": unittest.main() diff --git a/tools/ldb_tool.cc b/tools/ldb_tool.cc index c1b23ebac..d99931dfe 100644 --- a/tools/ldb_tool.cc +++ b/tools/ldb_tool.cc @@ -30,6 +30,10 @@ public: " : Values are input/output as hex\n"); ret.append(" --" + LDBCommand::ARG_HEX + " : Both keys and values are input/output as hex\n"); + ret.append( + " --" + LDBCommand::ARG_CF_NAME + + " : name of the column family to operate on. default: default column " + "family\n"); ret.append("\n"); ret.append("The following optional parameters control the database " @@ -77,15 +81,16 @@ public: fprintf(stderr, "%s\n", ret.c_str()); } - static void RunCommand(int argc, char** argv, Options options, - const LDBOptions& ldb_options) { + static void RunCommand( + int argc, char** argv, Options options, const LDBOptions& ldb_options, + const std::vector* column_families) { if (argc <= 2) { PrintHelp(argv[0]); exit(1); } - LDBCommand* cmdObj = LDBCommand::InitFromCmdLineArgs(argc, argv, options, - ldb_options); + LDBCommand* cmdObj = LDBCommand::InitFromCmdLineArgs( + argc, argv, options, ldb_options, column_families); if (cmdObj == nullptr) { fprintf(stderr, "Unknown command\n"); PrintHelp(argv[0]); @@ -106,10 +111,11 @@ public: }; - void LDBTool::Run(int argc, char** argv, Options options, - const LDBOptions& ldb_options) { - LDBCommandRunner::RunCommand(argc, argv, options, ldb_options); + const LDBOptions& ldb_options, + const std::vector* column_families) { + LDBCommandRunner::RunCommand(argc, argv, options, ldb_options, + column_families); } } // namespace rocksdb diff --git a/tools/reduce_levels_test.cc b/tools/reduce_levels_test.cc index 863d8607e..d55c82a61 100644 --- a/tools/reduce_levels_test.cc +++ b/tools/reduce_levels_test.cc @@ -92,8 +92,8 @@ Status ReduceLevelTest::OpenDB(bool create_if_missing, int num_levels) { bool ReduceLevelTest::ReduceLevels(int target_level) { std::vector args = 
rocksdb::ReduceDBLevelsCommand::PrepareArgs( dbname_, target_level, false); - LDBCommand* level_reducer = LDBCommand::InitFromCmdLineArgs( - args, Options(), LDBOptions()); + LDBCommand* level_reducer = + LDBCommand::InitFromCmdLineArgs(args, Options(), LDBOptions(), nullptr); level_reducer->Run(); bool is_succeed = level_reducer->GetExecuteState().IsSucceed(); delete level_reducer; From b7ecf3d214a5c4f36aeb9824cd278844bf55c692 Mon Sep 17 00:00:00 2001 From: Venkatesh Radhakrishnan Date: Tue, 26 Jan 2016 09:12:20 -0800 Subject: [PATCH 042/195] Fix intermittent hang in ColumnFamilyTest.FlushAndDropRaceCondition Summary: ColumnFamilyTest.FlushAndDropRaceCondition sometimes hangs because the sync point, "FlushJob::InstallResults", sleeps holding the DB mutex. Fixing it by releasing the mutex before sleeping. Test Plan: seq 1000 |parallel --gnu --eta 't=/dev/shm/rdb-{}; rm -rf $t; mkdir $t && export TEST_TMPDIR=$t; ./column_family_test -gtest_filter=*FlushAndDropRaceCondition* > $t/log-{}' Reviewers: IslamAbdelRahman, anthony, kradhakrishnan, yhchiang, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53349 --- db/column_family_test.cc | 4 +++- db/flush_job.cc | 1 + db/version_set.cc | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/db/column_family_test.cc b/db/column_family_test.cc index d86735c2e..62fadbbee 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -1999,7 +1999,9 @@ TEST_F(ColumnFamilyTest, FlushAndDropRaceCondition) { Reopen({options, options}); rocksdb::SyncPoint::GetInstance()->LoadDependency( - {{"VersionSet::LogAndApply::ColumnFamilyDrop:1", + {{"VersionSet::LogAndApply::ColumnFamilyDrop:0", + "FlushJob::WriteLevel0Table"}, + {"VersionSet::LogAndApply::ColumnFamilyDrop:1", "FlushJob::InstallResults"}, {"FlushJob::InstallResults", "VersionSet::LogAndApply::ColumnFamilyDrop:2"}}); diff --git a/db/flush_job.cc b/db/flush_job.cc index 9da7d9546..a565f8f25 100644 
--- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -270,6 +270,7 @@ Status FlushJob::WriteLevel0Table(const autovector& mems, if (!db_options_.disableDataSync && output_file_directory_ != nullptr) { output_file_directory_->Fsync(); } + TEST_SYNC_POINT("FlushJob::WriteLevel0Table"); db_mutex_->Lock(); } base->Unref(); diff --git a/db/version_set.cc b/db/version_set.cc index 519805396..235789512 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2218,6 +2218,7 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, } if (edit->is_column_family_drop_) { + TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:0"); TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:1"); TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:2"); } From 22ecb752dbab4d134fe9891e244924aaae1ff07a Mon Sep 17 00:00:00 2001 From: krad Date: Mon, 25 Jan 2016 13:06:11 -0800 Subject: [PATCH 043/195] Add valgrind to pre-commit sandcastle testing Summary: Initially I removed "valgrind" from the list since it take too much time (3+hr) compared to tsan (40 min) when the tests are run in parallel. It is not effective to run the tests in parallel in sandcastle and tsan takes about 3hrs as well. Adding valgrind to the list. 
Test Plan: Submit this diff and watch the run Reviewers: sdong, rven Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53337 --- arcanist_util/config/FacebookArcanistConfiguration.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arcanist_util/config/FacebookArcanistConfiguration.php b/arcanist_util/config/FacebookArcanistConfiguration.php index c4785bc00..31fa27c9b 100644 --- a/arcanist_util/config/FacebookArcanistConfiguration.php +++ b/arcanist_util/config/FacebookArcanistConfiguration.php @@ -123,7 +123,7 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { // list of tests we want to run in sandcastle $tests = array( - "unit", "unit_481", "clang_unit", "tsan", "asan", "lite" + "unit", "unit_481", "clang_unit", "tsan", "asan", "lite", "valgrind" ); // construct a job definition for each test and add it to the master plan From 46f9cd46af7f3746753e7d64bcd0013ae7d35809 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 25 Jan 2016 13:27:50 -0800 Subject: [PATCH 044/195] [directory includes cleanup] Move cross-function test points Summary: I split the db-specific test points out into a separate file under db/ directory. There were also a few bugs to fix in xfunc.{h,cc} that prevented it from compiling previously; see https://reviews.facebook.net/D36825. Test Plan: compilation works now, below command works, will also run "make xfunc". 
$ make check ROCKSDB_XFUNC_TEST='managed_new' tests-regexp='DBTest' -j32 Reviewers: sdong, yhchiang Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53343 --- CMakeLists.txt | 1 + db/db_impl.cc | 1 + db/managed_iterator.cc | 4 +- db/xfunc_test_points.cc | 145 ++++++++++++++++++++++++++++++++++++++++ db/xfunc_test_points.h | 33 +++++++++ src.mk | 1 + util/xfunc.cc | 139 +------------------------------------- util/xfunc.h | 15 +---- 8 files changed, 189 insertions(+), 150 deletions(-) create mode 100644 db/xfunc_test_points.cc create mode 100644 db/xfunc_test_points.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 208267254..7222807fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,6 +145,7 @@ set(SOURCES db/write_batch_base.cc db/write_controller.cc db/write_thread.cc + db/xfunc_test_points.cc memtable/hash_cuckoo_rep.cc memtable/hash_linklist_rep.cc memtable/hash_skiplist_rep.cc diff --git a/db/db_impl.cc b/db/db_impl.cc index dcc90e31c..d4f441715 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -54,6 +54,7 @@ #include "db/write_batch_internal.h" #include "db/write_callback.h" #include "db/writebuffer.h" +#include "db/xfunc_test_points.h" #include "memtable/hash_linklist_rep.h" #include "memtable/hash_skiplist_rep.h" #include "port/likely.h" diff --git a/db/managed_iterator.cc b/db/managed_iterator.cc index 45faeba4e..c8d943724 100644 --- a/db/managed_iterator.cc +++ b/db/managed_iterator.cc @@ -5,6 +5,8 @@ #ifndef ROCKSDB_LITE +#include "db/managed_iterator.h" + #include #include #include @@ -13,7 +15,7 @@ #include "db/db_impl.h" #include "db/db_iter.h" #include "db/dbformat.h" -#include "db/managed_iterator.h" +#include "db/xfunc_test_points.h" #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" diff --git a/db/xfunc_test_points.cc b/db/xfunc_test_points.cc new file mode 100644 index 000000000..82cf685e2 --- /dev/null +++ b/db/xfunc_test_points.cc @@ -0,0 +1,145 @@ +// Copyright (c) 2016, 
Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include "db/xfunc_test_points.h" +#include "util/xfunc.h" + +namespace rocksdb { + +#ifdef XFUNC + +void xf_manage_release(ManagedIterator* iter) { + if (!(XFuncPoint::GetSkip() & kSkipNoPrefix)) { + iter->ReleaseIter(false); + } +} + +void xf_manage_create(ManagedIterator* iter) { iter->SetDropOld(false); } + +void xf_manage_new(DBImpl* db, ReadOptions* read_options, + bool is_snapshot_supported) { + if ((!XFuncPoint::Check("managed_xftest_dropold") && + (!XFuncPoint::Check("managed_xftest_release"))) || + (!read_options->managed)) { + return; + } + if ((!read_options->tailing) && (read_options->snapshot == nullptr) && + (!is_snapshot_supported)) { + read_options->managed = false; + return; + } + if (db->GetOptions().prefix_extractor != nullptr) { + if (strcmp(db->GetOptions().table_factory.get()->Name(), "PlainTable")) { + if (!(XFuncPoint::GetSkip() & kSkipNoPrefix)) { + read_options->total_order_seek = true; + } + } else { + read_options->managed = false; + } + } +} + +class XFTransactionWriteHandler : public WriteBatch::Handler { + public: + Transaction* txn_; + DBImpl* db_impl_; + + XFTransactionWriteHandler(Transaction* txn, DBImpl* db_impl) + : txn_(txn), db_impl_(db_impl) {} + + virtual Status PutCF(uint32_t column_family_id, const Slice& key, + const Slice& value) override { + InstrumentedMutexLock l(&db_impl_->mutex_); + + ColumnFamilyHandle* cfh = db_impl_->GetColumnFamilyHandle(column_family_id); + if (cfh == nullptr) { + return Status::InvalidArgument( + "XFUNC test could not find column family " + "handle for id ", + ToString(column_family_id)); + } + + txn_->Put(cfh, key, value); + + return Status::OK(); + } + + virtual Status MergeCF(uint32_t column_family_id, const Slice& key, + 
const Slice& value) override { + InstrumentedMutexLock l(&db_impl_->mutex_); + + ColumnFamilyHandle* cfh = db_impl_->GetColumnFamilyHandle(column_family_id); + if (cfh == nullptr) { + return Status::InvalidArgument( + "XFUNC test could not find column family " + "handle for id ", + ToString(column_family_id)); + } + + txn_->Merge(cfh, key, value); + + return Status::OK(); + } + + virtual Status DeleteCF(uint32_t column_family_id, + const Slice& key) override { + InstrumentedMutexLock l(&db_impl_->mutex_); + + ColumnFamilyHandle* cfh = db_impl_->GetColumnFamilyHandle(column_family_id); + if (cfh == nullptr) { + return Status::InvalidArgument( + "XFUNC test could not find column family " + "handle for id ", + ToString(column_family_id)); + } + + txn_->Delete(cfh, key); + + return Status::OK(); + } + + virtual void LogData(const Slice& blob) override { txn_->PutLogData(blob); } +}; + +// Whenever DBImpl::Write is called, create a transaction and do the write via +// the transaction. +void xf_transaction_write(const WriteOptions& write_options, + const DBOptions& db_options, WriteBatch* my_batch, + WriteCallback* callback, DBImpl* db_impl, Status* s, + bool* write_attempted) { + if (callback != nullptr) { + // We may already be in a transaction, don't force a transaction + *write_attempted = false; + return; + } + + OptimisticTransactionDB* txn_db = new OptimisticTransactionDB(db_impl); + Transaction* txn = Transaction::BeginTransaction(txn_db, write_options); + + XFTransactionWriteHandler handler(txn, db_impl); + *s = my_batch->Iterate(&handler); + + if (!s->ok()) { + Log(InfoLogLevel::ERROR_LEVEL, db_options.info_log, + "XFUNC test could not iterate batch. status: $s\n", + s->ToString().c_str()); + } + + *s = txn->Commit(); + + if (!s->ok()) { + Log(InfoLogLevel::ERROR_LEVEL, db_options.info_log, + "XFUNC test could not commit transaction. 
status: $s\n", + s->ToString().c_str()); + } + + *write_attempted = true; + delete txn; + delete txn_db; +} + +#endif // XFUNC + +} // namespace rocksdb diff --git a/db/xfunc_test_points.h b/db/xfunc_test_points.h new file mode 100644 index 000000000..2887d2dfc --- /dev/null +++ b/db/xfunc_test_points.h @@ -0,0 +1,33 @@ +// Copyright (c) 2016, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include "db/db_impl.h" +#include "db/managed_iterator.h" +#include "db/write_callback.h" +#include "rocksdb/options.h" +#include "rocksdb/write_batch.h" +#include "util/xfunc.h" + +namespace rocksdb { + +#ifdef XFUNC + +// DB-specific test points for the cross-functional test framework (see +// util/xfunc.h). +void xf_manage_release(ManagedIterator* iter); +void xf_manage_create(ManagedIterator* iter); +void xf_manage_new(DBImpl* db, ReadOptions* readoptions, + bool is_snapshot_supported); +void xf_transaction_write(const WriteOptions& write_options, + const DBOptions& db_options, + class WriteBatch* my_batch, + class WriteCallback* callback, DBImpl* db_impl, + Status* success, bool* write_attempted); + +#endif // XFUNC + +} // namespace rocksdb diff --git a/src.mk b/src.mk index 369890258..46949b611 100644 --- a/src.mk +++ b/src.mk @@ -46,6 +46,7 @@ LIB_SOURCES = \ db/write_batch_base.cc \ db/write_controller.cc \ db/write_thread.cc \ + db/xfunc_test_points.cc \ memtable/hash_cuckoo_rep.cc \ memtable/hash_linklist_rep.cc \ memtable/hash_skiplist_rep.cc \ diff --git a/util/xfunc.cc b/util/xfunc.cc index 98de1c594..3da5e2a75 100644 --- a/util/xfunc.cc +++ b/util/xfunc.cc @@ -4,16 +4,14 @@ // of patent rights can be found in the PATENTS file in the same directory. 
#ifdef XFUNC +#include "util/xfunc.h" + #include -#include "db/db_impl.h" -#include "db/managed_iterator.h" -#include "db/write_callback.h" + #include "rocksdb/db.h" #include "rocksdb/options.h" #include "rocksdb/utilities/optimistic_transaction_db.h" #include "rocksdb/write_batch.h" -#include "util/xfunc.h" - namespace rocksdb { @@ -29,12 +27,6 @@ void GetXFTestOptions(Options* options, int skip_policy) { } } -void xf_manage_release(ManagedIterator* iter) { - if (!(XFuncPoint::GetSkip() & kSkipNoPrefix)) { - iter->ReleaseIter(false); - } -} - void xf_manage_options(ReadOptions* read_options) { if (!XFuncPoint::Check("managed_xftest_dropold") && (!XFuncPoint::Check("managed_xftest_release"))) { @@ -43,31 +35,6 @@ void xf_manage_options(ReadOptions* read_options) { read_options->managed = true; } -void xf_manage_new(DBImpl* db, ReadOptions* read_options, - bool is_snapshot_supported) { - if ((!XFuncPoint::Check("managed_xftest_dropold") && - (!XFuncPoint::Check("managed_xftest_release"))) || - (!read_options->managed)) { - return; - } - if ((!read_options->tailing) && (read_options->snapshot == nullptr) && - (!is_snapshot_supported)) { - read_options->managed = false; - return; - } - if (db->GetOptions().prefix_extractor != nullptr) { - if (strcmp(db->GetOptions().table_factory.get()->Name(), "PlainTable")) { - if (!(XFuncPoint::GetSkip() & kSkipNoPrefix)) { - read_options->total_order_seek = true; - } - } else { - read_options->managed = false; - } - } -} - -void xf_manage_create(ManagedIterator* iter) { iter->SetDropOld(false); } - void xf_transaction_set_memtable_history( int32_t* max_write_buffer_number_to_maintain) { *max_write_buffer_number_to_maintain = 10; @@ -78,106 +45,6 @@ void xf_transaction_clear_memtable_history( *max_write_buffer_number_to_maintain = 0; } -class XFTransactionWriteHandler : public WriteBatch::Handler { - public: - OptimisticTransaction* txn_; - DBImpl* db_impl_; - - XFTransactionWriteHandler(OptimisticTransaction* txn, DBImpl* db_impl) 
- : txn_(txn), db_impl_(db_impl) {} - - virtual Status PutCF(uint32_t column_family_id, const Slice& key, - const Slice& value) override { - InstrumentedMutexLock l(&db_impl_->mutex_); - - ColumnFamilyHandle* cfh = db_impl_->GetColumnFamilyHandle(column_family_id); - if (cfh == nullptr) { - return Status::InvalidArgument( - "XFUNC test could not find column family " - "handle for id ", - ToString(column_family_id)); - } - - txn_->Put(cfh, key, value); - - return Status::OK(); - } - - virtual Status MergeCF(uint32_t column_family_id, const Slice& key, - const Slice& value) override { - InstrumentedMutexLock l(&db_impl_->mutex_); - - ColumnFamilyHandle* cfh = db_impl_->GetColumnFamilyHandle(column_family_id); - if (cfh == nullptr) { - return Status::InvalidArgument( - "XFUNC test could not find column family " - "handle for id ", - ToString(column_family_id)); - } - - txn_->Merge(cfh, key, value); - - return Status::OK(); - } - - virtual Status DeleteCF(uint32_t column_family_id, - const Slice& key) override { - InstrumentedMutexLock l(&db_impl_->mutex_); - - ColumnFamilyHandle* cfh = db_impl_->GetColumnFamilyHandle(column_family_id); - if (cfh == nullptr) { - return Status::InvalidArgument( - "XFUNC test could not find column family " - "handle for id ", - ToString(column_family_id)); - } - - txn_->Delete(cfh, key); - - return Status::OK(); - } - - virtual void LogData(const Slice& blob) override { txn_->PutLogData(blob); } -}; - -// Whenever DBImpl::Write is called, create a transaction and do the write via -// the transaction. 
-void xf_transaction_write(const WriteOptions& write_options, - const DBOptions& db_options, WriteBatch* my_batch, - WriteCallback* callback, DBImpl* db_impl, Status* s, - bool* write_attempted) { - if (callback != nullptr) { - // We may already be in a transaction, don't force a transaction - *write_attempted = false; - return; - } - - OptimisticTransactionDB* txn_db = new OptimisticTransactionDB(db_impl); - OptimisticTransaction* txn = - OptimisticTransaction::BeginTransaction(txn_db, write_options); - - XFTransactionWriteHandler handler(txn, db_impl); - *s = my_batch->Iterate(&handler); - - if (!s->ok()) { - Log(InfoLogLevel::ERROR_LEVEL, db_options.info_log, - "XFUNC test could not iterate batch. status: $s\n", - s->ToString().c_str()); - } - - *s = txn->Commit(); - - if (!s->ok()) { - Log(InfoLogLevel::ERROR_LEVEL, db_options.info_log, - "XFUNC test could not commit transaction. status: $s\n", - s->ToString().c_str()); - } - - *write_attempted = true; - delete txn; - delete txn_db; -} - } // namespace rocksdb #endif // XFUNC diff --git a/util/xfunc.h b/util/xfunc.h index 2b3b0e3ee..122d235b0 100644 --- a/util/xfunc.h +++ b/util/xfunc.h @@ -7,6 +7,8 @@ #include #include +#include "rocksdb/options.h" + namespace rocksdb { /* @@ -31,25 +33,12 @@ namespace rocksdb { #define XFUNC_TEST(condition, location, lfname, fname, ...) 
#else -struct Options; -struct WriteOptions; -class ManagedIterator; -class DBImpl; void GetXFTestOptions(Options* options, int skip_policy); -void xf_manage_release(ManagedIterator* iter); -void xf_manage_new(DBImpl* db, ReadOptions* readoptions, - bool is_snapshot_supported); -void xf_manage_create(ManagedIterator* iter); void xf_manage_options(ReadOptions* read_options); void xf_transaction_set_memtable_history( int32_t* max_write_buffer_number_to_maintain); void xf_transaction_clear_memtable_history( int32_t* max_write_buffer_number_to_maintain); -void xf_transaction_write(const WriteOptions& write_options, - const DBOptions& db_options, - class WriteBatch* my_batch, - class WriteCallback* callback, DBImpl* db_impl, - Status* success, bool* write_attempted); // This class provides the facility to run custom code to test a specific // feature typically with all existing unit tests. From acd7d5869535c5e7c89c48f5c37ceffb944f3590 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 25 Jan 2016 16:26:53 -0800 Subject: [PATCH 045/195] [directory includes cleanup] Remove util->db dependency for ThreadStatusUtil Summary: We can avoid the dependency by forward-declaring ColumnFamilyData and then treating it as a black box. That means callers of ThreadStatusUtil need to explicitly provide more options, even if they can be derived from the ColumnFamilyData, since ThreadStatusUtil doesn't include the definition. This is part of a series of diffs to eliminate circular dependencies between directories (e.g., db/* files depending on util/* files and vice-versa). 
Test Plan: $ ./db_test --gtest_filter=DBTest.GetThreadStatus $ make -j32 commit-prereq Reviewers: sdong, yhchiang, IslamAbdelRahman Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53361 --- db/compaction_job.cc | 18 +++++++++--------- db/db_impl.cc | 7 +++++-- db/flush_job.cc | 3 ++- db/perf_context_test.cc | 1 + util/env_posix.cc | 1 + util/thread_status_util.cc | 35 +++++++++++++++++++++-------------- util/thread_status_util.h | 13 ++++++++----- 7 files changed, 47 insertions(+), 31 deletions(-) diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 2d0711ff0..c30ee7736 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -237,7 +237,9 @@ CompactionJob::CompactionJob( paranoid_file_checks_(paranoid_file_checks), measure_io_stats_(measure_io_stats) { assert(log_buffer_ != nullptr); - ThreadStatusUtil::SetColumnFamily(compact_->compaction->column_family_data()); + const auto* cfd = compact_->compaction->column_family_data(); + ThreadStatusUtil::SetColumnFamily(cfd, cfd->ioptions()->env, + cfd->options()->enable_thread_tracking); ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION); ReportStartedCompaction(compaction); } @@ -249,8 +251,9 @@ CompactionJob::~CompactionJob() { void CompactionJob::ReportStartedCompaction( Compaction* compaction) { - ThreadStatusUtil::SetColumnFamily( - compact_->compaction->column_family_data()); + const auto* cfd = compact_->compaction->column_family_data(); + ThreadStatusUtil::SetColumnFamily(cfd, cfd->ioptions()->env, + cfd->options()->enable_thread_tracking); ThreadStatusUtil::SetThreadOperationProperty( ThreadStatus::COMPACTION_JOB_ID, @@ -415,12 +418,9 @@ void CompactionJob::GenSubcompactionBoundaries() { // Group the ranges into subcompactions const double min_file_fill_percent = 4.0 / 5; - uint64_t max_output_files = - static_cast( - std::ceil( - sum / min_file_fill_percent / - cfd->GetCurrentMutableCFOptions()->MaxFileSizeForLevel(out_lvl)) - ); + uint64_t 
max_output_files = static_cast(std::ceil( + sum / min_file_fill_percent / + cfd->GetCurrentMutableCFOptions()->MaxFileSizeForLevel(out_lvl))); uint64_t subcompactions = std::min({static_cast(ranges.size()), static_cast(db_options_.max_subcompactions), diff --git a/db/db_impl.cc b/db/db_impl.cc index d4f441715..2ddf26347 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -2898,7 +2898,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, TEST_SYNC_POINT("DBImpl::BackgroundCompaction:TrivialMove"); // Instrument for event update // TODO(yhchiang): add op details for showing trivial-move. - ThreadStatusUtil::SetColumnFamily(c->column_family_data()); + ThreadStatusUtil::SetColumnFamily( + c->column_family_data(), c->column_family_data()->ioptions()->env, + c->column_family_data()->options()->enable_thread_tracking); ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION); compaction_job_stats.num_input_files = c->num_input_files(0); @@ -5640,7 +5642,8 @@ Status DBImpl::RenameTempFileToOptionsFile(const std::string& file_name) { void DBImpl::NewThreadStatusCfInfo( ColumnFamilyData* cfd) const { if (db_options_.enable_thread_tracking) { - ThreadStatusUtil::NewColumnFamilyInfo(this, cfd); + ThreadStatusUtil::NewColumnFamilyInfo(this, cfd, cfd->GetName(), + cfd->ioptions()->env); } } diff --git a/db/flush_job.cc b/db/flush_job.cc index a565f8f25..da1124474 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -94,7 +94,8 @@ FlushJob::~FlushJob() { } void FlushJob::ReportStartedFlush() { - ThreadStatusUtil::SetColumnFamily(cfd_); + ThreadStatusUtil::SetColumnFamily(cfd_, cfd_->ioptions()->env, + cfd_->options()->enable_thread_tracking); ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_FLUSH); ThreadStatusUtil::SetThreadOperationProperty( ThreadStatus::COMPACTION_JOB_ID, diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index 72b52f6e8..9494ac92b 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -13,6 +13,7 @@ #include 
"rocksdb/slice_transform.h" #include "rocksdb/memtablerep.h" #include "util/histogram.h" +#include "util/instrumented_mutex.h" #include "util/stop_watch.h" #include "util/testharness.h" #include "util/thread_status_util.h" diff --git a/util/env_posix.cc b/util/env_posix.cc index 9d549b44d..2ea8eebb1 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -46,6 +46,7 @@ #include "util/iostats_context_imp.h" #include "util/logging.h" #include "util/posix_logger.h" +#include "util/random.h" #include "util/string_util.h" #include "util/sync_point.h" #include "util/thread_local.h" diff --git a/util/thread_status_util.cc b/util/thread_status_util.cc index e67a8e4ef..6039c5f1d 100644 --- a/util/thread_status_util.cc +++ b/util/thread_status_util.cc @@ -3,9 +3,10 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. +#include "util/thread_status_util.h" + #include "rocksdb/env.h" #include "util/thread_status_updater.h" -#include "util/thread_status_util.h" namespace rocksdb { @@ -33,12 +34,14 @@ void ThreadStatusUtil::UnregisterThread() { } } -void ThreadStatusUtil::SetColumnFamily(const ColumnFamilyData* cfd) { - if (!MaybeInitThreadLocalUpdater(cfd->ioptions()->env)) { +void ThreadStatusUtil::SetColumnFamily(const ColumnFamilyData* cfd, + const Env* env, + bool enable_thread_tracking) { + if (!MaybeInitThreadLocalUpdater(env)) { return; } assert(thread_updater_local_cache_); - if (cfd != nullptr && cfd->options()->enable_thread_tracking) { + if (cfd != nullptr && enable_thread_tracking) { thread_updater_local_cache_->SetColumnFamilyInfoKey(cfd); } else { // When cfd == nullptr or enable_thread_tracking == false, we set @@ -118,15 +121,17 @@ void ThreadStatusUtil::ResetThreadStatus() { thread_updater_local_cache_->ResetThreadStatus(); } -void ThreadStatusUtil::NewColumnFamilyInfo( - const DB* db, const ColumnFamilyData* cfd) { - if 
(!MaybeInitThreadLocalUpdater(cfd->ioptions()->env)) { +void ThreadStatusUtil::NewColumnFamilyInfo(const DB* db, + const ColumnFamilyData* cfd, + const std::string& cf_name, + const Env* env) { + if (!MaybeInitThreadLocalUpdater(env)) { return; } assert(thread_updater_local_cache_); if (thread_updater_local_cache_) { - thread_updater_local_cache_->NewColumnFamilyInfo( - db, db->GetName(), cfd, cfd->GetName()); + thread_updater_local_cache_->NewColumnFamilyInfo(db, db->GetName(), cfd, + cf_name); } } @@ -171,8 +176,9 @@ bool ThreadStatusUtil::MaybeInitThreadLocalUpdater(const Env* env) { return false; } -void ThreadStatusUtil::SetColumnFamily(const ColumnFamilyData* cfd) { -} +void ThreadStatusUtil::SetColumnFamily(const ColumnFamilyData* cfd, + const Env* env, + bool enable_thread_tracking) {} void ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType op) { } @@ -188,9 +194,10 @@ void ThreadStatusUtil::IncreaseThreadOperationProperty( void ThreadStatusUtil::SetThreadState(ThreadStatus::StateType state) { } -void ThreadStatusUtil::NewColumnFamilyInfo( - const DB* db, const ColumnFamilyData* cfd) { -} +void ThreadStatusUtil::NewColumnFamilyInfo(const DB* db, + const ColumnFamilyData* cfd, + const std::string& cf_name, + const Env* env) {} void ThreadStatusUtil::EraseColumnFamilyInfo( const ColumnFamilyData* cfd) { diff --git a/util/thread_status_util.h b/util/thread_status_util.h index aa13a6c40..101cd0ef1 100644 --- a/util/thread_status_util.h +++ b/util/thread_status_util.h @@ -5,14 +5,16 @@ #pragma once -#include "db/column_family.h" +#include + +#include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/thread_status.h" #include "util/thread_status_updater.h" namespace rocksdb { -class ColumnFamilyData; +class ColumnFamilyData; // The static utility class for updating thread-local status. // @@ -37,8 +39,8 @@ class ThreadStatusUtil { // Create an entry in the global ColumnFamilyInfo table for the // specified column family. 
This function should be called only // when the current thread does not hold db_mutex. - static void NewColumnFamilyInfo( - const DB* db, const ColumnFamilyData* cfd); + static void NewColumnFamilyInfo(const DB* db, const ColumnFamilyData* cfd, + const std::string& cf_name, const Env* env); // Erase the ConstantColumnFamilyInfo that is associated with the // specified ColumnFamilyData. This function should be called only @@ -52,7 +54,8 @@ class ThreadStatusUtil { // Update the thread status to indicate the current thread is doing // something related to the specified column family. - static void SetColumnFamily(const ColumnFamilyData* cfd); + static void SetColumnFamily(const ColumnFamilyData* cfd, const Env* env, + bool enable_thread_tracking); static void SetThreadOperation(ThreadStatus::OperationType type); From 167bd8856d14ca0934ad9f1bf4c3c7b94db4b76a Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 25 Jan 2016 17:49:58 -0800 Subject: [PATCH 046/195] [directory includes cleanup] Finish removing util->db dependencies --- CMakeLists.txt | 9 +- Makefile | 2 +- {util => db}/auto_roll_logger.cc | 2 +- {util => db}/auto_roll_logger.h | 0 {util => db}/auto_roll_logger_test.cc | 2 +- db/db_impl.cc | 4 +- {util => db}/db_info_dumper.cc | 3 +- {util => db}/db_info_dumper.h | 0 db/db_test.cc | 100 +++++++++++++++++++++ {util => memtable}/skiplistrep.cc | 0 {util => memtable}/vectorrep.cc | 0 src.mk | 10 +-- util/file_util.cc | 1 - util/memenv_test.cc | 53 ----------- util/mock_env_test.cc | 47 ---------- util/options.cc | 1 - util/testutil.h | 1 - utilities/backupable/backupable_db_test.cc | 2 +- 18 files changed, 118 insertions(+), 119 deletions(-) rename {util => db}/auto_roll_logger.cc (99%) rename {util => db}/auto_roll_logger.h (100%) rename {util => db}/auto_roll_logger_test.cc (99%) rename {util => db}/db_info_dumper.cc (99%) rename {util => db}/db_info_dumper.h (100%) rename {util => memtable}/skiplistrep.cc (100%) rename {util => memtable}/vectorrep.cc 
(100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7222807fb..7cfabcd86 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,7 @@ add_subdirectory(third-party/gtest-1.7.0/fused-src/gtest) # Main library source code set(SOURCES + db/auto_roll_logger.cc db/builder.cc db/c.cc db/column_family.cc @@ -114,6 +115,7 @@ set(SOURCES db/db_impl_debug.cc db/db_impl_experimental.cc db/db_impl_readonly.cc + db/db_info_dumper.cc db/db_iter.cc db/event_helpers.cc db/experimental.cc @@ -149,6 +151,8 @@ set(SOURCES memtable/hash_cuckoo_rep.cc memtable/hash_linklist_rep.cc memtable/hash_skiplist_rep.cc + memtable/skiplistrep.cc + memtable/vectorrep.cc port/stack_trace.cc port/win/env_win.cc port/win/port_win.cc @@ -194,7 +198,6 @@ set(SOURCES util/comparator.cc util/concurrent_arena.cc util/crc32c.cc - util/db_info_dumper.cc util/delete_scheduler_impl.cc util/dynamic_bloom.cc util/env.cc @@ -224,7 +227,6 @@ set(SOURCES util/perf_level.cc util/random.cc util/rate_limiter.cc - util/skiplistrep.cc util/slice.cc util/statistics.cc util/status.cc @@ -238,7 +240,6 @@ set(SOURCES util/thread_status_updater.cc util/thread_status_util.cc util/thread_status_util_debug.cc - util/vectorrep.cc util/xfunc.cc util/xxhash.cc utilities/backupable/backupable_db.cc @@ -304,6 +305,7 @@ set(APPS set(C_TESTS db/c_test.c) set(TESTS + db/auto_roll_logger_test.cc db/column_family_test.cc db/compact_files_test.cc db/compaction_iterator_test.cc @@ -365,7 +367,6 @@ set(TESTS tools/sst_dump_test.cc util/arena_test.cc util/autovector_test.cc - util/auto_roll_logger_test.cc util/bloom_test.cc util/cache_test.cc util/coding_test.cc diff --git a/Makefile b/Makefile index 2eeb2f1c5..d9c3374b3 100644 --- a/Makefile +++ b/Makefile @@ -1019,7 +1019,7 @@ manual_compaction_test: db/manual_compaction_test.o $(LIBOBJECTS) $(TESTHARNESS) filelock_test: util/filelock_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) -auto_roll_logger_test: util/auto_roll_logger_test.o $(LIBOBJECTS) $(TESTHARNESS) 
+auto_roll_logger_test: db/auto_roll_logger_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) memtable_list_test: db/memtable_list_test.o $(LIBOBJECTS) $(TESTHARNESS) diff --git a/util/auto_roll_logger.cc b/db/auto_roll_logger.cc similarity index 99% rename from util/auto_roll_logger.cc rename to db/auto_roll_logger.cc index e9b13d109..cf92f34c8 100644 --- a/util/auto_roll_logger.cc +++ b/db/auto_roll_logger.cc @@ -3,7 +3,7 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. // -#include "util/auto_roll_logger.h" +#include "db/auto_roll_logger.h" #include "util/mutexlock.h" using namespace std; diff --git a/util/auto_roll_logger.h b/db/auto_roll_logger.h similarity index 100% rename from util/auto_roll_logger.h rename to db/auto_roll_logger.h diff --git a/util/auto_roll_logger_test.cc b/db/auto_roll_logger_test.cc similarity index 99% rename from util/auto_roll_logger_test.cc rename to db/auto_roll_logger_test.cc index c26be2bd2..e3e04c93f 100644 --- a/util/auto_roll_logger_test.cc +++ b/db/auto_roll_logger_test.cc @@ -10,8 +10,8 @@ #include #include #include +#include "db/auto_roll_logger.h" #include "util/testharness.h" -#include "util/auto_roll_logger.h" #include "rocksdb/db.h" #include #include diff --git a/db/db_impl.cc b/db/db_impl.cc index 2ddf26347..02060393f 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -31,8 +31,10 @@ #include #include +#include "db/auto_roll_logger.h" #include "db/builder.h" #include "db/compaction_job.h" +#include "db/db_info_dumper.h" #include "db/db_iter.h" #include "db/dbformat.h" #include "db/event_helpers.h" @@ -76,13 +78,11 @@ #include "table/merger.h" #include "table/table_builder.h" #include "table/two_level_iterator.h" -#include "util/auto_roll_logger.h" #include "util/autovector.h" #include "util/build_version.h" #include "util/coding.h" #include "util/compression.h" #include "util/crc32c.h" -#include 
"util/db_info_dumper.h" #include "util/file_reader_writer.h" #include "util/file_util.h" #include "util/iostats_context_imp.h" diff --git a/util/db_info_dumper.cc b/db/db_info_dumper.cc similarity index 99% rename from util/db_info_dumper.cc rename to db/db_info_dumper.cc index 6cb978fbb..de9b77b1b 100644 --- a/util/db_info_dumper.cc +++ b/db/db_info_dumper.cc @@ -7,6 +7,8 @@ #define __STDC_FORMAT_MACROS #endif +#include "db/db_info_dumper.h" + #include #include #include @@ -16,7 +18,6 @@ #include "db/filename.h" #include "rocksdb/options.h" #include "rocksdb/env.h" -#include "util/db_info_dumper.h" namespace rocksdb { diff --git a/util/db_info_dumper.h b/db/db_info_dumper.h similarity index 100% rename from util/db_info_dumper.h rename to db/db_info_dumper.h diff --git a/db/db_test.cc b/db/db_test.cc index 6b0f5b2d5..f2b6761c2 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -122,6 +122,106 @@ class DBTestWithParam bool exclusive_manual_compaction_; }; +TEST_F(DBTest, MockEnvTest) { + unique_ptr env{new MockEnv(Env::Default())}; + Options options; + options.create_if_missing = true; + options.env = env.get(); + DB* db; + + const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")}; + const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")}; + + ASSERT_OK(DB::Open(options, "/dir/db", &db)); + for (size_t i = 0; i < 3; ++i) { + ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i])); + } + + for (size_t i = 0; i < 3; ++i) { + std::string res; + ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); + ASSERT_TRUE(res == vals[i]); + } + + Iterator* iterator = db->NewIterator(ReadOptions()); + iterator->SeekToFirst(); + for (size_t i = 0; i < 3; ++i) { + ASSERT_TRUE(iterator->Valid()); + ASSERT_TRUE(keys[i] == iterator->key()); + ASSERT_TRUE(vals[i] == iterator->value()); + iterator->Next(); + } + ASSERT_TRUE(!iterator->Valid()); + delete iterator; + + // TEST_FlushMemTable() is not supported in ROCKSDB_LITE + #ifndef ROCKSDB_LITE + DBImpl* dbi = 
reinterpret_cast(db); + ASSERT_OK(dbi->TEST_FlushMemTable()); + + for (size_t i = 0; i < 3; ++i) { + std::string res; + ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); + ASSERT_TRUE(res == vals[i]); + } + #endif // ROCKSDB_LITE + + delete db; +} + +TEST_F(DBTest, MemEnvTest) { + unique_ptr env{NewMemEnv(Env::Default())}; + Options options; + options.create_if_missing = true; + options.env = env.get(); + DB* db; + + const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")}; + const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")}; + + ASSERT_OK(DB::Open(options, "/dir/db", &db)); + for (size_t i = 0; i < 3; ++i) { + ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i])); + } + + for (size_t i = 0; i < 3; ++i) { + std::string res; + ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); + ASSERT_TRUE(res == vals[i]); + } + + Iterator* iterator = db->NewIterator(ReadOptions()); + iterator->SeekToFirst(); + for (size_t i = 0; i < 3; ++i) { + ASSERT_TRUE(iterator->Valid()); + ASSERT_TRUE(keys[i] == iterator->key()); + ASSERT_TRUE(vals[i] == iterator->value()); + iterator->Next(); + } + ASSERT_TRUE(!iterator->Valid()); + delete iterator; + + DBImpl* dbi = reinterpret_cast(db); + ASSERT_OK(dbi->TEST_FlushMemTable()); + + for (size_t i = 0; i < 3; ++i) { + std::string res; + ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); + ASSERT_TRUE(res == vals[i]); + } + + delete db; + + options.create_if_missing = false; + ASSERT_OK(DB::Open(options, "/dir/db", &db)); + for (size_t i = 0; i < 3; ++i) { + std::string res; + ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); + ASSERT_TRUE(res == vals[i]); + } + delete db; +} + TEST_F(DBTest, WriteEmptyBatch) { Options options; options.env = env_; diff --git a/util/skiplistrep.cc b/memtable/skiplistrep.cc similarity index 100% rename from util/skiplistrep.cc rename to memtable/skiplistrep.cc diff --git a/util/vectorrep.cc b/memtable/vectorrep.cc similarity index 100% rename from util/vectorrep.cc rename to 
memtable/vectorrep.cc diff --git a/src.mk b/src.mk index 46949b611..4e95b1fc1 100644 --- a/src.mk +++ b/src.mk @@ -1,5 +1,6 @@ # These are the sources from which librocksdb.a is built: LIB_SOURCES = \ + db/auto_roll_logger.cc \ db/builder.cc \ db/c.cc \ db/column_family.cc \ @@ -15,6 +16,7 @@ LIB_SOURCES = \ db/db_impl_debug.cc \ db/db_impl_readonly.cc \ db/db_impl_experimental.cc \ + db/db_info_dumper.cc \ db/db_iter.cc \ db/experimental.cc \ db/event_helpers.cc \ @@ -50,6 +52,8 @@ LIB_SOURCES = \ memtable/hash_cuckoo_rep.cc \ memtable/hash_linklist_rep.cc \ memtable/hash_skiplist_rep.cc \ + memtable/skiplistrep.cc \ + memtable/vectorrep.cc \ port/stack_trace.cc \ port/port_posix.cc \ table/adaptive_table_factory.cc \ @@ -82,7 +86,6 @@ LIB_SOURCES = \ table/two_level_iterator.cc \ tools/dump/db_dump_tool.cc \ util/arena.cc \ - util/auto_roll_logger.cc \ util/bloom.cc \ util/build_version.cc \ util/cache.cc \ @@ -91,7 +94,6 @@ LIB_SOURCES = \ util/compaction_job_stats_impl.cc \ util/concurrent_arena.cc \ util/crc32c.cc \ - util/db_info_dumper.cc \ util/delete_scheduler_impl.cc \ util/dynamic_bloom.cc \ util/env.cc \ @@ -152,7 +154,6 @@ LIB_SOURCES = \ util/perf_level.cc \ util/random.cc \ util/rate_limiter.cc \ - util/skiplistrep.cc \ util/slice.cc \ util/statistics.cc \ util/status.cc \ @@ -165,7 +166,6 @@ LIB_SOURCES = \ util/thread_status_updater_debug.cc \ util/thread_status_util.cc \ util/thread_status_util_debug.cc \ - util/vectorrep.cc \ util/xfunc.cc \ util/xxhash.cc \ @@ -180,6 +180,7 @@ MOCK_SOURCES = \ TEST_BENCH_SOURCES = \ third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc \ + db/auto_roll_logger_test.cc \ db/column_family_test.cc \ db/compaction_job_test.cc \ db/compaction_job_stats_test.cc \ @@ -240,7 +241,6 @@ TEST_BENCH_SOURCES = \ tools/reduce_levels_test.cc \ tools/sst_dump_test.cc \ util/arena_test.cc \ - util/auto_roll_logger_test.cc \ util/autovector_test.cc \ util/benchharness.cc \ util/benchharness_test.cc \ diff --git a/util/file_util.cc 
b/util/file_util.cc index d4f7b4004..55eeab722 100644 --- a/util/file_util.cc +++ b/util/file_util.cc @@ -11,7 +11,6 @@ #include "rocksdb/delete_scheduler.h" #include "rocksdb/env.h" #include "rocksdb/options.h" -#include "db/filename.h" #include "util/file_reader_writer.h" namespace rocksdb { diff --git a/util/memenv_test.cc b/util/memenv_test.cc index 24190daba..2b872d266 100644 --- a/util/memenv_test.cc +++ b/util/memenv_test.cc @@ -4,7 +4,6 @@ #ifndef ROCKSDB_LITE -#include "db/db_impl.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "util/testharness.h" @@ -183,58 +182,6 @@ TEST_F(MemEnvTest, LargeWrite) { delete [] scratch; } -TEST_F(MemEnvTest, DBTest) { - Options options; - options.create_if_missing = true; - options.env = env_; - DB* db; - - const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")}; - const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")}; - - ASSERT_OK(DB::Open(options, "/dir/db", &db)); - for (size_t i = 0; i < 3; ++i) { - ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i])); - } - - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - - Iterator* iterator = db->NewIterator(ReadOptions()); - iterator->SeekToFirst(); - for (size_t i = 0; i < 3; ++i) { - ASSERT_TRUE(iterator->Valid()); - ASSERT_TRUE(keys[i] == iterator->key()); - ASSERT_TRUE(vals[i] == iterator->value()); - iterator->Next(); - } - ASSERT_TRUE(!iterator->Valid()); - delete iterator; - - DBImpl* dbi = reinterpret_cast(db); - ASSERT_OK(dbi->TEST_FlushMemTable()); - - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - - delete db; - - options.create_if_missing = false; - ASSERT_OK(DB::Open(options, "/dir/db", &db)); - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - delete db; -} - } 
// namespace rocksdb int main(int argc, char** argv) { diff --git a/util/mock_env_test.cc b/util/mock_env_test.cc index 2f50c2a82..710881b55 100644 --- a/util/mock_env_test.cc +++ b/util/mock_env_test.cc @@ -6,7 +6,6 @@ #include #include #include "util/mock_env.h" -#include "db/db_impl.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "util/testharness.h" @@ -221,52 +220,6 @@ TEST_F(MockEnvTest, Corrupt) { ASSERT_NE(result.compare(kCorrupted), 0); } -TEST_F(MockEnvTest, DBTest) { - Options options; - options.create_if_missing = true; - options.env = env_; - DB* db; - - const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")}; - const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")}; - - ASSERT_OK(DB::Open(options, "/dir/db", &db)); - for (size_t i = 0; i < 3; ++i) { - ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i])); - } - - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - - Iterator* iterator = db->NewIterator(ReadOptions()); - iterator->SeekToFirst(); - for (size_t i = 0; i < 3; ++i) { - ASSERT_TRUE(iterator->Valid()); - ASSERT_TRUE(keys[i] == iterator->key()); - ASSERT_TRUE(vals[i] == iterator->value()); - iterator->Next(); - } - ASSERT_TRUE(!iterator->Valid()); - delete iterator; - - // TEST_FlushMemTable() is not supported in ROCKSDB_LITE - #ifndef ROCKSDB_LITE - DBImpl* dbi = reinterpret_cast(db); - ASSERT_OK(dbi->TEST_FlushMemTable()); - - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - #endif // ROCKSDB_LITE - - delete db; -} - TEST_F(MockEnvTest, FakeSleeping) { int64_t now = 0; auto s = env_->GetCurrentTime(&now); diff --git a/util/options.cc b/util/options.cc index c925153fd..13fee9051 100644 --- a/util/options.cc +++ b/util/options.cc @@ -17,7 +17,6 @@ #include #include -#include "db/writebuffer.h" #include "rocksdb/cache.h" #include 
"rocksdb/compaction_filter.h" #include "rocksdb/comparator.h" diff --git a/util/testutil.h b/util/testutil.h index d8b4f0ca2..02e786b47 100644 --- a/util/testutil.h +++ b/util/testutil.h @@ -13,7 +13,6 @@ #include #include -#include "db/dbformat.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index a41b4094f..a3d32090c 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -13,6 +13,7 @@ #include #include +#include "db/filename.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/types.h" @@ -24,7 +25,6 @@ #include "util/mutexlock.h" #include "util/string_util.h" #include "util/testutil.h" -#include "util/auto_roll_logger.h" #include "util/mock_env.h" #include "utilities/backupable/backupable_db_testutil.h" From b0afcdeeac86ae677b99f47fa8ef8572835059d8 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Tue, 26 Jan 2016 11:07:08 -0800 Subject: [PATCH 047/195] Fix bug in block based tables with full filter block and prefix_extractor Summary: Right now when we are creating a BlockBasedTable with fill filter block we add to the filter all the prefixes that are InDomain() based on the prefix_extractor the problem is that when we read a key from the file, we check the filter block for the prefix whether or not it's InDomain() Test Plan: unit tests Reviewers: yhchiang, rven, anthony, kradhakrishnan, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53385 --- db/db_test.cc | 46 +++++++++++++++++++++++++++++++ table/block_based_table_reader.cc | 1 + 2 files changed, 47 insertions(+) diff --git a/db/db_test.cc b/db/db_test.cc index f2b6761c2..ef46f17fb 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -10058,6 +10058,52 @@ TEST_F(DBTest, WalFilterTestWithChangeBatchExtraKeys) { #endif // ROCKSDB_LITE 
+class SliceTransformLimitedDomain : public SliceTransform { + const char* Name() const override { return "SliceTransformLimitedDomain"; } + + Slice Transform(const Slice& src) const override { + return Slice(src.data(), 5); + } + + bool InDomain(const Slice& src) const override { + // prefix will be x???? + return src.size() >= 5 && src[0] == 'x'; + } + + bool InRange(const Slice& dst) const override { + // prefix will be x???? + return dst.size() == 5 && dst[0] == 'x'; + } +}; + +TEST_F(DBTest, PrefixExtractorFullFilter) { + BlockBasedTableOptions bbto; + bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false)); + bbto.whole_key_filtering = false; + + Options options = CurrentOptions(); + options.prefix_extractor = std::make_shared(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + DestroyAndReopen(options); + + ASSERT_OK(Put("x1111_AAAA", "val1")); + ASSERT_OK(Put("x1112_AAAA", "val2")); + ASSERT_OK(Put("x1113_AAAA", "val3")); + ASSERT_OK(Put("x1114_AAAA", "val4")); + // Not in domain, wont be added to filter + ASSERT_OK(Put("zzzzz_AAAA", "val5")); + + ASSERT_OK(Flush()); + + ASSERT_EQ(Get("x1111_AAAA"), "val1"); + ASSERT_EQ(Get("x1112_AAAA"), "val2"); + ASSERT_EQ(Get("x1113_AAAA"), "val3"); + ASSERT_EQ(Get("x1114_AAAA"), "val4"); + // Was not added to filter but rocksdb will try to read it from the filter + ASSERT_EQ(Get("zzzzz_AAAA"), "val5"); +} + #ifndef ROCKSDB_LITE class BloomStatsTestWithParam : public DBTest, diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 4a358d361..3114a6f15 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1207,6 +1207,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter, return false; } if (rep_->ioptions.prefix_extractor && + rep_->ioptions.prefix_extractor->InDomain(user_key) && !filter->PrefixMayMatch( rep_->ioptions.prefix_extractor->Transform(user_key))) { return false; From 
955ecf8b49ba71666548fefd2d9aeb5f43b8ab9e Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Tue, 26 Jan 2016 11:30:30 -0800 Subject: [PATCH 048/195] Fix an ASAN error in compact_files_test Summary: compact_files_test enables SyncPoint but never disable it before the test terminates. As a result, it might cause heap-use-after-free error when some code path trying to access the static variable of SyncPoint when it has already gone out of scope after the main thread dies. Test Plan: COMPILE_WITH_ASAN=1 make compact_files_test -j32 ./compact_files_test Reviewers: sdong, anthony, kradhakrishnan, rven, andrewkr, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53379 --- db/compact_files_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/db/compact_files_test.cc b/db/compact_files_test.cc index b2a131ecf..5512ed11f 100644 --- a/db/compact_files_test.cc +++ b/db/compact_files_test.cc @@ -107,6 +107,7 @@ TEST_F(CompactFilesTest, L0ConflictsFiles) { break; } } + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); delete db; } From 77ef87ccb0ba4559147d3e8b761e2b1b91d89596 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Tue, 26 Jan 2016 12:13:27 -0800 Subject: [PATCH 049/195] Update fbcode_config4.8.1.sh to use update_dependencies.sh Summary: This patch update fbcode_config4.8.1.sh to get it's dependencies the same way we updated fbcode_config.sh in D53037 as a result zstd is upgraded to 0.4.7 instead of 0.4.5 Test Plan: make clean && ROCKSDB_FBCODE_BUILD_WITH_481=1 make check -j64 make clean && ROCKSDB_FBCODE_BUILD_WITH_481=1 USE_CLANG=1 make check -j64 Reviewers: yhchiang, andrewkr, rven, kradhakrishnan, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53355 --- build_tools/dependencies_4.8.1.sh | 16 +++++++ build_tools/fbcode_config4.8.1.sh | 75 +++++++++++++----------------- build_tools/update_dependencies.sh | 41 
++++++++++++++++ 3 files changed, 90 insertions(+), 42 deletions(-) create mode 100644 build_tools/dependencies_4.8.1.sh diff --git a/build_tools/dependencies_4.8.1.sh b/build_tools/dependencies_4.8.1.sh new file mode 100644 index 000000000..33c092ecb --- /dev/null +++ b/build_tools/dependencies_4.8.1.sh @@ -0,0 +1,16 @@ +GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.8.1/centos6-native/cc6c9dc/ +CLANG_BASE=/mnt/vol/engshare/fbcode/third-party2/clang/3.7.1/centos6-native/9d9ecb9/ +LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/5710d6a0fb0d12820aac0bffcd7fcb8646e7fff7/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc +GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0600c95b31226b5e535614c590677d87c62d8016/2.17/gcc-4.8.1-glibc-2.17/99df8fc +SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/cbf6f1f209e5bd160bdc5d971744e039f36b1566/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a +ZLIB_BASE=/mnt/gvfs/third-party2/zlib/6d39cb54708049f527e713ad19f2aadb9d3667e8/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a +BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/2ddd45f0853bfc8bb1c27f0f447236a1a26c338a/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a +LZ4_BASE=/mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.8.1-glibc-2.17/c3f970a +ZSTD_BASE=/mnt/gvfs/third-party2/zstd/69d56740ffb89d8bc81ded8ec428c01a813ea948/0.4.7/gcc-4.8.1-glibc-2.17/c3f970a +GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/c7275a4ceae0aca0929e56964a31dafc53c1ee96/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a +JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/5f0be745ddc0f86f22c8c8bb64b6b1163c93df91/4.0.3/gcc-4.8.1-glibc-2.17/8d31e51 +NUMA_BASE=/mnt/gvfs/third-party2/numa/ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a +LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/121f1a75c4414683aea8c70b761bfaf187f7c1a3/trunk/gcc-4.8.1-glibc-2.17/675d945 +KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.8.1-glibc-2.17/da39a3e 
+BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/75670d0d8ef4891fd1ec2a7513ef01cd002c823b/2.25/centos6-native/da39a3e +VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/af85c56f424cd5edfc2c97588299b44ecdec96bb/3.8.1/gcc-4.8.1-glibc-2.17/c3f970a diff --git a/build_tools/fbcode_config4.8.1.sh b/build_tools/fbcode_config4.8.1.sh index 31bc091f4..71428d03b 100644 --- a/build_tools/fbcode_config4.8.1.sh +++ b/build_tools/fbcode_config4.8.1.sh @@ -4,67 +4,58 @@ # fbcode settings. It uses the latest g++ compiler and also # uses jemalloc +BASEDIR=`dirname $BASH_SOURCE` +source "$BASEDIR/dependencies_4.8.1.sh" + # location of libgcc -LIBGCC_BASE="/mnt/gvfs/third-party2/libgcc/d00277f4559e261ed0a81f30f23c0ce5564e359e/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc" LIBGCC_INCLUDE="$LIBGCC_BASE/include" LIBGCC_LIBS=" -L $LIBGCC_BASE/libs" # location of glibc -GLIBC_REV=0600c95b31226b5e535614c590677d87c62d8016 -GLIBC_INCLUDE="/mnt/gvfs/third-party2/glibc/$GLIBC_REV/2.17/gcc-4.8.1-glibc-2.17/99df8fc/include" -GLIBC_LIBS=" -L /mnt/gvfs/third-party2/glibc/$GLIBC_REV/2.17/gcc-4.8.1-glibc-2.17/99df8fc/lib" +GLIBC_INCLUDE="$GLIBC_BASE/include" +GLIBC_LIBS=" -L $GLIBC_BASE/lib" # location of snappy headers and libraries -SNAPPY_REV=cbf6f1f209e5bd160bdc5d971744e039f36b1566 -SNAPPY_INCLUDE=" -I /mnt/gvfs/third-party2/snappy/$SNAPPY_REV/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a/include" -SNAPPY_LIBS=" /mnt/gvfs/third-party2/snappy/$SNAPPY_REV/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a/lib/libsnappy.a" +SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include" +SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a" # location of zlib headers and libraries -ZLIB_REV=6d39cb54708049f527e713ad19f2aadb9d3667e8 -ZLIB_INCLUDE=" -I /mnt/gvfs/third-party2/zlib/$ZLIB_REV/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a/include" -ZLIB_LIBS=" /mnt/gvfs/third-party2/zlib/$ZLIB_REV/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a/lib/libz.a" +ZLIB_INCLUDE=" -I $ZLIB_BASE/include" +ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a" # location of bzip headers and libraries 
-BZIP_REV=d6c789bfc2ec4c51a63d66df2878926b8158cde8 -BZIP_INCLUDE=" -I /mnt/gvfs/third-party2/bzip2/$BZIP_REV/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a/include/" -BZIP_LIBS=" /mnt/gvfs/third-party2/bzip2/$BZIP_REV/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a/lib/libbz2.a" +BZIP2_INCLUDE=" -I $BZIP2_BASE/include/" +BZIP2_LIBS=" $BZIP2_BASE/lib/libbz2.a" -LZ4_REV=6858fac689e0f92e584224d91bdb0e39f6c8320d -LZ4_INCLUDE=" -I /mnt/gvfs/third-party2/lz4/$LZ4_REV/r131/gcc-4.8.1-glibc-2.17/c3f970a/include" -LZ4_LIBS=" /mnt/gvfs/third-party2/lz4/$LZ4_REV/r131/gcc-4.8.1-glibc-2.17/c3f970a/lib/liblz4.a" +LZ4_INCLUDE=" -I $LZ4_BASE/include" +LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a" -ZSTD_REV=d4ac2c5f9be76d57a6cbd3eb1011e97574a56cde -ZSTD_INCLUDE=" -I /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.5/gcc-4.8.1-glibc-2.17/c3f970a/include" -ZSTD_LIBS=" /mnt/gvfs/third-party2/zstd/$ZSTD_REV/0.4.5/gcc-4.8.1-glibc-2.17/c3f970a/lib/libzstd.a" +ZSTD_INCLUDE=" -I $ZSTD_BASE/include" +ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a" # location of gflags headers and libraries -GFLAGS_REV=c7275a4ceae0aca0929e56964a31dafc53c1ee96 -GFLAGS_INCLUDE=" -I /mnt/gvfs/third-party2/gflags/$GFLAGS_REV/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a/include/" -GFLAGS_LIBS=" /mnt/gvfs/third-party2/gflags/$GFLAGS_REV/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a/lib/libgflags.a" +GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/" +GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a" # location of jemalloc -JEMALLOC_REV=c370265e58c4b6602e798df23335a1e9913dae52 -JEMALLOC_INCLUDE=" -I /mnt/gvfs/third-party2/jemalloc/$JEMALLOC_REV/4.0.3/gcc-4.8.1-glibc-2.17/8d31e51/include" -JEMALLOC_LIB="/mnt/gvfs/third-party2/jemalloc/$JEMALLOC_REV/4.0.3/gcc-4.8.1-glibc-2.17/8d31e51/lib/libjemalloc.a" +JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include" +JEMALLOC_LIB="$JEMALLOC_BASE/lib/libjemalloc.a" # location of numa -NUMA_REV=ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73 -NUMA_INCLUDE=" -I /mnt/gvfs/third-party2/numa/$NUMA_REV/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a/include/" -NUMA_LIB=" 
/mnt/gvfs/third-party2/numa/$NUMA_REV/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a/lib/libnuma.a" +NUMA_INCLUDE=" -I $NUMA_BASE/include/" +NUMA_LIB=" $NUMA_BASE/lib/libnuma.a" # location of libunwind -LIBUNWIND_REV=121f1a75c4414683aea8c70b761bfaf187f7c1a3 -LIBUNWIND="/mnt/gvfs/third-party2/libunwind/$LIBUNWIND_REV/trunk/gcc-4.8.1-glibc-2.17/675d945/lib/libunwind.a" +LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a" # use Intel SSE support for checksum calculations export USE_SSE=1 -BINUTILS="/mnt/gvfs/third-party2/binutils/75670d0d8ef4891fd1ec2a7513ef01cd002c823b/2.25/centos6-native/da39a3e/bin" +BINUTILS="$BINUTILS_BASE/bin" AR="$BINUTILS/ar" -DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE" +DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP2_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE" -GCC_BASE="/mnt/gvfs/third-party2/gcc/c0064002d2609ab649603f769f0bd110bbe48029/4.8.1/centos6-native/cc6c9dc" STDLIBS="-L $GCC_BASE/lib64" if [ -z "$USE_CLANG" ]; then @@ -77,12 +68,13 @@ if [ -z "$USE_CLANG" ]; then CFLAGS+=" -isystem $LIBGCC_INCLUDE" else # clang - CLANG_BASE="/mnt/gvfs/third-party2/clang/ab054e9a490a8fd4537c0b6ec56e5c91c0f81c91/3.7" - CLANG_INCLUDE="$CLANG_BASE/gcc-4.8.1-glibc-2.17/ee9b060/lib/clang/3.7/include" - CC="$CLANG_BASE/centos6-native/b2feaee/bin/clang" - CXX="$CLANG_BASE/centos6-native/b2feaee/bin/clang++" + CLANG_BIN="$CLANG_BASE/bin" + CLANG_LIB="$CLANG_BASE/lib" + CLANG_INCLUDE="$CLANG_LIB/clang/*/include" + CC="$CLANG_BIN/clang" + CXX="$CLANG_BIN/clang++" - KERNEL_HEADERS_INCLUDE="/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.8.1-glibc-2.17/da39a3e/include/" + KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include/" CFLAGS="-B$BINUTILS/gold -nostdinc -nostdlib" CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.8.1 " @@ -100,16 +92,15 @@ CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX 
-DROCKSDB_FALLOCATE_PR CFLAGS+=" -DSNAPPY -DGFLAGS=google -DZLIB -DBZIP2 -DLZ4 -DZSTD -DNUMA" CXXFLAGS+=" $CFLAGS" -EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB" +EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB" EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/ld.so" EXEC_LDFLAGS+=" $LIBUNWIND" EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib" PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++" -EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS" +EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS" -VALGRIND_REV=af85c56f424cd5edfc2c97588299b44ecdec96bb -VALGRIND_VER="/mnt/gvfs/third-party2/valgrind/$VALGRIND_REV/3.8.1/gcc-4.8.1-glibc-2.17/c3f970a/bin/" +VALGRIND_VER="$VALGRIND_BASE/bin/" export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE diff --git a/build_tools/update_dependencies.sh b/build_tools/update_dependencies.sh index 63074c372..4f8033398 100755 --- a/build_tools/update_dependencies.sh +++ b/build_tools/update_dependencies.sh @@ -52,6 +52,10 @@ function get_lib_base() log_variable $__res_var } +########################################################### +# 4.9.x dependencies # +########################################################### + OUTPUT="$BASEDIR/dependencies.sh" rm -f "$OUTPUT" @@ -84,3 +88,40 @@ get_lib_base binutils LATEST centos6-native get_lib_base valgrind LATEST git diff $OUTPUT + +########################################################### +# 4.8.1 dependencies # +########################################################### + +OUTPUT="$BASEDIR/dependencies_4.8.1.sh" + +rm -f "$OUTPUT" +touch "$OUTPUT" + +echo "Writing 4.8.1 dependencies to $OUTPUT" + +# Compilers locations +GCC_BASE=`ls -d1 $TP2_LATEST/gcc/4.8.1/centos6-native/*/ | head -n1` 
+CLANG_BASE=`ls -d1 $TP2_LATEST/clang/3.7.1/centos6-native/*/ | head -n1` + +log_variable GCC_BASE +log_variable CLANG_BASE + +# Libraries locations +get_lib_base libgcc 4.8.1 gcc-4.8.1-glibc-2.17 +get_lib_base glibc 2.17 gcc-4.8.1-glibc-2.17 +get_lib_base snappy LATEST gcc-4.8.1-glibc-2.17 +get_lib_base zlib LATEST gcc-4.8.1-glibc-2.17 +get_lib_base bzip2 LATEST gcc-4.8.1-glibc-2.17 +get_lib_base lz4 LATEST gcc-4.8.1-glibc-2.17 +get_lib_base zstd LATEST gcc-4.8.1-glibc-2.17 +get_lib_base gflags LATEST gcc-4.8.1-glibc-2.17 +get_lib_base jemalloc LATEST gcc-4.8.1-glibc-2.17 +get_lib_base numa LATEST gcc-4.8.1-glibc-2.17 +get_lib_base libunwind LATEST gcc-4.8.1-glibc-2.17 + +get_lib_base kernel-headers LATEST gcc-4.8.1-glibc-2.17 +get_lib_base binutils LATEST centos6-native +get_lib_base valgrind 3.8.1 gcc-4.8.1-glibc-2.17 + +git diff $OUTPUT From 035857a31248b814cd22fde20934d6bb761e339f Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 26 Jan 2016 13:15:36 -0800 Subject: [PATCH 050/195] Fix RocksDB lite build Summary: NewMemEnv() is defined in rocksdb lite but just returns nullptr -- would it be better to just not define it so we can catch issues like this at compile-time? Test Plan: $ make clean && OPT="-DTRAVIS -DROCKSDB_LITE" V=1 make -j32 db_test $ ./db_test --gtest_filter='DBTest.MemEnvTest' ... [ PASSED ] 0 tests. Reviewers: yhchiang, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53427 --- db/db_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/db/db_test.cc b/db/db_test.cc index ef46f17fb..5111c05ce 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -169,6 +169,9 @@ TEST_F(DBTest, MockEnvTest) { delete db; } +// NewMemEnv returns nullptr in ROCKSDB_LITE since class InMemoryEnv isn't +// defined. 
+#ifndef ROCKSDB_LITE TEST_F(DBTest, MemEnvTest) { unique_ptr env{NewMemEnv(Env::Default())}; Options options; @@ -221,6 +224,7 @@ TEST_F(DBTest, MemEnvTest) { } delete db; } +#endif // ROCKSDB_LITE TEST_F(DBTest, WriteEmptyBatch) { Options options; From 0c433cd1eb27930868128e396ee2e75689565698 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Tue, 26 Jan 2016 14:47:42 -0800 Subject: [PATCH 051/195] Fix issue in Iterator::Seek when using Block based filter block with prefix_extractor Summary: Similar to D53385 we need to check InDomain before checking the filter block. Test Plan: unit tests Reviewers: yhchiang, rven, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53421 --- db/db_test.cc | 34 +++++++++++++++++++++++++++++++ table/block_based_table_reader.cc | 7 +++++-- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index 5111c05ce..1296bad06 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -10082,6 +10082,7 @@ class SliceTransformLimitedDomain : public SliceTransform { TEST_F(DBTest, PrefixExtractorFullFilter) { BlockBasedTableOptions bbto; + // Full Filter Block bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false)); bbto.whole_key_filtering = false; @@ -10108,6 +10109,39 @@ TEST_F(DBTest, PrefixExtractorFullFilter) { ASSERT_EQ(Get("zzzzz_AAAA"), "val5"); } +TEST_F(DBTest, PrefixExtractorBlockFilter) { + BlockBasedTableOptions bbto; + // Block Filter Block + bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true)); + + Options options = CurrentOptions(); + options.prefix_extractor = std::make_shared(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + DestroyAndReopen(options); + + ASSERT_OK(Put("x1113_AAAA", "val3")); + ASSERT_OK(Put("x1114_AAAA", "val4")); + // Not in domain, wont be added to filter + ASSERT_OK(Put("zzzzz_AAAA", "val1")); + ASSERT_OK(Put("zzzzz_AAAB", "val2")); + ASSERT_OK(Put("zzzzz_AAAC", "val3")); + 
ASSERT_OK(Put("zzzzz_AAAD", "val4")); + + ASSERT_OK(Flush()); + + std::vector iter_res; + auto iter = db_->NewIterator(ReadOptions()); + // Seek to a key that was not in Domain + for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) { + iter_res.emplace_back(iter->value().ToString()); + } + + std::vector expected_res = {"val1", "val2", "val3", "val4"}; + ASSERT_EQ(iter_res, expected_res); + delete iter; +} + #ifndef ROCKSDB_LITE class BloomStatsTestWithParam : public DBTest, diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 3114a6f15..00997f3ea 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1120,8 +1120,11 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) { } assert(rep_->ioptions.prefix_extractor != nullptr); - auto prefix = rep_->ioptions.prefix_extractor->Transform( - ExtractUserKey(internal_key)); + auto user_key = ExtractUserKey(internal_key); + if (!rep_->ioptions.prefix_extractor->InDomain(user_key)) { + return true; + } + auto prefix = rep_->ioptions.prefix_extractor->Transform(user_key); InternalKey internal_key_prefix(prefix, kMaxSequenceNumber, kTypeValue); auto internal_prefix = internal_key_prefix.Encode(); From d20915d52a8227fa4ecda2680254e668ca80bcc4 Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 25 Jan 2016 17:07:37 -0800 Subject: [PATCH 052/195] Disable stats about mutex duration by default Summary: Measuring mutex duration will measure time inside DB mutex, which breaks our best practice. Add a stat level in Statistics class. By default, disable to measure the mutex operations. Test Plan: Add a unit test to make sure it is off by default. 
Reviewers: rven, anthony, IslamAbdelRahman, kradhakrishnan, andrewkr, yhchiang Reviewed By: yhchiang Subscribers: MarkCallaghan, leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53367 --- HISTORY.md | 1 + db/db_test.cc | 14 ++++++++++++++ include/rocksdb/statistics.h | 13 +++++++++++++ util/instrumented_mutex.cc | 13 ++++++++++--- 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 13fc5e158..809cbf76c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,7 @@ ## Unreleased ### Public API Changes * Add a new perf context level between kEnableCount and kEnableTime. Level 2 now doesn't include timers for mutexes. +* Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll. ### New Features * ldb tool now supports operations to non-default column families. diff --git a/db/db_test.cc b/db/db_test.cc index 1296bad06..db3524345 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -7405,10 +7405,24 @@ TEST_F(DBTest, EncodeDecompressedBlockSizeTest) { } } +TEST_F(DBTest, MutexWaitStatsDisabledByDefault) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = rocksdb::CreateDBStatistics(); + CreateAndReopenWithCF({"pikachu"}, options); + const uint64_t kMutexWaitDelay = 100; + ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, + kMutexWaitDelay); + ASSERT_OK(Put("hello", "rocksdb")); + ASSERT_EQ(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), 0); + ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0); +} + TEST_F(DBTest, MutexWaitStats) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = rocksdb::CreateDBStatistics(); + options.statistics->stats_level_ = StatsLevel::kAll; CreateAndReopenWithCF({"pikachu"}, options); const uint64_t kMutexWaitDelay = 100; ThreadStatusUtil::TEST_SetStateDelay( diff 
--git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 15c49439c..813104b99 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -109,6 +109,7 @@ enum Tickers : uint32_t { // Writer has to wait for compaction or flush to finish. STALL_MICROS, // The wait time for db mutex. + // Disabled by default. To enable it set stats level to kAll DB_MUTEX_WAIT_MICROS, RATE_LIMIT_DELAY_MILLIS, NO_ITERATORS, // number of iterators currently open @@ -316,6 +317,16 @@ struct HistogramData { double standard_deviation; }; +enum StatsLevel { + // Collect all stats except the counters requiring to get time inside the + // mutex lock. + kExceptTimeForMutex, + // Collect all stats, including measuring duration of mutex operations. + // If getting time is expensive on the platform to run, it can + // reduce scalability to more threads, especialy for writes. + kAll, +}; + // Analyze the performance of a db class Statistics { public: @@ -339,6 +350,8 @@ class Statistics { virtual bool HistEnabledForType(uint32_t type) const { return type < HISTOGRAM_ENUM_MAX; } + + StatsLevel stats_level_ = kExceptTimeForMutex; }; // Create a concrete DBStatistics object diff --git a/util/instrumented_mutex.cc b/util/instrumented_mutex.cc index e5c6527be..e5603fe08 100644 --- a/util/instrumented_mutex.cc +++ b/util/instrumented_mutex.cc @@ -8,11 +8,18 @@ #include "util/thread_status_util.h" namespace rocksdb { +namespace { +bool ShouldReportToStats(Env* env, Statistics* stats) { + return env != nullptr && stats != nullptr && + stats->stats_level_ != kExceptTimeForMutex; +} +} // namespace + void InstrumentedMutex::Lock() { PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(db_mutex_lock_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS); uint64_t wait_time_micros = 0; - if (env_ != nullptr && stats_ != nullptr) { + if (ShouldReportToStats(env_, stats_)) { { StopWatch sw(env_, nullptr, 0, &wait_time_micros); LockInternal(); @@ -34,7 +41,7 @@ void InstrumentedCondVar::Wait() { 
PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS); uint64_t wait_time_micros = 0; - if (env_ != nullptr && stats_ != nullptr) { + if (ShouldReportToStats(env_, stats_)) { { StopWatch sw(env_, nullptr, 0, &wait_time_micros); WaitInternal(); @@ -57,7 +64,7 @@ bool InstrumentedCondVar::TimedWait(uint64_t abs_time_us) { stats_code_ == DB_MUTEX_WAIT_MICROS); uint64_t wait_time_micros = 0; bool result = false; - if (env_ != nullptr && stats_ != nullptr) { + if (ShouldReportToStats(env_, stats_)) { { StopWatch sw(env_, nullptr, 0, &wait_time_micros); result = TimedWaitInternal(abs_time_us); From d7f22b6d2574f20ee82d7c45521a7790cfb80601 Mon Sep 17 00:00:00 2001 From: Tomas Kolda Date: Sun, 24 Jan 2016 20:41:29 +0100 Subject: [PATCH 053/195] Fixing generated GenerateBuildVersion.vcxproj when one builds on different locale than english. The problem is that date and time CLI utilities generates different format so that REGEX in CMake does not work. 
--- CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7cfabcd86..493da023b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,10 +30,9 @@ cmake_minimum_required(VERSION 2.6) project(rocksdb) include(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty.inc) - -execute_process(COMMAND $ENV{COMSPEC} " /C date /T" OUTPUT_VARIABLE DATE) -execute_process(COMMAND $ENV{COMSPEC} " /C time /T" OUTPUT_VARIABLE TIME) -string(REGEX REPLACE "(..)/(..)/..(..).*" "\\1/\\2/\\3" DATE ${DATE}) +execute_process(COMMAND powershell -Command "Get-Date -format MM_dd_yyyy" OUTPUT_VARIABLE DATE) +execute_process(COMMAND powershell -Command "Get-Date -format HH:mm:ss" OUTPUT_VARIABLE TIME) +string(REGEX REPLACE "(..)_(..)_..(..).*" "\\1/\\2/\\3" DATE ${DATE}) string(REGEX REPLACE "(..):(.....).*" " \\1:\\2" TIME ${TIME}) string(CONCAT GIT_DATE_TIME ${DATE} ${TIME}) From 4265f81e87753ed3c892dad282dcd9cf43b2e678 Mon Sep 17 00:00:00 2001 From: Tomas Kolda Date: Wed, 27 Jan 2016 12:07:31 +0100 Subject: [PATCH 054/195] Remove util/auto_roll_logger.cc (it was moved to different directory) --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 493da023b..b0fd54a46 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -188,7 +188,6 @@ set(SOURCES tools/sst_dump_tool.cc tools/dump/db_dump_tool.cc util/arena.cc - util/auto_roll_logger.cc util/bloom.cc util/build_version.cc util/cache.cc From 26c618004a3e7a4bec054808b819fd2a4f309b31 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Mon, 18 Jan 2016 22:58:20 +0530 Subject: [PATCH 055/195] Add Wingify to USERS.md VWO, the flagship product of Wingify, uses RocksDB to populate the list of URLs where clients can run A/B test campaigns. It is also used to provide a way for the clients to see if VWO's Smart Code is installed on a specific URL of their account. 
--- USERS.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/USERS.md b/USERS.md index 7123790ac..929f4da8c 100644 --- a/USERS.md +++ b/USERS.md @@ -48,4 +48,7 @@ Pinterest's Object Retrieval System uses RocksDB for storage: https://www.youtub [Smyte](https://www.smyte.com/) uses RocksDB as the storage layer for their core key-value storage, high-performance counters and time-windowed HyperLogLog services. ## Rakuten Marketing -[Rakuten Marketing](https://marketing.rakuten.com/) uses RocksDB as the disk cache layer for the real-time bidding service in their Performance DSP. \ No newline at end of file +[Rakuten Marketing](https://marketing.rakuten.com/) uses RocksDB as the disk cache layer for the real-time bidding service in their Performance DSP. + +## VWO, Wingify +[VWO's](https://vwo.com/) Smart Code checker and URL helper uses RocksDB to store all the URLs where VWO's Smart Code is installed. From 4b50f135401710de452570d7cf8db3722ed88a34 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 27 Jan 2016 15:28:15 -0800 Subject: [PATCH 056/195] Should not skip bloom filter for L0 during the query. Summary: It's a regression bug caused by e089db40f9c8f2a8af466377ed0f6fd8a3c26456. With the change, if options.optimize_filters_for_hits=true and there are only L0 files (like single level universal compaction), we skip all the files in L0, which is more than necessary. Fix it by always trying to query bloom filter for files in level 0. Test Plan: Add a unit test for it. 
Reviewers: anthony, rven, yhchiang, IslamAbdelRahman, kradhakrishnan, andrewkr Reviewed By: andrewkr Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53493 --- db/db_universal_compaction_test.cc | 62 ++++++++++++++++++++++++++++++ db/version_set.cc | 2 +- 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 9efcf4ae5..9d3cca83c 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -14,6 +14,11 @@ namespace rocksdb { +static uint64_t TestGetTickerCount(const Options& options, + Tickers ticker_type) { + return options.statistics->getTickerCount(ticker_type); +} + static std::string CompressibleString(Random* rnd, int len) { std::string r; test::CompressibleString(rnd, 0.8, len, &r); @@ -154,6 +159,63 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSingleSortedRun) { } } +TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) { + Options options; + options = CurrentOptions(options); + options.compaction_style = kCompactionStyleUniversal; + options.compaction_options_universal.size_ratio = 5; + options.num_levels = num_levels_; + options.write_buffer_size = 105 << 10; // 105KB + options.arena_block_size = 4 << 10; + options.target_file_size_base = 32 << 10; // 32KB + // trigger compaction if there are >= 4 files + options.level0_file_num_compaction_trigger = 4; + BlockBasedTableOptions bbto; + bbto.cache_index_and_filter_blocks = true; + bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + bbto.whole_key_filtering = true; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + options.optimize_filters_for_hits = true; + options.statistics = rocksdb::CreateDBStatistics(); + options.memtable_factory.reset(new SpecialSkipListFactory(3)); + + DestroyAndReopen(options); + + // block compaction from happening + env_->SetBackgroundThreads(1, Env::LOW); + test::SleepingBackgroundTask 
sleeping_task_low; + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + + Put("", ""); + for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { + Put(Key(num * 10), "val"); + Put(Key(30 + num * 10), "val"); + Put(Key(60 + num * 10), "val"); + + dbfull()->TEST_WaitForFlushMemTable(); + } + + // Query set of non existing keys + for (int i = 5; i < 90; i += 10) { + ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); + } + + // Make sure bloom filter is used at least once. + ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + auto prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL); + + // Unblock compaction and wait it for happening. + sleeping_task_low.WakeUp(); + dbfull()->TEST_WaitForCompact(); + + // The same queries will not trigger bloom filter + for (int i = 5; i < 90; i += 10) { + ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); + } + ASSERT_EQ(prev_counter, TestGetTickerCount(options, BLOOM_FILTER_USEFUL)); +} + // TODO(kailiu) The tests on UniversalCompaction has some issues: // 1. A lot of magic numbers ("11" or "12"). // 2. Made assumption on the memtable flush conditions, which may change from diff --git a/db/version_set.cc b/db/version_set.cc index 235789512..3679bfbb4 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -963,7 +963,7 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, bool Version::IsFilterSkipped(int level) { // Reaching the bottom level implies misses at all upper levels, so we'll // skip checking the filters when we predict a hit. 
- return cfd_->ioptions()->optimize_filters_for_hits && + return cfd_->ioptions()->optimize_filters_for_hits && level > 0 && level == storage_info_.num_non_empty_levels() - 1; } From 03a5661a178364489b542019cff97412117c39c7 Mon Sep 17 00:00:00 2001 From: benoitc Date: Thu, 28 Jan 2016 11:39:48 +0100 Subject: [PATCH 057/195] fix build for raspberry 2 fix #810 --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index d9c3374b3..38fe1f5c1 100644 --- a/Makefile +++ b/Makefile @@ -85,9 +85,11 @@ endif ifneq ($(DEBUG_LEVEL), 2) OPT += -O2 -fno-omit-frame-pointer ifeq (,$(findstring ppc64,$(MACHINE))) # ppc64[le] doesn't support -momit-leaf-frame-pointer +ifneq ($(MACHINE),armv7l) OPT += -momit-leaf-frame-pointer endif endif +endif # if we're compiling for release, compile without debug code (-DNDEBUG) and # don't treat warnings as errors From 45768ade4fd071ccff2cc054c5b4dbc8da073830 Mon Sep 17 00:00:00 2001 From: agiardullo Date: Wed, 27 Jan 2016 17:11:44 -0800 Subject: [PATCH 058/195] transaction allocation perf improvements Summary: Removed a couple of memory allocations Test Plan: changes covered by existing tests Reviewers: rven, yhchiang, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53523 --- .../optimistic_transaction_impl.cc | 4 +- utilities/transactions/transaction_base.cc | 38 +++++++++++-------- utilities/transactions/transaction_base.h | 17 +++++---- utilities/transactions/transaction_impl.cc | 10 ++--- 4 files changed, 40 insertions(+), 29 deletions(-) diff --git a/utilities/transactions/optimistic_transaction_impl.cc b/utilities/transactions/optimistic_transaction_impl.cc index 120f18ed8..897e2711c 100644 --- a/utilities/transactions/optimistic_transaction_impl.cc +++ b/utilities/transactions/optimistic_transaction_impl.cc @@ -54,7 +54,7 @@ Status OptimisticTransactionImpl::Commit() { } Status s = db_impl->WriteWithCallback( - write_options_, 
write_batch_->GetWriteBatch(), &callback); + write_options_, GetWriteBatch()->GetWriteBatch(), &callback); if (s.ok()) { Clear(); @@ -77,7 +77,7 @@ Status OptimisticTransactionImpl::TryLock(ColumnFamilyHandle* column_family, SequenceNumber seq; if (snapshot_) { - seq = snapshot_->snapshot()->GetSequenceNumber(); + seq = snapshot_->GetSequenceNumber(); } else { seq = db_->GetLatestSequenceNumber(); } diff --git a/utilities/transactions/transaction_base.cc b/utilities/transactions/transaction_base.cc index 5f3e97e9b..aeea21e73 100644 --- a/utilities/transactions/transaction_base.cc +++ b/utilities/transactions/transaction_base.cc @@ -21,14 +21,14 @@ TransactionBaseImpl::TransactionBaseImpl(DB* db, : db_(db), write_options_(write_options), cmp_(GetColumnFamilyUserComparator(db->DefaultColumnFamily())), - write_batch_(new WriteBatchWithIndex(cmp_, 0, true)), - start_time_(db_->GetEnv()->NowMicros()) {} + start_time_(db_->GetEnv()->NowMicros()), + write_batch_(cmp_, 0, true) {} TransactionBaseImpl::~TransactionBaseImpl() {} void TransactionBaseImpl::Clear() { save_points_.reset(nullptr); - write_batch_->Clear(); + write_batch_.Clear(); tracked_keys_.clear(); num_puts_ = 0; num_deletes_ = 0; @@ -40,7 +40,11 @@ void TransactionBaseImpl::SetSnapshot() { auto db_impl = reinterpret_cast(db_); const Snapshot* snapshot = db_impl->GetSnapshotForWriteConflictBoundary(); - snapshot_.reset(new ManagedSnapshot(db_, snapshot)); + + // Set a custom deleter for the snapshot_ SharedPtr as the snapshot needs to + // be released, not deleted when it is no longer referenced. 
+ snapshot_.reset(snapshot, std::bind(&TransactionBaseImpl::ReleaseSnapshot, + this, std::placeholders::_1, db_)); snapshot_needed_ = false; snapshot_notifier_ = nullptr; } @@ -84,7 +88,7 @@ void TransactionBaseImpl::SetSavePoint() { } save_points_->emplace(snapshot_, snapshot_needed_, snapshot_notifier_, num_puts_, num_deletes_, num_merges_); - write_batch_->SetSavePoint(); + write_batch_.SetSavePoint(); } Status TransactionBaseImpl::RollbackToSavePoint() { @@ -99,7 +103,7 @@ Status TransactionBaseImpl::RollbackToSavePoint() { num_merges_ = save_point.num_merges_; // Rollback batch - Status s = write_batch_->RollbackToSavePoint(); + Status s = write_batch_.RollbackToSavePoint(); assert(s.ok()); // Rollback any keys that were tracked since the last savepoint @@ -119,7 +123,7 @@ Status TransactionBaseImpl::RollbackToSavePoint() { return s; } else { - assert(write_batch_->RollbackToSavePoint().IsNotFound()); + assert(write_batch_.RollbackToSavePoint().IsNotFound()); return Status::NotFound(); } } @@ -127,8 +131,8 @@ Status TransactionBaseImpl::RollbackToSavePoint() { Status TransactionBaseImpl::Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value) { - return write_batch_->GetFromBatchAndDB(db_, read_options, column_family, key, - value); + return write_batch_.GetFromBatchAndDB(db_, read_options, column_family, key, + value); } Status TransactionBaseImpl::GetForUpdate(const ReadOptions& read_options, @@ -189,7 +193,7 @@ Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options) { Iterator* db_iter = db_->NewIterator(read_options); assert(db_iter); - return write_batch_->NewIteratorWithBase(db_iter); + return write_batch_.NewIteratorWithBase(db_iter); } Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options, @@ -197,7 +201,7 @@ Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options, Iterator* db_iter = db_->NewIterator(read_options, column_family); 
assert(db_iter); - return write_batch_->NewIteratorWithBase(column_family, db_iter); + return write_batch_.NewIteratorWithBase(column_family, db_iter); } Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family, @@ -353,11 +357,11 @@ Status TransactionBaseImpl::DeleteUntracked(ColumnFamilyHandle* column_family, } void TransactionBaseImpl::PutLogData(const Slice& blob) { - write_batch_->PutLogData(blob); + write_batch_.PutLogData(blob); } WriteBatchWithIndex* TransactionBaseImpl::GetWriteBatch() { - return write_batch_.get(); + return &write_batch_; } uint64_t TransactionBaseImpl::GetElapsedTime() const { @@ -413,13 +417,17 @@ const TransactionKeyMap* TransactionBaseImpl::GetTrackedKeysSinceSavePoint() { WriteBatchBase* TransactionBaseImpl::GetBatchForWrite() { if (indexing_enabled_) { // Use WriteBatchWithIndex - return write_batch_.get(); + return &write_batch_; } else { // Don't use WriteBatchWithIndex. Return base WriteBatch. - return write_batch_->GetWriteBatch(); + return write_batch_.GetWriteBatch(); } } +void TransactionBaseImpl::ReleaseSnapshot(const Snapshot* snapshot, DB* db) { + db->ReleaseSnapshot(snapshot); +} + } // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/utilities/transactions/transaction_base.h b/utilities/transactions/transaction_base.h index 4515bfaf5..3fe3513b9 100644 --- a/utilities/transactions/transaction_base.h +++ b/utilities/transactions/transaction_base.h @@ -165,7 +165,7 @@ class TransactionBaseImpl : public Transaction { } const Snapshot* GetSnapshot() const override { - return snapshot_ ? snapshot_->snapshot() : nullptr; + return snapshot_ ? snapshot_.get() : nullptr; } void SetSnapshot() override; @@ -202,6 +202,9 @@ class TransactionBaseImpl : public Transaction { write_options_ = write_options; } + // Used for memory management for snapshot_ + void ReleaseSnapshot(const Snapshot* snapshot, DB* db); + protected: // Add a key to the list of tracked keys. 
// seqno is the earliest seqno this key was involved with this transaction. @@ -218,15 +221,12 @@ class TransactionBaseImpl : public Transaction { const Comparator* cmp_; - // Records writes pending in this transaction - std::unique_ptr write_batch_; - // Stores that time the txn was constructed, in microseconds. const uint64_t start_time_; // Stores the current snapshot that was was set by SetSnapshot or null if // no snapshot is currently set. - std::shared_ptr snapshot_; + std::shared_ptr snapshot_; // Count of various operations pending in this transaction uint64_t num_puts_ = 0; @@ -234,7 +234,7 @@ class TransactionBaseImpl : public Transaction { uint64_t num_merges_ = 0; struct SavePoint { - std::shared_ptr snapshot_; + std::shared_ptr snapshot_; bool snapshot_needed_; std::shared_ptr snapshot_notifier_; uint64_t num_puts_; @@ -244,7 +244,7 @@ class TransactionBaseImpl : public Transaction { // Record all keys tracked since the last savepoint TransactionKeyMap new_keys_; - SavePoint(std::shared_ptr snapshot, bool snapshot_needed, + SavePoint(std::shared_ptr snapshot, bool snapshot_needed, std::shared_ptr snapshot_notifier, uint64_t num_puts, uint64_t num_deletes, uint64_t num_merges) : snapshot_(snapshot), @@ -256,6 +256,9 @@ class TransactionBaseImpl : public Transaction { }; private: + // Records writes pending in this transaction + WriteBatchWithIndex write_batch_; + // Stack of the Snapshot saved at each save point. Saved snapshots may be // nullptr if there was no snapshot at the time SetSavePoint() was called. 
std::unique_ptr> save_points_; diff --git a/utilities/transactions/transaction_impl.cc b/utilities/transactions/transaction_impl.cc index 7480ce6dd..3f25ff77d 100644 --- a/utilities/transactions/transaction_impl.cc +++ b/utilities/transactions/transaction_impl.cc @@ -92,7 +92,7 @@ Status TransactionImpl::CommitBatch(WriteBatch* batch) { } Status TransactionImpl::Commit() { - Status s = DoCommit(write_batch_->GetWriteBatch()); + Status s = DoCommit(GetWriteBatch()->GetWriteBatch()); Clear(); @@ -295,7 +295,7 @@ Status TransactionImpl::ValidateSnapshot(ColumnFamilyHandle* column_family, SequenceNumber* new_seqno) { assert(snapshot_); - SequenceNumber seq = snapshot_->snapshot()->GetSequenceNumber(); + SequenceNumber seq = snapshot_->GetSequenceNumber(); if (prev_seqno <= seq) { // If the key has been previous validated at a sequence number earlier // than the curent snapshot's sequence number, we already know it has not @@ -311,9 +311,9 @@ Status TransactionImpl::ValidateSnapshot(ColumnFamilyHandle* column_family, ColumnFamilyHandle* cfh = column_family ? 
column_family : db_impl->DefaultColumnFamily(); - return TransactionUtil::CheckKeyForConflicts( - db_impl, cfh, key.ToString(), snapshot_->snapshot()->GetSequenceNumber(), - false /* cache_only */); + return TransactionUtil::CheckKeyForConflicts(db_impl, cfh, key.ToString(), + snapshot_->GetSequenceNumber(), + false /* cache_only */); } } // namespace rocksdb From d6c838f1e130d8860407bc771fa6d4ac238859ba Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Thu, 28 Jan 2016 18:35:01 -0800 Subject: [PATCH 059/195] Add SstFileManager (component tracking all SST file in DBs and control the deletion rate) Summary: Add a new class SstFileTracker that will be notified whenever a DB add/delete/move and sst file, it will also replace DeleteScheduler SstFileTracker can be used later to abort writes when we exceed a specific size Test Plan: unit tests Reviewers: rven, anthony, yhchiang, sdong Reviewed By: sdong Subscribers: igor, lovro, march, dhruba Differential Revision: https://reviews.facebook.net/D50469 --- CMakeLists.txt | 3 +- HISTORY.md | 2 + db/compaction_job.cc | 6 + db/db_impl.cc | 43 ++++-- db/db_test.cc | 102 ++++++++++--- db/db_test_util.cc | 18 +++ db/db_test_util.h | 3 + include/rocksdb/delete_scheduler.h | 67 -------- include/rocksdb/options.h | 14 +- include/rocksdb/sst_file_manager.h | 64 ++++++++ src.mk | 3 +- ..._scheduler_impl.cc => delete_scheduler.cc} | 98 +++++------- ...te_scheduler_impl.h => delete_scheduler.h} | 24 ++- util/delete_scheduler_test.cc | 87 ++++------- util/file_util.cc | 15 +- util/file_util.h | 4 +- util/options.cc | 11 +- util/options_test.cc | 4 +- util/sst_file_manager_impl.cc | 143 ++++++++++++++++++ util/sst_file_manager_impl.h | 77 ++++++++++ 20 files changed, 546 insertions(+), 242 deletions(-) delete mode 100644 include/rocksdb/delete_scheduler.h create mode 100644 include/rocksdb/sst_file_manager.h rename util/{delete_scheduler_impl.cc => delete_scheduler.cc} (63%) rename util/{delete_scheduler_impl.h => 
delete_scheduler.h} (73%) create mode 100644 util/sst_file_manager_impl.cc create mode 100644 util/sst_file_manager_impl.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b0fd54a46..a80344fe8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -196,13 +196,14 @@ set(SOURCES util/comparator.cc util/concurrent_arena.cc util/crc32c.cc - util/delete_scheduler_impl.cc + util/delete_scheduler.cc util/dynamic_bloom.cc util/env.cc util/env_hdfs.cc util/event_logger.cc util/file_util.cc util/file_reader_writer.cc + util/sst_file_manager_impl.cc util/filter_policy.cc util/hash.cc util/histogram.cc diff --git a/HISTORY.md b/HISTORY.md index 809cbf76c..f2476b8f1 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,9 +3,11 @@ ### Public API Changes * Add a new perf context level between kEnableCount and kEnableTime. Level 2 now doesn't include timers for mutexes. * Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll. +* DBOptions::delete_scheduler and NewDeleteScheduler() are removed, please use DBOptions::sst_file_manager and NewSstFileManager() instead ### New Features * ldb tool now supports operations to non-default column families. +* Add DBOptions::sst_file_manager. Use NewSstFileManager() in include/rocksdb/sst_file_manager.h to create a SstFileManager that can be used to track the total size of SST files and control the SST files deletion rate. 
## 4.4.0 (1/14/2016) ### Public API Changes diff --git a/db/compaction_job.cc b/db/compaction_job.cc index c30ee7736..5be4a2c2e 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -51,6 +51,7 @@ #include "util/iostats_context_imp.h" #include "util/log_buffer.h" #include "util/logging.h" +#include "util/sst_file_manager_impl.h" #include "util/mutexlock.h" #include "util/perf_context_imp.h" #include "util/stop_watch.h" @@ -498,11 +499,16 @@ Status CompactionJob::Run() { } TablePropertiesCollection tp; + auto sfm = + static_cast(db_options_.sst_file_manager.get()); for (const auto& state : compact_->sub_compact_states) { for (const auto& output : state.outputs) { auto fn = TableFileName(db_options_.db_paths, output.meta.fd.GetNumber(), output.meta.fd.GetPathId()); tp[fn] = output.table_properties; + if (sfm && output.meta.fd.GetPathId() == 0) { + sfm->OnAddFile(fn); + } } } compact_->compaction->SetOutputTableProperties(std::move(tp)); diff --git a/db/db_impl.cc b/db/db_impl.cc index 02060393f..6db05ae66 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -64,7 +64,6 @@ #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/db.h" -#include "rocksdb/delete_scheduler.h" #include "rocksdb/env.h" #include "rocksdb/merge_operator.h" #include "rocksdb/sst_file_writer.h" @@ -89,6 +88,7 @@ #include "util/log_buffer.h" #include "util/logging.h" #include "util/mutexlock.h" +#include "util/sst_file_manager_impl.h" #include "util/options_helper.h" #include "util/options_parser.h" #include "util/perf_context_imp.h" @@ -786,8 +786,8 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) { } #endif // !ROCKSDB_LITE Status file_deletion_status; - if (type == kTableFile && path_id == 0) { - file_deletion_status = DeleteOrMoveToTrash(&db_options_, fname); + if (type == kTableFile) { + file_deletion_status = DeleteSSTFile(&db_options_, fname, path_id); } else { file_deletion_status = env_->DeleteFile(fname); } @@ -1509,6 +1509,14 @@ Status 
DBImpl::FlushMemTableToOutputFile( // may temporarily unlock and lock the mutex. NotifyOnFlushCompleted(cfd, &file_meta, mutable_cf_options, job_context->job_id, flush_job.GetTableProperties()); + auto sfm = + static_cast(db_options_.sst_file_manager.get()); + if (sfm) { + // Notify sst_file_manager that a new file was added + std::string file_path = MakeTableFileName(db_options_.db_paths[0].path, + file_meta.fd.GetNumber()); + sfm->OnAddFile(file_path); + } } #endif // ROCKSDB_LITE return s; @@ -5406,6 +5414,25 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname, } impl->mutex_.Unlock(); + auto sfm = static_cast( + impl->db_options_.sst_file_manager.get()); + if (s.ok() && sfm) { + // Notify SstFileManager about all sst files that already exist in + // db_paths[0] when the DB is opened. + auto& db_path = impl->db_options_.db_paths[0]; + std::vector existing_files; + impl->db_options_.env->GetChildren(db_path.path, &existing_files); + for (auto& file_name : existing_files) { + uint64_t file_number; + FileType file_type; + std::string file_path = db_path.path + "/" + file_name; + if (ParseFileName(file_name, &file_number, &file_type) && + file_type == kTableFile) { + sfm->OnAddFile(file_path); + } + } + } + if (s.ok()) { Log(InfoLogLevel::INFO_LEVEL, impl->db_options_.info_log, "DB pointer %p", impl); @@ -5465,7 +5492,7 @@ Status DestroyDB(const std::string& dbname, const Options& options) { if (type == kMetaDatabase) { del = DestroyDB(path_to_delete, options); } else if (type == kTableFile) { - del = DeleteOrMoveToTrash(&options, path_to_delete); + del = DeleteSSTFile(&options, path_to_delete, 0); } else { del = env->DeleteFile(path_to_delete); } @@ -5481,13 +5508,9 @@ Status DestroyDB(const std::string& dbname, const Options& options) { for (size_t i = 0; i < filenames.size(); i++) { if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) { // Lock file will be deleted at end - Status del; std::string table_path = db_path.path 
+ "/" + filenames[i]; - if (path_id == 0) { - del = DeleteOrMoveToTrash(&options, table_path); - } else { - del = env->DeleteFile(table_path); - } + Status del = DeleteSSTFile(&options, table_path, + static_cast(path_id)); if (result.ok() && !del.ok()) { result = del; } diff --git a/db/db_test.cc b/db/db_test.cc index db3524345..76e64d484 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -37,9 +37,9 @@ #include "rocksdb/compaction_filter.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" -#include "rocksdb/delete_scheduler.h" #include "rocksdb/env.h" #include "rocksdb/experimental.h" +#include "rocksdb/sst_file_manager.h" #include "rocksdb/filter_policy.h" #include "rocksdb/options.h" #include "rocksdb/perf_context.h" @@ -65,9 +65,10 @@ #include "util/compression.h" #include "util/mutexlock.h" #include "util/rate_limiter.h" +#include "util/sst_file_manager_impl.h" #include "util/statistics.h" -#include "util/testharness.h" #include "util/sync_point.h" +#include "util/testharness.h" #include "util/testutil.h" #include "util/mock_env.h" #include "util/string_util.h" @@ -8431,15 +8432,78 @@ TEST_F(DBTest, DeletingOldWalAfterDrop) { } #ifndef ROCKSDB_LITE +TEST_F(DBTest, DBWithSstFileManager) { + std::shared_ptr sst_file_manager(NewSstFileManager(env_)); + auto sfm = static_cast(sst_file_manager.get()); + + int files_added = 0; + int files_deleted = 0; + int files_moved = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "SstFileManagerImpl::OnAddFile", [&](void* arg) { files_added++; }); + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "SstFileManagerImpl::OnDeleteFile", [&](void* arg) { files_deleted++; }); + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "SstFileManagerImpl::OnMoveFile", [&](void* arg) { files_moved++; }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + Options options = CurrentOptions(); + options.sst_file_manager = sst_file_manager; + DestroyAndReopen(options); + + Random rnd(301); + for (int i = 0; i < 25; i++) { 
+ GenerateNewRandomFile(&rnd); + ASSERT_OK(Flush()); + dbfull()->TEST_WaitForFlushMemTable(); + dbfull()->TEST_WaitForCompact(); + // Verify that we are tracking all sst files in dbname_ + ASSERT_EQ(sfm->GetTrackedFiles(), GetAllSSTFiles()); + } + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + + auto files_in_db = GetAllSSTFiles(); + // Verify that we are tracking all sst files in dbname_ + ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); + // Verify the total files size + uint64_t total_files_size = 0; + for (auto& file_to_size : files_in_db) { + total_files_size += file_to_size.second; + } + ASSERT_EQ(sfm->GetTotalSize(), total_files_size); + // We flushed at least 25 files + ASSERT_GE(files_added, 25); + // Compaction must have deleted some files + ASSERT_GT(files_deleted, 0); + // No files were moved + ASSERT_EQ(files_moved, 0); + + Close(); + Reopen(options); + ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); + ASSERT_EQ(sfm->GetTotalSize(), total_files_size); + + // Verify that we track all the files again after the DB is closed and opened + Close(); + sst_file_manager.reset(NewSstFileManager(env_)); + options.sst_file_manager = sst_file_manager; + sfm = static_cast(sst_file_manager.get()); + + Reopen(options); + ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); + ASSERT_EQ(sfm->GetTotalSize(), total_files_size); + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + TEST_F(DBTest, RateLimitedDelete) { rocksdb::SyncPoint::GetInstance()->LoadDependency({ - {"DBTest::RateLimitedDelete:1", - "DeleteSchedulerImpl::BackgroundEmptyTrash"}, + {"DBTest::RateLimitedDelete:1", "DeleteScheduler::BackgroundEmptyTrash"}, }); std::vector penalties; rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteSchedulerImpl::BackgroundEmptyTrash:Wait", + "DeleteScheduler::BackgroundEmptyTrash:Wait", [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); rocksdb::SyncPoint::GetInstance()->DisableProcessing(); @@ -8450,9 +8514,10 @@ 
TEST_F(DBTest, RateLimitedDelete) { std::string trash_dir = test::TmpDir(env_) + "/trash"; int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec Status s; - options.delete_scheduler.reset(NewDeleteScheduler( - env_, trash_dir, rate_bytes_per_sec, nullptr, false, &s)); + options.sst_file_manager.reset(NewSstFileManager( + env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s)); ASSERT_OK(s); + auto sfm = static_cast(options.sst_file_manager.get()); Destroy(last_options_); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); @@ -8479,7 +8544,7 @@ TEST_F(DBTest, RateLimitedDelete) { uint64_t delete_start_time = env_->NowMicros(); // Hold BackgroundEmptyTrash TEST_SYNC_POINT("DBTest::RateLimitedDelete:1"); - options.delete_scheduler->WaitForEmptyTrash(); + sfm->WaitForEmptyTrash(); uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; uint64_t total_files_size = 0; @@ -8502,7 +8567,7 @@ TEST_F(DBTest, RateLimitedDelete) { TEST_F(DBTest, DeleteSchedulerMultipleDBPaths) { int bg_delete_file = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", + "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* arg) { bg_delete_file++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); @@ -8515,9 +8580,10 @@ TEST_F(DBTest, DeleteSchedulerMultipleDBPaths) { std::string trash_dir = test::TmpDir(env_) + "/trash"; int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec Status s; - options.delete_scheduler.reset(NewDeleteScheduler( - env_, trash_dir, rate_bytes_per_sec, nullptr, false, &s)); + options.sst_file_manager.reset(NewSstFileManager( + env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s)); ASSERT_OK(s); + auto sfm = static_cast(options.sst_file_manager.get()); DestroyAndReopen(options); @@ -8551,7 +8617,7 @@ TEST_F(DBTest, DeleteSchedulerMultipleDBPaths) { ASSERT_OK(db_->CompactRange(compact_options, &begin, &end)); ASSERT_EQ("0,2", FilesPerLevel(0)); - 
options.delete_scheduler->WaitForEmptyTrash(); + sfm->WaitForEmptyTrash(); ASSERT_EQ(bg_delete_file, 8); compact_options.bottommost_level_compaction = @@ -8559,7 +8625,7 @@ TEST_F(DBTest, DeleteSchedulerMultipleDBPaths) { ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); ASSERT_EQ("0,1", FilesPerLevel(0)); - options.delete_scheduler->WaitForEmptyTrash(); + sfm->WaitForEmptyTrash(); ASSERT_EQ(bg_delete_file, 8); rocksdb::SyncPoint::GetInstance()->DisableProcessing(); @@ -8568,7 +8634,7 @@ TEST_F(DBTest, DeleteSchedulerMultipleDBPaths) { TEST_F(DBTest, DestroyDBWithRateLimitedDelete) { int bg_delete_file = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", + "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* arg) { bg_delete_file++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); @@ -8590,12 +8656,13 @@ TEST_F(DBTest, DestroyDBWithRateLimitedDelete) { std::string trash_dir = test::TmpDir(env_) + "/trash"; int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec Status s; - options.delete_scheduler.reset(NewDeleteScheduler( - env_, trash_dir, rate_bytes_per_sec, nullptr, false, &s)); + options.sst_file_manager.reset(NewSstFileManager( + env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s)); ASSERT_OK(s); ASSERT_OK(DestroyDB(dbname_, options)); - options.delete_scheduler->WaitForEmptyTrash(); + auto sfm = static_cast(options.sst_file_manager.get()); + sfm->WaitForEmptyTrash(); // We have deleted the 4 sst files in the delete_scheduler ASSERT_EQ(bg_delete_file, 4); } @@ -10073,7 +10140,6 @@ TEST_F(DBTest, WalFilterTestWithChangeBatchExtraKeys) { ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist); } - #endif // ROCKSDB_LITE class SliceTransformLimitedDomain : public SliceTransform { diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 39a7a364f..e6ee304a5 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -1005,4 +1005,22 @@ void DBTestBase::CopyFile(const 
std::string& source, ASSERT_OK(destfile->Close()); } +std::unordered_map DBTestBase::GetAllSSTFiles() { + std::unordered_map res; + + std::vector files; + env_->GetChildren(dbname_, &files); + for (auto& file_name : files) { + uint64_t number; + FileType type; + std::string file_path = dbname_ + "/" + file_name; + if (ParseFileName(file_name, &number, &type) && type == kTableFile) { + uint64_t file_size = 0; + env_->GetFileSize(file_path, &file_size); + res[file_path] = file_size; + } + } + return res; +} + } // namespace rocksdb diff --git a/db/db_test_util.h b/db/db_test_util.h index ebf105250..031057bbb 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -19,6 +19,7 @@ #endif #include +#include #include #include #include @@ -750,6 +751,8 @@ class DBTestBase : public testing::Test { void CopyFile(const std::string& source, const std::string& destination, uint64_t size = 0); + + std::unordered_map GetAllSSTFiles(); }; } // namespace rocksdb diff --git a/include/rocksdb/delete_scheduler.h b/include/rocksdb/delete_scheduler.h deleted file mode 100644 index 7c3eaee77..000000000 --- a/include/rocksdb/delete_scheduler.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. An additional grant -// of patent rights can be found in the PATENTS file in the same directory. 
- -#pragma once - -#include -#include -#include - -#include "rocksdb/status.h" - -namespace rocksdb { - -class Env; -class Logger; - -// DeleteScheduler allow the DB to enforce a rate limit on file deletion, -// Instead of deleteing files immediately, files are moved to trash_dir -// and deleted in a background thread that apply sleep penlty between deletes -// if they are happening in a rate faster than rate_bytes_per_sec, -// -// Rate limiting can be turned off by setting rate_bytes_per_sec = 0, In this -// case DeleteScheduler will delete files immediately. -class DeleteScheduler { - public: - virtual ~DeleteScheduler() {} - - // Return delete rate limit in bytes per second - virtual int64_t GetRateBytesPerSecond() = 0; - - // Move file to trash directory and schedule it's deletion - virtual Status DeleteFile(const std::string& fname) = 0; - - // Return a map containing errors that happened in the background thread - // file_path => error status - virtual std::map GetBackgroundErrors() = 0; - - // Wait for all files being deleteing in the background to finish or for - // destructor to be called. - virtual void WaitForEmptyTrash() = 0; -}; - -// Create a new DeleteScheduler that can be shared among multiple RocksDB -// instances to control the file deletion rate. -// -// @env: Pointer to Env object, please see "rocksdb/env.h". -// @trash_dir: Path to the directory where deleted files will be moved into -// to be deleted in a background thread while applying rate limiting. If this -// directory dont exist, it will be created. This directory should not be -// used by any other process or any other DeleteScheduler. -// @rate_bytes_per_sec: How many bytes should be deleted per second, If this -// value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb -// in 1 second, we will wait for another 3 seconds before we delete other -// files, Set to 0 to disable rate limiting. -// @info_log: If not nullptr, info_log will be used to log errors. 
-// @delete_exisitng_trash: If set to true, the newly created DeleteScheduler -// will delete files that already exist in trash_dir. -// @status: If not nullptr, status will contain any errors that happened during -// creating the missing trash_dir or deleting existing files in trash. -extern DeleteScheduler* NewDeleteScheduler( - Env* env, const std::string& trash_dir, int64_t rate_bytes_per_sec, - std::shared_ptr info_log = nullptr, - bool delete_exisitng_trash = true, Status* status = nullptr); - -} // namespace rocksdb diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index e7064b3cb..a3f410422 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -33,6 +33,7 @@ class CompactionFilterFactory; class Comparator; class Env; enum InfoLogLevel : unsigned char; +class SstFileManager; class FilterPolicy; class Logger; class MergeOperator; @@ -41,7 +42,6 @@ class TableFactory; class MemTableRepFactory; class TablePropertiesCollectorFactory; class RateLimiter; -class DeleteScheduler; class Slice; class SliceTransform; class Statistics; @@ -830,12 +830,12 @@ struct DBOptions { // Default: nullptr std::shared_ptr rate_limiter; - // Use to control files deletion rate, can be used among multiple - // RocksDB instances. delete_scheduler is only used to delete table files that - // need to be deleted from the first db_path (db_name if db_paths is empty), - // other files types and other db_paths wont be affected by delete_scheduler. - // Default: nullptr (disabled) - std::shared_ptr delete_scheduler; + // Use to track SST files and control their file deletion rate, can be used + // among multiple RocksDB instances, sst_file_manager only track and throttle + // deletes of SST files in first db_path (db_name if db_paths is empty), other + // files and other db_paths wont be tracked or affected by sst_file_manager. 
+ // Default: nullptr + std::shared_ptr sst_file_manager; // Any internal progress/error information generated by the db will // be written to info_log if it is non-nullptr, or to a file stored diff --git a/include/rocksdb/sst_file_manager.h b/include/rocksdb/sst_file_manager.h new file mode 100644 index 000000000..665f01add --- /dev/null +++ b/include/rocksdb/sst_file_manager.h @@ -0,0 +1,64 @@ +// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include +#include + +#include "rocksdb/status.h" + +namespace rocksdb { + +class Env; +class Logger; + +// SstFileManager is used to track SST files in the DB and control there +// deletion rate. +// All SstFileManager public functions are thread-safe. +class SstFileManager { + public: + virtual ~SstFileManager() {} + + // Return the total size of all tracked files. + // thread-safe + virtual uint64_t GetTotalSize() = 0; + + // Return a map containing all tracked files and there corresponding sizes. + // thread-safe + virtual std::unordered_map GetTrackedFiles() = 0; + + // Return delete rate limit in bytes per second. + // thread-safe + virtual int64_t GetDeleteRateBytesPerSecond() = 0; +}; + +// Create a new SstFileManager that can be shared among multiple RocksDB +// instances to track SST file and control there deletion rate. +// +// @param env: Pointer to Env object, please see "rocksdb/env.h". +// @param info_log: If not nullptr, info_log will be used to log errors. +// +// == Deletion rate limiting specific arguments == +// @param trash_dir: Path to the directory where deleted files will be moved +// to be deleted in a background thread while applying rate limiting. If this +// directory dont exist, it will be created. 
This directory should not be +// used by any other process or any other SstFileManager, Set to "" to +// disable deletion rate limiting. +// @param rate_bytes_per_sec: How many bytes should be deleted per second, If +// this value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb +// in 1 second, we will wait for another 3 seconds before we delete other +// files, Set to 0 to disable deletion rate limiting. +// @param delete_exisitng_trash: If set to true, the newly created +// SstFileManager will delete files that already exist in trash_dir. +// @param status: If not nullptr, status will contain any errors that happened +// during creating the missing trash_dir or deleting existing files in trash. +extern SstFileManager* NewSstFileManager( + Env* env, std::shared_ptr info_log = nullptr, + std::string trash_dir = "", int64_t rate_bytes_per_sec = 0, + bool delete_exisitng_trash = true, Status* status = nullptr); + +} // namespace rocksdb diff --git a/src.mk b/src.mk index 4e95b1fc1..3fb811144 100644 --- a/src.mk +++ b/src.mk @@ -94,13 +94,14 @@ LIB_SOURCES = \ util/compaction_job_stats_impl.cc \ util/concurrent_arena.cc \ util/crc32c.cc \ - util/delete_scheduler_impl.cc \ + util/delete_scheduler.cc \ util/dynamic_bloom.cc \ util/env.cc \ util/env_hdfs.cc \ util/env_posix.cc \ util/io_posix.cc \ util/thread_posix.cc \ + util/sst_file_manager_impl.cc \ util/file_util.cc \ util/file_reader_writer.cc \ util/filter_policy.cc \ diff --git a/util/delete_scheduler_impl.cc b/util/delete_scheduler.cc similarity index 63% rename from util/delete_scheduler_impl.cc rename to util/delete_scheduler.cc index e0f7511e0..650b8582d 100644 --- a/util/delete_scheduler_impl.cc +++ b/util/delete_scheduler.cc @@ -3,38 +3,40 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
-#include "util/delete_scheduler_impl.h" +#include "util/delete_scheduler.h" #include #include #include "port/port.h" #include "rocksdb/env.h" +#include "util/sst_file_manager_impl.h" #include "util/mutexlock.h" #include "util/sync_point.h" namespace rocksdb { -DeleteSchedulerImpl::DeleteSchedulerImpl(Env* env, const std::string& trash_dir, - int64_t rate_bytes_per_sec, - std::shared_ptr info_log) +DeleteScheduler::DeleteScheduler(Env* env, const std::string& trash_dir, + int64_t rate_bytes_per_sec, Logger* info_log, + SstFileManagerImpl* sst_file_manager) : env_(env), trash_dir_(trash_dir), rate_bytes_per_sec_(rate_bytes_per_sec), pending_files_(0), closing_(false), cv_(&mu_), - info_log_(info_log) { - if (rate_bytes_per_sec_ == 0) { + info_log_(info_log), + sst_file_manager_(sst_file_manager) { + if (rate_bytes_per_sec_ <= 0) { // Rate limiting is disabled bg_thread_.reset(); } else { bg_thread_.reset( - new std::thread(&DeleteSchedulerImpl::BackgroundEmptyTrash, this)); + new std::thread(&DeleteScheduler::BackgroundEmptyTrash, this)); } } -DeleteSchedulerImpl::~DeleteSchedulerImpl() { +DeleteScheduler::~DeleteScheduler() { { MutexLock l(&mu_); closing_ = true; @@ -45,20 +47,29 @@ DeleteSchedulerImpl::~DeleteSchedulerImpl() { } } -Status DeleteSchedulerImpl::DeleteFile(const std::string& file_path) { - if (rate_bytes_per_sec_ == 0) { +Status DeleteScheduler::DeleteFile(const std::string& file_path) { + Status s; + if (rate_bytes_per_sec_ <= 0) { // Rate limiting is disabled - return env_->DeleteFile(file_path); + s = env_->DeleteFile(file_path); + if (s.ok() && sst_file_manager_) { + sst_file_manager_->OnDeleteFile(file_path); + } + return s; } // Move file to trash std::string path_in_trash; - Status s = MoveToTrash(file_path, &path_in_trash); + s = MoveToTrash(file_path, &path_in_trash); if (!s.ok()) { Log(InfoLogLevel::ERROR_LEVEL, info_log_, "Failed to move %s to trash directory (%s)", file_path.c_str(), trash_dir_.c_str()); - return 
env_->DeleteFile(file_path); + s = env_->DeleteFile(file_path); + if (s.ok() && sst_file_manager_) { + sst_file_manager_->OnDeleteFile(file_path); + } + return s; } // Add file to delete queue @@ -73,13 +84,13 @@ Status DeleteSchedulerImpl::DeleteFile(const std::string& file_path) { return s; } -std::map DeleteSchedulerImpl::GetBackgroundErrors() { +std::map DeleteScheduler::GetBackgroundErrors() { MutexLock l(&mu_); return bg_errors_; } -Status DeleteSchedulerImpl::MoveToTrash(const std::string& file_path, - std::string* path_in_trash) { +Status DeleteScheduler::MoveToTrash(const std::string& file_path, + std::string* path_in_trash) { Status s; // Figure out the name of the file in trash folder size_t idx = file_path.rfind("/"); @@ -112,11 +123,14 @@ Status DeleteSchedulerImpl::MoveToTrash(const std::string& file_path, break; } } + if (s.ok() && sst_file_manager_) { + sst_file_manager_->OnMoveFile(file_path, *path_in_trash); + } return s; } -void DeleteSchedulerImpl::BackgroundEmptyTrash() { - TEST_SYNC_POINT("DeleteSchedulerImpl::BackgroundEmptyTrash"); +void DeleteScheduler::BackgroundEmptyTrash() { + TEST_SYNC_POINT("DeleteScheduler::BackgroundEmptyTrash"); while (true) { MutexLock l(&mu_); @@ -151,7 +165,7 @@ void DeleteSchedulerImpl::BackgroundEmptyTrash() { uint64_t total_penlty = ((total_deleted_bytes * kMicrosInSecond) / rate_bytes_per_sec_); while (!closing_ && !cv_.TimedWait(start_time + total_penlty)) {} - TEST_SYNC_POINT_CALLBACK("DeleteSchedulerImpl::BackgroundEmptyTrash:Wait", + TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait", &total_penlty); pending_files_--; @@ -164,12 +178,12 @@ void DeleteSchedulerImpl::BackgroundEmptyTrash() { } } -Status DeleteSchedulerImpl::DeleteTrashFile(const std::string& path_in_trash, - uint64_t* deleted_bytes) { +Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash, + uint64_t* deleted_bytes) { uint64_t file_size; Status s = env_->GetFileSize(path_in_trash, &file_size); if 
(s.ok()) { - TEST_SYNC_POINT("DeleteSchedulerImpl::DeleteTrashFile:DeleteFile"); + TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile"); s = env_->DeleteFile(path_in_trash); } @@ -181,51 +195,19 @@ Status DeleteSchedulerImpl::DeleteTrashFile(const std::string& path_in_trash, *deleted_bytes = 0; } else { *deleted_bytes = file_size; + if (sst_file_manager_) { + sst_file_manager_->OnDeleteFile(path_in_trash); + } } return s; } -void DeleteSchedulerImpl::WaitForEmptyTrash() { +void DeleteScheduler::WaitForEmptyTrash() { MutexLock l(&mu_); while (pending_files_ > 0 && !closing_) { cv_.Wait(); } } -DeleteScheduler* NewDeleteScheduler(Env* env, const std::string& trash_dir, - int64_t rate_bytes_per_sec, - std::shared_ptr info_log, - bool delete_exisitng_trash, - Status* status) { - DeleteScheduler* res = - new DeleteSchedulerImpl(env, trash_dir, rate_bytes_per_sec, info_log); - - Status s; - if (trash_dir != "") { - s = env->CreateDirIfMissing(trash_dir); - if (s.ok() && delete_exisitng_trash) { - std::vector files_in_trash; - s = env->GetChildren(trash_dir, &files_in_trash); - if (s.ok()) { - for (const std::string& trash_file : files_in_trash) { - if (trash_file == "." 
|| trash_file == "..") { - continue; - } - Status file_delete = res->DeleteFile(trash_dir + "/" + trash_file); - if (s.ok() && !file_delete.ok()) { - s = file_delete; - } - } - } - } - } - - if (status) { - *status = s; - } - - return res; -} - } // namespace rocksdb diff --git a/util/delete_scheduler_impl.h b/util/delete_scheduler.h similarity index 73% rename from util/delete_scheduler_impl.h rename to util/delete_scheduler.h index 32ef65f0c..8ce2e3005 100644 --- a/util/delete_scheduler_impl.h +++ b/util/delete_scheduler.h @@ -12,21 +12,28 @@ #include "port/port.h" -#include "rocksdb/delete_scheduler.h" #include "rocksdb/status.h" namespace rocksdb { class Env; class Logger; +class SstFileManagerImpl; -class DeleteSchedulerImpl : public DeleteScheduler { +// DeleteScheduler allows the DB to enforce a rate limit on file deletion, +// Instead of deleteing files immediately, files are moved to trash_dir +// and deleted in a background thread that apply sleep penlty between deletes +// if they are happening in a rate faster than rate_bytes_per_sec, +// +// Rate limiting can be turned off by setting rate_bytes_per_sec = 0, In this +// case DeleteScheduler will delete files immediately. 
+class DeleteScheduler { public: - DeleteSchedulerImpl(Env* env, const std::string& trash_dir, - int64_t rate_bytes_per_sec, - std::shared_ptr info_log); + DeleteScheduler(Env* env, const std::string& trash_dir, + int64_t rate_bytes_per_sec, Logger* info_log, + SstFileManagerImpl* sst_file_manager); - ~DeleteSchedulerImpl(); + ~DeleteScheduler(); // Return delete rate limit in bytes per second int64_t GetRateBytesPerSecond() { return rate_bytes_per_sec_; } @@ -63,7 +70,7 @@ class DeleteSchedulerImpl : public DeleteScheduler { int32_t pending_files_; // Errors that happened in BackgroundEmptyTrash (file_path => error) std::map bg_errors_; - // Set to true in ~DeleteSchedulerImpl() to force BackgroundEmptyTrash to stop + // Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop bool closing_; // Condition variable signaled in these conditions // - pending_files_ value change from 0 => 1 @@ -74,7 +81,8 @@ class DeleteSchedulerImpl : public DeleteScheduler { std::unique_ptr bg_thread_; // Mutex to protect threads from file name conflicts port::Mutex file_move_mu_; - std::shared_ptr info_log_; + Logger* info_log_; + SstFileManagerImpl* sst_file_manager_; static const uint64_t kMicrosInSecond = 1000 * 1000LL; }; diff --git a/util/delete_scheduler_test.cc b/util/delete_scheduler_test.cc index fcd821c15..21b8a5b19 100644 --- a/util/delete_scheduler_test.cc +++ b/util/delete_scheduler_test.cc @@ -12,9 +12,9 @@ #include #include -#include "rocksdb/delete_scheduler.h" #include "rocksdb/env.h" #include "rocksdb/options.h" +#include "util/delete_scheduler.h" #include "util/string_util.h" #include "util/sync_point.h" #include "util/testharness.h" @@ -74,6 +74,12 @@ class DeleteSchedulerTest : public testing::Test { return file_path; } + void NewDeleteScheduler() { + ASSERT_OK(env_->CreateDirIfMissing(trash_dir_)); + delete_scheduler_.reset(new DeleteScheduler( + env_, trash_dir_, rate_bytes_per_sec_, nullptr, nullptr)); + } + Env* env_; std::string 
dummy_files_dir_; std::string trash_dir_; @@ -84,19 +90,19 @@ class DeleteSchedulerTest : public testing::Test { // Test the basic functionality of DeleteScheduler (Rate Limiting). // 1- Create 100 dummy files // 2- Delete the 100 dummy files using DeleteScheduler -// --- Hold DeleteSchedulerImpl::BackgroundEmptyTrash --- +// --- Hold DeleteScheduler::BackgroundEmptyTrash --- // 3- Wait for DeleteScheduler to delete all files in trash // 4- Verify that BackgroundEmptyTrash used to correct penlties for the files // 5- Make sure that all created files were completely deleted TEST_F(DeleteSchedulerTest, BasicRateLimiting) { rocksdb::SyncPoint::GetInstance()->LoadDependency({ {"DeleteSchedulerTest::BasicRateLimiting:1", - "DeleteSchedulerImpl::BackgroundEmptyTrash"}, + "DeleteScheduler::BackgroundEmptyTrash"}, }); std::vector penalties; rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteSchedulerImpl::BackgroundEmptyTrash:Wait", + "DeleteScheduler::BackgroundEmptyTrash:Wait", [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); int num_files = 100; // 100 files @@ -110,8 +116,7 @@ TEST_F(DeleteSchedulerTest, BasicRateLimiting) { DestroyAndCreateDir(dummy_files_dir_); rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; - delete_scheduler_.reset( - NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_)); + NewDeleteScheduler(); // Create 100 dummy files, every file is 1 Kb std::vector generated_files; @@ -152,19 +157,19 @@ TEST_F(DeleteSchedulerTest, BasicRateLimiting) { // Same as the BasicRateLimiting test but delete files in multiple threads. 
// 1- Create 100 dummy files // 2- Delete the 100 dummy files using DeleteScheduler using 10 threads -// --- Hold DeleteSchedulerImpl::BackgroundEmptyTrash --- +// --- Hold DeleteScheduler::BackgroundEmptyTrash --- // 3- Wait for DeleteScheduler to delete all files in queue // 4- Verify that BackgroundEmptyTrash used to correct penlties for the files // 5- Make sure that all created files were completely deleted TEST_F(DeleteSchedulerTest, RateLimitingMultiThreaded) { rocksdb::SyncPoint::GetInstance()->LoadDependency({ {"DeleteSchedulerTest::RateLimitingMultiThreaded:1", - "DeleteSchedulerImpl::BackgroundEmptyTrash"}, + "DeleteScheduler::BackgroundEmptyTrash"}, }); std::vector penalties; rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteSchedulerImpl::BackgroundEmptyTrash:Wait", + "DeleteScheduler::BackgroundEmptyTrash:Wait", [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); int thread_cnt = 10; @@ -179,8 +184,7 @@ TEST_F(DeleteSchedulerTest, RateLimitingMultiThreaded) { DestroyAndCreateDir(dummy_files_dir_); rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; - delete_scheduler_.reset( - NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_)); + NewDeleteScheduler(); // Create 100 dummy files, every file is 1 Kb std::vector generated_files; @@ -239,12 +243,13 @@ TEST_F(DeleteSchedulerTest, RateLimitingMultiThreaded) { TEST_F(DeleteSchedulerTest, DisableRateLimiting) { int bg_delete_file = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", + "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* arg) { bg_delete_file++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - delete_scheduler_.reset(NewDeleteScheduler(env_, "", 0)); + rate_bytes_per_sec_ = 0; + NewDeleteScheduler(); for (int i = 0; i < 10; i++) { // Every file we delete will be deleted immediately @@ -264,18 +269,17 @@ TEST_F(DeleteSchedulerTest, DisableRateLimiting) { // 1- Create 10 files with the same name 
"conflict.data" // 2- Delete the 10 files using DeleteScheduler // 3- Make sure that trash directory contain 10 files ("conflict.data" x 10) -// --- Hold DeleteSchedulerImpl::BackgroundEmptyTrash --- +// --- Hold DeleteScheduler::BackgroundEmptyTrash --- // 4- Make sure that files are deleted from trash TEST_F(DeleteSchedulerTest, ConflictNames) { rocksdb::SyncPoint::GetInstance()->LoadDependency({ {"DeleteSchedulerTest::ConflictNames:1", - "DeleteSchedulerImpl::BackgroundEmptyTrash"}, + "DeleteScheduler::BackgroundEmptyTrash"}, }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1024 * 1024; // 1 Mb/sec - delete_scheduler_.reset( - NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_)); + NewDeleteScheduler(); // Create "conflict.data" and move it to trash 10 times for (int i = 0; i < 10; i++) { @@ -300,19 +304,18 @@ TEST_F(DeleteSchedulerTest, ConflictNames) { // 1- Create 10 dummy files // 2- Delete the 10 files using DeleteScheduler (move them to trsah) // 3- Delete the 10 files directly (using env_->DeleteFile) -// --- Hold DeleteSchedulerImpl::BackgroundEmptyTrash --- +// --- Hold DeleteScheduler::BackgroundEmptyTrash --- // 4- Make sure that DeleteScheduler failed to delete the 10 files and // reported 10 background errors TEST_F(DeleteSchedulerTest, BackgroundError) { rocksdb::SyncPoint::GetInstance()->LoadDependency({ {"DeleteSchedulerTest::BackgroundError:1", - "DeleteSchedulerImpl::BackgroundEmptyTrash"}, + "DeleteScheduler::BackgroundEmptyTrash"}, }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1024 * 1024; // 1 Mb/sec - delete_scheduler_.reset( - NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_)); + NewDeleteScheduler(); // Generate 10 dummy files and move them to trash for (int i = 0; i < 10; i++) { @@ -339,32 +342,6 @@ TEST_F(DeleteSchedulerTest, BackgroundError) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } -// 1- Create 10 files in trash -// 2- Create a 
DeleteScheduler with delete_exisitng_trash = true -// 3- Wait for DeleteScheduler to delete all files in queue -// 4- Make sure that all files in trash directory were deleted -TEST_F(DeleteSchedulerTest, TrashWithExistingFiles) { - std::vector dummy_files; - for (int i = 0; i < 10; i++) { - std::string file_name = "data_" + ToString(i) + ".data"; - std::string trash_path = trash_dir_ + "/" + file_name; - env_->RenameFile(NewDummyFile(file_name), trash_path); - } - ASSERT_EQ(CountFilesInDir(trash_dir_), 10); - - Status s; - rate_bytes_per_sec_ = 1024 * 1024; // 1 Mb/sec - delete_scheduler_.reset(NewDeleteScheduler( - env_, trash_dir_, rate_bytes_per_sec_, nullptr, true, &s)); - ASSERT_OK(s); - - delete_scheduler_->WaitForEmptyTrash(); - ASSERT_EQ(CountFilesInDir(trash_dir_), 0); - - auto bg_errors = delete_scheduler_->GetBackgroundErrors(); - ASSERT_EQ(bg_errors.size(), 0); -} - // 1- Create 10 dummy files // 2- Delete 10 dummy files using DeleteScheduler // 3- Wait for DeleteScheduler to delete all files in queue @@ -373,13 +350,12 @@ TEST_F(DeleteSchedulerTest, TrashWithExistingFiles) { TEST_F(DeleteSchedulerTest, StartBGEmptyTrashMultipleTimes) { int bg_delete_file = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", + "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* arg) { bg_delete_file++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec - delete_scheduler_.reset( - NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_)); + NewDeleteScheduler(); // Move files to trash, wait for empty trash, start again for (int run = 1; run <= 5; run++) { @@ -409,13 +385,12 @@ TEST_F(DeleteSchedulerTest, StartBGEmptyTrashMultipleTimes) { TEST_F(DeleteSchedulerTest, DestructorWithNonEmptyQueue) { int bg_delete_file = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", + 
"DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* arg) { bg_delete_file++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1; // 1 Byte / sec - delete_scheduler_.reset( - NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_)); + NewDeleteScheduler(); for (int i = 0; i < 100; i++) { std::string file_name = "data_" + ToString(i) + ".data"; @@ -439,13 +414,12 @@ TEST_F(DeleteSchedulerTest, DestructorWithNonEmptyQueue) { TEST_F(DeleteSchedulerTest, MoveToTrashError) { int bg_delete_file = 0; rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteSchedulerImpl::DeleteTrashFile:DeleteFile", + "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* arg) { bg_delete_file++; }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1024; // 1 Kb / sec - delete_scheduler_.reset( - NewDeleteScheduler(env_, trash_dir_, rate_bytes_per_sec_)); + NewDeleteScheduler(); // We will delete the trash directory, that mean that DeleteScheduler wont // be able to move files to trash and will delete files them immediately. 
@@ -460,7 +434,6 @@ TEST_F(DeleteSchedulerTest, MoveToTrashError) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } - } // namespace rocksdb int main(int argc, char** argv) { diff --git a/util/file_util.cc b/util/file_util.cc index 55eeab722..1e2e84211 100644 --- a/util/file_util.cc +++ b/util/file_util.cc @@ -8,9 +8,9 @@ #include #include -#include "rocksdb/delete_scheduler.h" #include "rocksdb/env.h" #include "rocksdb/options.h" +#include "util/sst_file_manager_impl.h" #include "util/file_reader_writer.h" namespace rocksdb { @@ -66,12 +66,15 @@ Status CopyFile(Env* env, const std::string& source, return Status::OK(); } -Status DeleteOrMoveToTrash(const DBOptions* db_options, - const std::string& fname) { - if (db_options->delete_scheduler == nullptr) { - return db_options->env->DeleteFile(fname); +Status DeleteSSTFile(const DBOptions* db_options, const std::string& fname, + uint32_t path_id) { + // TODO(tec): support sst_file_manager for multiple path_ids + auto sfm = + static_cast(db_options->sst_file_manager.get()); + if (sfm && path_id == 0) { + return sfm->ScheduleFileDeletion(fname); } else { - return db_options->delete_scheduler->DeleteFile(fname); + return db_options->env->DeleteFile(fname); } } diff --git a/util/file_util.h b/util/file_util.h index f3e02fb0b..f19dc6f0b 100644 --- a/util/file_util.h +++ b/util/file_util.h @@ -16,7 +16,7 @@ namespace rocksdb { extern Status CopyFile(Env* env, const std::string& source, const std::string& destination, uint64_t size = 0); -extern Status DeleteOrMoveToTrash(const DBOptions* db_options, - const std::string& fname); +extern Status DeleteSSTFile(const DBOptions* db_options, + const std::string& fname, uint32_t path_id); } // namespace rocksdb diff --git a/util/options.cc b/util/options.cc index 13fee9051..d21d2a24b 100644 --- a/util/options.cc +++ b/util/options.cc @@ -20,8 +20,8 @@ #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/comparator.h" -#include 
"rocksdb/delete_scheduler.h" #include "rocksdb/env.h" +#include "rocksdb/sst_file_manager.h" #include "rocksdb/memtablerep.h" #include "rocksdb/merge_operator.h" #include "rocksdb/slice.h" @@ -213,7 +213,7 @@ DBOptions::DBOptions() paranoid_checks(true), env(Env::Default()), rate_limiter(nullptr), - delete_scheduler(nullptr), + sst_file_manager(nullptr), info_log(nullptr), #ifdef NDEBUG info_log_level(INFO_LEVEL), @@ -281,7 +281,7 @@ DBOptions::DBOptions(const Options& options) paranoid_checks(options.paranoid_checks), env(options.env), rate_limiter(options.rate_limiter), - delete_scheduler(options.delete_scheduler), + sst_file_manager(options.sst_file_manager), info_log(options.info_log), info_log_level(options.info_log_level), max_open_files(options.max_open_files), @@ -433,8 +433,9 @@ void DBOptions::Dump(Logger* log) const { use_adaptive_mutex); Header(log, " Options.rate_limiter: %p", rate_limiter.get()); - Header(log, " Options.delete_scheduler.rate_bytes_per_sec: %" PRIi64, - delete_scheduler ? delete_scheduler->GetRateBytesPerSecond() : 0); + Header( + log, " Options.sst_file_manager.rate_bytes_per_sec: %" PRIi64, + sst_file_manager ? 
sst_file_manager->GetDeleteRateBytesPerSecond() : 0); Header(log, " Options.bytes_per_sync: %" PRIu64, bytes_per_sync); Header(log, " Options.wal_bytes_per_sync: %" PRIu64, diff --git a/util/options_test.cc b/util/options_test.cc index 09ecbea03..65c45c2b0 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -1609,8 +1609,8 @@ TEST_F(OptionsParserTest, DBOptionsAllFieldsSettable) { {offsetof(struct DBOptions, env), sizeof(Env*)}, {offsetof(struct DBOptions, rate_limiter), sizeof(std::shared_ptr)}, - {offsetof(struct DBOptions, delete_scheduler), - sizeof(std::shared_ptr)}, + {offsetof(struct DBOptions, sst_file_manager), + sizeof(std::shared_ptr)}, {offsetof(struct DBOptions, info_log), sizeof(std::shared_ptr)}, {offsetof(struct DBOptions, statistics), sizeof(std::shared_ptr)}, diff --git a/util/sst_file_manager_impl.cc b/util/sst_file_manager_impl.cc new file mode 100644 index 000000000..f4cc82e5d --- /dev/null +++ b/util/sst_file_manager_impl.cc @@ -0,0 +1,143 @@ +// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#include "util/sst_file_manager_impl.h" + +#include + +#include "port/port.h" +#include "rocksdb/env.h" +#include "util/mutexlock.h" +#include "util/sync_point.h" + +namespace rocksdb { + +SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr logger, + const std::string& trash_dir, + int64_t rate_bytes_per_sec) + : env_(env), + logger_(logger), + total_files_size_(0), + delete_scheduler_(env, trash_dir, rate_bytes_per_sec, logger.get(), + this) {} + +SstFileManagerImpl::~SstFileManagerImpl() {} + +Status SstFileManagerImpl::OnAddFile(const std::string& file_path) { + uint64_t file_size; + Status s = env_->GetFileSize(file_path, &file_size); + if (s.ok()) { + MutexLock l(&mu_); + OnAddFileImpl(file_path, file_size); + } + TEST_SYNC_POINT("SstFileManagerImpl::OnAddFile"); + return s; +} + +Status SstFileManagerImpl::OnDeleteFile(const std::string& file_path) { + { + MutexLock l(&mu_); + OnDeleteFileImpl(file_path); + } + TEST_SYNC_POINT("SstFileManagerImpl::OnDeleteFile"); + return Status::OK(); +} + +Status SstFileManagerImpl::OnMoveFile(const std::string& old_path, + const std::string& new_path) { + { + MutexLock l(&mu_); + OnAddFileImpl(new_path, tracked_files_[old_path]); + OnDeleteFileImpl(old_path); + } + TEST_SYNC_POINT("SstFileManagerImpl::OnMoveFile"); + return Status::OK(); +} + +uint64_t SstFileManagerImpl::GetTotalSize() { + MutexLock l(&mu_); + return total_files_size_; +} + +std::unordered_map +SstFileManagerImpl::GetTrackedFiles() { + MutexLock l(&mu_); + return tracked_files_; +} + +int64_t SstFileManagerImpl::GetDeleteRateBytesPerSecond() { + return delete_scheduler_.GetRateBytesPerSecond(); +} + +Status SstFileManagerImpl::ScheduleFileDeletion(const std::string& file_path) { + return delete_scheduler_.DeleteFile(file_path); +} + +void SstFileManagerImpl::WaitForEmptyTrash() { + delete_scheduler_.WaitForEmptyTrash(); +} + +void SstFileManagerImpl::OnAddFileImpl(const std::string& file_path, + uint64_t file_size) { + auto tracked_file = 
tracked_files_.find(file_path); + if (tracked_file != tracked_files_.end()) { + // File was added before, we will just update the size + total_files_size_ -= tracked_file->second; + total_files_size_ += file_size; + } else { + total_files_size_ += file_size; + } + tracked_files_[file_path] = file_size; +} + +void SstFileManagerImpl::OnDeleteFileImpl(const std::string& file_path) { + auto tracked_file = tracked_files_.find(file_path); + if (tracked_file == tracked_files_.end()) { + // File is not tracked + return; + } + + total_files_size_ -= tracked_file->second; + tracked_files_.erase(tracked_file); +} + +SstFileManager* NewSstFileManager(Env* env, std::shared_ptr info_log, + std::string trash_dir, + int64_t rate_bytes_per_sec, + bool delete_exisitng_trash, Status* status) { + SstFileManagerImpl* res = + new SstFileManagerImpl(env, info_log, trash_dir, rate_bytes_per_sec); + + Status s; + if (trash_dir != "" && rate_bytes_per_sec > 0) { + s = env->CreateDirIfMissing(trash_dir); + if (s.ok() && delete_exisitng_trash) { + std::vector files_in_trash; + s = env->GetChildren(trash_dir, &files_in_trash); + if (s.ok()) { + for (const std::string& trash_file : files_in_trash) { + if (trash_file == "." || trash_file == "..") { + continue; + } + + std::string path_in_trash = trash_dir + "/" + trash_file; + res->OnAddFile(path_in_trash); + Status file_delete = res->ScheduleFileDeletion(path_in_trash); + if (s.ok() && !file_delete.ok()) { + s = file_delete; + } + } + } + } + } + + if (status) { + *status = s; + } + + return res; +} + +} // namespace rocksdb diff --git a/util/sst_file_manager_impl.h b/util/sst_file_manager_impl.h new file mode 100644 index 000000000..216f7cf05 --- /dev/null +++ b/util/sst_file_manager_impl.h @@ -0,0 +1,77 @@ +// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include + +#include "port/port.h" + +#include "rocksdb/sst_file_manager.h" +#include "util/delete_scheduler.h" + +namespace rocksdb { + +class Env; +class Logger; + +// SstFileManager is used to track SST files in the DB and control there +// deletion rate. +// All SstFileManager public functions are thread-safe. +class SstFileManagerImpl : public SstFileManager { + public: + explicit SstFileManagerImpl(Env* env, std::shared_ptr logger, + const std::string& trash_dir, + int64_t rate_bytes_per_sec); + + ~SstFileManagerImpl(); + + // DB will call OnAddFile whenever a new sst file is added. + Status OnAddFile(const std::string& file_path); + + // DB will call OnDeleteFile whenever an sst file is deleted. + Status OnDeleteFile(const std::string& file_path); + + // DB will call OnMoveFile whenever an sst file is move to a new path. + Status OnMoveFile(const std::string& old_path, const std::string& new_path); + + // Return the total size of all tracked files. + uint64_t GetTotalSize() override; + + // Return a map containing all tracked files and there corresponding sizes. + std::unordered_map GetTrackedFiles() override; + + // Return delete rate limit in bytes per second. + virtual int64_t GetDeleteRateBytesPerSecond() override; + + // Move file to trash directory and schedule it's deletion. + virtual Status ScheduleFileDeletion(const std::string& file_path); + + // Wait for all files being deleteing in the background to finish or for + // destructor to be called. 
+ virtual void WaitForEmptyTrash(); + + private: + // REQUIRES: mutex locked + void OnAddFileImpl(const std::string& file_path, uint64_t file_size); + // REQUIRES: mutex locked + void OnDeleteFileImpl(const std::string& file_path); + + Env* env_; + std::shared_ptr logger_; + // Mutex to protect tracked_files_, total_files_size_ + port::Mutex mu_; + // The summation of the sizes of all files in tracked_files_ map + uint64_t total_files_size_; + // A map containing all tracked files and there sizes + // file_path => file_size + std::unordered_map tracked_files_; + // DeleteScheduler used to throttle file deletition, if SstFileManagerImpl was + // created with rate_bytes_per_sec == 0 or trash_dir == "", delete_scheduler_ + // rate limiting will be disabled and will simply delete the files. + DeleteScheduler delete_scheduler_; +}; + +} // namespace rocksdb From 6ee38bb15c5421f4089e680ba67054cc14540b9d Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 28 Jan 2016 11:43:28 -0800 Subject: [PATCH 060/195] Slowdown of writing to the last memtable should not override stopping Summary: Now slowing down for the last mem table takes priority against some stopping conditions. This is logically confusing. Fix it. Test Plan: Run all existing tests. 
Reviewers: yhchiang, IslamAbdelRahman, kradhakrishnan, andrewkr, anthony Reviewed By: anthony Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53529 --- db/column_family.cc | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index 408f53831..4c12a35bd 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -531,21 +531,6 @@ void ColumnFamilyData::RecalculateWriteStallConditions( "(waiting for flush), max_write_buffer_number is set to %d", name_.c_str(), imm()->NumNotFlushed(), mutable_cf_options.max_write_buffer_number); - } else if (mutable_cf_options.max_write_buffer_number > 3 && - imm()->NumNotFlushed() >= - mutable_cf_options.max_write_buffer_number - 1) { - write_controller_token_ = - SetupDelay(ioptions_.delayed_write_rate, write_controller, - compaction_needed_bytes, prev_compaction_needed_bytes_, - mutable_cf_options.disable_auto_compactions); - internal_stats_->AddCFStats(InternalStats::MEMTABLE_SLOWDOWN, 1); - Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log, - "[%s] Stalling writes because we have %d immutable memtables " - "(waiting for flush), max_write_buffer_number is set to %d " - "rate %" PRIu64, - name_.c_str(), imm()->NumNotFlushed(), - mutable_cf_options.max_write_buffer_number, - write_controller->delayed_write_rate()); } else if (vstorage->l0_delay_trigger_count() >= mutable_cf_options.level0_stop_writes_trigger) { write_controller_token_ = write_controller->GetStopToken(); @@ -567,6 +552,21 @@ void ColumnFamilyData::RecalculateWriteStallConditions( "[%s] Stopping writes because of estimated pending compaction " "bytes %" PRIu64, name_.c_str(), compaction_needed_bytes); + } else if (mutable_cf_options.max_write_buffer_number > 3 && + imm()->NumNotFlushed() >= + mutable_cf_options.max_write_buffer_number - 1) { + write_controller_token_ = + SetupDelay(ioptions_.delayed_write_rate, write_controller, + 
compaction_needed_bytes, prev_compaction_needed_bytes_, + mutable_cf_options.disable_auto_compactions); + internal_stats_->AddCFStats(InternalStats::MEMTABLE_SLOWDOWN, 1); + Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log, + "[%s] Stalling writes because we have %d immutable memtables " + "(waiting for flush), max_write_buffer_number is set to %d " + "rate %" PRIu64, + name_.c_str(), imm()->NumNotFlushed(), + mutable_cf_options.max_write_buffer_number, + write_controller->delayed_write_rate()); } else if (mutable_cf_options.level0_slowdown_writes_trigger >= 0 && vstorage->l0_delay_trigger_count() >= mutable_cf_options.level0_slowdown_writes_trigger) { From 3b2a1ddd2e9ca0998aa711644258675324febf6a Mon Sep 17 00:00:00 2001 From: Venkatesh Radhakrishnan Date: Thu, 28 Jan 2016 11:56:16 -0800 Subject: [PATCH 061/195] Add options.base_background_compactions as a number of compaction threads for low compaction debt Summary: If options.base_background_compactions is given, we try to schedule number of compactions not existing this number, only when L0 files increase to certain number, or pending compaction bytes more than certain threshold, we schedule compactions based on options.max_background_compactions. The watermarks are calculated based on slowdown thresholds. Test Plan: Add new test cases in column_family_test. Adding more unit tests. 
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony Reviewed By: anthony Subscribers: leveldb, dhruba, yoshinorim Differential Revision: https://reviews.facebook.net/D53409 --- db/column_family.cc | 49 +++++++++++++ db/column_family_test.cc | 142 ++++++++++++++++++++++++++++++++++++++ db/db_compaction_test.cc | 117 +++++++++++++++++++++++++++++++ db/db_impl.cc | 28 ++++++-- db/db_impl.h | 4 ++ db/write_controller.cc | 12 ++++ db/write_controller.h | 18 ++++- include/rocksdb/options.h | 11 +++ util/options.cc | 5 ++ util/options_helper.h | 5 +- util/options_test.cc | 45 +----------- 11 files changed, 384 insertions(+), 52 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index 4c12a35bd..ca3be7855 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -239,6 +239,17 @@ ColumnFamilyOptions SanitizeOptions(const DBOptions& db_options, result.level0_slowdown_writes_trigger, result.level0_file_num_compaction_trigger); } + + if (result.soft_pending_compaction_bytes_limit == 0) { + result.soft_pending_compaction_bytes_limit = + result.hard_pending_compaction_bytes_limit; + } else if (result.hard_pending_compaction_bytes_limit > 0 && + result.soft_pending_compaction_bytes_limit > + result.hard_pending_compaction_bytes_limit) { + result.soft_pending_compaction_bytes_limit = + result.hard_pending_compaction_bytes_limit; + } + if (result.level_compaction_dynamic_level_bytes) { if (result.compaction_style != kCompactionStyleLevel || db_options.db_paths.size() > 1U) { @@ -513,6 +524,21 @@ std::unique_ptr SetupDelay( } return write_controller->GetDelayToken(write_rate); } + +int GetL0ThresholdSpeedupCompaction(int level0_file_num_compaction_trigger, + int level0_slowdown_writes_trigger) { + // SanitizeOptions() ensures it. + assert(level0_file_num_compaction_trigger <= level0_slowdown_writes_trigger); + + // 1/4 of the way between L0 compaction trigger threshold and slowdown + // condition. 
+ // Or twice as compaction trigger, if it is smaller. + return std::min(level0_file_num_compaction_trigger * 2, + level0_file_num_compaction_trigger + + (level0_slowdown_writes_trigger - + level0_file_num_compaction_trigger) / + 4); +} } // namespace void ColumnFamilyData::RecalculateWriteStallConditions( @@ -598,6 +624,29 @@ void ColumnFamilyData::RecalculateWriteStallConditions( "bytes %" PRIu64 " rate %" PRIu64, name_.c_str(), vstorage->estimated_compaction_needed_bytes(), write_controller->delayed_write_rate()); + } else if (vstorage->l0_delay_trigger_count() >= + GetL0ThresholdSpeedupCompaction( + mutable_cf_options.level0_file_num_compaction_trigger, + mutable_cf_options.level0_slowdown_writes_trigger)) { + write_controller_token_ = write_controller->GetCompactionPressureToken(); + Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log, + "[%s] Increasing compaction threads because we have %d level-0 " + "files ", + name_.c_str(), vstorage->l0_delay_trigger_count()); + } else if (vstorage->estimated_compaction_needed_bytes() >= + mutable_cf_options.soft_pending_compaction_bytes_limit / 4) { + // Increase compaction threads if bytes needed for compaction exceeds + // 1/4 of threshold for slowing down. + // If soft pending compaction byte limit is not set, always speed up + // compaction. 
+ write_controller_token_ = write_controller->GetCompactionPressureToken(); + if (mutable_cf_options.soft_pending_compaction_bytes_limit > 0) { + Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log, + "[%s] Increasing compaction threads because of estimated pending " + "compaction " + "bytes %" PRIu64, + name_.c_str(), vstorage->estimated_compaction_needed_bytes()); + } } else { write_controller_token_.reset(); } diff --git a/db/column_family_test.cc b/db/column_family_test.cc index 62fadbbee..114451464 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -2137,6 +2137,9 @@ TEST_F(ColumnFamilyTest, CreateAndDropRace) { TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) { const uint64_t kBaseRate = 810000u; db_options_.delayed_write_rate = kBaseRate; + db_options_.base_background_compactions = 2; + db_options_.max_background_compactions = 6; + Open({"default"}); ColumnFamilyData* cfd = static_cast(db_->DefaultColumnFamily())->cfd(); @@ -2162,6 +2165,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) { ASSERT_TRUE(!dbfull()->TEST_write_controler().IsStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate, dbfull()->TEST_write_controler().delayed_write_rate()); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); vstorage->TEST_set_estimated_compaction_needed_bytes(400); cfd->RecalculateWriteStallConditions(mutable_cf_options); @@ -2169,6 +2173,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) { ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate / 1.2, dbfull()->TEST_write_controler().delayed_write_rate()); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); vstorage->TEST_set_estimated_compaction_needed_bytes(500); cfd->RecalculateWriteStallConditions(mutable_cf_options); @@ -2224,6 +2229,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) { cfd->RecalculateWriteStallConditions(mutable_cf_options); ASSERT_TRUE(dbfull()->TEST_write_controler().IsStopped()); 
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); vstorage->TEST_set_estimated_compaction_needed_bytes(3001); cfd->RecalculateWriteStallConditions(mutable_cf_options); @@ -2248,6 +2254,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) { ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate / 1.2, dbfull()->TEST_write_controler().delayed_write_rate()); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); vstorage->set_l0_delay_trigger_count(101); cfd->RecalculateWriteStallConditions(mutable_cf_options); @@ -2320,6 +2327,73 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) { dbfull()->TEST_write_controler().delayed_write_rate()); } +TEST_F(ColumnFamilyTest, CompactionSpeedupSingleColumnFamily) { + db_options_.base_background_compactions = 2; + db_options_.max_background_compactions = 6; + Open({"default"}); + ColumnFamilyData* cfd = + static_cast(db_->DefaultColumnFamily())->cfd(); + + VersionStorageInfo* vstorage = cfd->current()->storage_info(); + + MutableCFOptions mutable_cf_options( + Options(db_options_, column_family_options_), + ImmutableCFOptions(Options(db_options_, column_family_options_))); + + // Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8 + mutable_cf_options.level0_file_num_compaction_trigger = 4; + mutable_cf_options.level0_slowdown_writes_trigger = 36; + mutable_cf_options.level0_stop_writes_trigger = 50; + // Speedup threshold = 200 / 4 = 50 + mutable_cf_options.soft_pending_compaction_bytes_limit = 200; + mutable_cf_options.hard_pending_compaction_bytes_limit = 2000; + + vstorage->TEST_set_estimated_compaction_needed_bytes(40); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); + + vstorage->TEST_set_estimated_compaction_needed_bytes(50); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); + + 
vstorage->TEST_set_estimated_compaction_needed_bytes(300); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); + + vstorage->TEST_set_estimated_compaction_needed_bytes(45); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); + + vstorage->set_l0_delay_trigger_count(7); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); + + vstorage->set_l0_delay_trigger_count(9); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); + + vstorage->set_l0_delay_trigger_count(6); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); + + // Speed up threshold = min(4 * 2, 4 + (12 - 4)/4) = 6 + mutable_cf_options.level0_file_num_compaction_trigger = 4; + mutable_cf_options.level0_slowdown_writes_trigger = 16; + mutable_cf_options.level0_stop_writes_trigger = 30; + + vstorage->set_l0_delay_trigger_count(5); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); + + vstorage->set_l0_delay_trigger_count(7); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); + + vstorage->set_l0_delay_trigger_count(3); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); +} + TEST_F(ColumnFamilyTest, WriteStallTwoColumnFamilies) { const uint64_t kBaseRate = 810000u; db_options_.delayed_write_rate = kBaseRate; @@ -2401,6 +2475,74 @@ TEST_F(ColumnFamilyTest, WriteStallTwoColumnFamilies) { ASSERT_EQ(kBaseRate / 1.2, dbfull()->TEST_write_controler().delayed_write_rate()); } + +TEST_F(ColumnFamilyTest, CompactionSpeedupTwoColumnFamilies) { + db_options_.base_background_compactions = 2; + db_options_.max_background_compactions = 6; + 
column_family_options_.soft_pending_compaction_bytes_limit = 200; + column_family_options_.hard_pending_compaction_bytes_limit = 2000; + Open(); + CreateColumnFamilies({"one"}); + ColumnFamilyData* cfd = + static_cast(db_->DefaultColumnFamily())->cfd(); + VersionStorageInfo* vstorage = cfd->current()->storage_info(); + + ColumnFamilyData* cfd1 = + static_cast(handles_[1])->cfd(); + VersionStorageInfo* vstorage1 = cfd1->current()->storage_info(); + + MutableCFOptions mutable_cf_options( + Options(db_options_, column_family_options_), + ImmutableCFOptions(Options(db_options_, column_family_options_))); + // Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8 + mutable_cf_options.level0_file_num_compaction_trigger = 4; + mutable_cf_options.level0_slowdown_writes_trigger = 36; + mutable_cf_options.level0_stop_writes_trigger = 30; + // Speedup threshold = 200 / 4 = 50 + mutable_cf_options.soft_pending_compaction_bytes_limit = 200; + mutable_cf_options.hard_pending_compaction_bytes_limit = 2000; + + MutableCFOptions mutable_cf_options1 = mutable_cf_options; + mutable_cf_options1.level0_slowdown_writes_trigger = 16; + + vstorage->TEST_set_estimated_compaction_needed_bytes(40); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); + + vstorage->TEST_set_estimated_compaction_needed_bytes(60); + cfd1->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); + + vstorage1->TEST_set_estimated_compaction_needed_bytes(30); + cfd1->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); + + vstorage1->TEST_set_estimated_compaction_needed_bytes(70); + cfd1->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); + + vstorage->TEST_set_estimated_compaction_needed_bytes(20); + 
cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); + + vstorage1->TEST_set_estimated_compaction_needed_bytes(3); + cfd1->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); + + vstorage->set_l0_delay_trigger_count(9); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); + + vstorage1->set_l0_delay_trigger_count(2); + cfd1->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(6, dbfull()->BGCompactionsAllowed()); + + vstorage->set_l0_delay_trigger_count(0); + cfd->RecalculateWriteStallConditions(mutable_cf_options); + ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); +} } // namespace rocksdb int main(int argc, char** argv) { diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index d29b50e7f..5d9e0536f 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -533,6 +533,104 @@ TEST_P(DBCompactionTestWithParam, CompactionTrigger) { ASSERT_EQ(NumTableFilesAtLevel(1, 1), 1); } +TEST_F(DBCompactionTest, BGCompactionsAllowed) { + // Create several column families. Make compaction triggers in all of them + // and see number of compactions scheduled to be less than allowed. + const int kNumKeysPerFile = 100; + + Options options; + options.write_buffer_size = 110 << 10; // 110KB + options.arena_block_size = 4 << 10; + options.num_levels = 3; + // Should speed up compaction when there are 4 files. + options.level0_file_num_compaction_trigger = 2; + options.level0_slowdown_writes_trigger = 20; + options.soft_pending_compaction_bytes_limit = 1 << 30; // Infinitely large + options.base_background_compactions = 1; + options.max_background_compactions = 3; + options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); + options = CurrentOptions(options); + + // Block all threads in thread pool. 
+ const size_t kTotalTasks = 4; + env_->SetBackgroundThreads(4, Env::LOW); + test::SleepingBackgroundTask sleeping_tasks[kTotalTasks]; + for (size_t i = 0; i < kTotalTasks; i++) { + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, + &sleeping_tasks[i], Env::Priority::LOW); + sleeping_tasks[i].WaitUntilSleeping(); + } + + CreateAndReopenWithCF({"one", "two", "three"}, options); + + Random rnd(301); + for (int cf = 0; cf < 4; cf++) { + for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { + for (int i = 0; i < kNumKeysPerFile; i++) { + ASSERT_OK(Put(cf, Key(i), "")); + } + // put extra key to trigger flush + ASSERT_OK(Put(cf, "", "")); + dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); + ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1); + } + } + + // Now all column families qualify compaction but only one should be + // scheduled, because no column family hits speed up condition. + ASSERT_EQ(1, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); + + // Create two more files for one column family, which triggers speed up + // condition, three compactions will be scheduled. + for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { + for (int i = 0; i < kNumKeysPerFile; i++) { + ASSERT_OK(Put(2, Key(i), "")); + } + // put extra key to trigger flush + ASSERT_OK(Put(2, "", "")); + dbfull()->TEST_WaitForFlushMemTable(handles_[2]); + ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1, + NumTableFilesAtLevel(0, 2)); + } + ASSERT_EQ(3, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); + + // Unblock all threads to unblock all compactions. + for (size_t i = 0; i < kTotalTasks; i++) { + sleeping_tasks[i].WakeUp(); + sleeping_tasks[i].WaitUntilDone(); + } + dbfull()->TEST_WaitForCompact(); + + // Verify number of compactions allowed will come back to 1. 
+ + for (size_t i = 0; i < kTotalTasks; i++) { + sleeping_tasks[i].Reset(); + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, + &sleeping_tasks[i], Env::Priority::LOW); + sleeping_tasks[i].WaitUntilSleeping(); + } + for (int cf = 0; cf < 4; cf++) { + for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { + for (int i = 0; i < kNumKeysPerFile; i++) { + ASSERT_OK(Put(cf, Key(i), "")); + } + // put extra key to trigger flush + ASSERT_OK(Put(cf, "", "")); + dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); + ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1); + } + } + + // Now all column families qualify compaction but only one should be + // scheduled, because no column family hits speed up condition. + ASSERT_EQ(1, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); + + for (size_t i = 0; i < kTotalTasks; i++) { + sleeping_tasks[i].WakeUp(); + sleeping_tasks[i].WaitUntilDone(); + } +} + TEST_P(DBCompactionTestWithParam, CompactionsGenerateMultipleFiles) { Options options; options.write_buffer_size = 100000000; // Large write buffer @@ -2198,6 +2296,25 @@ TEST_P(DBCompactionTestWithParam, CompressLevelCompaction) { Destroy(options); } +TEST_F(DBCompactionTest, SanitizeCompactionOptionsTest) { + Options options = CurrentOptions(); + options.max_background_compactions = 5; + options.soft_pending_compaction_bytes_limit = 0; + options.hard_pending_compaction_bytes_limit = 100; + options.create_if_missing = true; + DestroyAndReopen(options); + ASSERT_EQ(5, db_->GetOptions().base_background_compactions); + ASSERT_EQ(100, db_->GetOptions().soft_pending_compaction_bytes_limit); + + options.base_background_compactions = 4; + options.max_background_compactions = 3; + options.soft_pending_compaction_bytes_limit = 200; + options.hard_pending_compaction_bytes_limit = 150; + DestroyAndReopen(options); + ASSERT_EQ(3, db_->GetOptions().base_background_compactions); + ASSERT_EQ(150, db_->GetOptions().soft_pending_compaction_bytes_limit); +} + // This 
tests for a bug that could cause two level0 compactions running // concurrently // TODO(aekmekji): Make sure that the reason this fails when run with diff --git a/db/db_impl.cc b/db/db_impl.cc index 6db05ae66..2afab389d 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -146,6 +146,12 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) { result.info_log = nullptr; } } + if (result.base_background_compactions == -1) { + result.base_background_compactions = result.max_background_compactions; + } + if (result.base_background_compactions > result.max_background_compactions) { + result.base_background_compactions = result.max_background_compactions; + } result.env->IncBackgroundThreadsIfNeeded(src.max_background_compactions, Env::Priority::LOW); result.env->IncBackgroundThreadsIfNeeded(src.max_background_flushes, @@ -2448,12 +2454,14 @@ void DBImpl::MaybeScheduleFlushOrCompaction() { env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH, this); } + auto bg_compactions_allowed = BGCompactionsAllowed(); + // special case -- if max_background_flushes == 0, then schedule flush on a // compaction thread if (db_options_.max_background_flushes == 0) { while (unscheduled_flushes_ > 0 && bg_flush_scheduled_ + bg_compaction_scheduled_ < - db_options_.max_background_compactions) { + bg_compactions_allowed) { unscheduled_flushes_--; bg_flush_scheduled_++; env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::LOW, this); @@ -2466,7 +2474,7 @@ void DBImpl::MaybeScheduleFlushOrCompaction() { return; } - while (bg_compaction_scheduled_ < db_options_.max_background_compactions && + while (bg_compaction_scheduled_ < bg_compactions_allowed && unscheduled_compactions_ > 0) { CompactionArg* ca = new CompactionArg; ca->db = this; @@ -2478,6 +2486,14 @@ void DBImpl::MaybeScheduleFlushOrCompaction() { } } +int DBImpl::BGCompactionsAllowed() const { + if (write_controller_.NeedSpeedupCompaction()) { + return db_options_.max_background_compactions; + } else 
{ + return db_options_.base_background_compactions; + } +} + void DBImpl::AddToCompactionQueue(ColumnFamilyData* cfd) { assert(!cfd->pending_compaction()); cfd->Ref(); @@ -2590,10 +2606,10 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context, LogToBuffer( log_buffer, "Calling FlushMemTableToOutputFile with column " - "family [%s], flush slots available %d, compaction slots available %d", - cfd->GetName().c_str(), - db_options_.max_background_flushes - bg_flush_scheduled_, - db_options_.max_background_compactions - bg_compaction_scheduled_); + "family [%s], flush slots available %d, compaction slots allowed %d, " + "compaction slots scheduled %d", + cfd->GetName().c_str(), db_options_.max_background_flushes, + bg_flush_scheduled_, BGCompactionsAllowed() - bg_compaction_scheduled_); status = FlushMemTableToOutputFile(cfd, mutable_cf_options, made_progress, job_context, log_buffer); if (cfd->Unref()) { diff --git a/db/db_impl.h b/db/db_impl.h index 683fd49dc..d09d645d7 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -347,6 +347,10 @@ class DBImpl : public DB { #endif // NDEBUG + // Return maximum background compaction alowed to be scheduled based on + // compaction status. + int BGCompactionsAllowed() const; + // Returns the list of live files in 'live' and the list // of all files in the filesystem in 'candidate_files'. 
// If force == false and the last call was less than diff --git a/db/write_controller.cc b/db/write_controller.cc index 7a933ec42..a0c18835f 100644 --- a/db/write_controller.cc +++ b/db/write_controller.cc @@ -26,6 +26,13 @@ std::unique_ptr WriteController::GetDelayToken( return std::unique_ptr(new DelayWriteToken(this)); } +std::unique_ptr +WriteController::GetCompactionPressureToken() { + ++total_compaction_pressure_; + return std::unique_ptr( + new CompactionPressureToken(this)); +} + bool WriteController::IsStopped() const { return total_stopped_ > 0; } // This is inside DB mutex, so we can't sleep and need to minimize // frequency to get time. @@ -106,4 +113,9 @@ DelayWriteToken::~DelayWriteToken() { assert(controller_->total_delayed_ >= 0); } +CompactionPressureToken::~CompactionPressureToken() { + controller_->total_compaction_pressure_--; + assert(controller_->total_compaction_pressure_ >= 0); +} + } // namespace rocksdb diff --git a/db/write_controller.h b/db/write_controller.h index a5d498c3a..6cba2c643 100644 --- a/db/write_controller.h +++ b/db/write_controller.h @@ -23,6 +23,7 @@ class WriteController { explicit WriteController(uint64_t _delayed_write_rate = 1024u * 1024u * 32u) : total_stopped_(0), total_delayed_(0), + total_compaction_pressure_(0), bytes_left_(0), last_refill_time_(0) { set_delayed_write_rate(_delayed_write_rate); @@ -38,10 +39,16 @@ class WriteController { // which returns number of microseconds to sleep. 
std::unique_ptr GetDelayToken( uint64_t delayed_write_rate); + // When an actor (column family) requests a moderate token, compaction + // threads will be increased + std::unique_ptr GetCompactionPressureToken(); - // these two metods are querying the state of the WriteController + // these three metods are querying the state of the WriteController bool IsStopped() const; bool NeedsDelay() const { return total_delayed_ > 0; } + bool NeedSpeedupCompaction() const { + return IsStopped() || NeedsDelay() || total_compaction_pressure_ > 0; + } // return how many microseconds the caller needs to sleep after the call // num_bytes: how many number of bytes to put into the DB. // Prerequisite: DB mutex held. @@ -59,9 +66,11 @@ class WriteController { friend class WriteControllerToken; friend class StopWriteToken; friend class DelayWriteToken; + friend class CompactionPressureToken; int total_stopped_; int total_delayed_; + int total_compaction_pressure_; uint64_t bytes_left_; uint64_t last_refill_time_; uint64_t delayed_write_rate_; @@ -96,4 +105,11 @@ class DelayWriteToken : public WriteControllerToken { virtual ~DelayWriteToken(); }; +class CompactionPressureToken : public WriteControllerToken { + public: + explicit CompactionPressureToken(WriteController* controller) + : WriteControllerToken(controller) {} + virtual ~CompactionPressureToken(); +}; + } // namespace rocksdb diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index a3f410422..9eee07ac0 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -933,8 +933,19 @@ struct DBOptions { // regardless of this setting uint64_t delete_obsolete_files_period_micros; + // Suggested number of concurrent background compaction jobs, submitted to + // the default LOW priority thread pool. + // + // Default: max_background_compactions + int base_background_compactions; + // Maximum number of concurrent background compaction jobs, submitted to // the default LOW priority thread pool. 
+ // We first try to schedule compactions based on + // `base_background_compactions`. If the compaction cannot catch up , we + // will increase number of compaction threads up to + // `max_background_compactions`. + // // If you're increasing this, also consider increasing number of threads in // LOW priority thread pool. For more information, see // Env::SetBackgroundThreads diff --git a/util/options.cc b/util/options.cc index d21d2a24b..00d797167 100644 --- a/util/options.cc +++ b/util/options.cc @@ -229,6 +229,7 @@ DBOptions::DBOptions() db_log_dir(""), wal_dir(""), delete_obsolete_files_period_micros(6ULL * 60 * 60 * 1000000), + base_background_compactions(-1), max_background_compactions(1), max_subcompactions(1), max_background_flushes(1), @@ -295,6 +296,7 @@ DBOptions::DBOptions(const Options& options) wal_dir(options.wal_dir), delete_obsolete_files_period_micros( options.delete_obsolete_files_period_micros), + base_background_compactions(options.base_background_compactions), max_background_compactions(options.max_background_compactions), max_subcompactions(options.max_subcompactions), max_background_flushes(options.max_background_flushes), @@ -383,6 +385,8 @@ void DBOptions::Dump(Logger* log) const { table_cache_numshardbits); Header(log, " Options.delete_obsolete_files_period_micros: %" PRIu64, delete_obsolete_files_period_micros); + Header(log, " Options.base_background_compactions: %d", + base_background_compactions); Header(log, " Options.max_background_compactions: %d", max_background_compactions); Header(log, " Options.max_subcompactions: %" PRIu32, @@ -652,6 +656,7 @@ Options::PrepareForBulkLoad() // to L1. This is helpful so that all files that are // input to the manual compaction are all at L0. max_background_compactions = 2; + base_background_compactions = 2; // The compaction would create large files in L1. 
target_file_size_base = 256 * 1024 * 1024; diff --git a/util/options_helper.h b/util/options_helper.h index 84d547cfc..4c4555aca 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -208,7 +208,7 @@ static std::unordered_map db_options_type_info = { {offsetof(struct DBOptions, random_access_max_buffer_size), OptionType::kSizeT, OptionVerificationType::kNormal}}, {"writable_file_max_buffer_size", - {offsetof(struct DBOptions, writable_file_max_buffer_size), + {offsetof(struct DBOptions, writable_file_max_buffer_size), OptionType::kSizeT, OptionVerificationType::kNormal}}, {"use_adaptive_mutex", {offsetof(struct DBOptions, use_adaptive_mutex), OptionType::kBoolean, @@ -219,6 +219,9 @@ static std::unordered_map db_options_type_info = { {"max_background_compactions", {offsetof(struct DBOptions, max_background_compactions), OptionType::kInt, OptionVerificationType::kNormal}}, + {"base_background_compactions", + {offsetof(struct DBOptions, base_background_compactions), OptionType::kInt, + OptionVerificationType::kNormal}}, {"max_background_flushes", {offsetof(struct DBOptions, max_background_flushes), OptionType::kInt, OptionVerificationType::kNormal}}, diff --git a/util/options_test.cc b/util/options_test.cc index 65c45c2b0..961593bd0 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -1669,50 +1669,7 @@ TEST_F(OptionsParserTest, DBOptionsAllFieldsSettable) { "table_cache_numshardbits=28;" "max_open_files=72;" "max_file_opening_threads=35;" - "max_background_compactions=33;" - "use_fsync=true;" - "use_adaptive_mutex=false;" - "max_total_wal_size=4295005604;" - "compaction_readahead_size=0;" - "new_table_reader_for_compaction_inputs=false;" - "keep_log_file_num=4890;" - "skip_stats_update_on_db_open=false;" - "max_manifest_file_size=4295009941;" - "db_log_dir=path/to/db_log_dir;" - "skip_log_error_on_recovery=true;" - "writable_file_max_buffer_size=1048576;" - "paranoid_checks=true;" - "is_fd_close_on_exec=false;" - "bytes_per_sync=4295013613;" 
- "enable_thread_tracking=false;" - "disable_data_sync=false;" - "recycle_log_file_num=0;" - "disableDataSync=false;" - "create_missing_column_families=true;" - "log_file_time_to_roll=3097;" - "max_background_flushes=35;" - "create_if_missing=false;" - "error_if_exists=true;" - "allow_os_buffer=false;" - "delayed_write_rate=4294976214;" - "manifest_preallocation_size=1222;" - "allow_mmap_writes=false;" - "stats_dump_period_sec=70127;" - "allow_fallocate=true;" - "allow_mmap_reads=false;" - "max_log_file_size=4607;" - "random_access_max_buffer_size=1048576;" - "advise_random_on_open=true;" - "wal_bytes_per_sync=4295048118;" - "delete_obsolete_files_period_micros=4294967758;" - "WAL_ttl_seconds=4295008036;" - "WAL_size_limit_MB=4295036161;" - "wal_dir=path/to/wal_dir;" - "db_write_buffer_size=2587;" - "max_subcompactions=64330;" - "table_cache_numshardbits=28;" - "max_open_files=72;" - "max_file_opening_threads=35;" + "base_background_compactions=3;" "max_background_compactions=33;" "use_fsync=true;" "use_adaptive_mutex=false;" From 37159a6448f64fac976a9c96ced440bd6bfb15e0 Mon Sep 17 00:00:00 2001 From: SherlockNoMad Date: Sun, 31 Jan 2016 18:09:24 -0800 Subject: [PATCH 062/195] Add histogram for value size per operation --- db/db_impl.cc | 3 +++ include/rocksdb/statistics.h | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/db/db_impl.cc b/db/db_impl.cc index 2afab389d..51ed9a3a0 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3327,6 +3327,7 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, RecordTick(stats_, NUMBER_KEYS_READ); RecordTick(stats_, BYTES_READ, value->size()); + MeasureTime(stats_, BYTES_PER_READ, value->size()); } return s; } @@ -3437,6 +3438,7 @@ std::vector DBImpl::MultiGet( RecordTick(stats_, NUMBER_MULTIGET_CALLS); RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys); RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read); + MeasureTime(stats_, BYTES_PER_MULTIGET, bytes_read); PERF_TIMER_STOP(get_post_process_time); 
return stat_list; @@ -4321,6 +4323,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, // Record statistics RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count); RecordTick(stats_, BYTES_WRITTEN, total_byte_size); + MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size); PERF_TIMER_STOP(write_pre_and_post_process_time); if (write_options.disableWAL) { diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 813104b99..35de93334 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -280,6 +280,10 @@ enum Histograms : uint32_t { SST_READ_MICROS, // The number of subcompactions actually scheduled during a compaction NUM_SUBCOMPACTIONS_SCHEDULED, + // Value size distribution in each operation + BYTES_PER_READ, + BYTES_PER_WRITE, + BYTES_PER_MULTIGET, HISTOGRAM_ENUM_MAX, // TODO(ldemailly): enforce HistogramsNameMap match }; @@ -307,6 +311,9 @@ const std::vector> HistogramsNameMap = { {WRITE_STALL, "rocksdb.db.write.stall"}, {SST_READ_MICROS, "rocksdb.sst.read.micros"}, {NUM_SUBCOMPACTIONS_SCHEDULED, "rocksdb.num.subcompactions.scheduled"}, + {BYTES_PER_READ, "rocksdb.bytes.per.read"}, + {BYTES_PER_WRITE, "rocksdb.bytes.per.write"}, + {BYTES_PER_MULTIGET, "rocksdb.bytes.per.multiget"}, }; struct HistogramData { From 1d854fa3d46cc8da984b5f70ec7856917bb52e70 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Mon, 1 Feb 2016 12:45:45 -0800 Subject: [PATCH 063/195] Fixed the asan error on column_family_test Summary: Fixed the asan error on column_family_test caused by not disabling SyncPoint. 
Test Plan: column_family_test Reviewers: anthony, rven, kradhakrishnan, sdong, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53505 --- db/column_family_test.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/db/column_family_test.cc b/db/column_family_test.cc index 114451464..e3b51fc85 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -65,6 +65,7 @@ class ColumnFamilyTest : public testing::Test { ~ColumnFamilyTest() { Close(); + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); Destroy(); delete env_; } @@ -2047,7 +2048,6 @@ TEST_F(ColumnFamilyTest, FlushAndDropRaceCondition) { Close(); Destroy(); - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } #ifndef ROCKSDB_LITE @@ -2125,7 +2125,6 @@ TEST_F(ColumnFamilyTest, CreateAndDropRace) { drop_cf_thread.join(); Close(); Destroy(); - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); for (auto* comparator : comparators) { if (comparator) { delete comparator; From 36300fbbe3a7fde402152e4a57bbf5cda614d53c Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 1 Feb 2016 13:14:37 -0800 Subject: [PATCH 064/195] Enable per-request buffer allocation in RandomAccessFile This change impacts only non-buffered I/O on Windows. Currently, there is a buffer per RandomAccessFile instance that is protected by a lock. The reason we maintain the buffer is non-buffered I/O requires an aligned buffer to work. XPerf traces demonstrate that we accumulate a considerable wait time while waiting for that lock. This change enables to set random access buffer size to zero which would indicate a per request allocation. We are expecting that allocation expense would be much less than I/O costs plus wait time due to the fact that the memory heap would tend to re-use page aligned allocations especially with the use of Jemalloc. 
This change does not affect buffer use as a read_ahead_buffer for compaction purposes. --- include/rocksdb/options.h | 3 + port/win/env_win.cc | 121 ++++++++++++++++++++++++-------------- 2 files changed, 80 insertions(+), 44 deletions(-) diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 9eee07ac0..a26ed7d81 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1121,6 +1121,9 @@ struct DBOptions { // This option is currently honored only on Windows // // Default: 1 Mb + // + // Special value: 0 - means do not maintain per instance buffer. Allocate + // per request buffer and avoid locking. size_t random_access_max_buffer_size; // This is the maximum buffer size that is used by WritableFileWriter. diff --git a/port/win/env_win.cc b/port/win/env_win.cc index 977c80b88..50059a98f 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -766,6 +766,18 @@ class WinRandomAccessFile : public RandomAccessFile { return read; } + void CalculateReadParameters(uint64_t offset, size_t bytes_requested, + size_t& actual_bytes_toread, + uint64_t& first_page_start) const { + + const size_t alignment = buffer_.Alignment(); + + first_page_start = TruncateToPageBoundary(alignment, offset); + const uint64_t last_page_start = + TruncateToPageBoundary(alignment, offset + bytes_requested - 1); + actual_bytes_toread = (last_page_start - first_page_start) + alignment; + } + public: WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment, const EnvOptions& options) @@ -797,66 +809,87 @@ class WinRandomAccessFile : public RandomAccessFile { virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { + Status s; SSIZE_T r = -1; size_t left = n; char* dest = scratch; + if (n == 0) { + *result = Slice(scratch, 0); + return s; + } + // When in unbuffered mode we need to do the following changes: // - use our own aligned buffer // - always read at the offset of that is a multiple of alignment if 
(!use_os_buffer_) { - std::unique_lock lock(buffer_mut_); - // Let's see if at least some of the requested data is already - // in the buffer - if (offset >= buffered_start_ && + uint64_t first_page_start = 0; + size_t actual_bytes_toread = 0; + size_t bytes_requested = left; + + if (!read_ahead_ && random_access_max_buffer_size_ == 0) { + CalculateReadParameters(offset, bytes_requested, actual_bytes_toread, + first_page_start); + + assert(actual_bytes_toread > 0); + + r = ReadIntoOneShotBuffer(offset, first_page_start, + actual_bytes_toread, left, dest); + } else { + + std::unique_lock lock(buffer_mut_); + + // Let's see if at least some of the requested data is already + // in the buffer + if (offset >= buffered_start_ && offset < (buffered_start_ + buffer_.CurrentSize())) { - size_t buffer_offset = offset - buffered_start_; - r = buffer_.Read(dest, buffer_offset, left); - assert(r >= 0); + size_t buffer_offset = offset - buffered_start_; + r = buffer_.Read(dest, buffer_offset, left); + assert(r >= 0); - left -= size_t(r); - offset += r; - dest += r; - } - - // Still some left or none was buffered - if (left > 0) { - // Figure out the start/end offset for reading and amount to read - const size_t alignment = buffer_.Alignment(); - const size_t first_page_start = - TruncateToPageBoundary(alignment, offset); - - size_t bytes_requested = left; - if (read_ahead_ && bytes_requested < compaction_readahead_size_) { - bytes_requested = compaction_readahead_size_; + left -= size_t(r); + offset += r; + dest += r; } - const size_t last_page_start = - TruncateToPageBoundary(alignment, offset + bytes_requested - 1); - const size_t actual_bytes_toread = - (last_page_start - first_page_start) + alignment; + // Still some left or none was buffered + if (left > 0) { + // Figure out the start/end offset for reading and amount to read + bytes_requested = left; - if (buffer_.Capacity() < actual_bytes_toread) { - // If we are in read-ahead mode or the requested size - // exceeds max 
buffer size then use one-shot - // big buffer otherwise reallocate main buffer - if (read_ahead_ || - (actual_bytes_toread > random_access_max_buffer_size_)) { - // Unlock the mutex since we are not using instance buffer - lock.unlock(); - r = ReadIntoOneShotBuffer(offset, first_page_start, - actual_bytes_toread, left, dest); - } else { - buffer_.AllocateNewBuffer(actual_bytes_toread); - r = ReadIntoInstanceBuffer(offset, first_page_start, - actual_bytes_toread, left, dest); + if (read_ahead_ && bytes_requested < compaction_readahead_size_) { + bytes_requested = compaction_readahead_size_; + } + + CalculateReadParameters(offset, bytes_requested, actual_bytes_toread, + first_page_start); + + assert(actual_bytes_toread > 0); + + if (buffer_.Capacity() < actual_bytes_toread) { + // If we are in read-ahead mode or the requested size + // exceeds max buffer size then use one-shot + // big buffer otherwise reallocate main buffer + if (read_ahead_ || + (actual_bytes_toread > random_access_max_buffer_size_)) { + // Unlock the mutex since we are not using instance buffer + lock.unlock(); + r = ReadIntoOneShotBuffer(offset, first_page_start, + actual_bytes_toread, left, dest); + } + else { + buffer_.AllocateNewBuffer(actual_bytes_toread); + r = ReadIntoInstanceBuffer(offset, first_page_start, + actual_bytes_toread, left, dest); + } + } + else { + buffer_.Clear(); + r = ReadIntoInstanceBuffer(offset, first_page_start, + actual_bytes_toread, left, dest); } - } else { - buffer_.Clear(); - r = ReadIntoInstanceBuffer(offset, first_page_start, - actual_bytes_toread, left, dest); } } } else { From aa5e3b7c04b7116a177b7fb955b8e2fe76c5d64b Mon Sep 17 00:00:00 2001 From: Dmytro Ivchenko Date: Mon, 1 Feb 2016 13:41:13 -0800 Subject: [PATCH 065/195] PerfContext::ToString() add option to exclude zero counters Test Plan: Added unit test to check w/ w/o zeros scenarios Reviewers: yhchiang Reviewed By: yhchiang Subscribers: sdong, dhruba Differential Revision: 
https://reviews.facebook.net/D52809 --- db/perf_context_test.cc | 13 +++++++ include/rocksdb/perf_context.h | 2 +- util/perf_context.cc | 64 +++++++++++++++++++++++----------- 3 files changed, 57 insertions(+), 22 deletions(-) diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index 9494ac92b..8a345e5bb 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -589,6 +589,19 @@ TEST_F(PerfContextTest, FalseDBMutexWait) { } } } + +TEST_F(PerfContextTest, ToString) { + perf_context.Reset(); + perf_context.block_read_count = 12345; + + std::string zero_included = perf_context.ToString(); + ASSERT_NE(std::string::npos, zero_included.find("= 0")); + ASSERT_NE(std::string::npos, zero_included.find("= 12345")); + + std::string zero_excluded = perf_context.ToString(true); + ASSERT_EQ(std::string::npos, zero_excluded.find("= 0")); + ASSERT_NE(std::string::npos, zero_excluded.find("= 12345")); +} } int main(int argc, char** argv) { diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index c2af729e3..7a6b6f367 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -21,7 +21,7 @@ struct PerfContext { void Reset(); // reset all performance counters to zero - std::string ToString() const; + std::string ToString(bool exclude_zero_counters = false) const; uint64_t user_key_comparison_count; // total number of user key comparisons uint64_t block_cache_hit_count; // total number of block cache hits diff --git a/util/perf_context.cc b/util/perf_context.cc index 282516590..07bad40f2 100644 --- a/util/perf_context.cc +++ b/util/perf_context.cc @@ -61,32 +61,54 @@ void PerfContext::Reset() { #endif } -#define OUTPUT(counter) #counter << " = " << counter << ", " +#define OUTPUT(counter) \ + if (!exclude_zero_counters || (counter > 0)) { \ + ss << #counter << " = " << counter << ", "; \ + } -std::string PerfContext::ToString() const { +std::string PerfContext::ToString(bool exclude_zero_counters) const { #if 
defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) return ""; #else std::ostringstream ss; - ss << OUTPUT(user_key_comparison_count) << OUTPUT(block_cache_hit_count) - << OUTPUT(block_read_count) << OUTPUT(block_read_byte) - << OUTPUT(block_read_time) << OUTPUT(block_checksum_time) - << OUTPUT(block_decompress_time) << OUTPUT(internal_key_skipped_count) - << OUTPUT(internal_delete_skipped_count) << OUTPUT(write_wal_time) - << OUTPUT(get_snapshot_time) << OUTPUT(get_from_memtable_time) - << OUTPUT(get_from_memtable_count) << OUTPUT(get_post_process_time) - << OUTPUT(get_from_output_files_time) << OUTPUT(seek_on_memtable_time) - << OUTPUT(seek_on_memtable_count) << OUTPUT(seek_child_seek_time) - << OUTPUT(seek_child_seek_count) << OUTPUT(seek_min_heap_time) - << OUTPUT(seek_internal_seek_time) << OUTPUT(find_next_user_entry_time) - << OUTPUT(write_pre_and_post_process_time) << OUTPUT(write_memtable_time) - << OUTPUT(db_mutex_lock_nanos) << OUTPUT(db_condition_wait_nanos) - << OUTPUT(merge_operator_time_nanos) << OUTPUT(write_delay_time) - << OUTPUT(read_index_block_nanos) << OUTPUT(read_filter_block_nanos) - << OUTPUT(new_table_block_iter_nanos) << OUTPUT(new_table_iterator_nanos) - << OUTPUT(block_seek_nanos) << OUTPUT(find_table_nanos) - << OUTPUT(bloom_memtable_hit_count) << OUTPUT(bloom_memtable_miss_count) - << OUTPUT(bloom_sst_hit_count) << OUTPUT(bloom_sst_miss_count); + OUTPUT(user_key_comparison_count); + OUTPUT(block_cache_hit_count); + OUTPUT(block_read_count); + OUTPUT(block_read_byte); + OUTPUT(block_read_time); + OUTPUT(block_checksum_time); + OUTPUT(block_decompress_time); + OUTPUT(internal_key_skipped_count); + OUTPUT(internal_delete_skipped_count); + OUTPUT(write_wal_time); + OUTPUT(get_snapshot_time); + OUTPUT(get_from_memtable_time); + OUTPUT(get_from_memtable_count); + OUTPUT(get_post_process_time); + OUTPUT(get_from_output_files_time); + OUTPUT(seek_on_memtable_time); + OUTPUT(seek_on_memtable_count); + OUTPUT(seek_child_seek_time); + 
OUTPUT(seek_child_seek_count); + OUTPUT(seek_min_heap_time); + OUTPUT(seek_internal_seek_time); + OUTPUT(find_next_user_entry_time); + OUTPUT(write_pre_and_post_process_time); + OUTPUT(write_memtable_time); + OUTPUT(db_mutex_lock_nanos); + OUTPUT(db_condition_wait_nanos); + OUTPUT(merge_operator_time_nanos); + OUTPUT(write_delay_time); + OUTPUT(read_index_block_nanos); + OUTPUT(read_filter_block_nanos); + OUTPUT(new_table_block_iter_nanos); + OUTPUT(new_table_iterator_nanos); + OUTPUT(block_seek_nanos); + OUTPUT(find_table_nanos); + OUTPUT(bloom_memtable_hit_count); + OUTPUT(bloom_memtable_miss_count); + OUTPUT(bloom_sst_hit_count); + OUTPUT(bloom_sst_miss_count); return ss.str(); #endif } From fdd70d14955e521d1b6bdb42838b44569f40c942 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 1 Feb 2016 14:58:46 -0800 Subject: [PATCH 066/195] Skip filters for last L0 file if hit-optimized Summary: Following up on D53493, we can still enable the filter-skipping optimization for last file in L0. It's correct to assume the key will be present in the last L0 file when we're hit-optimized and L0 is deepest. The FilePicker encapsulates the state for traversing each level's files, so I needed to make it expose whether the returned file is last in its level. Test Plan: verified below test fails before this patch and passes afterwards. The change to how the test memtable is populated is needed so file 1 has keys (0, 30, 60), file 2 has keys (10, 40, 70), etc. 
$ ./db_universal_compaction_test --gtest_filter=UniversalCompactionNumLevels/DBTestUniversalCompaction.OptimizeFiltersForHits/* Reviewers: sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53583 --- db/db_universal_compaction_test.cc | 15 ++++++++++++--- db/version_set.cc | 30 ++++++++++++++++++------------ db/version_set.h | 2 +- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 9d3cca83c..a4cf6657f 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -187,14 +187,16 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) { env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); - Put("", ""); for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { Put(Key(num * 10), "val"); + if (num) { + dbfull()->TEST_WaitForFlushMemTable(); + } Put(Key(30 + num * 10), "val"); Put(Key(60 + num * 10), "val"); - - dbfull()->TEST_WaitForFlushMemTable(); } + Put("", ""); + dbfull()->TEST_WaitForFlushMemTable(); // Query set of non existing keys for (int i = 5; i < 90; i += 10) { @@ -205,6 +207,13 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) { ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); auto prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL); + // Make sure bloom filter is used for all but the last L0 file when looking + // up a non-existent key that's in the range of all L0 files. + ASSERT_EQ(Get(Key(35)), "NOT_FOUND"); + ASSERT_EQ(prev_counter + NumTableFilesAtLevel(0) - 1, + TestGetTickerCount(options, BLOOM_FILTER_USEFUL)); + prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL); + // Unblock compaction and wait it for happening. 
sleeping_task_low.WakeUp(); dbfull()->TEST_WaitForCompact(); diff --git a/db/version_set.cc b/db/version_set.cc index 3679bfbb4..6804730d7 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -84,15 +84,11 @@ int FindFileInRange(const InternalKeyComparator& icmp, // are MergeInProgress). class FilePicker { public: - FilePicker( - std::vector* files, - const Slice& user_key, - const Slice& ikey, - autovector* file_levels, - unsigned int num_levels, - FileIndexer* file_indexer, - const Comparator* user_comparator, - const InternalKeyComparator* internal_comparator) + FilePicker(std::vector* files, const Slice& user_key, + const Slice& ikey, autovector* file_levels, + unsigned int num_levels, FileIndexer* file_indexer, + const Comparator* user_comparator, + const InternalKeyComparator* internal_comparator) : num_levels_(num_levels), curr_level_(-1), hit_file_level_(-1), @@ -102,6 +98,7 @@ class FilePicker { files_(files), #endif level_files_brief_(file_levels), + is_hit_file_last_in_level_(false), user_key_(user_key), ikey_(ikey), file_indexer_(file_indexer), @@ -126,6 +123,8 @@ class FilePicker { // Loops over all files in current level. FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_]; hit_file_level_ = curr_level_; + is_hit_file_last_in_level_ = + curr_index_in_curr_level_ == curr_file_level_->num_files - 1; int cmp_largest = -1; // Do key range filtering of files or/and fractional cascading if: @@ -209,6 +208,10 @@ class FilePicker { // for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts unsigned int GetHitFileLevel() { return hit_file_level_; } + // Returns true if the most recent "hit file" (i.e., one returned by + // GetNextFile()) is at the last index in its level. 
+ bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; } + private: unsigned int num_levels_; unsigned int curr_level_; @@ -220,6 +223,7 @@ class FilePicker { #endif autovector* level_files_brief_; bool search_ended_; + bool is_hit_file_last_in_level_; LevelFilesBrief* curr_file_level_; unsigned int curr_index_in_curr_level_; unsigned int start_index_in_curr_level_; @@ -903,7 +907,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, *status = table_cache_->Get( read_options, *internal_comparator(), f->fd, ikey, &get_context, cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()), - IsFilterSkipped(static_cast(fp.GetHitFileLevel()))); + IsFilterSkipped(static_cast(fp.GetHitFileLevel()), + fp.IsHitFileLastInLevel())); // TODO: examine the behavior for corrupted key if (!status->ok()) { return; @@ -960,10 +965,11 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, } } -bool Version::IsFilterSkipped(int level) { +bool Version::IsFilterSkipped(int level, bool is_file_last_in_level) { // Reaching the bottom level implies misses at all upper levels, so we'll // skip checking the filters when we predict a hit. - return cfd_->ioptions()->optimize_filters_for_hits && level > 0 && + return cfd_->ioptions()->optimize_filters_for_hits && + (level > 0 || is_file_last_in_level) && level == storage_info_.num_non_empty_levels() - 1; } diff --git a/db/version_set.h b/db/version_set.h index 097109fd4..7ce4a6bdf 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -530,7 +530,7 @@ class Version { // checked during read operations. In certain cases (trivial move or preload), // the filter block may already be cached, but we still do not access it such // that it eventually expires from the cache. 
- bool IsFilterSkipped(int level); + bool IsFilterSkipped(int level, bool is_file_last_in_level = false); // The helper function of UpdateAccumulatedStats, which may fill the missing // fields of file_mata from its associated TableProperties. From ad7ecca72dc0b282fbf62ca0b43595787d278e48 Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 1 Feb 2016 11:03:28 -0800 Subject: [PATCH 067/195] Add unit tests to verify large key/value Summary: Add unit tests: (1) insert entries of 8MB key and 3GB value to DB (2) insert entry of 3GB key and 3GB value into write batch and make sure we can read it. (3) insert 3 billions of key-value pairs into write batch and make sure we can read it. Disable them because not all platform can run it. Test Plan: Run the tests Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, andrewkr, anthony Reviewed By: anthony Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53619 --- db/db_test.cc | 55 ++++++++++++++++++++ db/write_batch_test.cc | 114 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) diff --git a/db/db_test.cc b/db/db_test.cc index 76e64d484..dfc231969 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -606,6 +606,61 @@ TEST_F(DBTest, EmptyFlush) { kSkipUniversalCompaction | kSkipMergePut)); } +// Disable because not all platform can run it. +// It requires more than 9GB memory to run it, With single allocation +// of more than 3GB. 
+TEST_F(DBTest, DISABLED_VeryLargeValue) { + const size_t kValueSize = 3221225472u; // 3GB value + const size_t kKeySize = 8388608u; // 8MB key + std::string raw(kValueSize, 'v'); + std::string key1(kKeySize, 'c'); + std::string key2(kKeySize, 'd'); + + Options options; + options.env = env_; + options.write_buffer_size = 100000; // Small write buffer + options.paranoid_checks = true; + options = CurrentOptions(options); + DestroyAndReopen(options); + + ASSERT_OK(Put("boo", "v1")); + ASSERT_OK(Put("foo", "v1")); + ASSERT_OK(Put(key1, raw)); + raw[0] = 'w'; + ASSERT_OK(Put(key2, raw)); + dbfull()->TEST_WaitForFlushMemTable(); + + ASSERT_EQ(1, NumTableFilesAtLevel(0)); + + std::string value; + Status s = db_->Get(ReadOptions(), key1, &value); + ASSERT_OK(s); + ASSERT_EQ(kValueSize, value.size()); + ASSERT_EQ('v', value[0]); + + s = db_->Get(ReadOptions(), key2, &value); + ASSERT_OK(s); + ASSERT_EQ(kValueSize, value.size()); + ASSERT_EQ('w', value[0]); + + // Compact all files. + Flush(); + db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + + // Check DB is not in read-only state. + ASSERT_OK(Put("boo", "v1")); + + s = db_->Get(ReadOptions(), key1, &value); + ASSERT_OK(s); + ASSERT_EQ(kValueSize, value.size()); + ASSERT_EQ('v', value[0]); + + s = db_->Get(ReadOptions(), key2, &value); + ASSERT_OK(s); + ASSERT_EQ(kValueSize, value.size()); + ASSERT_EQ('w', value[0]); +} + TEST_F(DBTest, GetFromImmutableLayer) { do { Options options; diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc index 5d008b3a4..cd981fd60 100644 --- a/db/write_batch_test.cc +++ b/db/write_batch_test.cc @@ -308,6 +308,120 @@ TEST_F(WriteBatchTest, Blob) { handler.seen); } +// It requires more than 30GB of memory to run the test. With single memory +// allocation of more than 30GB. +// Not all platform can run it. Also it runs a long time. So disable it. +TEST_F(WriteBatchTest, DISABLED_ManyUpdates) { + // Insert key and value of 3GB and push total batch size to 12GB. 
+ const size_t kKeyValueSize = 4u; + const uint32_t kNumUpdates = 3 << 30; + std::string raw(kKeyValueSize, 'A'); + WriteBatch batch(kNumUpdates * (4 + kKeyValueSize * 2) + 1024u); + char c = 'A'; + for (uint32_t i = 0; i < kNumUpdates; i++) { + if (c > 'Z') { + c = 'A'; + } + raw[0] = c; + raw[raw.length() - 1] = c; + c++; + batch.Put(raw, raw); + } + + ASSERT_EQ(kNumUpdates, batch.Count()); + + struct NoopHandler : public WriteBatch::Handler { + uint32_t num_seen = 0; + char expected_char = 'A'; + virtual Status PutCF(uint32_t column_family_id, const Slice& key, + const Slice& value) override { + EXPECT_EQ(kKeyValueSize, key.size()); + EXPECT_EQ(kKeyValueSize, value.size()); + EXPECT_EQ(expected_char, key[0]); + EXPECT_EQ(expected_char, value[0]); + EXPECT_EQ(expected_char, key[kKeyValueSize - 1]); + EXPECT_EQ(expected_char, value[kKeyValueSize - 1]); + expected_char++; + if (expected_char > 'Z') { + expected_char = 'A'; + } + ++num_seen; + return Status::OK(); + } + virtual Status DeleteCF(uint32_t column_family_id, + const Slice& key) override { + EXPECT_TRUE(false); + return Status::OK(); + } + virtual Status SingleDeleteCF(uint32_t column_family_id, + const Slice& key) override { + EXPECT_TRUE(false); + return Status::OK(); + } + virtual Status MergeCF(uint32_t column_family_id, const Slice& key, + const Slice& value) override { + EXPECT_TRUE(false); + return Status::OK(); + } + virtual void LogData(const Slice& blob) override { EXPECT_TRUE(false); } + virtual bool Continue() override { return num_seen < kNumUpdates; } + } handler; + + batch.Iterate(&handler); + ASSERT_EQ(kNumUpdates, handler.num_seen); +} + +// The test requires more than 18GB memory to run it, with single memory +// allocation of more than 12GB. Not all the platform can run it. So disable it. +TEST_F(WriteBatchTest, DISABLED_LargeKeyValue) { + // Insert key and value of 3GB and push total batch size to 12GB. 
+ const size_t kKeyValueSize = 3221225472u; + std::string raw(kKeyValueSize, 'A'); + WriteBatch batch(12884901888u + 1024u); + for (char i = 0; i < 2; i++) { + raw[0] = 'A' + i; + raw[raw.length() - 1] = 'A' - i; + batch.Put(raw, raw); + } + + ASSERT_EQ(2, batch.Count()); + + struct NoopHandler : public WriteBatch::Handler { + int num_seen = 0; + virtual Status PutCF(uint32_t column_family_id, const Slice& key, + const Slice& value) override { + EXPECT_EQ(kKeyValueSize, key.size()); + EXPECT_EQ(kKeyValueSize, value.size()); + EXPECT_EQ('A' + num_seen, key[0]); + EXPECT_EQ('A' + num_seen, value[0]); + EXPECT_EQ('A' - num_seen, key[kKeyValueSize - 1]); + EXPECT_EQ('A' - num_seen, value[kKeyValueSize - 1]); + ++num_seen; + return Status::OK(); + } + virtual Status DeleteCF(uint32_t column_family_id, + const Slice& key) override { + EXPECT_TRUE(false); + return Status::OK(); + } + virtual Status SingleDeleteCF(uint32_t column_family_id, + const Slice& key) override { + EXPECT_TRUE(false); + return Status::OK(); + } + virtual Status MergeCF(uint32_t column_family_id, const Slice& key, + const Slice& value) override { + EXPECT_TRUE(false); + return Status::OK(); + } + virtual void LogData(const Slice& blob) override { EXPECT_TRUE(false); } + virtual bool Continue() override { return num_seen < 2; } + } handler; + + batch.Iterate(&handler); + ASSERT_EQ(2, handler.num_seen); +} + TEST_F(WriteBatchTest, Continue) { WriteBatch batch; From 1ad8182950f8423134f8c0a92213bced34cbf2df Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Mon, 1 Feb 2016 16:07:53 -0800 Subject: [PATCH 068/195] Fix WriteBatchTest.ManyUpdates, WriteBatchTest.LargeKeyValue under clang Summary: Fix current clang failure https://ci-builds.fb.com/view/rocksdb/job/rocksdb_clang_build/1398/console Test Plan: make sure that both clang and g++ compilation succeed USE_CLANG=1 make check -j64 make check -j64 Reviewers: anthony, yhchiang, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: 
https://reviews.facebook.net/D53667 --- db/write_batch_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc index cd981fd60..c475dbeb6 100644 --- a/db/write_batch_test.cc +++ b/db/write_batch_test.cc @@ -313,8 +313,8 @@ TEST_F(WriteBatchTest, Blob) { // Not all platform can run it. Also it runs a long time. So disable it. TEST_F(WriteBatchTest, DISABLED_ManyUpdates) { // Insert key and value of 3GB and push total batch size to 12GB. - const size_t kKeyValueSize = 4u; - const uint32_t kNumUpdates = 3 << 30; + static const size_t kKeyValueSize = 4u; + static const uint32_t kNumUpdates = 3 << 30; std::string raw(kKeyValueSize, 'A'); WriteBatch batch(kNumUpdates * (4 + kKeyValueSize * 2) + 1024u); char c = 'A'; @@ -375,7 +375,7 @@ TEST_F(WriteBatchTest, DISABLED_ManyUpdates) { // allocation of more than 12GB. Not all the platform can run it. So disable it. TEST_F(WriteBatchTest, DISABLED_LargeKeyValue) { // Insert key and value of 3GB and push total batch size to 12GB. - const size_t kKeyValueSize = 3221225472u; + static const size_t kKeyValueSize = 3221225472u; std::string raw(kKeyValueSize, 'A'); WriteBatch batch(12884901888u + 1024u); for (char i = 0; i < 2; i++) { From ac3fa9a6fe3684d3d1234c8a505f5741db06681a Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 1 Feb 2016 18:20:34 -0800 Subject: [PATCH 069/195] Travis CI to disable ROCKSDB_LITE tests Summary: Travis CI fails most of the times because of timing out. To unblock it, disable LITE tests in Travis CI. Test Plan: Will see. 
Reviewers: anthony, yhchiang, kradhakrishnan, IslamAbdelRahman, andrewkr Reviewed By: andrewkr Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53679 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b6fa63c5d..b045d259e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,7 +34,7 @@ before_script: # as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment. script: - if [[ "${TRAVIS_OS_NAME}" == 'linux' ]]; then OPT=-DTRAVIS CLANG_FORMAT_DIFF=/tmp/clang-format-diff.py make format || true; fi - - OPT=-DTRAVIS V=1 make -j4 check && OPT=-DTRAVIS V=1 make clean jclean rocksdbjava jtest && make clean && OPT="-DTRAVIS -DROCKSDB_LITE" V=1 make -j4 check + - OPT=-DTRAVIS V=1 make -j4 check && OPT=-DTRAVIS V=1 make clean jclean rocksdbjava jtest && make clean && OPT="-DTRAVIS -DROCKSDB_LITE" V=1 make -j4 static_lib notifications: email: From 9c2cf9479b1b59b22562a4ee673caf9826199ef3 Mon Sep 17 00:00:00 2001 From: Nathan Bronson Date: Sun, 31 Jan 2016 10:48:43 -0800 Subject: [PATCH 070/195] Fix for --allow_concurrent_memtable_write with batching Summary: Concurrent memtable adds were incorrectly computing the last sequence number for a write batch group when the write batches were not solitary. This is the cause of https://github.com/facebook/mysql-5.6/issues/155 Test Plan: 1. unit tests 2. new unit test 3. 
parallel db_bench stress tests with batch size of 10 and asserts enabled Reviewers: igor, sdong Reviewed By: sdong Subscribers: IslamAbdelRahman, MarkCallaghan, dhruba Differential Revision: https://reviews.facebook.net/D53595 --- db/db_impl.cc | 8 ++++++-- db/db_test_util.cc | 8 +++++++- db/db_test_util.h | 7 ++++--- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 51ed9a3a0..f625c775e 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -4137,7 +4137,9 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, if (write_thread_.CompleteParallelWorker(&w)) { // we're responsible for early exit - auto last_sequence = w.parallel_group->last_writer->sequence; + auto last_sequence = + w.parallel_group->last_writer->sequence + + WriteBatchInternal::Count(w.parallel_group->last_writer->batch) - 1; SetTickerCount(stats_, SEQUENCE_NUMBER, last_sequence); versions_->SetLastSequence(last_sequence); write_thread_.EarlyExitParallelGroup(&w); @@ -4437,7 +4439,9 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, this, true /*dont_filter_deletes*/, true /*concurrent_memtable_writes*/); - assert(last_writer->sequence == last_sequence); + assert(last_writer->sequence + + WriteBatchInternal::Count(last_writer->batch) - 1 == + last_sequence); // CompleteParallelWorker returns true if this thread should // handle exit, false means somebody else did exit_completed_early = !write_thread_.CompleteParallelWorker(&w); diff --git a/db/db_test_util.cc b/db/db_test_util.cc index e6ee304a5..f2906c7ca 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -85,7 +85,8 @@ bool DBTestBase::ShouldSkipOptions(int option_config, int skip_mask) { option_config == kHashCuckoo || option_config == kUniversalCompaction || option_config == kUniversalCompactionMultiLevel || option_config == kUniversalSubcompactions || - option_config == kFIFOCompaction) { + option_config == kFIFOCompaction || + option_config == kConcurrentSkipList) { 
return true; } #endif @@ -361,6 +362,11 @@ Options DBTestBase::CurrentOptions( options.max_subcompactions = 4; break; } + case kConcurrentSkipList: { + options.allow_concurrent_memtable_write = true; + options.enable_write_thread_adaptive_yield = true; + break; + } default: break; diff --git a/db/db_test_util.h b/db/db_test_util.h index 031057bbb..b993af8cb 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -525,9 +525,10 @@ class DBTestBase : public testing::Test { kOptimizeFiltersForHits = 27, kRowCache = 28, kRecycleLogFiles = 29, - kLevelSubcompactions = 30, - kUniversalSubcompactions = 31, - kEnd = 30 + kConcurrentSkipList = 30, + kEnd = 31, + kLevelSubcompactions = 31, + kUniversalSubcompactions = 32, }; int option_config_; From e2972803a602cccb743d1aa1343f163a77f33198 Mon Sep 17 00:00:00 2001 From: Tomas Kolda Date: Thu, 28 Jan 2016 15:44:31 +0100 Subject: [PATCH 071/195] Adding support for Windows JNI build --- CMakeLists.txt | 16 +- java/CMakeLists.txt | 155 ++++++++++++++++++ java/rocksjni/backupablejni.cc | 12 +- java/rocksjni/backupenginejni.cc | 12 +- java/rocksjni/options.cc | 5 +- java/rocksjni/portal.h | 5 + java/rocksjni/restorejni.cc | 13 +- java/rocksjni/rocksjni.cc | 9 +- java/rocksjni/statistics.cc | 4 +- .../java/org/rocksdb/NativeLibraryLoader.java | 64 ++++---- .../java/org/rocksdb/util/Environment.java | 9 +- .../org/rocksdb/BackupableDBOptionsTest.java | 10 +- .../java/org/rocksdb/InfoLogLevelTest.java | 3 +- .../org/rocksdb/NativeLibraryLoaderTest.java | 2 +- .../org/rocksdb/util/EnvironmentTest.java | 18 +- 15 files changed, 257 insertions(+), 80 deletions(-) create mode 100644 java/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index a80344fe8..d6084356b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,8 +13,8 @@ # cd build # 3. Run cmake to generate project files for Windows, add more options to enable required third-party libraries. # See thirdparty.inc for more information. 
-# sample command: cmake -G "Visual Studio 12 Win64" -DGFLAGS=1 -DSNAPPY=1 -DJEMALLOC=1 .. -# OR for VS Studio 15 cmake -G "Visual Studio 14 Win64" -DGFLAGS=1 -DSNAPPY=1 -DJEMALLOC=1 .. +# sample command: cmake -G "Visual Studio 12 Win64" -DGFLAGS=1 -DSNAPPY=1 -DJEMALLOC=1 -DJNI=1 .. +# OR for VS Studio 15 cmake -G "Visual Studio 14 Win64" -DGFLAGS=1 -DSNAPPY=1 -DJEMALLOC=1 -DJNI=1 .. # 4. Then build the project in debug mode (you may want to add /m[:] flag to run msbuild in parallel threads # or simply /m ot use all avail cores) # msbuild rocksdb.sln @@ -243,6 +243,7 @@ set(SOURCES util/xxhash.cc utilities/backupable/backupable_db.cc utilities/checkpoint/checkpoint.cc + utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc utilities/document/document_db.cc utilities/document/json_document.cc utilities/document/json_document_builder.cc @@ -288,6 +289,17 @@ set_target_properties(rocksdb${ARTIFACT_SUFFIX} PROPERTIES COMPILE_FLAGS "-DROCK add_dependencies(rocksdb${ARTIFACT_SUFFIX} GenerateBuildVersion) target_link_libraries(rocksdb${ARTIFACT_SUFFIX} ${LIBS}) +if (DEFINED JNI) + if (${JNI} EQUAL 1) + message(STATUS "JNI library is enabled") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/java) + else() + message(STATUS "JNI library is disabled") + endif() +else() + message(STATUS "JNI library is disabled") +endif() + set(APPS db/db_bench.cc db/memtablerep_bench.cc diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt new file mode 100644 index 000000000..d4a707b3c --- /dev/null +++ b/java/CMakeLists.txt @@ -0,0 +1,155 @@ +set(JNI_NATIVE_SOURCES + rocksjni/backupenginejni.cc + rocksjni/backupablejni.cc + rocksjni/checkpoint.cc + rocksjni/columnfamilyhandle.cc + rocksjni/compaction_filter.cc + rocksjni/comparator.cc + rocksjni/comparatorjnicallback.cc + rocksjni/env.cc + rocksjni/filter.cc + rocksjni/iterator.cc + rocksjni/loggerjnicallback.cc + rocksjni/memtablejni.cc + rocksjni/merge_operator.cc + rocksjni/options.cc + rocksjni/ratelimiterjni.cc + 
rocksjni/remove_emptyvalue_compactionfilterjni.cc + rocksjni/restorejni.cc + rocksjni/rocksjni.cc + rocksjni/slice.cc + rocksjni/snapshot.cc + rocksjni/statistics.cc + rocksjni/table.cc + rocksjni/transaction_log.cc + rocksjni/ttl.cc + rocksjni/write_batch.cc + rocksjni/writebatchhandlerjnicallback.cc + rocksjni/write_batch_with_index.cc + rocksjni/write_batch_test.cc +) + +set(NATIVE_JAVA_CLASSES + org.rocksdb.AbstractCompactionFilter + org.rocksdb.AbstractComparator + org.rocksdb.AbstractSlice + org.rocksdb.BackupEngine + org.rocksdb.BackupableDB + org.rocksdb.BackupableDBOptions + org.rocksdb.BlockBasedTableConfig + org.rocksdb.BloomFilter + org.rocksdb.Checkpoint + org.rocksdb.ColumnFamilyHandle + org.rocksdb.ColumnFamilyOptions + org.rocksdb.Comparator + org.rocksdb.ComparatorOptions + org.rocksdb.DBOptions + org.rocksdb.DirectComparator + org.rocksdb.DirectSlice + org.rocksdb.Env + org.rocksdb.FlushOptions + org.rocksdb.Filter + org.rocksdb.GenericRateLimiterConfig + org.rocksdb.HashLinkedListMemTableConfig + org.rocksdb.HashSkipListMemTableConfig + org.rocksdb.Logger + org.rocksdb.MergeOperator + org.rocksdb.Options + org.rocksdb.PlainTableConfig + org.rocksdb.ReadOptions + org.rocksdb.RemoveEmptyValueCompactionFilter + org.rocksdb.RestoreBackupableDB + org.rocksdb.RestoreOptions + org.rocksdb.RocksDB + org.rocksdb.RocksEnv + org.rocksdb.RocksIterator + org.rocksdb.RocksMemEnv + org.rocksdb.SkipListMemTableConfig + org.rocksdb.Slice + org.rocksdb.Statistics + org.rocksdb.TransactionLogIterator + org.rocksdb.TtlDB + org.rocksdb.VectorMemTableConfig + org.rocksdb.Snapshot + org.rocksdb.StringAppendOperator + org.rocksdb.WriteBatch + org.rocksdb.WriteBatch.Handler + org.rocksdb.WriteOptions + org.rocksdb.WriteBatchWithIndex + org.rocksdb.WBWIRocksIterator + org.rocksdb.WriteBatchTest + org.rocksdb.WriteBatchTestInternalHelper +) + +include_directories($ENV{JAVA_HOME}/include) +include_directories($ENV{JAVA_HOME}/include/win32) 
+include_directories(${PROJECT_SOURCE_DIR}/java) + +set(JAVA_TEST_LIBDIR ${PROJECT_SOURCE_DIR}/java/test-libs) +set(JAVA_TMP_JAR ${JAVA_TEST_LIBDIR}/tmp.jar) +set(JAVA_JUNIT_JAR ${JAVA_TEST_LIBDIR}/junit-4.12.jar) +set(JAVA_HAMCR_JAR ${JAVA_TEST_LIBDIR}/hamcrest-core-1.3.jar) +set(JAVA_MOCKITO_JAR ${JAVA_TEST_LIBDIR}/mockito-all-1.10.19.jar) +set(JAVA_CGLIB_JAR ${JAVA_TEST_LIBDIR}/cglib-2.2.2.jar) +set(JAVA_ASSERTJ_JAR ${JAVA_TEST_LIBDIR}/assertj-core-1.7.1.jar) +set(JAVA_TESTCLASSPATH "${JAVA_JUNIT_JAR}\;${JAVA_HAMCR_JAR}\;${JAVA_MOCKITO_JAR}\;${JAVA_CGLIB_JAR}\;${JAVA_ASSERTJ_JAR}") + +if(NOT EXISTS ${PROJECT_SOURCE_DIR}/java/classes) + execute_process(COMMAND mkdir ${PROJECT_SOURCE_DIR}/java/classes) +endif() + +if(NOT EXISTS ${JAVA_TEST_LIBDIR}) + execute_process(COMMAND mkdir ${JAVA_TEST_LIBDIR}) +endif() + +if(NOT EXISTS ${JAVA_JUNIT_JAR}) + message("Downloading ${JAVA_JUNIT_JAR}") + file(DOWNLOAD http://search.maven.org/remotecontent?filepath=junit/junit/4.12/junit-4.12.jar ${JAVA_TMP_JAR} STATUS downloadStatus) + list(GET downloadStatus 0 error_code) + if(NOT error_code EQUAL 0) + message(FATAL_ERROR "Failed downloading ${JAVA_JUNIT_JAR}") + endif() + file(RENAME ${JAVA_TMP_JAR} ${JAVA_JUNIT_JAR}) +endif() +if(NOT EXISTS ${JAVA_HAMCR_JAR}) + message("Downloading ${JAVA_HAMCR_JAR}") + file(DOWNLOAD http://search.maven.org/remotecontent?filepath=org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar ${JAVA_TMP_JAR} STATUS downloadStatus) + list(GET downloadStatus 0 error_code) + if(NOT error_code EQUAL 0) + message(FATAL_ERROR "Failed downloading ${JAVA_HAMCR_JAR}") + endif() + file(RENAME ${JAVA_TMP_JAR} ${JAVA_HAMCR_JAR}) +endif() +if(NOT EXISTS ${JAVA_MOCKITO_JAR}) + message("Downloading ${JAVA_MOCKITO_JAR}") + file(DOWNLOAD http://search.maven.org/remotecontent?filepath=org/mockito/mockito-all/1.10.19/mockito-all-1.10.19.jar ${JAVA_TMP_JAR} STATUS downloadStatus) + list(GET downloadStatus 0 error_code) + if(NOT error_code EQUAL 0) + message(FATAL_ERROR 
"Failed downloading ${JAVA_MOCKITO_JAR}") + endif() + file(RENAME ${JAVA_TMP_JAR} ${JAVA_MOCKITO_JAR}) +endif() +if(NOT EXISTS ${JAVA_CGLIB_JAR}) + message("Downloading ${JAVA_CGLIB_JAR}") + file(DOWNLOAD http://search.maven.org/remotecontent?filepath=cglib/cglib/2.2.2/cglib-2.2.2.jar ${JAVA_TMP_JAR} STATUS downloadStatus) + list(GET downloadStatus 0 error_code) + if(NOT error_code EQUAL 0) + message(FATAL_ERROR "Failed downloading ${JAVA_CGLIB_JAR}") + endif() + file(RENAME ${JAVA_TMP_JAR} ${JAVA_CGLIB_JAR}) +endif() +if(NOT EXISTS ${JAVA_ASSERTJ_JAR}) + message("Downloading ${JAVA_ASSERTJ_JAR}") + file(DOWNLOAD http://central.maven.org/maven2/org/assertj/assertj-core/1.7.1/assertj-core-1.7.1.jar ${JAVA_TMP_JAR} STATUS downloadStatus) + list(GET downloadStatus 0 error_code) + if(NOT error_code EQUAL 0) + message(FATAL_ERROR "Failed downloading ${JAVA_ASSERTJ_JAR}") + endif() + file(RENAME ${JAVA_TMP_JAR} ${JAVA_ASSERTJ_JAR}) +endif() + +execute_process(COMMAND javac -cp ${JAVA_TESTCLASSPATH} -d ${PROJECT_SOURCE_DIR}/java/classes ${PROJECT_SOURCE_DIR}/java/src/main/java/org/rocksdb/util/*.java ${PROJECT_SOURCE_DIR}/java/src/main/java/org/rocksdb/*.java ${PROJECT_SOURCE_DIR}/java/src/test/java/org/rocksdb/*.java) +execute_process(COMMAND javah -cp ${PROJECT_SOURCE_DIR}/java/classes -d ${PROJECT_SOURCE_DIR}/java/include -jni ${NATIVE_JAVA_CLASSES}) +add_library(rocksdbjni${ARTIFACT_SUFFIX} SHARED ${JNI_NATIVE_SOURCES}) +set_target_properties(rocksdbjni${ARTIFACT_SUFFIX} PROPERTIES COMPILE_FLAGS "/Fd${CMAKE_CFG_INTDIR}/rocksdbjni${ARTIFACT_SUFFIX}.pdb") +target_link_libraries(rocksdbjni${ARTIFACT_SUFFIX} rocksdblib${ARTIFACT_SUFFIX} ${LIBS}) diff --git a/java/rocksjni/backupablejni.cc b/java/rocksjni/backupablejni.cc index d26e46e88..59e6dc781 100644 --- a/java/rocksjni/backupablejni.cc +++ b/java/rocksjni/backupablejni.cc @@ -103,20 +103,14 @@ jintArray Java_org_rocksdb_BackupableDB_getCorruptedBackups( reinterpret_cast(jhandle)-> GetCorruptedBackups(&backup_ids); // 
store backupids in int array - const std::vector::size_type - kIdSize = backup_ids.size(); - int int_backup_ids[kIdSize]; - for (std::vector::size_type i = 0; - i != kIdSize; i++) { - int_backup_ids[i] = backup_ids[i]; - } + std::vector int_backup_ids(backup_ids.begin(), backup_ids.end()); // Store ints in java array jintArray ret_backup_ids; // Its ok to loose precision here (64->32) - jsize ret_backup_ids_size = static_cast(kIdSize); + jsize ret_backup_ids_size = static_cast(backup_ids.size()); ret_backup_ids = env->NewIntArray(ret_backup_ids_size); env->SetIntArrayRegion(ret_backup_ids, 0, ret_backup_ids_size, - int_backup_ids); + int_backup_ids.data()); return ret_backup_ids; } diff --git a/java/rocksjni/backupenginejni.cc b/java/rocksjni/backupenginejni.cc index 750ab965a..e597c98c9 100644 --- a/java/rocksjni/backupenginejni.cc +++ b/java/rocksjni/backupenginejni.cc @@ -81,20 +81,14 @@ jintArray Java_org_rocksdb_BackupEngine_getCorruptedBackups( std::vector backup_ids; backup_engine->GetCorruptedBackups(&backup_ids); // store backupids in int array - const std::vector::size_type - kIdSize = backup_ids.size(); - int int_backup_ids[kIdSize]; - for (std::vector::size_type i = 0; - i != kIdSize; i++) { - int_backup_ids[i] = backup_ids[i]; - } + std::vector int_backup_ids(backup_ids.begin(), backup_ids.end()); // Store ints in java array jintArray ret_backup_ids; // Its ok to loose precision here (64->32) - jsize ret_backup_ids_size = static_cast(kIdSize); + jsize ret_backup_ids_size = static_cast(backup_ids.size()); ret_backup_ids = env->NewIntArray(ret_backup_ids_size); env->SetIntArrayRegion(ret_backup_ids, 0, ret_backup_ids_size, - int_backup_ids); + int_backup_ids.data()); return ret_backup_ids; } diff --git a/java/rocksjni/options.cc b/java/rocksjni/options.cc index de3df942c..d1702a0fe 100644 --- a/java/rocksjni/options.cc +++ b/java/rocksjni/options.cc @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "include/org_rocksdb_Options.h" @@ 
-1180,7 +1179,7 @@ jbyte Java_org_rocksdb_Options_compactionStyle( void Java_org_rocksdb_Options_setMaxTableFilesSizeFIFO( JNIEnv* env, jobject jobj, jlong jhandle, jlong jmax_table_files_size) { reinterpret_cast(jhandle)->compaction_options_fifo.max_table_files_size = - static_cast(jmax_table_files_size); + static_cast(jmax_table_files_size); } /* @@ -2339,7 +2338,7 @@ jbyte Java_org_rocksdb_ColumnFamilyOptions_compactionStyle( void Java_org_rocksdb_ColumnFamilyOptions_setMaxTableFilesSizeFIFO( JNIEnv* env, jobject jobj, jlong jhandle, jlong jmax_table_files_size) { reinterpret_cast(jhandle)->compaction_options_fifo.max_table_files_size = - static_cast(jmax_table_files_size); + static_cast(jmax_table_files_size); } /* diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 804bbc68a..5423ea7b2 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -24,6 +24,11 @@ #include "rocksjni/loggerjnicallback.h" #include "rocksjni/writebatchhandlerjnicallback.h" +// Remove macro on windows +#ifdef DELETE +#undef DELETE +#endif + namespace rocksdb { // Detect if jlong overflows size_t diff --git a/java/rocksjni/restorejni.cc b/java/rocksjni/restorejni.cc index a2341632b..c53ed9a60 100644 --- a/java/rocksjni/restorejni.cc +++ b/java/rocksjni/restorejni.cc @@ -156,21 +156,14 @@ jintArray Java_org_rocksdb_RestoreBackupableDB_getCorruptedBackups( reinterpret_cast(jhandle)-> GetCorruptedBackups(&backup_ids); // store backupids in int array - const std::vector::size_type - kIdSize = backup_ids.size(); - - int int_backup_ids[kIdSize]; - for (std::vector::size_type i = 0; - i != kIdSize; i++) { - int_backup_ids[i] = backup_ids[i]; - } + std::vector int_backup_ids(backup_ids.begin(), backup_ids.end()); // Store ints in java array jintArray ret_backup_ids; // Its ok to loose precision here (64->32) - jsize ret_backup_ids_size = static_cast(kIdSize); + jsize ret_backup_ids_size = static_cast(backup_ids.size()); ret_backup_ids = 
env->NewIntArray(ret_backup_ids_size); env->SetIntArrayRegion(ret_backup_ids, 0, ret_backup_ids_size, - int_backup_ids); + int_backup_ids.data()); return ret_backup_ids; } diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 221e7fff2..0ebbcad49 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -12,6 +12,7 @@ #include #include #include +#include #include "include/org_rocksdb_RocksDB.h" #include "rocksdb/db.h" @@ -19,6 +20,10 @@ #include "rocksdb/types.h" #include "rocksjni/portal.h" +#ifdef min +#undef min +#endif + ////////////////////////////////////////////////////////////////////////////// // rocksdb::DB::Open @@ -688,8 +693,8 @@ jint rocksdb_get_helper( return kStatusError; } - int cvalue_len = static_cast(cvalue.size()); - int length = std::min(jentry_value_len, cvalue_len); + jint cvalue_len = static_cast(cvalue.size()); + jint length = std::min(jentry_value_len, cvalue_len); env->SetByteArrayRegion( jentry_value, 0, length, diff --git a/java/rocksjni/statistics.cc b/java/rocksjni/statistics.cc index bf170c6de..0e44ea564 100644 --- a/java/rocksjni/statistics.cc +++ b/java/rocksjni/statistics.cc @@ -20,7 +20,7 @@ * Signature: (IJ)J */ jlong Java_org_rocksdb_Statistics_getTickerCount0( - JNIEnv* env, jobject jobj, int tickerType, jlong handle) { + JNIEnv* env, jobject jobj, jint tickerType, jlong handle) { auto st = reinterpret_cast(handle); assert(st != nullptr); @@ -33,7 +33,7 @@ jlong Java_org_rocksdb_Statistics_getTickerCount0( * Signature: (IJ)Lorg/rocksdb/HistogramData; */ jobject Java_org_rocksdb_Statistics_geHistogramData0( - JNIEnv* env, jobject jobj, int histogramType, jlong handle) { + JNIEnv* env, jobject jobj, jint histogramType, jlong handle) { auto st = reinterpret_cast(handle); assert(st != nullptr); diff --git a/java/src/main/java/org/rocksdb/NativeLibraryLoader.java b/java/src/main/java/org/rocksdb/NativeLibraryLoader.java index dca9b3119..49d8f7110 100644 --- 
a/java/src/main/java/org/rocksdb/NativeLibraryLoader.java +++ b/java/src/main/java/org/rocksdb/NativeLibraryLoader.java @@ -19,7 +19,7 @@ public class NativeLibraryLoader { private static final String jniLibraryName = Environment.getJniLibraryName("rocksdb"); private static final String jniLibraryFileName = Environment.getJniLibraryFileName("rocksdb"); private static final String tempFilePrefix = "librocksdbjni"; - private static final String tempFileSuffix = "." + Environment.getJniLibraryExtension(); + private static final String tempFileSuffix = Environment.getJniLibraryExtension(); /** * Get a reference to the NativeLibraryLoader @@ -75,37 +75,43 @@ public class NativeLibraryLoader { void loadLibraryFromJar(final String tmpDir) throws IOException { if (!initialized) { - final File temp; - if (tmpDir == null || tmpDir.equals("")) { - temp = File.createTempFile(tempFilePrefix, tempFileSuffix); - } else { - temp = new File(tmpDir, jniLibraryFileName); - if (!temp.createNewFile()) { - throw new RuntimeException("File: " + temp.getAbsolutePath() - + " could not be created."); - } - } - - if (!temp.exists()) { - throw new RuntimeException("File " + temp.getAbsolutePath() + " does not exist."); - } else { - temp.deleteOnExit(); - } - - // attempt to copy the library from the Jar file to the temp destination - try (final InputStream is = getClass().getClassLoader(). 
- getResourceAsStream(jniLibraryFileName)) { - if (is == null) { - throw new RuntimeException(jniLibraryFileName + " was not found inside JAR."); - } else { - Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); - } - } - - System.load(temp.getAbsolutePath()); + System.load(loadLibraryFromJarToTemp(tmpDir).getAbsolutePath()); initialized = true; } } + + File loadLibraryFromJarToTemp(final String tmpDir) + throws IOException { + final File temp; + if (tmpDir == null || tmpDir.isEmpty()) { + temp = File.createTempFile(tempFilePrefix, tempFileSuffix); + } else { + temp = new File(tmpDir, jniLibraryFileName); + if (!temp.createNewFile()) { + throw new RuntimeException("File: " + temp.getAbsolutePath() + + " could not be created."); + } + } + + if (!temp.exists()) { + throw new RuntimeException("File " + temp.getAbsolutePath() + " does not exist."); + } else { + temp.deleteOnExit(); + } + + // attempt to copy the library from the Jar file to the temp destination + try (final InputStream is = getClass().getClassLoader(). + getResourceAsStream(jniLibraryFileName)) { + if (is == null) { + throw new RuntimeException(jniLibraryFileName + " was not found inside JAR."); + } else { + Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); + } + } + + return temp; + } + /** * Private constructor to disallow instantiation */ diff --git a/java/src/main/java/org/rocksdb/util/Environment.java b/java/src/main/java/org/rocksdb/util/Environment.java index 306eae093..6fccc43bb 100644 --- a/java/src/main/java/org/rocksdb/util/Environment.java +++ b/java/src/main/java/org/rocksdb/util/Environment.java @@ -42,7 +42,9 @@ public class Environment { return String.format("%sjni-osx", name); } else if (isSolaris()) { return String.format("%sjni-solaris%d", name, is64Bit() ? 
64 : 32); - } + } else if (isWindows() && is64Bit()) { + return String.format("%sjni-win64", name); + } throw new UnsupportedOperationException(); } @@ -55,11 +57,16 @@ public class Environment { return libraryFileName + ".so"; } else if (isMac()) { return libraryFileName + (shared ? ".dylib" : ".jnilib"); + } else if (isWindows()) { + return libraryFileName + ".dll"; } throw new UnsupportedOperationException(); } public static String getJniLibraryExtension() { + if (isWindows()) { + return ".dll"; + } return (isMac()) ? ".jnilib" : ".so"; } } diff --git a/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java b/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java index 6fe3bd2f0..9bad1af3d 100644 --- a/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java +++ b/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java @@ -5,18 +5,18 @@ package org.rocksdb; +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.Random; + import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; -import java.util.Random; - -import static org.assertj.core.api.Assertions.assertThat; - public class BackupableDBOptionsTest { - private final static String ARBITRARY_PATH = "/tmp"; + private final static String ARBITRARY_PATH = System.getProperty("java.io.tmpdir"); @ClassRule public static final RocksMemoryResource rocksMemoryResource = diff --git a/java/src/test/java/org/rocksdb/InfoLogLevelTest.java b/java/src/test/java/org/rocksdb/InfoLogLevelTest.java index 630666b90..71a032a0b 100644 --- a/java/src/test/java/org/rocksdb/InfoLogLevelTest.java +++ b/java/src/test/java/org/rocksdb/InfoLogLevelTest.java @@ -4,6 +4,7 @@ import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import org.rocksdb.util.Environment; import java.io.IOException; @@ -113,7 +114,7 @@ public class InfoLogLevelTest { * @throws IOException if file is not 
found. */ private String getLogContentsWithoutHeader() throws IOException { - final String separator = System.getProperty("line.separator"); + final String separator = Environment.isWindows() ? "\n" : System.getProperty("line.separator"); final String[] lines = new String(readAllBytes(get( dbFolder.getRoot().getAbsolutePath()+ "/LOG"))).split(separator); diff --git a/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java b/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java index 7d9322a53..fe964b60a 100644 --- a/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java +++ b/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java @@ -21,7 +21,7 @@ public class NativeLibraryLoaderTest { @Test public void tempFolder() throws IOException { - NativeLibraryLoader.getInstance().loadLibraryFromJar( + NativeLibraryLoader.getInstance().loadLibraryFromJarToTemp( temporaryFolder.getRoot().getAbsolutePath()); Path path = Paths.get(temporaryFolder.getRoot().getAbsolutePath(), Environment.getJniLibraryFileName("rocksdb")); diff --git a/java/src/test/java/org/rocksdb/util/EnvironmentTest.java b/java/src/test/java/org/rocksdb/util/EnvironmentTest.java index c7160deb6..1fc51ee73 100644 --- a/java/src/test/java/org/rocksdb/util/EnvironmentTest.java +++ b/java/src/test/java/org/rocksdb/util/EnvironmentTest.java @@ -117,16 +117,22 @@ public class EnvironmentTest { assertThat(Environment.isWindows()).isTrue(); } - @Test(expected = UnsupportedOperationException.class) - public void failWinJniLibraryName(){ + @Test + public void win64() { setEnvironmentClassFields("win", "x64"); - Environment.getJniLibraryFileName("rocksdb"); + assertThat(Environment.isWindows()).isTrue(); + assertThat(Environment.getJniLibraryExtension()). + isEqualTo(".dll"); + assertThat(Environment.getJniLibraryFileName("rocksdb")). + isEqualTo("librocksdbjni-win64.dll"); + assertThat(Environment.getSharedLibraryFileName("rocksdb")). 
+ isEqualTo("librocksdbjni.dll"); } @Test(expected = UnsupportedOperationException.class) - public void failWinSharedLibrary(){ - setEnvironmentClassFields("win", "x64"); - Environment.getSharedLibraryFileName("rocksdb"); + public void win32(){ + setEnvironmentClassFields("win", "32"); + Environment.getJniLibraryFileName("rocksdb"); } private void setEnvironmentClassFields(String osName, From 52153930d84e977fd2b5f9a467d127c6028fc794 Mon Sep 17 00:00:00 2001 From: Tomas Kolda Date: Tue, 2 Feb 2016 09:45:18 +0100 Subject: [PATCH 072/195] Adding support for Windows JNI build - fix Java unit test for release build of JNI DLL --- java/rocksjni/write_batch_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/rocksjni/write_batch_test.cc b/java/rocksjni/write_batch_test.cc index 2690f619e..e9cd2fbba 100644 --- a/java/rocksjni/write_batch_test.cc +++ b/java/rocksjni/write_batch_test.cc @@ -60,7 +60,8 @@ jbyteArray Java_org_rocksdb_WriteBatchTest_getContents( for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { rocksdb::ParsedInternalKey ikey; memset(reinterpret_cast(&ikey), 0, sizeof(ikey)); - assert(rocksdb::ParseInternalKey(iter->key(), &ikey)); + bool parsed = rocksdb::ParseInternalKey(iter->key(), &ikey); + assert(parsed); switch (ikey.type) { case rocksdb::kTypeValue: state.append("Put("); From 502d41f1503d7d65e91a47b6d6ba9faf1c0471dd Mon Sep 17 00:00:00 2001 From: Tomas Kolda Date: Wed, 27 Jan 2016 13:36:22 +0100 Subject: [PATCH 073/195] Making use of GetSystemTimePreciseAsFileTime dynamic to not break compatibility with Windows 7. The issue with rotated logs was fixed other way. 
--- db/auto_roll_logger.cc | 13 ++++++++-- port/win/env_win.cc | 55 +++++++++++++++++++++++++++++------------- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/db/auto_roll_logger.cc b/db/auto_roll_logger.cc index cf92f34c8..c984b4810 100644 --- a/db/auto_roll_logger.cc +++ b/db/auto_roll_logger.cc @@ -32,8 +32,17 @@ Status AutoRollLogger::ResetLogger() { } void AutoRollLogger::RollLogFile() { - std::string old_fname = OldInfoLogFileName( - dbname_, env_->NowMicros(), db_absolute_path_, db_log_dir_); + uint64_t now = env_->NowMicros(); + std::string old_fname; + // Try to check target name only 10 times at most + for (int i = 0; i < 10; i++) { + old_fname = OldInfoLogFileName( + dbname_, now, db_absolute_path_, db_log_dir_); + if (!env_->FileExists(old_fname).ok()) { + break; + } + now++; + }; env_->RenameFile(log_fname_, old_fname); } diff --git a/port/win/env_win.cc b/port/win/env_win.cc index 50059a98f..87a25569c 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -1138,6 +1138,8 @@ void WinthreadCall(const char* label, std::error_code result) { } } +typedef VOID(WINAPI * FnGetSystemTimePreciseAsFileTime)(LPFILETIME); + class WinEnv : public Env { public: WinEnv(); @@ -1676,25 +1678,29 @@ class WinEnv : public Env { } virtual uint64_t NowMicros() override { - // all std::chrono clocks on windows proved to return - // values that may repeat that is not good enough for some uses. - const int64_t c_UnixEpochStartTicks = 116444736000000000i64; - const int64_t c_FtToMicroSec = 10; + if (GetSystemTimePreciseAsFileTime_ != NULL) { + // all std::chrono clocks on windows proved to return + // values that may repeat that is not good enough for some uses. 
+ const int64_t c_UnixEpochStartTicks = 116444736000000000i64; + const int64_t c_FtToMicroSec = 10; - // This interface needs to return system time and not - // just any microseconds because it is often used as an argument - // to TimedWait() on condition variable - FILETIME ftSystemTime; - GetSystemTimePreciseAsFileTime(&ftSystemTime); + // This interface needs to return system time and not + // just any microseconds because it is often used as an argument + // to TimedWait() on condition variable + FILETIME ftSystemTime; + GetSystemTimePreciseAsFileTime_(&ftSystemTime); - LARGE_INTEGER li; - li.LowPart = ftSystemTime.dwLowDateTime; - li.HighPart = ftSystemTime.dwHighDateTime; - // Subtract unix epoch start - li.QuadPart -= c_UnixEpochStartTicks; - // Convert to microsecs - li.QuadPart /= c_FtToMicroSec; - return li.QuadPart; + LARGE_INTEGER li; + li.LowPart = ftSystemTime.dwLowDateTime; + li.HighPart = ftSystemTime.dwHighDateTime; + // Subtract unix epoch start + li.QuadPart -= c_UnixEpochStartTicks; + // Convert to microsecs + li.QuadPart /= c_FtToMicroSec; + return li.QuadPart; + } + using namespace std::chrono; + return duration_cast(system_clock::now().time_since_epoch()).count(); } virtual uint64_t NowNanos() override { @@ -2104,8 +2110,13 @@ class WinEnv : public Env { std::vector thread_pools_; mutable std::mutex mu_; std::vector threads_to_join_; + static FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_; + static bool GetSystemTimePreciseAsFileTimeInitialized_; }; +FnGetSystemTimePreciseAsFileTime WinEnv::GetSystemTimePreciseAsFileTime_ = NULL; +bool WinEnv::GetSystemTimePreciseAsFileTimeInitialized_ = false; + WinEnv::WinEnv() : checkedDiskForMmap_(false), forceMmapOff(false), @@ -2113,6 +2124,16 @@ WinEnv::WinEnv() allocation_granularity_(page_size_), perf_counter_frequency_(0), thread_pools_(Priority::TOTAL) { + + if (!GetSystemTimePreciseAsFileTimeInitialized_) { + HMODULE module = GetModuleHandle("kernel32.dll"); + if (module != NULL) 
{ + GetSystemTimePreciseAsFileTime_ = (FnGetSystemTimePreciseAsFileTime)GetProcAddress( + module, "GetSystemTimePreciseAsFileTime"); + } + GetSystemTimePreciseAsFileTimeInitialized_ = true; + } + SYSTEM_INFO sinfo; GetSystemInfo(&sinfo); From 57a95a700155345c658936a3b2ca492f36192691 Mon Sep 17 00:00:00 2001 From: Tomas Kolda Date: Wed, 27 Jan 2016 21:00:42 +0100 Subject: [PATCH 074/195] Making use of GetSystemTimePreciseAsFileTime dynamic - code review fixes --- db/auto_roll_logger.cc | 4 +++- port/win/env_win.cc | 20 +++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/db/auto_roll_logger.cc b/db/auto_roll_logger.cc index c984b4810..b8ba14c83 100644 --- a/db/auto_roll_logger.cc +++ b/db/auto_roll_logger.cc @@ -32,9 +32,11 @@ Status AutoRollLogger::ResetLogger() { } void AutoRollLogger::RollLogFile() { + // This function is called when log is rotating. Two rotations + // can happen quickly (NowMicro returns same value). To not overwrite + // previous log file we increment by one micro second and try again. 
uint64_t now = env_->NowMicros(); std::string old_fname; - // Try to check target name only 10 times at most for (int i = 0; i < 10; i++) { old_fname = OldInfoLogFileName( dbname_, now, db_absolute_path_, db_log_dir_); diff --git a/port/win/env_win.cc b/port/win/env_win.cc index 87a25569c..1ae0b1932 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -2110,28 +2110,22 @@ class WinEnv : public Env { std::vector thread_pools_; mutable std::mutex mu_; std::vector threads_to_join_; - static FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_; - static bool GetSystemTimePreciseAsFileTimeInitialized_; + FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_; }; -FnGetSystemTimePreciseAsFileTime WinEnv::GetSystemTimePreciseAsFileTime_ = NULL; -bool WinEnv::GetSystemTimePreciseAsFileTimeInitialized_ = false; - WinEnv::WinEnv() : checkedDiskForMmap_(false), forceMmapOff(false), page_size_(4 * 1012), allocation_granularity_(page_size_), perf_counter_frequency_(0), - thread_pools_(Priority::TOTAL) { + thread_pools_(Priority::TOTAL), + GetSystemTimePreciseAsFileTime_(NULL) { - if (!GetSystemTimePreciseAsFileTimeInitialized_) { - HMODULE module = GetModuleHandle("kernel32.dll"); - if (module != NULL) { - GetSystemTimePreciseAsFileTime_ = (FnGetSystemTimePreciseAsFileTime)GetProcAddress( - module, "GetSystemTimePreciseAsFileTime"); - } - GetSystemTimePreciseAsFileTimeInitialized_ = true; + HMODULE module = GetModuleHandle("kernel32.dll"); + if (module != NULL) { + GetSystemTimePreciseAsFileTime_ = (FnGetSystemTimePreciseAsFileTime)GetProcAddress( + module, "GetSystemTimePreciseAsFileTime"); } SYSTEM_INFO sinfo; From a62c519bb6129bce4f2300cb07c305ad6c0c945b Mon Sep 17 00:00:00 2001 From: Tomas Kolda Date: Tue, 2 Feb 2016 10:33:49 +0100 Subject: [PATCH 075/195] RollLogFile tries to find non conflicting file until there is no conflict. 
--- db/auto_roll_logger.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/db/auto_roll_logger.cc b/db/auto_roll_logger.cc index b8ba14c83..2349bd0c0 100644 --- a/db/auto_roll_logger.cc +++ b/db/auto_roll_logger.cc @@ -37,14 +37,11 @@ void AutoRollLogger::RollLogFile() { // previous log file we increment by one micro second and try again. uint64_t now = env_->NowMicros(); std::string old_fname; - for (int i = 0; i < 10; i++) { + do { old_fname = OldInfoLogFileName( dbname_, now, db_absolute_path_, db_log_dir_); - if (!env_->FileExists(old_fname).ok()) { - break; - } now++; - }; + } while (env_->FileExists(old_fname).ok()); env_->RenameFile(log_fname_, old_fname); } From 466c2c1bf71762752a1c1aa94f74e2918f9aed68 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 2 Feb 2016 14:52:49 -0800 Subject: [PATCH 076/195] Generate tags for *.c files Summary: db/c_test.c uses the functions in db/c.cc. If we have tags generated for one but not the other, it's easy to make mistakes like updating a function signature and missing a call site. Test Plan: $ make tags in vim: :cscope find s rocksdb_options_set_compression_options ... 3 325 db/c_test.c <
> rocksdb_options_set_compression_options(options, -14, -1, 0); Reviewers: sdong, yhchiang, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53685 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 38fe1f5c1..c2642ce0f 100644 --- a/Makefile +++ b/Makefile @@ -700,7 +700,7 @@ clean: tags: ctags * -R - cscope -b `find . -name '*.cc'` `find . -name '*.h'` + cscope -b `find . -name '*.cc'` `find . -name '*.h'` `find . -name '*.c'` format: build_tools/format-diff.sh From 5fcd1ba30a2432a0d1241f4f8d52a97b362ec31f Mon Sep 17 00:00:00 2001 From: Nathan Bronson Date: Tue, 2 Feb 2016 18:19:07 -0800 Subject: [PATCH 077/195] disable kConcurrentSkipList multithreaded test Summary: Disable test that is intermittently failing Test Plan: unit tests Reviewers: igor, andrewkr, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53715 --- db/db_test_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_test_util.h b/db/db_test_util.h index b993af8cb..48fa5430e 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -525,8 +525,8 @@ class DBTestBase : public testing::Test { kOptimizeFiltersForHits = 27, kRowCache = 28, kRecycleLogFiles = 29, + kEnd = 30, kConcurrentSkipList = 30, - kEnd = 31, kLevelSubcompactions = 31, kUniversalSubcompactions = 32, }; From 0c2bd5cb4b4911c1bfd88f9f614fa2306c909c8f Mon Sep 17 00:00:00 2001 From: Gabriela Jacques da Silva Date: Mon, 1 Feb 2016 17:07:05 -0800 Subject: [PATCH 078/195] Removing data race from expirable transactions Summary: Doing inline checking of transaction expiration instead of using a callback. 
Test Plan: To be added Reviewers: anthony Reviewed By: anthony Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53673 --- utilities/transactions/transaction_db_impl.cc | 26 ++++++++++- utilities/transactions/transaction_db_impl.h | 18 ++++++++ utilities/transactions/transaction_impl.cc | 46 ++++++++++++++----- utilities/transactions/transaction_impl.h | 26 ++++------- .../transactions/transaction_lock_mgr.cc | 16 +++++-- utilities/transactions/transaction_lock_mgr.h | 6 ++- utilities/transactions/transaction_test.cc | 46 +++++++++++++++++++ 7 files changed, 150 insertions(+), 34 deletions(-) diff --git a/utilities/transactions/transaction_db_impl.cc b/utilities/transactions/transaction_db_impl.cc index f8a47b948..91440ae65 100644 --- a/utilities/transactions/transaction_db_impl.cc +++ b/utilities/transactions/transaction_db_impl.cc @@ -24,7 +24,7 @@ TransactionDBImpl::TransactionDBImpl(DB* db, const TransactionDBOptions& txn_db_options) : TransactionDB(db), txn_db_options_(txn_db_options), - lock_mgr_(txn_db_options_.num_stripes, txn_db_options.max_num_locks, + lock_mgr_(this, txn_db_options_.num_stripes, txn_db_options.max_num_locks, txn_db_options_.custom_mutex_factory ? 
txn_db_options_.custom_mutex_factory : std::shared_ptr( @@ -278,5 +278,29 @@ Status TransactionDBImpl::Write(const WriteOptions& opts, WriteBatch* updates) { return s; } +void TransactionDBImpl::InsertExpirableTransaction(TransactionID tx_id, + TransactionImpl* tx) { + assert(tx->GetExpirationTime() > 0); + std::lock_guard lock(map_mutex_); + expirable_transactions_map_.insert({tx_id, tx}); +} + +void TransactionDBImpl::RemoveExpirableTransaction(TransactionID tx_id) { + std::lock_guard lock(map_mutex_); + expirable_transactions_map_.erase(tx_id); +} + +bool TransactionDBImpl::TryStealingExpiredTransactionLocks( + TransactionID tx_id) { + std::lock_guard lock(map_mutex_); + + auto tx_it = expirable_transactions_map_.find(tx_id); + if (tx_it == expirable_transactions_map_.end()) { + return true; + } + TransactionImpl& tx = *(tx_it->second); + return tx.TryStealingLocks(); +} + } // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/utilities/transactions/transaction_db_impl.h b/utilities/transactions/transaction_db_impl.h index 5a9d8b474..060510136 100644 --- a/utilities/transactions/transaction_db_impl.h +++ b/utilities/transactions/transaction_db_impl.h @@ -6,7 +6,9 @@ #pragma once #ifndef ROCKSDB_LITE +#include #include +#include #include "rocksdb/db.h" #include "rocksdb/options.h" @@ -66,6 +68,15 @@ class TransactionDBImpl : public TransactionDB { return txn_db_options_; } + void InsertExpirableTransaction(TransactionID tx_id, TransactionImpl* tx); + void RemoveExpirableTransaction(TransactionID tx_id); + + // If transaction is no longer available, locks can be stolen + // If transaction is available, try stealing locks directly from transaction + // It is the caller's responsibility to ensure that the referred transaction + // is expirable (GetExpirationTime() > 0) and that it is expired. 
+ bool TryStealingExpiredTransactionLocks(TransactionID tx_id); + private: const TransactionDBOptions txn_db_options_; TransactionLockMgr lock_mgr_; @@ -74,6 +85,13 @@ class TransactionDBImpl : public TransactionDB { InstrumentedMutex column_family_mutex_; Transaction* BeginInternalTransaction(const WriteOptions& options); Status WriteHelper(WriteBatch* updates, TransactionImpl* txn_impl); + + // Used to ensure that no locks are stolen from an expirable transaction + // that has started a commit. Only transactions with an expiration time + // should be in this map. + std::mutex map_mutex_; + std::unordered_map + expirable_transactions_map_; }; } // namespace rocksdb diff --git a/utilities/transactions/transaction_impl.cc b/utilities/transactions/transaction_impl.cc index 3f25ff77d..2602d30e6 100644 --- a/utilities/transactions/transaction_impl.cc +++ b/utilities/transactions/transaction_impl.cc @@ -20,6 +20,7 @@ #include "rocksdb/status.h" #include "rocksdb/utilities/transaction_db.h" #include "util/string_util.h" +#include "util/sync_point.h" #include "utilities/transactions/transaction_db_impl.h" #include "utilities/transactions/transaction_util.h" @@ -42,7 +43,8 @@ TransactionImpl::TransactionImpl(TransactionDB* txn_db, expiration_time_(txn_options.expiration >= 0 ? 
start_time_ + txn_options.expiration * 1000 : 0), - lock_timeout_(txn_options.lock_timeout * 1000) { + lock_timeout_(txn_options.lock_timeout * 1000), + exec_status_(STARTED) { txn_db_impl_ = dynamic_cast(txn_db); assert(txn_db_impl_); @@ -55,10 +57,16 @@ TransactionImpl::TransactionImpl(TransactionDB* txn_db, if (txn_options.set_snapshot) { SetSnapshot(); } + if (expiration_time_ > 0) { + txn_db_impl_->InsertExpirableTransaction(txn_id_, this); + } } TransactionImpl::~TransactionImpl() { txn_db_impl_->UnLock(this, &GetTrackedKeys()); + if (expiration_time_ > 0) { + txn_db_impl_->RemoveExpirableTransaction(txn_id_); + } } void TransactionImpl::Clear() { @@ -103,18 +111,27 @@ Status TransactionImpl::DoCommit(WriteBatch* batch) { Status s; if (expiration_time_ > 0) { - // We cannot commit a transaction that is expired as its locks might have - // been released. - // To avoid race conditions, we need to use a WriteCallback to check the - // expiration time once we're on the writer thread. - TransactionCallback callback(this); + if (IsExpired()) { + return Status::Expired(); + } - // Do write directly on base db as TransctionDB::Write() would attempt to - // do conflict checking that we've already done. - assert(dynamic_cast(db_) != nullptr); - auto db_impl = reinterpret_cast(db_); + // Transaction should only be committed if the thread succeeds + // changing its execution status to COMMITTING. This is because + // A different transaction may consider this one expired and attempt + // to steal its locks between the IsExpired() check and the beginning + // of a commit. 
+ ExecutionStatus expected = STARTED; + bool can_commit = std::atomic_compare_exchange_strong( + &exec_status_, &expected, COMMITTING); - s = db_impl->WriteWithCallback(write_options_, batch, &callback); + TEST_SYNC_POINT("TransactionTest::ExpirableTransactionDataRace:1"); + + if (can_commit) { + s = db_->Write(write_options_, batch); + } else { + assert(exec_status_ == LOCKS_STOLEN); + return Status::Expired(); + } } else { s = db_->Write(write_options_, batch); } @@ -316,6 +333,13 @@ Status TransactionImpl::ValidateSnapshot(ColumnFamilyHandle* column_family, false /* cache_only */); } +bool TransactionImpl::TryStealingLocks() { + assert(IsExpired()); + ExecutionStatus expected = STARTED; + return std::atomic_compare_exchange_strong(&exec_status_, &expected, + LOCKS_STOLEN); +} + } // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/utilities/transactions/transaction_impl.h b/utilities/transactions/transaction_impl.h index 0fa087d67..caed15d3a 100644 --- a/utilities/transactions/transaction_impl.h +++ b/utilities/transactions/transaction_impl.h @@ -66,11 +66,16 @@ class TransactionImpl : public TransactionBaseImpl { lock_timeout_ = timeout * 1000; } + // Returns true if locks were stolen successfully, false otherwise. + bool TryStealingLocks(); + protected: Status TryLock(ColumnFamilyHandle* column_family, const Slice& key, bool untracked = false) override; private: + enum ExecutionStatus { STARTED, COMMITTING, LOCKS_STOLEN }; + TransactionDBImpl* txn_db_impl_; // Used to create unique ids for transactions. @@ -86,6 +91,9 @@ class TransactionImpl : public TransactionBaseImpl { // Timeout in microseconds when locking a key or -1 if there is no timeout. int64_t lock_timeout_; + // Execution status of the transaction. 
+ std::atomic exec_status_; + void Clear() override; Status ValidateSnapshot(ColumnFamilyHandle* column_family, const Slice& key, @@ -102,24 +110,6 @@ class TransactionImpl : public TransactionBaseImpl { void operator=(const TransactionImpl&); }; -// Used at commit time to check whether transaction is committing before its -// expiration time. -class TransactionCallback : public WriteCallback { - public: - explicit TransactionCallback(TransactionImpl* txn) : txn_(txn) {} - - Status Callback(DB* db) override { - if (txn_->IsExpired()) { - return Status::Expired(); - } else { - return Status::OK(); - } - } - - private: - TransactionImpl* txn_; -}; - } // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/utilities/transactions/transaction_lock_mgr.cc b/utilities/transactions/transaction_lock_mgr.cc index 80e4fb8d9..51b8d4a82 100644 --- a/utilities/transactions/transaction_lock_mgr.cc +++ b/utilities/transactions/transaction_lock_mgr.cc @@ -25,6 +25,7 @@ #include "util/autovector.h" #include "util/murmurhash.h" #include "util/thread_local.h" +#include "utilities/transactions/transaction_db_impl.h" namespace rocksdb { @@ -99,12 +100,16 @@ void UnrefLockMapsCache(void* ptr) { } // anonymous namespace TransactionLockMgr::TransactionLockMgr( - size_t default_num_stripes, int64_t max_num_locks, + TransactionDB* txn_db, size_t default_num_stripes, int64_t max_num_locks, std::shared_ptr mutex_factory) - : default_num_stripes_(default_num_stripes), + : txn_db_impl_(nullptr), + default_num_stripes_(default_num_stripes), max_num_locks_(max_num_locks), mutex_factory_(mutex_factory), - lock_maps_cache_(new ThreadLocalPtr(&UnrefLockMapsCache)) {} + lock_maps_cache_(new ThreadLocalPtr(&UnrefLockMapsCache)) { + txn_db_impl_ = dynamic_cast(txn_db); + assert(txn_db_impl_); +} TransactionLockMgr::~TransactionLockMgr() {} @@ -197,6 +202,11 @@ bool TransactionLockMgr::IsLockExpired(const LockInfo& lock_info, Env* env, // return how many microseconds until lock will be expired 
*expire_time = lock_info.expiration_time; } else { + bool success = + txn_db_impl_->TryStealingExpiredTransactionLocks(lock_info.txn_id); + if (!success) { + expired = false; + } *expire_time = 0; } diff --git a/utilities/transactions/transaction_lock_mgr.h b/utilities/transactions/transaction_lock_mgr.h index 8f640d4ca..fa46c62be 100644 --- a/utilities/transactions/transaction_lock_mgr.h +++ b/utilities/transactions/transaction_lock_mgr.h @@ -24,10 +24,12 @@ struct LockMap; struct LockMapStripe; class Slice; +class TransactionDBImpl; class TransactionLockMgr { public: - TransactionLockMgr(size_t default_num_stripes, int64_t max_num_locks, + TransactionLockMgr(TransactionDB* txn_db, size_t default_num_stripes, + int64_t max_num_locks, std::shared_ptr factory); ~TransactionLockMgr(); @@ -53,6 +55,8 @@ class TransactionLockMgr { const std::string& key, Env* env); private: + TransactionDBImpl* txn_db_impl_; + // Default number of lock map stripes per column family const size_t default_num_stripes_; diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 911212317..859b02bce 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -14,6 +14,7 @@ #include "rocksdb/utilities/transaction_db.h" #include "table/mock_table.h" #include "util/logging.h" +#include "util/sync_point.h" #include "util/testharness.h" #include "util/testutil.h" #include "utilities/merge_operators.h" @@ -2483,6 +2484,51 @@ TEST_F(TransactionTest, ToggleAutoCompactionTest) { } } +TEST_F(TransactionTest, ExpiredTransactionDataRace1) { + // In this test, txn1 should succeed committing, + // as the callback is called after txn1 starts committing. 
+ rocksdb::SyncPoint::GetInstance()->LoadDependency( + {{"TransactionTest::ExpirableTransactionDataRace:1"}}); + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "TransactionTest::ExpirableTransactionDataRace:1", [&](void* arg) { + WriteOptions write_options; + TransactionOptions txn_options; + + // Force txn1 to expire + /* sleep override */ + std::this_thread::sleep_for(std::chrono::milliseconds(150)); + + Transaction* txn2 = db->BeginTransaction(write_options, txn_options); + Status s; + s = txn2->Put("X", "2"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Commit(); + ASSERT_OK(s); + delete txn2; + }); + + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + WriteOptions write_options; + TransactionOptions txn_options; + + txn_options.expiration = 100; + Transaction* txn1 = db->BeginTransaction(write_options, txn_options); + + Status s; + s = txn1->Put("X", "1"); + ASSERT_OK(s); + s = txn1->Commit(); + ASSERT_OK(s); + + ReadOptions read_options; + string value; + s = db->Get(read_options, "X", &value); + ASSERT_EQ("1", value); + + delete txn1; +} + } // namespace rocksdb int main(int argc, char** argv) { From 284aa613a7cc96d74975439b53cd35f6b6eadc63 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 2 Feb 2016 19:14:56 -0800 Subject: [PATCH 079/195] Eliminate duplicated property constants Summary: Before this diff, there were duplicated constants to refer to properties (user- facing API had strings and InternalStats had an enum). I noticed these were inconsistent in terms of which constants are provided, names of constants, and documentation of constants. Overall it seemed annoying/error-prone to maintain these duplicated constants. So, this diff gets rid of InternalStats's constants and replaces them with a map keyed on the user-facing constant. The value in that map contains a function pointer to get the property value, so we don't need to do string matching while holding db->mutex_. 
This approach has a side benefit of making many small handler functions rather than a giant switch-statement. Test Plan: db_properties_test passes, running "make commit-prereq -j32" Reviewers: sdong, yhchiang, kradhakrishnan, IslamAbdelRahman, rven, anthony Reviewed By: anthony Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53253 --- db/db_impl.cc | 79 +++-- db/db_impl.h | 5 +- db/db_properties_test.cc | 12 + db/internal_stats.cc | 634 ++++++++++++++++++++++----------------- db/internal_stats.h | 158 +++++----- include/rocksdb/db.h | 3 + 6 files changed, 500 insertions(+), 391 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index f625c775e..bc3866816 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -494,23 +494,22 @@ void DBImpl::MaybeDumpStats() { last_stats_dump_time_microsec_ = now_micros; #ifndef ROCKSDB_LITE - bool tmp1 = false; - bool tmp2 = false; - DBPropertyType cf_property_type = - GetPropertyType(DB::Properties::kCFStats, &tmp1, &tmp2); - DBPropertyType db_property_type = - GetPropertyType(DB::Properties::kDBStats, &tmp1, &tmp2); + const DBPropertyInfo* cf_property_info = + GetPropertyInfo(DB::Properties::kCFStats); + assert(cf_property_info != nullptr); + const DBPropertyInfo* db_property_info = + GetPropertyInfo(DB::Properties::kDBStats); + assert(db_property_info != nullptr); + std::string stats; { InstrumentedMutexLock l(&mutex_); for (auto cfd : *versions_->GetColumnFamilySet()) { - cfd->internal_stats()->GetStringProperty(cf_property_type, - DB::Properties::kCFStats, - &stats); + cfd->internal_stats()->GetStringProperty( + *cf_property_info, DB::Properties::kCFStats, &stats); } - default_cf_internal_stats_->GetStringProperty(db_property_type, - DB::Properties::kDBStats, - &stats); + default_cf_internal_stats_->GetStringProperty( + *db_property_info, DB::Properties::kDBStats, &stats); } Log(InfoLogLevel::WARN_LEVEL, db_options_.info_log, "------- DUMPING STATS -------"); @@ -4701,53 +4700,51 @@ const 
DBOptions& DBImpl::GetDBOptions() const { return db_options_; } bool DBImpl::GetProperty(ColumnFamilyHandle* column_family, const Slice& property, std::string* value) { - bool is_int_property = false; - bool need_out_of_mutex = false; - DBPropertyType property_type = - GetPropertyType(property, &is_int_property, &need_out_of_mutex); - + const DBPropertyInfo* property_info = GetPropertyInfo(property); value->clear(); auto cfd = reinterpret_cast(column_family)->cfd(); - if (is_int_property) { + if (property_info == nullptr) { + return false; + } else if (property_info->handle_int) { uint64_t int_value; - bool ret_value = GetIntPropertyInternal( - cfd, property_type, need_out_of_mutex, false, &int_value); + bool ret_value = + GetIntPropertyInternal(cfd, *property_info, false, &int_value); if (ret_value) { *value = ToString(int_value); } return ret_value; - } else { + } else if (property_info->handle_string) { InstrumentedMutexLock l(&mutex_); - return cfd->internal_stats()->GetStringProperty(property_type, property, + return cfd->internal_stats()->GetStringProperty(*property_info, property, value); } + // Shouldn't reach here since exactly one of handle_string and handle_int + // should be non-nullptr. 
+ assert(false); + return false; } bool DBImpl::GetIntProperty(ColumnFamilyHandle* column_family, const Slice& property, uint64_t* value) { - bool is_int_property = false; - bool need_out_of_mutex = false; - DBPropertyType property_type = - GetPropertyType(property, &is_int_property, &need_out_of_mutex); - if (!is_int_property) { + const DBPropertyInfo* property_info = GetPropertyInfo(property); + if (property_info == nullptr || property_info->handle_int == nullptr) { return false; } auto cfd = reinterpret_cast(column_family)->cfd(); - return GetIntPropertyInternal(cfd, property_type, need_out_of_mutex, false, - value); + return GetIntPropertyInternal(cfd, *property_info, false, value); } bool DBImpl::GetIntPropertyInternal(ColumnFamilyData* cfd, - DBPropertyType property_type, - bool need_out_of_mutex, bool is_locked, - uint64_t* value) { - if (!need_out_of_mutex) { + const DBPropertyInfo& property_info, + bool is_locked, uint64_t* value) { + assert(property_info.handle_int != nullptr); + if (!property_info.need_out_of_mutex) { if (is_locked) { mutex_.AssertHeld(); - return cfd->internal_stats()->GetIntProperty(property_type, value, this); + return cfd->internal_stats()->GetIntProperty(property_info, value, this); } else { InstrumentedMutexLock l(&mutex_); - return cfd->internal_stats()->GetIntProperty(property_type, value, this); + return cfd->internal_stats()->GetIntProperty(property_info, value, this); } } else { SuperVersion* sv = nullptr; @@ -4758,7 +4755,7 @@ bool DBImpl::GetIntPropertyInternal(ColumnFamilyData* cfd, } bool ret = cfd->internal_stats()->GetIntPropertyOutOfMutex( - property_type, sv->current, value); + property_info, sv->current, value); if (!is_locked) { ReturnAndCleanupSuperVersion(cfd, sv); @@ -4770,11 +4767,8 @@ bool DBImpl::GetIntPropertyInternal(ColumnFamilyData* cfd, bool DBImpl::GetAggregatedIntProperty(const Slice& property, uint64_t* aggregated_value) { - bool need_out_of_mutex; - bool is_int_property; - DBPropertyType property_type 
= - GetPropertyType(property, &is_int_property, &need_out_of_mutex); - if (!is_int_property) { + const DBPropertyInfo* property_info = GetPropertyInfo(property); + if (property_info == nullptr || property_info->handle_int == nullptr) { return false; } @@ -4784,8 +4778,7 @@ bool DBImpl::GetAggregatedIntProperty(const Slice& property, InstrumentedMutexLock l(&mutex_); uint64_t value; for (auto* cfd : *versions_->GetColumnFamilySet()) { - if (GetIntPropertyInternal(cfd, property_type, need_out_of_mutex, true, - &value)) { + if (GetIntPropertyInternal(cfd, *property_info, true, &value)) { sum += value; } else { return false; diff --git a/db/db_impl.h b/db/db_impl.h index d09d645d7..429589360 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -893,9 +893,8 @@ class DBImpl : public DB { bool* value_found = nullptr); bool GetIntPropertyInternal(ColumnFamilyData* cfd, - DBPropertyType property_type, - bool need_out_of_mutex, bool is_locked, - uint64_t* value); + const DBPropertyInfo& property_info, + bool is_locked, uint64_t* value); bool HasPendingManualCompaction(); bool HasExclusiveManualCompaction(); diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index 50e08c45f..98c9d4e20 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -226,6 +226,18 @@ void GetExpectedTableProperties(TableProperties* expected_tp, } } // anonymous namespace +TEST_F(DBPropertiesTest, ValidatePropertyInfo) { + for (const auto& ppt_name_and_info : InternalStats::ppt_name_to_info) { + // If C++ gets a std::string_literal, this would be better to check at + // compile-time using static_assert. 
+ ASSERT_TRUE(ppt_name_and_info.first.empty() || + !isdigit(ppt_name_and_info.first.back())); + + ASSERT_TRUE((ppt_name_and_info.second.handle_string == nullptr) != + (ppt_name_and_info.second.handle_int == nullptr)); + } +} + TEST_F(DBPropertiesTest, AggregatedTableProperties) { for (int kTableCount = 40; kTableCount <= 100; kTableCount += 30) { const int kKeysPerTable = 100; diff --git a/db/internal_stats.cc b/db/internal_stats.cc index ebd8cd020..1ec795c9c 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "db/column_family.h" @@ -81,7 +82,21 @@ void PrintLevelStats(char* buf, size_t len, const std::string& name, stats.count == 0 ? 0 : stats.micros / kMicrosInSec / stats.count, num_input_records.c_str(), num_dropped_records.c_str()); } + +// Assumes that trailing numbers represent an optional argument. This requires +// property names to not end with numbers. +std::pair GetPropertyNameAndArg(const Slice& property) { + Slice name = property, arg = property; + size_t sfx_len = 0; + while (sfx_len < property.size() && + isdigit(property[property.size() - sfx_len - 1])) { + ++sfx_len; + } + name.remove_suffix(sfx_len); + arg.remove_prefix(property.size() - sfx_len); + return {name, arg}; } +} // anonymous namespace static const std::string rocksdb_prefix = "rocksdb."; @@ -139,7 +154,7 @@ const std::string DB::Properties::kLevelStats = rocksdb_prefix + levelstats; const std::string DB::Properties::kNumImmutableMemTable = rocksdb_prefix + num_immutable_mem_table; const std::string DB::Properties::kNumImmutableMemTableFlushed = - rocksdb_prefix + num_immutable_mem_table_flushed; + rocksdb_prefix + num_immutable_mem_table_flushed; const std::string DB::Properties::kMemTableFlushPending = rocksdb_prefix + mem_table_flush_pending; const std::string DB::Properties::kCompactionPending = @@ -188,294 +203,360 @@ const std::string DB::Properties::kAggregatedTableProperties = const std::string 
DB::Properties::kAggregatedTablePropertiesAtLevel = rocksdb_prefix + aggregated_table_properties_at_level; -DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property, - bool* need_out_of_mutex) { - assert(is_int_property != nullptr); - assert(need_out_of_mutex != nullptr); - Slice in = property; - Slice prefix(rocksdb_prefix); - *need_out_of_mutex = false; - *is_int_property = false; - if (!in.starts_with(prefix)) { - return kUnknown; - } - in.remove_prefix(prefix.size()); +const std::unordered_map InternalStats::ppt_name_to_info = { + {DB::Properties::kNumFilesAtLevelPrefix, + {false, &InternalStats::HandleNumFilesAtLevel, nullptr}}, + {DB::Properties::kLevelStats, + {false, &InternalStats::HandleLevelStats, nullptr}}, + {DB::Properties::kStats, {false, &InternalStats::HandleStats, nullptr}}, + {DB::Properties::kCFStats, {false, &InternalStats::HandleCFStats, nullptr}}, + {DB::Properties::kDBStats, {false, &InternalStats::HandleDBStats, nullptr}}, + {DB::Properties::kSSTables, + {false, &InternalStats::HandleSsTables, nullptr}}, + {DB::Properties::kAggregatedTableProperties, + {false, &InternalStats::HandleAggregatedTableProperties, nullptr}}, + {DB::Properties::kAggregatedTablePropertiesAtLevel, + {false, &InternalStats::HandleAggregatedTablePropertiesAtLevel, nullptr}}, + {DB::Properties::kNumImmutableMemTable, + {false, nullptr, &InternalStats::HandleNumImmutableMemTable}}, + {DB::Properties::kNumImmutableMemTableFlushed, + {false, nullptr, &InternalStats::HandleNumImmutableMemTableFlushed}}, + {DB::Properties::kMemTableFlushPending, + {false, nullptr, &InternalStats::HandleMemTableFlushPending}}, + {DB::Properties::kCompactionPending, + {false, nullptr, &InternalStats::HandleCompactionPending}}, + {DB::Properties::kBackgroundErrors, + {false, nullptr, &InternalStats::HandleBackgroundErrors}}, + {DB::Properties::kCurSizeActiveMemTable, + {false, nullptr, &InternalStats::HandleCurSizeActiveMemTable}}, + {DB::Properties::kCurSizeAllMemTables, + 
{false, nullptr, &InternalStats::HandleCurSizeAllMemTables}}, + {DB::Properties::kSizeAllMemTables, + {false, nullptr, &InternalStats::HandleSizeAllMemTables}}, + {DB::Properties::kNumEntriesActiveMemTable, + {false, nullptr, &InternalStats::HandleNumEntriesActiveMemTable}}, + {DB::Properties::kNumEntriesImmMemTables, + {false, nullptr, &InternalStats::HandleNumEntriesImmMemTables}}, + {DB::Properties::kNumDeletesActiveMemTable, + {false, nullptr, &InternalStats::HandleNumDeletesActiveMemTable}}, + {DB::Properties::kNumDeletesImmMemTables, + {false, nullptr, &InternalStats::HandleNumDeletesImmMemTables}}, + {DB::Properties::kEstimateNumKeys, + {false, nullptr, &InternalStats::HandleEstimateNumKeys}}, + {DB::Properties::kEstimateTableReadersMem, + {true, nullptr, &InternalStats::HandleEstimateTableReadersMem}}, + {DB::Properties::kIsFileDeletionsEnabled, + {false, nullptr, &InternalStats::HandleIsFileDeletionsEnabled}}, + {DB::Properties::kNumSnapshots, + {false, nullptr, &InternalStats::HandleNumSnapshots}}, + {DB::Properties::kOldestSnapshotTime, + {false, nullptr, &InternalStats::HandleOldestSnapshotTime}}, + {DB::Properties::kNumLiveVersions, + {false, nullptr, &InternalStats::HandleNumLiveVersions}}, + {DB::Properties::kEstimateLiveDataSize, + {true, nullptr, &InternalStats::HandleEstimateLiveDataSize}}, + {DB::Properties::kBaseLevel, + {false, nullptr, &InternalStats::HandleBaseLevel}}, + {DB::Properties::kTotalSstFilesSize, + {false, nullptr, &InternalStats::HandleTotalSstFilesSize}}, + {DB::Properties::kEstimatePendingCompactionBytes, + {false, nullptr, &InternalStats::HandleEstimatePendingCompactionBytes}}, + {DB::Properties::kNumRunningFlushes, + {false, nullptr, &InternalStats::HandleNumRunningFlushes}}, + {DB::Properties::kNumRunningCompactions, + {false, nullptr, &InternalStats::HandleNumRunningCompactions}}, +}; - if (in.starts_with(num_files_at_level_prefix)) { - return kNumFilesAtLevel; - } else if (in == levelstats) { - return kLevelStats; - } else 
if (in == allstats) { - return kStats; - } else if (in == cfstats) { - return kCFStats; - } else if (in == dbstats) { - return kDBStats; - } else if (in == sstables) { - return kSsTables; - } else if (in == aggregated_table_properties) { - return kAggregatedTableProperties; - } else if (in.starts_with(aggregated_table_properties_at_level)) { - return kAggregatedTablePropertiesAtLevel; +const DBPropertyInfo* GetPropertyInfo(const Slice& property) { + std::string ppt_name = GetPropertyNameAndArg(property).first.ToString(); + auto ppt_info_iter = InternalStats::ppt_name_to_info.find(ppt_name); + if (ppt_info_iter == InternalStats::ppt_name_to_info.end()) { + return nullptr; } - - *is_int_property = true; - if (in == num_immutable_mem_table) { - return kNumImmutableMemTable; - } else if (in == num_immutable_mem_table_flushed) { - return kNumImmutableMemTableFlushed; - } else if (in == mem_table_flush_pending) { - return kMemtableFlushPending; - } else if (in == compaction_pending) { - return kCompactionPending; - } else if (in == background_errors) { - return kBackgroundErrors; - } else if (in == cur_size_active_mem_table) { - return kCurSizeActiveMemTable; - } else if (in == cur_size_all_mem_tables) { - return kCurSizeAllMemTables; - } else if (in == size_all_mem_tables) { - return kSizeAllMemTables; - } else if (in == num_entries_active_mem_table) { - return kNumEntriesInMutableMemtable; - } else if (in == num_entries_imm_mem_tables) { - return kNumEntriesInImmutableMemtable; - } else if (in == num_deletes_active_mem_table) { - return kNumDeletesInMutableMemtable; - } else if (in == num_deletes_imm_mem_tables) { - return kNumDeletesInImmutableMemtable; - } else if (in == estimate_num_keys) { - return kEstimatedNumKeys; - } else if (in == estimate_table_readers_mem) { - *need_out_of_mutex = true; - return kEstimatedUsageByTableReaders; - } else if (in == is_file_deletions_enabled) { - return kIsFileDeletionEnabled; - } else if (in == num_snapshots) { - return 
kNumSnapshots; - } else if (in == oldest_snapshot_time) { - return kOldestSnapshotTime; - } else if (in == num_live_versions) { - return kNumLiveVersions; - } else if (in == estimate_live_data_size) { - *need_out_of_mutex = true; - return kEstimateLiveDataSize; - } else if (in == base_level) { - return kBaseLevel; - } else if (in == total_sst_files_size) { - return kTotalSstFilesSize; - } else if (in == estimate_pending_comp_bytes) { - return kEstimatePendingCompactionBytes; - } else if (in == num_running_flushes) { - return kNumRunningFlushes; - } else if (in == num_running_compactions) { - return kNumRunningCompactions; - } - return kUnknown; + return &ppt_info_iter->second; } -bool InternalStats::GetIntPropertyOutOfMutex(DBPropertyType property_type, - Version* version, - uint64_t* value) const { - assert(value != nullptr); - const auto* vstorage = cfd_->current()->storage_info(); - - switch (property_type) { - case kEstimatedUsageByTableReaders: - *value = (version == nullptr) ? - 0 : version->GetMemoryUsageByTableReaders(); - return true; - case kEstimateLiveDataSize: - *value = vstorage->EstimateLiveDataSize(); - return true; - default: - return false; - } -} - -bool InternalStats::GetStringProperty(DBPropertyType property_type, +bool InternalStats::GetStringProperty(const DBPropertyInfo& property_info, const Slice& property, std::string* value) { assert(value != nullptr); - auto* current = cfd_->current(); - const auto* vstorage = current->storage_info(); - Slice in = property; + assert(property_info.handle_string != nullptr); + Slice arg = GetPropertyNameAndArg(property).second; + return (this->*(property_info.handle_string))(value, arg); +} - switch (property_type) { - case kNumFilesAtLevel: { - in.remove_prefix(strlen("rocksdb.num-files-at-level")); - uint64_t level; - bool ok = ConsumeDecimalNumber(&in, &level) && in.empty(); - if (!ok || (int)level >= number_levels_) { - return false; - } else { - char buf[100]; - snprintf(buf, sizeof(buf), "%d", - 
vstorage->NumLevelFiles(static_cast(level))); - *value = buf; - return true; - } - } - case kLevelStats: { - char buf[1000]; - snprintf(buf, sizeof(buf), - "Level Files Size(MB)\n" - "--------------------\n"); - value->append(buf); +bool InternalStats::GetIntProperty(const DBPropertyInfo& property_info, + uint64_t* value, DBImpl* db) { + assert(value != nullptr); + assert(property_info.handle_int != nullptr && + !property_info.need_out_of_mutex); + db->mutex_.AssertHeld(); + return (this->*(property_info.handle_int))(value, db, nullptr /* version */); +} - for (int level = 0; level < number_levels_; level++) { - snprintf(buf, sizeof(buf), "%3d %8d %8.0f\n", level, - vstorage->NumLevelFiles(level), - vstorage->NumLevelBytes(level) / kMB); - value->append(buf); - } - return true; - } - case kStats: { - if (!GetStringProperty(kCFStats, DB::Properties::kCFStats, value)) { - return false; - } - if (!GetStringProperty(kDBStats, DB::Properties::kDBStats, value)) { - return false; - } - return true; - } - case kCFStats: { - DumpCFStats(value); - return true; - } - case kDBStats: { - DumpDBStats(value); - return true; - } - case kSsTables: - *value = current->DebugString(); - return true; - case kAggregatedTableProperties: { - std::shared_ptr tp; - auto s = cfd_->current()->GetAggregatedTableProperties(&tp); - if (!s.ok()) { - return false; - } - *value = tp->ToString(); - return true; - } - case kAggregatedTablePropertiesAtLevel: { - in.remove_prefix( - DB::Properties::kAggregatedTablePropertiesAtLevel.length()); - uint64_t level; - bool ok = ConsumeDecimalNumber(&in, &level) && in.empty(); - if (!ok || static_cast(level) >= number_levels_) { - return false; - } - std::shared_ptr tp; - auto s = cfd_->current()->GetAggregatedTableProperties( - &tp, static_cast(level)); - if (!s.ok()) { - return false; - } - *value = tp->ToString(); - return true; - } - default: - return false; +bool InternalStats::GetIntPropertyOutOfMutex( + const DBPropertyInfo& property_info, Version* 
version, uint64_t* value) { + assert(value != nullptr); + assert(property_info.handle_int != nullptr && + property_info.need_out_of_mutex); + return (this->*(property_info.handle_int))(value, nullptr /* db */, version); +} + +bool InternalStats::HandleNumFilesAtLevel(std::string* value, Slice suffix) { + uint64_t level; + const auto* vstorage = cfd_->current()->storage_info(); + bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty(); + if (!ok || static_cast(level) >= number_levels_) { + return false; + } else { + char buf[100]; + snprintf(buf, sizeof(buf), "%d", + vstorage->NumLevelFiles(static_cast(level))); + *value = buf; + return true; } } -bool InternalStats::GetIntProperty(DBPropertyType property_type, - uint64_t* value, DBImpl* db) const { - db->mutex_.AssertHeld(); +bool InternalStats::HandleLevelStats(std::string* value, Slice suffix) { + char buf[1000]; const auto* vstorage = cfd_->current()->storage_info(); + snprintf(buf, sizeof(buf), + "Level Files Size(MB)\n" + "--------------------\n"); + value->append(buf); - switch (property_type) { - case kNumImmutableMemTable: - *value = cfd_->imm()->NumNotFlushed(); - return true; - case kNumImmutableMemTableFlushed: - *value = cfd_->imm()->NumFlushed(); - return true; - case kMemtableFlushPending: - // Return number of mem tables that are ready to flush (made immutable) - *value = (cfd_->imm()->IsFlushPending() ? 1 : 0); - return true; - case kNumRunningFlushes: - *value = db->num_running_flushes(); - return true; - case kCompactionPending: - // 1 if the system already determines at least one compaction is needed. - // 0 otherwise, - *value = (cfd_->compaction_picker()->NeedsCompaction(vstorage) ? 1 : 0); - return true; - case kNumRunningCompactions: - *value = db->num_running_compactions_; - return true; - case kBackgroundErrors: - // Accumulated number of errors in background flushes or compactions. 
- *value = GetBackgroundErrorCount(); - return true; - case kCurSizeActiveMemTable: - // Current size of the active memtable - *value = cfd_->mem()->ApproximateMemoryUsage(); - return true; - case kCurSizeAllMemTables: - // Current size of the active memtable + immutable memtables - *value = cfd_->mem()->ApproximateMemoryUsage() + - cfd_->imm()->ApproximateUnflushedMemTablesMemoryUsage(); - return true; - case kSizeAllMemTables: - *value = cfd_->mem()->ApproximateMemoryUsage() + - cfd_->imm()->ApproximateMemoryUsage(); - return true; - case kNumEntriesInMutableMemtable: - // Current number of entires in the active memtable - *value = cfd_->mem()->num_entries(); - return true; - case kNumEntriesInImmutableMemtable: - // Current number of entries in the immutable memtables - *value = cfd_->imm()->current()->GetTotalNumEntries(); - return true; - case kNumDeletesInMutableMemtable: - // Current number of entires in the active memtable - *value = cfd_->mem()->num_deletes(); - return true; - case kNumDeletesInImmutableMemtable: - // Current number of entries in the immutable memtables - *value = cfd_->imm()->current()->GetTotalNumDeletes(); - return true; - case kEstimatedNumKeys: - // Estimate number of entries in the column family: - // Use estimated entries in tables + total entries in memtables. 
- *value = cfd_->mem()->num_entries() + - cfd_->imm()->current()->GetTotalNumEntries() - - (cfd_->mem()->num_deletes() + - cfd_->imm()->current()->GetTotalNumDeletes()) * - 2 + - vstorage->GetEstimatedActiveKeys(); - return true; - case kNumSnapshots: - *value = db->snapshots().count(); - return true; - case kOldestSnapshotTime: - *value = static_cast(db->snapshots().GetOldestSnapshotTime()); - return true; - case kNumLiveVersions: - *value = cfd_->GetNumLiveVersions(); - return true; - case kIsFileDeletionEnabled: - *value = db->IsFileDeletionsEnabled(); - return true; - case kBaseLevel: - *value = vstorage->base_level(); - return true; - case kTotalSstFilesSize: - *value = cfd_->GetTotalSstFilesSize(); - return true; - case kEstimatePendingCompactionBytes: - *value = vstorage->estimated_compaction_needed_bytes(); - return true; - default: - return false; + for (int level = 0; level < number_levels_; level++) { + snprintf(buf, sizeof(buf), "%3d %8d %8.0f\n", level, + vstorage->NumLevelFiles(level), + vstorage->NumLevelBytes(level) / kMB); + value->append(buf); } + return true; +} + +bool InternalStats::HandleStats(std::string* value, Slice suffix) { + if (!HandleCFStats(value, suffix)) { + return false; + } + if (!HandleDBStats(value, suffix)) { + return false; + } + return true; +} + +bool InternalStats::HandleCFStats(std::string* value, Slice suffix) { + DumpCFStats(value); + return true; +} + +bool InternalStats::HandleDBStats(std::string* value, Slice suffix) { + DumpDBStats(value); + return true; +} + +bool InternalStats::HandleSsTables(std::string* value, Slice suffix) { + auto* current = cfd_->current(); + *value = current->DebugString(); + return true; +} + +bool InternalStats::HandleAggregatedTableProperties(std::string* value, + Slice suffix) { + std::shared_ptr tp; + auto s = cfd_->current()->GetAggregatedTableProperties(&tp); + if (!s.ok()) { + return false; + } + *value = tp->ToString(); + return true; +} + +bool 
InternalStats::HandleAggregatedTablePropertiesAtLevel(std::string* value, + Slice suffix) { + uint64_t level; + bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty(); + if (!ok || static_cast(level) >= number_levels_) { + return false; + } + std::shared_ptr tp; + auto s = cfd_->current()->GetAggregatedTableProperties( + &tp, static_cast(level)); + if (!s.ok()) { + return false; + } + *value = tp->ToString(); + return true; +} + +bool InternalStats::HandleNumImmutableMemTable(uint64_t* value, DBImpl* db, + Version* version) { + *value = cfd_->imm()->NumNotFlushed(); + return true; +} + +bool InternalStats::HandleNumImmutableMemTableFlushed(uint64_t* value, + DBImpl* db, + Version* version) { + *value = cfd_->imm()->NumFlushed(); + return true; +} + +bool InternalStats::HandleMemTableFlushPending(uint64_t* value, DBImpl* db, + Version* version) { + // Return number of mem tables that are ready to flush (made immutable) + *value = (cfd_->imm()->IsFlushPending() ? 1 : 0); + return true; +} + +bool InternalStats::HandleNumRunningFlushes(uint64_t* value, DBImpl* db, + Version* version) { + *value = db->num_running_flushes(); + return true; +} + +bool InternalStats::HandleCompactionPending(uint64_t* value, DBImpl* db, + Version* version) { + // 1 if the system already determines at least one compaction is needed. + // 0 otherwise, + const auto* vstorage = cfd_->current()->storage_info(); + *value = (cfd_->compaction_picker()->NeedsCompaction(vstorage) ? 1 : 0); + return true; +} + +bool InternalStats::HandleNumRunningCompactions(uint64_t* value, DBImpl* db, + Version* version) { + *value = db->num_running_compactions_; + return true; +} + +bool InternalStats::HandleBackgroundErrors(uint64_t* value, DBImpl* db, + Version* version) { + // Accumulated number of errors in background flushes or compactions. 
+ *value = GetBackgroundErrorCount(); + return true; +} + +bool InternalStats::HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* db, + Version* version) { + // Current size of the active memtable + *value = cfd_->mem()->ApproximateMemoryUsage(); + return true; +} + +bool InternalStats::HandleCurSizeAllMemTables(uint64_t* value, DBImpl* db, + Version* version) { + // Current size of the active memtable + immutable memtables + *value = cfd_->mem()->ApproximateMemoryUsage() + + cfd_->imm()->ApproximateUnflushedMemTablesMemoryUsage(); + return true; +} + +bool InternalStats::HandleSizeAllMemTables(uint64_t* value, DBImpl* db, + Version* version) { + *value = cfd_->mem()->ApproximateMemoryUsage() + + cfd_->imm()->ApproximateMemoryUsage(); + return true; +} + +bool InternalStats::HandleNumEntriesActiveMemTable(uint64_t* value, DBImpl* db, + Version* version) { + // Current number of entires in the active memtable + *value = cfd_->mem()->num_entries(); + return true; +} + +bool InternalStats::HandleNumEntriesImmMemTables(uint64_t* value, DBImpl* db, + Version* version) { + // Current number of entries in the immutable memtables + *value = cfd_->imm()->current()->GetTotalNumEntries(); + return true; +} + +bool InternalStats::HandleNumDeletesActiveMemTable(uint64_t* value, DBImpl* db, + Version* version) { + // Current number of entires in the active memtable + *value = cfd_->mem()->num_deletes(); + return true; +} + +bool InternalStats::HandleNumDeletesImmMemTables(uint64_t* value, DBImpl* db, + Version* version) { + // Current number of entries in the immutable memtables + *value = cfd_->imm()->current()->GetTotalNumDeletes(); + return true; +} + +bool InternalStats::HandleEstimateNumKeys(uint64_t* value, DBImpl* db, + Version* version) { + // Estimate number of entries in the column family: + // Use estimated entries in tables + total entries in memtables. 
+ const auto* vstorage = cfd_->current()->storage_info(); + *value = cfd_->mem()->num_entries() + + cfd_->imm()->current()->GetTotalNumEntries() - + (cfd_->mem()->num_deletes() + + cfd_->imm()->current()->GetTotalNumDeletes()) * + 2 + + vstorage->GetEstimatedActiveKeys(); + return true; +} + +bool InternalStats::HandleNumSnapshots(uint64_t* value, DBImpl* db, + Version* version) { + *value = db->snapshots().count(); + return true; +} + +bool InternalStats::HandleOldestSnapshotTime(uint64_t* value, DBImpl* db, + Version* version) { + *value = static_cast(db->snapshots().GetOldestSnapshotTime()); + return true; +} + +bool InternalStats::HandleNumLiveVersions(uint64_t* value, DBImpl* db, + Version* version) { + *value = cfd_->GetNumLiveVersions(); + return true; +} + +bool InternalStats::HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db, + Version* version) { + *value = db->IsFileDeletionsEnabled(); + return true; +} + +bool InternalStats::HandleBaseLevel(uint64_t* value, DBImpl* db, + Version* version) { + const auto* vstorage = cfd_->current()->storage_info(); + *value = vstorage->base_level(); + return true; +} + +bool InternalStats::HandleTotalSstFilesSize(uint64_t* value, DBImpl* db, + Version* version) { + *value = cfd_->GetTotalSstFilesSize(); + return true; +} + +bool InternalStats::HandleEstimatePendingCompactionBytes(uint64_t* value, + DBImpl* db, + Version* version) { + const auto* vstorage = cfd_->current()->storage_info(); + *value = vstorage->estimated_compaction_needed_bytes(); + return true; +} + +bool InternalStats::HandleEstimateTableReadersMem(uint64_t* value, DBImpl* db, + Version* version) { + *value = (version == nullptr) ? 
0 : version->GetMemoryUsageByTableReaders(); + return true; +} + +bool InternalStats::HandleEstimateLiveDataSize(uint64_t* value, DBImpl* db, + Version* version) { + const auto* vstorage = cfd_->current()->storage_info(); + *value = vstorage->EstimateLiveDataSize(); + return true; } void InternalStats::DumpDBStats(std::string* value) { @@ -760,10 +841,7 @@ void InternalStats::DumpCFStats(std::string* value) { #else -DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property, - bool* need_out_of_mutex) { - return kUnknown; -} +const DBPropertyInfo* GetPropertyInfo(const Slice& property) { return nullptr; } #endif // !ROCKSDB_LITE diff --git a/db/internal_stats.h b/db/internal_stats.h index 9c4414ef1..65408e53f 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -21,63 +21,29 @@ namespace rocksdb { class MemTableList; class DBImpl; -// IMPORTANT: If you add a new property here, also add it to the list in -// include/rocksdb/db.h -enum DBPropertyType : uint32_t { - kUnknown, - kNumFilesAtLevel, // Number of files at a specific level - kLevelStats, // Return number of files and total sizes of each level - kCFStats, // Return general statitistics of CF - kDBStats, // Return general statitistics of DB - kStats, // Return general statitistics of both DB and CF - kSsTables, // Return a human readable string of current SST files - kStartIntTypes, // ---- Dummy value to indicate the start of integer values - kNumImmutableMemTable, // Return number of immutable mem tables that - // have not been flushed. - kNumImmutableMemTableFlushed, // Return number of immutable mem tables - // in memory that have already been flushed - kMemtableFlushPending, // Return 1 if mem table flushing is pending, - // otherwise 0. - kNumRunningFlushes, // Return the number of currently running flushes. - kCompactionPending, // Return 1 if a compaction is pending. Otherwise 0. - kNumRunningCompactions, // Return the number of currently running - // compactions. 
- kBackgroundErrors, // Return accumulated background errors encountered. - kCurSizeActiveMemTable, // Return current size of the active memtable - kCurSizeAllMemTables, // Return current size of unflushed - // (active + immutable) memtables - kSizeAllMemTables, // Return current size of all (active + immutable - // + pinned) memtables - kNumEntriesInMutableMemtable, // Return number of deletes in the mutable - // memtable. - kNumEntriesInImmutableMemtable, // Return sum of number of entries in all - // the immutable mem tables. - kNumDeletesInMutableMemtable, // Return number of deletion entries in the - // mutable memtable. - kNumDeletesInImmutableMemtable, // Return the total number of deletion - // entries in all the immutable mem tables. - kEstimatedNumKeys, // Estimated total number of keys in the database. - kEstimatedUsageByTableReaders, // Estimated memory by table readers. - kIsFileDeletionEnabled, // Equals disable_delete_obsolete_files_, - // 0 means file deletions enabled - kNumSnapshots, // Number of snapshots in the system - kOldestSnapshotTime, // Unix timestamp of the first snapshot - kNumLiveVersions, - kEstimateLiveDataSize, // Estimated amount of live data in bytes - kTotalSstFilesSize, // Total size of all sst files. - kBaseLevel, // The level that L0 data is compacted to - kEstimatePendingCompactionBytes, // Estimated bytes to compaction - kAggregatedTableProperties, // Return a string that contains the aggregated - // table properties. - kAggregatedTablePropertiesAtLevel, // Return a string that contains the - // aggregated - // table properties at the specified level. +// Config for retrieving a property's value. +struct DBPropertyInfo { + bool need_out_of_mutex; + + // gcc had an internal error for initializing union of pointer-to-member- + // functions. Workaround is to populate exactly one of the following function + // pointers with a non-nullptr value. 
+ + // @param value Value-result argument for storing the property's string value + // @param suffix Argument portion of the property. For example, suffix would + // be "5" for the property "rocksdb.num-files-at-level5". So far, only + // certain string properties take an argument. + bool (InternalStats::*handle_string)(std::string* value, Slice suffix); + + // @param value Value-result argument for storing the property's uint64 value + // @param db Many of the int properties rely on DBImpl methods. + // @param version Version is needed in case the property is retrieved without + // holding db mutex, which is only supported for int properties. + bool (InternalStats::*handle_int)(uint64_t* value, DBImpl* db, + Version* version); }; -extern DBPropertyType GetPropertyType(const Slice& property, - bool* is_int_property, - bool* need_out_of_mutex); - +extern const DBPropertyInfo* GetPropertyInfo(const Slice& property); #ifndef ROCKSDB_LITE class InternalStats { @@ -248,14 +214,18 @@ class InternalStats { uint64_t BumpAndGetBackgroundErrorCount() { return ++bg_error_count_; } - bool GetStringProperty(DBPropertyType property_type, const Slice& property, - std::string* value); + bool GetStringProperty(const DBPropertyInfo& property_info, + const Slice& property, std::string* value); - bool GetIntProperty(DBPropertyType property_type, uint64_t* value, - DBImpl* db) const; + bool GetIntProperty(const DBPropertyInfo& property_info, uint64_t* value, + DBImpl* db); - bool GetIntPropertyOutOfMutex(DBPropertyType property_type, Version* version, - uint64_t* value) const; + bool GetIntPropertyOutOfMutex(const DBPropertyInfo& property_info, + Version* version, uint64_t* value); + + // Store a mapping from the user-facing DB::Properties string to our + // DBPropertyInfo struct used internally for retrieving properties. 
+ static const std::unordered_map ppt_name_to_info; private: void DumpDBStats(std::string* value); @@ -321,6 +291,54 @@ class InternalStats { seconds_up(0) {} } db_stats_snapshot_; + // Handler functions for getting property values. They use "value" as a value- + // result argument, and return true upon successfully setting "value". + bool HandleNumFilesAtLevel(std::string* value, Slice suffix); + bool HandleLevelStats(std::string* value, Slice suffix); + bool HandleStats(std::string* value, Slice suffix); + bool HandleCFStats(std::string* value, Slice suffix); + bool HandleDBStats(std::string* value, Slice suffix); + bool HandleSsTables(std::string* value, Slice suffix); + bool HandleAggregatedTableProperties(std::string* value, Slice suffix); + bool HandleAggregatedTablePropertiesAtLevel(std::string* value, Slice suffix); + bool HandleNumImmutableMemTable(uint64_t* value, DBImpl* db, + Version* version); + bool HandleNumImmutableMemTableFlushed(uint64_t* value, DBImpl* db, + Version* version); + bool HandleMemTableFlushPending(uint64_t* value, DBImpl* db, + Version* version); + bool HandleNumRunningFlushes(uint64_t* value, DBImpl* db, Version* version); + bool HandleCompactionPending(uint64_t* value, DBImpl* db, Version* version); + bool HandleNumRunningCompactions(uint64_t* value, DBImpl* db, + Version* version); + bool HandleBackgroundErrors(uint64_t* value, DBImpl* db, Version* version); + bool HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* db, + Version* version); + bool HandleCurSizeAllMemTables(uint64_t* value, DBImpl* db, Version* version); + bool HandleSizeAllMemTables(uint64_t* value, DBImpl* db, Version* version); + bool HandleNumEntriesActiveMemTable(uint64_t* value, DBImpl* db, + Version* version); + bool HandleNumEntriesImmMemTables(uint64_t* value, DBImpl* db, + Version* version); + bool HandleNumDeletesActiveMemTable(uint64_t* value, DBImpl* db, + Version* version); + bool HandleNumDeletesImmMemTables(uint64_t* value, DBImpl* db, + Version* 
version); + bool HandleEstimateNumKeys(uint64_t* value, DBImpl* db, Version* version); + bool HandleNumSnapshots(uint64_t* value, DBImpl* db, Version* version); + bool HandleOldestSnapshotTime(uint64_t* value, DBImpl* db, Version* version); + bool HandleNumLiveVersions(uint64_t* value, DBImpl* db, Version* version); + bool HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db, + Version* version); + bool HandleBaseLevel(uint64_t* value, DBImpl* db, Version* version); + bool HandleTotalSstFilesSize(uint64_t* value, DBImpl* db, Version* version); + bool HandleEstimatePendingCompactionBytes(uint64_t* value, DBImpl* db, + Version* version); + bool HandleEstimateTableReadersMem(uint64_t* value, DBImpl* db, + Version* version); + bool HandleEstimateLiveDataSize(uint64_t* value, DBImpl* db, + Version* version); + // Total number of background errors encountered. Every time a flush task // or compaction task fails, this counter is incremented. The failure can // be caused by any possible reason, including file system errors, out of @@ -402,14 +420,20 @@ class InternalStats { uint64_t BumpAndGetBackgroundErrorCount() { return 0; } - bool GetStringProperty(DBPropertyType property_type, const Slice& property, - std::string* value) { return false; } + bool GetStringProperty(const DBPropertyInfo& property_info, + const Slice& property, std::string* value) { + return false; + } - bool GetIntProperty(DBPropertyType property_type, uint64_t* value, - DBImpl* db) const { return false; } + bool GetIntProperty(const DBPropertyInfo& property_info, uint64_t* value, + DBImpl* db) const { + return false; + } - bool GetIntPropertyOutOfMutex(DBPropertyType property_type, Version* version, - uint64_t* value) const { return false; } + bool GetIntPropertyOutOfMutex(const DBPropertyInfo& property_info, + Version* version, uint64_t* value) const { + return false; + } }; #endif // !ROCKSDB_LITE diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index d916db348..6f5229b5f 100644 --- 
a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -329,6 +329,9 @@ class DB { #ifndef ROCKSDB_LITE // Contains all valid property arguments for GetProperty(). + // + // NOTE: Property names cannot end in numbers since those are interpreted as + // arguments, e.g., see kNumFilesAtLevelPrefix. struct Properties { // "rocksdb.num-files-at-level" - returns string containing the number // of files at level , where is an ASCII representation of a From a09ce4fcd38be20313884400e1588c0b87f4cc4e Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Wed, 3 Feb 2016 09:56:56 -0800 Subject: [PATCH 080/195] Skip some of the non-critical tests in ./tools/run_flash_bench.sh Summary: Some of the tests aren't considered to be critical when it comes to getting key benchmarking data for RocksDB. Therefore we'll introduce an environment variable `SKIP_LOW_PRI_TESTS` which enables skipping those test cases. By default all the tests will be run. If you want to optimize the test-case execution then do the following: ` $ export SKIP_LOW_PRI_TESTS=1 $ ./tools/run_flash_bench.sh ` Test Plan: Verified that when `SKIP_LOW_PRI_TESTS` is not set then `benchmark.sh` is called for all the scenarios and when `SKIP_LOW_PRI_TESTS` is set to `1` then `benchmark.sh` is called only for the test-cases which are critical. Reviewers: MarkCallaghan Reviewed By: MarkCallaghan Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53739 --- tools/run_flash_bench.sh | 131 +++++++++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 40 deletions(-) diff --git a/tools/run_flash_bench.sh b/tools/run_flash_bench.sh index 1f59a5ada..c24b0f5eb 100755 --- a/tools/run_flash_bench.sh +++ b/tools/run_flash_bench.sh @@ -22,6 +22,7 @@ # test and the tests are listed below. # # The environment variables are also optional. 
The variables are: +# # NKEYS - number of key/value pairs to load # BG_MBWRITEPERSEC - write rate limit in MB/second for tests in which # there is one thread doing writes and stats are @@ -54,6 +55,10 @@ # SAVE_SETUP - saves a copy of the database at the end of step 1 to # $DATA_DIR.bak. When LOG_DIR != DATA_DIR then it is copied # to $LOG_DIR.bak. +# SKIP_LOW_PRI_TESTS - skip some of the tests which aren't crucial for getting +# actionable benchmarking data (look for keywords "bulkload", +# "sync=1", and "while merging"). +# # Size constants K=1024 @@ -89,6 +94,14 @@ wal_dir=${LOG_DIR:-"/tmp/rocksdb/"} do_setup=${DO_SETUP:-1} save_setup=${SAVE_SETUP:-0} +# By default we'll run all the tests. Set this to skip a set of tests which +# aren't critical for getting key metrics. +skip_low_pri_tests=${SKIP_LOW_PRI_TESTS:-0} + +if [[ $skip_low_pri_tests == 1 ]]; then + echo "Skipping some non-critical tests because SKIP_LOW_PRI_TESTS is set." +fi + output_dir="/tmp/output" ARGS="\ @@ -116,8 +129,10 @@ echo -e "ops/sec\tmb/sec\tSize-GB\tL0_GB\tSum_GB\tW-Amp\tW-MB/s\tusec/op\tp50\tp if [[ $do_setup != 0 ]]; then echo Doing setup - # Test 1: bulk load - env $ARGS ./tools/benchmark.sh bulkload + if [[ $skip_low_pri_tests != 1 ]]; then + # Test 1: bulk load + env $ARGS ./tools/benchmark.sh bulkload + fi # Test 2a: sequential fill with large values to get peak ingest # adjust NUM_KEYS given the use of larger values @@ -188,16 +203,20 @@ for num_thr in "${nthreads[@]}" ; do env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \ DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh overwrite - # Test 8: overwrite with sync=1 - env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \ - ./tools/benchmark.sh overwrite + if [[ $skip_low_pri_tests != 1 ]]; then + # Test 8: overwrite with sync=1 + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \ + ./tools/benchmark.sh overwrite + fi # Test 9: random update with sync=0 env 
$ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \ ./tools/benchmark.sh updaterandom - # Test 10: random update with sync=1 - env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh updaterandom + if [[ $skip_low_pri_tests != 1 ]]; then + # Test 10: random update with sync=1 + env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh updaterandom + fi # Test 11: random read while writing env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ @@ -219,73 +238,105 @@ for num_thr in "${nthreads[@]}" ; do env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \ DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh mergerandom - # Test 15: random merge with sync=1 - env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \ - ./tools/benchmark.sh mergerandom + if [[ $skip_low_pri_tests != 1 ]]; then + # Test 15: random merge with sync=1 + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \ + ./tools/benchmark.sh mergerandom - # Test 16: random read while merging - env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ - DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh readwhilemerging + # Test 16: random read while merging + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ + DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh readwhilemerging - # Test 17: range scan while merging - env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ - DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilemerging + # Test 17: range scan while merging + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ + DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilemerging - # Test 18: reverse range scan while merging - env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ - DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps 
./tools/benchmark.sh revrangewhilemerging + # Test 18: reverse range scan while merging + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ + DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilemerging + fi done -echo bulkload > $output_dir/report2.txt -head -1 $output_dir/report.txt >> $output_dir/report2.txt -grep bulkload $output_dir/report.txt >> $output_dir/report2.txt +if [[ $skip_low_pri_tests != 1 ]]; then + echo bulkload > $output_dir/report2.txt + head -1 $output_dir/report.txt >> $output_dir/report2.txt + grep bulkload $output_dir/report.txt >> $output_dir/report2.txt +fi + echo fillseq >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt grep fillseq $output_dir/report.txt >> $output_dir/report2.txt + echo overwrite sync=0 >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt grep overwrite $output_dir/report.txt | grep \.s0 >> $output_dir/report2.txt -echo overwrite sync=1 >> $output_dir/report2.txt -head -1 $output_dir/report.txt >> $output_dir/report2.txt -grep overwrite $output_dir/report.txt | grep \.s1 >> $output_dir/report2.txt + +if [[ $skip_low_pri_tests != 1 ]]; then + echo overwrite sync=1 >> $output_dir/report2.txt + head -1 $output_dir/report.txt >> $output_dir/report2.txt + grep overwrite $output_dir/report.txt | grep \.s1 >> $output_dir/report2.txt +fi + echo updaterandom sync=0 >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt grep updaterandom $output_dir/report.txt | grep \.s0 >> $output_dir/report2.txt -echo updaterandom sync=1 >> $output_dir/report2.txt -head -1 $output_dir/report.txt >> $output_dir/report2.txt -grep updaterandom $output_dir/report.txt | grep \.s1 >> $output_dir/report2.txt + +if [[ $skip_low_pri_tests != 1 ]]; then + echo updaterandom sync=1 >> $output_dir/report2.txt + head -1 $output_dir/report.txt >> $output_dir/report2.txt + grep updaterandom 
$output_dir/report.txt | grep \.s1 >> $output_dir/report2.txt +fi + echo mergerandom sync=0 >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt grep mergerandom $output_dir/report.txt | grep \.s0 >> $output_dir/report2.txt -echo mergerandom sync=1 >> $output_dir/report2.txt -head -1 $output_dir/report.txt >> $output_dir/report2.txt -grep mergerandom $output_dir/report.txt | grep \.s1 >> $output_dir/report2.txt + +if [[ $skip_low_pri_tests != 1 ]]; then + echo mergerandom sync=1 >> $output_dir/report2.txt + head -1 $output_dir/report.txt >> $output_dir/report2.txt + grep mergerandom $output_dir/report.txt | grep \.s1 >> $output_dir/report2.txt +fi + echo readrandom >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt grep readrandom $output_dir/report.txt >> $output_dir/report2.txt + echo fwdrange >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt grep fwdrange\.t $output_dir/report.txt >> $output_dir/report2.txt + echo revrange >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt grep revrange\.t $output_dir/report.txt >> $output_dir/report2.txt + echo readwhile >> $output_dir/report2.txt >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt grep readwhilewriting $output_dir/report.txt >> $output_dir/report2.txt -echo readwhile >> $output_dir/report2.txt -head -1 $output_dir/report.txt >> $output_dir/report2.txt -grep readwhilemerging $output_dir/report.txt >> $output_dir/report2.txt + +if [[ $skip_low_pri_tests != 1 ]]; then + echo readwhile >> $output_dir/report2.txt + head -1 $output_dir/report.txt >> $output_dir/report2.txt + grep readwhilemerging $output_dir/report.txt >> $output_dir/report2.txt +fi + echo fwdreadwhilewriting >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt grep fwdrangewhilewriting $output_dir/report.txt >> $output_dir/report2.txt -echo 
fwdreadwhilemerging >> $output_dir/report2.txt -head -1 $output_dir/report.txt >> $output_dir/report2.txt -grep fwdrangewhilemerg $output_dir/report.txt >> $output_dir/report2.txt + +if [[ $skip_low_pri_tests != 1 ]]; then + echo fwdreadwhilemerging >> $output_dir/report2.txt + head -1 $output_dir/report.txt >> $output_dir/report2.txt + grep fwdrangewhilemerg $output_dir/report.txt >> $output_dir/report2.txt +fi + echo revreadwhilewriting >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt grep revrangewhilewriting $output_dir/report.txt >> $output_dir/report2.txt -echo revreadwhilemerging >> $output_dir/report2.txt -head -1 $output_dir/report.txt >> $output_dir/report2.txt -grep revrangewhilemerg $output_dir/report.txt >> $output_dir/report2.txt + +if [[ $skip_low_pri_tests != 1 ]]; then + echo revreadwhilemerging >> $output_dir/report2.txt + head -1 $output_dir/report.txt >> $output_dir/report2.txt + grep revrangewhilemerg $output_dir/report.txt >> $output_dir/report2.txt +fi cat $output_dir/report2.txt From 2c1db5ea515b75c6472a622e152c88c1f9befe00 Mon Sep 17 00:00:00 2001 From: Nathan Bronson Date: Wed, 3 Feb 2016 09:21:44 -0800 Subject: [PATCH 081/195] always invalidate sequential-insertion cache for concurrent skiplist adds Summary: InlineSkipList::InsertConcurrently should invalidate the sequential-insertion cache prev_[] for all inserts of multi-level nodes, not just those that increase the height of the skip list. The invariant for prev_ is that prev_[i] (i > 0) is supposed to be the predecessor of prev_[0] at level i. Before this diff InsertConcurrently could violate this constraint when inserting a multi-level node after prev_[i] but before prev_[0]. This diff also reenables kConcurrentSkipList as db_test's MultiThreaded/MultiThreadedDBTest.MultiThreaded/29. Test Plan: 1. unit tests 2. 
temporarily hack kConcurrentSkipList timing so that it is fast but has a 1.5% failure rate on my dev box (1ms stagger on thread launch, 1s test duration, failure rate baseline over 1000 runs) 3. observe 1000 passes post-fix Reviewers: igor, sdong Reviewed By: sdong Subscribers: MarkCallaghan, dhruba Differential Revision: https://reviews.facebook.net/D53751 --- db/db_test_util.h | 2 +- db/inlineskiplist.h | 31 ++++++++++++++++++++----------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/db/db_test_util.h b/db/db_test_util.h index 48fa5430e..b993af8cb 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -525,8 +525,8 @@ class DBTestBase : public testing::Test { kOptimizeFiltersForHits = 27, kRowCache = 28, kRecycleLogFiles = 29, - kEnd = 30, kConcurrentSkipList = 30, + kEnd = 31, kLevelSubcompactions = 31, kUniversalSubcompactions = 32, }; diff --git a/db/inlineskiplist.h b/db/inlineskiplist.h index 201580b10..98621829c 100644 --- a/db/inlineskiplist.h +++ b/db/inlineskiplist.h @@ -147,8 +147,9 @@ class InlineSkipList { // values are ok. std::atomic max_height_; // Height of the entire list - // Used for optimizing sequential insert patterns. Tricky. prev_[i] for - // i up to max_height_ - 1 (inclusive) is the predecessor of prev_[0]. + // Used for optimizing sequential insert patterns. Tricky. prev_height_ + // of zero means prev_ is undefined. Otherwise: prev_[i] for i up + // to max_height_ - 1 (inclusive) is the predecessor of prev_[0], and // prev_height_ is the height of prev_[0]. prev_[0] can only be equal // to head when max_height_ and prev_height_ are both 1. Node** prev_; @@ -510,11 +511,10 @@ InlineSkipList::AllocateNode(size_t key_size, int height) { template void InlineSkipList::Insert(const char* key) { - // InsertConcurrently can't maintain the prev_ invariants when it needs - // to increase max_height_. In that case it sets prev_height_ to zero, - // letting us know that we should ignore it. 
A relaxed load suffices - // here because write thread synchronization separates Insert calls - // from InsertConcurrently calls. + // InsertConcurrently often can't maintain the prev_ invariants, so + // it just sets prev_height_ to zero, letting us know that we should + // ignore it. A relaxed load suffices here because write thread + // synchronization separates Insert calls from InsertConcurrently calls. auto prev_height = prev_height_.load(std::memory_order_relaxed); // fast path for sequential insertion @@ -595,15 +595,24 @@ void InlineSkipList::InsertConcurrently(const char* key) { int height = x->UnstashHeight(); assert(height >= 1 && height <= kMaxHeight_); + // We don't have a lock-free algorithm for updating prev_, but we do have + // the option of invalidating the entire sequential-insertion cache. + // prev_'s invariant is that prev_[i] (i > 0) is the predecessor of + // prev_[0] at that level. We're only going to violate that if height + // > 1 and key lands after prev_[height - 1] but before prev_[0]. + // Comparisons are pretty expensive, so an easier version is to just + // clear the cache if height > 1. We only write to prev_height_ if the + // nobody else has, to avoid invalidating the root of the skip list in + // all of the other CPU caches. 
+ if (height > 1 && prev_height_.load(std::memory_order_relaxed) != 0) { + prev_height_.store(0, std::memory_order_relaxed); + } + int max_height = max_height_.load(std::memory_order_relaxed); while (height > max_height) { if (max_height_.compare_exchange_strong(max_height, height)) { // successfully updated it max_height = height; - - // we dont have a lock-free algorithm for fixing up prev_, so just - // mark it invalid - prev_height_.store(0, std::memory_order_relaxed); break; } // else retry, possibly exiting the loop because somebody else From bf767c6411a233443098aef867dcaa21ac9c3cda Mon Sep 17 00:00:00 2001 From: krad Date: Wed, 3 Feb 2016 11:34:03 -0800 Subject: [PATCH 082/195] Minor fix to makefile Summary: unit_481 is misspelt. Fixing it. Test Plan: Running make commit_prereq Reviewers: leveldb Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53757 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c2642ce0f..6d4e48d4e 100644 --- a/Makefile +++ b/Makefile @@ -1207,8 +1207,8 @@ jdb_bench: commit_prereq: build_tools/rocksdb-lego-determinator \ build_tools/precommit_checker.py + build_tools/precommit_checker.py unit unit_481 clang_unit tsan asan lite $(MAKE) clean && $(MAKE) jclean && $(MAKE) rocksdbjava; - build_tools/precommit_checker.py unit uint_481 clang_unit tsan asan lite xfunc: for xftest in $(XFUNC_TESTS); do \ From 9ab269ab393f9119f48b4be6a816ad20cc661376 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Wed, 3 Feb 2016 19:13:03 +0000 Subject: [PATCH 083/195] Threaded tests for WriteBatch --- java/Makefile | 1 + .../org/rocksdb/WriteBatchThreadedTest.java | 104 ++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java diff --git a/java/Makefile b/java/Makefile index abc8f73ee..bffca4b27 100644 --- a/java/Makefile +++ b/java/Makefile @@ -99,6 +99,7 @@ JAVA_TESTS = 
org.rocksdb.BackupableDBOptionsTest\ org.rocksdb.StatisticsCollectorTest\ org.rocksdb.WriteBatchHandlerTest\ org.rocksdb.WriteBatchTest\ + org.rocksdb.WriteBatchThreadedTest\ org.rocksdb.WriteOptionsTest\ org.rocksdb.WriteBatchWithIndexTest diff --git a/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java b/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java new file mode 100644 index 000000000..ab38c475f --- /dev/null +++ b/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java @@ -0,0 +1,104 @@ +// Copyright (c) 2016, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +package org.rocksdb; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameter; +import org.junit.runners.Parameterized.Parameters; + +import java.nio.ByteBuffer; +import java.util.*; +import java.util.concurrent.*; + +@RunWith(Parameterized.class) +public class WriteBatchThreadedTest { + + @Parameters(name = "WriteBatchThreadedTest(threadCount={0})") + public static Iterable data() { + return Arrays.asList(new Integer[]{1, 10, 50, 100}); + } + + @Parameter + public int threadCount; + + @Rule + public TemporaryFolder dbFolder = new TemporaryFolder(); + + RocksDB db; + + @Before + public void setUp() throws Exception { + RocksDB.loadLibrary(); + final Options options = new Options() + .setCreateIfMissing(true) + .setIncreaseParallelism(32); + db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath()); + assert (db != null); + } + + @After + public void tearDown() throws Exception { + if (db != null) { + db.close(); + } + } + + @Test + public void 
threadedWrites() throws InterruptedException, ExecutionException { + final List> callables = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + final int offset = i * 100; + callables.add(new Callable() { + @Override + public Void call() throws RocksDBException { + final WriteBatch wb = new WriteBatch(); + for (int i = offset; i < offset + 100; i++) { + wb.put(ByteBuffer.allocate(4).putInt(i).array(), + "parallel rocks test".getBytes()); + } + db.write(new WriteOptions(), wb); + + return null; + } + }); + } + + //submit the callables + final ExecutorService executorService = + Executors.newFixedThreadPool(threadCount); + try { + final ExecutorCompletionService completionService = + new ExecutorCompletionService<>(executorService); + final Set> futures = new HashSet<>(); + for (final Callable callable : callables) { + futures.add(completionService.submit(callable)); + } + + while (futures.size() > 0) { + final Future future = completionService.take(); + futures.remove(future); + + try { + future.get(); + } catch (final ExecutionException e) { + for (final Future f : futures) { + f.cancel(true); + } + + throw e; + } + } + } finally { + executorService.shutdown(); + executorService.awaitTermination(10, TimeUnit.SECONDS); + } + } +} From 8445e5380713d48cbe07d21dcac38e41d37d1762 Mon Sep 17 00:00:00 2001 From: krad Date: Tue, 26 Jan 2016 11:22:25 -0800 Subject: [PATCH 084/195] Add a mechanism to run all tests in sandcastle Summary: When making environment specific changes, it is better to run all CI tests. 
This diff provides a mechanism to do that Format is: ROCKSDB_CHECK_ALL=1 arc diff Test Plan: Submit request for diff Reviewers: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53631 --- .../config/FacebookArcanistConfiguration.php | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/arcanist_util/config/FacebookArcanistConfiguration.php b/arcanist_util/config/FacebookArcanistConfiguration.php index 31fa27c9b..156a6dbfb 100644 --- a/arcanist_util/config/FacebookArcanistConfiguration.php +++ b/arcanist_util/config/FacebookArcanistConfiguration.php @@ -42,13 +42,14 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { } function getSteps($diffID, $username, $test) { - $arcrc_content = exec("cat ~/.arcrc | base64 -w0"); + $arcrc_content = exec("cat ~/.arcrc | gzip -f | base64 -w0"); // Sandcastle machines don't have arc setup. We copy the user certificate // and authenticate using that in sandcastle $setup = array( "name" => "Setup arcrc", - "shell" => "echo " . $arcrc_content . " | base64 --decode > ~/.arcrc", + "shell" => "echo " . $arcrc_content . " | base64 --decode" + . 
" | gzip -d > ~/.arcrc", "user" => "root" ); @@ -121,10 +122,17 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { return; } - // list of tests we want to run in sandcastle - $tests = array( - "unit", "unit_481", "clang_unit", "tsan", "asan", "lite", "valgrind" - ); + if (strcmp(getenv("ROCKSDB_CHECK_ALL"), 1) == 0) { + // extract all tests from the CI definition + $output = file_get_contents("build_tools/rocksdb-lego-determinator"); + preg_match_all('/[ ]{2}([a-zA-Z0-9_]+)[\)]{1}/', $output, $matches); + $tests = $matches[1]; + } else { + // manually list of tests we want to run in sandcastle + $tests = array( + "unit", "unit_481", "clang_unit", "tsan", "asan", "lite", "valgrind" + ); + } // construct a job definition for each test and add it to the master plan foreach ($tests as $test) { @@ -138,7 +146,10 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { // we need supply the job plan as a determinator // so we construct a small job that will spit out the master job plan // which sandcastle will parse and execute - $arg_encoded = base64_encode(json_encode($arg)); + // Why compress ? Otherwise we run over the max string size. + $cmd = "echo " . base64_encode(json_encode($arg)) + . " | gzip -f | base64 -w0"; + $arg_encoded = shell_exec($cmd); $command = array( "name" => "Run diff " . $diffID . "for user " . $username, @@ -147,7 +158,8 @@ class FacebookArcanistConfiguration extends ArcanistConfiguration { $command["steps"][] = array( "name" => "Generate determinator", - "shell" => "echo " . $arg_encoded . " | base64 --decode", + "shell" => "echo " . $arg_encoded . " | base64 --decode | gzip -d" + . 
" | base64 --decode", "determinator" => true, "user" => "root" ); From 14a322033f25d94c64bdfba708ba99e2590f2278 Mon Sep 17 00:00:00 2001 From: Jonathan Wiepert Date: Wed, 3 Feb 2016 16:47:45 -0800 Subject: [PATCH 085/195] Remove references to files deleted in commit abb405227848581d3e6d2ba40d94dbc0a5513902 Summary: Remove obolete references to files in src.mk Fix incorrect path for reference in source.mk Test Plan: Ran build to ensure changes do not break anything. Reviewers: leveldb, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53733 --- src.mk | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src.mk b/src.mk index 3fb811144..3e54fa66d 100644 --- a/src.mk +++ b/src.mk @@ -209,7 +209,6 @@ TEST_BENCH_SOURCES = \ db/flush_job_test.cc \ db/inlineskiplist_test.cc \ db/listener_test.cc \ - db/log_and_apply_bench.cc \ db/log_test.cc \ db/manual_compaction_test.cc \ db/memtablerep_bench.cc \ @@ -243,8 +242,6 @@ TEST_BENCH_SOURCES = \ tools/sst_dump_test.cc \ util/arena_test.cc \ util/autovector_test.cc \ - util/benchharness.cc \ - util/benchharness_test.cc \ util/bloom_test.cc \ util/cache_bench.cc \ util/cache_test.cc \ @@ -261,7 +258,7 @@ TEST_BENCH_SOURCES = \ utilities/geodb/geodb_test.cc \ utilities/memory/memory_test.cc \ utilities/merge_operators/string_append/stringappend_test.cc \ - utilities/options_util_test.cc \ + utilities/options/options_util_test.cc \ utilities/redis/redis_lists_test.cc \ utilities/spatialdb/spatial_db_test.cc \ utilities/table_properties_collectors/compact_on_deletion_collector_test.cc \ From 9656eab0059692a34fe1b188ca6072e310b620c9 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 3 Feb 2016 17:06:10 -0800 Subject: [PATCH 086/195] This partially addresses issue https://github.com/facebook/rocksdb/issues/935 testutil.cc and testharness.cc could not be moved out at this time as they are used by 4 benchmarks in release builds. 
--- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d6084356b..7d91b244a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -177,7 +177,6 @@ set(SOURCES table/merger.cc table/sst_file_writer.cc table/meta_blocks.cc - table/mock_table.cc table/plain_table_builder.cc table/plain_table_factory.cc table/plain_table_index.cc @@ -214,7 +213,6 @@ set(SOURCES util/logging.cc util/log_buffer.cc util/memenv.cc - util/mock_env.cc util/murmurhash.cc util/mutable_cf_options.cc util/options.cc @@ -277,6 +275,8 @@ set(SOURCES # and linked to tests. Add test only code that is not #ifdefed for Release here. set(TESTUTIL_SOURCE db/db_test_util.cc + table/mock_table.cc + util/mock_env.cc util/thread_status_updater_debug.cc ) From f7c0f4e3efe4c83a6f5b1a1b9c5721981adc27bc Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 3 Feb 2016 16:04:10 -0800 Subject: [PATCH 087/195] perf_context.cc and iostats_context.cc use different output macro (fix unity build) Summary: Unity build because perf_context.cc and iostats_context.cc define a different OUTPUT macro. Fix it. 
Test Plan: Build unity Reviewers: kradhakrishnan, anthony, yhchiang, andrewkr, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: divchenko, leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53769 --- util/iostats_context.cc | 17 ++++++--- util/perf_context.cc | 78 ++++++++++++++++++++--------------------- 2 files changed, 51 insertions(+), 44 deletions(-) diff --git a/util/iostats_context.cc b/util/iostats_context.cc index 50a6e8ab1..d6d33baf3 100644 --- a/util/iostats_context.cc +++ b/util/iostats_context.cc @@ -31,14 +31,21 @@ void IOStatsContext::Reset() { logger_nanos = 0; } -#define OUTPUT(counter) #counter << " = " << counter << ", " +#define IOSTATS_CONTEXT_OUTPUT(counter) #counter << " = " << counter << ", " std::string IOStatsContext::ToString() const { std::ostringstream ss; - ss << OUTPUT(thread_pool_id) << OUTPUT(bytes_read) << OUTPUT(bytes_written) - << OUTPUT(open_nanos) << OUTPUT(allocate_nanos) << OUTPUT(write_nanos) - << OUTPUT(read_nanos) << OUTPUT(range_sync_nanos) << OUTPUT(fsync_nanos) - << OUTPUT(prepare_write_nanos) << OUTPUT(logger_nanos); + ss << IOSTATS_CONTEXT_OUTPUT(thread_pool_id) + << IOSTATS_CONTEXT_OUTPUT(bytes_read) + << IOSTATS_CONTEXT_OUTPUT(bytes_written) + << IOSTATS_CONTEXT_OUTPUT(open_nanos) + << IOSTATS_CONTEXT_OUTPUT(allocate_nanos) + << IOSTATS_CONTEXT_OUTPUT(write_nanos) + << IOSTATS_CONTEXT_OUTPUT(read_nanos) + << IOSTATS_CONTEXT_OUTPUT(range_sync_nanos) + << IOSTATS_CONTEXT_OUTPUT(fsync_nanos) + << IOSTATS_CONTEXT_OUTPUT(prepare_write_nanos) + << IOSTATS_CONTEXT_OUTPUT(logger_nanos); return ss.str(); } diff --git a/util/perf_context.cc b/util/perf_context.cc index 07bad40f2..214905ada 100644 --- a/util/perf_context.cc +++ b/util/perf_context.cc @@ -61,7 +61,7 @@ void PerfContext::Reset() { #endif } -#define OUTPUT(counter) \ +#define PERF_CONTEXT_OUTPUT(counter) \ if (!exclude_zero_counters || (counter > 0)) { \ ss << #counter << " = " << counter << ", "; \ } @@ -71,44 +71,44 @@ std::string 
PerfContext::ToString(bool exclude_zero_counters) const { return ""; #else std::ostringstream ss; - OUTPUT(user_key_comparison_count); - OUTPUT(block_cache_hit_count); - OUTPUT(block_read_count); - OUTPUT(block_read_byte); - OUTPUT(block_read_time); - OUTPUT(block_checksum_time); - OUTPUT(block_decompress_time); - OUTPUT(internal_key_skipped_count); - OUTPUT(internal_delete_skipped_count); - OUTPUT(write_wal_time); - OUTPUT(get_snapshot_time); - OUTPUT(get_from_memtable_time); - OUTPUT(get_from_memtable_count); - OUTPUT(get_post_process_time); - OUTPUT(get_from_output_files_time); - OUTPUT(seek_on_memtable_time); - OUTPUT(seek_on_memtable_count); - OUTPUT(seek_child_seek_time); - OUTPUT(seek_child_seek_count); - OUTPUT(seek_min_heap_time); - OUTPUT(seek_internal_seek_time); - OUTPUT(find_next_user_entry_time); - OUTPUT(write_pre_and_post_process_time); - OUTPUT(write_memtable_time); - OUTPUT(db_mutex_lock_nanos); - OUTPUT(db_condition_wait_nanos); - OUTPUT(merge_operator_time_nanos); - OUTPUT(write_delay_time); - OUTPUT(read_index_block_nanos); - OUTPUT(read_filter_block_nanos); - OUTPUT(new_table_block_iter_nanos); - OUTPUT(new_table_iterator_nanos); - OUTPUT(block_seek_nanos); - OUTPUT(find_table_nanos); - OUTPUT(bloom_memtable_hit_count); - OUTPUT(bloom_memtable_miss_count); - OUTPUT(bloom_sst_hit_count); - OUTPUT(bloom_sst_miss_count); + PERF_CONTEXT_OUTPUT(user_key_comparison_count); + PERF_CONTEXT_OUTPUT(block_cache_hit_count); + PERF_CONTEXT_OUTPUT(block_read_count); + PERF_CONTEXT_OUTPUT(block_read_byte); + PERF_CONTEXT_OUTPUT(block_read_time); + PERF_CONTEXT_OUTPUT(block_checksum_time); + PERF_CONTEXT_OUTPUT(block_decompress_time); + PERF_CONTEXT_OUTPUT(internal_key_skipped_count); + PERF_CONTEXT_OUTPUT(internal_delete_skipped_count); + PERF_CONTEXT_OUTPUT(write_wal_time); + PERF_CONTEXT_OUTPUT(get_snapshot_time); + PERF_CONTEXT_OUTPUT(get_from_memtable_time); + PERF_CONTEXT_OUTPUT(get_from_memtable_count); + PERF_CONTEXT_OUTPUT(get_post_process_time); + 
PERF_CONTEXT_OUTPUT(get_from_output_files_time); + PERF_CONTEXT_OUTPUT(seek_on_memtable_time); + PERF_CONTEXT_OUTPUT(seek_on_memtable_count); + PERF_CONTEXT_OUTPUT(seek_child_seek_time); + PERF_CONTEXT_OUTPUT(seek_child_seek_count); + PERF_CONTEXT_OUTPUT(seek_min_heap_time); + PERF_CONTEXT_OUTPUT(seek_internal_seek_time); + PERF_CONTEXT_OUTPUT(find_next_user_entry_time); + PERF_CONTEXT_OUTPUT(write_pre_and_post_process_time); + PERF_CONTEXT_OUTPUT(write_memtable_time); + PERF_CONTEXT_OUTPUT(db_mutex_lock_nanos); + PERF_CONTEXT_OUTPUT(db_condition_wait_nanos); + PERF_CONTEXT_OUTPUT(merge_operator_time_nanos); + PERF_CONTEXT_OUTPUT(write_delay_time); + PERF_CONTEXT_OUTPUT(read_index_block_nanos); + PERF_CONTEXT_OUTPUT(read_filter_block_nanos); + PERF_CONTEXT_OUTPUT(new_table_block_iter_nanos); + PERF_CONTEXT_OUTPUT(new_table_iterator_nanos); + PERF_CONTEXT_OUTPUT(block_seek_nanos); + PERF_CONTEXT_OUTPUT(find_table_nanos); + PERF_CONTEXT_OUTPUT(bloom_memtable_hit_count); + PERF_CONTEXT_OUTPUT(bloom_memtable_miss_count); + PERF_CONTEXT_OUTPUT(bloom_sst_hit_count); + PERF_CONTEXT_OUTPUT(bloom_sst_miss_count); return ss.str(); #endif } From 34a40bf911a3418c9f8564a70e967433b1697982 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 4 Feb 2016 15:21:32 -0800 Subject: [PATCH 088/195] Add --allow_concurrent_memtable_write in stress test and run it in crash_test Summary: Add an option of --allow_concurrent_memtable_write in stress test and cover it in crash test Test Plan: Run crash test and make sure three combinations of the two options show up randomly. 
Reviewers: IslamAbdelRahman, yhchiang, andrewkr, anthony, kradhakrishnan Reviewed By: kradhakrishnan Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53811 --- tools/db_crashtest.py | 15 +++++++++++++-- tools/db_stress.cc | 11 +++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 90fde4a30..449d85427 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -24,6 +24,7 @@ default_params = { "disable_data_sync": 0, "disable_wal": 0, "filter_deletes": lambda: random.randint(0, 1), + "allow_concurrent_memtable_write": lambda: random.randint(0, 1), "iterpercent": 10, "max_background_compactions": 20, "max_bytes_for_level_base": 10485760, @@ -85,6 +86,7 @@ simple_default_params = { "disable_data_sync": 0, "disable_wal": 0, "filter_deletes": lambda: random.randint(0, 1), + "allow_concurrent_memtable_write": lambda: random.randint(0, 1), "iterpercent": 10, "max_background_compactions": 1, "max_bytes_for_level_base": 67108864, @@ -126,6 +128,15 @@ whitebox_simple_default_params = { } +def finalize_and_sanitize(src_params): + dest_params = dict([(k, v() if callable(v) else v) + for (k, v) in src_params.items()]) + # --allow_concurrent_memtable_write with --filter_deletes is not supported. 
+ if dest_params.get("allow_concurrent_memtable_write", 1) == 1: + dest_params["filter_deletes"] = 0 + return dest_params + + def gen_cmd_params(args): params = {} @@ -151,8 +162,8 @@ def gen_cmd_params(args): def gen_cmd(params): cmd = './db_stress ' + ' '.join( - '--{0}={1}'.format(k, v() if callable(v) else v) - for k, v in params.items() + '--{0}={1}'.format(k, v) + for k, v in finalize_and_sanitize(params).items() if k not in set(['test_type', 'simple', 'duration', 'interval']) and v is not None) return cmd diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 102803862..10193a182 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -230,6 +230,13 @@ DEFINE_int64(cache_size, 2LL * KB * KB * KB, DEFINE_uint64(subcompactions, 1, "Maximum number of subcompactions to divide L0-L1 compactions " "into."); + +DEFINE_bool(allow_concurrent_memtable_write, true, + "Allow multi-writers to update mem tables in parallel."); + +DEFINE_bool(enable_write_thread_adaptive_yield, true, + "Use a yielding spin loop for brief writer thread waits."); + static const bool FLAGS_subcompactions_dummy __attribute__((unused)) = RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range); @@ -1997,6 +2004,10 @@ class StressTest { options_.filter_deletes = FLAGS_filter_deletes; options_.inplace_update_support = FLAGS_in_place_update; options_.max_subcompactions = static_cast(FLAGS_subcompactions); + options_.allow_concurrent_memtable_write = + FLAGS_allow_concurrent_memtable_write; + options_.enable_write_thread_adaptive_yield = + FLAGS_enable_write_thread_adaptive_yield; if ((FLAGS_prefix_size == 0) == (FLAGS_rep_factory == kHashSkipList)) { fprintf(stderr, "prefix_size should be non-zero iff memtablerep == prefix_hash\n"); From 8e6172bc5771272d99f85a72ff7cd1ef3df668e5 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Fri, 5 Feb 2016 10:22:37 -0800 Subject: [PATCH 089/195] Add BlockBasedTableOptions::index_block_restart_interval Summary: Add a new option to 
BlockBasedTableOptions that will allow us to change the restart interval for the index block Test Plan: unit tests Reviewers: yhchiang, anthony, andrewkr, sdong Reviewed By: sdong Subscribers: march, dhruba Differential Revision: https://reviews.facebook.net/D53721 --- db/db_test.cc | 1 + db/db_test_util.cc | 4 ++ db/db_test_util.h | 1 + include/rocksdb/table.h | 3 ++ table/block_based_table_builder.cc | 29 ++++++++------ table/block_based_table_factory.cc | 6 +++ table/table_test.cc | 62 ++++++++++++++++++++++++++++++ util/options_helper.h | 5 ++- util/options_test.cc | 3 +- util/testutil.cc | 1 + 10 files changed, 102 insertions(+), 13 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index dfc231969..d39cbfe29 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -5272,6 +5272,7 @@ class DBTestRandomized : public DBTest, option_configs.push_back(option_config); } } + option_configs.push_back(kBlockBasedTableWithIndexRestartInterval); return option_configs; } }; diff --git a/db/db_test_util.cc b/db/db_test_util.cc index f2906c7ca..222dc715d 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -339,6 +339,10 @@ Options DBTestBase::CurrentOptions( options.prefix_extractor.reset(NewNoopTransform()); break; } + case kBlockBasedTableWithIndexRestartInterval: { + table_options.index_block_restart_interval = 8; + break; + } case kOptimizeFiltersForHits: { options.optimize_filters_for_hits = true; set_block_based_table_factory = true; diff --git a/db/db_test_util.h b/db/db_test_util.h index b993af8cb..cde352dfe 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -529,6 +529,7 @@ class DBTestBase : public testing::Test { kEnd = 31, kLevelSubcompactions = 31, kUniversalSubcompactions = 32, + kBlockBasedTableWithIndexRestartInterval = 33, }; int option_config_; diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 2e1a91de9..157d4274c 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -120,6 +120,9 @@ struct 
BlockBasedTableOptions { // value will be silently overwritten with 1. int block_restart_interval = 16; + // Same as block_restart_interval but used for the index block. + int index_block_restart_interval = 1; + // Use delta encoding to compress keys in blocks. // Iterator::PinData() requires this option to be disabled. // diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index 006908eaa..ee8c3dd7c 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -113,15 +113,17 @@ class IndexBuilder { // // Optimizations: // 1. Made block's `block_restart_interval` to be 1, which will avoid linear -// search when doing index lookup. +// search when doing index lookup (can be disabled by setting +// index_block_restart_interval). // 2. Shorten the key length for index block. Other than honestly using the // last key in the data block as the index key, we instead find a shortest // substitute key that serves the same function. class ShortenedIndexBuilder : public IndexBuilder { public: - explicit ShortenedIndexBuilder(const Comparator* comparator) + explicit ShortenedIndexBuilder(const Comparator* comparator, + int index_block_restart_interval) : IndexBuilder(comparator), - index_block_builder_(1 /* block_restart_interval == 1 */) {} + index_block_builder_(index_block_restart_interval) {} virtual void AddIndexEntry(std::string* last_key_in_current_block, const Slice* first_key_in_next_block, @@ -178,9 +180,10 @@ class ShortenedIndexBuilder : public IndexBuilder { class HashIndexBuilder : public IndexBuilder { public: explicit HashIndexBuilder(const Comparator* comparator, - const SliceTransform* hash_key_extractor) + const SliceTransform* hash_key_extractor, + int index_block_restart_interval) : IndexBuilder(comparator), - primary_index_builder_(comparator), + primary_index_builder_(comparator, index_block_restart_interval), hash_key_extractor_(hash_key_extractor) {} virtual void AddIndexEntry(std::string* 
last_key_in_current_block, @@ -266,13 +269,16 @@ namespace { // Create a index builder based on its type. IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator, - const SliceTransform* prefix_extractor) { + const SliceTransform* prefix_extractor, + int index_block_restart_interval) { switch (type) { case BlockBasedTableOptions::kBinarySearch: { - return new ShortenedIndexBuilder(comparator); + return new ShortenedIndexBuilder(comparator, + index_block_restart_interval); } case BlockBasedTableOptions::kHashSearch: { - return new HashIndexBuilder(comparator, prefix_extractor); + return new HashIndexBuilder(comparator, prefix_extractor, + index_block_restart_interval); } default: { assert(!"Do not recognize the index type "); @@ -484,9 +490,10 @@ struct BlockBasedTableBuilder::Rep { data_block(table_options.block_restart_interval, table_options.use_delta_encoding), internal_prefix_transform(_ioptions.prefix_extractor), - index_builder(CreateIndexBuilder(table_options.index_type, - &internal_comparator, - &this->internal_prefix_transform)), + index_builder( + CreateIndexBuilder(table_options.index_type, &internal_comparator, + &this->internal_prefix_transform, + table_options.index_block_restart_interval)), compression_type(_compression_type), compression_opts(_compression_opts), filter_block(skip_filters ? 
nullptr : CreateFilterBlockBuilder( diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index a6484c4ee..7b38c2136 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -42,6 +42,9 @@ BlockBasedTableFactory::BlockBasedTableFactory( if (table_options_.block_restart_interval < 1) { table_options_.block_restart_interval = 1; } + if (table_options_.index_block_restart_interval < 1) { + table_options_.index_block_restart_interval = 1; + } } Status BlockBasedTableFactory::NewTableReader( @@ -150,6 +153,9 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const { snprintf(buffer, kBufferSize, " block_restart_interval: %d\n", table_options_.block_restart_interval); ret.append(buffer); + snprintf(buffer, kBufferSize, " index_block_restart_interval: %d\n", + table_options_.index_block_restart_interval); + ret.append(buffer); snprintf(buffer, kBufferSize, " filter_policy: %s\n", table_options_.filter_policy == nullptr ? 
"nullptr" : table_options_.filter_policy->Name()); diff --git a/table/table_test.cc b/table/table_test.cc index 0a84f2750..2bd28ca0a 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -637,6 +637,7 @@ class HarnessTest : public testing::Test { new FlushBlockBySizePolicyFactory()); table_options_.block_size = 256; table_options_.block_restart_interval = args.restart_interval; + table_options_.index_block_restart_interval = args.restart_interval; table_options_.format_version = args.format_version; options_.table_factory.reset( new BlockBasedTableFactory(table_options_)); @@ -2282,6 +2283,67 @@ TEST_F(HarnessTest, FooterTests) { } } +class IndexBlockRestartIntervalTest + : public BlockBasedTableTest, + public ::testing::WithParamInterface { + public: + static std::vector GetRestartValues() { return {-1, 0, 1, 8, 16, 32}; } +}; + +INSTANTIATE_TEST_CASE_P( + IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest, + ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues())); + +TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) { + const int kKeysInTable = 10000; + const int kKeySize = 100; + const int kValSize = 500; + + int index_block_restart_interval = GetParam(); + + Options options; + BlockBasedTableOptions table_options; + table_options.block_size = 64; // small block size to get big index block + table_options.index_block_restart_interval = index_block_restart_interval; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + TableConstructor c(BytewiseComparator()); + static Random rnd(301); + for (int i = 0; i < kKeysInTable; i++) { + InternalKey k(RandomString(&rnd, kKeySize), 0, kTypeValue); + c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize)); + } + + std::vector keys; + stl_wrappers::KVMap kvmap; + std::unique_ptr comparator( + new InternalKeyComparator(BytewiseComparator())); + const ImmutableCFOptions ioptions(options); + c.Finish(options, ioptions, table_options, *comparator, 
&keys, &kvmap); + auto reader = c.GetTableReader(); + + std::unique_ptr db_iter(reader->NewIterator(ReadOptions())); + + // Test point lookup + for (auto& kv : kvmap) { + db_iter->Seek(kv.first); + + ASSERT_TRUE(db_iter->Valid()); + ASSERT_OK(db_iter->status()); + ASSERT_EQ(db_iter->key(), kv.first); + ASSERT_EQ(db_iter->value(), kv.second); + } + + // Test iterating + auto kv_iter = kvmap.begin(); + for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { + ASSERT_EQ(db_iter->key(), kv_iter->first); + ASSERT_EQ(db_iter->value(), kv_iter->second); + kv_iter++; + } + ASSERT_EQ(kv_iter, kvmap.end()); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/util/options_helper.h b/util/options_helper.h index 4c4555aca..b0636adc5 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -486,6 +486,9 @@ static std::unordered_mapUniform(10000000); opt.block_size_deviation = rnd->Uniform(100); opt.block_restart_interval = rnd->Uniform(100); + opt.index_block_restart_interval = rnd->Uniform(100); opt.whole_key_filtering = rnd->Uniform(2); return opt; From 6f71d3b68b011569dfaf5c406a15319e4d66ca76 Mon Sep 17 00:00:00 2001 From: reid horuff Date: Fri, 5 Feb 2016 10:44:13 -0800 Subject: [PATCH 090/195] Improve perf of Pessimistic Transaction expirations (and optimistic transactions) Summary: copy from task 8196669: 1) Optimistic transactions do not support batching writes from different threads. 2) Pessimistic transactions do not support batching writes if an expiration time is set. In these 2 cases, we currently do not do any write batching in DBImpl::WriteImpl() because there is a WriteCallback that could decide at the last minute to abort the write. But we could support batching write operations with callbacks if we make sure to process the callbacks correctly. To do this, we would first need to modify write_thread.cc to stop preventing writes with callbacks from being batched together. 
Then we would need to change DBImpl::WriteImpl() to call all WriteCallback's in a batch, only write the batches that succeed, and correctly set the state of each batch's WriteThread::Writer. Test Plan: Added test WriteWithCallbackTest to write_callback_test.cc which creates multiple client threads and verifies that writes are batched and executed properly. Reviewers: hermanlee4, anthony, ngbronson Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D52863 --- db/db_impl.cc | 128 +++++----- db/db_test.cc | 2 - db/write_batch.cc | 15 +- db/write_batch_internal.h | 3 +- db/write_callback.h | 3 + db/write_callback_test.cc | 220 ++++++++++++++++++ db/write_thread.cc | 36 +-- db/write_thread.h | 49 +++- .../optimistic_transaction_impl.h | 2 + utilities/transactions/transaction_impl.h | 20 ++ 10 files changed, 386 insertions(+), 92 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index bc3866816..391bfa6db 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -4095,7 +4095,6 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, } Status status; - bool callback_failed = false; bool xfunc_attempted_write = false; XFUNC_TEST("transaction", "transaction_xftest_write_impl", @@ -4113,7 +4112,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, w.sync = write_options.sync; w.disableWAL = write_options.disableWAL; w.in_batch_group = false; - w.has_callback = (callback != nullptr) ? 
true : false; + w.callback = callback; if (!write_options.disableWAL) { RecordTick(stats_, WRITE_WITH_WAL); @@ -4126,30 +4125,32 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, // we are a non-leader in a parallel group PERF_TIMER_GUARD(write_memtable_time); - ColumnFamilyMemTablesImpl column_family_memtables( - versions_->GetColumnFamilySet()); - WriteBatchInternal::SetSequence(w.batch, w.sequence); - w.status = WriteBatchInternal::InsertInto( - w.batch, &column_family_memtables, &flush_scheduler_, - write_options.ignore_missing_column_families, 0 /*log_number*/, this, - true /*dont_filter_deletes*/, true /*concurrent_memtable_writes*/); + if (!w.CallbackFailed()) { + ColumnFamilyMemTablesImpl column_family_memtables( + versions_->GetColumnFamilySet()); + WriteBatchInternal::SetSequence(w.batch, w.sequence); + w.status = WriteBatchInternal::InsertInto( + w.batch, &column_family_memtables, &flush_scheduler_, + write_options.ignore_missing_column_families, 0 /*log_number*/, this, + true /*dont_filter_deletes*/, true /*concurrent_memtable_writes*/); + } if (write_thread_.CompleteParallelWorker(&w)) { // we're responsible for early exit - auto last_sequence = - w.parallel_group->last_writer->sequence + - WriteBatchInternal::Count(w.parallel_group->last_writer->batch) - 1; + auto last_sequence = w.parallel_group->last_sequence; SetTickerCount(stats_, SEQUENCE_NUMBER, last_sequence); versions_->SetLastSequence(last_sequence); write_thread_.EarlyExitParallelGroup(&w); } assert(w.state == WriteThread::STATE_COMPLETED); // STATE_COMPLETED conditional below handles exit + + status = w.FinalStatus(); } if (w.state == WriteThread::STATE_COMPLETED) { // write is complete and leader has updated sequence RecordTick(stats_, WRITE_DONE_BY_OTHER); - return w.status; + return w.FinalStatus(); } // else we are the leader of the write batch group assert(w.state == WriteThread::STATE_GROUP_LEADER); @@ -4255,7 +4256,7 @@ Status DBImpl::WriteImpl(const WriteOptions& 
write_options, uint64_t last_sequence = versions_->LastSequence(); WriteThread::Writer* last_writer = &w; - autovector write_batch_group; + autovector write_group; bool need_log_sync = !write_options.disableWAL && write_options.sync; bool need_log_dir_sync = need_log_sync && !log_dir_synced_; @@ -4274,24 +4275,15 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, // during this phase since &w is currently responsible for logging // and protects against concurrent loggers and concurrent writes // into memtables - - mutex_.Unlock(); - - if (callback != nullptr) { - // If this write has a validation callback, check to see if this write - // is able to be written. Must be called on the write thread. - status = callback->Callback(this); - callback_failed = true; - } - } else { - mutex_.Unlock(); } + mutex_.Unlock(); + // At this point the mutex is unlocked bool exit_completed_early = false; - last_batch_group_size_ = write_thread_.EnterAsBatchGroupLeader( - &w, &last_writer, &write_batch_group); + last_batch_group_size_ = + write_thread_.EnterAsBatchGroupLeader(&w, &last_writer, &write_group); if (status.ok()) { // Rules for when we can update the memtable concurrently @@ -4307,15 +4299,22 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, // assumed to be true. Rule 4 is checked for each batch. We could // relax rules 2 and 3 if we could prevent write batches from referring // more than once to a particular key. 
- bool parallel = db_options_.allow_concurrent_memtable_write && - write_batch_group.size() > 1; + bool parallel = + db_options_.allow_concurrent_memtable_write && write_group.size() > 1; int total_count = 0; uint64_t total_byte_size = 0; - for (auto b : write_batch_group) { - total_count += WriteBatchInternal::Count(b); - total_byte_size = WriteBatchInternal::AppendedByteSize( - total_byte_size, WriteBatchInternal::ByteSize(b)); - parallel = parallel && !b->HasMerge(); + for (auto writer : write_group) { + if (writer->CheckCallback(this)) { + total_count += WriteBatchInternal::Count(writer->batch); + total_byte_size = WriteBatchInternal::AppendedByteSize( + total_byte_size, WriteBatchInternal::ByteSize(writer->batch)); + parallel = parallel && !writer->batch->HasMerge(); + } + } + + if (total_count == 0) { + write_thread_.ExitAsBatchGroupLeader(&w, last_writer, status); + return w.FinalStatus(); } const SequenceNumber current_sequence = last_sequence + 1; @@ -4336,15 +4335,17 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, PERF_TIMER_GUARD(write_wal_time); WriteBatch* merged_batch = nullptr; - if (write_batch_group.size() == 1) { - merged_batch = write_batch_group[0]; + if (write_group.size() == 1) { + merged_batch = write_group[0]->batch; } else { // WAL needs all of the batches flattened into a single batch. 
// We could avoid copying here with an iov-like AddRecord // interface merged_batch = &tmp_batch_; - for (auto b : write_batch_group) { - WriteBatchInternal::Append(merged_batch, b); + for (auto writer : write_group) { + if (!writer->CallbackFailed()) { + WriteBatchInternal::Append(merged_batch, writer->batch); + } } } WriteBatchInternal::SetSequence(merged_batch, current_sequence); @@ -4405,7 +4406,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, } stats->AddDBStats(InternalStats::WAL_FILE_BYTES, log_size); } - uint64_t for_other = write_batch_group.size() - 1; + uint64_t for_other = write_group.size() - 1; if (for_other > 0) { stats->AddDBStats(InternalStats::WRITE_DONE_BY_OTHER, for_other); if (!write_options.disableWAL) { @@ -4416,43 +4417,50 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, if (!parallel) { status = WriteBatchInternal::InsertInto( - write_batch_group, current_sequence, column_family_memtables_.get(), + write_group, current_sequence, column_family_memtables_.get(), &flush_scheduler_, write_options.ignore_missing_column_families, 0 /*log_number*/, this, false /*dont_filter_deletes*/); + + if (status.ok()) { + // There were no write failures. Set leader's status + // in case the write callback returned a non-ok status. 
+ status = w.FinalStatus(); + } + } else { WriteThread::ParallelGroup pg; pg.leader = &w; pg.last_writer = last_writer; + pg.last_sequence = last_sequence; pg.early_exit_allowed = !need_log_sync; - pg.running.store(static_cast(write_batch_group.size()), + pg.running.store(static_cast(write_group.size()), std::memory_order_relaxed); write_thread_.LaunchParallelFollowers(&pg, current_sequence); - ColumnFamilyMemTablesImpl column_family_memtables( - versions_->GetColumnFamilySet()); - assert(w.sequence == current_sequence); - WriteBatchInternal::SetSequence(w.batch, w.sequence); - w.status = WriteBatchInternal::InsertInto( - w.batch, &column_family_memtables, &flush_scheduler_, - write_options.ignore_missing_column_families, 0 /*log_number*/, - this, true /*dont_filter_deletes*/, - true /*concurrent_memtable_writes*/); + if (!w.CallbackFailed()) { + // do leader write + ColumnFamilyMemTablesImpl column_family_memtables( + versions_->GetColumnFamilySet()); + assert(w.sequence == current_sequence); + WriteBatchInternal::SetSequence(w.batch, w.sequence); + w.status = WriteBatchInternal::InsertInto( + w.batch, &column_family_memtables, &flush_scheduler_, + write_options.ignore_missing_column_families, 0 /*log_number*/, + this, true /*dont_filter_deletes*/, + true /*concurrent_memtable_writes*/); + } - assert(last_writer->sequence + - WriteBatchInternal::Count(last_writer->batch) - 1 == - last_sequence); // CompleteParallelWorker returns true if this thread should // handle exit, false means somebody else did exit_completed_early = !write_thread_.CompleteParallelWorker(&w); - status = w.status; - assert(status.ok() || !exit_completed_early); + status = w.FinalStatus(); } - if (status.ok() && !exit_completed_early) { + if (!exit_completed_early && w.status.ok()) { SetTickerCount(stats_, SEQUENCE_NUMBER, last_sequence); versions_->SetLastSequence(last_sequence); if (!need_log_sync) { - write_thread_.ExitAsBatchGroupLeader(&w, last_writer, status); + 
write_thread_.ExitAsBatchGroupLeader(&w, last_writer, w.status); exit_completed_early = true; } } @@ -4465,14 +4473,14 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, // // Is setting bg_error_ enough here? This will at least stop // compaction and fail any further writes. - if (!status.ok() && bg_error_.ok()) { + if (!status.ok() && bg_error_.ok() && !w.CallbackFailed()) { bg_error_ = status; } } } PERF_TIMER_START(write_pre_and_post_process_time); - if (db_options_.paranoid_checks && !status.ok() && !callback_failed && + if (db_options_.paranoid_checks && !status.ok() && !w.CallbackFailed() && !status.IsBusy()) { mutex_.Lock(); if (bg_error_.ok()) { @@ -4488,7 +4496,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, } if (!exit_completed_early) { - write_thread_.ExitAsBatchGroupLeader(&w, last_writer, status); + write_thread_.ExitAsBatchGroupLeader(&w, last_writer, w.status); } return status; diff --git a/db/db_test.cc b/db/db_test.cc index d39cbfe29..c6be57a99 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -235,13 +235,11 @@ TEST_F(DBTest, WriteEmptyBatch) { CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "foo", "bar")); - env_->sync_counter_.store(0); WriteOptions wo; wo.sync = true; wo.disableWAL = false; WriteBatch empty_batch; ASSERT_OK(dbfull()->Write(wo, &empty_batch)); - ASSERT_GE(env_->sync_counter_.load(), 1); // make sure we can re-open it. 
ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); diff --git a/db/write_batch.cc b/db/write_batch.cc index 0565c0599..accc313e4 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -798,18 +798,23 @@ class MemTableInserter : public WriteBatch::Handler { // 3) During Write(), in a concurrent context where memtables has been cloned // The reason is that it calls memtables->Seek(), which has a stateful cache Status WriteBatchInternal::InsertInto( - const autovector& batches, SequenceNumber sequence, + const autovector& writers, SequenceNumber sequence, ColumnFamilyMemTables* memtables, FlushScheduler* flush_scheduler, bool ignore_missing_column_families, uint64_t log_number, DB* db, const bool dont_filter_deletes, bool concurrent_memtable_writes) { MemTableInserter inserter(sequence, memtables, flush_scheduler, ignore_missing_column_families, log_number, db, dont_filter_deletes, concurrent_memtable_writes); - Status rv = Status::OK(); - for (size_t i = 0; i < batches.size() && rv.ok(); ++i) { - rv = batches[i]->Iterate(&inserter); + + for (size_t i = 0; i < writers.size(); i++) { + if (!writers[i]->CallbackFailed()) { + writers[i]->status = writers[i]->batch->Iterate(&inserter); + if (!writers[i]->status.ok()) { + return writers[i]->status; + } + } } - return rv; + return Status::OK(); } Status WriteBatchInternal::InsertInto(const WriteBatch* batch, diff --git a/db/write_batch_internal.h b/db/write_batch_internal.h index d75d2ef65..1ee234b84 100644 --- a/db/write_batch_internal.h +++ b/db/write_batch_internal.h @@ -9,6 +9,7 @@ #pragma once #include +#include "db/write_thread.h" #include "rocksdb/types.h" #include "rocksdb/write_batch.h" #include "rocksdb/db.h" @@ -134,7 +135,7 @@ class WriteBatchInternal { // // Under concurrent use, the caller is responsible for making sure that // the memtables object itself is thread-local. 
- static Status InsertInto(const autovector& batches, + static Status InsertInto(const autovector& batches, SequenceNumber sequence, ColumnFamilyMemTables* memtables, FlushScheduler* flush_scheduler, diff --git a/db/write_callback.h b/db/write_callback.h index 7dcca96fe..a549f415a 100644 --- a/db/write_callback.h +++ b/db/write_callback.h @@ -19,6 +19,9 @@ class WriteCallback { // this function returns a non-OK status, the write will be aborted and this // status will be returned to the caller of DB::Write(). virtual Status Callback(DB* db) = 0; + + // return true if writes with this callback can be batched with other writes + virtual bool AllowWriteBatching() = 0; }; } // namespace rocksdb diff --git a/db/write_callback_test.cc b/db/write_callback_test.cc index 47b7cf72a..3b76fd2d1 100644 --- a/db/write_callback_test.cc +++ b/db/write_callback_test.cc @@ -6,12 +6,15 @@ #ifndef ROCKSDB_LITE #include +#include +#include #include "db/db_impl.h" #include "db/write_callback.h" #include "rocksdb/db.h" #include "rocksdb/write_batch.h" #include "util/logging.h" +#include "util/sync_point.h" #include "util/testharness.h" using std::string; @@ -42,6 +45,8 @@ class WriteCallbackTestWriteCallback1 : public WriteCallback { return Status::OK(); } + + bool AllowWriteBatching() override { return true; } }; class WriteCallbackTestWriteCallback2 : public WriteCallback { @@ -49,8 +54,223 @@ class WriteCallbackTestWriteCallback2 : public WriteCallback { Status Callback(DB *db) override { return Status::Busy(); } + bool AllowWriteBatching() override { return true; } }; +class MockWriteCallback : public WriteCallback { + public: + bool should_fail_ = false; + bool was_called_ = false; + bool allow_batching_ = false; + + Status Callback(DB* db) override { + was_called_ = true; + if (should_fail_) { + return Status::Busy(); + } else { + return Status::OK(); + } + } + + bool AllowWriteBatching() override { return allow_batching_; } +}; + +TEST_F(WriteCallbackTest, WriteWithCallbackTest) { 
+ struct WriteOP { + WriteOP(bool should_fail = false) { callback_.should_fail_ = should_fail; } + + void Put(const string& key, const string& val) { + kvs_.push_back(std::make_pair(key, val)); + write_batch_.Put(key, val); + } + + void Clear() { + kvs_.clear(); + write_batch_.Clear(); + callback_.was_called_ = false; + } + + MockWriteCallback callback_; + WriteBatch write_batch_; + std::vector> kvs_; + }; + + std::vector> write_scenarios = { + {true}, + {false}, + {false, false}, + {true, true}, + {true, false}, + {false, true}, + {false, false, false}, + {true, true, true}, + {false, true, false}, + {true, false, true}, + {true, false, false, false, false}, + {false, false, false, false, true}, + {false, false, true, false, true}, + }; + + for (auto& allow_parallel : {true, false}) { + for (auto& allow_batching : {true, false}) { + for (auto& write_group : write_scenarios) { + Options options; + options.create_if_missing = true; + options.allow_concurrent_memtable_write = allow_parallel; + + WriteOptions write_options; + ReadOptions read_options; + DB* db; + DBImpl* db_impl; + + ASSERT_OK(DB::Open(options, dbname, &db)); + + db_impl = dynamic_cast(db); + ASSERT_TRUE(db_impl); + + std::atomic threads_waiting(0); + std::atomic seq(db_impl->GetLatestSequenceNumber()); + ASSERT_EQ(db_impl->GetLatestSequenceNumber(), 0); + + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "WriteThread::JoinBatchGroup:Wait", [&](void* arg) { + uint64_t cur_threads_waiting = 0; + bool is_leader = false; + bool is_last = false; + + // who am i + do { + cur_threads_waiting = threads_waiting.load(); + is_leader = (cur_threads_waiting == 0); + is_last = (cur_threads_waiting == write_group.size() - 1); + } while (!threads_waiting.compare_exchange_strong( + cur_threads_waiting, cur_threads_waiting + 1)); + + // check my state + auto* writer = reinterpret_cast(arg); + + if (is_leader) { + ASSERT_TRUE(writer->state == + WriteThread::State::STATE_GROUP_LEADER); + } else { + 
ASSERT_TRUE(writer->state == WriteThread::State::STATE_INIT); + } + + // (meta test) the first WriteOP should indeed be the first + // and the last should be the last (all others can be out of + // order) + if (is_leader) { + ASSERT_TRUE(writer->callback->Callback(nullptr).ok() == + !write_group.front().callback_.should_fail_); + } else if (is_last) { + ASSERT_TRUE(writer->callback->Callback(nullptr).ok() == + !write_group.back().callback_.should_fail_); + } + + // wait for friends + while (threads_waiting.load() < write_group.size()) { + } + }); + + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "WriteThread::JoinBatchGroup:DoneWaiting", [&](void* arg) { + // check my state + auto* writer = reinterpret_cast(arg); + + if (!allow_batching) { + // no batching so everyone should be a leader + ASSERT_TRUE(writer->state == + WriteThread::State::STATE_GROUP_LEADER); + } else if (!allow_parallel) { + ASSERT_TRUE(writer->state == + WriteThread::State::STATE_COMPLETED); + } + }); + + std::atomic thread_num(0); + std::atomic dummy_key(0); + std::function write_with_callback_func = [&]() { + uint32_t i = thread_num.fetch_add(1); + Random rnd(i); + + // leaders gotta lead + while (i > 0 && threads_waiting.load() < 1) { + } + + // loser has to lose + while (i == write_group.size() - 1 && + threads_waiting.load() < write_group.size() - 1) { + } + + auto& write_op = write_group.at(i); + write_op.Clear(); + write_op.callback_.allow_batching_ = allow_batching; + + // insert some keys + for (uint32_t j = 0; j < rnd.Next() % 50; j++) { + // grab unique key + char my_key = 0; + do { + my_key = dummy_key.load(); + } while (!dummy_key.compare_exchange_strong(my_key, my_key + 1)); + + string skey(5, my_key); + string sval(10, my_key); + write_op.Put(skey, sval); + + if (!write_op.callback_.should_fail_) { + seq.fetch_add(1); + } + } + + WriteOptions woptions; + Status s = db_impl->WriteWithCallback( + woptions, &write_op.write_batch_, &write_op.callback_); + + if 
(write_op.callback_.should_fail_) { + ASSERT_TRUE(s.IsBusy()); + } else { + ASSERT_OK(s); + } + }; + + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + // do all the writes + std::vector threads; + for (uint32_t i = 0; i < write_group.size(); i++) { + threads.emplace_back(write_with_callback_func); + } + for (auto& t : threads) { + t.join(); + } + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + + // check for keys + string value; + for (auto& w : write_group) { + ASSERT_TRUE(w.callback_.was_called_); + for (auto& kvp : w.kvs_) { + if (w.callback_.should_fail_) { + ASSERT_TRUE( + db->Get(read_options, kvp.first, &value).IsNotFound()); + } else { + ASSERT_OK(db->Get(read_options, kvp.first, &value)); + ASSERT_EQ(value, kvp.second); + } + } + } + + ASSERT_EQ(seq.load(), db_impl->GetLatestSequenceNumber()); + + delete db; + DestroyDB(dbname, options); + } + } + } +} + TEST_F(WriteCallbackTest, WriteCallBackTest) { Options options; WriteOptions write_options; diff --git a/db/write_thread.cc b/db/write_thread.cc index e153f319b..ce269f664 100644 --- a/db/write_thread.cc +++ b/db/write_thread.cc @@ -218,21 +218,25 @@ void WriteThread::JoinBatchGroup(Writer* w) { assert(w->batch != nullptr); bool linked_as_leader; LinkOne(w, &linked_as_leader); + + TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:Wait", w); + if (!linked_as_leader) { AwaitState(w, STATE_GROUP_LEADER | STATE_PARALLEL_FOLLOWER | STATE_COMPLETED, &ctx); + TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:DoneWaiting", w); } } size_t WriteThread::EnterAsBatchGroupLeader( Writer* leader, WriteThread::Writer** last_writer, - autovector* write_batch_group) { + autovector* write_batch_group) { assert(leader->link_older == nullptr); assert(leader->batch != nullptr); size_t size = WriteBatchInternal::ByteSize(leader->batch); - write_batch_group->push_back(leader->batch); + write_batch_group->push_back(leader); // Allow the group to grow up to a maximum size, but if the // original 
write is small, limit the growth so we do not slow @@ -244,12 +248,6 @@ size_t WriteThread::EnterAsBatchGroupLeader( *last_writer = leader; - if (leader->has_callback) { - // TODO(agiardullo:) Batching not currently supported as this write may - // fail if the callback function decides to abort this write. - return size; - } - Writer* newest_writer = newest_writer_.load(std::memory_order_acquire); // This is safe regardless of any db mutex status of the caller. Previous @@ -276,18 +274,17 @@ size_t WriteThread::EnterAsBatchGroupLeader( break; } - if (w->has_callback) { - // Do not include writes which may be aborted if the callback does not - // succeed. - break; - } - if (w->batch == nullptr) { // Do not include those writes with nullptr batch. Those are not writes, // those are something else. They want to be alone break; } + if (w->callback != nullptr && !w->callback->AllowWriteBatching()) { + // dont batch writes that don't want to be batched + break; + } + auto batch_size = WriteBatchInternal::ByteSize(w->batch); if (size + batch_size > max_size) { // Do not make batch too big @@ -295,7 +292,7 @@ size_t WriteThread::EnterAsBatchGroupLeader( } size += batch_size; - write_batch_group->push_back(w->batch); + write_batch_group->push_back(w); w->in_batch_group = true; *last_writer = w; } @@ -313,7 +310,10 @@ void WriteThread::LaunchParallelFollowers(ParallelGroup* pg, w->sequence = sequence; while (w != pg->last_writer) { - sequence += WriteBatchInternal::Count(w->batch); + // Writers that won't write don't get sequence allotment + if (!w->CallbackFailed()) { + sequence += WriteBatchInternal::Count(w->batch); + } w = w->link_newer; w->sequence = sequence; @@ -330,6 +330,7 @@ bool WriteThread::CompleteParallelWorker(Writer* w) { std::lock_guard guard(w->StateMutex()); pg->status = w->status; } + auto leader = pg->leader; auto early_exit_allowed = pg->early_exit_allowed; @@ -364,8 +365,8 @@ void WriteThread::EarlyExitParallelGroup(Writer* w) { assert(w->state == 
STATE_PARALLEL_FOLLOWER); assert(pg->status.ok()); ExitAsBatchGroupLeader(pg->leader, pg->last_writer, pg->status); - assert(w->state == STATE_COMPLETED); assert(w->status.ok()); + assert(w->state == STATE_COMPLETED); SetState(pg->leader, STATE_COMPLETED); } @@ -407,7 +408,6 @@ void WriteThread::ExitAsBatchGroupLeader(Writer* leader, Writer* last_writer, while (last_writer != leader) { last_writer->status = status; - // we need to read link_older before calling SetState, because as soon // as it is marked committed the other thread's Await may return and // deallocate the Writer. diff --git a/db/write_thread.h b/db/write_thread.h index e31904ed1..b1dbaca32 100644 --- a/db/write_thread.h +++ b/db/write_thread.h @@ -13,8 +13,10 @@ #include #include #include -#include "db/write_batch_internal.h" +#include "db/write_callback.h" +#include "rocksdb/types.h" #include "rocksdb/status.h" +#include "rocksdb/write_batch.h" #include "util/autovector.h" #include "util/instrumented_mutex.h" @@ -65,6 +67,7 @@ class WriteThread { struct ParallelGroup { Writer* leader; Writer* last_writer; + SequenceNumber last_sequence; bool early_exit_allowed; // before running goes to zero, status needs leader->StateMutex() Status status; @@ -77,12 +80,13 @@ class WriteThread { bool sync; bool disableWAL; bool in_batch_group; - bool has_callback; + WriteCallback* callback; bool made_waitable; // records lazy construction of mutex and cv std::atomic state; // write under StateMutex() or pre-link ParallelGroup* parallel_group; SequenceNumber sequence; // the sequence number to use - Status status; + Status status; // status of memtable inserter + Status callback_status; // status returned by callback->Callback() std::aligned_storage::type state_mutex_bytes; std::aligned_storage::type state_cv_bytes; Writer* link_older; // read/write only before linking, or as leader @@ -93,9 +97,10 @@ class WriteThread { sync(false), disableWAL(false), in_batch_group(false), - has_callback(false), + 
callback(nullptr), made_waitable(false), state(STATE_INIT), + parallel_group(nullptr), link_older(nullptr), link_newer(nullptr) {} @@ -106,6 +111,13 @@ class WriteThread { } } + bool CheckCallback(DB* db) { + if (callback != nullptr) { + callback_status = callback->Callback(db); + } + return callback_status.ok(); + } + void CreateMutex() { if (!made_waitable) { // Note that made_waitable is tracked separately from state @@ -117,6 +129,30 @@ class WriteThread { } } + // returns the aggregate status of this Writer + Status FinalStatus() { + if (!status.ok()) { + // a non-ok memtable write status takes presidence + assert(callback == nullptr || callback_status.ok()); + return status; + } else if (!callback_status.ok()) { + // if the callback failed then that is the status we want + // because a memtable insert should not have been attempted + assert(callback != nullptr); + assert(status.ok()); + return callback_status; + } else { + // if there is no callback then we only care about + // the memtable insert status + assert(callback == nullptr || callback_status.ok()); + return status; + } + } + + bool CallbackFailed() { + return (callback != nullptr) && !callback_status.ok(); + } + // No other mutexes may be acquired while holding StateMutex(), it is // always last in the order std::mutex& StateMutex() { @@ -160,8 +196,9 @@ class WriteThread { // Writer** last_writer: Out-param that identifies the last follower // autovector* write_batch_group: Out-param of group members // returns: Total batch group byte size - size_t EnterAsBatchGroupLeader(Writer* leader, Writer** last_writer, - autovector* write_batch_group); + size_t EnterAsBatchGroupLeader( + Writer* leader, Writer** last_writer, + autovector* write_batch_group); // Causes JoinBatchGroup to return STATE_PARALLEL_FOLLOWER for all of the // non-leader members of this write batch group. 
Sets Writer::sequence diff --git a/utilities/transactions/optimistic_transaction_impl.h b/utilities/transactions/optimistic_transaction_impl.h index a18561efd..36db5e94c 100644 --- a/utilities/transactions/optimistic_transaction_impl.h +++ b/utilities/transactions/optimistic_transaction_impl.h @@ -71,6 +71,8 @@ class OptimisticTransactionCallback : public WriteCallback { return txn_->CheckTransactionForConflicts(db); } + bool AllowWriteBatching() override { return false; } + private: OptimisticTransactionImpl* txn_; }; diff --git a/utilities/transactions/transaction_impl.h b/utilities/transactions/transaction_impl.h index caed15d3a..37a556ef6 100644 --- a/utilities/transactions/transaction_impl.h +++ b/utilities/transactions/transaction_impl.h @@ -110,6 +110,26 @@ class TransactionImpl : public TransactionBaseImpl { void operator=(const TransactionImpl&); }; +// Used at commit time to check whether transaction is committing before its +// expiration time. +class TransactionCallback : public WriteCallback { + public: + explicit TransactionCallback(TransactionImpl* txn) : txn_(txn) {} + + Status Callback(DB* db) override { + if (txn_->IsExpired()) { + return Status::Expired(); + } else { + return Status::OK(); + } + } + + bool AllowWriteBatching() override { return true; } + + private: + TransactionImpl* txn_; +}; + } // namespace rocksdb #endif // ROCKSDB_LITE From 73a9b0f4ba62b304786f2d12f430fb7b875bbed6 Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 5 Feb 2016 12:10:23 -0800 Subject: [PATCH 091/195] Update version to 4.5 Summary: Time to cut branch for release 4.5. Change the versions. 
Test Plan: Not needed Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, andrewkr, anthony Reviewed By: anthony Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53883 --- HISTORY.md | 2 ++ include/rocksdb/version.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index f2476b8f1..a6f58d16a 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,7 @@ # Rocksdb Change Log ## Unreleased + +## 4.5.0 (2/5/2016) ### Public API Changes * Add a new perf context level between kEnableCount and kEnableTime. Level 2 now doesn't include timers for mutexes. * Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll. diff --git a/include/rocksdb/version.h b/include/rocksdb/version.h index d8e93db42..9f55c0070 100644 --- a/include/rocksdb/version.h +++ b/include/rocksdb/version.h @@ -5,7 +5,7 @@ #pragma once #define ROCKSDB_MAJOR 4 -#define ROCKSDB_MINOR 4 +#define ROCKSDB_MINOR 5 #define ROCKSDB_PATCH 0 // Do not use these. We made the mistake of declaring macros starting with From 8ed3438778329f2b81803559a5f481e8a0d4dcb8 Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Fri, 5 Feb 2016 13:20:56 -0800 Subject: [PATCH 092/195] Add option to run fillseq with WAL enabled in addition to WAL disabled Summary: This set of changes is part of the work to introduce benchmark for universal style compaction in RocksDB. It's conceptually separate from the compaction work, so sending it out as a separate diff to get it out of the way. Test Plan: - Run `./tools/run_flash_bench.sh`. - Look at the contents of `report.txt` and `report2.txt` to make sure that data is reported and attributed correctly. - During `db_bench` execution time make sure that the correct flags are passed to `--disable_wal` depending on the benchmark being executed. 
Reviewers: MarkCallaghan Reviewed By: MarkCallaghan Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53865 --- tools/benchmark.sh | 38 ++++++++++++++++++++++++++++++-------- tools/run_flash_bench.sh | 19 +++++++++++++++---- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/tools/benchmark.sh b/tools/benchmark.sh index b0d1babd9..09a2fb369 100755 --- a/tools/benchmark.sh +++ b/tools/benchmark.sh @@ -109,6 +109,10 @@ function summarize_result { test_name=$2 bench_name=$3 + # Note that this function assumes that the benchmark executes long enough so + # that "Compaction Stats" is written to stdout at least once. If it won't + # happen then empty output from grep when searching for "Sum" will cause + # syntax errors. uptime=$( grep ^Uptime\(secs $test_out | tail -1 | awk '{ printf "%.0f", $2 }' ) stall_time=$( grep "^Cumulative stall" $test_out | tail -1 | awk '{ print $3 }' ) stall_pct=$( grep "^Cumulative stall" $test_out| tail -1 | awk '{ print $5 }' ) @@ -159,8 +163,22 @@ function run_bulkload { } function run_fillseq { - # This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the - # client can discover where to restart a load after a crash. I think this is a good way to load. + # This runs with a vector memtable. WAL can be either disabled or enabled + # depending on the input parameter (1 for disabled, 0 for enabled). The main + # benefit behind disabling WAL is to make loading faster. It is still crash + # safe and the client can discover where to restart a load after a crash. I + # think this is a good way to load. + + # Make sure that we'll have unique names for all the files so that data won't + # be overwritten. 
+ if [ $1 == 1 ]; then + log_file_name=$output_dir/benchmark_fillseq.wal_disabled.v${value_size}.log + test_name=fillseq.wal_disabled.v${value_size} + else + log_file_name=$output_dir/benchmark_fillseq.wal_enabled.v${value_size}.log + test_name=fillseq.wal_enabled.v${value_size} + fi + echo "Loading $num_keys keys sequentially" cmd="./db_bench --benchmarks=fillseq \ --use_existing_db=0 \ @@ -169,12 +187,14 @@ function run_fillseq { --min_level_to_compress=0 \ --threads=1 \ --memtablerep=vector \ - --disable_wal=1 \ + --disable_wal=$1 \ --seed=$( date +%s ) \ - 2>&1 | tee -a $output_dir/benchmark_fillseq.v${value_size}.log" - echo $cmd | tee $output_dir/benchmark_fillseq.v${value_size}.log + 2>&1 | tee -a $log_file_name" + echo $cmd | tee $log_file_name eval $cmd - summarize_result $output_dir/benchmark_fillseq.v${value_size}.log fillseq.v${value_size} fillseq + + # The constant "fillseq" which we pass to db_bench is the benchmark name. + summarize_result $log_file_name $test_name fillseq } function run_change { @@ -310,8 +330,10 @@ for job in ${jobs[@]}; do start=$(now) if [ $job = bulkload ]; then run_bulkload - elif [ $job = fillseq ]; then - run_fillseq + elif [ $job = fillseq_disable_wal ]; then + run_fillseq 1 + elif [ $job = fillseq_enable_wal ]; then + run_fillseq 0 elif [ $job = overwrite ]; then run_change overwrite elif [ $job = updaterandom ]; then diff --git a/tools/run_flash_bench.sh b/tools/run_flash_bench.sh index c24b0f5eb..873b2c7ca 100755 --- a/tools/run_flash_bench.sh +++ b/tools/run_flash_bench.sh @@ -137,10 +137,17 @@ if [[ $do_setup != 0 ]]; then # Test 2a: sequential fill with large values to get peak ingest # adjust NUM_KEYS given the use of larger values env $ARGS BLOCK_SIZE=$((1 * M)) VALUE_SIZE=$((32 * K)) NUM_KEYS=$(( num_keys / 64 )) \ - ./tools/benchmark.sh fillseq + ./tools/benchmark.sh fillseq_disable_wal # Test 2b: sequential fill with the configured value size - env $ARGS ./tools/benchmark.sh fillseq + env $ARGS 
./tools/benchmark.sh fillseq_disable_wal + + # Test 2c: same as 2a, but with WAL being enabled. + env $ARGS BLOCK_SIZE=$((1 * M)) VALUE_SIZE=$((32 * K)) NUM_KEYS=$(( num_keys / 64 )) \ + ./tools/benchmark.sh fillseq_enable_wal + + # Test 2d: same as 2b, but with WAL being enabled. + env $ARGS ./tools/benchmark.sh fillseq_enable_wal # Test 3: single-threaded overwrite env $ARGS NUM_THREADS=1 DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh overwrite @@ -263,9 +270,13 @@ if [[ $skip_low_pri_tests != 1 ]]; then grep bulkload $output_dir/report.txt >> $output_dir/report2.txt fi -echo fillseq >> $output_dir/report2.txt +echo fillseq_wal_disabled >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt -grep fillseq $output_dir/report.txt >> $output_dir/report2.txt +grep fillseq.wal_disabled $output_dir/report.txt >> $output_dir/report2.txt + +echo fillseq_wal_enabled >> $output_dir/report2.txt +head -1 $output_dir/report.txt >> $output_dir/report2.txt +grep fillseq.wal_enabled $output_dir/report.txt >> $output_dir/report2.txt echo overwrite sync=0 >> $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt From b1887c5dd972b41c791b3ef751b6cee974b77652 Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 5 Feb 2016 13:46:17 -0800 Subject: [PATCH 093/195] Explicitly fail when memtable doesn't support concurrent insert Summary: If users turn on concurrent insert but the memtable doesn't support it, they might see an unexpected crash. Fix it by explicitly failing. Test Plan: Run different settings of stress_test and make sure it fails correctly. Will add a unit test too.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, andrewkr, ngbronson Reviewed By: ngbronson Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53895 --- db/column_family.cc | 4 ++++ db/db_test.cc | 22 ++++++++++++++++++++++ include/rocksdb/memtablerep.h | 10 ++++++---- memtable/skiplistrep.cc | 2 -- tools/db_crashtest.py | 2 +- 5 files changed, 33 insertions(+), 7 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index ca3be7855..d472d8d7f 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -135,6 +135,10 @@ Status CheckConcurrentWritesSupported(const ColumnFamilyOptions& cf_options) { "Delete filtering (filter_deletes) is not compatible with concurrent " "memtable writes (allow_concurrent_memtable_writes)"); } + if (!cf_options.memtable_factory->IsInsertConcurrentlySupported()) { + return Status::InvalidArgument( + "Memtable doesn't concurrent writes (allow_concurrent_memtable_write)"); + } return Status::OK(); } diff --git a/db/db_test.cc b/db/db_test.cc index c6be57a99..569f6e334 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -5736,8 +5736,30 @@ TEST_F(DBTest, TableOptionsSanitizeTest) { options.prefix_extractor.reset(NewFixedPrefixTransform(1)); ASSERT_OK(TryReopen(options)); } + #endif // ROCKSDB_LITE +TEST_F(DBTest, ConcurrentMemtableNotSupported) { + Options options = CurrentOptions(); + options.allow_concurrent_memtable_write = true; + options.soft_pending_compaction_bytes_limit = 0; + options.hard_pending_compaction_bytes_limit = 100; + options.create_if_missing = true; + + DestroyDB(dbname_, options); + options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4)); + ASSERT_NOK(TryReopen(options)); + + options.memtable_factory.reset(new SkipListFactory); + ASSERT_OK(TryReopen(options)); + + ColumnFamilyOptions cf_options(options); + cf_options.memtable_factory.reset( + NewHashLinkListRepFactory(4, 0, 3, true, 4)); + ColumnFamilyHandle* handle; + 
ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle)); +} + TEST_F(DBTest, SanitizeNumThreads) { for (int attempt = 0; attempt < 2; attempt++) { const size_t kTotalTasks = 8; diff --git a/include/rocksdb/memtablerep.h b/include/rocksdb/memtablerep.h index 6cd92d823..75717907d 100644 --- a/include/rocksdb/memtablerep.h +++ b/include/rocksdb/memtablerep.h @@ -188,10 +188,6 @@ class MemTableRep { // Default: true virtual bool IsSnapshotSupported() const { return true; } - // Return true if the current MemTableRep supports concurrent inserts - // Default: false - virtual bool IsInsertConcurrentlySupported() const { return false; } - protected: // When *key is an internal key concatenated with the value, returns the // user key. @@ -210,6 +206,10 @@ class MemTableRepFactory { const SliceTransform*, Logger* logger) = 0; virtual const char* Name() const = 0; + + // Return true if the current MemTableRep supports concurrent inserts + // Default: false + virtual bool IsInsertConcurrentlySupported() const { return false; } }; // This uses a skip list to store keys. It is the default. 
@@ -229,6 +229,8 @@ class SkipListFactory : public MemTableRepFactory { Logger* logger) override; virtual const char* Name() const override { return "SkipListFactory"; } + bool IsInsertConcurrentlySupported() const override { return true; } + private: const size_t lookahead_; }; diff --git a/memtable/skiplistrep.cc b/memtable/skiplistrep.cc index 7108008a8..3588b568a 100644 --- a/memtable/skiplistrep.cc +++ b/memtable/skiplistrep.cc @@ -25,8 +25,6 @@ public: transform_(transform), lookahead_(lookahead) { } - virtual bool IsInsertConcurrentlySupported() const override { return true; } - virtual KeyHandle Allocate(const size_t len, char** buf) override { *buf = skip_list_.AllocateKey(len); return static_cast(*buf); diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 449d85427..a10ee6c24 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -24,7 +24,7 @@ default_params = { "disable_data_sync": 0, "disable_wal": 0, "filter_deletes": lambda: random.randint(0, 1), - "allow_concurrent_memtable_write": lambda: random.randint(0, 1), + "allow_concurrent_memtable_write": 0, "iterpercent": 10, "max_background_compactions": 20, "max_bytes_for_level_base": 10485760, From a76e9093f06a47251e0ebd8ba2d54019388a554f Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 5 Feb 2016 14:26:26 -0800 Subject: [PATCH 094/195] Fix LITE db_test build broken by previous commit Summary: Previous commit introduces a test that is not supported in LITE. Fix it. Test Plan: Build the test with ROCKSDB_LITE. 
Reviewers: kradhakrishnan, IslamAbdelRahman, anthony, yhchiang, andrewkr Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53901 --- db/db_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index 569f6e334..7e93f35a7 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -5737,8 +5737,6 @@ TEST_F(DBTest, TableOptionsSanitizeTest) { ASSERT_OK(TryReopen(options)); } -#endif // ROCKSDB_LITE - TEST_F(DBTest, ConcurrentMemtableNotSupported) { Options options = CurrentOptions(); options.allow_concurrent_memtable_write = true; @@ -5760,6 +5758,8 @@ TEST_F(DBTest, ConcurrentMemtableNotSupported) { ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle)); } +#endif // ROCKSDB_LITE + TEST_F(DBTest, SanitizeNumThreads) { for (int attempt = 0; attempt < 2; attempt++) { const size_t kTotalTasks = 8; From 2608219cc90c82cdce839073f78c4d4ca143ca8c Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 5 Feb 2016 14:30:18 -0800 Subject: [PATCH 095/195] crash_test: cover concurrent memtable insert in default crash test Summary: Default crash test uses prefix hash memtable, which is not compatible to concurrent memtable. Allow prefix test run with skip list and use skip list memtable when concurrent insert is used. Test Plan: Run "python -u tools/db_crashtest.py whitebox" and watch sometimes skip list is used. Reviewers: anthony, yhchiang, kradhakrishnan, andrewkr, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D53907 --- tools/db_crashtest.py | 1 + tools/db_stress.cc | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index a10ee6c24..3fab88d88 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -134,6 +134,7 @@ def finalize_and_sanitize(src_params): # --allow_concurrent_memtable_write with --filter_deletes is not supported. 
if dest_params.get("allow_concurrent_memtable_write", 1) == 1: dest_params["filter_deletes"] = 0 + dest_params["memtablerep"] = "skip_list" return dest_params diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 10193a182..8e0f9796a 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -2008,11 +2008,17 @@ class StressTest { FLAGS_allow_concurrent_memtable_write; options_.enable_write_thread_adaptive_yield = FLAGS_enable_write_thread_adaptive_yield; - if ((FLAGS_prefix_size == 0) == (FLAGS_rep_factory == kHashSkipList)) { + + if (FLAGS_prefix_size == 0 && FLAGS_rep_factory == kHashSkipList) { fprintf(stderr, - "prefix_size should be non-zero iff memtablerep == prefix_hash\n"); + "prefeix_size cannot be zero if memtablerep == prefix_hash\n"); exit(1); } + if (FLAGS_prefix_size != 0 && FLAGS_rep_factory != kHashSkipList) { + fprintf(stderr, + "WARNING: prefix_size is non-zero but " + "memtablerep != prefix_hash\n"); + } switch (FLAGS_rep_factory) { case kSkipList: // no need to do anything From fe93bf9b5da6e91c171969c2f4655b31873d4443 Mon Sep 17 00:00:00 2001 From: agiardullo Date: Mon, 14 Sep 2015 17:11:52 -0700 Subject: [PATCH 096/195] Transaction::UndoGetForUpdate Summary: MyRocks wants to be able to un-lock a key that was just locked by GetForUpdate(). To do this safely, I am now keeping track of the number of reads(for update) and writes for each key in a transaction. UndoGetForUpdate() will only unlock a key if it hasn't been written and the read count reaches 0. 
Test Plan: more unit tests Reviewers: igor, rven, yhchiang, spetrunia, sdong Reviewed By: spetrunia, sdong Subscribers: spetrunia, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D47043 --- include/rocksdb/utilities/transaction.h | 22 ++ .../optimistic_transaction_impl.cc | 5 +- .../optimistic_transaction_impl.h | 7 +- .../optimistic_transaction_test.cc | 153 ++++++++ utilities/transactions/transaction_base.cc | 202 ++++++++-- utilities/transactions/transaction_base.h | 26 +- utilities/transactions/transaction_impl.cc | 21 +- utilities/transactions/transaction_impl.h | 5 +- utilities/transactions/transaction_test.cc | 353 +++++++++++++++++- utilities/transactions/transaction_util.cc | 2 +- utilities/transactions/transaction_util.h | 13 +- 11 files changed, 751 insertions(+), 58 deletions(-) diff --git a/include/rocksdb/utilities/transaction.h b/include/rocksdb/utilities/transaction.h index 8e9ead11c..4c8ca4dfd 100644 --- a/include/rocksdb/utilities/transaction.h +++ b/include/rocksdb/utilities/transaction.h @@ -356,6 +356,28 @@ class Transaction { // Reset the WriteOptions that will be used during Commit(). virtual void SetWriteOptions(const WriteOptions& write_options) = 0; + // If this key was previously fetched in this transaction using + // GetForUpdate/MultigetForUpdate(), calling UndoGetForUpdate will tell + // the transaction that it no longer needs to do any conflict checking + // for this key. + // + // If a key has been fetched N times via GetForUpdate/MultigetForUpdate(), + // then UndoGetForUpdate will only have an effect if it is also called N + // times. If this key has been written to in this transaction, + // UndoGetForUpdate() will have no effect. + // + // If SetSavePoint() has been called after the GetForUpdate(), + // UndoGetForUpdate() will not have any effect. 
+ // + // If this Transaction was created by an OptimisticTransactionDB, + // calling UndoGetForUpdate can affect whether this key is conflict checked + // at commit time. + // If this Transaction was created by a TransactionDB, + // calling UndoGetForUpdate may release any held locks for this key. + virtual void UndoGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) = 0; + virtual void UndoGetForUpdate(const Slice& key) = 0; + protected: explicit Transaction(const TransactionDB* db) {} Transaction() {} diff --git a/utilities/transactions/optimistic_transaction_impl.cc b/utilities/transactions/optimistic_transaction_impl.cc index 897e2711c..24ecd11d7 100644 --- a/utilities/transactions/optimistic_transaction_impl.cc +++ b/utilities/transactions/optimistic_transaction_impl.cc @@ -67,7 +67,8 @@ void OptimisticTransactionImpl::Rollback() { Clear(); } // Record this key so that we can check it for conflicts at commit time. Status OptimisticTransactionImpl::TryLock(ColumnFamilyHandle* column_family, - const Slice& key, bool untracked) { + const Slice& key, bool read_only, + bool untracked) { if (untracked) { return Status::OK(); } @@ -84,7 +85,7 @@ Status OptimisticTransactionImpl::TryLock(ColumnFamilyHandle* column_family, std::string key_str = key.ToString(); - TrackKey(cfh_id, key_str, seq); + TrackKey(cfh_id, key_str, seq, read_only); // Always return OK. Confilct checking will happen at commit time. 
return Status::OK(); diff --git a/utilities/transactions/optimistic_transaction_impl.h b/utilities/transactions/optimistic_transaction_impl.h index 36db5e94c..cdca23269 100644 --- a/utilities/transactions/optimistic_transaction_impl.h +++ b/utilities/transactions/optimistic_transaction_impl.h @@ -40,7 +40,7 @@ class OptimisticTransactionImpl : public TransactionBaseImpl { protected: Status TryLock(ColumnFamilyHandle* column_family, const Slice& key, - bool untracked = false) override; + bool read_only, bool untracked = false) override; private: OptimisticTransactionDB* const txn_db_; @@ -56,6 +56,11 @@ class OptimisticTransactionImpl : public TransactionBaseImpl { void Clear() override; + void UnlockGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) override { + // Nothing to unlock. + } + // No copying allowed OptimisticTransactionImpl(const OptimisticTransactionImpl&); void operator=(const OptimisticTransactionImpl&); diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index 6fe7e95f9..e3ac43796 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -1114,6 +1114,159 @@ TEST_F(OptimisticTransactionTest, SavepointTest) { delete txn; } +TEST_F(OptimisticTransactionTest, UndoGetForUpdateTest) { + WriteOptions write_options; + ReadOptions read_options, snapshot_read_options; + OptimisticTransactionOptions txn_options; + string value; + Status s; + + db->Put(write_options, "A", ""); + + Transaction* txn1 = txn_db->BeginTransaction(write_options); + ASSERT_TRUE(txn1); + + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + + Transaction* txn2 = txn_db->BeginTransaction(write_options); + txn2->Put("A", "x"); + s = txn2->Commit(); + ASSERT_OK(s); + delete txn2; + + // Verify that txn1 can commit since A isn't conflict checked + s = txn1->Commit(); + 
ASSERT_OK(s); + delete txn1; + + txn1 = txn_db->BeginTransaction(write_options); + txn1->Put("A", "a"); + + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + + txn2 = txn_db->BeginTransaction(write_options); + txn2->Put("A", "x"); + s = txn2->Commit(); + ASSERT_OK(s); + delete txn2; + + // Verify that txn1 cannot commit since A will still be conflict checked + s = txn1->Commit(); + ASSERT_TRUE(s.IsBusy()); + delete txn1; + + txn1 = txn_db->BeginTransaction(write_options); + + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + + txn2 = txn_db->BeginTransaction(write_options); + txn2->Put("A", "x"); + s = txn2->Commit(); + ASSERT_OK(s); + delete txn2; + + // Verify that txn1 cannot commit since A will still be conflict checked + s = txn1->Commit(); + ASSERT_TRUE(s.IsBusy()); + delete txn1; + + txn1 = txn_db->BeginTransaction(write_options); + + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + txn1->UndoGetForUpdate("A"); + + txn2 = txn_db->BeginTransaction(write_options); + txn2->Put("A", "x"); + s = txn2->Commit(); + ASSERT_OK(s); + delete txn2; + + // Verify that txn1 can commit since A isn't conflict checked + s = txn1->Commit(); + ASSERT_OK(s); + delete txn1; + + txn1 = txn_db->BeginTransaction(write_options); + + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + + txn1->SetSavePoint(); + txn1->UndoGetForUpdate("A"); + + txn2 = txn_db->BeginTransaction(write_options); + txn2->Put("A", "x"); + s = txn2->Commit(); + ASSERT_OK(s); + delete txn2; + + // Verify that txn1 cannot commit since A will still be conflict checked + s = txn1->Commit(); + ASSERT_TRUE(s.IsBusy()); + delete txn1; + + txn1 = txn_db->BeginTransaction(write_options); + + s = 
txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + + txn1->SetSavePoint(); + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + txn1->UndoGetForUpdate("A"); + + txn2 = txn_db->BeginTransaction(write_options); + txn2->Put("A", "x"); + s = txn2->Commit(); + ASSERT_OK(s); + delete txn2; + + // Verify that txn1 cannot commit since A will still be conflict checked + s = txn1->Commit(); + ASSERT_TRUE(s.IsBusy()); + delete txn1; + + txn1 = txn_db->BeginTransaction(write_options); + + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + + txn1->SetSavePoint(); + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + txn1->UndoGetForUpdate("A"); + + txn1->RollbackToSavePoint(); + txn1->UndoGetForUpdate("A"); + + txn2 = txn_db->BeginTransaction(write_options); + txn2->Put("A", "x"); + s = txn2->Commit(); + ASSERT_OK(s); + delete txn2; + + // Verify that txn1 can commit since A isn't conflict checked + s = txn1->Commit(); + ASSERT_OK(s); + delete txn1; +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/utilities/transactions/transaction_base.cc b/utilities/transactions/transaction_base.cc index aeea21e73..cd5ebe98a 100644 --- a/utilities/transactions/transaction_base.cc +++ b/utilities/transactions/transaction_base.cc @@ -66,7 +66,8 @@ void TransactionBaseImpl::SetSnapshotIfNeeded() { } Status TransactionBaseImpl::TryLock(ColumnFamilyHandle* column_family, - const SliceParts& key, bool untracked) { + const SliceParts& key, bool read_only, + bool untracked) { size_t key_size = 0; for (int i = 0; i < key.num_parts; ++i) { key_size += key.parts[i].size(); @@ -79,7 +80,7 @@ Status TransactionBaseImpl::TryLock(ColumnFamilyHandle* column_family, str.append(key.parts[i].data(), key.parts[i].size()); } - return TryLock(column_family, str, untracked); + return TryLock(column_family, str, read_only, untracked); } void TransactionBaseImpl::SetSavePoint() { @@ -107,15 +108,35 @@ Status 
TransactionBaseImpl::RollbackToSavePoint() { assert(s.ok()); // Rollback any keys that were tracked since the last savepoint - const TransactionKeyMap* key_map = GetTrackedKeysSinceSavePoint(); - assert(key_map); - for (auto& key_map_iter : *key_map) { + const TransactionKeyMap& key_map = save_point.new_keys_; + for (const auto& key_map_iter : key_map) { uint32_t column_family_id = key_map_iter.first; auto& keys = key_map_iter.second; - for (auto& key_iter : keys) { + auto& cf_tracked_keys = tracked_keys_[column_family_id]; + + for (const auto& key_iter : keys) { const std::string& key = key_iter.first; - tracked_keys_[column_family_id].erase(key); + uint32_t num_reads = key_iter.second.num_reads; + uint32_t num_writes = key_iter.second.num_writes; + + auto tracked_keys_iter = cf_tracked_keys.find(key); + assert(tracked_keys_iter != cf_tracked_keys.end()); + + // Decrement the total reads/writes of this key by the number of + // reads/writes done since the last SavePoint. + if (num_reads > 0) { + assert(tracked_keys_iter->second.num_reads >= num_reads); + tracked_keys_iter->second.num_reads -= num_reads; + } + if (num_writes > 0) { + assert(tracked_keys_iter->second.num_writes >= num_writes); + tracked_keys_iter->second.num_writes -= num_writes; + } + if (tracked_keys_iter->second.num_reads == 0 && + tracked_keys_iter->second.num_writes == 0) { + tracked_keys_[column_family_id].erase(tracked_keys_iter); + } } } @@ -138,7 +159,7 @@ Status TransactionBaseImpl::Get(const ReadOptions& read_options, Status TransactionBaseImpl::GetForUpdate(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value) { - Status s = TryLock(column_family, key); + Status s = TryLock(column_family, key, true /* read_only */); if (s.ok() && value != nullptr) { s = Get(read_options, column_family, key, value); @@ -172,7 +193,7 @@ std::vector TransactionBaseImpl::MultiGetForUpdate( // Lock all keys for (size_t i = 0; i < num_keys; ++i) { - Status s 
= TryLock(column_family[i], keys[i]); + Status s = TryLock(column_family[i], keys[i], true /* read_only */); if (!s.ok()) { // Fail entire multiget if we cannot lock all keys return std::vector(num_keys, s); @@ -206,7 +227,7 @@ Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options, Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) { - Status s = TryLock(column_family, key); + Status s = TryLock(column_family, key, false /* read_only */); if (s.ok()) { GetBatchForWrite()->Put(column_family, key, value); @@ -219,7 +240,7 @@ Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family, const SliceParts& key, const SliceParts& value) { - Status s = TryLock(column_family, key); + Status s = TryLock(column_family, key, false /* read_only */); if (s.ok()) { GetBatchForWrite()->Put(column_family, key, value); @@ -231,7 +252,7 @@ Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::Merge(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) { - Status s = TryLock(column_family, key); + Status s = TryLock(column_family, key, false /* read_only */); if (s.ok()) { GetBatchForWrite()->Merge(column_family, key, value); @@ -243,7 +264,7 @@ Status TransactionBaseImpl::Merge(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family, const Slice& key) { - Status s = TryLock(column_family, key); + Status s = TryLock(column_family, key, false /* read_only */); if (s.ok()) { GetBatchForWrite()->Delete(column_family, key); @@ -255,7 +276,7 @@ Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family, const SliceParts& key) { - Status s = TryLock(column_family, key); + Status s = TryLock(column_family, key, false /* read_only */); if (s.ok()) { 
GetBatchForWrite()->Delete(column_family, key); @@ -267,7 +288,7 @@ Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family, const Slice& key) { - Status s = TryLock(column_family, key); + Status s = TryLock(column_family, key, false /* read_only */); if (s.ok()) { GetBatchForWrite()->SingleDelete(column_family, key); @@ -279,7 +300,7 @@ Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family, const SliceParts& key) { - Status s = TryLock(column_family, key); + Status s = TryLock(column_family, key, false /* read_only */); if (s.ok()) { GetBatchForWrite()->SingleDelete(column_family, key); @@ -291,8 +312,8 @@ Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::PutUntracked(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) { - bool untracked = true; - Status s = TryLock(column_family, key, untracked); + Status s = + TryLock(column_family, key, false /* read_only */, true /* untracked */); if (s.ok()) { GetBatchForWrite()->Put(column_family, key, value); @@ -305,8 +326,8 @@ Status TransactionBaseImpl::PutUntracked(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::PutUntracked(ColumnFamilyHandle* column_family, const SliceParts& key, const SliceParts& value) { - bool untracked = true; - Status s = TryLock(column_family, key, untracked); + Status s = + TryLock(column_family, key, false /* read_only */, true /* untracked */); if (s.ok()) { GetBatchForWrite()->Put(column_family, key, value); @@ -319,8 +340,8 @@ Status TransactionBaseImpl::PutUntracked(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::MergeUntracked(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) { - bool untracked = true; - Status s = TryLock(column_family, key, untracked); + Status s = + 
TryLock(column_family, key, false /* read_only */, true /* untracked */); if (s.ok()) { GetBatchForWrite()->Merge(column_family, key, value); @@ -332,8 +353,8 @@ Status TransactionBaseImpl::MergeUntracked(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::DeleteUntracked(ColumnFamilyHandle* column_family, const Slice& key) { - bool untracked = true; - Status s = TryLock(column_family, key, untracked); + Status s = + TryLock(column_family, key, false /* read_only */, true /* untracked */); if (s.ok()) { GetBatchForWrite()->Delete(column_family, key); @@ -345,8 +366,8 @@ Status TransactionBaseImpl::DeleteUntracked(ColumnFamilyHandle* column_family, Status TransactionBaseImpl::DeleteUntracked(ColumnFamilyHandle* column_family, const SliceParts& key) { - bool untracked = true; - Status s = TryLock(column_family, key, untracked); + Status s = + TryLock(column_family, key, false /* read_only */, true /* untracked */); if (s.ok()) { GetBatchForWrite()->Delete(column_family, key); @@ -387,26 +408,73 @@ uint64_t TransactionBaseImpl::GetNumKeys() const { } void TransactionBaseImpl::TrackKey(uint32_t cfh_id, const std::string& key, - SequenceNumber seq) { - auto iter = tracked_keys_[cfh_id].find(key); - if (iter == tracked_keys_[cfh_id].end()) { - tracked_keys_[cfh_id].insert({key, seq}); + SequenceNumber seq, bool read_only) { + // Update map of all tracked keys for this transaction + TrackKey(&tracked_keys_, cfh_id, key, seq, read_only); - if (save_points_ != nullptr && !save_points_->empty()) { - // Aren't tracking this key, add it. 
- save_points_->top().new_keys_[cfh_id][key] = seq; - } - } else if (seq < iter->second) { - // Now tracking this key with an earlier sequence number - iter->second = seq; + if (save_points_ != nullptr && !save_points_->empty()) { + // Update map of tracked keys in this SavePoint + TrackKey(&save_points_->top().new_keys_, cfh_id, key, seq, read_only); } } -const TransactionKeyMap* TransactionBaseImpl::GetTrackedKeysSinceSavePoint() { - if (save_points_ != nullptr && !save_points_->empty()) { - return &save_points_->top().new_keys_; +// Add a key to the given TransactionKeyMap +void TransactionBaseImpl::TrackKey(TransactionKeyMap* key_map, uint32_t cfh_id, + const std::string& key, SequenceNumber seq, + bool read_only) { + auto& cf_key_map = (*key_map)[cfh_id]; + auto iter = cf_key_map.find(key); + if (iter == cf_key_map.end()) { + auto result = cf_key_map.insert({key, TransactionKeyMapInfo(seq)}); + iter = result.first; + } else if (seq < iter->second.seq) { + // Now tracking this key with an earlier sequence number + iter->second.seq = seq; } + if (read_only) { + iter->second.num_reads++; + } else { + iter->second.num_writes++; + } +} + +std::unique_ptr +TransactionBaseImpl::GetTrackedKeysSinceSavePoint() { + if (save_points_ != nullptr && !save_points_->empty()) { + // Examine the number of reads/writes performed on all keys written + // since the last SavePoint and compare to the total number of reads/writes + // for each key. 
+ TransactionKeyMap* result = new TransactionKeyMap(); + for (const auto& key_map_iter : save_points_->top().new_keys_) { + uint32_t column_family_id = key_map_iter.first; + auto& keys = key_map_iter.second; + + auto& cf_tracked_keys = tracked_keys_[column_family_id]; + + for (const auto& key_iter : keys) { + const std::string& key = key_iter.first; + uint32_t num_reads = key_iter.second.num_reads; + uint32_t num_writes = key_iter.second.num_writes; + + auto total_key_info = cf_tracked_keys.find(key); + assert(total_key_info != cf_tracked_keys.end()); + assert(total_key_info->second.num_reads >= num_reads); + assert(total_key_info->second.num_writes >= num_writes); + + if (total_key_info->second.num_reads == num_reads && + total_key_info->second.num_writes == num_writes) { + // All the reads/writes to this key were done in the last savepoint. + bool read_only = (num_writes == 0); + TrackKey(result, column_family_id, key, key_iter.second.seq, + read_only); + } + } + } + return std::unique_ptr(result); + } + + // No SavePoint return nullptr; } @@ -428,6 +496,60 @@ void TransactionBaseImpl::ReleaseSnapshot(const Snapshot* snapshot, DB* db) { db->ReleaseSnapshot(snapshot); } +void TransactionBaseImpl::UndoGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) { + uint32_t column_family_id = GetColumnFamilyID(column_family); + auto& cf_tracked_keys = tracked_keys_[column_family_id]; + std::string key_str = key.ToString(); + bool can_decrement = false; + bool can_unlock = false; + + if (save_points_ != nullptr && !save_points_->empty()) { + // Check if this key was fetched ForUpdate in this SavePoint + auto& cf_savepoint_keys = save_points_->top().new_keys_[column_family_id]; + + auto savepoint_iter = cf_savepoint_keys.find(key_str); + if (savepoint_iter != cf_savepoint_keys.end()) { + if (savepoint_iter->second.num_reads > 0) { + savepoint_iter->second.num_reads--; + can_decrement = true; + + if (savepoint_iter->second.num_reads == 0 && + 
savepoint_iter->second.num_writes == 0) { + // No other GetForUpdates or write on this key in this SavePoint + cf_savepoint_keys.erase(savepoint_iter); + can_unlock = true; + } + } + } + } else { + // No SavePoint set + can_decrement = true; + can_unlock = true; + } + + // We can only decrement the read count for this key if we were able to + // decrement the read count in the current SavePoint, OR if there is no + // SavePoint set. + if (can_decrement) { + auto key_iter = cf_tracked_keys.find(key_str); + + if (key_iter != cf_tracked_keys.end()) { + if (key_iter->second.num_reads > 0) { + key_iter->second.num_reads--; + + if (key_iter->second.num_reads == 0 && + key_iter->second.num_writes == 0) { + // No other GetForUpdates or writes on this key + assert(can_unlock); + cf_tracked_keys.erase(key_iter); + UnlockGetForUpdate(column_family, key); + } + } + } + } +} + } // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/utilities/transactions/transaction_base.h b/utilities/transactions/transaction_base.h index 3fe3513b9..5a6f4799e 100644 --- a/utilities/transactions/transaction_base.h +++ b/utilities/transactions/transaction_base.h @@ -37,7 +37,7 @@ class TransactionBaseImpl : public Transaction { // untracked will be true if called from PutUntracked, DeleteUntracked, or // MergeUntracked. virtual Status TryLock(ColumnFamilyHandle* column_family, const Slice& key, - bool untracked = false) = 0; + bool read_only, bool untracked = false) = 0; void SetSavePoint() override; @@ -192,6 +192,12 @@ class TransactionBaseImpl : public Transaction { uint64_t GetNumKeys() const override; + void UndoGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) override; + void UndoGetForUpdate(const Slice& key) override { + return UndoGetForUpdate(nullptr, key); + }; + // Get list of keys in this transaction that must not have any conflicts // with writes in other transactions. 
const TransactionKeyMap& GetTrackedKeys() const { return tracked_keys_; } @@ -207,10 +213,22 @@ class TransactionBaseImpl : public Transaction { protected: // Add a key to the list of tracked keys. + // // seqno is the earliest seqno this key was involved with this transaction. - void TrackKey(uint32_t cfh_id, const std::string& key, SequenceNumber seqno); + // readonly should be set to true if no data was written for this key + void TrackKey(uint32_t cfh_id, const std::string& key, SequenceNumber seqno, + bool readonly); - const TransactionKeyMap* GetTrackedKeysSinceSavePoint(); + // Helper function to add a key to the given TransactionKeyMap + static void TrackKey(TransactionKeyMap* key_map, uint32_t cfh_id, + const std::string& key, SequenceNumber seqno, + bool readonly); + + // Called when UndoGetForUpdate determines that this key can be unlocked. + virtual void UnlockGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) = 0; + + std::unique_ptr GetTrackedKeysSinceSavePoint(); // Sets a snapshot if SetSnapshotOnNextOperation() has been called. 
void SetSnapshotIfNeeded(); @@ -285,7 +303,7 @@ class TransactionBaseImpl : public Transaction { std::shared_ptr snapshot_notifier_ = nullptr; Status TryLock(ColumnFamilyHandle* column_family, const SliceParts& key, - bool untracked = false); + bool read_only, bool untracked = false); WriteBatchBase* GetBatchForWrite(); }; diff --git a/utilities/transactions/transaction_impl.cc b/utilities/transactions/transaction_impl.cc index 2602d30e6..e8b57018e 100644 --- a/utilities/transactions/transaction_impl.cc +++ b/utilities/transactions/transaction_impl.cc @@ -143,9 +143,11 @@ void TransactionImpl::Rollback() { Clear(); } Status TransactionImpl::RollbackToSavePoint() { // Unlock any keys locked since last transaction - const TransactionKeyMap* keys = GetTrackedKeysSinceSavePoint(); + const std::unique_ptr& keys = + GetTrackedKeysSinceSavePoint(); + if (keys) { - txn_db_impl_->UnLock(this, keys); + txn_db_impl_->UnLock(this, keys.get()); } return TransactionBaseImpl::RollbackToSavePoint(); @@ -210,7 +212,8 @@ Status TransactionImpl::LockBatch(WriteBatch* batch, if (!s.ok()) { break; } - (*keys_to_unlock)[cfh_id].insert({std::move(key), kMaxSequenceNumber}); + TrackKey(keys_to_unlock, cfh_id, std::move(key), kMaxSequenceNumber, + false); } if (!s.ok()) { @@ -231,7 +234,8 @@ Status TransactionImpl::LockBatch(WriteBatch* batch, // this key will only be locked if there have been no writes to this key since // the snapshot time. 
Status TransactionImpl::TryLock(ColumnFamilyHandle* column_family, - const Slice& key, bool untracked) { + const Slice& key, bool read_only, + bool untracked) { uint32_t cfh_id = GetColumnFamilyID(column_family); std::string key_str = key.ToString(); bool previously_locked; @@ -251,7 +255,7 @@ Status TransactionImpl::TryLock(ColumnFamilyHandle* column_family, previously_locked = false; } else { previously_locked = true; - current_seqno = iter->second; + current_seqno = iter->second.seq; } } @@ -298,7 +302,7 @@ Status TransactionImpl::TryLock(ColumnFamilyHandle* column_family, if (s.ok()) { // Let base class know we've conflict checked this key. - TrackKey(cfh_id, key_str, new_seqno); + TrackKey(cfh_id, key_str, new_seqno, read_only); } return s; @@ -340,6 +344,11 @@ bool TransactionImpl::TryStealingLocks() { LOCKS_STOLEN); } +void TransactionImpl::UnlockGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) { + txn_db_impl_->UnLock(this, GetColumnFamilyID(column_family), key.ToString()); +} + } // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/utilities/transactions/transaction_impl.h b/utilities/transactions/transaction_impl.h index 37a556ef6..94dbe7df3 100644 --- a/utilities/transactions/transaction_impl.h +++ b/utilities/transactions/transaction_impl.h @@ -71,7 +71,7 @@ class TransactionImpl : public TransactionBaseImpl { protected: Status TryLock(ColumnFamilyHandle* column_family, const Slice& key, - bool untracked = false) override; + bool read_only, bool untracked = false) override; private: enum ExecutionStatus { STARTED, COMMITTING, LOCKS_STOLEN }; @@ -105,6 +105,9 @@ class TransactionImpl : public TransactionBaseImpl { void RollbackLastN(size_t num); + void UnlockGetForUpdate(ColumnFamilyHandle* column_family, + const Slice& key) override; + // No copying allowed TransactionImpl(const TransactionImpl&); void operator=(const TransactionImpl&); diff --git a/utilities/transactions/transaction_test.cc 
b/utilities/transactions/transaction_test.cc index 859b02bce..a81a4ce8f 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -1717,9 +1717,8 @@ TEST_F(TransactionTest, SavepointTest) { TEST_F(TransactionTest, SavepointTest2) { WriteOptions write_options; - ReadOptions read_options, snapshot_read_options; + ReadOptions read_options; TransactionOptions txn_options; - string value; Status s; txn_options.lock_timeout = 1; // 1 ms @@ -1814,6 +1813,356 @@ TEST_F(TransactionTest, SavepointTest2) { delete txn2; } +TEST_F(TransactionTest, UndoGetForUpdateTest) { + WriteOptions write_options; + ReadOptions read_options; + TransactionOptions txn_options; + string value; + Status s; + + txn_options.lock_timeout = 1; // 1 ms + Transaction* txn1 = db->BeginTransaction(write_options, txn_options); + ASSERT_TRUE(txn1); + + txn1->UndoGetForUpdate("A"); + + s = txn1->Commit(); + ASSERT_OK(s); + delete txn1; + + txn1 = db->BeginTransaction(write_options, txn_options); + + txn1->UndoGetForUpdate("A"); + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_TRUE(s.IsNotFound()); + + // Verify that A is locked + Transaction* txn2 = db->BeginTransaction(write_options, txn_options); + s = txn2->Put("A", "a"); + ASSERT_TRUE(s.IsTimedOut()); + + txn1->UndoGetForUpdate("A"); + + // Verify that A is now unlocked + s = txn2->Put("A", "a2"); + ASSERT_OK(s); + txn2->Commit(); + delete txn2; + s = db->Get(read_options, "A", &value); + ASSERT_OK(s); + ASSERT_EQ("a2", value); + + s = txn1->Delete("A"); + ASSERT_OK(s); + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_TRUE(s.IsNotFound()); + + s = txn1->Put("B", "b3"); + ASSERT_OK(s); + s = txn1->GetForUpdate(read_options, "B", &value); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + txn1->UndoGetForUpdate("B"); + + // Verify that A and B are still locked + txn2 = db->BeginTransaction(write_options, txn_options); + s = txn2->Put("A", "a4"); + ASSERT_TRUE(s.IsTimedOut()); + s 
= txn2->Put("B", "b4"); + ASSERT_TRUE(s.IsTimedOut()); + + txn1->Rollback(); + delete txn1; + + // Verify that A and B are no longer locked + s = txn2->Put("A", "a5"); + ASSERT_OK(s); + s = txn2->Put("B", "b5"); + ASSERT_OK(s); + s = txn2->Commit(); + delete txn2; + ASSERT_OK(s); + + txn1 = db->BeginTransaction(write_options, txn_options); + + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + s = txn1->GetForUpdate(read_options, "C", &value); + ASSERT_TRUE(s.IsNotFound()); + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + s = txn1->GetForUpdate(read_options, "C", &value); + ASSERT_TRUE(s.IsNotFound()); + s = txn1->GetForUpdate(read_options, "B", &value); + ASSERT_OK(s); + s = txn1->Put("B", "b5"); + s = txn1->GetForUpdate(read_options, "B", &value); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + txn1->UndoGetForUpdate("B"); + txn1->UndoGetForUpdate("C"); + txn1->UndoGetForUpdate("X"); + + // Verify A,B,C are locked + txn2 = db->BeginTransaction(write_options, txn_options); + s = txn2->Put("A", "a6"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Delete("B"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("C", "c6"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("X", "x6"); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + txn1->UndoGetForUpdate("B"); + txn1->UndoGetForUpdate("C"); + txn1->UndoGetForUpdate("X"); + + // Verify A,B are locked and C is not + s = txn2->Put("A", "a6"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Delete("B"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("C", "c6"); + ASSERT_OK(s); + s = txn2->Put("X", "x6"); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + txn1->UndoGetForUpdate("B"); + txn1->UndoGetForUpdate("C"); + txn1->UndoGetForUpdate("X"); + + // Verify B is locked and A and C are not + s = txn2->Put("A", "a7"); + ASSERT_OK(s); + s = txn2->Delete("B"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("C", "c7"); + 
ASSERT_OK(s); + s = txn2->Put("X", "x7"); + ASSERT_OK(s); + + s = txn2->Commit(); + ASSERT_OK(s); + delete txn2; + + s = txn1->Commit(); + ASSERT_OK(s); + delete txn1; +} + +TEST_F(TransactionTest, UndoGetForUpdateTest2) { + WriteOptions write_options; + ReadOptions read_options; + TransactionOptions txn_options; + string value; + Status s; + + s = db->Put(write_options, "A", ""); + ASSERT_OK(s); + + txn_options.lock_timeout = 1; // 1 ms + Transaction* txn1 = db->BeginTransaction(write_options, txn_options); + ASSERT_TRUE(txn1); + + s = txn1->GetForUpdate(read_options, "A", &value); + ASSERT_OK(s); + s = txn1->GetForUpdate(read_options, "B", &value); + ASSERT_TRUE(s.IsNotFound()); + + s = txn1->Put("F", "f"); + ASSERT_OK(s); + + txn1->SetSavePoint(); // 1 + + txn1->UndoGetForUpdate("A"); + + s = txn1->GetForUpdate(read_options, "C", &value); + ASSERT_TRUE(s.IsNotFound()); + s = txn1->GetForUpdate(read_options, "D", &value); + ASSERT_TRUE(s.IsNotFound()); + + s = txn1->Put("E", "e"); + ASSERT_OK(s); + s = txn1->GetForUpdate(read_options, "E", &value); + ASSERT_OK(s); + + s = txn1->GetForUpdate(read_options, "F", &value); + ASSERT_OK(s); + + // Verify A,B,C,D,E,F are still locked + Transaction* txn2 = db->BeginTransaction(write_options, txn_options); + s = txn2->Put("A", "a1"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("B", "b1"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("C", "c1"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("D", "d1"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("E", "e1"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("F", "f1"); + ASSERT_TRUE(s.IsTimedOut()); + + txn1->UndoGetForUpdate("C"); + txn1->UndoGetForUpdate("E"); + + // Verify A,B,D,E,F are still locked and C is not. 
+ s = txn2->Put("A", "a2"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("B", "b2"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("D", "d2"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("E", "e2"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("F", "f2"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("C", "c2"); + ASSERT_OK(s); + + txn1->SetSavePoint(); // 2 + + s = txn1->Put("H", "h"); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + txn1->UndoGetForUpdate("B"); + txn1->UndoGetForUpdate("C"); + txn1->UndoGetForUpdate("D"); + txn1->UndoGetForUpdate("E"); + txn1->UndoGetForUpdate("F"); + txn1->UndoGetForUpdate("G"); + txn1->UndoGetForUpdate("H"); + + // Verify A,B,D,E,F,H are still locked and C,G are not. + s = txn2->Put("A", "a3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("B", "b3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("D", "d3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("E", "e3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("F", "f3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("H", "h3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("C", "c3"); + ASSERT_OK(s); + s = txn2->Put("G", "g3"); + ASSERT_OK(s); + + txn1->RollbackToSavePoint(); // rollback to 2 + + // Verify A,B,D,E,F are still locked and C,G,H are not. 
+ s = txn2->Put("A", "a3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("B", "b3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("D", "d3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("E", "e3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("F", "f3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("C", "c3"); + ASSERT_OK(s); + s = txn2->Put("G", "g3"); + ASSERT_OK(s); + s = txn2->Put("H", "h3"); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + txn1->UndoGetForUpdate("B"); + txn1->UndoGetForUpdate("C"); + txn1->UndoGetForUpdate("D"); + txn1->UndoGetForUpdate("E"); + txn1->UndoGetForUpdate("F"); + txn1->UndoGetForUpdate("G"); + txn1->UndoGetForUpdate("H"); + + // Verify A,B,E,F are still locked and C,D,G,H are not. + s = txn2->Put("A", "a3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("B", "b3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("E", "e3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("F", "f3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("C", "c3"); + ASSERT_OK(s); + s = txn2->Put("D", "d3"); + ASSERT_OK(s); + s = txn2->Put("G", "g3"); + ASSERT_OK(s); + s = txn2->Put("H", "h3"); + ASSERT_OK(s); + + txn1->RollbackToSavePoint(); // rollback to 1 + + // Verify A,B,F are still locked and C,D,E,G,H are not. + s = txn2->Put("A", "a3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("B", "b3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("F", "f3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("C", "c3"); + ASSERT_OK(s); + s = txn2->Put("D", "d3"); + ASSERT_OK(s); + s = txn2->Put("E", "e3"); + ASSERT_OK(s); + s = txn2->Put("G", "g3"); + ASSERT_OK(s); + s = txn2->Put("H", "h3"); + ASSERT_OK(s); + + txn1->UndoGetForUpdate("A"); + txn1->UndoGetForUpdate("B"); + txn1->UndoGetForUpdate("C"); + txn1->UndoGetForUpdate("D"); + txn1->UndoGetForUpdate("E"); + txn1->UndoGetForUpdate("F"); + txn1->UndoGetForUpdate("G"); + txn1->UndoGetForUpdate("H"); + + // Verify F is still locked and A,B,C,D,E,G,H are not. 
+ s = txn2->Put("F", "f3"); + ASSERT_TRUE(s.IsTimedOut()); + s = txn2->Put("A", "a3"); + ASSERT_OK(s); + s = txn2->Put("B", "b3"); + ASSERT_OK(s); + s = txn2->Put("C", "c3"); + ASSERT_OK(s); + s = txn2->Put("D", "d3"); + ASSERT_OK(s); + s = txn2->Put("E", "e3"); + ASSERT_OK(s); + s = txn2->Put("G", "g3"); + ASSERT_OK(s); + s = txn2->Put("H", "h3"); + ASSERT_OK(s); + + s = txn1->Commit(); + ASSERT_OK(s); + s = txn2->Commit(); + ASSERT_OK(s); + + delete txn1; + delete txn2; +} + TEST_F(TransactionTest, TimeoutTest) { WriteOptions write_options; ReadOptions read_options; diff --git a/utilities/transactions/transaction_util.cc b/utilities/transactions/transaction_util.cc index 0cf4c7329..d4622bd2a 100644 --- a/utilities/transactions/transaction_util.cc +++ b/utilities/transactions/transaction_util.cc @@ -137,7 +137,7 @@ Status TransactionUtil::CheckKeysForConflicts(DBImpl* db_impl, // written to this key since the start of the transaction. for (const auto& key_iter : keys) { const auto& key = key_iter.first; - const SequenceNumber key_seq = key_iter.second; + const SequenceNumber key_seq = key_iter.second.seq; result = CheckKey(db_impl, sv, earliest_seq, key_seq, key, cache_only); diff --git a/utilities/transactions/transaction_util.h b/utilities/transactions/transaction_util.h index b2ce7da19..1bb880ce0 100644 --- a/utilities/transactions/transaction_util.h +++ b/utilities/transactions/transaction_util.h @@ -17,9 +17,20 @@ namespace rocksdb { +struct TransactionKeyMapInfo { + // Earliest sequence number that is relevant to this transaction for this key + SequenceNumber seq; + + uint32_t num_writes; + uint32_t num_reads; + + explicit TransactionKeyMapInfo(SequenceNumber seq_no) + : seq(seq_no), num_writes(0), num_reads(0) {} +}; + using TransactionKeyMap = std::unordered_map>; + std::unordered_map>; class DBImpl; struct SuperVersion; From 4a8cbf4e3196931d48e63c29f60830cab7b85b9c Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Tue, 9 Feb 2016 11:20:22 -0800 
Subject: [PATCH 097/195] Allows Get and MultiGet to read directly from SST files. Summary: Add kSstFileTier to ReadTier, which allows Get and MultiGet to read only directly from SST files and skip mem-tables. kSstFileTier = 0x2 // data in SST files. // Note that this ReadTier currently only supports // Get and MultiGet and does not support iterators. Test Plan: add new test in db_test. Reviewers: anthony, IslamAbdelRahman, rven, kradhakrishnan, sdong Reviewed By: sdong Subscribers: igor, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53511 --- HISTORY.md | 3 +- db/db_impl.cc | 54 +++++++++---- db/db_impl.h | 5 +- db/db_test.cc | 129 ++++++++++++++++++++++++++++++ db/table_cache.cc | 2 +- include/rocksdb/options.h | 8 +- table/block_based_table_reader.cc | 3 +- 7 files changed, 183 insertions(+), 21 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index a6f58d16a..9a14a8bf4 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,12 +3,13 @@ ## 4.5.0 (2/5/2016) ### Public API Changes -* Add a new perf context level between kEnableCount and kEnableTime. Level 2 now doesn't include timers for mutexes. +* Add a new perf context level between kEnableCount and kEnableTime. Level 2 now does not include timers for mutexes. * Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll. * DBOptions::delete_scheduler and NewDeleteScheduler() are removed, please use DBOptions::sst_file_manager and NewSstFileManager() instead ### New Features * ldb tool now supports operations to non-default column families. +* Add kPersistedTier to ReadTier. This option allows Get and MultiGet to read only the persisted data and skip mem-tables if writes were done with disableWAL = true. +* Add DBOptions::sst_file_manager. 
Use NewSstFileManager() in include/rocksdb/sst_file_manager.h to create a SstFileManager that can be used to track the total size of SST files and control the SST files deletion rate. ## 4.4.0 (1/14/2016) diff --git a/db/db_impl.cc b/db/db_impl.cc index 391bfa6db..87fb9fca7 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -275,7 +275,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname) db_options_.delete_obsolete_files_period_micros), last_stats_dump_time_microsec_(0), next_job_id_(1), - flush_on_destroy_(false), + has_unpersisted_data_(false), env_options_(db_options_), #ifndef ROCKSDB_LITE wal_manager_(db_options_, env_options_), @@ -322,7 +322,8 @@ void DBImpl::CancelAllBackgroundWork(bool wait) { DBImpl::~DBImpl() { mutex_.Lock(); - if (!shutting_down_.load(std::memory_order_acquire) && flush_on_destroy_) { + if (!shutting_down_.load(std::memory_order_acquire) && + has_unpersisted_data_) { for (auto cfd : *versions_->GetColumnFamilySet()) { if (!cfd->IsDropped() && !cfd->mem()->IsEmpty()) { cfd->Ref(); @@ -3306,13 +3307,19 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, LookupKey lkey(key, snapshot); PERF_TIMER_STOP(get_snapshot_time); - if (sv->mem->Get(lkey, value, &s, &merge_context)) { - // Done - RecordTick(stats_, MEMTABLE_HIT); - } else if (sv->imm->Get(lkey, value, &s, &merge_context)) { - // Done - RecordTick(stats_, MEMTABLE_HIT); - } else { + bool skip_memtable = + (read_options.read_tier == kPersistedTier && has_unpersisted_data_); + bool done = false; + if (!skip_memtable) { + if (sv->mem->Get(lkey, value, &s, &merge_context)) { + done = true; + RecordTick(stats_, MEMTABLE_HIT); + } else if (sv->imm->Get(lkey, value, &s, &merge_context)) { + done = true; + RecordTick(stats_, MEMTABLE_HIT); + } + } + if (!done) { PERF_TIMER_GUARD(get_from_output_files_time); sv->current->Get(read_options, lkey, value, &s, &merge_context, value_found); @@ -3397,14 +3404,23 @@ std::vector DBImpl::MultiGet( assert(mgd_iter != 
multiget_cf_data.end()); auto mgd = mgd_iter->second; auto super_version = mgd->super_version; - if (super_version->mem->Get(lkey, value, &s, &merge_context)) { - // Done - } else if (super_version->imm->Get(lkey, value, &s, &merge_context)) { - // Done - } else { + bool skip_memtable = + (read_options.read_tier == kPersistedTier && has_unpersisted_data_); + bool done = false; + if (!skip_memtable) { + if (super_version->mem->Get(lkey, value, &s, &merge_context)) { + done = true; + // TODO(?): RecordTick(stats_, MEMTABLE_HIT)? + } else if (super_version->imm->Get(lkey, value, &s, &merge_context)) { + done = true; + // TODO(?): RecordTick(stats_, MEMTABLE_HIT)? + } + } + if (!done) { PERF_TIMER_GUARD(get_from_output_files_time); super_version->current->Get(read_options, lkey, value, &s, &merge_context); + // TODO(?): RecordTick(stats_, MEMTABLE_MISS)? } if (s.ok()) { @@ -3843,6 +3859,10 @@ bool DBImpl::KeyMayExist(const ReadOptions& read_options, Iterator* DBImpl::NewIterator(const ReadOptions& read_options, ColumnFamilyHandle* column_family) { + if (read_options.read_tier == kPersistedTier) { + return NewErrorIterator(Status::NotSupported( + "ReadTier::kPersistedData is not yet supported in iterators.")); + } auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); @@ -3949,6 +3969,10 @@ Status DBImpl::NewIterators( const ReadOptions& read_options, const std::vector& column_families, std::vector* iterators) { + if (read_options.read_tier == kPersistedTier) { + return Status::NotSupported( + "ReadTier::kPersistedData is not yet supported in iterators."); + } iterators->clear(); iterators->reserve(column_families.size()); XFUNC_TEST("", "managed_new", managed_new1, xf_manage_new, @@ -4327,7 +4351,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, PERF_TIMER_STOP(write_pre_and_post_process_time); if (write_options.disableWAL) { - flush_on_destroy_ = true; + has_unpersisted_data_ = true; } uint64_t log_size = 0; diff --git a/db/db_impl.h 
b/db/db_impl.h index 429589360..82dd39135 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -822,7 +822,10 @@ class DBImpl : public DB { // they're unique std::atomic next_job_id_; - bool flush_on_destroy_; // Used when disableWAL is true. + // A flag indicating whether the current rocksdb database has any + // data that is not yet persisted into either WAL or SST file. + // Used when disableWAL is true. + bool has_unpersisted_data_; static const int KEEP_LOG_FILE_NUM = 1000; // MSVC version 1800 still does not have constexpr for ::max() diff --git a/db/db_test.cc b/db/db_test.cc index 7e93f35a7..76a9ec396 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -519,6 +519,135 @@ TEST_F(DBTest, PutSingleDeleteGet) { kSkipUniversalCompaction | kSkipMergePut)); } +TEST_F(DBTest, ReadFromPersistedTier) { + do { + Random rnd(301); + Options options = CurrentOptions(); + for (int disableWAL = 0; disableWAL <= 1; ++disableWAL) { + CreateAndReopenWithCF({"pikachu"}, options); + WriteOptions wopt; + wopt.disableWAL = (disableWAL == 1); + // 1st round: put but not flush + ASSERT_OK(db_->Put(wopt, handles_[1], "foo", "first")); + ASSERT_OK(db_->Put(wopt, handles_[1], "bar", "one")); + ASSERT_EQ("first", Get(1, "foo")); + ASSERT_EQ("one", Get(1, "bar")); + + // Read directly from persited data. + ReadOptions ropt; + ropt.read_tier = kPersistedTier; + std::string value; + if (wopt.disableWAL) { + // as data has not yet being flushed, we expect not found. 
+ ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound()); + ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound()); + } else { + ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value)); + ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value)); + } + + // Multiget + std::vector multiget_cfs; + multiget_cfs.push_back(handles_[1]); + multiget_cfs.push_back(handles_[1]); + std::vector multiget_keys; + multiget_keys.push_back("foo"); + multiget_keys.push_back("bar"); + std::vector multiget_values; + auto statuses = + db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values); + if (wopt.disableWAL) { + ASSERT_TRUE(statuses[0].IsNotFound()); + ASSERT_TRUE(statuses[1].IsNotFound()); + } else { + ASSERT_OK(statuses[0]); + ASSERT_OK(statuses[1]); + } + + // 2nd round: flush and put a new value in memtable. + ASSERT_OK(Flush(1)); + ASSERT_OK(db_->Put(wopt, handles_[1], "rocksdb", "hello")); + + // once the data has been flushed, we are able to get the + // data when kPersistedTier is used. 
+ ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).ok()); + ASSERT_EQ(value, "first"); + ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok()); + ASSERT_EQ(value, "one"); + if (wopt.disableWAL) { + ASSERT_TRUE( + db_->Get(ropt, handles_[1], "rocksdb", &value).IsNotFound()); + } else { + ASSERT_OK(db_->Get(ropt, handles_[1], "rocksdb", &value)); + ASSERT_EQ(value, "hello"); + } + + // Expect same result in multiget + multiget_cfs.push_back(handles_[1]); + multiget_keys.push_back("rocksdb"); + statuses = + db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values); + ASSERT_TRUE(statuses[0].ok()); + ASSERT_EQ("first", multiget_values[0]); + ASSERT_TRUE(statuses[1].ok()); + ASSERT_EQ("one", multiget_values[1]); + if (wopt.disableWAL) { + ASSERT_TRUE(statuses[2].IsNotFound()); + } else { + ASSERT_OK(statuses[2]); + } + + // 3rd round: delete and flush + ASSERT_OK(db_->Delete(wopt, handles_[1], "foo")); + Flush(1); + ASSERT_OK(db_->Delete(wopt, handles_[1], "bar")); + + ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound()); + if (wopt.disableWAL) { + // Still expect finding the value as its delete has not yet being + // flushed. 
+ ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok()); + ASSERT_EQ(value, "one"); + } else { + ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound()); + } + ASSERT_TRUE(db_->Get(ropt, handles_[1], "rocksdb", &value).ok()); + ASSERT_EQ(value, "hello"); + + statuses = + db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values); + ASSERT_TRUE(statuses[0].IsNotFound()); + if (wopt.disableWAL) { + ASSERT_TRUE(statuses[1].ok()); + ASSERT_EQ("one", multiget_values[1]); + } else { + ASSERT_TRUE(statuses[1].IsNotFound()); + } + ASSERT_TRUE(statuses[2].ok()); + ASSERT_EQ("hello", multiget_values[2]); + if (wopt.disableWAL == 0) { + DestroyAndReopen(options); + } + } + } while (ChangeOptions(kSkipHashCuckoo)); +} + +TEST_F(DBTest, PersistedTierOnIterator) { + // The test needs to be changed if kPersistedTier is supported in iterator. + Options options = CurrentOptions(); + CreateAndReopenWithCF({"pikachu"}, options); + ReadOptions ropt; + ropt.read_tier = kPersistedTier; + + auto* iter = db_->NewIterator(ropt, handles_[1]); + ASSERT_TRUE(iter->status().IsNotSupported()); + delete iter; + + std::vector iters; + ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported()); + Close(); +} + TEST_F(DBTest, SingleDeleteFlush) { // Test to check whether flushing preserves a single delete hidden // behind a put. 
diff --git a/db/table_cache.cc b/db/table_cache.cc index 663315840..1d506f002 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -273,7 +273,7 @@ Status TableCache::Get(const ReadOptions& options, if (handle != nullptr) { ReleaseHandle(handle); } - } else if (options.read_tier && s.IsIncomplete()) { + } else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) { // Couldn't find Table in cache but treat as kFound if no_io set get_context->MarkKeyMayExist(); return Status::OK(); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index a26ed7d81..b1f752cf8 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1294,8 +1294,12 @@ struct Options : public DBOptions, public ColumnFamilyOptions { // the block cache. It will not page in data from the OS cache or data that // resides in storage. enum ReadTier { - kReadAllTier = 0x0, // data in memtable, block cache, OS cache or storage - kBlockCacheTier = 0x1 // data in memtable or block cache + kReadAllTier = 0x0, // data in memtable, block cache, OS cache or storage + kBlockCacheTier = 0x1, // data in memtable or block cache + kPersistedTier = 0x2 // persisted data. When WAL is disabled, this option + // will skip data in memtable. + // Note that this ReadTier currently only supports + // Get and MultiGet and does not support iterators. 
}; // Options that control read operations diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 00997f3ea..8c0149fd0 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1255,7 +1255,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, BlockIter biter; NewDataBlockIterator(rep_, read_options, iiter.value(), &biter); - if (read_options.read_tier && biter.status().IsIncomplete()) { + if (read_options.read_tier == kBlockCacheTier && + biter.status().IsIncomplete()) { // couldn't get block from block_cache // Update Saver.state to Found because we are only looking for whether // we can guarantee the key is not there when "no_io" is set From 59b3ee658fdf49c68bfbb6f4520a36bdbd1fc3c4 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 9 Feb 2016 14:54:32 -0800 Subject: [PATCH 098/195] Env function for bulk metadata retrieval Summary: Added this new function, which returns filename, size, and modified timestamp for each file in the provided directory. The default implementation retrieves the metadata sequentially using existing functions. In the next diff I'll make HdfsEnv override this function to use libhdfs's bulk get function. This won't work on windows due to the path separator. 
Test Plan: new unit test $ ./env_test --gtest_filter=EnvPosixTest.ConsistentChildrenMetadata Reviewers: yhchiang, sdong Reviewed By: sdong Subscribers: IslamAbdelRahman, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53781 --- include/rocksdb/env.h | 21 +++++++++++++++++++++ port/win/env_win.cc | 10 +++++++--- util/env.cc | 26 ++++++++++++++++++++++++++ util/env_test.cc | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 3 deletions(-) diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index e5f892a75..4165a13f0 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -103,6 +103,14 @@ struct EnvOptions { class Env { public: + struct FileAttributes { + // File name + std::string name; + + // Size of file in bytes + uint64_t size_bytes; + }; + Env() : thread_status_updater_(nullptr) {} virtual ~Env(); @@ -177,6 +185,15 @@ class Env { virtual Status GetChildren(const std::string& dir, std::vector* result) = 0; + // Store in *result the attributes of the children of the specified directory. + // In case the implementation lists the directory prior to iterating the files + // and files are concurrently deleted, the deleted files will be omitted from + // result. + // The name attributes are relative to "dir". + // Original contents of *results are dropped. + virtual Status GetChildrenFileAttributes(const std::string& dir, + std::vector* result); + // Delete the named file. 
virtual Status DeleteFile(const std::string& fname) = 0; @@ -789,6 +806,10 @@ class EnvWrapper : public Env { std::vector* r) override { return target_->GetChildren(dir, r); } + Status GetChildrenFileAttributes( + const std::string& dir, std::vector* result) override { + return target_->GetChildrenFileAttributes(dir, result); + } Status DeleteFile(const std::string& f) override { return target_->DeleteFile(f); } diff --git a/port/win/env_win.cc b/port/win/env_win.cc index 1ae0b1932..cbfb18256 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -1415,6 +1415,11 @@ class WinEnv : public Env { return status; } + virtual Status Env::GetChildrenFileMetadata( + const std::string& dir, std::vector* result) override { + return Status::NotSupported("Not supported in WinEnv"); + } + virtual Status CreateDir(const std::string& name) override { Status result; @@ -1723,9 +1728,8 @@ class WinEnv : public Env { virtual Status GetHostName(char* name, uint64_t len) override { Status s; - DWORD nSize = - static_cast(std::min(len, - std::numeric_limits::max())); + DWORD nSize = static_cast( + std::min(len, std::numeric_limits::max())); if (!::GetComputerNameA(name, &nSize)) { auto lastError = GetLastError(); diff --git a/util/env.cc b/util/env.cc index 968d300b8..efb149689 100644 --- a/util/env.cc +++ b/util/env.cc @@ -38,6 +38,32 @@ Status Env::ReuseWritableFile(const std::string& fname, return NewWritableFile(fname, result, options); } +Status Env::GetChildrenFileAttributes(const std::string& dir, + std::vector* result) { + assert(result != nullptr); + std::vector child_fnames; + Status s = GetChildren(dir, &child_fnames); + if (!s.ok()) { + return s; + } + result->resize(child_fnames.size()); + size_t result_size = 0; + for (size_t i = 0; i < child_fnames.size(); ++i) { + const std::string path = dir + "/" + child_fnames[i]; + if (!(s = GetFileSize(path, &(*result)[result_size].size_bytes)).ok()) { + if (FileExists(path).IsNotFound()) { + // The file may have been 
deleted since we listed the directory + continue; + } + return s; + } + (*result)[result_size].name = std::move(child_fnames[i]); + result_size++; + } + result->resize(result_size); + return Status::OK(); +} + SequentialFile::~SequentialFile() { } diff --git a/util/env_test.cc b/util/env_test.cc index e5fa37099..cfc521c29 100644 --- a/util/env_test.cc +++ b/util/env_test.cc @@ -935,6 +935,42 @@ TEST_F(EnvPosixTest, Preallocation) { ASSERT_EQ(last_allocated_block, 7UL); } +// Test that the two ways to get children file attributes (in bulk or +// individually) behave consistently. +TEST_F(EnvPosixTest, ConsistentChildrenAttributes) { + const EnvOptions soptions; + const int kNumChildren = 10; + + std::string data; + for (int i = 0; i < kNumChildren; ++i) { + std::ostringstream oss; + oss << test::TmpDir() << "/testfile_" << i; + const std::string path = oss.str(); + unique_ptr file; + ASSERT_OK(env_->NewWritableFile(path, &file, soptions)); + file->Append(data); + data.append("test"); + } + + std::vector file_attrs; + ASSERT_OK(env_->GetChildrenFileAttributes(test::TmpDir(), &file_attrs)); + for (int i = 0; i < kNumChildren; ++i) { + std::ostringstream oss; + oss << "testfile_" << i; + const std::string name = oss.str(); + const std::string path = test::TmpDir() + "/" + name; + + auto file_attrs_iter = std::find_if( + file_attrs.begin(), file_attrs.end(), + [&name](const Env::FileAttributes& fm) { return fm.name == name; }); + ASSERT_TRUE(file_attrs_iter != file_attrs.end()); + uint64_t size; + ASSERT_OK(env_->GetFileSize(path, &size)); + ASSERT_EQ(size, 4 * i); + ASSERT_EQ(size, file_attrs_iter->size_bytes); + } +} + // Test that all WritableFileWrapper forwards all calls to WritableFile. TEST_F(EnvPosixTest, WritableFileWrapper) { class Base : public WritableFile { From 21e95811d14d556436ae849e40744806f31f00a0 Mon Sep 17 00:00:00 2001 From: Baraa Hamodi Date: Tue, 9 Feb 2016 15:12:00 -0800 Subject: [PATCH 099/195] Updated all copyright headers to the new format. 
--- LICENSE | 2 +- arcanist_util/cpp_linter/cpplint.py | 2 +- build_tools/make_new_version.sh | 4 ++-- db/auto_roll_logger.cc | 2 +- db/auto_roll_logger.h | 2 +- db/auto_roll_logger_test.cc | 2 +- db/builder.cc | 2 +- db/builder.h | 2 +- db/c.cc | 2 +- db/column_family.cc | 2 +- db/column_family.h | 2 +- db/column_family_test.cc | 2 +- db/compact_files_test.cc | 2 +- db/compacted_db_impl.cc | 2 +- db/compacted_db_impl.h | 2 +- db/compaction.cc | 2 +- db/compaction.h | 2 +- db/compaction_iterator.cc | 2 +- db/compaction_iterator.h | 2 +- db/compaction_iterator_test.cc | 2 +- db/compaction_job.cc | 2 +- db/compaction_job.h | 2 +- db/compaction_job_stats_test.cc | 2 +- db/compaction_job_test.cc | 2 +- db/compaction_picker.cc | 2 +- db/compaction_picker.h | 2 +- db/compaction_picker_test.cc | 2 +- db/comparator_db_test.cc | 2 +- db/convenience.cc | 2 +- db/corruption_test.cc | 2 +- db/cuckoo_table_db_test.cc | 2 +- db/db_bench.cc | 2 +- db/db_compaction_filter_test.cc | 2 +- db/db_compaction_test.cc | 2 +- db/db_dynamic_level_test.cc | 2 +- db/db_filesnapshot.cc | 2 +- db/db_impl.cc | 2 +- db/db_impl.h | 2 +- db/db_impl_debug.cc | 2 +- db/db_impl_experimental.cc | 2 +- db/db_impl_readonly.cc | 2 +- db/db_impl_readonly.h | 2 +- db/db_info_dumper.cc | 2 +- db/db_info_dumper.h | 2 +- db/db_inplace_update_test.cc | 2 +- db/db_iter.cc | 2 +- db/db_iter.h | 2 +- db/db_iter_test.cc | 2 +- db/db_log_iter_test.cc | 2 +- db/db_properties_test.cc | 2 +- db/db_table_properties_test.cc | 2 +- db/db_tailing_iter_test.cc | 2 +- db/db_test.cc | 2 +- db/db_test_util.cc | 2 +- db/db_test_util.h | 2 +- db/db_universal_compaction_test.cc | 2 +- db/db_wal_test.cc | 2 +- db/dbformat.cc | 2 +- db/dbformat.h | 2 +- db/dbformat_test.cc | 2 +- db/deletefile_test.cc | 2 +- db/event_helpers.cc | 2 +- db/event_helpers.h | 2 +- db/experimental.cc | 2 +- db/fault_injection_test.cc | 2 +- db/file_indexer.cc | 2 +- db/file_indexer.h | 2 +- db/file_indexer_test.cc | 2 +- db/filename.cc | 2 +- 
db/filename.h | 2 +- db/filename_test.cc | 2 +- db/flush_job.cc | 2 +- db/flush_job.h | 2 +- db/flush_job_test.cc | 2 +- db/flush_scheduler.cc | 2 +- db/flush_scheduler.h | 2 +- db/forward_iterator.cc | 2 +- db/forward_iterator.h | 2 +- db/forward_iterator_bench.cc | 2 +- db/inlineskiplist.h | 2 +- db/inlineskiplist_test.cc | 2 +- db/internal_stats.h | 2 +- db/job_context.h | 2 +- db/listener_test.cc | 2 +- db/log_format.h | 2 +- db/log_reader.cc | 2 +- db/log_reader.h | 2 +- db/log_test.cc | 2 +- db/log_writer.cc | 2 +- db/log_writer.h | 2 +- db/managed_iterator.cc | 2 +- db/managed_iterator.h | 2 +- db/manual_compaction_test.cc | 2 +- db/memtable.cc | 2 +- db/memtable.h | 2 +- db/memtable_allocator.cc | 2 +- db/memtable_allocator.h | 2 +- db/memtable_list.cc | 2 +- db/memtable_list.h | 2 +- db/memtable_list_test.cc | 2 +- db/memtablerep_bench.cc | 2 +- db/merge_context.h | 2 +- db/merge_helper.cc | 2 +- db/merge_helper.h | 2 +- db/merge_helper_test.cc | 2 +- db/merge_operator.cc | 2 +- db/merge_test.cc | 2 +- db/options_file_test.cc | 2 +- db/perf_context_test.cc | 2 +- db/plain_table_db_test.cc | 2 +- db/prefix_test.cc | 2 +- db/repair.cc | 2 +- db/skiplist.h | 2 +- db/skiplist_test.cc | 2 +- db/slice.cc | 2 +- db/snapshot_impl.cc | 2 +- db/snapshot_impl.h | 2 +- db/table_cache.cc | 2 +- db/table_cache.h | 2 +- db/table_properties_collector.cc | 2 +- db/table_properties_collector.h | 2 +- db/table_properties_collector_test.cc | 2 +- db/transaction_log_impl.cc | 2 +- db/transaction_log_impl.h | 2 +- db/version_builder.cc | 2 +- db/version_builder.h | 2 +- db/version_builder_test.cc | 2 +- db/version_edit.cc | 2 +- db/version_edit.h | 2 +- db/version_edit_test.cc | 2 +- db/version_set.cc | 2 +- db/version_set.h | 2 +- db/version_set_test.cc | 2 +- db/wal_manager.cc | 2 +- db/wal_manager.h | 2 +- db/wal_manager_test.cc | 2 +- db/write_batch.cc | 2 +- db/write_batch_base.cc | 2 +- db/write_batch_internal.h | 2 +- db/write_batch_test.cc | 2 +- db/write_callback.h | 2 
+- db/write_callback_test.cc | 2 +- db/write_controller.cc | 2 +- db/write_controller.h | 2 +- db/write_controller_test.cc | 2 +- db/write_thread.cc | 2 +- db/write_thread.h | 2 +- db/writebuffer.h | 2 +- db/xfunc_test_points.cc | 2 +- db/xfunc_test_points.h | 2 +- examples/column_families_example.cc | 2 +- examples/compact_files_example.cc | 2 +- examples/compaction_filter_example.cc | 2 +- examples/optimistic_transaction_example.cc | 2 +- examples/options_file_example.cc | 2 +- examples/simple_example.cc | 2 +- examples/transaction_example.cc | 2 +- hdfs/env_hdfs.h | 2 +- include/rocksdb/c.h | 2 +- include/rocksdb/cache.h | 2 +- include/rocksdb/compaction_filter.h | 2 +- include/rocksdb/compaction_job_stats.h | 2 +- include/rocksdb/comparator.h | 2 +- include/rocksdb/convenience.h | 2 +- include/rocksdb/db.h | 2 +- include/rocksdb/db_dump_tool.h | 2 +- include/rocksdb/env.h | 2 +- include/rocksdb/experimental.h | 2 +- include/rocksdb/filter_policy.h | 2 +- include/rocksdb/flush_block_policy.h | 2 +- include/rocksdb/immutable_options.h | 2 +- include/rocksdb/iostats_context.h | 2 +- include/rocksdb/iterator.h | 2 +- include/rocksdb/ldb_tool.h | 2 +- include/rocksdb/memtablerep.h | 2 +- include/rocksdb/merge_operator.h | 2 +- include/rocksdb/metadata.h | 2 +- include/rocksdb/options.h | 2 +- include/rocksdb/perf_context.h | 2 +- include/rocksdb/perf_level.h | 2 +- include/rocksdb/rate_limiter.h | 2 +- include/rocksdb/slice.h | 2 +- include/rocksdb/slice_transform.h | 2 +- include/rocksdb/snapshot.h | 2 +- include/rocksdb/sst_dump_tool.h | 2 +- include/rocksdb/sst_file_manager.h | 2 +- include/rocksdb/sst_file_writer.h | 2 +- include/rocksdb/statistics.h | 2 +- include/rocksdb/status.h | 4 ++-- include/rocksdb/thread_status.h | 2 +- include/rocksdb/transaction_log.h | 2 +- include/rocksdb/types.h | 2 +- include/rocksdb/universal_compaction.h | 2 +- include/rocksdb/utilities/backupable_db.h | 2 +- include/rocksdb/utilities/checkpoint.h | 2 +- 
include/rocksdb/utilities/convenience.h | 2 +- include/rocksdb/utilities/db_ttl.h | 2 +- include/rocksdb/utilities/document_db.h | 2 +- include/rocksdb/utilities/flashcache.h | 2 +- include/rocksdb/utilities/geo_db.h | 2 +- include/rocksdb/utilities/info_log_finder.h | 2 +- include/rocksdb/utilities/json_document.h | 2 +- include/rocksdb/utilities/leveldb_options.h | 2 +- include/rocksdb/utilities/memory_util.h | 2 +- include/rocksdb/utilities/optimistic_transaction_db.h | 2 +- include/rocksdb/utilities/options_util.h | 2 +- include/rocksdb/utilities/spatial_db.h | 2 +- include/rocksdb/utilities/table_properties_collectors.h | 2 +- include/rocksdb/utilities/transaction.h | 2 +- include/rocksdb/utilities/transaction_db.h | 2 +- include/rocksdb/utilities/transaction_db_mutex.h | 2 +- include/rocksdb/utilities/write_batch_with_index.h | 2 +- include/rocksdb/version.h | 2 +- include/rocksdb/wal_filter.h | 2 +- include/rocksdb/write_batch.h | 2 +- include/rocksdb/write_batch_base.h | 2 +- .../src/main/java/org/rocksdb/benchmark/DbBenchmark.java | 2 +- java/rocksjni/backupablejni.cc | 2 +- java/rocksjni/backupenginejni.cc | 2 +- java/rocksjni/checkpoint.cc | 2 +- java/rocksjni/columnfamilyhandle.cc | 2 +- java/rocksjni/compaction_filter.cc | 2 +- java/rocksjni/comparator.cc | 2 +- java/rocksjni/comparatorjnicallback.cc | 2 +- java/rocksjni/comparatorjnicallback.h | 2 +- java/rocksjni/env.cc | 2 +- java/rocksjni/filter.cc | 2 +- java/rocksjni/iterator.cc | 2 +- java/rocksjni/loggerjnicallback.cc | 2 +- java/rocksjni/loggerjnicallback.h | 2 +- java/rocksjni/memtablejni.cc | 2 +- java/rocksjni/options.cc | 2 +- java/rocksjni/portal.h | 2 +- java/rocksjni/ratelimiterjni.cc | 2 +- java/rocksjni/remove_emptyvalue_compactionfilterjni.cc | 2 +- java/rocksjni/restorejni.cc | 2 +- java/rocksjni/rocksjni.cc | 2 +- java/rocksjni/slice.cc | 2 +- java/rocksjni/snapshot.cc | 2 +- java/rocksjni/statistics.cc | 2 +- java/rocksjni/table.cc | 2 +- java/rocksjni/transaction_log.cc | 2 +- 
java/rocksjni/ttl.cc | 2 +- java/rocksjni/write_batch.cc | 2 +- java/rocksjni/write_batch_test.cc | 2 +- java/rocksjni/write_batch_with_index.cc | 2 +- java/rocksjni/writebatchhandlerjnicallback.cc | 2 +- java/rocksjni/writebatchhandlerjnicallback.h | 2 +- java/samples/src/main/java/RocksDBColumnFamilySample.java | 2 +- java/samples/src/main/java/RocksDBSample.java | 2 +- .../src/main/java/org/rocksdb/AbstractCompactionFilter.java | 2 +- java/src/main/java/org/rocksdb/AbstractComparator.java | 2 +- java/src/main/java/org/rocksdb/AbstractRocksIterator.java | 2 +- java/src/main/java/org/rocksdb/AbstractSlice.java | 2 +- java/src/main/java/org/rocksdb/AbstractWriteBatch.java | 2 +- java/src/main/java/org/rocksdb/BackupEngine.java | 2 +- java/src/main/java/org/rocksdb/BackupInfo.java | 2 +- java/src/main/java/org/rocksdb/BackupableDB.java | 2 +- java/src/main/java/org/rocksdb/BackupableDBOptions.java | 2 +- java/src/main/java/org/rocksdb/BlockBasedTableConfig.java | 2 +- java/src/main/java/org/rocksdb/BloomFilter.java | 2 +- java/src/main/java/org/rocksdb/BuiltinComparator.java | 2 +- java/src/main/java/org/rocksdb/Checkpoint.java | 2 +- java/src/main/java/org/rocksdb/ChecksumType.java | 2 +- java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java | 2 +- java/src/main/java/org/rocksdb/ColumnFamilyHandle.java | 2 +- java/src/main/java/org/rocksdb/ColumnFamilyOptions.java | 2 +- .../main/java/org/rocksdb/ColumnFamilyOptionsInterface.java | 2 +- java/src/main/java/org/rocksdb/CompactionStyle.java | 2 +- java/src/main/java/org/rocksdb/Comparator.java | 2 +- java/src/main/java/org/rocksdb/CompressionType.java | 2 +- java/src/main/java/org/rocksdb/DBOptions.java | 2 +- java/src/main/java/org/rocksdb/DBOptionsInterface.java | 2 +- java/src/main/java/org/rocksdb/DirectComparator.java | 2 +- java/src/main/java/org/rocksdb/DirectSlice.java | 2 +- java/src/main/java/org/rocksdb/EncodingType.java | 2 +- java/src/main/java/org/rocksdb/Env.java | 2 +- 
java/src/main/java/org/rocksdb/Filter.java | 2 +- .../src/main/java/org/rocksdb/GenericRateLimiterConfig.java | 2 +- java/src/main/java/org/rocksdb/HistogramData.java | 2 +- java/src/main/java/org/rocksdb/HistogramType.java | 2 +- java/src/main/java/org/rocksdb/IndexType.java | 2 +- java/src/main/java/org/rocksdb/Logger.java | 2 +- java/src/main/java/org/rocksdb/MemTableConfig.java | 2 +- java/src/main/java/org/rocksdb/Options.java | 2 +- java/src/main/java/org/rocksdb/PlainTableConfig.java | 2 +- java/src/main/java/org/rocksdb/RateLimiterConfig.java | 2 +- java/src/main/java/org/rocksdb/ReadOptions.java | 2 +- .../java/org/rocksdb/RemoveEmptyValueCompactionFilter.java | 2 +- java/src/main/java/org/rocksdb/RestoreBackupableDB.java | 2 +- java/src/main/java/org/rocksdb/RestoreOptions.java | 2 +- java/src/main/java/org/rocksdb/RocksDB.java | 2 +- java/src/main/java/org/rocksdb/RocksDBException.java | 2 +- java/src/main/java/org/rocksdb/RocksEnv.java | 2 +- java/src/main/java/org/rocksdb/RocksIterator.java | 2 +- java/src/main/java/org/rocksdb/RocksIteratorInterface.java | 2 +- java/src/main/java/org/rocksdb/RocksMemEnv.java | 2 +- java/src/main/java/org/rocksdb/RocksObject.java | 2 +- java/src/main/java/org/rocksdb/Slice.java | 2 +- java/src/main/java/org/rocksdb/Snapshot.java | 2 +- java/src/main/java/org/rocksdb/Statistics.java | 2 +- java/src/main/java/org/rocksdb/StatisticsCollector.java | 2 +- .../main/java/org/rocksdb/StatisticsCollectorCallback.java | 2 +- java/src/main/java/org/rocksdb/StatsCollectorInput.java | 2 +- java/src/main/java/org/rocksdb/TableFormatConfig.java | 2 +- java/src/main/java/org/rocksdb/TickerType.java | 2 +- java/src/main/java/org/rocksdb/TtlDB.java | 2 +- java/src/main/java/org/rocksdb/WBWIRocksIterator.java | 2 +- java/src/main/java/org/rocksdb/WriteBatch.java | 2 +- java/src/main/java/org/rocksdb/WriteBatchInterface.java | 2 +- java/src/main/java/org/rocksdb/WriteBatchWithIndex.java | 2 +- 
java/src/main/java/org/rocksdb/WriteOptions.java | 2 +- java/src/main/java/org/rocksdb/util/SizeUnit.java | 2 +- java/src/test/java/org/rocksdb/AbstractComparatorTest.java | 2 +- java/src/test/java/org/rocksdb/BackupEngineTest.java | 2 +- java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java | 2 +- java/src/test/java/org/rocksdb/BackupableDBTest.java | 2 +- .../test/java/org/rocksdb/BlockBasedTableConfigTest.java | 2 +- java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java | 2 +- java/src/test/java/org/rocksdb/ColumnFamilyTest.java | 2 +- java/src/test/java/org/rocksdb/ComparatorOptionsTest.java | 2 +- java/src/test/java/org/rocksdb/ComparatorTest.java | 2 +- java/src/test/java/org/rocksdb/CompressionOptionsTest.java | 2 +- java/src/test/java/org/rocksdb/DBOptionsTest.java | 2 +- java/src/test/java/org/rocksdb/DirectComparatorTest.java | 2 +- java/src/test/java/org/rocksdb/DirectSliceTest.java | 2 +- java/src/test/java/org/rocksdb/FilterTest.java | 2 +- java/src/test/java/org/rocksdb/FlushTest.java | 2 +- java/src/test/java/org/rocksdb/KeyMayExistTest.java | 2 +- java/src/test/java/org/rocksdb/MemTableTest.java | 2 +- java/src/test/java/org/rocksdb/MergeTest.java | 2 +- java/src/test/java/org/rocksdb/MixedOptionsTest.java | 2 +- java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java | 2 +- java/src/test/java/org/rocksdb/OptionsTest.java | 2 +- java/src/test/java/org/rocksdb/PlainTableConfigTest.java | 2 +- java/src/test/java/org/rocksdb/PlatformRandomHelper.java | 2 +- java/src/test/java/org/rocksdb/ReadOnlyTest.java | 2 +- java/src/test/java/org/rocksdb/ReadOptionsTest.java | 2 +- java/src/test/java/org/rocksdb/RocksDBTest.java | 2 +- java/src/test/java/org/rocksdb/RocksEnvTest.java | 2 +- java/src/test/java/org/rocksdb/RocksIteratorTest.java | 2 +- java/src/test/java/org/rocksdb/RocksMemEnvTest.java | 2 +- java/src/test/java/org/rocksdb/SliceTest.java | 2 +- java/src/test/java/org/rocksdb/SnapshotTest.java | 2 +- 
java/src/test/java/org/rocksdb/StatisticsCollectorTest.java | 2 +- java/src/test/java/org/rocksdb/StatsCallbackMock.java | 2 +- java/src/test/java/org/rocksdb/TtlDBTest.java | 2 +- java/src/test/java/org/rocksdb/Types.java | 2 +- java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java | 2 +- java/src/test/java/org/rocksdb/WriteBatchTest.java | 2 +- java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java | 2 +- java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java | 2 +- java/src/test/java/org/rocksdb/WriteOptionsTest.java | 2 +- java/src/test/java/org/rocksdb/test/RocksJunitRunner.java | 2 +- java/src/test/java/org/rocksdb/util/EnvironmentTest.java | 2 +- java/src/test/java/org/rocksdb/util/SizeUnitTest.java | 2 +- memtable/hash_cuckoo_rep.cc | 2 +- memtable/hash_cuckoo_rep.h | 2 +- memtable/hash_linklist_rep.cc | 2 +- memtable/hash_linklist_rep.h | 2 +- memtable/hash_skiplist_rep.cc | 2 +- memtable/hash_skiplist_rep.h | 2 +- memtable/skiplistrep.cc | 2 +- memtable/stl_wrappers.h | 2 +- memtable/vectorrep.cc | 2 +- port/dirent.h | 2 +- port/likely.h | 2 +- port/port.h | 2 +- port/port_example.h | 2 +- port/port_posix.cc | 2 +- port/port_posix.h | 2 +- port/stack_trace.cc | 2 +- port/stack_trace.h | 2 +- port/sys_time.h | 2 +- port/util_logger.h | 2 +- port/win/env_win.cc | 2 +- port/win/port_win.cc | 4 ++-- port/win/port_win.h | 2 +- port/win/win_logger.cc | 2 +- port/win/win_logger.h | 2 +- table/block.cc | 2 +- table/block.h | 2 +- table/block_based_filter_block.cc | 2 +- table/block_based_filter_block.h | 2 +- table/block_based_filter_block_test.cc | 2 +- table/block_based_table_builder.cc | 2 +- table/block_based_table_builder.h | 2 +- table/block_based_table_factory.cc | 2 +- table/block_based_table_factory.h | 2 +- table/block_based_table_reader.cc | 2 +- table/block_based_table_reader.h | 2 +- table/block_builder.cc | 2 +- table/block_builder.h | 2 +- table/block_hash_index.cc | 2 +- table/block_hash_index.h | 2 +- table/block_hash_index_test.cc | 
2 +- table/block_prefix_index.cc | 2 +- table/block_prefix_index.h | 2 +- table/block_test.cc | 2 +- table/bloom_block.cc | 2 +- table/bloom_block.h | 2 +- table/cuckoo_table_builder.cc | 4 ++-- table/cuckoo_table_builder.h | 2 +- table/cuckoo_table_builder_test.cc | 2 +- table/cuckoo_table_factory.cc | 2 +- table/cuckoo_table_factory.h | 2 +- table/cuckoo_table_reader.cc | 2 +- table/cuckoo_table_reader.h | 2 +- table/cuckoo_table_reader_test.cc | 2 +- table/filter_block.h | 2 +- table/flush_block_policy.cc | 2 +- table/format.cc | 2 +- table/format.h | 2 +- table/full_filter_block.cc | 2 +- table/full_filter_block.h | 2 +- table/full_filter_block_test.cc | 2 +- table/get_context.cc | 2 +- table/get_context.h | 2 +- table/internal_iterator.h | 2 +- table/iter_heap.h | 2 +- table/iterator.cc | 2 +- table/iterator_wrapper.h | 2 +- table/merger.cc | 2 +- table/merger.h | 2 +- table/merger_test.cc | 2 +- table/meta_blocks.cc | 2 +- table/meta_blocks.h | 2 +- table/mock_table.cc | 2 +- table/mock_table.h | 2 +- table/plain_table_builder.cc | 2 +- table/plain_table_builder.h | 2 +- table/plain_table_index.cc | 4 ++-- table/plain_table_index.h | 2 +- table/plain_table_key_coding.cc | 2 +- table/plain_table_key_coding.h | 2 +- table/scoped_arena_iterator.h | 2 +- table/sst_file_writer.cc | 2 +- table/table_builder.h | 2 +- table/table_properties.cc | 2 +- table/table_properties_internal.h | 2 +- table/table_reader.h | 2 +- table/table_reader_bench.cc | 2 +- table/table_test.cc | 2 +- table/two_level_iterator.cc | 2 +- table/two_level_iterator.h | 2 +- third-party/fbson/FbsonDocument.h | 2 +- third-party/fbson/FbsonJsonParser.h | 2 +- third-party/fbson/FbsonStream.h | 2 +- third-party/fbson/FbsonUtil.h | 2 +- third-party/fbson/FbsonWriter.h | 2 +- tools/db_repl_stress.cc | 2 +- tools/db_sanity_test.cc | 2 +- tools/db_stress.cc | 2 +- tools/dump/db_dump_tool.cc | 2 +- tools/dump/rocksdb_dump.cc | 2 +- tools/dump/rocksdb_undump.cc | 2 +- tools/ldb.cc | 2 +- tools/ldb_cmd.cc 
| 2 +- tools/ldb_cmd.h | 2 +- tools/ldb_cmd_execute_result.h | 2 +- tools/ldb_cmd_test.cc | 2 +- tools/ldb_tool.cc | 2 +- tools/reduce_levels_test.cc | 2 +- tools/sst_dump.cc | 2 +- tools/sst_dump_test.cc | 2 +- tools/sst_dump_tool.cc | 2 +- tools/sst_dump_tool_imp.h | 2 +- tools/write_stress.cc | 2 +- util/aligned_buffer.h | 2 +- util/allocator.h | 2 +- util/arena.cc | 2 +- util/arena.h | 2 +- util/arena_test.cc | 2 +- util/autovector.h | 2 +- util/autovector_test.cc | 2 +- util/bloom.cc | 2 +- util/bloom_test.cc | 2 +- util/build_version.h | 2 +- util/cache.cc | 2 +- util/cache_bench.cc | 2 +- util/cache_test.cc | 2 +- util/channel.h | 2 +- util/coding.cc | 2 +- util/coding.h | 2 +- util/coding_test.cc | 2 +- util/compaction_job_stats_impl.cc | 2 +- util/comparator.cc | 2 +- util/compression.h | 2 +- util/concurrent_arena.cc | 2 +- util/concurrent_arena.h | 2 +- util/crc32c.cc | 2 +- util/crc32c.h | 2 +- util/crc32c_test.cc | 2 +- util/delete_scheduler.cc | 2 +- util/delete_scheduler.h | 2 +- util/delete_scheduler_test.cc | 2 +- util/dynamic_bloom.cc | 2 +- util/dynamic_bloom.h | 2 +- util/dynamic_bloom_test.cc | 2 +- util/env.cc | 2 +- util/env_hdfs.cc | 2 +- util/env_posix.cc | 2 +- util/env_test.cc | 2 +- util/event_logger.cc | 2 +- util/event_logger.h | 2 +- util/event_logger_test.cc | 2 +- util/file_reader_writer.cc | 2 +- util/file_reader_writer.h | 2 +- util/file_reader_writer_test.cc | 2 +- util/file_util.cc | 2 +- util/file_util.h | 2 +- util/filelock_test.cc | 2 +- util/filter_policy.cc | 2 +- util/hash.cc | 2 +- util/hash.h | 2 +- util/heap.h | 2 +- util/heap_test.cc | 2 +- util/histogram.cc | 6 +++--- util/histogram.h | 2 +- util/histogram_test.cc | 2 +- util/instrumented_mutex.cc | 2 +- util/instrumented_mutex.h | 2 +- util/io_posix.cc | 2 +- util/io_posix.h | 2 +- util/iostats_context.cc | 2 +- util/iostats_context_imp.h | 2 +- util/kv_map.h | 2 +- util/log_buffer.cc | 2 +- util/log_buffer.h | 2 +- util/log_write_bench.cc | 2 +- util/logging.cc | 2 
+- util/logging.h | 2 +- util/mock_env.cc | 2 +- util/mock_env.h | 2 +- util/murmurhash.cc | 2 +- util/murmurhash.h | 2 +- util/mutable_cf_options.cc | 2 +- util/mutable_cf_options.h | 2 +- util/mutexlock.h | 2 +- util/options.cc | 2 +- util/options_builder.cc | 2 +- util/options_helper.cc | 2 +- util/options_helper.h | 2 +- util/options_parser.cc | 2 +- util/options_parser.h | 2 +- util/options_sanity_check.cc | 2 +- util/options_sanity_check.h | 2 +- util/options_test.cc | 2 +- util/perf_context.cc | 2 +- util/perf_context_imp.h | 2 +- util/perf_level.cc | 2 +- util/perf_level_imp.h | 2 +- util/perf_step_timer.h | 2 +- util/posix_logger.h | 2 +- util/random.cc | 2 +- util/random.h | 2 +- util/rate_limiter.cc | 2 +- util/rate_limiter.h | 2 +- util/rate_limiter_test.cc | 2 +- util/slice.cc | 2 +- util/slice_transform_test.cc | 2 +- util/sst_file_manager_impl.cc | 2 +- util/sst_file_manager_impl.h | 2 +- util/statistics.cc | 2 +- util/statistics.h | 2 +- util/status.cc | 2 +- util/status_message.cc | 2 +- util/stop_watch.h | 2 +- util/string_util.cc | 2 +- util/string_util.h | 2 +- util/sync_point.cc | 2 +- util/sync_point.h | 2 +- util/testharness.cc | 2 +- util/testharness.h | 2 +- util/testutil.cc | 2 +- util/testutil.h | 2 +- util/thread_list_test.cc | 2 +- util/thread_local.cc | 2 +- util/thread_local.h | 2 +- util/thread_local_test.cc | 2 +- util/thread_operation.h | 2 +- util/thread_posix.cc | 2 +- util/thread_posix.h | 2 +- util/thread_status_impl.cc | 2 +- util/thread_status_updater.cc | 2 +- util/thread_status_updater.h | 2 +- util/thread_status_updater_debug.cc | 2 +- util/thread_status_util.cc | 2 +- util/thread_status_util.h | 2 +- util/thread_status_util_debug.cc | 2 +- util/xfunc.cc | 2 +- util/xfunc.h | 2 +- utilities/backupable/backupable_db.cc | 2 +- utilities/backupable/backupable_db_test.cc | 2 +- utilities/backupable/backupable_db_testutil.h | 2 +- utilities/checkpoint/checkpoint.cc | 2 +- utilities/checkpoint/checkpoint_test.cc | 2 +- 
.../remove_emptyvalue_compactionfilter.cc | 2 +- .../compaction_filters/remove_emptyvalue_compactionfilter.h | 2 +- utilities/convenience/info_log_finder.cc | 2 +- utilities/document/document_db.cc | 2 +- utilities/document/document_db_test.cc | 2 +- utilities/document/json_document.cc | 2 +- utilities/document/json_document_builder.cc | 2 +- utilities/document/json_document_test.cc | 2 +- utilities/flashcache/flashcache.cc | 2 +- utilities/flashcache/flashcache.h | 2 +- utilities/geodb/geodb_impl.cc | 2 +- utilities/geodb/geodb_impl.h | 2 +- utilities/geodb/geodb_test.cc | 2 +- utilities/leveldb_options/leveldb_options.cc | 2 +- utilities/memory/memory_test.cc | 2 +- utilities/memory/memory_util.cc | 2 +- utilities/merge_operators.h | 2 +- utilities/merge_operators/put.cc | 2 +- utilities/merge_operators/uint64add.cc | 2 +- utilities/options/options_util.cc | 2 +- utilities/options/options_util_test.cc | 2 +- utilities/redis/redis_lists_test.cc | 2 +- utilities/spatialdb/spatial_db.cc | 2 +- utilities/spatialdb/spatial_db_test.cc | 2 +- utilities/spatialdb/utils.h | 2 +- .../compact_on_deletion_collector.cc | 2 +- .../compact_on_deletion_collector.h | 2 +- .../compact_on_deletion_collector_test.cc | 2 +- utilities/transactions/optimistic_transaction_db_impl.cc | 2 +- utilities/transactions/optimistic_transaction_db_impl.h | 2 +- utilities/transactions/optimistic_transaction_impl.cc | 2 +- utilities/transactions/optimistic_transaction_impl.h | 2 +- utilities/transactions/optimistic_transaction_test.cc | 2 +- utilities/transactions/transaction_base.cc | 2 +- utilities/transactions/transaction_base.h | 2 +- utilities/transactions/transaction_db_impl.cc | 2 +- utilities/transactions/transaction_db_impl.h | 2 +- utilities/transactions/transaction_db_mutex_impl.cc | 2 +- utilities/transactions/transaction_db_mutex_impl.h | 2 +- utilities/transactions/transaction_impl.cc | 2 +- utilities/transactions/transaction_impl.h | 2 +- 
utilities/transactions/transaction_lock_mgr.cc | 2 +- utilities/transactions/transaction_lock_mgr.h | 2 +- utilities/transactions/transaction_test.cc | 2 +- utilities/transactions/transaction_util.cc | 2 +- utilities/transactions/transaction_util.h | 2 +- utilities/write_batch_with_index/write_batch_with_index.cc | 2 +- .../write_batch_with_index_internal.cc | 2 +- .../write_batch_with_index_internal.h | 2 +- .../write_batch_with_index/write_batch_with_index_test.cc | 2 +- 647 files changed, 654 insertions(+), 654 deletions(-) diff --git a/LICENSE b/LICENSE index b13290186..46f685e96 100644 --- a/LICENSE +++ b/LICENSE @@ -2,7 +2,7 @@ BSD License For rocksdb software -Copyright (c) 2014, Facebook, Inc. +Copyright (c) 2011-present, Facebook, Inc. All rights reserved. --------------------------------------------------------------------- diff --git a/arcanist_util/cpp_linter/cpplint.py b/arcanist_util/cpp_linter/cpplint.py index d6201945a..3bb33e17b 100755 --- a/arcanist_util/cpp_linter/cpplint.py +++ b/arcanist_util/cpp_linter/cpplint.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# Copyright (c) 2013, Facebook, Inc. All rights reserved. +# Copyright (c) 2011-present, Facebook, Inc. All rights reserved. # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. An additional grant # of patent rights can be found in the PATENTS file in the same directory. diff --git a/build_tools/make_new_version.sh b/build_tools/make_new_version.sh index 409944f83..76a847355 100755 --- a/build_tools/make_new_version.sh +++ b/build_tools/make_new_version.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2013, Facebook, Inc. All rights reserved. +# Copyright (c) 2011-present, Facebook, Inc. All rights reserved. # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
An additional grant # of patent rights can be found in the PATENTS file in the same directory. @@ -10,7 +10,7 @@ then GIT="git" fi -# Print out the colored progress info so that it can be brainlessly +# Print out the colored progress info so that it can be brainlessly # distinguished by users. function title() { echo -e "\033[1;32m$*\033[0m" diff --git a/db/auto_roll_logger.cc b/db/auto_roll_logger.cc index 2349bd0c0..b6efc3d13 100644 --- a/db/auto_roll_logger.cc +++ b/db/auto_roll_logger.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/auto_roll_logger.h b/db/auto_roll_logger.h index 1288cdf3a..d25d883f9 100644 --- a/db/auto_roll_logger.h +++ b/db/auto_roll_logger.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/auto_roll_logger_test.cc b/db/auto_roll_logger_test.cc index e3e04c93f..62d41887e 100644 --- a/db/auto_roll_logger_test.cc +++ b/db/auto_roll_logger_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/builder.cc b/db/builder.cc index 52605b27d..ae6015003 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/builder.h b/db/builder.h index 9a4d3b60b..b4b72b7d7 100644 --- a/db/builder.h +++ b/db/builder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/c.cc b/db/c.cc index 731a76331..85e911491 100644 --- a/db/c.cc +++ b/db/c.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/column_family.cc b/db/column_family.cc index d472d8d7f..6972b52e9 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/column_family.h b/db/column_family.h index 6266d40a2..ce5f409c4 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/column_family_test.cc b/db/column_family_test.cc index e3b51fc85..fddbaf518 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compact_files_test.cc b/db/compact_files_test.cc index 5512ed11f..1b8c5b942 100644 --- a/db/compact_files_test.cc +++ b/db/compact_files_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compacted_db_impl.cc b/db/compacted_db_impl.cc index 980b34e12..db8daa549 100644 --- a/db/compacted_db_impl.cc +++ b/db/compacted_db_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compacted_db_impl.h b/db/compacted_db_impl.h index ec2d53762..9c42010a6 100644 --- a/db/compacted_db_impl.h +++ b/db/compacted_db_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compaction.cc b/db/compaction.cc index e28cf68ee..21bdcf2a0 100644 --- a/db/compaction.cc +++ b/db/compaction.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compaction.h b/db/compaction.h index 19e1e7581..dc8712363 100644 --- a/db/compaction.h +++ b/db/compaction.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/compaction_iterator.cc b/db/compaction_iterator.cc index 1e3e140f4..20eed4f3d 100644 --- a/db/compaction_iterator.cc +++ b/db/compaction_iterator.cc @@ -1,6 +1,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compaction_iterator.h b/db/compaction_iterator.h index cc8e4a531..b13aef3ff 100644 --- a/db/compaction_iterator.h +++ b/db/compaction_iterator.h @@ -1,6 +1,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compaction_iterator_test.cc b/db/compaction_iterator_test.cc index a59f56771..4cbccca55 100644 --- a/db/compaction_iterator_test.cc +++ b/db/compaction_iterator_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 5be4a2c2e..3247036b5 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compaction_job.h b/db/compaction_job.h index e4d5244e3..125dc8fe4 100644 --- a/db/compaction_job.h +++ b/db/compaction_job.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compaction_job_stats_test.cc b/db/compaction_job_stats_test.cc index df38bd3b7..4ead93510 100644 --- a/db/compaction_job_stats_test.cc +++ b/db/compaction_job_stats_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compaction_job_test.cc b/db/compaction_job_test.cc index 0c438296f..fc3a6b9f8 100644 --- a/db/compaction_job_test.cc +++ b/db/compaction_job_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index 5bb27907e..7d43b76d1 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compaction_picker.h b/db/compaction_picker.h index c082a9fce..b8611b4cb 100644 --- a/db/compaction_picker.h +++ b/db/compaction_picker.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/compaction_picker_test.cc b/db/compaction_picker_test.cc index 50f97f247..f3801ae9f 100644 --- a/db/compaction_picker_test.cc +++ b/db/compaction_picker_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/comparator_db_test.cc b/db/comparator_db_test.cc index 530c91060..e4e84107e 100644 --- a/db/comparator_db_test.cc +++ b/db/comparator_db_test.cc @@ -1,6 +1,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/convenience.cc b/db/convenience.cc index a9d113ff1..b1042c74d 100644 --- a/db/convenience.cc +++ b/db/convenience.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/corruption_test.cc b/db/corruption_test.cc index f6e06bec8..85bfe57cb 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/cuckoo_table_db_test.cc b/db/cuckoo_table_db_test.cc index 09a68de92..f48b5b436 100644 --- a/db/cuckoo_table_db_test.cc +++ b/db/cuckoo_table_db_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_bench.cc b/db/db_bench.cc index a0d767ea6..9260501b1 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_compaction_filter_test.cc b/db/db_compaction_filter_test.cc index 6fde1fe06..88738ec36 100644 --- a/db/db_compaction_filter_test.cc +++ b/db/db_compaction_filter_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 5d9e0536f..b900aa5f6 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/db_dynamic_level_test.cc b/db/db_dynamic_level_test.cc index f29985e05..7b3a15e56 100644 --- a/db/db_dynamic_level_test.cc +++ b/db/db_dynamic_level_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_filesnapshot.cc b/db/db_filesnapshot.cc index e39ccf496..89ebb6650 100644 --- a/db/db_filesnapshot.cc +++ b/db/db_filesnapshot.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_impl.cc b/db/db_impl.cc index 87fb9fca7..3745eaab1 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_impl.h b/db/db_impl.h index 82dd39135..e0d10e99e 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_impl_debug.cc b/db/db_impl_debug.cc index e494c4ee5..af4553f89 100644 --- a/db/db_impl_debug.cc +++ b/db/db_impl_debug.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_impl_experimental.cc b/db/db_impl_experimental.cc index 6bf0ba6a1..af3663e60 100644 --- a/db/db_impl_experimental.cc +++ b/db/db_impl_experimental.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_impl_readonly.cc b/db/db_impl_readonly.cc index 618ade8c9..d96362f79 100644 --- a/db/db_impl_readonly.cc +++ b/db/db_impl_readonly.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_impl_readonly.h b/db/db_impl_readonly.h index 8f3103aac..a410a4e32 100644 --- a/db/db_impl_readonly.h +++ b/db/db_impl_readonly.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_info_dumper.cc b/db/db_info_dumper.cc index de9b77b1b..56cf3e288 100644 --- a/db/db_info_dumper.cc +++ b/db/db_info_dumper.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_info_dumper.h b/db/db_info_dumper.h index ed0a63ded..470b6224f 100644 --- a/db/db_info_dumper.h +++ b/db/db_info_dumper.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_inplace_update_test.cc b/db/db_inplace_update_test.cc index 046ddead4..2c15a5f14 100644 --- a/db/db_inplace_update_test.cc +++ b/db/db_inplace_update_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_iter.cc b/db/db_iter.cc index 10e9658cc..a0a0529a7 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_iter.h b/db/db_iter.h index ba52c8cf9..23bedb660 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_iter_test.cc b/db/db_iter_test.cc index f1e3324d8..023a0a09c 100644 --- a/db/db_iter_test.cc +++ b/db/db_iter_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_log_iter_test.cc b/db/db_log_iter_test.cc index 33b5e4ef9..956f601a7 100644 --- a/db/db_log_iter_test.cc +++ b/db/db_log_iter_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index 98c9d4e20..5af94569f 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2016, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_table_properties_test.cc b/db/db_table_properties_test.cc index becf76e6f..87ba13d86 100644 --- a/db/db_table_properties_test.cc +++ b/db/db_table_properties_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_tailing_iter_test.cc b/db/db_tailing_iter_test.cc index 75f69e622..195424386 100644 --- a/db/db_tailing_iter_test.cc +++ b/db/db_tailing_iter_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_test.cc b/db/db_test.cc index 76a9ec396..a1dd6fb18 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 222dc715d..d601ec7eb 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_test_util.h b/db/db_test_util.h index cde352dfe..9eedf8d35 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index a4cf6657f..9459e9761 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 9e8a19dce..14b9e2ffd 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/dbformat.cc b/db/dbformat.cc index eb19a7b17..d840aea86 100644 --- a/db/dbformat.cc +++ b/db/dbformat.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/dbformat.h b/db/dbformat.h index 1e1169639..3a9682d1d 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/dbformat_test.cc b/db/dbformat_test.cc index 0273dd062..e79dbc683 100644 --- a/db/dbformat_test.cc +++ b/db/dbformat_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/deletefile_test.cc b/db/deletefile_test.cc index a4cb296d9..3cc060db9 100644 --- a/db/deletefile_test.cc +++ b/db/deletefile_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/event_helpers.cc b/db/event_helpers.cc index 834ad9b1b..1a591dc91 100644 --- a/db/event_helpers.cc +++ b/db/event_helpers.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/event_helpers.h b/db/event_helpers.h index a60bc9a9e..a36010e16 100644 --- a/db/event_helpers.h +++ b/db/event_helpers.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/experimental.cc b/db/experimental.cc index 0b5018aef..26b2113d2 100644 --- a/db/experimental.cc +++ b/db/experimental.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/fault_injection_test.cc b/db/fault_injection_test.cc index 4c4f38d4a..6e8363516 100644 --- a/db/fault_injection_test.cc +++ b/db/fault_injection_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/file_indexer.cc b/db/file_indexer.cc index 222cca9c0..9b31c2bd6 100644 --- a/db/file_indexer.cc +++ b/db/file_indexer.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/file_indexer.h b/db/file_indexer.h index 418ae0f68..5eb10bc4d 100644 --- a/db/file_indexer.h +++ b/db/file_indexer.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/file_indexer_test.cc b/db/file_indexer_test.cc index 98fea47fe..9b3cdd4d6 100644 --- a/db/file_indexer_test.cc +++ b/db/file_indexer_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/filename.cc b/db/filename.cc index 32cd8758a..d1f0958bb 100644 --- a/db/filename.cc +++ b/db/filename.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/filename.h b/db/filename.h index f7196c9f2..9a0a1eee3 100644 --- a/db/filename.h +++ b/db/filename.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/filename_test.cc b/db/filename_test.cc index 2eafd5230..0f8e37e7f 100644 --- a/db/filename_test.cc +++ b/db/filename_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/flush_job.cc b/db/flush_job.cc index da1124474..b83f9dbe6 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/flush_job.h b/db/flush_job.h index d12da141e..4d088b58e 100644 --- a/db/flush_job.h +++ b/db/flush_job.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index f7071c1ee..3bba6337b 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/flush_scheduler.cc b/db/flush_scheduler.cc index f970f1ca8..60db59dd4 100644 --- a/db/flush_scheduler.cc +++ b/db/flush_scheduler.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/flush_scheduler.h b/db/flush_scheduler.h index dd439e410..820bd7b71 100644 --- a/db/flush_scheduler.h +++ b/db/flush_scheduler.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/forward_iterator.cc b/db/forward_iterator.cc index 15110fec3..7af8e18af 100644 --- a/db/forward_iterator.cc +++ b/db/forward_iterator.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/forward_iterator.h b/db/forward_iterator.h index 1c4d4975e..16a726b08 100644 --- a/db/forward_iterator.h +++ b/db/forward_iterator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/forward_iterator_bench.cc b/db/forward_iterator_bench.cc index 69833a4af..0f44a9e44 100644 --- a/db/forward_iterator_bench.cc +++ b/db/forward_iterator_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/inlineskiplist.h b/db/inlineskiplist.h index 98621829c..cfd47f39f 100644 --- a/db/inlineskiplist.h +++ b/db/inlineskiplist.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional // grant of patent rights can be found in the PATENTS file in the same diff --git a/db/inlineskiplist_test.cc b/db/inlineskiplist_test.cc index 5c2dd6fa5..5743bacec 100644 --- a/db/inlineskiplist_test.cc +++ b/db/inlineskiplist_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/internal_stats.h b/db/internal_stats.h index 65408e53f..958731a58 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/job_context.h b/db/job_context.h index 5a54e2d85..ce71103de 100644 --- a/db/job_context.h +++ b/db/job_context.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/listener_test.cc b/db/listener_test.cc index f194a113a..fe5547705 100644 --- a/db/listener_test.cc +++ b/db/listener_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/log_format.h b/db/log_format.h index 97eb13393..cf48a202f 100644 --- a/db/log_format.h +++ b/db/log_format.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/log_reader.cc b/db/log_reader.cc index 512dd08d3..c33c43c53 100644 --- a/db/log_reader.cc +++ b/db/log_reader.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/log_reader.h b/db/log_reader.h index 28f0a2c1e..ab9dbab7e 100644 --- a/db/log_reader.h +++ b/db/log_reader.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/log_test.cc b/db/log_test.cc index 41f4c8223..427a31a2b 100644 --- a/db/log_test.cc +++ b/db/log_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/log_writer.cc b/db/log_writer.cc index 84780d87f..3277088be 100644 --- a/db/log_writer.cc +++ b/db/log_writer.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/log_writer.h b/db/log_writer.h index 876e4c56f..23d896746 100644 --- a/db/log_writer.h +++ b/db/log_writer.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/managed_iterator.cc b/db/managed_iterator.cc index c8d943724..ceb7ba40b 100644 --- a/db/managed_iterator.cc +++ b/db/managed_iterator.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/managed_iterator.h b/db/managed_iterator.h index 00f56aea4..d9a87596e 100644 --- a/db/managed_iterator.h +++ b/db/managed_iterator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/manual_compaction_test.cc b/db/manual_compaction_test.cc index 8613b7b36..0ff52d184 100644 --- a/db/manual_compaction_test.cc +++ b/db/manual_compaction_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/memtable.cc b/db/memtable.cc index a8f869261..f34acb319 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/memtable.h b/db/memtable.h index 110985620..a01a598f0 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. 
All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/memtable_allocator.cc b/db/memtable_allocator.cc index 1ed2019b6..f9b2fbd73 100644 --- a/db/memtable_allocator.cc +++ b/db/memtable_allocator.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/memtable_allocator.h b/db/memtable_allocator.h index c2cf130cc..d8bd4c808 100644 --- a/db/memtable_allocator.h +++ b/db/memtable_allocator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/memtable_list.cc b/db/memtable_list.cc index 1734eda03..9668f8396 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/memtable_list.h b/db/memtable_list.h index 117b4a506..37f5c7784 100644 --- a/db/memtable_list.h +++ b/db/memtable_list.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc index 7bb8b3b21..50f96b9f8 100644 --- a/db/memtable_list_test.cc +++ b/db/memtable_list_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/memtablerep_bench.cc b/db/memtablerep_bench.cc index 42edfdfc7..a897adeab 100644 --- a/db/memtablerep_bench.cc +++ b/db/memtablerep_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/merge_context.h b/db/merge_context.h index f8609da75..74264c4c9 100644 --- a/db/merge_context.h +++ b/db/merge_context.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/merge_helper.cc b/db/merge_helper.cc index c443ca2d9..145a72b0d 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/merge_helper.h b/db/merge_helper.h index 488c7ac2b..7128b1a09 100644 --- a/db/merge_helper.h +++ b/db/merge_helper.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/merge_helper_test.cc b/db/merge_helper_test.cc index 2ef0d39e4..b21f56078 100644 --- a/db/merge_helper_test.cc +++ b/db/merge_helper_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/merge_operator.cc b/db/merge_operator.cc index c6645a910..5c5d04008 100644 --- a/db/merge_operator.cc +++ b/db/merge_operator.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/merge_test.cc b/db/merge_test.cc index 50f0e7c93..020f33ba6 100644 --- a/db/merge_test.cc +++ b/db/merge_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/options_file_test.cc b/db/options_file_test.cc index 86a98899a..fbbc8c552 100644 --- a/db/options_file_test.cc +++ b/db/options_file_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index 8a345e5bb..adab6d78e 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index e3d9fc402..8cb070ac6 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -1,6 +1,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/prefix_test.cc b/db/prefix_test.cc index a210e4d65..eccce06d5 100644 --- a/db/prefix_test.cc +++ b/db/prefix_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/repair.cc b/db/repair.cc index f4758d0cd..6aa72f792 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/skiplist.h b/db/skiplist.h index b80ecf210..3fdbd8f54 100644 --- a/db/skiplist.h +++ b/db/skiplist.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/skiplist_test.cc b/db/skiplist_test.cc index 3d1418625..b4f98e34c 100644 --- a/db/skiplist_test.cc +++ b/db/skiplist_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/slice.cc b/db/slice.cc index 7e7245d79..10b0ca592 100644 --- a/db/slice.cc +++ b/db/slice.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/snapshot_impl.cc b/db/snapshot_impl.cc index d901b61d2..5c4f6abaa 100644 --- a/db/snapshot_impl.cc +++ b/db/snapshot_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/snapshot_impl.h b/db/snapshot_impl.h index 277cf3a20..aaac7a0e3 100644 --- a/db/snapshot_impl.h +++ b/db/snapshot_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/table_cache.cc b/db/table_cache.cc index 1d506f002..53e35d3a0 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/table_cache.h b/db/table_cache.h index 44246fbf4..f8416e0b4 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/table_properties_collector.cc b/db/table_properties_collector.cc index c14ecec11..204f42895 100644 --- a/db/table_properties_collector.cc +++ b/db/table_properties_collector.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/table_properties_collector.h b/db/table_properties_collector.h index 0e5f4e347..2b0310b0d 100644 --- a/db/table_properties_collector.h +++ b/db/table_properties_collector.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index 7cea86fdd..d096e6c79 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/transaction_log_impl.cc b/db/transaction_log_impl.cc index 28c4490f5..624a3af99 100644 --- a/db/transaction_log_impl.cc +++ b/db/transaction_log_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/transaction_log_impl.h b/db/transaction_log_impl.h index f89cc3207..d4a2468e7 100644 --- a/db/transaction_log_impl.h +++ b/db/transaction_log_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/version_builder.cc b/db/version_builder.cc index adc7b82b6..d0e7640fd 100644 --- a/db/version_builder.cc +++ b/db/version_builder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/version_builder.h b/db/version_builder.h index 143da9905..c09815217 100644 --- a/db/version_builder.h +++ b/db/version_builder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/version_builder_test.cc b/db/version_builder_test.cc index 66230eef4..2a87dc238 100644 --- a/db/version_builder_test.cc +++ b/db/version_builder_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/version_edit.cc b/db/version_edit.cc index 23df641af..4cbf61f51 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. 
All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/version_edit.h b/db/version_edit.h index 65213ed1a..ec5df00a4 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/version_edit_test.cc b/db/version_edit_test.cc index 629f904b0..ab109be60 100644 --- a/db/version_edit_test.cc +++ b/db/version_edit_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/version_set.cc b/db/version_set.cc index 6804730d7..2e600f8a2 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/version_set.h b/db/version_set.h index 7ce4a6bdf..52bf964e7 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 9dc6e95d6..98b20a110 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/wal_manager.cc b/db/wal_manager.cc index adeb6b96f..e1d911e6e 100644 --- a/db/wal_manager.cc +++ b/db/wal_manager.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/wal_manager.h b/db/wal_manager.h index fc04863b2..a3079ed48 100644 --- a/db/wal_manager.h +++ b/db/wal_manager.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/wal_manager_test.cc b/db/wal_manager_test.cc index 764706d33..4d3f5b6e2 100644 --- a/db/wal_manager_test.cc +++ b/db/wal_manager_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/write_batch.cc b/db/write_batch.cc index accc313e4..3742ae694 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/write_batch_base.cc b/db/write_batch_base.cc index 9f7f00d2c..3936fbd92 100644 --- a/db/write_batch_base.cc +++ b/db/write_batch_base.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/write_batch_internal.h b/db/write_batch_internal.h index 1ee234b84..1e7f61e69 100644 --- a/db/write_batch_internal.h +++ b/db/write_batch_internal.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc index c475dbeb6..58c7273c3 100644 --- a/db/write_batch_test.cc +++ b/db/write_batch_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/write_callback.h b/db/write_callback.h index a549f415a..93c80d651 100644 --- a/db/write_callback.h +++ b/db/write_callback.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/write_callback_test.cc b/db/write_callback_test.cc index 3b76fd2d1..d552b1178 100644 --- a/db/write_callback_test.cc +++ b/db/write_callback_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/db/write_controller.cc b/db/write_controller.cc index a0c18835f..d46d8d3dd 100644 --- a/db/write_controller.cc +++ b/db/write_controller.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/write_controller.h b/db/write_controller.h index 6cba2c643..052047177 100644 --- a/db/write_controller.h +++ b/db/write_controller.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/write_controller_test.cc b/db/write_controller_test.cc index dc5614855..db9a9db1b 100644 --- a/db/write_controller_test.cc +++ b/db/write_controller_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/write_thread.cc b/db/write_thread.cc index ce269f664..531da55df 100644 --- a/db/write_thread.cc +++ b/db/write_thread.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/write_thread.h b/db/write_thread.h index b1dbaca32..c3cb5cc0e 100644 --- a/db/write_thread.h +++ b/db/write_thread.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/writebuffer.h b/db/writebuffer.h index 4fe51d8a7..19d51d925 100644 --- a/db/writebuffer.h +++ b/db/writebuffer.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/xfunc_test_points.cc b/db/xfunc_test_points.cc index 82cf685e2..67e96dd05 100644 --- a/db/xfunc_test_points.cc +++ b/db/xfunc_test_points.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2016, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/db/xfunc_test_points.h b/db/xfunc_test_points.h index 2887d2dfc..8ed9f2c73 100644 --- a/db/xfunc_test_points.h +++ b/db/xfunc_test_points.h @@ -1,4 +1,4 @@ -// Copyright (c) 2016, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/examples/column_families_example.cc b/examples/column_families_example.cc index 3ffac064d..f2dec691e 100644 --- a/examples/column_families_example.cc +++ b/examples/column_families_example.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/examples/compact_files_example.cc b/examples/compact_files_example.cc index 6c0456675..023ae403b 100644 --- a/examples/compact_files_example.cc +++ b/examples/compact_files_example.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/examples/compaction_filter_example.cc b/examples/compaction_filter_example.cc index 050f4611a..6b0feb149 100644 --- a/examples/compaction_filter_example.cc +++ b/examples/compaction_filter_example.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/examples/optimistic_transaction_example.cc b/examples/optimistic_transaction_example.cc index e9ab0e5ee..d28a305b3 100644 --- a/examples/optimistic_transaction_example.cc +++ b/examples/optimistic_transaction_example.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/examples/options_file_example.cc b/examples/options_file_example.cc index 916ff02f3..360ccddf2 100644 --- a/examples/options_file_example.cc +++ b/examples/options_file_example.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/examples/simple_example.cc b/examples/simple_example.cc index 28a7c9e8b..453443479 100644 --- a/examples/simple_example.cc +++ b/examples/simple_example.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/examples/transaction_example.cc b/examples/transaction_example.cc index a7d506129..914f1bc30 100644 --- a/examples/transaction_example.cc +++ b/examples/transaction_example.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/hdfs/env_hdfs.h b/hdfs/env_hdfs.h index d2afbfb3c..ab27e0440 100644 --- a/hdfs/env_hdfs.h +++ b/hdfs/env_hdfs.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 5cea81d15..4bb870e20 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2013, Facebook, Inc. All rights reserved. +/* Copyright (c) 2011-present, Facebook, Inc. All rights reserved. This source code is licensed under the BSD-style license found in the LICENSE file in the root directory of this source tree. An additional grant of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 47657b90a..30d9c67d3 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/compaction_filter.h b/include/rocksdb/compaction_filter.h index 1286840fe..acdc3aa1b 100644 --- a/include/rocksdb/compaction_filter.h +++ b/include/rocksdb/compaction_filter.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/compaction_job_stats.h b/include/rocksdb/compaction_job_stats.h index 533190015..d06fbe403 100644 --- a/include/rocksdb/compaction_job_stats.h +++ b/include/rocksdb/compaction_job_stats.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/comparator.h b/include/rocksdb/comparator.h index 8fc2710aa..1c67b0d4e 100644 --- a/include/rocksdb/comparator.h +++ b/include/rocksdb/comparator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/convenience.h b/include/rocksdb/convenience.h index f9111b4e3..b4935ce6e 100644 --- a/include/rocksdb/convenience.h +++ b/include/rocksdb/convenience.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 6f5229b5f..cbcb1f211 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/db_dump_tool.h b/include/rocksdb/db_dump_tool.h index 67575a94b..1acc63176 100644 --- a/include/rocksdb/db_dump_tool.h +++ b/include/rocksdb/db_dump_tool.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 4165a13f0..7bdb6ee61 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/experimental.h b/include/rocksdb/experimental.h index 1d02e0238..70ad0b914 100644 --- a/include/rocksdb/experimental.h +++ b/include/rocksdb/experimental.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/filter_policy.h b/include/rocksdb/filter_policy.h index 90aefb388..2c1588a23 100644 --- a/include/rocksdb/filter_policy.h +++ b/include/rocksdb/filter_policy.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/flush_block_policy.h b/include/rocksdb/flush_block_policy.h index 939725cf4..022e0be4a 100644 --- a/include/rocksdb/flush_block_policy.h +++ b/include/rocksdb/flush_block_policy.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/immutable_options.h b/include/rocksdb/immutable_options.h index 52978691b..5a1500826 100644 --- a/include/rocksdb/immutable_options.h +++ b/include/rocksdb/immutable_options.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/iostats_context.h b/include/rocksdb/iostats_context.h index e81092b52..10ea420cf 100644 --- a/include/rocksdb/iostats_context.h +++ b/include/rocksdb/iostats_context.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index 885232db6..ca08c35bf 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/ldb_tool.h b/include/rocksdb/ldb_tool.h index cb8188be0..8a6918ba4 100644 --- a/include/rocksdb/ldb_tool.h +++ b/include/rocksdb/ldb_tool.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/memtablerep.h b/include/rocksdb/memtablerep.h index 75717907d..f6f030946 100644 --- a/include/rocksdb/memtablerep.h +++ b/include/rocksdb/memtablerep.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/merge_operator.h b/include/rocksdb/merge_operator.h index 05b66f202..09b9d7dd6 100644 --- a/include/rocksdb/merge_operator.h +++ b/include/rocksdb/merge_operator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/metadata.h b/include/rocksdb/metadata.h index 7cdf4a1a9..5425146d7 100644 --- a/include/rocksdb/metadata.h +++ b/include/rocksdb/metadata.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index b1f752cf8..0718b2342 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index 7a6b6f367..7cae30aee 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/perf_level.h b/include/rocksdb/perf_level.h index cd7480097..61970cf54 100644 --- a/include/rocksdb/perf_level.h +++ b/include/rocksdb/perf_level.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/rate_limiter.h b/include/rocksdb/rate_limiter.h index ae3ab8f84..b1bf3f427 100644 --- a/include/rocksdb/rate_limiter.h +++ b/include/rocksdb/rate_limiter.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/slice.h b/include/rocksdb/slice.h index 3d39f3a04..3663716dc 100644 --- a/include/rocksdb/slice.h +++ b/include/rocksdb/slice.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/slice_transform.h b/include/rocksdb/slice_transform.h index 3694c5802..d12325812 100644 --- a/include/rocksdb/slice_transform.h +++ b/include/rocksdb/slice_transform.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/snapshot.h b/include/rocksdb/snapshot.h index 95822d297..d8d999dc2 100644 --- a/include/rocksdb/snapshot.h +++ b/include/rocksdb/snapshot.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/sst_dump_tool.h b/include/rocksdb/sst_dump_tool.h index 39bfb519b..0dd94caba 100644 --- a/include/rocksdb/sst_dump_tool.h +++ b/include/rocksdb/sst_dump_tool.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/sst_file_manager.h b/include/rocksdb/sst_file_manager.h index 665f01add..56d28c69f 100644 --- a/include/rocksdb/sst_file_manager.h +++ b/include/rocksdb/sst_file_manager.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/sst_file_writer.h b/include/rocksdb/sst_file_writer.h index eb2f89491..fb01feb1f 100644 --- a/include/rocksdb/sst_file_writer.h +++ b/include/rocksdb/sst_file_writer.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 35de93334..c16c3a7ca 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/status.h b/include/rocksdb/status.h index 511f6661f..bff15ee0f 100644 --- a/include/rocksdb/status.h +++ b/include/rocksdb/status.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. @@ -244,7 +244,7 @@ noexcept *this = std::move(s); } -inline Status& Status::operator=(Status&& s) +inline Status& Status::operator=(Status&& s) #if !(defined _MSC_VER) || ((defined _MSC_VER) && (_MSC_VER >= 1900)) noexcept #endif diff --git a/include/rocksdb/thread_status.h b/include/rocksdb/thread_status.h index d8a61b490..0cdea2b51 100644 --- a/include/rocksdb/thread_status.h +++ b/include/rocksdb/thread_status.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/transaction_log.h b/include/rocksdb/transaction_log.h index 1b80b9a0c..1fb93ace1 100644 --- a/include/rocksdb/transaction_log.h +++ b/include/rocksdb/transaction_log.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/types.h b/include/rocksdb/types.h index f20bf8277..6a477cab8 100644 --- a/include/rocksdb/types.h +++ b/include/rocksdb/types.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/universal_compaction.h b/include/rocksdb/universal_compaction.h index e0f9f830f..11490e413 100644 --- a/include/rocksdb/universal_compaction.h +++ b/include/rocksdb/universal_compaction.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/backupable_db.h b/include/rocksdb/utilities/backupable_db.h index f5f394c22..640f1d390 100644 --- a/include/rocksdb/utilities/backupable_db.h +++ b/include/rocksdb/utilities/backupable_db.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/utilities/checkpoint.h b/include/rocksdb/utilities/checkpoint.h index b2d5458e5..b4523c25e 100644 --- a/include/rocksdb/utilities/checkpoint.h +++ b/include/rocksdb/utilities/checkpoint.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/convenience.h b/include/rocksdb/utilities/convenience.h index fae420b77..b0ac15c6d 100644 --- a/include/rocksdb/utilities/convenience.h +++ b/include/rocksdb/utilities/convenience.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/db_ttl.h b/include/rocksdb/utilities/db_ttl.h index 4534e1ff7..09107c50c 100644 --- a/include/rocksdb/utilities/db_ttl.h +++ b/include/rocksdb/utilities/db_ttl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/utilities/document_db.h b/include/rocksdb/utilities/document_db.h index 7fde5ec9f..52f225705 100644 --- a/include/rocksdb/utilities/document_db.h +++ b/include/rocksdb/utilities/document_db.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/flashcache.h b/include/rocksdb/utilities/flashcache.h index 7bb760924..b54d245f0 100644 --- a/include/rocksdb/utilities/flashcache.h +++ b/include/rocksdb/utilities/flashcache.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/geo_db.h b/include/rocksdb/utilities/geo_db.h index d603c5770..37e5ebdc7 100644 --- a/include/rocksdb/utilities/geo_db.h +++ b/include/rocksdb/utilities/geo_db.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/utilities/info_log_finder.h b/include/rocksdb/utilities/info_log_finder.h index 916c54c28..4b7530c28 100644 --- a/include/rocksdb/utilities/info_log_finder.h +++ b/include/rocksdb/utilities/info_log_finder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/json_document.h b/include/rocksdb/utilities/json_document.h index a5e3ab256..9473258c8 100644 --- a/include/rocksdb/utilities/json_document.h +++ b/include/rocksdb/utilities/json_document.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/leveldb_options.h b/include/rocksdb/utilities/leveldb_options.h index 8e2c3a1d5..ea5063459 100644 --- a/include/rocksdb/utilities/leveldb_options.h +++ b/include/rocksdb/utilities/leveldb_options.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/utilities/memory_util.h b/include/rocksdb/utilities/memory_util.h index 323a8a127..d89bb6adc 100644 --- a/include/rocksdb/utilities/memory_util.h +++ b/include/rocksdb/utilities/memory_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/optimistic_transaction_db.h b/include/rocksdb/utilities/optimistic_transaction_db.h index 772e64549..42ebe191f 100644 --- a/include/rocksdb/utilities/optimistic_transaction_db.h +++ b/include/rocksdb/utilities/optimistic_transaction_db.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/options_util.h b/include/rocksdb/utilities/options_util.h index a2c09fe31..9c0894bea 100644 --- a/include/rocksdb/utilities/options_util.h +++ b/include/rocksdb/utilities/options_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/utilities/spatial_db.h b/include/rocksdb/utilities/spatial_db.h index 50abbf446..108915fd7 100644 --- a/include/rocksdb/utilities/spatial_db.h +++ b/include/rocksdb/utilities/spatial_db.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/table_properties_collectors.h b/include/rocksdb/utilities/table_properties_collectors.h index d31baf9a0..68a88e718 100644 --- a/include/rocksdb/utilities/table_properties_collectors.h +++ b/include/rocksdb/utilities/table_properties_collectors.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/transaction.h b/include/rocksdb/utilities/transaction.h index 4c8ca4dfd..4ccbb7fb9 100644 --- a/include/rocksdb/utilities/transaction.h +++ b/include/rocksdb/utilities/transaction.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h index f9023fc21..243b7a143 100644 --- a/include/rocksdb/utilities/transaction_db.h +++ b/include/rocksdb/utilities/transaction_db.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/transaction_db_mutex.h b/include/rocksdb/utilities/transaction_db_mutex.h index d9274df50..cedf54295 100644 --- a/include/rocksdb/utilities/transaction_db_mutex.h +++ b/include/rocksdb/utilities/transaction_db_mutex.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/utilities/write_batch_with_index.h b/include/rocksdb/utilities/write_batch_with_index.h index 1e41e7869..aab12ba02 100644 --- a/include/rocksdb/utilities/write_batch_with_index.h +++ b/include/rocksdb/utilities/write_batch_with_index.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/include/rocksdb/version.h b/include/rocksdb/version.h index 9f55c0070..0c98df903 100644 --- a/include/rocksdb/version.h +++ b/include/rocksdb/version.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/wal_filter.h b/include/rocksdb/wal_filter.h index 226d6971c..a80b81a37 100644 --- a/include/rocksdb/wal_filter.h +++ b/include/rocksdb/wal_filter.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/write_batch.h b/include/rocksdb/write_batch.h index f4a7ac06e..e9bd72b58 100644 --- a/include/rocksdb/write_batch.h +++ b/include/rocksdb/write_batch.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/include/rocksdb/write_batch_base.h b/include/rocksdb/write_batch_base.h index c4083754d..86ccbaa18 100644 --- a/include/rocksdb/write_batch_base.h +++ b/include/rocksdb/write_batch_base.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java b/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java index 14eea09e9..fde2824b7 100644 --- a/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java +++ b/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/backupablejni.cc b/java/rocksjni/backupablejni.cc index 59e6dc781..f2304dadb 100644 --- a/java/rocksjni/backupablejni.cc +++ b/java/rocksjni/backupablejni.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/backupenginejni.cc b/java/rocksjni/backupenginejni.cc index e597c98c9..a42399873 100644 --- a/java/rocksjni/backupenginejni.cc +++ b/java/rocksjni/backupenginejni.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/checkpoint.cc b/java/rocksjni/checkpoint.cc index 72a40be00..45f0fde6b 100644 --- a/java/rocksjni/checkpoint.cc +++ b/java/rocksjni/checkpoint.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/columnfamilyhandle.cc b/java/rocksjni/columnfamilyhandle.cc index be3b4c82f..2a874b1d9 100644 --- a/java/rocksjni/columnfamilyhandle.cc +++ b/java/rocksjni/columnfamilyhandle.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/compaction_filter.cc b/java/rocksjni/compaction_filter.cc index 5fa52c0dc..20b36a412 100644 --- a/java/rocksjni/compaction_filter.cc +++ b/java/rocksjni/compaction_filter.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/rocksjni/comparator.cc b/java/rocksjni/comparator.cc index 196376235..8765daa34 100644 --- a/java/rocksjni/comparator.cc +++ b/java/rocksjni/comparator.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/comparatorjnicallback.cc b/java/rocksjni/comparatorjnicallback.cc index a85b45085..1c0317003 100644 --- a/java/rocksjni/comparatorjnicallback.cc +++ b/java/rocksjni/comparatorjnicallback.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/comparatorjnicallback.h b/java/rocksjni/comparatorjnicallback.h index 65b986ca4..821a91e45 100644 --- a/java/rocksjni/comparatorjnicallback.h +++ b/java/rocksjni/comparatorjnicallback.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/env.cc b/java/rocksjni/env.cc index b50d5ae30..a58f54ea7 100644 --- a/java/rocksjni/env.cc +++ b/java/rocksjni/env.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/filter.cc b/java/rocksjni/filter.cc index 2ce17d499..2b662d03f 100644 --- a/java/rocksjni/filter.cc +++ b/java/rocksjni/filter.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/iterator.cc b/java/rocksjni/iterator.cc index e9eb0bb37..c5e64adfb 100644 --- a/java/rocksjni/iterator.cc +++ b/java/rocksjni/iterator.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/loggerjnicallback.cc b/java/rocksjni/loggerjnicallback.cc index 71e50b9a9..56857b750 100644 --- a/java/rocksjni/loggerjnicallback.cc +++ b/java/rocksjni/loggerjnicallback.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/rocksjni/loggerjnicallback.h b/java/rocksjni/loggerjnicallback.h index 3936252bc..2355a3985 100644 --- a/java/rocksjni/loggerjnicallback.h +++ b/java/rocksjni/loggerjnicallback.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/memtablejni.cc b/java/rocksjni/memtablejni.cc index ce27f9769..ead038d50 100644 --- a/java/rocksjni/memtablejni.cc +++ b/java/rocksjni/memtablejni.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/options.cc b/java/rocksjni/options.cc index d1702a0fe..9cb466538 100644 --- a/java/rocksjni/options.cc +++ b/java/rocksjni/options.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 5423ea7b2..0c5a9245f 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/ratelimiterjni.cc b/java/rocksjni/ratelimiterjni.cc index ab6160e0d..7b4bc1f22 100644 --- a/java/rocksjni/ratelimiterjni.cc +++ b/java/rocksjni/ratelimiterjni.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc b/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc index e442d8daf..3cf7b3a03 100644 --- a/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc +++ b/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/restorejni.cc b/java/rocksjni/restorejni.cc index c53ed9a60..40b13dac5 100644 --- a/java/rocksjni/restorejni.cc +++ b/java/rocksjni/restorejni.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 0ebbcad49..d9c0c6147 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/slice.cc b/java/rocksjni/slice.cc index 811117397..5e05e46f7 100644 --- a/java/rocksjni/slice.cc +++ b/java/rocksjni/slice.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/snapshot.cc b/java/rocksjni/snapshot.cc index cd10c97c8..fa8ede7ab 100644 --- a/java/rocksjni/snapshot.cc +++ b/java/rocksjni/snapshot.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/statistics.cc b/java/rocksjni/statistics.cc index 0e44ea564..6d1ef8db0 100644 --- a/java/rocksjni/statistics.cc +++ b/java/rocksjni/statistics.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/table.cc b/java/rocksjni/table.cc index e78e7e0d7..97aef9888 100644 --- a/java/rocksjni/table.cc +++ b/java/rocksjni/table.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/transaction_log.cc b/java/rocksjni/transaction_log.cc index 1d3d7c100..eed8d84b5 100644 --- a/java/rocksjni/transaction_log.cc +++ b/java/rocksjni/transaction_log.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/ttl.cc b/java/rocksjni/ttl.cc index ec5b419f1..219e6c4db 100644 --- a/java/rocksjni/ttl.cc +++ b/java/rocksjni/ttl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/rocksjni/write_batch.cc b/java/rocksjni/write_batch.cc index dc3f6d2c6..83d2e6dfe 100644 --- a/java/rocksjni/write_batch.cc +++ b/java/rocksjni/write_batch.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/write_batch_test.cc b/java/rocksjni/write_batch_test.cc index e9cd2fbba..9b4c7fd61 100644 --- a/java/rocksjni/write_batch_test.cc +++ b/java/rocksjni/write_batch_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/write_batch_with_index.cc b/java/rocksjni/write_batch_with_index.cc index 7c57a0e06..51296427e 100644 --- a/java/rocksjni/write_batch_with_index.cc +++ b/java/rocksjni/write_batch_with_index.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/rocksjni/writebatchhandlerjnicallback.cc b/java/rocksjni/writebatchhandlerjnicallback.cc index b12e35544..b25236518 100644 --- a/java/rocksjni/writebatchhandlerjnicallback.cc +++ b/java/rocksjni/writebatchhandlerjnicallback.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/rocksjni/writebatchhandlerjnicallback.h b/java/rocksjni/writebatchhandlerjnicallback.h index 9a2a47e80..1c421db03 100644 --- a/java/rocksjni/writebatchhandlerjnicallback.h +++ b/java/rocksjni/writebatchhandlerjnicallback.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/samples/src/main/java/RocksDBColumnFamilySample.java b/java/samples/src/main/java/RocksDBColumnFamilySample.java index da9f4d28b..8d682928c 100644 --- a/java/samples/src/main/java/RocksDBColumnFamilySample.java +++ b/java/samples/src/main/java/RocksDBColumnFamilySample.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/samples/src/main/java/RocksDBSample.java b/java/samples/src/main/java/RocksDBSample.java index 402fd8f89..3ac17777d 100644 --- a/java/samples/src/main/java/RocksDBSample.java +++ b/java/samples/src/main/java/RocksDBSample.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java b/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java index 2b78deddb..1ecedf156 100644 --- a/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java +++ b/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/AbstractComparator.java b/java/src/main/java/org/rocksdb/AbstractComparator.java index c2412d7f2..04a26bfba 100644 --- a/java/src/main/java/org/rocksdb/AbstractComparator.java +++ b/java/src/main/java/org/rocksdb/AbstractComparator.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/AbstractRocksIterator.java b/java/src/main/java/org/rocksdb/AbstractRocksIterator.java index f3f89a671..b7419cba9 100644 --- a/java/src/main/java/org/rocksdb/AbstractRocksIterator.java +++ b/java/src/main/java/org/rocksdb/AbstractRocksIterator.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/AbstractSlice.java b/java/src/main/java/org/rocksdb/AbstractSlice.java index a37bd023e..ea77f5384 100644 --- a/java/src/main/java/org/rocksdb/AbstractSlice.java +++ b/java/src/main/java/org/rocksdb/AbstractSlice.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/AbstractWriteBatch.java b/java/src/main/java/org/rocksdb/AbstractWriteBatch.java index b380c5d8a..984e400ab 100644 --- a/java/src/main/java/org/rocksdb/AbstractWriteBatch.java +++ b/java/src/main/java/org/rocksdb/AbstractWriteBatch.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/BackupEngine.java b/java/src/main/java/org/rocksdb/BackupEngine.java index 2f944e5fb..4791719aa 100644 --- a/java/src/main/java/org/rocksdb/BackupEngine.java +++ b/java/src/main/java/org/rocksdb/BackupEngine.java @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/BackupInfo.java b/java/src/main/java/org/rocksdb/BackupInfo.java index 48a52a789..4f3a62845 100644 --- a/java/src/main/java/org/rocksdb/BackupInfo.java +++ b/java/src/main/java/org/rocksdb/BackupInfo.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/BackupableDB.java b/java/src/main/java/org/rocksdb/BackupableDB.java index f2646d22a..6de20736f 100644 --- a/java/src/main/java/org/rocksdb/BackupableDB.java +++ b/java/src/main/java/org/rocksdb/BackupableDB.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/BackupableDBOptions.java b/java/src/main/java/org/rocksdb/BackupableDBOptions.java index 17a0afc28..d32f2db8c 100644 --- a/java/src/main/java/org/rocksdb/BackupableDBOptions.java +++ b/java/src/main/java/org/rocksdb/BackupableDBOptions.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java b/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java index c3c6309b3..f569e6f42 100644 --- a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java +++ b/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/BloomFilter.java b/java/src/main/java/org/rocksdb/BloomFilter.java index 67c45d717..2c9585f71 100644 --- a/java/src/main/java/org/rocksdb/BloomFilter.java +++ b/java/src/main/java/org/rocksdb/BloomFilter.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/BuiltinComparator.java b/java/src/main/java/org/rocksdb/BuiltinComparator.java index ee92e8dd9..436cb513f 100644 --- a/java/src/main/java/org/rocksdb/BuiltinComparator.java +++ b/java/src/main/java/org/rocksdb/BuiltinComparator.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/Checkpoint.java b/java/src/main/java/org/rocksdb/Checkpoint.java index 816eceacf..9faa355e1 100644 --- a/java/src/main/java/org/rocksdb/Checkpoint.java +++ b/java/src/main/java/org/rocksdb/Checkpoint.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/ChecksumType.java b/java/src/main/java/org/rocksdb/ChecksumType.java index e685376bf..7f560170c 100644 --- a/java/src/main/java/org/rocksdb/ChecksumType.java +++ b/java/src/main/java/org/rocksdb/ChecksumType.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java b/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java index 8def05e74..84581f465 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java b/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java index 613cb892c..d414ee587 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java index 4304f589a..612efbe7f 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java index c4d7245a1..9856ec686 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/CompactionStyle.java b/java/src/main/java/org/rocksdb/CompactionStyle.java index 76064395c..22dc7dcf5 100644 --- a/java/src/main/java/org/rocksdb/CompactionStyle.java +++ b/java/src/main/java/org/rocksdb/CompactionStyle.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/Comparator.java b/java/src/main/java/org/rocksdb/Comparator.java index c8e050bca..41f7fbc93 100644 --- a/java/src/main/java/org/rocksdb/Comparator.java +++ b/java/src/main/java/org/rocksdb/Comparator.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/CompressionType.java b/java/src/main/java/org/rocksdb/CompressionType.java index ec0c42f4d..b4d86166e 100644 --- a/java/src/main/java/org/rocksdb/CompressionType.java +++ b/java/src/main/java/org/rocksdb/CompressionType.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/DBOptions.java b/java/src/main/java/org/rocksdb/DBOptions.java index 85aad1e72..d2e1bf94c 100644 --- a/java/src/main/java/org/rocksdb/DBOptions.java +++ b/java/src/main/java/org/rocksdb/DBOptions.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/DBOptionsInterface.java b/java/src/main/java/org/rocksdb/DBOptionsInterface.java index 0c230e436..917e26ab0 100644 --- a/java/src/main/java/org/rocksdb/DBOptionsInterface.java +++ b/java/src/main/java/org/rocksdb/DBOptionsInterface.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/DirectComparator.java b/java/src/main/java/org/rocksdb/DirectComparator.java index 47f4d7256..68ad11f6c 100644 --- a/java/src/main/java/org/rocksdb/DirectComparator.java +++ b/java/src/main/java/org/rocksdb/DirectComparator.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/DirectSlice.java b/java/src/main/java/org/rocksdb/DirectSlice.java index 765b01586..7a59a3d82 100644 --- a/java/src/main/java/org/rocksdb/DirectSlice.java +++ b/java/src/main/java/org/rocksdb/DirectSlice.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/EncodingType.java b/java/src/main/java/org/rocksdb/EncodingType.java index d639542aa..e27a9853f 100644 --- a/java/src/main/java/org/rocksdb/EncodingType.java +++ b/java/src/main/java/org/rocksdb/EncodingType.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/Env.java b/java/src/main/java/org/rocksdb/Env.java index 929a394c3..74088fd86 100644 --- a/java/src/main/java/org/rocksdb/Env.java +++ b/java/src/main/java/org/rocksdb/Env.java @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/Filter.java b/java/src/main/java/org/rocksdb/Filter.java index ce5c41f26..1cc0ccd4c 100644 --- a/java/src/main/java/org/rocksdb/Filter.java +++ b/java/src/main/java/org/rocksdb/Filter.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/GenericRateLimiterConfig.java b/java/src/main/java/org/rocksdb/GenericRateLimiterConfig.java index 89951c5d1..cc00c6f0a 100644 --- a/java/src/main/java/org/rocksdb/GenericRateLimiterConfig.java +++ b/java/src/main/java/org/rocksdb/GenericRateLimiterConfig.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/HistogramData.java b/java/src/main/java/org/rocksdb/HistogramData.java index 020a9c9a5..a920f4b4e 100644 --- a/java/src/main/java/org/rocksdb/HistogramData.java +++ b/java/src/main/java/org/rocksdb/HistogramData.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/HistogramType.java b/java/src/main/java/org/rocksdb/HistogramType.java index 9b4548108..a4459eecc 100644 --- a/java/src/main/java/org/rocksdb/HistogramType.java +++ b/java/src/main/java/org/rocksdb/HistogramType.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/IndexType.java b/java/src/main/java/org/rocksdb/IndexType.java index f3c104566..db24a6f68 100644 --- a/java/src/main/java/org/rocksdb/IndexType.java +++ b/java/src/main/java/org/rocksdb/IndexType.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/Logger.java b/java/src/main/java/org/rocksdb/Logger.java index 05c53b56e..26359ff2e 100644 --- a/java/src/main/java/org/rocksdb/Logger.java +++ b/java/src/main/java/org/rocksdb/Logger.java @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/MemTableConfig.java b/java/src/main/java/org/rocksdb/MemTableConfig.java index 7c34826e1..8b854917f 100644 --- a/java/src/main/java/org/rocksdb/MemTableConfig.java +++ b/java/src/main/java/org/rocksdb/MemTableConfig.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/Options.java b/java/src/main/java/org/rocksdb/Options.java index 771de0ac6..dfce746bf 100644 --- a/java/src/main/java/org/rocksdb/Options.java +++ b/java/src/main/java/org/rocksdb/Options.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/PlainTableConfig.java b/java/src/main/java/org/rocksdb/PlainTableConfig.java index 3a41bea84..044c18d80 100644 --- a/java/src/main/java/org/rocksdb/PlainTableConfig.java +++ b/java/src/main/java/org/rocksdb/PlainTableConfig.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/RateLimiterConfig.java b/java/src/main/java/org/rocksdb/RateLimiterConfig.java index 09d1c7a04..d2e7459e3 100644 --- a/java/src/main/java/org/rocksdb/RateLimiterConfig.java +++ b/java/src/main/java/org/rocksdb/RateLimiterConfig.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/ReadOptions.java b/java/src/main/java/org/rocksdb/ReadOptions.java index a72a6e0d8..3baf8e808 100644 --- a/java/src/main/java/org/rocksdb/ReadOptions.java +++ b/java/src/main/java/org/rocksdb/ReadOptions.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java b/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java index 61c46131b..2f54cdf45 100644 --- a/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java +++ b/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/RestoreBackupableDB.java b/java/src/main/java/org/rocksdb/RestoreBackupableDB.java index 5a3b2fc9a..90592e845 100644 --- a/java/src/main/java/org/rocksdb/RestoreBackupableDB.java +++ b/java/src/main/java/org/rocksdb/RestoreBackupableDB.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/RestoreOptions.java b/java/src/main/java/org/rocksdb/RestoreOptions.java index d98167aeb..8cfe56640 100644 --- a/java/src/main/java/org/rocksdb/RestoreOptions.java +++ b/java/src/main/java/org/rocksdb/RestoreOptions.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java index 2af55c420..786335745 100644 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ b/java/src/main/java/org/rocksdb/RocksDB.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/RocksDBException.java b/java/src/main/java/org/rocksdb/RocksDBException.java index a65d40124..ee869f20f 100644 --- a/java/src/main/java/org/rocksdb/RocksDBException.java +++ b/java/src/main/java/org/rocksdb/RocksDBException.java @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/RocksEnv.java b/java/src/main/java/org/rocksdb/RocksEnv.java index 4c399eafa..4c34a9f4b 100644 --- a/java/src/main/java/org/rocksdb/RocksEnv.java +++ b/java/src/main/java/org/rocksdb/RocksEnv.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/RocksIterator.java b/java/src/main/java/org/rocksdb/RocksIterator.java index bb9a6e697..d93a96197 100644 --- a/java/src/main/java/org/rocksdb/RocksIterator.java +++ b/java/src/main/java/org/rocksdb/RocksIterator.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/RocksIteratorInterface.java b/java/src/main/java/org/rocksdb/RocksIteratorInterface.java index fce8fe314..3ac74a90a 100644 --- a/java/src/main/java/org/rocksdb/RocksIteratorInterface.java +++ b/java/src/main/java/org/rocksdb/RocksIteratorInterface.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/RocksMemEnv.java b/java/src/main/java/org/rocksdb/RocksMemEnv.java index 54c9f9981..4517577be 100644 --- a/java/src/main/java/org/rocksdb/RocksMemEnv.java +++ b/java/src/main/java/org/rocksdb/RocksMemEnv.java @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/RocksObject.java b/java/src/main/java/org/rocksdb/RocksObject.java index 6e24a1385..2d645805a 100644 --- a/java/src/main/java/org/rocksdb/RocksObject.java +++ b/java/src/main/java/org/rocksdb/RocksObject.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/Slice.java b/java/src/main/java/org/rocksdb/Slice.java index d26490e5f..2a1ae6fae 100644 --- a/java/src/main/java/org/rocksdb/Slice.java +++ b/java/src/main/java/org/rocksdb/Slice.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/Snapshot.java b/java/src/main/java/org/rocksdb/Snapshot.java index 7ef5c383d..c71eac937 100644 --- a/java/src/main/java/org/rocksdb/Snapshot.java +++ b/java/src/main/java/org/rocksdb/Snapshot.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/Statistics.java b/java/src/main/java/org/rocksdb/Statistics.java index a099444f4..7e014ce99 100644 --- a/java/src/main/java/org/rocksdb/Statistics.java +++ b/java/src/main/java/org/rocksdb/Statistics.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/StatisticsCollector.java b/java/src/main/java/org/rocksdb/StatisticsCollector.java index 4f1577ca7..f435b514f 100644 --- a/java/src/main/java/org/rocksdb/StatisticsCollector.java +++ b/java/src/main/java/org/rocksdb/StatisticsCollector.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java b/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java index 2ce92c5ee..18f81790e 100644 --- a/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java +++ b/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/StatsCollectorInput.java b/java/src/main/java/org/rocksdb/StatsCollectorInput.java index 0e842c256..a3acede3f 100644 --- a/java/src/main/java/org/rocksdb/StatsCollectorInput.java +++ b/java/src/main/java/org/rocksdb/StatsCollectorInput.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/TableFormatConfig.java b/java/src/main/java/org/rocksdb/TableFormatConfig.java index 58a533b22..29cd262c2 100644 --- a/java/src/main/java/org/rocksdb/TableFormatConfig.java +++ b/java/src/main/java/org/rocksdb/TableFormatConfig.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/TickerType.java b/java/src/main/java/org/rocksdb/TickerType.java index 180fbf4a6..9ff819a20 100644 --- a/java/src/main/java/org/rocksdb/TickerType.java +++ b/java/src/main/java/org/rocksdb/TickerType.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/TtlDB.java b/java/src/main/java/org/rocksdb/TtlDB.java index de6dea9a5..351ab5c07 100644 --- a/java/src/main/java/org/rocksdb/TtlDB.java +++ b/java/src/main/java/org/rocksdb/TtlDB.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/WBWIRocksIterator.java b/java/src/main/java/org/rocksdb/WBWIRocksIterator.java index f42f5498b..b807810dc 100644 --- a/java/src/main/java/org/rocksdb/WBWIRocksIterator.java +++ b/java/src/main/java/org/rocksdb/WBWIRocksIterator.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/WriteBatch.java b/java/src/main/java/org/rocksdb/WriteBatch.java index 960d122e2..65223bb99 100644 --- a/java/src/main/java/org/rocksdb/WriteBatch.java +++ b/java/src/main/java/org/rocksdb/WriteBatch.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/WriteBatchInterface.java b/java/src/main/java/org/rocksdb/WriteBatchInterface.java index d5c24ec3a..885f1213d 100644 --- a/java/src/main/java/org/rocksdb/WriteBatchInterface.java +++ b/java/src/main/java/org/rocksdb/WriteBatchInterface.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java b/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java index bde037bc3..cec3d0393 100644 --- a/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java +++ b/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/main/java/org/rocksdb/WriteOptions.java b/java/src/main/java/org/rocksdb/WriteOptions.java index c27dc9b3c..d6a32fb4f 100644 --- a/java/src/main/java/org/rocksdb/WriteOptions.java +++ b/java/src/main/java/org/rocksdb/WriteOptions.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/main/java/org/rocksdb/util/SizeUnit.java b/java/src/main/java/org/rocksdb/util/SizeUnit.java index 8d50cd10e..e66fc371c 100644 --- a/java/src/main/java/org/rocksdb/util/SizeUnit.java +++ b/java/src/main/java/org/rocksdb/util/SizeUnit.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/AbstractComparatorTest.java b/java/src/test/java/org/rocksdb/AbstractComparatorTest.java index a776351c0..bf8b3c0f7 100644 --- a/java/src/test/java/org/rocksdb/AbstractComparatorTest.java +++ b/java/src/test/java/org/rocksdb/AbstractComparatorTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/BackupEngineTest.java b/java/src/test/java/org/rocksdb/BackupEngineTest.java index 48dff19e1..f010ff3ac 100644 --- a/java/src/test/java/org/rocksdb/BackupEngineTest.java +++ b/java/src/test/java/org/rocksdb/BackupEngineTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java b/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java index 9bad1af3d..44dc5b578 100644 --- a/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java +++ b/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/BackupableDBTest.java b/java/src/test/java/org/rocksdb/BackupableDBTest.java index 3f358bdb7..b5e2f129c 100644 --- a/java/src/test/java/org/rocksdb/BackupableDBTest.java +++ b/java/src/test/java/org/rocksdb/BackupableDBTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java b/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java index aacf44054..2b1ce5ffa 100644 --- a/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java +++ b/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java b/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java index af7216128..e0ebd67ac 100644 --- a/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java +++ b/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/ColumnFamilyTest.java b/java/src/test/java/org/rocksdb/ColumnFamilyTest.java index decdbbcb2..5c62cca73 100644 --- a/java/src/test/java/org/rocksdb/ColumnFamilyTest.java +++ b/java/src/test/java/org/rocksdb/ColumnFamilyTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java b/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java index 4f8a7d1a6..2a86515e3 100644 --- a/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java +++ b/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/ComparatorTest.java b/java/src/test/java/org/rocksdb/ComparatorTest.java index e689a9cf5..d4cea0cb8 100644 --- a/java/src/test/java/org/rocksdb/ComparatorTest.java +++ b/java/src/test/java/org/rocksdb/ComparatorTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/CompressionOptionsTest.java b/java/src/test/java/org/rocksdb/CompressionOptionsTest.java index bff4d5f6c..2e2633524 100644 --- a/java/src/test/java/org/rocksdb/CompressionOptionsTest.java +++ b/java/src/test/java/org/rocksdb/CompressionOptionsTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/DBOptionsTest.java b/java/src/test/java/org/rocksdb/DBOptionsTest.java index 98ba4ce38..7cb29a4a5 100644 --- a/java/src/test/java/org/rocksdb/DBOptionsTest.java +++ b/java/src/test/java/org/rocksdb/DBOptionsTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/DirectComparatorTest.java b/java/src/test/java/org/rocksdb/DirectComparatorTest.java index be84d6647..abdbeada9 100644 --- a/java/src/test/java/org/rocksdb/DirectComparatorTest.java +++ b/java/src/test/java/org/rocksdb/DirectComparatorTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/DirectSliceTest.java b/java/src/test/java/org/rocksdb/DirectSliceTest.java index 123eed2e7..615adab38 100644 --- a/java/src/test/java/org/rocksdb/DirectSliceTest.java +++ b/java/src/test/java/org/rocksdb/DirectSliceTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/FilterTest.java b/java/src/test/java/org/rocksdb/FilterTest.java index 36ce37970..d5a1830b3 100644 --- a/java/src/test/java/org/rocksdb/FilterTest.java +++ b/java/src/test/java/org/rocksdb/FilterTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/FlushTest.java b/java/src/test/java/org/rocksdb/FlushTest.java index 94a32d383..094910f27 100644 --- a/java/src/test/java/org/rocksdb/FlushTest.java +++ b/java/src/test/java/org/rocksdb/FlushTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/KeyMayExistTest.java b/java/src/test/java/org/rocksdb/KeyMayExistTest.java index b670caddc..a39ddbb21 100644 --- a/java/src/test/java/org/rocksdb/KeyMayExistTest.java +++ b/java/src/test/java/org/rocksdb/KeyMayExistTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/MemTableTest.java b/java/src/test/java/org/rocksdb/MemTableTest.java index bfc898c42..b54f583d0 100644 --- a/java/src/test/java/org/rocksdb/MemTableTest.java +++ b/java/src/test/java/org/rocksdb/MemTableTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/MergeTest.java b/java/src/test/java/org/rocksdb/MergeTest.java index a5f8e1fe9..9eec4e1eb 100644 --- a/java/src/test/java/org/rocksdb/MergeTest.java +++ b/java/src/test/java/org/rocksdb/MergeTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/MixedOptionsTest.java b/java/src/test/java/org/rocksdb/MixedOptionsTest.java index f095e99d8..a3090a1b1 100644 --- a/java/src/test/java/org/rocksdb/MixedOptionsTest.java +++ b/java/src/test/java/org/rocksdb/MixedOptionsTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java b/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java index fe964b60a..4e9ad27a2 100644 --- a/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java +++ b/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/OptionsTest.java b/java/src/test/java/org/rocksdb/OptionsTest.java index 1c1dfc63a..6d11e6fa7 100644 --- a/java/src/test/java/org/rocksdb/OptionsTest.java +++ b/java/src/test/java/org/rocksdb/OptionsTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/PlainTableConfigTest.java b/java/src/test/java/org/rocksdb/PlainTableConfigTest.java index 850b050a0..b815cd058 100644 --- a/java/src/test/java/org/rocksdb/PlainTableConfigTest.java +++ b/java/src/test/java/org/rocksdb/PlainTableConfigTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/PlatformRandomHelper.java b/java/src/test/java/org/rocksdb/PlatformRandomHelper.java index 0155ce263..e88a8951d 100644 --- a/java/src/test/java/org/rocksdb/PlatformRandomHelper.java +++ b/java/src/test/java/org/rocksdb/PlatformRandomHelper.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/ReadOnlyTest.java b/java/src/test/java/org/rocksdb/ReadOnlyTest.java index 70ea75d15..5cf2b32d4 100644 --- a/java/src/test/java/org/rocksdb/ReadOnlyTest.java +++ b/java/src/test/java/org/rocksdb/ReadOnlyTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/ReadOptionsTest.java b/java/src/test/java/org/rocksdb/ReadOptionsTest.java index af88ce351..df42cf0cd 100644 --- a/java/src/test/java/org/rocksdb/ReadOptionsTest.java +++ b/java/src/test/java/org/rocksdb/ReadOptionsTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java index 31d2c5238..c8e59a5b3 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/RocksEnvTest.java b/java/src/test/java/org/rocksdb/RocksEnvTest.java index 5914e6e29..a051a3562 100644 --- a/java/src/test/java/org/rocksdb/RocksEnvTest.java +++ b/java/src/test/java/org/rocksdb/RocksEnvTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/RocksIteratorTest.java b/java/src/test/java/org/rocksdb/RocksIteratorTest.java index 170170f5c..eb841d3e6 100644 --- a/java/src/test/java/org/rocksdb/RocksIteratorTest.java +++ b/java/src/test/java/org/rocksdb/RocksIteratorTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/RocksMemEnvTest.java b/java/src/test/java/org/rocksdb/RocksMemEnvTest.java index d2791c93e..7530e51b1 100644 --- a/java/src/test/java/org/rocksdb/RocksMemEnvTest.java +++ b/java/src/test/java/org/rocksdb/RocksMemEnvTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/SliceTest.java b/java/src/test/java/org/rocksdb/SliceTest.java index fbd602b14..51f542fa5 100644 --- a/java/src/test/java/org/rocksdb/SliceTest.java +++ b/java/src/test/java/org/rocksdb/SliceTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/SnapshotTest.java b/java/src/test/java/org/rocksdb/SnapshotTest.java index 87ccdbcb5..19e4c5021 100644 --- a/java/src/test/java/org/rocksdb/SnapshotTest.java +++ b/java/src/test/java/org/rocksdb/SnapshotTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java b/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java index 927826d71..0feaa4237 100644 --- a/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java +++ b/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/StatsCallbackMock.java b/java/src/test/java/org/rocksdb/StatsCallbackMock.java index 3c5800e42..2e28f28ef 100644 --- a/java/src/test/java/org/rocksdb/StatsCallbackMock.java +++ b/java/src/test/java/org/rocksdb/StatsCallbackMock.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/TtlDBTest.java b/java/src/test/java/org/rocksdb/TtlDBTest.java index c60b1d512..934363a87 100644 --- a/java/src/test/java/org/rocksdb/TtlDBTest.java +++ b/java/src/test/java/org/rocksdb/TtlDBTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/Types.java b/java/src/test/java/org/rocksdb/Types.java index 5ad35f463..ca5feb4cb 100644 --- a/java/src/test/java/org/rocksdb/Types.java +++ b/java/src/test/java/org/rocksdb/Types.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java b/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java index b09cc9259..257ef6438 100644 --- a/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java +++ b/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/WriteBatchTest.java b/java/src/test/java/org/rocksdb/WriteBatchTest.java index 89a9d5405..0cdfb7b1d 100644 --- a/java/src/test/java/org/rocksdb/WriteBatchTest.java +++ b/java/src/test/java/org/rocksdb/WriteBatchTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java b/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java index ab38c475f..66e1c8966 100644 --- a/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java +++ b/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2016, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java b/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java index b0c729a58..837610d29 100644 --- a/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java +++ b/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/WriteOptionsTest.java b/java/src/test/java/org/rocksdb/WriteOptionsTest.java index 4d8e6d97e..333a76194 100644 --- a/java/src/test/java/org/rocksdb/WriteOptionsTest.java +++ b/java/src/test/java/org/rocksdb/WriteOptionsTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java b/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java index c800574f5..044f96b94 100644 --- a/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java +++ b/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/java/src/test/java/org/rocksdb/util/EnvironmentTest.java b/java/src/test/java/org/rocksdb/util/EnvironmentTest.java index 1fc51ee73..2de1c45f7 100644 --- a/java/src/test/java/org/rocksdb/util/EnvironmentTest.java +++ b/java/src/test/java/org/rocksdb/util/EnvironmentTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/java/src/test/java/org/rocksdb/util/SizeUnitTest.java b/java/src/test/java/org/rocksdb/util/SizeUnitTest.java index 517e1b2b5..e74c04103 100644 --- a/java/src/test/java/org/rocksdb/util/SizeUnitTest.java +++ b/java/src/test/java/org/rocksdb/util/SizeUnitTest.java @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/memtable/hash_cuckoo_rep.cc b/memtable/hash_cuckoo_rep.cc index 6f3cdbf67..6ae3e098b 100644 --- a/memtable/hash_cuckoo_rep.cc +++ b/memtable/hash_cuckoo_rep.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/memtable/hash_cuckoo_rep.h b/memtable/hash_cuckoo_rep.h index 6de4baa20..173a907b4 100644 --- a/memtable/hash_cuckoo_rep.h +++ b/memtable/hash_cuckoo_rep.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/memtable/hash_linklist_rep.cc b/memtable/hash_linklist_rep.cc index 2e761ce15..902c30e8a 100644 --- a/memtable/hash_linklist_rep.cc +++ b/memtable/hash_linklist_rep.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/memtable/hash_linklist_rep.h b/memtable/hash_linklist_rep.h index 1bab441ed..5197e7cfb 100644 --- a/memtable/hash_linklist_rep.h +++ b/memtable/hash_linklist_rep.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/memtable/hash_skiplist_rep.cc b/memtable/hash_skiplist_rep.cc index cbdd88993..73a917607 100644 --- a/memtable/hash_skiplist_rep.cc +++ b/memtable/hash_skiplist_rep.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/memtable/hash_skiplist_rep.h b/memtable/hash_skiplist_rep.h index a6544ff04..56a289c4b 100644 --- a/memtable/hash_skiplist_rep.h +++ b/memtable/hash_skiplist_rep.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/memtable/skiplistrep.cc b/memtable/skiplistrep.cc index 3588b568a..b8c90c6d6 100644 --- a/memtable/skiplistrep.cc +++ b/memtable/skiplistrep.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/memtable/stl_wrappers.h b/memtable/stl_wrappers.h index cef8301fa..a43133017 100644 --- a/memtable/stl_wrappers.h +++ b/memtable/stl_wrappers.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/memtable/vectorrep.cc b/memtable/vectorrep.cc index 324439a1d..b9d9ebe0a 100644 --- a/memtable/vectorrep.cc +++ b/memtable/vectorrep.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/dirent.h b/port/dirent.h index ee4ded143..f927db7e2 100644 --- a/port/dirent.h +++ b/port/dirent.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/likely.h b/port/likely.h index ede0df5a1..d6e6295cc 100644 --- a/port/likely.h +++ b/port/likely.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/port.h b/port/port.h index 670006e82..5f45dbb42 100644 --- a/port/port.h +++ b/port/port.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/port/port_example.h b/port/port_example.h index ba14618fa..e4bcb329b 100644 --- a/port/port_example.h +++ b/port/port_example.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/port_posix.cc b/port/port_posix.cc index 73ad3caf1..ca1909bf6 100644 --- a/port/port_posix.cc +++ b/port/port_posix.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/port_posix.h b/port/port_posix.h index 74c42f31b..454d6c1c3 100644 --- a/port/port_posix.h +++ b/port/port_posix.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/stack_trace.cc b/port/stack_trace.cc index e2211e987..debeb5a46 100644 --- a/port/stack_trace.cc +++ b/port/stack_trace.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/stack_trace.h b/port/stack_trace.h index 8bc6c7d2e..3108b4d2e 100644 --- a/port/stack_trace.h +++ b/port/stack_trace.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/sys_time.h b/port/sys_time.h index 6c23d8e50..53e646e69 100644 --- a/port/sys_time.h +++ b/port/sys_time.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/util_logger.h b/port/util_logger.h index dbb67173f..05782b0c5 100644 --- a/port/util_logger.h +++ b/port/util_logger.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/win/env_win.cc b/port/win/env_win.cc index cbfb18256..cf548e9be 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/win/port_win.cc b/port/win/port_win.cc index e08f0ec22..66b0f90e9 100644 --- a/port/win/port_win.cc +++ b/port/win/port_win.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. @@ -40,7 +40,7 @@ void gettimeofday(struct timeval* tv, struct timezone* /* tz */) { seconds secNow(duration_cast(usNow)); tv->tv_sec = static_cast(secNow.count()); - tv->tv_usec = static_cast(usNow.count() - + tv->tv_usec = static_cast(usNow.count() - duration_cast(secNow).count()); } diff --git a/port/win/port_win.h b/port/win/port_win.h index d901c40bf..1c3e94ef1 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/win/win_logger.cc b/port/win/win_logger.cc index 9d6203cc4..764d75325 100644 --- a/port/win/win_logger.cc +++ b/port/win/win_logger.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/port/win/win_logger.h b/port/win/win_logger.h index 1460ba8b6..87c4dfe46 100644 --- a/port/win/win_logger.h +++ b/port/win/win_logger.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block.cc b/table/block.cc index c84dc173d..6e6cae576 100644 --- a/table/block.cc +++ b/table/block.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block.h b/table/block.h index 4fe63add6..c3a26ef1f 100644 --- a/table/block.h +++ b/table/block.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_based_filter_block.cc b/table/block_based_filter_block.cc index 9992e9bd0..bc0a8c3f4 100644 --- a/table/block_based_filter_block.cc +++ b/table/block_based_filter_block.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_based_filter_block.h b/table/block_based_filter_block.h index d339ac68a..92c8c0da8 100644 --- a/table/block_based_filter_block.h +++ b/table/block_based_filter_block.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_based_filter_block_test.cc b/table/block_based_filter_block_test.cc index 017de5906..d77def3d9 100644 --- a/table/block_based_filter_block_test.cc +++ b/table/block_based_filter_block_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index ee8c3dd7c..1484acb51 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/table/block_based_table_builder.h b/table/block_based_table_builder.h index 7dc93b754..49fe8dbf4 100644 --- a/table/block_based_table_builder.h +++ b/table/block_based_table_builder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index 7b38c2136..4cfff93a3 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h index 714a4f82a..6b4e563e2 100644 --- a/table/block_based_table_factory.h +++ b/table/block_based_table_factory.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 8c0149fd0..42c5aa494 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index f8d0649e0..a35b8ae41 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_builder.cc b/table/block_builder.cc index 846d62369..aa9d46669 100644 --- a/table/block_builder.cc +++ b/table/block_builder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_builder.h b/table/block_builder.h index 9eec4ce33..f9ced8ad4 100644 --- a/table/block_builder.h +++ b/table/block_builder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/table/block_hash_index.cc b/table/block_hash_index.cc index b38cc8a57..9b73ff2af 100644 --- a/table/block_hash_index.cc +++ b/table/block_hash_index.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_hash_index.h b/table/block_hash_index.h index fc110d54a..ceaed626f 100644 --- a/table/block_hash_index.h +++ b/table/block_hash_index.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_hash_index_test.cc b/table/block_hash_index_test.cc index ffca663d1..b51d3ef25 100644 --- a/table/block_hash_index_test.cc +++ b/table/block_hash_index_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_prefix_index.cc b/table/block_prefix_index.cc index 147bcf56e..bc6465a32 100644 --- a/table/block_prefix_index.cc +++ b/table/block_prefix_index.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_prefix_index.h b/table/block_prefix_index.h index bc36c48f6..d9c3b97e0 100644 --- a/table/block_prefix_index.h +++ b/table/block_prefix_index.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/block_test.cc b/table/block_test.cc index e9c0179c1..7c36cf1ca 100644 --- a/table/block_test.cc +++ b/table/block_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/bloom_block.cc b/table/bloom_block.cc index cfea8a2c5..7eef9cc05 100644 --- a/table/bloom_block.cc +++ b/table/bloom_block.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/bloom_block.h b/table/bloom_block.h index 5b60d2bca..5ba74601f 100644 --- a/table/bloom_block.h +++ b/table/bloom_block.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. 
All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc index 475055fcb..f2f71b78e 100644 --- a/table/cuckoo_table_builder.cc +++ b/table/cuckoo_table_builder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. @@ -246,7 +246,7 @@ Status CuckooTableBuilder::Finish() { if (num_entries_ > 0) { // Calculate the real hash size if module hash is enabled. if (use_module_hash_) { - hash_table_size_ = + hash_table_size_ = static_cast(num_entries_ / max_hash_table_ratio_); } s = MakeHashTable(&buckets); diff --git a/table/cuckoo_table_builder.h b/table/cuckoo_table_builder.h index 093e1c245..8e6873e88 100644 --- a/table/cuckoo_table_builder.h +++ b/table/cuckoo_table_builder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/cuckoo_table_builder_test.cc b/table/cuckoo_table_builder_test.cc index a3cd21224..fef7bc3a5 100644 --- a/table/cuckoo_table_builder_test.cc +++ b/table/cuckoo_table_builder_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/cuckoo_table_factory.cc b/table/cuckoo_table_factory.cc index 2b9407f2f..bf1561a87 100644 --- a/table/cuckoo_table_factory.cc +++ b/table/cuckoo_table_factory.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/cuckoo_table_factory.h b/table/cuckoo_table_factory.h index 3f89ca86d..82aa57150 100644 --- a/table/cuckoo_table_factory.h +++ b/table/cuckoo_table_factory.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/cuckoo_table_reader.cc b/table/cuckoo_table_reader.cc index b1f910181..fb1aef2e8 100644 --- a/table/cuckoo_table_reader.cc +++ b/table/cuckoo_table_reader.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/table/cuckoo_table_reader.h b/table/cuckoo_table_reader.h index b936e70c8..5e3e5528a 100644 --- a/table/cuckoo_table_reader.h +++ b/table/cuckoo_table_reader.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/cuckoo_table_reader_test.cc b/table/cuckoo_table_reader_test.cc index c398b1383..aa4a93d40 100644 --- a/table/cuckoo_table_reader_test.cc +++ b/table/cuckoo_table_reader_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/filter_block.h b/table/filter_block.h index 855a23169..e326018f7 100644 --- a/table/filter_block.h +++ b/table/filter_block.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/flush_block_policy.cc b/table/flush_block_policy.cc index 4c12b30bb..fa4c3e331 100644 --- a/table/flush_block_policy.cc +++ b/table/flush_block_policy.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/format.cc b/table/format.cc index a58bbee24..bb028c99a 100644 --- a/table/format.cc +++ b/table/format.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/format.h b/table/format.h index 74ec808c6..48bcf6785 100644 --- a/table/format.h +++ b/table/format.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/full_filter_block.cc b/table/full_filter_block.cc index 3744d417f..11c8a016c 100644 --- a/table/full_filter_block.cc +++ b/table/full_filter_block.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/full_filter_block.h b/table/full_filter_block.h index 1ecc07a01..27e10eba1 100644 --- a/table/full_filter_block.h +++ b/table/full_filter_block.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/full_filter_block_test.cc b/table/full_filter_block_test.cc index 0275a6ca6..5840cb035 100644 --- a/table/full_filter_block_test.cc +++ b/table/full_filter_block_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/get_context.cc b/table/get_context.cc index 59d44f3a5..39b841e25 100644 --- a/table/get_context.cc +++ b/table/get_context.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/get_context.h b/table/get_context.h index c06c3c8d4..283df90c8 100644 --- a/table/get_context.h +++ b/table/get_context.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/table/internal_iterator.h b/table/internal_iterator.h index cc2430ca0..0be1c4ce2 100644 --- a/table/internal_iterator.h +++ b/table/internal_iterator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/iter_heap.h b/table/iter_heap.h index 5343175c3..642383345 100644 --- a/table/iter_heap.h +++ b/table/iter_heap.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/iterator.cc b/table/iterator.cc index 2db321edd..d99a8301f 100644 --- a/table/iterator.cc +++ b/table/iterator.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index eef300499..0a0a819d7 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/merger.cc b/table/merger.cc index 81eb9608c..1cc80ec8d 100644 --- a/table/merger.cc +++ b/table/merger.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/merger.h b/table/merger.h index 5ea624648..7291a0378 100644 --- a/table/merger.h +++ b/table/merger.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/merger_test.cc b/table/merger_test.cc index e9397dc1d..97979af7c 100644 --- a/table/merger_test.cc +++ b/table/merger_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 505dbacd0..b94d0b6ea 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/meta_blocks.h b/table/meta_blocks.h index 085ae308e..ab4f7e127 100644 --- a/table/meta_blocks.h +++ b/table/meta_blocks.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/mock_table.cc b/table/mock_table.cc index 4525994d3..7d5cefa78 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -1,6 +1,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/mock_table.h b/table/mock_table.h index 1b822d783..4352a2c7a 100644 --- a/table/mock_table.h +++ b/table/mock_table.h @@ -1,6 +1,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/plain_table_builder.cc b/table/plain_table_builder.cc index 2306a7efb..efaf47b07 100644 --- a/table/plain_table_builder.cc +++ b/table/plain_table_builder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/plain_table_builder.h b/table/plain_table_builder.h index 5c0cad977..02fb8d87a 100644 --- a/table/plain_table_builder.h +++ b/table/plain_table_builder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/plain_table_index.cc b/table/plain_table_index.cc index 4f4ebabf1..c8081c006 100644 --- a/table/plain_table_index.cc +++ b/table/plain_table_index.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
@@ -117,7 +117,7 @@ void PlainTableIndexBuilder::AllocateIndex() { index_size_ = 1; } else { double hash_table_size_multipier = 1.0 / hash_table_ratio_; - index_size_ = + index_size_ = static_cast(num_prefixes_ * hash_table_size_multipier) + 1; assert(index_size_ > 0); } diff --git a/table/plain_table_index.h b/table/plain_table_index.h index be8ad1639..ab2be3d1e 100644 --- a/table/plain_table_index.h +++ b/table/plain_table_index.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/plain_table_key_coding.cc b/table/plain_table_key_coding.cc index b1aa14d8d..8442f1129 100644 --- a/table/plain_table_key_coding.cc +++ b/table/plain_table_key_coding.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/plain_table_key_coding.h b/table/plain_table_key_coding.h index e2dc7dff4..ed4ce5d38 100644 --- a/table/plain_table_key_coding.h +++ b/table/plain_table_key_coding.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/table/scoped_arena_iterator.h b/table/scoped_arena_iterator.h index 0372b5691..5629ba5aa 100644 --- a/table/scoped_arena_iterator.h +++ b/table/scoped_arena_iterator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index d13adbe08..58ca0e84a 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/table_builder.h b/table/table_builder.h index 930c99f10..ed79bed0e 100644 --- a/table/table_builder.h +++ b/table/table_builder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/table_properties.cc b/table/table_properties.cc index 7a51779fe..5bf3e0103 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/table_properties_internal.h b/table/table_properties_internal.h index 10f38cdf2..77042acbb 100644 --- a/table/table_properties_internal.h +++ b/table/table_properties_internal.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/table_reader.h b/table/table_reader.h index 2fef5df30..5751ab03f 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index cee0d10e3..1a7d7b5a7 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/table/table_test.cc b/table/table_test.cc index 2bd28ca0a..a234d5cdc 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/two_level_iterator.cc b/table/two_level_iterator.cc index a01c12007..2656b317a 100644 --- a/table/two_level_iterator.cc +++ b/table/two_level_iterator.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/table/two_level_iterator.h b/table/two_level_iterator.h index ed5380bd4..d210132cb 100644 --- a/table/two_level_iterator.h +++ b/table/two_level_iterator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/third-party/fbson/FbsonDocument.h b/third-party/fbson/FbsonDocument.h index c70f9ecb2..9a00e2471 100644 --- a/third-party/fbson/FbsonDocument.h +++ b/third-party/fbson/FbsonDocument.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Facebook, Inc. + * Copyright (c) 2011-present, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under the BSD-style license found in the diff --git a/third-party/fbson/FbsonJsonParser.h b/third-party/fbson/FbsonJsonParser.h index 1c9c8ed6e..73c1febfa 100644 --- a/third-party/fbson/FbsonJsonParser.h +++ b/third-party/fbson/FbsonJsonParser.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Facebook, Inc. + * Copyright (c) 2011-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD-style license found in the diff --git a/third-party/fbson/FbsonStream.h b/third-party/fbson/FbsonStream.h index 22851240d..5f70221db 100644 --- a/third-party/fbson/FbsonStream.h +++ b/third-party/fbson/FbsonStream.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Facebook, Inc. + * Copyright (c) 2011-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD-style license found in the diff --git a/third-party/fbson/FbsonUtil.h b/third-party/fbson/FbsonUtil.h index ab965630d..2c4154769 100644 --- a/third-party/fbson/FbsonUtil.h +++ b/third-party/fbson/FbsonUtil.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Facebook, Inc. + * Copyright (c) 2011-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD-style license found in the diff --git a/third-party/fbson/FbsonWriter.h b/third-party/fbson/FbsonWriter.h index 21bd6f232..4efaf817c 100644 --- a/third-party/fbson/FbsonWriter.h +++ b/third-party/fbson/FbsonWriter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Facebook, Inc. + * Copyright (c) 2011-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD-style license found in the diff --git a/tools/db_repl_stress.cc b/tools/db_repl_stress.cc index 0fca5d506..a01909069 100644 --- a/tools/db_repl_stress.cc +++ b/tools/db_repl_stress.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/db_sanity_test.cc b/tools/db_sanity_test.cc index b7176f41c..773acff6b 100644 --- a/tools/db_sanity_test.cc +++ b/tools/db_sanity_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 8e0f9796a..f77dc445f 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/dump/db_dump_tool.cc b/tools/dump/db_dump_tool.cc index 389e65dba..1db793f73 100644 --- a/tools/dump/db_dump_tool.cc +++ b/tools/dump/db_dump_tool.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/tools/dump/rocksdb_dump.cc b/tools/dump/rocksdb_dump.cc index 2bfc6cee3..fa5fcf5ed 100644 --- a/tools/dump/rocksdb_dump.cc +++ b/tools/dump/rocksdb_dump.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/dump/rocksdb_undump.cc b/tools/dump/rocksdb_undump.cc index 81034f0ce..7da002be3 100644 --- a/tools/dump/rocksdb_undump.cc +++ b/tools/dump/rocksdb_undump.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/ldb.cc b/tools/ldb.cc index cb5ef5204..ec2559a5d 100644 --- a/tools/ldb.cc +++ b/tools/ldb.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 326c64b30..152e9f71e 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/ldb_cmd.h b/tools/ldb_cmd.h index fdc8fbba5..0b6911233 100644 --- a/tools/ldb_cmd.h +++ b/tools/ldb_cmd.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/ldb_cmd_execute_result.h b/tools/ldb_cmd_execute_result.h index 29ebfc240..94f271c86 100644 --- a/tools/ldb_cmd_execute_result.h +++ b/tools/ldb_cmd_execute_result.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc index edb6a2106..892f5843c 100644 --- a/tools/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/ldb_tool.cc b/tools/ldb_tool.cc index d99931dfe..4e24997af 100644 --- a/tools/ldb_tool.cc +++ b/tools/ldb_tool.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/reduce_levels_test.cc b/tools/reduce_levels_test.cc index d55c82a61..2f009cb84 100644 --- a/tools/reduce_levels_test.cc +++ b/tools/reduce_levels_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/sst_dump.cc b/tools/sst_dump.cc index 403893779..a0b253e9b 100644 --- a/tools/sst_dump.cc +++ b/tools/sst_dump.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc index b40a3346c..84edaea37 100644 --- a/tools/sst_dump_test.cc +++ b/tools/sst_dump_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index b348da38b..316ee4045 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/sst_dump_tool_imp.h b/tools/sst_dump_tool_imp.h index 44c74f909..6bbc4d676 100644 --- a/tools/sst_dump_tool_imp.h +++ b/tools/sst_dump_tool_imp.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/tools/write_stress.cc b/tools/write_stress.cc index 05321fece..c2cbec4f4 100644 --- a/tools/write_stress.cc +++ b/tools/write_stress.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/aligned_buffer.h b/util/aligned_buffer.h index 2244316fe..2f79f12f7 100644 --- a/util/aligned_buffer.h +++ b/util/aligned_buffer.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/allocator.h b/util/allocator.h index 58bf0da31..ee253528a 100644 --- a/util/allocator.h +++ b/util/allocator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/arena.cc b/util/arena.cc index 1d292ec01..8bb482dbf 100644 --- a/util/arena.cc +++ b/util/arena.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/arena.h b/util/arena.h index db2150a8f..f96794ad1 100644 --- a/util/arena.h +++ b/util/arena.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/arena_test.cc b/util/arena_test.cc index d4fa48cfc..3e9a74b86 100644 --- a/util/arena_test.cc +++ b/util/arena_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/autovector.h b/util/autovector.h index 266a53a56..74fcc70ec 100644 --- a/util/autovector.h +++ b/util/autovector.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/autovector_test.cc b/util/autovector_test.cc index 94e992660..d72bd507f 100644 --- a/util/autovector_test.cc +++ b/util/autovector_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/bloom.cc b/util/bloom.cc index d3f3abd61..4d30d7e82 100644 --- a/util/bloom.cc +++ b/util/bloom.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/bloom_test.cc b/util/bloom_test.cc index aac5b3978..6cc256316 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/build_version.h b/util/build_version.h index ca1dbf5f9..e53c3a06a 100644 --- a/util/build_version.h +++ b/util/build_version.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/cache.cc b/util/cache.cc index 9c47edd0e..078b10e1a 100644 --- a/util/cache.cc +++ b/util/cache.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/cache_bench.cc b/util/cache_bench.cc index 92df77267..0e0d70d62 100644 --- a/util/cache_bench.cc +++ b/util/cache_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/cache_test.cc b/util/cache_test.cc index c8b2de8f5..d49cd4fdf 100644 --- a/util/cache_test.cc +++ b/util/cache_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/channel.h b/util/channel.h index a8987163f..974bed093 100644 --- a/util/channel.h +++ b/util/channel.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/coding.cc b/util/coding.cc index f09e67284..d38fdb1f8 100644 --- a/util/coding.cc +++ b/util/coding.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/coding.h b/util/coding.h index 5ea9aad40..6aaf403a3 100644 --- a/util/coding.h +++ b/util/coding.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/coding_test.cc b/util/coding_test.cc index e3c265b69..d724ef4c1 100644 --- a/util/coding_test.cc +++ b/util/coding_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/compaction_job_stats_impl.cc b/util/compaction_job_stats_impl.cc index 01f022f3c..4610496f8 100644 --- a/util/compaction_job_stats_impl.cc +++ b/util/compaction_job_stats_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/comparator.cc b/util/comparator.cc index 6d7709db5..cb802d55b 100644 --- a/util/comparator.cc +++ b/util/comparator.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/compression.h b/util/compression.h index ac285e5f1..2690e3001 100644 --- a/util/compression.h +++ b/util/compression.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/concurrent_arena.cc b/util/concurrent_arena.cc index fae09d7d2..6aa82751f 100644 --- a/util/concurrent_arena.cc +++ b/util/concurrent_arena.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/concurrent_arena.h b/util/concurrent_arena.h index fb29c87a1..ba4b024b9 100644 --- a/util/concurrent_arena.h +++ b/util/concurrent_arena.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/crc32c.cc b/util/crc32c.cc index 2b237b48e..ce574544e 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/crc32c.h b/util/crc32c.h index 14167c1a0..90d950c6e 100644 --- a/util/crc32c.h +++ b/util/crc32c.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/crc32c_test.cc b/util/crc32c_test.cc index 413302a24..47a24ddac 100644 --- a/util/crc32c_test.cc +++ b/util/crc32c_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/delete_scheduler.cc b/util/delete_scheduler.cc index 650b8582d..b403c0572 100644 --- a/util/delete_scheduler.cc +++ b/util/delete_scheduler.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/delete_scheduler.h b/util/delete_scheduler.h index 8ce2e3005..eec118708 100644 --- a/util/delete_scheduler.h +++ b/util/delete_scheduler.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/delete_scheduler_test.cc b/util/delete_scheduler_test.cc index 21b8a5b19..563813f9b 100644 --- a/util/delete_scheduler_test.cc +++ b/util/delete_scheduler_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/dynamic_bloom.cc b/util/dynamic_bloom.cc index 4df81d527..bd54ed933 100644 --- a/util/dynamic_bloom.cc +++ b/util/dynamic_bloom.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/dynamic_bloom.h b/util/dynamic_bloom.h index 8d1b7b4af..909db54c8 100644 --- a/util/dynamic_bloom.h +++ b/util/dynamic_bloom.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/dynamic_bloom_test.cc b/util/dynamic_bloom_test.cc index e7a730fcf..bad88a94b 100644 --- a/util/dynamic_bloom_test.cc +++ b/util/dynamic_bloom_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/env.cc b/util/env.cc index efb149689..38509c0f4 100644 --- a/util/env.cc +++ b/util/env.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/env_hdfs.cc b/util/env_hdfs.cc index 7e12c747a..e82895ee0 100644 --- a/util/env_hdfs.cc +++ b/util/env_hdfs.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/env_posix.cc b/util/env_posix.cc index 2ea8eebb1..06de7a486 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/env_test.cc b/util/env_test.cc index cfc521c29..a07eed042 100644 --- a/util/env_test.cc +++ b/util/env_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/event_logger.cc b/util/event_logger.cc index 92a781c19..7e71b0cf9 100644 --- a/util/event_logger.cc +++ b/util/event_logger.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/event_logger.h b/util/event_logger.h index 53a40c255..f845ab6a5 100644 --- a/util/event_logger.h +++ b/util/event_logger.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/event_logger_test.cc b/util/event_logger_test.cc index 1aad0acc2..807f64b1c 100644 --- a/util/event_logger_test.cc +++ b/util/event_logger_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/file_reader_writer.cc b/util/file_reader_writer.cc index 6d548c449..b12263610 100644 --- a/util/file_reader_writer.cc +++ b/util/file_reader_writer.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/file_reader_writer.h b/util/file_reader_writer.h index c10cde2ab..1a7b81bed 100644 --- a/util/file_reader_writer.h +++ b/util/file_reader_writer.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/file_reader_writer_test.cc b/util/file_reader_writer_test.cc index 69b8cfea8..367de8b9e 100644 --- a/util/file_reader_writer_test.cc +++ b/util/file_reader_writer_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/file_util.cc b/util/file_util.cc index 1e2e84211..0748a4cf9 100644 --- a/util/file_util.cc +++ b/util/file_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/file_util.h b/util/file_util.h index f19dc6f0b..b5cb0cf66 100644 --- a/util/file_util.h +++ b/util/file_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/filelock_test.cc b/util/filelock_test.cc index 33362f8c7..d8c1172dc 100644 --- a/util/filelock_test.cc +++ b/util/filelock_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/filter_policy.cc b/util/filter_policy.cc index e950b75f7..8924982b4 100644 --- a/util/filter_policy.cc +++ b/util/filter_policy.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/hash.cc b/util/hash.cc index 427f0d138..dfd2dc403 100644 --- a/util/hash.cc +++ b/util/hash.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/hash.h b/util/hash.h index cab8d4677..5c90e63ec 100644 --- a/util/hash.h +++ b/util/hash.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/heap.h b/util/heap.h index 7d9e11113..9c25297f4 100644 --- a/util/heap.h +++ b/util/heap.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/heap_test.cc b/util/heap_test.cc index dd73e11a0..f2b902df0 100644 --- a/util/heap_test.cc +++ b/util/heap_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/histogram.cc b/util/histogram.cc index dea3808c6..4b5013a55 100644 --- a/util/histogram.cc +++ b/util/histogram.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. @@ -116,9 +116,9 @@ double HistogramImpl::Percentile(double p) const { sum += buckets_[b]; if (sum >= threshold) { // Scale linearly within this bucket - double left_point = + double left_point = static_cast<double>((b == 0) ? 0 : bucketMapper.BucketLimit(b-1)); - double right_point = + double right_point = static_cast<double>(bucketMapper.BucketLimit(b)); double left_sum = sum - buckets_[b]; double right_sum = sum; diff --git a/util/histogram.h b/util/histogram.h index 5f73bf3cd..2b6cd8bab 100644 --- a/util/histogram.h +++ b/util/histogram.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/histogram_test.cc b/util/histogram_test.cc index 22ddb4b42..b9657db06 100644 --- a/util/histogram_test.cc +++ b/util/histogram_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/instrumented_mutex.cc b/util/instrumented_mutex.cc index e5603fe08..4eba27720 100644 --- a/util/instrumented_mutex.cc +++ b/util/instrumented_mutex.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved.
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/instrumented_mutex.h b/util/instrumented_mutex.h index 3f233494a..45d553ae8 100644 --- a/util/instrumented_mutex.h +++ b/util/instrumented_mutex.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/io_posix.cc b/util/io_posix.cc index dd41e2a03..05a7f2788 100644 --- a/util/io_posix.cc +++ b/util/io_posix.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/io_posix.h b/util/io_posix.h index 2a45d10ff..39b7b8fdf 100644 --- a/util/io_posix.h +++ b/util/io_posix.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/iostats_context.cc b/util/iostats_context.cc index d6d33baf3..666fddcd1 100644 --- a/util/iostats_context.cc +++ b/util/iostats_context.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/iostats_context_imp.h b/util/iostats_context_imp.h index 4617b4120..d0464ce0c 100644 --- a/util/iostats_context_imp.h +++ b/util/iostats_context_imp.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/kv_map.h b/util/kv_map.h index 486db1918..ac3e96020 100644 --- a/util/kv_map.h +++ b/util/kv_map.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/log_buffer.cc b/util/log_buffer.cc index 7d15cf22e..e04f9f2ae 100644 --- a/util/log_buffer.cc +++ b/util/log_buffer.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/log_buffer.h b/util/log_buffer.h index bd842b731..daf8ba6f5 100644 --- a/util/log_buffer.h +++ b/util/log_buffer.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/log_write_bench.cc b/util/log_write_bench.cc index 16e7af7e2..d9b08762a 100644 --- a/util/log_write_bench.cc +++ b/util/log_write_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/logging.cc b/util/logging.cc index 8917d099a..2e0881bb5 100644 --- a/util/logging.cc +++ b/util/logging.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/logging.h b/util/logging.h index 10801bb88..13aebb46d 100644 --- a/util/logging.h +++ b/util/logging.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/mock_env.cc b/util/mock_env.cc index 409e16e3a..5001dfc39 100644 --- a/util/mock_env.cc +++ b/util/mock_env.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/mock_env.h b/util/mock_env.h index bcc74a731..d4bbdc8b2 100644 --- a/util/mock_env.h +++ b/util/mock_env.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/murmurhash.cc b/util/murmurhash.cc index d9d8b7061..9a01bf11b 100644 --- a/util/murmurhash.cc +++ b/util/murmurhash.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/murmurhash.h b/util/murmurhash.h index 40ee357a7..856fece9c 100644 --- a/util/murmurhash.h +++ b/util/murmurhash.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/mutable_cf_options.cc b/util/mutable_cf_options.cc index 582c0eadf..83eb2fafb 100644 --- a/util/mutable_cf_options.cc +++ b/util/mutable_cf_options.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/mutable_cf_options.h b/util/mutable_cf_options.h index 209aa3d51..dbae48e33 100644 --- a/util/mutable_cf_options.h +++ b/util/mutable_cf_options.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/mutexlock.h b/util/mutexlock.h index 63a0f5ce1..a2d14aedf 100644 --- a/util/mutexlock.h +++ b/util/mutexlock.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/options.cc b/util/options.cc index 00d797167..01eeb9d23 100644 --- a/util/options.cc +++ b/util/options.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/options_builder.cc b/util/options_builder.cc index 1b480f22b..89aeda5f5 100644 --- a/util/options_builder.cc +++ b/util/options_builder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/options_helper.cc b/util/options_helper.cc index c397c2f56..f2929a740 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/options_helper.h b/util/options_helper.h index b0636adc5..1c8b585d6 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/options_parser.cc b/util/options_parser.cc index 20ae51e8b..e5689fdc4 100644 --- a/util/options_parser.cc +++ b/util/options_parser.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/options_parser.h b/util/options_parser.h index 94e69cc2a..0c96df83e 100644 --- a/util/options_parser.h +++ b/util/options_parser.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/options_sanity_check.cc b/util/options_sanity_check.cc index a84031bf9..1294a6ecc 100644 --- a/util/options_sanity_check.cc +++ b/util/options_sanity_check.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/options_sanity_check.h b/util/options_sanity_check.h index 6f18a58c8..bfadbdaf2 100644 --- a/util/options_sanity_check.h +++ b/util/options_sanity_check.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/options_test.cc b/util/options_test.cc index bd3cb3a19..0bde6d618 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/perf_context.cc b/util/perf_context.cc index 214905ada..5fdda1081 100644 --- a/util/perf_context.cc +++ b/util/perf_context.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/perf_context_imp.h b/util/perf_context_imp.h index d28b55179..6ef2a8011 100644 --- a/util/perf_context_imp.h +++ b/util/perf_context_imp.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/perf_level.cc b/util/perf_level.cc index 387ff5f1d..746510f21 100644 --- a/util/perf_level.cc +++ b/util/perf_level.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/perf_level_imp.h b/util/perf_level_imp.h index 7a8341062..ced05e924 100644 --- a/util/perf_level_imp.h +++ b/util/perf_level_imp.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/perf_step_timer.h b/util/perf_step_timer.h index 631cd317c..33a709f93 100644 --- a/util/perf_step_timer.h +++ b/util/perf_step_timer.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/posix_logger.h b/util/posix_logger.h index 1c7d39866..1ddadd547 100644 --- a/util/posix_logger.h +++ b/util/posix_logger.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/random.cc b/util/random.cc index 56944773f..9f0f9bb4c 100644 --- a/util/random.cc +++ b/util/random.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/random.h b/util/random.h index 8f90c7675..57d5bd65b 100644 --- a/util/random.h +++ b/util/random.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/rate_limiter.cc b/util/rate_limiter.cc index 188d5f0c7..352925bec 100644 --- a/util/rate_limiter.cc +++ b/util/rate_limiter.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/rate_limiter.h b/util/rate_limiter.h index 62ae6b5ad..d413d9c6e 100644 --- a/util/rate_limiter.h +++ b/util/rate_limiter.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/rate_limiter_test.cc b/util/rate_limiter_test.cc index d635010a4..9085835de 100644 --- a/util/rate_limiter_test.cc +++ b/util/rate_limiter_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/slice.cc b/util/slice.cc index 4c50ff9a6..d1ddb7cd7 100644 --- a/util/slice.cc +++ b/util/slice.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/slice_transform_test.cc b/util/slice_transform_test.cc index 5b7c1b402..624b11f11 100644 --- a/util/slice_transform_test.cc +++ b/util/slice_transform_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/sst_file_manager_impl.cc b/util/sst_file_manager_impl.cc index f4cc82e5d..b518bb7e5 100644 --- a/util/sst_file_manager_impl.cc +++ b/util/sst_file_manager_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/sst_file_manager_impl.h b/util/sst_file_manager_impl.h index 216f7cf05..5f44d631c 100644 --- a/util/sst_file_manager_impl.h +++ b/util/sst_file_manager_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/statistics.cc b/util/statistics.cc index 8a7525c81..62993b029 100644 --- a/util/statistics.cc +++ b/util/statistics.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/statistics.h b/util/statistics.h index b42fb43a5..001c6715f 100644 --- a/util/statistics.h +++ b/util/statistics.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/status.cc b/util/status.cc index 6ff5005f9..93590d2d7 100644 --- a/util/status.cc +++ b/util/status.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/status_message.cc b/util/status_message.cc index 26ab06ddd..fc251a9b4 100644 --- a/util/status_message.cc +++ b/util/status_message.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/stop_watch.h b/util/stop_watch.h index 86cb2653c..663661aba 100644 --- a/util/stop_watch.h +++ b/util/stop_watch.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/string_util.cc b/util/string_util.cc index 4e0bc4668..d1830d27e 100644 --- a/util/string_util.cc +++ b/util/string_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/string_util.h b/util/string_util.h index c7cc57dab..7afbc402e 100644 --- a/util/string_util.h +++ b/util/string_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/sync_point.cc b/util/sync_point.cc index 147e3e47a..88d36bd3d 100644 --- a/util/sync_point.cc +++ b/util/sync_point.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/sync_point.h b/util/sync_point.h index f169636a1..a9aac755e 100644 --- a/util/sync_point.h +++ b/util/sync_point.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/testharness.cc b/util/testharness.cc index 603f6f6e1..4c4455bfe 100644 --- a/util/testharness.cc +++ b/util/testharness.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/testharness.h b/util/testharness.h index b212b1e3a..298b16632 100644 --- a/util/testharness.h +++ b/util/testharness.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/testutil.cc b/util/testutil.cc index a5aa59913..527402706 100644 --- a/util/testutil.cc +++ b/util/testutil.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/testutil.h b/util/testutil.h index 02e786b47..80f6c55c1 100644 --- a/util/testutil.h +++ b/util/testutil.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_list_test.cc b/util/thread_list_test.cc index eeb2b1688..c706a2e3e 100644 --- a/util/thread_list_test.cc +++ b/util/thread_list_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_local.cc b/util/thread_local.cc index 7fb7a27dc..846c33877 100644 --- a/util/thread_local.cc +++ b/util/thread_local.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_local.h b/util/thread_local.h index 72991724e..a4feac38d 100644 --- a/util/thread_local.h +++ b/util/thread_local.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_local_test.cc b/util/thread_local_test.cc index 368818669..537737650 100644 --- a/util/thread_local_test.cc +++ b/util/thread_local_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_operation.h b/util/thread_operation.h index e55596c1b..ace619817 100644 --- a/util/thread_operation.h +++ b/util/thread_operation.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_posix.cc b/util/thread_posix.cc index 88e67ed76..f09abd54c 100644 --- a/util/thread_posix.cc +++ b/util/thread_posix.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_posix.h b/util/thread_posix.h index c5d643878..96dfe1e1e 100644 --- a/util/thread_posix.h +++ b/util/thread_posix.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/thread_status_impl.cc b/util/thread_status_impl.cc index 50cb355bb..e9a702bba 100644 --- a/util/thread_status_impl.cc +++ b/util/thread_status_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_status_updater.cc b/util/thread_status_updater.cc index 3b93f2087..375b2f321 100644 --- a/util/thread_status_updater.cc +++ b/util/thread_status_updater.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_status_updater.h b/util/thread_status_updater.h index e7c7007d4..23d6d6f58 100644 --- a/util/thread_status_updater.h +++ b/util/thread_status_updater.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_status_updater_debug.cc b/util/thread_status_updater_debug.cc index 274f427d3..501181faf 100644 --- a/util/thread_status_updater_debug.cc +++ b/util/thread_status_updater_debug.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_status_util.cc b/util/thread_status_util.cc index 6039c5f1d..d573e0566 100644 --- a/util/thread_status_util.cc +++ b/util/thread_status_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_status_util.h b/util/thread_status_util.h index 101cd0ef1..3445182ec 100644 --- a/util/thread_status_util.h +++ b/util/thread_status_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/thread_status_util_debug.cc b/util/thread_status_util_debug.cc index 94b19f3d2..355bd9784 100644 --- a/util/thread_status_util_debug.cc +++ b/util/thread_status_util_debug.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/util/xfunc.cc b/util/xfunc.cc index 3da5e2a75..fc812279f 100644 --- a/util/xfunc.cc +++ b/util/xfunc.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/util/xfunc.h b/util/xfunc.h index 122d235b0..e19a03f1c 100644 --- a/util/xfunc.h +++ b/util/xfunc.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index bbaf75b98..b8cb46c7c 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index a3d32090c..cc70f69b5 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/backupable/backupable_db_testutil.h b/utilities/backupable/backupable_db_testutil.h index 6c45f33ed..efe4acdf2 100644 --- a/utilities/backupable/backupable_db_testutil.h +++ b/utilities/backupable/backupable_db_testutil.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/checkpoint/checkpoint.cc b/utilities/checkpoint/checkpoint.cc index 6e6fac004..dd41d9cdf 100644 --- a/utilities/checkpoint/checkpoint.cc +++ b/utilities/checkpoint/checkpoint.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/checkpoint/checkpoint_test.cc b/utilities/checkpoint/checkpoint_test.cc index 27c1beb5f..42d180bba 100644 --- a/utilities/checkpoint/checkpoint_test.cc +++ b/utilities/checkpoint/checkpoint_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc b/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc index 4ef4edf92..ad9043755 100644 --- a/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +++ b/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h b/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h index ec9342d38..df303e8cd 100644 --- a/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +++ b/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/convenience/info_log_finder.cc b/utilities/convenience/info_log_finder.cc index acdec5119..ecbdd7714 100644 --- a/utilities/convenience/info_log_finder.cc +++ b/utilities/convenience/info_log_finder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/document/document_db.cc b/utilities/document/document_db.cc index 7f7bc781a..85330b123 100644 --- a/utilities/document/document_db.cc +++ b/utilities/document/document_db.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/document/document_db_test.cc b/utilities/document/document_db_test.cc index 03bebf48e..9c9da552b 100644 --- a/utilities/document/document_db_test.cc +++ b/utilities/document/document_db_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/document/json_document.cc b/utilities/document/json_document.cc index f26787b97..9cf110830 100644 --- a/utilities/document/json_document.cc +++ b/utilities/document/json_document.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/utilities/document/json_document_builder.cc b/utilities/document/json_document_builder.cc index 8cbccc832..812239588 100644 --- a/utilities/document/json_document_builder.cc +++ b/utilities/document/json_document_builder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/document/json_document_test.cc b/utilities/document/json_document_test.cc index b9d6dcf0f..f8c11d9cf 100644 --- a/utilities/document/json_document_test.cc +++ b/utilities/document/json_document_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/flashcache/flashcache.cc b/utilities/flashcache/flashcache.cc index d50232440..3765300d3 100644 --- a/utilities/flashcache/flashcache.cc +++ b/utilities/flashcache/flashcache.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/utilities/flashcache/flashcache.h b/utilities/flashcache/flashcache.h index a8a3d7d13..94a3ed41f 100644 --- a/utilities/flashcache/flashcache.h +++ b/utilities/flashcache/flashcache.h @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/geodb/geodb_impl.cc b/utilities/geodb/geodb_impl.cc index 7fb76f36b..bd57ca1f9 100644 --- a/utilities/geodb/geodb_impl.cc +++ b/utilities/geodb/geodb_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/geodb/geodb_impl.h b/utilities/geodb/geodb_impl.h index d63102856..a61f1674a 100644 --- a/utilities/geodb/geodb_impl.h +++ b/utilities/geodb/geodb_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/geodb/geodb_test.cc b/utilities/geodb/geodb_test.cc index 503332e44..91b3621a2 100644 --- a/utilities/geodb/geodb_test.cc +++ b/utilities/geodb/geodb_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/leveldb_options/leveldb_options.cc b/utilities/leveldb_options/leveldb_options.cc index cb7dfb8ea..cd12f3b50 100644 --- a/utilities/leveldb_options/leveldb_options.cc +++ b/utilities/leveldb_options/leveldb_options.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/memory/memory_test.cc b/utilities/memory/memory_test.cc index 079514736..63420be61 100644 --- a/utilities/memory/memory_test.cc +++ b/utilities/memory/memory_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/memory/memory_util.cc b/utilities/memory/memory_util.cc index f5580174a..403f2f5ad 100644 --- a/utilities/memory/memory_util.cc +++ b/utilities/memory/memory_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/utilities/merge_operators.h b/utilities/merge_operators.h index fdf06645f..eb60ed5cc 100644 --- a/utilities/merge_operators.h +++ b/utilities/merge_operators.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/merge_operators/put.cc b/utilities/merge_operators/put.cc index 333084313..04c1270b2 100644 --- a/utilities/merge_operators/put.cc +++ b/utilities/merge_operators/put.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/merge_operators/uint64add.cc b/utilities/merge_operators/uint64add.cc index 6024beb95..90eb3d4c6 100644 --- a/utilities/merge_operators/uint64add.cc +++ b/utilities/merge_operators/uint64add.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/options/options_util.cc b/utilities/options/options_util.cc index 1c6a068ac..2526c5690 100644 --- a/utilities/options/options_util.cc +++ b/utilities/options/options_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. 
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/options/options_util_test.cc b/utilities/options/options_util_test.cc index c6d8cdb5f..e93d8a837 100644 --- a/utilities/options/options_util_test.cc +++ b/utilities/options/options_util_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/redis/redis_lists_test.cc b/utilities/redis/redis_lists_test.cc index 3ef35f75e..3c97be271 100644 --- a/utilities/redis/redis_lists_test.cc +++ b/utilities/redis/redis_lists_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/spatialdb/spatial_db.cc b/utilities/spatialdb/spatial_db.cc index 36c9ed188..e7943120c 100644 --- a/utilities/spatialdb/spatial_db.cc +++ b/utilities/spatialdb/spatial_db.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/spatialdb/spatial_db_test.cc b/utilities/spatialdb/spatial_db_test.cc index 41f3cd620..92c34c87d 100644 --- a/utilities/spatialdb/spatial_db_test.cc +++ b/utilities/spatialdb/spatial_db_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/spatialdb/utils.h b/utilities/spatialdb/utils.h index b8c664a92..d4dae0200 100644 --- a/utilities/spatialdb/utils.h +++ b/utilities/spatialdb/utils.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/table_properties_collectors/compact_on_deletion_collector.cc b/utilities/table_properties_collectors/compact_on_deletion_collector.cc index 2079ccb86..59eaf81c3 100644 --- a/utilities/table_properties_collectors/compact_on_deletion_collector.cc +++ b/utilities/table_properties_collectors/compact_on_deletion_collector.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/utilities/table_properties_collectors/compact_on_deletion_collector.h b/utilities/table_properties_collectors/compact_on_deletion_collector.h index 3001ce913..50d363cd3 100644 --- a/utilities/table_properties_collectors/compact_on_deletion_collector.h +++ b/utilities/table_properties_collectors/compact_on_deletion_collector.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc b/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc index 89e6bbcb8..ab16b37bb 100644 --- a/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +++ b/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/optimistic_transaction_db_impl.cc b/utilities/transactions/optimistic_transaction_db_impl.cc index ca9897211..d54173d3d 100644 --- a/utilities/transactions/optimistic_transaction_db_impl.cc +++ b/utilities/transactions/optimistic_transaction_db_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/optimistic_transaction_db_impl.h b/utilities/transactions/optimistic_transaction_db_impl.h index ec5b42823..72f186188 100644 --- a/utilities/transactions/optimistic_transaction_db_impl.h +++ b/utilities/transactions/optimistic_transaction_db_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/optimistic_transaction_impl.cc b/utilities/transactions/optimistic_transaction_impl.cc index 24ecd11d7..5cb1a8f8d 100644 --- a/utilities/transactions/optimistic_transaction_impl.cc +++ b/utilities/transactions/optimistic_transaction_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/optimistic_transaction_impl.h b/utilities/transactions/optimistic_transaction_impl.h index cdca23269..cbd167505 100644 --- a/utilities/transactions/optimistic_transaction_impl.h +++ b/utilities/transactions/optimistic_transaction_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index e3ac43796..991771757 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/transaction_base.cc b/utilities/transactions/transaction_base.cc index cd5ebe98a..cb17fab4a 100644 --- a/utilities/transactions/transaction_base.cc +++ b/utilities/transactions/transaction_base.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/transaction_base.h b/utilities/transactions/transaction_base.h index 5a6f4799e..cb8ca2483 100644 --- a/utilities/transactions/transaction_base.h +++ b/utilities/transactions/transaction_base.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/transaction_db_impl.cc b/utilities/transactions/transaction_db_impl.cc index 91440ae65..bc5b9e596 100644 --- a/utilities/transactions/transaction_db_impl.cc +++ b/utilities/transactions/transaction_db_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/transaction_db_impl.h b/utilities/transactions/transaction_db_impl.h index 060510136..ace218d19 100644 --- a/utilities/transactions/transaction_db_impl.h +++ b/utilities/transactions/transaction_db_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/transaction_db_mutex_impl.cc b/utilities/transactions/transaction_db_mutex_impl.cc index ec905fbdb..ad1a3c066 100644 --- a/utilities/transactions/transaction_db_mutex_impl.cc +++ b/utilities/transactions/transaction_db_mutex_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/utilities/transactions/transaction_db_mutex_impl.h b/utilities/transactions/transaction_db_mutex_impl.h index 7c915ca56..0dfac4fa8 100644 --- a/utilities/transactions/transaction_db_mutex_impl.h +++ b/utilities/transactions/transaction_db_mutex_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/transaction_impl.cc b/utilities/transactions/transaction_impl.cc index e8b57018e..7cda2cd0e 100644 --- a/utilities/transactions/transaction_impl.cc +++ b/utilities/transactions/transaction_impl.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/transaction_impl.h b/utilities/transactions/transaction_impl.h index 94dbe7df3..01521f172 100644 --- a/utilities/transactions/transaction_impl.h +++ b/utilities/transactions/transaction_impl.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/utilities/transactions/transaction_lock_mgr.cc b/utilities/transactions/transaction_lock_mgr.cc index 51b8d4a82..f4fd9aff2 100644 --- a/utilities/transactions/transaction_lock_mgr.cc +++ b/utilities/transactions/transaction_lock_mgr.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/transaction_lock_mgr.h b/utilities/transactions/transaction_lock_mgr.h index fa46c62be..5018f39c3 100644 --- a/utilities/transactions/transaction_lock_mgr.h +++ b/utilities/transactions/transaction_lock_mgr.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index a81a4ce8f..98dfc732b 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/utilities/transactions/transaction_util.cc b/utilities/transactions/transaction_util.cc index d4622bd2a..363e8dece 100644 --- a/utilities/transactions/transaction_util.cc +++ b/utilities/transactions/transaction_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/transactions/transaction_util.h b/utilities/transactions/transaction_util.h index 1bb880ce0..b9579f7f1 100644 --- a/utilities/transactions/transaction_util.h +++ b/utilities/transactions/transaction_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index bae4535d5..e57c95c42 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/utilities/write_batch_with_index/write_batch_with_index_internal.cc index ba88e67d4..7b1a6dd27 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index b88cd768e..ec4da19e4 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. diff --git a/utilities/write_batch_with_index/write_batch_with_index_test.cc b/utilities/write_batch_with_index/write_batch_with_index_test.cc index da695c4ca..d91482db4 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_test.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. From 2a04268be31d98ba16ecfb6e55d27c86ad59924d Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Tue, 9 Feb 2016 16:26:20 -0800 Subject: [PATCH 100/195] Temporarily disable unstable tests in memory_test.cc Summary: memory_test.cc has some tests that are not unstable but hard to reproduce, and the cause is the test itself not the code. Temporarily disable the tests until we have a good fix. Test Plan: memory_test Reviewers: sdong, anthony, IslamAbdelRahman, rven, kradhakrishnan Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54009 --- utilities/memory/memory_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities/memory/memory_test.cc b/utilities/memory/memory_test.cc index 63420be61..5c97f78bd 100644 --- a/utilities/memory/memory_test.cc +++ b/utilities/memory/memory_test.cc @@ -93,7 +93,7 @@ class MemoryTest : public testing::Test { std::vector usage_history_[MemoryUtil::kNumUsageTypes]; }; -TEST_F(MemoryTest, SharedBlockCacheTotal) { +TEST_F(MemoryTest, DISABLED_SharedBlockCacheTotal) { std::vector dbs; std::vector usage_by_type; const int kNumDBs = 10; @@ -144,7 +144,7 @@ TEST_F(MemoryTest, SharedBlockCacheTotal) { } } -TEST_F(MemoryTest, MemTableAndTableReadersTotal) { +TEST_F(MemoryTest, DISABLED_MemTableAndTableReadersTotal) { std::vector dbs; std::vector usage_by_type; std::vector> vec_handles; From 3a67bffaa8c03f7c7fe06e81962eccc347f977cc Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Wed, 10 Feb 2016 12:06:59 -0800 Subject: [PATCH 101/195] Fix an ASAN error in transaction_test.cc Summary: One test in transaction_test.cc forgets to call SyncPoint::DisableProcessing(). As a result, a program might to access the SyncPoint singleton after it already goes out of scope. This patch fix this error by calling SyncPoint::DisableProcessing(). 
Test Plan: transaction_test Reviewers: sdong, IslamAbdelRahman, kradhakrishnan, anthony Reviewed By: anthony Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54033 --- utilities/transactions/transaction_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 98dfc732b..d51912b38 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -2876,6 +2876,7 @@ TEST_F(TransactionTest, ExpiredTransactionDataRace1) { ASSERT_EQ("1", value); delete txn1; + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } } // namespace rocksdb From 337671b688b06fde07963b440a95de2bc4914b27 Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Wed, 10 Feb 2016 15:30:47 -0800 Subject: [PATCH 102/195] Add universal compaction benchmarks to run_flash_bench.sh Summary: Implement a benchmark for universal compaction based on the feature description (see below), in-person discussions, and reading source code: https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide https://github.com/facebook/rocksdb/wiki/Universal-Compaction https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide#universal-compaction Universal compaction benchmark is based on `overwrite` benchmark, adding compaction specific options to it, and executing it for different values of subcompaction to understand the impact of scaling out subcompactions for a particular scenario. Test Plan: - Execute the benchmark on various machines for multiple iterations to verify the reliability. - Observe the output to make sure that compaction is taking place. - Observe the execution to make sure that arguments passed to `db_bench` are correct. 
Reviewers: sdong, MarkCallaghan Reviewed By: MarkCallaghan Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54045 --- tools/benchmark.sh | 50 ++++++++++++++++++++++++++++++++++++++++ tools/run_flash_bench.sh | 9 ++++++++ 2 files changed, 59 insertions(+) diff --git a/tools/benchmark.sh b/tools/benchmark.sh index 09a2fb369..518577d08 100755 --- a/tools/benchmark.sh +++ b/tools/benchmark.sh @@ -162,6 +162,54 @@ function run_bulkload { eval $cmd } +function run_univ_compaction_worker { + # Worker function intended to be called from run_univ_compaction. + echo -e "\nCompacting ...\n" + + compact_output_file=$output_dir/benchmark_univ_compact_sub_$3.t${num_threads}.s${syncval}.log + + # The essence of the command is borrowed from run_change overwrite with + # compaction specific options being added. + cmd="./db_bench --benchmarks=overwrite \ + --use_existing_db=1 \ + --sync=$syncval \ + $params_w \ + --threads=$num_threads \ + --merge_operator=\"put\" \ + --seed=$( date +%s ) \ + --compaction_measure_io_stats=$1 \ + --compaction_style=$2 \ + --subcompactions=$3 \ + 2>&1 | tee -a $compact_output_file" + echo $cmd | tee $compact_output_file + eval $cmd + + summarize_result $compact_output_file univ_compact_sub_comp_$3 overwrite +} + +function run_univ_compaction { + # Always ask for I/O statistics to be measured. + io_stats=1 + + # Values: kCompactionStyleLevel = 0x0, kCompactionStyleUniversal = 0x1. + compaction_style=1 + + # Get the basic understanding about impact of scaling out the subcompactions + # by allowing the usage of { 1, 2, 4, 8, 16 } threads for different runs. + subcompactions=("1" "2" "4" "8" "16") + + # Have a separate suffix for each experiment so that separate results will be + # persisted. + log_suffix=1 + + # Do the real work of running various experiments. 
+ for ((i=0; i < ${#subcompactions[@]}; i++)) + do + run_univ_compaction_worker $io_stats $compaction_style ${subcompactions[$i]} $log_suffix + ((log_suffix++)) + done +} + function run_fillseq { # This runs with a vector memtable. WAL can be either disabled or enabled # depending on the input parameter (1 for disabled, 0 for enabled). The main @@ -362,6 +410,8 @@ for job in ${jobs[@]}; do run_rangewhile merging $job true elif [ $job = randomtransaction ]; then run_randomtransaction + elif [ $job = universal_compaction ]; then + run_univ_compaction elif [ $job = debug ]; then num_keys=1000; # debug echo "Setting num_keys to $num_keys" diff --git a/tools/run_flash_bench.sh b/tools/run_flash_bench.sh index 873b2c7ca..8fe0b34ad 100755 --- a/tools/run_flash_bench.sh +++ b/tools/run_flash_bench.sh @@ -264,6 +264,11 @@ for num_thr in "${nthreads[@]}" ; do fi done +###### Universal compaction tests. + +# Use a single thread to reduce the variability in the benchmark. +env $ARGS NUM_THREADS=1 ./tools/benchmark.sh universal_compaction + if [[ $skip_low_pri_tests != 1 ]]; then echo bulkload > $output_dir/report2.txt head -1 $output_dir/report.txt >> $output_dir/report2.txt @@ -350,4 +355,8 @@ if [[ $skip_low_pri_tests != 1 ]]; then grep revrangewhilemerg $output_dir/report.txt >> $output_dir/report2.txt fi +echo universal_compaction >> $output_dir/report2.txt +head -1 $output_dir/report.txt >> $output_dir/report2.txt +grep univ_compact $output_dir/report.txt >> $output_dir/report2.txt + cat $output_dir/report2.txt From 908100399cdc8855980ca9f8c05d37836857ab79 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Wed, 10 Feb 2016 16:56:01 -0800 Subject: [PATCH 103/195] Fixed a dependency issue of ThreadLocalPtr Summary: When a child thread that uses ThreadLocalPtr, ThreadLocalPtr::OnThreadExit will be called when that child thread is destroyed. However, OnThreadExit will try to access a static singleton of ThreadLocalPtr, which will be destroyed when the main thread exit. 
As a result, when a child thread that uses ThreadLocalPtr exits AFTER the main thread exits, illegal memory access will occur. This diff includes a test that reproduce this legacy bug. ==2095206==ERROR: AddressSanitizer: heap-use-after-free on address 0x608000007fa0 at pc 0x959b79 bp 0x7f5fa7426b60 sp 0x7f5fa7426b58 READ of size 8 at 0x608000007fa0 thread T1 This patch fix this issue by having the thread local mutex never be deleted (but will leak small piece of memory at the end.) The patch also describe a better solution (thread_local) in the comment that requires gcc 4.8.1 and in latest clang as a future work once we agree to move toward gcc 4.8. Test Plan: COMPILE_WITH_ASAN=1 make thread_local_test -j32 ./thread_local_test --gtest_filter="*MainThreadDiesFirst" Reviewers: anthony, hermanlee4, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53013 --- util/env_posix.cc | 1 + util/thread_local.cc | 47 ++++++++++++++++++++++++------------ util/thread_local.h | 51 ++++++++++++++++++++++++--------------- util/thread_local_test.cc | 36 ++++++++++++++++++++++++++- 4 files changed, 99 insertions(+), 36 deletions(-) diff --git a/util/env_posix.cc b/util/env_posix.cc index 06de7a486..132d865dd 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -132,6 +132,7 @@ class PosixEnv : public Env { // All threads must be joined before the deletion of // thread_status_updater_. 
delete thread_status_updater_; + TEST_SYNC_POINT("PosixEnv::~PosixEnv():End"); } void SetFD_CLOEXEC(int fd, const EnvOptions* options) { diff --git a/util/thread_local.cc b/util/thread_local.cc index 846c33877..5f3fddae5 100644 --- a/util/thread_local.cc +++ b/util/thread_local.cc @@ -104,7 +104,6 @@ PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit; void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::StaticMeta::InitSingletons(); - ThreadLocalPtr::Instance(); } ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() { @@ -113,30 +112,46 @@ ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() { // when the function is first call. As a result, we can properly // control their construction order by properly preparing their // first function call. - static ThreadLocalPtr::StaticMeta inst; - return &inst; + // + // Note that here we decide to make "inst" a static pointer w/o deleting + // it at the end instead of a static variable. This is to avoid the following + // destruction order desester happens when a child thread using ThreadLocalPtr + // dies AFTER the main thread dies: When a child thread happens to use + // ThreadLocalPtr, it will try to delete its thread-local data on its + // OnThreadExit when the child thread dies. However, OnThreadExit depends + // on the following variable. As a result, if the main thread dies before any + // child thread happen to use ThreadLocalPtr dies, then the destruction of + // the following variable will go first, then OnThreadExit, therefore causing + // invalid access. + // + // The above problem can be solved by using thread_local to store tls_ instead + // of using __thread. The major difference between thread_local and __thread + // is that thread_local supports dynamic construction and destruction of + // non-primitive typed variables. As a result, we can guarantee the + // desturction order even when the main thread dies before any child threads. 
+ // However, thread_local requires gcc 4.8 and is not supported in all the + // compilers that accepts -std=c++11 (e.g., the default clang on Mac), while + // the current RocksDB still accept gcc 4.7. + static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta(); + return inst; } void ThreadLocalPtr::StaticMeta::InitSingletons() { Mutex(); } -port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { - // Here we prefer function static variable instead of global - // static variable as function static variable is initialized - // when the function is first call. As a result, we can properly - // control their construction order by properly preparing their - // first function call. - static port::Mutex mutex; - return &mutex; -} +port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; } void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) { auto* tls = static_cast(ptr); assert(tls != nullptr); - auto* inst = Instance(); + // Use the cached StaticMeta::Instance() instead of directly calling + // the variable inside StaticMeta::Instance() might already go out of + // scope here in case this OnThreadExit is called after the main thread + // dies. 
+ auto* inst = tls->inst; pthread_setspecific(inst->pthread_key_, nullptr); - MutexLock l(Mutex()); + MutexLock l(inst->MemberMutex()); inst->RemoveThreadData(tls); // Unref stored pointers of current thread from all instances uint32_t id = 0; @@ -154,7 +169,7 @@ void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) { delete tls; } -ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0) { +ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0), head_(this) { if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) { abort(); } @@ -221,7 +236,7 @@ ThreadLocalPtr::ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() { if (UNLIKELY(tls_ == nullptr)) { auto* inst = Instance(); - tls_ = new ThreadData(); + tls_ = new ThreadData(inst); { // Register it in the global chain, needs to be done before thread exit // handler registration diff --git a/util/thread_local.h b/util/thread_local.h index a4feac38d..3adf8ba85 100644 --- a/util/thread_local.h +++ b/util/thread_local.h @@ -79,6 +79,8 @@ class ThreadLocalPtr { std::atomic ptr; }; + class StaticMeta; + // This is the structure that is declared as "thread_local" storage. // The vector keep list of atomic pointer for all instances for "current" // thread. The vector is indexed by an Id that is unique in process and @@ -95,10 +97,11 @@ class ThreadLocalPtr { // | thread 3 | void* | void* | void* | <- ThreadData // --------------------------------------------------- struct ThreadData { - ThreadData() : entries() {} + explicit ThreadData(StaticMeta* _inst) : entries(), inst(_inst) {} std::vector entries; ThreadData* next; ThreadData* prev; + StaticMeta* inst; }; class StaticMeta { @@ -139,6 +142,31 @@ class ThreadLocalPtr { // initialized will be no-op. static void InitSingletons(); + // protect inst, next_instance_id_, free_instance_ids_, head_, + // ThreadData.entries + // + // Note that here we prefer function static variable instead of the usual + // global static variable. 
The reason is that c++ destruction order of + // static variables in the reverse order of their construction order. + // However, C++ does not guarantee any construction order when global + // static variables are defined in different files, while the function + // static variables are initialized when their function are first called. + // As a result, the construction order of the function static variables + // can be controlled by properly invoke their first function calls in + // the right order. + // + // For instance, the following function contains a function static + // variable. We place a dummy function call of this inside + // Env::Default() to ensure the construction order of the construction + // order. + static port::Mutex* Mutex(); + + // Returns the member mutex of the current StaticMeta. In general, + // Mutex() should be used instead of this one. However, in case where + // the static variable inside Instance() goes out of scope, MemberMutex() + // should be used. One example is OnThreadExit() function. + port::Mutex* MemberMutex() { return &mutex_; } + private: // Get UnrefHandler for id with acquiring mutex // REQUIRES: mutex locked @@ -169,24 +197,9 @@ class ThreadLocalPtr { std::unordered_map handler_map_; - // protect inst, next_instance_id_, free_instance_ids_, head_, - // ThreadData.entries - // - // Note that here we prefer function static variable instead of the usual - // global static variable. The reason is that c++ destruction order of - // static variables in the reverse order of their construction order. - // However, C++ does not guarantee any construction order when global - // static variables are defined in different files, while the function - // static variables are initialized when their function are first called. - // As a result, the construction order of the function static variables - // can be controlled by properly invoke their first function calls in - // the right order. 
- // - // For instance, the following function contains a function static - // variable. We place a dummy function call of this inside - // Env::Default() to ensure the construction order of the construction - // order. - static port::Mutex* Mutex(); + // The private mutex. Developers should always use Mutex() instead of + // using this variable directly. + port::Mutex mutex_; #if ROCKSDB_SUPPORT_THREAD_LOCAL // Thread local storage static __thread ThreadData* tls_; diff --git a/util/thread_local_test.cc b/util/thread_local_test.cc index 537737650..f531dfd0c 100644 --- a/util/thread_local_test.cc +++ b/util/thread_local_test.cc @@ -3,14 +3,17 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. +#include #include +#include #include "rocksdb/env.h" #include "port/port.h" #include "util/autovector.h" -#include "util/thread_local.h" +#include "util/sync_point.h" #include "util/testharness.h" #include "util/testutil.h" +#include "util/thread_local.h" namespace rocksdb { @@ -467,6 +470,37 @@ TEST_F(ThreadLocalTest, CompareAndSwap) { ASSERT_EQ(tls.Get(), reinterpret_cast(3)); } +namespace { + +void* AccessThreadLocal(void* arg) { + TEST_SYNC_POINT("AccessThreadLocal:Start"); + ThreadLocalPtr tlp; + tlp.Reset(new std::string("hello RocksDB")); + TEST_SYNC_POINT("AccessThreadLocal:End"); + return nullptr; +} + +} // namespace + +// The following test is disabled as it requires manual steps to run it +// correctly. +// +// Currently we have no way to acess SyncPoint w/o ASAN error when the +// child thread dies after the main thread dies. So if you manually enable +// this test and only see an ASAN error on SyncPoint, it means you pass the +// test. 
+TEST_F(ThreadLocalTest, DISABLED_MainThreadDiesFirst) { + rocksdb::SyncPoint::GetInstance()->LoadDependency( + {{"AccessThreadLocal:Start", "MainThreadDiesFirst:End"}, + {"PosixEnv::~PosixEnv():End", "AccessThreadLocal:End"}}); + + // Triggers the initialization of singletons. + Env::Default(); + pthread_t t; + pthread_create(&t, nullptr, &AccessThreadLocal, nullptr); + TEST_SYNC_POINT("MainThreadDiesFirst:End"); +} + } // namespace rocksdb int main(int argc, char** argv) { From 239aaf2fc0fddadaa4e4130592d9c581f196d98d Mon Sep 17 00:00:00 2001 From: Peter Mattis Date: Thu, 11 Feb 2016 08:26:25 -0500 Subject: [PATCH 104/195] Use user_comparator when comparing against iterate_upper_bound. Fixes #983. --- db/db_iter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index a0a0529a7..afffc4dcc 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -275,7 +275,7 @@ void DBIter::FindNextUserEntryInternal(bool skipping) { if (ParseKey(&ikey)) { if (iterate_upper_bound_ != nullptr && - ikey.user_key.compare(*iterate_upper_bound_) >= 0) { + user_comparator_->Compare(ikey.user_key, *iterate_upper_bound_) >= 0) { break; } From 92a9ccf1a63ecb883440f73df0a255a7b82ff1fd Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 11 Feb 2016 13:45:53 -0800 Subject: [PATCH 105/195] Add a new compaction priority that picks file whose overlapping ratio is smallest Summary: Add a new compaction priority as following: For every file, we calculate total size of files overalapping with the file in the next level, over the file's size itself. The file with smallest ratio will be picked first. My "db_bench --fillrandom" shows about 5% less compaction than kOldestSmallestSeqFirst if --hard_pending_compaction_bytes_limit value to keep LSM tree in shape. If not limiting hard_pending_compaction_bytes_limit, improvement is only 1% or 2%. 
Test Plan: Add a unit test Reviewers: andrewkr, kradhakrishnan, anthony, IslamAbdelRahman, yhchiang Reviewed By: yhchiang Subscribers: MarkCallaghan, leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54075 --- HISTORY.md | 2 + db/compaction_picker_test.cc | 81 ++++++++++++++++++++++++++++++++++++ db/db_compaction_test.cc | 8 +++- db/version_set.cc | 45 ++++++++++++++++++++ include/rocksdb/options.h | 7 ++++ 5 files changed, 141 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 9a14a8bf4..abf86a430 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,7 @@ # Rocksdb Change Log ## Unreleased +### New Features +* Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification. ## 4.5.0 (2/5/2016) ### Public API Changes diff --git a/db/compaction_picker_test.cc b/db/compaction_picker_test.cc index f3801ae9f..98d80eea8 100644 --- a/db/compaction_picker_test.cc +++ b/db/compaction_picker_test.cc @@ -487,6 +487,87 @@ TEST_F(CompactionPickerTest, NeedsCompactionFIFO) { } #endif // ROCKSDB_LITE +TEST_F(CompactionPickerTest, CompactionPriMinOverlapping1) { + NewVersionStorage(6, kCompactionStyleLevel); + mutable_cf_options_.target_file_size_base = 10000000; + mutable_cf_options_.target_file_size_multiplier = 10; + mutable_cf_options_.compaction_pri = kMinOverlappingRatio; + + Add(2, 6U, "150", "179", 50000000U); + Add(2, 7U, "180", "220", 50000000U); + Add(2, 8U, "321", "400", 50000000U); // File not overlapping + Add(2, 9U, "721", "800", 50000000U); + + Add(3, 26U, "150", "170", 260000000U); + Add(3, 27U, "171", "179", 260000000U); + Add(3, 28U, "191", "220", 260000000U); + Add(3, 29U, "221", "300", 260000000U); + Add(3, 30U, "750", "900", 260000000U); + UpdateVersionStorageInfo(); + + std::unique_ptr compaction(level_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(1U, 
compaction->num_input_files(0)); + // Pick file 8 because it overlaps with 0 files on level 3. + ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber()); +} + +TEST_F(CompactionPickerTest, CompactionPriMinOverlapping2) { + NewVersionStorage(6, kCompactionStyleLevel); + mutable_cf_options_.target_file_size_base = 10000000; + mutable_cf_options_.target_file_size_multiplier = 10; + mutable_cf_options_.compaction_pri = kMinOverlappingRatio; + + Add(2, 6U, "150", "175", + 60000000U); // Overlaps with file 26, 27, total size 521M + Add(2, 7U, "176", "200", 60000000U); // Overlaps with file 27, 28, total size + // 520M, the smalelst overlapping + Add(2, 8U, "201", "300", + 60000000U); // Overlaps with file 28, 29, total size 521M + + Add(3, 26U, "100", "110", 261000000U); + Add(3, 26U, "150", "170", 261000000U); + Add(3, 27U, "171", "179", 260000000U); + Add(3, 28U, "191", "220", 260000000U); + Add(3, 29U, "221", "300", 261000000U); + Add(3, 30U, "321", "400", 261000000U); + UpdateVersionStorageInfo(); + + std::unique_ptr compaction(level_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(1U, compaction->num_input_files(0)); + // Picking file 7 because overlapping ratio is the biggest. + ASSERT_EQ(7U, compaction->input(0, 0)->fd.GetNumber()); +} + +TEST_F(CompactionPickerTest, CompactionPriMinOverlapping3) { + NewVersionStorage(6, kCompactionStyleLevel); + mutable_cf_options_.target_file_size_base = 10000000; + mutable_cf_options_.target_file_size_multiplier = 10; + mutable_cf_options_.compaction_pri = kMinOverlappingRatio; + + // file 7 and 8 over lap with the same file, but file 8 is smaller so + // it will be picked. 
+ Add(2, 6U, "150", "175", 60000000U); // Overlaps with file 26, 27 + Add(2, 7U, "176", "200", 60000000U); // Overlaps with file 27 + Add(2, 8U, "201", "300", 61000000U); // Overlaps with file 27 + + Add(3, 26U, "160", "165", 260000000U); + Add(3, 26U, "166", "170", 260000000U); + Add(3, 27U, "180", "400", 260000000U); + Add(3, 28U, "401", "500", 260000000U); + UpdateVersionStorageInfo(); + + std::unique_ptr compaction(level_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(1U, compaction->num_input_files(0)); + // Picking file 8 because overlapping ratio is the biggest. + ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber()); +} + // This test exhibits the bug where we don't properly reset parent_index in // PickCompaction() TEST_F(CompactionPickerTest, ParentIndexResetBug) { diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index b900aa5f6..6dedccd82 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -2507,8 +2507,12 @@ TEST_P(CompactionPriTest, Test) { } } -INSTANTIATE_TEST_CASE_P(CompactionPriTest, CompactionPriTest, - ::testing::Values(0, 1, 2)); +INSTANTIATE_TEST_CASE_P( + CompactionPriTest, CompactionPriTest, + ::testing::Values(CompactionPri::kByCompensatedSize, + CompactionPri::kOldestLargestSeqFirst, + CompactionPri::kOldestSmallestSeqFirst, + CompactionPri::kMinOverlappingRatio)); #endif // !defined(ROCKSDB_LITE) } // namespace rocksdb diff --git a/db/version_set.cc b/db/version_set.cc index 2e600f8a2..4cf493f91 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1377,6 +1377,47 @@ void VersionStorageInfo::UpdateNumNonEmptyLevels() { } } +namespace { +// Sort `temp` based on ratio of overlapping size over file size +void SortFileByOverlappingRatio( + const InternalKeyComparator& icmp, const std::vector& files, + const std::vector& next_level_files, + std::vector* temp) { + std::unordered_map file_to_order; 
+ auto next_level_it = next_level_files.begin(); + + for (auto& file : files) { + uint64_t overlapping_bytes = 0; + // Skip files in next level that is smaller than current file + while (next_level_it != next_level_files.end() && + icmp.Compare((*next_level_it)->largest, file->smallest) < 0) { + next_level_it++; + } + + while (next_level_it != next_level_files.end() && + icmp.Compare((*next_level_it)->smallest, file->largest) < 0) { + overlapping_bytes += (*next_level_it)->fd.file_size; + + if (icmp.Compare((*next_level_it)->largest, file->largest) > 0) { + // next level file cross large boundary of current file. + break; + } + next_level_it++; + } + + assert(file->fd.file_size != 0); + file_to_order[file->fd.GetNumber()] = + overlapping_bytes * 1024u / file->fd.file_size; + } + + std::sort(temp->begin(), temp->end(), + [&](const Fsize& f1, const Fsize& f2) -> bool { + return file_to_order[f1.file->fd.GetNumber()] < + file_to_order[f2.file->fd.GetNumber()]; + }); +} +} // namespace + void VersionStorageInfo::UpdateFilesByCompactionPri( const MutableCFOptions& mutable_cf_options) { if (compaction_style_ == kCompactionStyleFIFO || @@ -1419,6 +1460,10 @@ void VersionStorageInfo::UpdateFilesByCompactionPri( return f1.file->smallest_seqno < f2.file->smallest_seqno; }); break; + case kMinOverlappingRatio: + SortFileByOverlappingRatio(*internal_comparator_, files_[level], + files_[level + 1], &temp); + break; default: assert(false); } diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 0718b2342..c0fe0b81a 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -80,6 +80,9 @@ enum CompactionStyle : char { kCompactionStyleNone = 0x3, }; +// In Level-based comapction, it Determines which file from a level to be +// picked to merge to the next level. We suggest people try +// kMinOverlappingRatio first when you tune your database. 
enum CompactionPri : char { // Slightly Priotize larger files by size compensated by #deletes kByCompensatedSize = 0x0, @@ -90,6 +93,10 @@ enum CompactionPri : char { // for the longest. If your updates are random across the key space, // write amplification is slightly better with this option. kOldestSmallestSeqFirst = 0x2, + // First compact files whose ratio between overlapping size in next level + // and its size is the smallest. It in many cases can optimize write + // amplification. + kMinOverlappingRatio = 0x3, }; enum class WALRecoveryMode : char { From 6a2b4fcb805c4bf3c8ec283a45100ca5ff352f82 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 11 Feb 2016 17:00:01 -0800 Subject: [PATCH 106/195] Add flag to forcibly disable fallocate Summary: see https://github.com/facebook/rocksdb/issues/977; there are issues with fallocate() on certain filesystems/kernel versions that can lead it to pre- allocating blocks but never freeing them, even if they're unused. Test Plan: verified build commands omit DROCKSDB_FALLOCATE_PRESENT when this env variable is set. without disabling it: $ ROCKSDB_NO_FBCODE=1 make -n env_test | grep -q DROCKSDB_FALLOCATE_PRESENT ; echo $? 0 with disabling it: $ ROCKSDB_NO_FBCODE=1 DISABLE_FALLOCATE=1 make -n env_test | grep -q DROCKSDB_FALLOCATE_PRESENT ; echo $? 
1 Reviewers: kradhakrishnan, yhchiang, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54069 --- build_tools/build_detect_platform | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 80905266f..5832b03a9 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -189,17 +189,19 @@ if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then # Also don't need any compilation tests if compiling on fbcode true else - # Test whether fallocate is available - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < - #include - int main() { - int fd = open("/dev/null", 0); - fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, 1024); - } + if ! test $ROCKSDB_DISABLE_FALLOCATE; then + # Test whether fallocate is available + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < + #include + int main() { + int fd = open("/dev/null", 0); + fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, 1024); + } EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_FALLOCATE_PRESENT" + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_FALLOCATE_PRESENT" + fi fi # Test whether Snappy library is installed From 5bb7371ca655329334d7b51469372a1dbc8f0f6d Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 12 Feb 2016 10:49:35 -0800 Subject: [PATCH 107/195] [build] Evaluate test names only when db_test exists Summary: as titled, this will prevent the error that was printed because test_names was evaluated before db_test was built. 
Test Plan: verified below command works and no longer prints errors: $ make release -j32 verified below command still finds the right tests: $ make J=32 parallel_check Reviewers: igor Reviewed By: igor Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54117 --- Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 6d4e48d4e..7399ff6a7 100644 --- a/Makefile +++ b/Makefile @@ -644,8 +644,6 @@ parloop: exit $$ret_bad; endif -all_tests:=$(shell $(test_names)) - parallel_check: $(TESTS) $(AM_V_GEN)if test "$(J)" > 1 \ && (parallel --gnu --help 2>/dev/null) | \ @@ -659,7 +657,7 @@ parallel_check: $(TESTS) echo $(J);\ echo Test Dir: $(TMPD); \ seq $(J) | parallel --gnu 's=$(TMPD)/rdb-{}; rm -rf $$s; mkdir $$s'; \ - $(MAKE) PAR_TEST="$(all_tests)" TMPD=$(TMPD) \ + $(MAKE) PAR_TEST="$(shell $(test_names))" TMPD=$(TMPD) \ J=$(J) db_test=1 parloop; \ $(MAKE) PAR_TEST="$(filter-out db_test, $(TESTS))" \ TMPD=$(TMPD) J=$(J) db_test=0 parloop; From 545a193952bfbc43f0703c2266b99f1a90fd71bb Mon Sep 17 00:00:00 2001 From: krad Date: Sun, 7 Feb 2016 00:42:37 -0800 Subject: [PATCH 108/195] Add J to commit_prereq so comilation/execution happens in parallel Summary: Users are confused on how to get the parallel compilation going. This can help wire the parallelism. 
Test Plan: Run manually Reviewers: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53931 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7399ff6a7..ca2c2a7c5 100644 --- a/Makefile +++ b/Makefile @@ -1205,7 +1205,7 @@ jdb_bench: commit_prereq: build_tools/rocksdb-lego-determinator \ build_tools/precommit_checker.py - build_tools/precommit_checker.py unit unit_481 clang_unit tsan asan lite + J=$(J) build_tools/precommit_checker.py unit unit_481 clang_unit tsan asan lite $(MAKE) clean && $(MAKE) jclean && $(MAKE) rocksdbjava; xfunc: From 1c868d684822f760636d66785d3aeae5c55cc57d Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 12 Feb 2016 14:17:14 -0800 Subject: [PATCH 109/195] Fix includes for env_test Summary: Previously compilation failed when ROCKSDB_NO_FBCODE=1 because fcntl.h wasn't included for open(). Related issue: https://github.com/facebook/rocksdb/issues/977 Test Plan: verified below command works now: $ make clean && ROCKSDB_NO_FBCODE=1 ROCKSDB_DISABLE_FALLOCATE=1 make -j32 env_test Reviewers: sdong, yhchiang, igor Reviewed By: igor Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54135 --- util/env_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/env_test.cc b/util/env_test.cc index a07eed042..ab4976df8 100644 --- a/util/env_test.cc +++ b/util/env_test.cc @@ -18,6 +18,7 @@ #include #ifdef OS_LINUX +#include #include #include #include @@ -26,7 +27,6 @@ #ifdef ROCKSDB_FALLOCATE_PRESENT #include -#include #endif #include "rocksdb/env.h" From bd5f842bbad5cf54d8fd38e2f76af52b6be82730 Mon Sep 17 00:00:00 2001 From: John Dennison Date: Mon, 15 Feb 2016 23:55:29 +0000 Subject: [PATCH 110/195] fixes typo in options logging --- util/options.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/options.cc b/util/options.cc index 01eeb9d23..274fda8c5 100644 --- a/util/options.cc +++ 
b/util/options.cc @@ -600,7 +600,7 @@ void ColumnFamilyOptions::Dump(Logger* log) const { Header(log, " Options.max_successive_merges: %" ROCKSDB_PRIszt, max_successive_merges); - Header(log, " Options.optimize_fllters_for_hits: %d", + Header(log, " Options.optimize_filters_for_hits: %d", optimize_filters_for_hits); Header(log, " Options.paranoid_file_checks: %d", paranoid_file_checks); From 7bd284c3744d6e79e534320b08361e7a328716e6 Mon Sep 17 00:00:00 2001 From: Jonathan Wiepert Date: Tue, 16 Feb 2016 06:17:31 -0800 Subject: [PATCH 111/195] Separeate main from bench functionality to allow cusomizations Summary: Isolate db_bench functionality from main so custom benchmark code can be written and managed Test Plan: Tested commands ./build_tools/regression_build_test.sh ./db_bench --db=/tmp/rocksdbtest-12321/dbbench --stats_interval_seconds=1 --num=1000 ./db_bench --db=/tmp/rocksdbtest-12321/dbbench --stats_interval_seconds=1 --num=1000 --reads=500 --writes=500 ./db_bench --db=/tmp/rocksdbtest-12321/dbbench --stats_interval_seconds=1 --num=1000 --merge_keys=100 --numdistinct=100 --num_column_families=3 --num_hot_column_families=1 ./db_bench --stats_interval_seconds=1 --num=1000 --bloom_locality=1 --seed=5 --threads=5 ./db_bench --duration=60 --value_size=50 --seek_nexts=10 --reverse_iterator=true --usee_uint64_comparator=true --batch-size=5 ./db_bench --duration=60 --value_size=50 --seek_nexts=10 --reverse_iterator=true --use_uint64_comparator=true --batch_size=5 ./db_bench --duration=60 --value_size=50 --seek_nexts=10 --reverse_iterator=true --usee_uint64_comparator=true --batch-size=5 Test Results - https://phabricator.fb.com/P56130387 Additional tests for: ./db_bench --duration=60 --value_size=50 --seek_nexts=10 --reverse_iterator=true --use_uint64_comparator=true --batch_size=5 --key_size=8 --merge_operator=put ./db_bench --stats_interval_seconds=1 --num=1000 --bloom_locality=1 --seed=5 --threads=5 --merge_operator=uint64add Results: 
https://phabricator.fb.com/P56130607 Reviewers: yhchiang, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53991 --- Makefile | 4 +++- include/rocksdb/db_bench_tool.h | 9 +++++++++ src.mk | 5 ++++- tools/db_bench.cc | 23 ++++++++++++++++++++++ db/db_bench.cc => tools/db_bench_tool.cc | 25 ++++++------------------ 5 files changed, 45 insertions(+), 21 deletions(-) create mode 100644 include/rocksdb/db_bench_tool.h create mode 100644 tools/db_bench.cc rename db/db_bench.cc => tools/db_bench_tool.cc (99%) diff --git a/Makefile b/Makefile index ca2c2a7c5..66e7ac334 100644 --- a/Makefile +++ b/Makefile @@ -242,6 +242,8 @@ VALGRIND_VER := $(join $(VALGRIND_VER),valgrind) VALGRIND_OPTS = --error-exitcode=$(VALGRIND_ERROR) --leak-check=full +BENCHTOOLOBJECTS = $(BENCH_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL) + TESTS = \ db_test \ db_iter_test \ @@ -713,7 +715,7 @@ $(LIBRARY): $(LIBOBJECTS) $(AM_V_AR)rm -f $@ $(AM_V_at)$(AR) $(ARFLAGS) $@ $(LIBOBJECTS) -db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) +db_bench: tools/db_bench.o $(BENCHTOOLOBJECTS) $(AM_LINK) cache_bench: util/cache_bench.o $(LIBOBJECTS) $(TESTUTIL) diff --git a/include/rocksdb/db_bench_tool.h b/include/rocksdb/db_bench_tool.h new file mode 100644 index 000000000..0e33ae96e --- /dev/null +++ b/include/rocksdb/db_bench_tool.h @@ -0,0 +1,9 @@ +// Copyright (c) 2013-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+#pragma once + +namespace rocksdb { +int db_bench_tool(int argc, char** argv); +} // namespace rocksdb diff --git a/src.mk b/src.mk index 3e54fa66d..8c6216771 100644 --- a/src.mk +++ b/src.mk @@ -179,6 +179,9 @@ MOCK_SOURCES = \ table/mock_table.cc \ util/mock_env.cc +BENCH_SOURCES = \ + tools/db_bench_tool.cc + TEST_BENCH_SOURCES = \ third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc \ db/auto_roll_logger_test.cc \ @@ -189,7 +192,7 @@ TEST_BENCH_SOURCES = \ db/comparator_db_test.cc \ db/corruption_test.cc \ db/cuckoo_table_db_test.cc \ - db/db_bench.cc \ + tools/db_bench_tool.cc \ db/dbformat_test.cc \ db/db_iter_test.cc \ db/db_test.cc \ diff --git a/tools/db_bench.cc b/tools/db_bench.cc new file mode 100644 index 000000000..692ff1d23 --- /dev/null +++ b/tools/db_bench.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2013-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#ifndef GFLAGS +#include +int main() { + fprintf(stderr, "Please install gflags to run rocksdb tools\n"); + return 1; +} +#else +#include +int main(int argc, char** argv) { rocksdb::db_bench_tool(argc, argv); } +#endif // GFLAGS diff --git a/db/db_bench.cc b/tools/db_bench_tool.cc similarity index 99% rename from db/db_bench.cc rename to tools/db_bench_tool.cc index 9260501b1..26c9f38e1 100644 --- a/db/db_bench.cc +++ b/tools/db_bench_tool.cc @@ -11,14 +11,6 @@ #define __STDC_FORMAT_MACROS #endif -#ifndef GFLAGS -#include -int main() { - fprintf(stderr, "Please install gflags to run rocksdb tools\n"); - return 1; -} -#else - #ifdef NUMA #include #include @@ -76,6 +68,7 @@ int main() { #include // open/close #endif +namespace { using GFLAGS::ParseCommandLineFlags; using GFLAGS::RegisterFlagValidator; using GFLAGS::SetUsageMessage; @@ -521,7 +514,6 @@ DEFINE_uint64(transaction_lock_timeout, 100, DEFINE_bool(compaction_measure_io_stats, false, "Measure times spents on I/Os while in compactions. 
"); -namespace { enum rocksdb::CompressionType StringToCompressionType(const char* ctype) { assert(ctype); @@ -541,7 +533,7 @@ enum rocksdb::CompressionType StringToCompressionType(const char* ctype) { return rocksdb::kZSTDNotFinalCompression; fprintf(stdout, "Cannot parse compression type '%s'\n", ctype); - return rocksdb::kSnappyCompression; //default value + return rocksdb::kSnappyCompression; // default value } std::string ColumnFamilyName(size_t i) { @@ -553,7 +545,6 @@ std::string ColumnFamilyName(size_t i) { return std::string(name); } } -} // namespace DEFINE_string(compression_type, "snappy", "Algorithm to use to compress the database"); @@ -764,7 +755,6 @@ enum RepFactory { kCuckoo }; -namespace { enum RepFactory StringToRepFactory(const char* ctype) { assert(ctype); @@ -782,7 +772,6 @@ enum RepFactory StringToRepFactory(const char* ctype) { fprintf(stdout, "Cannot parse memreptable %s\n", ctype); return kSkipList; } -} // namespace static enum RepFactory FLAGS_rep_factory; DEFINE_string(memtablerep, "skip_list", ""); @@ -834,6 +823,7 @@ static const bool FLAGS_deletepercent_dummy __attribute__((unused)) = static const bool FLAGS_table_cache_numshardbits_dummy __attribute__((unused)) = RegisterFlagValidator(&FLAGS_table_cache_numshardbits, &ValidateTableCacheNumshardbits); +} // namespace namespace rocksdb { @@ -2249,7 +2239,7 @@ class Benchmark { count++; thread->stats.FinishedOps(nullptr, nullptr, 1, kOthers); } - if (ptr == nullptr) exit(1); // Disable unused variable warning. + if (ptr == nullptr) exit(1); // Disable unused variable warning. 
} void Compress(ThreadState *thread) { @@ -4072,9 +4062,7 @@ class Benchmark { } }; -} // namespace rocksdb - -int main(int argc, char** argv) { +int db_bench_tool(int argc, char** argv) { rocksdb::port::InstallStackTraceHandler(); SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) + " [OPTIONS]..."); @@ -4143,5 +4131,4 @@ int main(int argc, char** argv) { benchmark.Run(); return 0; } - -#endif // GFLAGS +} // namespace rocksdb From 44371501f00588530d876d938a46c8c276975912 Mon Sep 17 00:00:00 2001 From: Mike Kolupaev Date: Tue, 16 Feb 2016 11:11:16 -0800 Subject: [PATCH 112/195] Fixed a segfault when compaction fails Summary: We've hit it today. Test Plan: `make -j check`; didn't reproduce the issue Reviewers: yhchiang Reviewed By: yhchiang Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D54219 --- db/db_impl.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 3745eaab1..cf8b17bb0 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1849,7 +1849,6 @@ Status DBImpl::CompactFilesImpl( c->column_family_data(), job_context, *c->mutable_cf_options()); } c->ReleaseCompactionFiles(s); - c.reset(); if (status.ok()) { // Done @@ -1865,6 +1864,8 @@ Status DBImpl::CompactFilesImpl( } } + c.reset(); + bg_compaction_scheduled_--; if (bg_compaction_scheduled_ == 0) { bg_cv_.SignalAll(); From c90d63a23dbf8d2250ad8e8db74cda483d8347cd Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Tue, 16 Feb 2016 11:24:40 -0800 Subject: [PATCH 113/195] can_unlock set but not used Test Plan: I couldn't repro, but I hope this fixes it. 
See the error here: https://evergreen.mongodb.com/task_log_raw/mongodb_mongo_master_ubuntu1404_rocksdb_compile_6e9fd902d5cb25aef992363efa128640affd5196_16_02_11_04_33_37/0?type=T Reviewers: yhchiang, andrewkr, sdong, anthony Reviewed By: anthony Subscribers: meyering, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54123 --- utilities/transactions/transaction_base.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/transactions/transaction_base.cc b/utilities/transactions/transaction_base.cc index cb17fab4a..2754d38cb 100644 --- a/utilities/transactions/transaction_base.cc +++ b/utilities/transactions/transaction_base.cc @@ -502,7 +502,7 @@ void TransactionBaseImpl::UndoGetForUpdate(ColumnFamilyHandle* column_family, auto& cf_tracked_keys = tracked_keys_[column_family_id]; std::string key_str = key.ToString(); bool can_decrement = false; - bool can_unlock = false; + bool can_unlock __attribute__((unused)) = false; if (save_points_ != nullptr && !save_points_->empty()) { // Check if this key was fetched ForUpdate in this SavePoint From 871cc5f9874c3a4b9c8f113f0cb9079745a7e840 Mon Sep 17 00:00:00 2001 From: Jonathan Wiepert Date: Tue, 16 Feb 2016 12:16:47 -0800 Subject: [PATCH 114/195] fix build without gflags Test Plan: Built and ran with gflags: % ./db_bench LevelDB: version 4.5 Date: Tue Feb 16 12:04:23 2016 CPU: 40 * Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz ... 
And without gflags: % ./db_bench Please install gflags to run rocksdb tools % Reviewers: sdong, igor Reviewed By: igor Subscribers: igor, dhruba Differential Revision: https://reviews.facebook.net/D54243 --- tools/db_bench_tool.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 26c9f38e1..65ce703f1 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -11,6 +11,7 @@ #define __STDC_FORMAT_MACROS #endif +#ifdef GFLAGS #ifdef NUMA #include #include @@ -4132,3 +4133,4 @@ int db_bench_tool(int argc, char** argv) { return 0; } } // namespace rocksdb +#endif From 5bcf952a8774e5abbc7f42ebbb32f492ecf4181f Mon Sep 17 00:00:00 2001 From: reid horuff Date: Thu, 11 Feb 2016 12:19:21 -0800 Subject: [PATCH 115/195] Fix WriteImpl empty batch hanging issue Summary: There is an issue in DBImpl::WriteImpl where if an empty writebatch comes in and sync=true then the logs will be marked as being synced yet the sync never actually happens because there is no data in the writebatch. This causes the next incoming batch to hang while waiting for the logs to complete syncing. This fix syncs logs even if the writebatch is empty. Test Plan: DoubleEmptyBatch unit test in transaction_test. 
Reviewers: yoshinorim, hermanlee4, sdong, ngbronson, anthony Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54057 --- db/db_impl.cc | 8 +------- utilities/transactions/transaction_test.cc | 11 +++++++++++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index cf8b17bb0..c5349ef0c 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -4337,11 +4337,6 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, } } - if (total_count == 0) { - write_thread_.ExitAsBatchGroupLeader(&w, last_writer, status); - return w.FinalStatus(); - } - const SequenceNumber current_sequence = last_sequence + 1; last_sequence += total_count; @@ -4360,7 +4355,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, PERF_TIMER_GUARD(write_wal_time); WriteBatch* merged_batch = nullptr; - if (write_group.size() == 1) { + if (write_group.size() == 1 && !write_group[0]->CallbackFailed()) { merged_batch = write_group[0]->batch; } else { // WAL needs all of the batches flattened into a single batch. 
@@ -4376,7 +4371,6 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, WriteBatchInternal::SetSequence(merged_batch, current_sequence); assert(WriteBatchInternal::Count(merged_batch) == total_count); - assert(WriteBatchInternal::ByteSize(merged_batch) == total_byte_size); Slice log_entry = WriteBatchInternal::Contents(merged_batch); status = logs_.back().writer->AddRecord(log_entry); diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index d51912b38..fec6c974f 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -62,6 +62,17 @@ class TransactionTest : public testing::Test { } }; +TEST_F(TransactionTest, DoubleEmptyWrite) { + WriteOptions write_options; + write_options.sync = true; + write_options.disableWAL = false; + + WriteBatch batch; + + ASSERT_OK(db->Write(write_options, &batch)); + ASSERT_OK(db->Write(write_options, &batch)); +} + TEST_F(TransactionTest, SuccessTest) { WriteOptions write_options; ReadOptions read_options; From a7b6f0748a1bcdead0168df19901cfc15c9dc881 Mon Sep 17 00:00:00 2001 From: reid horuff Date: Tue, 16 Feb 2016 12:39:55 -0800 Subject: [PATCH 116/195] Improve write_with_callback_test to sync WAL Summary: Currently write_with_callback_test does not test with WAL syncing enabled. This addresses that. 
Test Plan: write_with_callback_test Reviewers: anthony Reviewed By: anthony Subscribers: leveldb, dhruba, hermanlee4 Differential Revision: https://reviews.facebook.net/D54255 --- db/write_callback_test.cc | 275 +++++++++++++++++++------------------- 1 file changed, 139 insertions(+), 136 deletions(-) diff --git a/db/write_callback_test.cc b/db/write_callback_test.cc index d552b1178..8acd60df8 100644 --- a/db/write_callback_test.cc +++ b/db/write_callback_test.cc @@ -113,159 +113,162 @@ TEST_F(WriteCallbackTest, WriteWithCallbackTest) { for (auto& allow_parallel : {true, false}) { for (auto& allow_batching : {true, false}) { - for (auto& write_group : write_scenarios) { - Options options; - options.create_if_missing = true; - options.allow_concurrent_memtable_write = allow_parallel; + for (auto& enable_WAL : {true, false}) { + for (auto& write_group : write_scenarios) { + Options options; + options.create_if_missing = true; + options.allow_concurrent_memtable_write = allow_parallel; - WriteOptions write_options; - ReadOptions read_options; - DB* db; - DBImpl* db_impl; + ReadOptions read_options; + DB* db; + DBImpl* db_impl; - ASSERT_OK(DB::Open(options, dbname, &db)); + ASSERT_OK(DB::Open(options, dbname, &db)); - db_impl = dynamic_cast(db); - ASSERT_TRUE(db_impl); + db_impl = dynamic_cast(db); + ASSERT_TRUE(db_impl); - std::atomic threads_waiting(0); - std::atomic seq(db_impl->GetLatestSequenceNumber()); - ASSERT_EQ(db_impl->GetLatestSequenceNumber(), 0); + std::atomic threads_waiting(0); + std::atomic seq(db_impl->GetLatestSequenceNumber()); + ASSERT_EQ(db_impl->GetLatestSequenceNumber(), 0); - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Wait", [&](void* arg) { - uint64_t cur_threads_waiting = 0; - bool is_leader = false; - bool is_last = false; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "WriteThread::JoinBatchGroup:Wait", [&](void* arg) { + uint64_t cur_threads_waiting = 0; + bool is_leader = false; + bool is_last = 
false; - // who am i + // who am i + do { + cur_threads_waiting = threads_waiting.load(); + is_leader = (cur_threads_waiting == 0); + is_last = (cur_threads_waiting == write_group.size() - 1); + } while (!threads_waiting.compare_exchange_strong( + cur_threads_waiting, cur_threads_waiting + 1)); + + // check my state + auto* writer = reinterpret_cast(arg); + + if (is_leader) { + ASSERT_TRUE(writer->state == + WriteThread::State::STATE_GROUP_LEADER); + } else { + ASSERT_TRUE(writer->state == WriteThread::State::STATE_INIT); + } + + // (meta test) the first WriteOP should indeed be the first + // and the last should be the last (all others can be out of + // order) + if (is_leader) { + ASSERT_TRUE(writer->callback->Callback(nullptr).ok() == + !write_group.front().callback_.should_fail_); + } else if (is_last) { + ASSERT_TRUE(writer->callback->Callback(nullptr).ok() == + !write_group.back().callback_.should_fail_); + } + + // wait for friends + while (threads_waiting.load() < write_group.size()) { + } + }); + + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "WriteThread::JoinBatchGroup:DoneWaiting", [&](void* arg) { + // check my state + auto* writer = reinterpret_cast(arg); + + if (!allow_batching) { + // no batching so everyone should be a leader + ASSERT_TRUE(writer->state == + WriteThread::State::STATE_GROUP_LEADER); + } else if (!allow_parallel) { + ASSERT_TRUE(writer->state == + WriteThread::State::STATE_COMPLETED); + } + }); + + std::atomic thread_num(0); + std::atomic dummy_key(0); + std::function write_with_callback_func = [&]() { + uint32_t i = thread_num.fetch_add(1); + Random rnd(i); + + // leaders gotta lead + while (i > 0 && threads_waiting.load() < 1) { + } + + // loser has to lose + while (i == write_group.size() - 1 && + threads_waiting.load() < write_group.size() - 1) { + } + + auto& write_op = write_group.at(i); + write_op.Clear(); + write_op.callback_.allow_batching_ = allow_batching; + + // insert some keys + for (uint32_t j = 0; j < rnd.Next() 
% 50; j++) { + // grab unique key + char my_key = 0; do { - cur_threads_waiting = threads_waiting.load(); - is_leader = (cur_threads_waiting == 0); - is_last = (cur_threads_waiting == write_group.size() - 1); - } while (!threads_waiting.compare_exchange_strong( - cur_threads_waiting, cur_threads_waiting + 1)); + my_key = dummy_key.load(); + } while (!dummy_key.compare_exchange_strong(my_key, my_key + 1)); - // check my state - auto* writer = reinterpret_cast(arg); + string skey(5, my_key); + string sval(10, my_key); + write_op.Put(skey, sval); - if (is_leader) { - ASSERT_TRUE(writer->state == - WriteThread::State::STATE_GROUP_LEADER); - } else { - ASSERT_TRUE(writer->state == WriteThread::State::STATE_INIT); + if (!write_op.callback_.should_fail_) { + seq.fetch_add(1); } - - // (meta test) the first WriteOP should indeed be the first - // and the last should be the last (all others can be out of - // order) - if (is_leader) { - ASSERT_TRUE(writer->callback->Callback(nullptr).ok() == - !write_group.front().callback_.should_fail_); - } else if (is_last) { - ASSERT_TRUE(writer->callback->Callback(nullptr).ok() == - !write_group.back().callback_.should_fail_); - } - - // wait for friends - while (threads_waiting.load() < write_group.size()) { - } - }); - - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:DoneWaiting", [&](void* arg) { - // check my state - auto* writer = reinterpret_cast(arg); - - if (!allow_batching) { - // no batching so everyone should be a leader - ASSERT_TRUE(writer->state == - WriteThread::State::STATE_GROUP_LEADER); - } else if (!allow_parallel) { - ASSERT_TRUE(writer->state == - WriteThread::State::STATE_COMPLETED); - } - }); - - std::atomic thread_num(0); - std::atomic dummy_key(0); - std::function write_with_callback_func = [&]() { - uint32_t i = thread_num.fetch_add(1); - Random rnd(i); - - // leaders gotta lead - while (i > 0 && threads_waiting.load() < 1) { - } - - // loser has to lose - while (i == 
write_group.size() - 1 && - threads_waiting.load() < write_group.size() - 1) { - } - - auto& write_op = write_group.at(i); - write_op.Clear(); - write_op.callback_.allow_batching_ = allow_batching; - - // insert some keys - for (uint32_t j = 0; j < rnd.Next() % 50; j++) { - // grab unique key - char my_key = 0; - do { - my_key = dummy_key.load(); - } while (!dummy_key.compare_exchange_strong(my_key, my_key + 1)); - - string skey(5, my_key); - string sval(10, my_key); - write_op.Put(skey, sval); - - if (!write_op.callback_.should_fail_) { - seq.fetch_add(1); } - } - WriteOptions woptions; - Status s = db_impl->WriteWithCallback( - woptions, &write_op.write_batch_, &write_op.callback_); + WriteOptions woptions; + woptions.disableWAL = !enable_WAL; + woptions.sync = enable_WAL; + Status s = db_impl->WriteWithCallback( + woptions, &write_op.write_batch_, &write_op.callback_); - if (write_op.callback_.should_fail_) { - ASSERT_TRUE(s.IsBusy()); - } else { - ASSERT_OK(s); - } - }; - - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - - // do all the writes - std::vector threads; - for (uint32_t i = 0; i < write_group.size(); i++) { - threads.emplace_back(write_with_callback_func); - } - for (auto& t : threads) { - t.join(); - } - - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); - - // check for keys - string value; - for (auto& w : write_group) { - ASSERT_TRUE(w.callback_.was_called_); - for (auto& kvp : w.kvs_) { - if (w.callback_.should_fail_) { - ASSERT_TRUE( - db->Get(read_options, kvp.first, &value).IsNotFound()); + if (write_op.callback_.should_fail_) { + ASSERT_TRUE(s.IsBusy()); } else { - ASSERT_OK(db->Get(read_options, kvp.first, &value)); - ASSERT_EQ(value, kvp.second); + ASSERT_OK(s); + } + }; + + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + // do all the writes + std::vector threads; + for (uint32_t i = 0; i < write_group.size(); i++) { + threads.emplace_back(write_with_callback_func); + } + for (auto& t : threads) { + t.join(); + 
} + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + + // check for keys + string value; + for (auto& w : write_group) { + ASSERT_TRUE(w.callback_.was_called_); + for (auto& kvp : w.kvs_) { + if (w.callback_.should_fail_) { + ASSERT_TRUE( + db->Get(read_options, kvp.first, &value).IsNotFound()); + } else { + ASSERT_OK(db->Get(read_options, kvp.first, &value)); + ASSERT_EQ(value, kvp.second); + } } } + + ASSERT_EQ(seq.load(), db_impl->GetLatestSequenceNumber()); + + delete db; + DestroyDB(dbname, options); } - - ASSERT_EQ(seq.load(), db_impl->GetLatestSequenceNumber()); - - delete db; - DestroyDB(dbname, options); } } } From 730a422c3a41df569e0c51cc7b7e0d8938d33ee8 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Tue, 16 Feb 2016 14:55:24 -0800 Subject: [PATCH 117/195] Improve the documentation of LoadLatestOptions Summary: Improve the documentation of LoadLatestOptions Test Plan: No code change Reviewers: anthony, IslamAbdelRahman, kradhakrishnan, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54087 --- include/rocksdb/utilities/options_util.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/rocksdb/utilities/options_util.h b/include/rocksdb/utilities/options_util.h index 9c0894bea..1d961a2bb 100644 --- a/include/rocksdb/utilities/options_util.h +++ b/include/rocksdb/utilities/options_util.h @@ -44,6 +44,12 @@ namespace rocksdb { // examples/options_file_example.cc demonstrates how to use this function // to open a RocksDB instance. // +// @return the function returns an OK status when it went successfully. If +// the specified "dbpath" does not contain any option file, then a +// Status::NotFound will be returned. A return value other than +// Status::OK or Status::NotFound indicates there're some error related +// to the options file itself. 
+// // @see LoadOptionsFromFile Status LoadLatestOptions(const std::string& dbpath, Env* env, DBOptions* db_options, From d08d50295cfaae2153c88c4065be1881e5ca0c99 Mon Sep 17 00:00:00 2001 From: agiardullo Date: Tue, 9 Feb 2016 18:24:41 -0800 Subject: [PATCH 118/195] Fix transaction locking Summary: Broke transaction locking in 4.4 in D52197. Will cherry-pick this change into 4.4 (which hasn't yet been fully released). Repro'd using db_bench. Test Plan: unit tests and db_Bench Reviewers: sdong, yhchiang, kradhakrishnan, ngbronson Reviewed By: ngbronson Subscribers: ngbronson, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54021 --- .../transactions/transaction_db_mutex_impl.cc | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/utilities/transactions/transaction_db_mutex_impl.cc b/utilities/transactions/transaction_db_mutex_impl.cc index ad1a3c066..c6649159c 100644 --- a/utilities/transactions/transaction_db_mutex_impl.cc +++ b/utilities/transactions/transaction_db_mutex_impl.cc @@ -18,20 +18,19 @@ namespace rocksdb { class TransactionDBMutexImpl : public TransactionDBMutex { public: - TransactionDBMutexImpl() : lock_(mutex_, std::defer_lock) {} + TransactionDBMutexImpl() {} ~TransactionDBMutexImpl() {} Status Lock() override; Status TryLockFor(int64_t timeout_time) override; - void UnLock() override { lock_.unlock(); } + void UnLock() override { mutex_.unlock(); } friend class TransactionDBCondVarImpl; private: - std::mutex mutex_; // Do not acquire mutex_ directly. Use lock_. 
- std::unique_lock lock_; + std::mutex mutex_; }; class TransactionDBCondVarImpl : public TransactionDBCondVar { @@ -63,7 +62,7 @@ TransactionDBMutexFactoryImpl::AllocateCondVar() { } Status TransactionDBMutexImpl::Lock() { - lock_.lock(); + mutex_.lock(); return Status::OK(); } @@ -71,7 +70,7 @@ Status TransactionDBMutexImpl::TryLockFor(int64_t timeout_time) { bool locked = true; if (timeout_time == 0) { - locked = lock_.try_lock(); + locked = mutex_.try_lock(); } else { // Previously, this code used a std::timed_mutex. However, this was changed // due to known bugs in gcc versions < 4.9. @@ -80,7 +79,7 @@ Status TransactionDBMutexImpl::TryLockFor(int64_t timeout_time) { // Since this mutex isn't held for long and only a single mutex is ever // held at a time, it is reasonable to ignore the lock timeout_time here // and only check it when waiting on the condition_variable. - lock_.lock(); + mutex_.lock(); } if (!locked) { @@ -95,30 +94,40 @@ Status TransactionDBCondVarImpl::Wait( std::shared_ptr mutex) { auto mutex_impl = reinterpret_cast(mutex.get()); - cv_.wait(mutex_impl->lock_); + std::unique_lock lock(mutex_impl->mutex_, std::adopt_lock); + cv_.wait(lock); + + // Make sure unique_lock doesn't unlock mutex when it destructs + lock.release(); return Status::OK(); } Status TransactionDBCondVarImpl::WaitFor( std::shared_ptr mutex, int64_t timeout_time) { + Status s; + auto mutex_impl = reinterpret_cast(mutex.get()); + std::unique_lock lock(mutex_impl->mutex_, std::adopt_lock); if (timeout_time < 0) { // If timeout is negative, do not use a timeout - cv_.wait(mutex_impl->lock_); + cv_.wait(lock); } else { auto duration = std::chrono::microseconds(timeout_time); - auto cv_status = cv_.wait_for(mutex_impl->lock_, duration); + auto cv_status = cv_.wait_for(lock, duration); // Check if the wait stopped due to timing out. 
if (cv_status == std::cv_status::timeout) { - return Status::TimedOut(Status::SubCode::kMutexTimeout); + s = Status::TimedOut(Status::SubCode::kMutexTimeout); } } + // Make sure unique_lock doesn't unlock mutex when it destructs + lock.release(); + // CV was signaled, or we spuriously woke up (but didn't time out) - return Status::OK(); + return s; } } // namespace rocksdb From 1336052497b982f4ae5c27133da1d51c15a2bbf1 Mon Sep 17 00:00:00 2001 From: fengjian Date: Wed, 17 Feb 2016 20:22:40 +0800 Subject: [PATCH 119/195] fix ios build error --- util/perf_context_imp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/perf_context_imp.h b/util/perf_context_imp.h index 6ef2a8011..ee1a7c318 100644 --- a/util/perf_context_imp.h +++ b/util/perf_context_imp.h @@ -13,7 +13,7 @@ namespace rocksdb { #if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) #define PERF_TIMER_GUARD(metric) -#define PERF_CONDITIONAL_TIMER_GUARD(metric, condition) +#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition) #define PERF_TIMER_MEASURE(metric) #define PERF_TIMER_STOP(metric) #define PERF_TIMER_START(metric) From cf38e56f2f7f18e80a8ab6d7e6581239e1a62a44 Mon Sep 17 00:00:00 2001 From: Jonathan Wiepert Date: Wed, 17 Feb 2016 11:49:52 -0800 Subject: [PATCH 120/195] Fix broken appveyor build caused by D53991 Test Plan: pushed remote as D53991_appveyor_test branch. 
Appveyor run: https://ci.appveyor.com/project/Facebook/rocksdb/build/1.0.651 shows the file is now found and the run is back to the state before the initial change: https://ci.appveyor.com/project/Facebook/rocksdb/build/1.0.620 Reviewers: sdong, igor Reviewed By: igor Subscribers: igor, dhruba Differential Revision: https://reviews.facebook.net/D54309 --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d91b244a..399ae5c58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -185,6 +185,7 @@ set(SOURCES table/table_properties.cc table/two_level_iterator.cc tools/sst_dump_tool.cc + tools/db_bench_tool.cc tools/dump/db_dump_tool.cc util/arena.cc util/bloom.cc @@ -301,7 +302,7 @@ else() endif() set(APPS - db/db_bench.cc + tools/db_bench.cc db/memtablerep_bench.cc table/table_reader_bench.cc tools/db_stress.cc From d733dd572856677fab530dc930ab45ac29bf861b Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 17 Feb 2016 11:57:04 -0800 Subject: [PATCH 121/195] [build] Fix env_win.cc compiler errors Summary: I broke it in D53781. 
Test Plan: tried the same code in util/env_posix.cc and it compiled successfully Reviewers: sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54303 --- port/win/env_win.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/port/win/env_win.cc b/port/win/env_win.cc index cf548e9be..b125e1dce 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -1415,8 +1415,8 @@ class WinEnv : public Env { return status; } - virtual Status Env::GetChildrenFileMetadata( - const std::string& dir, std::vector* result) override { + virtual Status GetChildrenFileAttributes( + const std::string& dir, std::vector* result) override { return Status::NotSupported("Not supported in WinEnv"); } From 3943d16780b66881fd3d3e62ab47525b7957a4f7 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 17 Feb 2016 12:06:45 -0800 Subject: [PATCH 122/195] Fix race conditions in auto-rolling logger Summary: For GetLogFileSize() and Flush(), they previously did not follow the synchronization pattern for accessing logger_. This meant ResetLogger() could cause logger_ destruction while the unsynchronized functions were accessing it, causing a segfault. Also made the mutex instance variable mutable so we can preserve GetLogFileSize()'s const-ness. Test Plan: new test case, it's quite ugly because both threads need to access one of the functions with SyncPoints (PosixLogger::Flush()), and also special handling is needed to prevent the mutex and sync points from conflicting. 
Reviewers: kradhakrishnan, IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54237 --- db/auto_roll_logger.cc | 4 +++ db/auto_roll_logger.h | 24 ++++++++++++++--- db/auto_roll_logger_test.cc | 53 ++++++++++++++++++++++++++++++++++++- util/posix_logger.h | 3 +++ 4 files changed, 79 insertions(+), 5 deletions(-) diff --git a/db/auto_roll_logger.cc b/db/auto_roll_logger.cc index b6efc3d13..853637ff3 100644 --- a/db/auto_roll_logger.cc +++ b/db/auto_roll_logger.cc @@ -12,7 +12,9 @@ namespace rocksdb { // -- AutoRollLogger Status AutoRollLogger::ResetLogger() { + TEST_SYNC_POINT("AutoRollLogger::ResetLogger:BeforeNewLogger"); status_ = env_->NewLogger(log_fname_, &logger_); + TEST_SYNC_POINT("AutoRollLogger::ResetLogger:AfterNewLogger"); if (!status_.ok()) { return status_; @@ -75,6 +77,8 @@ void AutoRollLogger::Logv(const char* format, va_list ap) { if ((kLogFileTimeToRoll > 0 && LogExpired()) || (kMaxLogFileSize > 0 && logger_->GetLogFileSize() >= kMaxLogFileSize)) { RollLogFile(); + TEST_SYNC_POINT_CALLBACK("AutoRollLogger::Logv:BeforeResetLogger", + logger_.get()); Status s = ResetLogger(); if (!s.ok()) { // can't really log the error if creating a new LOG file failed diff --git a/db/auto_roll_logger.h b/db/auto_roll_logger.h index d25d883f9..6bf79c5c6 100644 --- a/db/auto_roll_logger.h +++ b/db/auto_roll_logger.h @@ -13,6 +13,7 @@ #include "db/filename.h" #include "port/port.h" #include "port/util_logger.h" +#include "util/sync_point.h" namespace rocksdb { @@ -53,11 +54,26 @@ class AutoRollLogger : public Logger { return status_; } - size_t GetLogFileSize() const override { return logger_->GetLogFileSize(); } + size_t GetLogFileSize() const override { + std::shared_ptr logger; + { + MutexLock l(&mutex_); + // pin down the current logger_ instance before releasing the mutex. 
+ logger = logger_; + } + return logger->GetLogFileSize(); + } void Flush() override { - if (logger_) { - logger_->Flush(); + std::shared_ptr logger; + { + MutexLock l(&mutex_); + // pin down the current logger_ instance before releasing the mutex. + logger = logger_; + } + TEST_SYNC_POINT_CALLBACK("AutoRollLogger::Flush:PinnedLogger", nullptr); + if (logger) { + logger->Flush(); } } @@ -101,7 +117,7 @@ class AutoRollLogger : public Logger { uint64_t ctime_; uint64_t cached_now_access_count; uint64_t call_NowMicros_every_N_records_; - port::Mutex mutex_; + mutable port::Mutex mutex_; }; // Facade to craete logger automatically diff --git a/db/auto_roll_logger_test.cc b/db/auto_roll_logger_test.cc index 62d41887e..0be5b113b 100644 --- a/db/auto_roll_logger_test.cc +++ b/db/auto_roll_logger_test.cc @@ -4,6 +4,7 @@ // of patent rights can be found in the PATENTS file in the same directory. // #include +#include #include #include #include @@ -11,6 +12,7 @@ #include #include #include "db/auto_roll_logger.h" +#include "util/sync_point.h" #include "util/testharness.h" #include "rocksdb/db.h" #include @@ -260,7 +262,56 @@ TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) { auto_roll_logger, options.log_file_time_to_roll, kSampleMessage + ":CreateLoggerFromOptions - both"); } -#endif + +TEST_F(AutoRollLoggerTest, LogFlushWhileRolling) { + DBOptions options; + shared_ptr logger; + + InitTestDb(); + options.max_log_file_size = 1024 * 5; + ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); + AutoRollLogger* auto_roll_logger = + dynamic_cast(logger.get()); + ASSERT_TRUE(auto_roll_logger); + + // The test is split into two parts, with the below callback happening between + // them: + // (1) Before ResetLogger() is reached, the log rolling test code occasionally + // invokes PosixLogger::Flush(). For this part, dependencies should not be + // enforced. 
+ // (2) After ResetLogger() has begun, any calls to PosixLogger::Flush() will + // be from threads other than the log rolling thread. We want to only + // enforce dependencies for this part. + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "AutoRollLogger::Logv:BeforeResetLogger", [&](void* arg) { + rocksdb::SyncPoint::GetInstance()->LoadDependency({ + {"PosixLogger::Flush:1", + "AutoRollLogger::ResetLogger:BeforeNewLogger"}, + {"AutoRollLogger::ResetLogger:AfterNewLogger", + "PosixLogger::Flush:2"}, + }); + }); + std::thread flush_thread; + // Additionally, to exercise the edge case, we need to ensure the old logger + // is used. For this, we pause after pinning the logger until dependencies + // have probably been loaded. + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "AutoRollLogger::Flush:PinnedLogger", [&](void* arg) { + if (std::this_thread::get_id() == flush_thread.get_id()) { + sleep(2); + } + }); + + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + flush_thread = std::thread([&]() { auto_roll_logger->Flush(); }); + sleep(1); + RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size, + kSampleMessage + ":LogFlushWhileRolling"); + flush_thread.join(); + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + +#endif // OS_WIN TEST_F(AutoRollLoggerTest, InfoLogLevel) { InitTestDb(); diff --git a/util/posix_logger.h b/util/posix_logger.h index 1ddadd547..9fde5f046 100644 --- a/util/posix_logger.h +++ b/util/posix_logger.h @@ -25,6 +25,7 @@ #include "rocksdb/env.h" #include "util/iostats_context_imp.h" +#include "util/sync_point.h" #include namespace rocksdb { @@ -56,6 +57,8 @@ class PosixLogger : public Logger { fclose(file_); } virtual void Flush() override { + TEST_SYNC_POINT("PosixLogger::Flush:1"); + TEST_SYNC_POINT("PosixLogger::Flush:2"); if (flush_pending_) { flush_pending_ = false; fflush(file_); From df9ba6df62fecf4c3af5716a1dd6898041e78f49 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Wed, 17 Feb 2016 15:20:23 
-0800 Subject: [PATCH 123/195] Introduce SstFileManager::SetMaxAllowedSpaceUsage() to cap disk space usage Summary: Introude SstFileManager::SetMaxAllowedSpaceUsage() that can be used to limit the maximum space usage allowed for RocksDB. When this limit is exceeded WriteImpl() will fail and return Status::Aborted() Test Plan: unit testing Reviewers: yhchiang, anthony, andrewkr, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D53763 --- db/compaction_job.cc | 41 +++++++++---- db/compaction_job.h | 11 ++-- db/compaction_job_test.cc | 10 +-- db/db_impl.cc | 24 +++++--- db/db_test.cc | 98 +++++++++++++++++++++++++++++- db/db_test_util.cc | 9 ++- db/db_test_util.h | 3 +- include/rocksdb/sst_file_manager.h | 16 +++++ util/sst_file_manager_impl.cc | 13 ++++ util/sst_file_manager_impl.h | 18 ++++++ 10 files changed, 209 insertions(+), 34 deletions(-) diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 3247036b5..bcf803e14 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -212,6 +212,7 @@ CompactionJob::CompactionJob( const EnvOptions& env_options, VersionSet* versions, std::atomic* shutting_down, LogBuffer* log_buffer, Directory* db_directory, Directory* output_directory, Statistics* stats, + InstrumentedMutex* db_mutex, Status* db_bg_error, std::vector existing_snapshots, SequenceNumber earliest_write_conflict_snapshot, std::shared_ptr table_cache, EventLogger* event_logger, @@ -231,6 +232,8 @@ CompactionJob::CompactionJob( db_directory_(db_directory), output_directory_(output_directory), stats_(stats), + db_mutex_(db_mutex), + db_bg_error_(db_bg_error), existing_snapshots_(std::move(existing_snapshots)), earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot), table_cache_(std::move(table_cache)), @@ -499,16 +502,11 @@ Status CompactionJob::Run() { } TablePropertiesCollection tp; - auto sfm = - static_cast(db_options_.sst_file_manager.get()); for (const auto& state : 
compact_->sub_compact_states) { for (const auto& output : state.outputs) { auto fn = TableFileName(db_options_.db_paths, output.meta.fd.GetNumber(), output.meta.fd.GetPathId()); tp[fn] = output.table_properties; - if (sfm && output.meta.fd.GetPathId() == 0) { - sfm->OnAddFile(fn); - } } } compact_->compaction->SetOutputTableProperties(std::move(tp)); @@ -524,18 +522,17 @@ Status CompactionJob::Run() { return status; } -Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options, - InstrumentedMutex* db_mutex) { +Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) { AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_COMPACTION_INSTALL); - db_mutex->AssertHeld(); + db_mutex_->AssertHeld(); Status status = compact_->status; ColumnFamilyData* cfd = compact_->compaction->column_family_data(); cfd->internal_stats()->AddCompactionStats( compact_->compaction->output_level(), compaction_stats_); if (status.ok()) { - status = InstallCompactionResults(mutable_cf_options, db_mutex); + status = InstallCompactionResults(mutable_cf_options); } VersionStorageInfo::LevelSummaryStorage tmp; auto vstorage = cfd->current()->storage_info(); @@ -861,13 +858,33 @@ Status CompactionJob::FinishCompactionOutputFile( event_logger_, cfd->ioptions()->listeners, meta->fd, info); } } + + // Report new file to SstFileManagerImpl + auto sfm = + static_cast(db_options_.sst_file_manager.get()); + if (sfm && meta->fd.GetPathId() == 0) { + ColumnFamilyData* cfd = sub_compact->compaction->column_family_data(); + auto fn = TableFileName(cfd->ioptions()->db_paths, meta->fd.GetNumber(), + meta->fd.GetPathId()); + sfm->OnAddFile(fn); + if (sfm->IsMaxAllowedSpaceReached()) { + InstrumentedMutexLock l(db_mutex_); + if (db_bg_error_->ok()) { + s = Status::IOError("Max allowed space was reached"); + *db_bg_error_ = s; + TEST_SYNC_POINT( + "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached"); + } + } + } + sub_compact->builder.reset(); return s; 
} Status CompactionJob::InstallCompactionResults( - const MutableCFOptions& mutable_cf_options, InstrumentedMutex* db_mutex) { - db_mutex->AssertHeld(); + const MutableCFOptions& mutable_cf_options) { + db_mutex_->AssertHeld(); auto* compaction = compact_->compaction; // paranoia: verify that the files that we started with @@ -902,7 +919,7 @@ Status CompactionJob::InstallCompactionResults( } return versions_->LogAndApply(compaction->column_family_data(), mutable_cf_options, compaction->edit(), - db_mutex, db_directory_); + db_mutex_, db_directory_); } void CompactionJob::RecordCompactionIOStats() { diff --git a/db/compaction_job.h b/db/compaction_job.h index 125dc8fe4..c6edefbe0 100644 --- a/db/compaction_job.h +++ b/db/compaction_job.h @@ -56,7 +56,8 @@ class CompactionJob { const EnvOptions& env_options, VersionSet* versions, std::atomic* shutting_down, LogBuffer* log_buffer, Directory* db_directory, Directory* output_directory, - Statistics* stats, + Statistics* stats, InstrumentedMutex* db_mutex, + Status* db_bg_error, std::vector existing_snapshots, SequenceNumber earliest_write_conflict_snapshot, std::shared_ptr table_cache, EventLogger* event_logger, @@ -77,8 +78,7 @@ class CompactionJob { Status Run(); // REQUIRED: mutex held - Status Install(const MutableCFOptions& mutable_cf_options, - InstrumentedMutex* db_mutex); + Status Install(const MutableCFOptions& mutable_cf_options); private: struct SubcompactionState; @@ -95,8 +95,7 @@ class CompactionJob { Status FinishCompactionOutputFile(const Status& input_status, SubcompactionState* sub_compact); - Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options, - InstrumentedMutex* db_mutex); + Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options); void RecordCompactionIOStats(); Status OpenCompactionOutputFile(SubcompactionState* sub_compact); void CleanupCompaction(); @@ -130,6 +129,8 @@ class CompactionJob { Directory* db_directory_; Directory* output_directory_; 
Statistics* stats_; + InstrumentedMutex* db_mutex_; + Status* db_bg_error_; // If there were two snapshots with seq numbers s1 and // s2 and s1 < s2, and if we find two instances of a key k1 then lies // entirely within s1 and s2, then the earlier version of k1 can be safely diff --git a/db/compaction_job_test.cc b/db/compaction_job_test.cc index fc3a6b9f8..f3bc4cca9 100644 --- a/db/compaction_job_test.cc +++ b/db/compaction_job_test.cc @@ -250,9 +250,9 @@ class CompactionJobTest : public testing::Test { EventLogger event_logger(db_options_.info_log.get()); CompactionJob compaction_job( 0, &compaction, db_options_, env_options_, versions_.get(), - &shutting_down_, &log_buffer, nullptr, nullptr, nullptr, snapshots, - earliest_write_conflict_snapshot, table_cache_, &event_logger, false, - false, dbname_, &compaction_job_stats_); + &shutting_down_, &log_buffer, nullptr, nullptr, nullptr, &mutex_, + &bg_error_, snapshots, earliest_write_conflict_snapshot, table_cache_, + &event_logger, false, false, dbname_, &compaction_job_stats_); VerifyInitializationOfCompactionJobStats(compaction_job_stats_); @@ -262,8 +262,7 @@ class CompactionJobTest : public testing::Test { s = compaction_job.Run(); ASSERT_OK(s); mutex_.Lock(); - ASSERT_OK(compaction_job.Install(*cfd->GetLatestMutableCFOptions(), - &mutex_)); + ASSERT_OK(compaction_job.Install(*cfd->GetLatestMutableCFOptions())); mutex_.Unlock(); if (expected_results.size() == 0) { @@ -295,6 +294,7 @@ class CompactionJobTest : public testing::Test { ColumnFamilyData* cfd_; std::unique_ptr compaction_filter_; std::shared_ptr merge_op_; + Status bg_error_; }; TEST_F(CompactionJobTest, Simple) { diff --git a/db/db_impl.cc b/db/db_impl.cc index c5349ef0c..afe1a9c9d 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1510,11 +1510,12 @@ Status DBImpl::FlushMemTableToOutputFile( bg_error_ = s; } RecordFlushIOStats(); -#ifndef ROCKSDB_LITE if (s.ok()) { +#ifndef ROCKSDB_LITE // may temporarily unlock and lock the mutex. 
NotifyOnFlushCompleted(cfd, &file_meta, mutable_cf_options, job_context->job_id, flush_job.GetTableProperties()); +#endif // ROCKSDB_LITE auto sfm = static_cast(db_options_.sst_file_manager.get()); if (sfm) { @@ -1522,9 +1523,13 @@ Status DBImpl::FlushMemTableToOutputFile( std::string file_path = MakeTableFileName(db_options_.db_paths[0].path, file_meta.fd.GetNumber()); sfm->OnAddFile(file_path); + if (sfm->IsMaxAllowedSpaceReached() && bg_error_.ok()) { + bg_error_ = Status::IOError("Max allowed space was reached"); + TEST_SYNC_POINT( + "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached"); + } } } -#endif // ROCKSDB_LITE return s; } @@ -1818,9 +1823,9 @@ Status DBImpl::CompactFilesImpl( CompactionJob compaction_job( job_context->job_id, c.get(), db_options_, env_options_, versions_.get(), &shutting_down_, log_buffer, directories_.GetDbDir(), - directories_.GetDataDir(c->output_path_id()), stats_, snapshot_seqs, - earliest_write_conflict_snapshot, table_cache_, &event_logger_, - c->mutable_cf_options()->paranoid_file_checks, + directories_.GetDataDir(c->output_path_id()), stats_, &mutex_, &bg_error_, + snapshot_seqs, earliest_write_conflict_snapshot, table_cache_, + &event_logger_, c->mutable_cf_options()->paranoid_file_checks, c->mutable_cf_options()->compaction_measure_io_stats, dbname_, nullptr); // Here we pass a nullptr for CompactionJobStats because // CompactFiles does not trigger OnCompactionCompleted(), @@ -1843,7 +1848,7 @@ Status DBImpl::CompactFilesImpl( compaction_job.Run(); mutex_.Lock(); - Status status = compaction_job.Install(*c->mutable_cf_options(), &mutex_); + Status status = compaction_job.Install(*c->mutable_cf_options()); if (status.ok()) { InstallSuperVersionAndScheduleWorkWrapper( c->column_family_data(), job_context, *c->mutable_cf_options()); @@ -2994,8 +2999,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, CompactionJob compaction_job( job_context->job_id, c.get(), db_options_, env_options_, versions_.get(), 
&shutting_down_, log_buffer, directories_.GetDbDir(), - directories_.GetDataDir(c->output_path_id()), stats_, snapshot_seqs, - earliest_write_conflict_snapshot, table_cache_, &event_logger_, + directories_.GetDataDir(c->output_path_id()), stats_, &mutex_, + &bg_error_, snapshot_seqs, earliest_write_conflict_snapshot, + table_cache_, &event_logger_, c->mutable_cf_options()->paranoid_file_checks, c->mutable_cf_options()->compaction_measure_io_stats, dbname_, &compaction_job_stats); @@ -3006,7 +3012,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, TEST_SYNC_POINT("DBImpl::BackgroundCompaction:NonTrivial:AfterRun"); mutex_.Lock(); - status = compaction_job.Install(*c->mutable_cf_options(), &mutex_); + status = compaction_job.Install(*c->mutable_cf_options()); if (status.ok()) { InstallSuperVersionAndScheduleWorkWrapper( c->column_family_data(), job_context, *c->mutable_cf_options()); diff --git a/db/db_test.cc b/db/db_test.cc index a1dd6fb18..c9c2a6392 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -8636,7 +8636,6 @@ TEST_F(DBTest, DeletingOldWalAfterDrop) { EXPECT_GT(lognum2, lognum1); } -#ifndef ROCKSDB_LITE TEST_F(DBTest, DBWithSstFileManager) { std::shared_ptr sst_file_manager(NewSstFileManager(env_)); auto sfm = static_cast(sst_file_manager.get()); @@ -8701,6 +8700,7 @@ TEST_F(DBTest, DBWithSstFileManager) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } +#ifndef ROCKSDB_LITE TEST_F(DBTest, RateLimitedDelete) { rocksdb::SyncPoint::GetInstance()->LoadDependency({ {"DBTest::RateLimitedDelete:1", "DeleteScheduler::BackgroundEmptyTrash"}, @@ -8873,6 +8873,102 @@ TEST_F(DBTest, DestroyDBWithRateLimitedDelete) { } #endif // ROCKSDB_LITE +TEST_F(DBTest, DBWithMaxSpaceAllowed) { + std::shared_ptr sst_file_manager(NewSstFileManager(env_)); + auto sfm = static_cast(sst_file_manager.get()); + + Options options = CurrentOptions(); + options.sst_file_manager = sst_file_manager; + options.disable_auto_compactions = true; + DestroyAndReopen(options); + 
+ Random rnd(301); + + // Generate a file containing 100 keys. + for (int i = 0; i < 100; i++) { + ASSERT_OK(Put(Key(i), RandomString(&rnd, 50))); + } + ASSERT_OK(Flush()); + + uint64_t first_file_size = 0; + auto files_in_db = GetAllSSTFiles(&first_file_size); + ASSERT_EQ(sfm->GetTotalSize(), first_file_size); + + // Set the maximum allowed space usage to the current total size + sfm->SetMaxAllowedSpaceUsage(first_file_size + 1); + + ASSERT_OK(Put("key1", "val1")); + // This flush will cause bg_error_ and will fail + ASSERT_NOK(Flush()); +} + +TEST_F(DBTest, DBWithMaxSpaceAllowedRandomized) { + // This test will set a maximum allowed space for the DB, then it will + // keep filling the DB until the limit is reached and bg_error_ is set. + // When bg_error_ is set we will verify that the DB size is greater + // than the limit. + + std::vector max_space_limits_mbs = {1, 2, 4, 8, 10}; + + bool bg_error_set = false; + uint64_t total_sst_files_size = 0; + + int reached_max_space_on_flush = 0; + int reached_max_space_on_compaction = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached", + [&](void* arg) { + bg_error_set = true; + GetAllSSTFiles(&total_sst_files_size); + reached_max_space_on_flush++; + }); + + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached", + [&](void* arg) { + bg_error_set = true; + GetAllSSTFiles(&total_sst_files_size); + reached_max_space_on_compaction++; + }); + + for (auto limit_mb : max_space_limits_mbs) { + bg_error_set = false; + total_sst_files_size = 0; + rocksdb::SyncPoint::GetInstance()->ClearTrace(); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + std::shared_ptr sst_file_manager(NewSstFileManager(env_)); + auto sfm = static_cast(sst_file_manager.get()); + + Options options = CurrentOptions(); + options.sst_file_manager = sst_file_manager; + options.write_buffer_size = 1024 * 512; // 512 Kb + 
DestroyAndReopen(options); + Random rnd(301); + + sfm->SetMaxAllowedSpaceUsage(limit_mb * 1024 * 1024); + + int keys_written = 0; + uint64_t estimated_db_size = 0; + while (true) { + auto s = Put(RandomString(&rnd, 10), RandomString(&rnd, 50)); + if (!s.ok()) { + break; + } + keys_written++; + // Check the estimated db size vs the db limit just to make sure we + // dont run into an infinite loop + estimated_db_size = keys_written * 60; // ~60 bytes per key + ASSERT_LT(estimated_db_size, limit_mb * 1024 * 1024 * 2); + } + ASSERT_TRUE(bg_error_set); + ASSERT_GE(total_sst_files_size, limit_mb * 1024 * 1024); + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + } + + ASSERT_GT(reached_max_space_on_flush, 0); + ASSERT_GT(reached_max_space_on_compaction, 0); +} + TEST_F(DBTest, UnsupportedManualSync) { DestroyAndReopen(CurrentOptions()); env_->is_wal_sync_thread_safe_.store(false); diff --git a/db/db_test_util.cc b/db/db_test_util.cc index d601ec7eb..950941817 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -1015,9 +1015,13 @@ void DBTestBase::CopyFile(const std::string& source, ASSERT_OK(destfile->Close()); } -std::unordered_map DBTestBase::GetAllSSTFiles() { +std::unordered_map DBTestBase::GetAllSSTFiles( + uint64_t* total_size) { std::unordered_map res; + if (total_size) { + *total_size = 0; + } std::vector files; env_->GetChildren(dbname_, &files); for (auto& file_name : files) { @@ -1028,6 +1032,9 @@ std::unordered_map DBTestBase::GetAllSSTFiles() { uint64_t file_size = 0; env_->GetFileSize(file_path, &file_size); res[file_path] = file_size; + if (total_size) { + *total_size += file_size; + } } } return res; diff --git a/db/db_test_util.h b/db/db_test_util.h index 9eedf8d35..ca2b466e9 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -754,7 +754,8 @@ class DBTestBase : public testing::Test { void CopyFile(const std::string& source, const std::string& destination, uint64_t size = 0); - std::unordered_map GetAllSSTFiles(); + 
std::unordered_map GetAllSSTFiles( + uint64_t* total_size = nullptr); }; } // namespace rocksdb diff --git a/include/rocksdb/sst_file_manager.h b/include/rocksdb/sst_file_manager.h index 56d28c69f..bee243e4a 100644 --- a/include/rocksdb/sst_file_manager.h +++ b/include/rocksdb/sst_file_manager.h @@ -23,6 +23,22 @@ class SstFileManager { public: virtual ~SstFileManager() {} + // Update the maximum allowed space that should be used by RocksDB, if + // the total size of the SST files exceeds max_allowed_space, writes to + // RocksDB will fail. + // + // Setting max_allowed_space to 0 will disable this feature, maximum allowed + // space will be infinite (Default value). + // + // thread-safe. + virtual void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) = 0; + + // Return true if the total size of SST files exceeded the maximum allowed + // space usage. + // + // thread-safe. + virtual bool IsMaxAllowedSpaceReached() = 0; + // Return the total size of all tracked files. // thread-safe virtual uint64_t GetTotalSize() = 0; diff --git a/util/sst_file_manager_impl.cc b/util/sst_file_manager_impl.cc index b518bb7e5..8a29f1fec 100644 --- a/util/sst_file_manager_impl.cc +++ b/util/sst_file_manager_impl.cc @@ -56,6 +56,19 @@ Status SstFileManagerImpl::OnMoveFile(const std::string& old_path, return Status::OK(); } +void SstFileManagerImpl::SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) { + MutexLock l(&mu_); + max_allowed_space_ = max_allowed_space; +} + +bool SstFileManagerImpl::IsMaxAllowedSpaceReached() { + MutexLock l(&mu_); + if (max_allowed_space_ <= 0) { + return false; + } + return total_files_size_ >= max_allowed_space_; +} + uint64_t SstFileManagerImpl::GetTotalSize() { MutexLock l(&mu_); return total_files_size_; diff --git a/util/sst_file_manager_impl.h b/util/sst_file_manager_impl.h index 5f44d631c..ca9ddedba 100644 --- a/util/sst_file_manager_impl.h +++ b/util/sst_file_manager_impl.h @@ -37,6 +37,22 @@ class SstFileManagerImpl : public SstFileManager { 
// DB will call OnMoveFile whenever an sst file is move to a new path. Status OnMoveFile(const std::string& old_path, const std::string& new_path); + // Update the maximum allowed space that should be used by RocksDB, if + // the total size of the SST files exceeds max_allowed_space, writes to + // RocksDB will fail. + // + // Setting max_allowed_space to 0 will disable this feature, maximum allowed + // space will be infinite (Default value). + // + // thread-safe. + void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) override; + + // Return true if the total size of SST files exceeded the maximum allowed + // space usage. + // + // thread-safe. + bool IsMaxAllowedSpaceReached() override; + // Return the total size of all tracked files. uint64_t GetTotalSize() override; @@ -68,6 +84,8 @@ class SstFileManagerImpl : public SstFileManager { // A map containing all tracked files and there sizes // file_path => file_size std::unordered_map tracked_files_; + // The maximum allowed space (in bytes) for sst files. + uint64_t max_allowed_space_; // DeleteScheduler used to throttle file deletition, if SstFileManagerImpl was // created with rate_bytes_per_sec == 0 or trash_dir == "", delete_scheduler_ // rate limiting will be disabled and will simply delete the files. From a3db93c2618f66411db51198cc6dab49d1f29335 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Wed, 17 Feb 2016 23:32:14 -0800 Subject: [PATCH 124/195] Remove the SyncPoint usage in the destructor of PosixEnv Summary: Remove the SyncPoint usage in the destructor of PosixEnv as none of any active tests is using it. SyncPoint is a test-only utility class, and it's a static varible. As a result, using SyncPoint in the destructor of PosixEnv will make default Env depends on SyncPoint. Removing such dependency could solve the problem crash issue only reproducable in Mac environment. 
Test Plan: OPT=-DTRAVIS V=1 make -j4 check on Mac environment Reviewers: sdong, anthony Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54333 --- util/env_posix.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/util/env_posix.cc b/util/env_posix.cc index 132d865dd..06de7a486 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -132,7 +132,6 @@ class PosixEnv : public Env { // All threads must be joined before the deletion of // thread_status_updater_. delete thread_status_updater_; - TEST_SYNC_POINT("PosixEnv::~PosixEnv():End"); } void SetFD_CLOEXEC(int fd, const EnvOptions* options) { From 6b2a047dfcc290bbb79659a67e3180c1ace0669d Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Thu, 18 Feb 2016 11:25:19 -0800 Subject: [PATCH 125/195] Fix SstFileManager uninitialized data member Summary: I have introduced max_allowed_space_ but did not initialize it Test Plan: make check Reviewers: sdong, yhchiang, anthony Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D54357 --- util/sst_file_manager_impl.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/util/sst_file_manager_impl.cc b/util/sst_file_manager_impl.cc index 8a29f1fec..bbf240cad 100644 --- a/util/sst_file_manager_impl.cc +++ b/util/sst_file_manager_impl.cc @@ -20,6 +20,7 @@ SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr logger, : env_(env), logger_(logger), total_files_size_(0), + max_allowed_space_(0), delete_scheduler_(env, trash_dir, rate_bytes_per_sec, logger.get(), this) {} From d825fc70d4995527a8daebc64c51515baf2670ad Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 18 Feb 2016 18:03:53 -0800 Subject: [PATCH 126/195] Use condition variable in log roller test Summary: Previously I just slept until the flush_thread was "probably" ready since proper synchronization in test cases seemed like overkill. But then tsan complained about it, so I did the synchronization (mostly) properly now. 
Test Plan: $ COMPILE_WITH_TSAN=1 make -j32 auto_roll_logger_test $ ./auto_roll_logger_test Reviewers: anthony, IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54399 --- db/auto_roll_logger_test.cc | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/db/auto_roll_logger_test.cc b/db/auto_roll_logger_test.cc index 0be5b113b..814a29b08 100644 --- a/db/auto_roll_logger_test.cc +++ b/db/auto_roll_logger_test.cc @@ -12,6 +12,8 @@ #include #include #include "db/auto_roll_logger.h" +#include "port/port.h" +#include "util/mutexlock.h" #include "util/sync_point.h" #include "util/testharness.h" #include "rocksdb/db.h" @@ -291,20 +293,33 @@ TEST_F(AutoRollLoggerTest, LogFlushWhileRolling) { "PosixLogger::Flush:2"}, }); }); + + port::Mutex flush_thread_mutex; + port::CondVar flush_thread_cv{&flush_thread_mutex}; std::thread flush_thread; // Additionally, to exercise the edge case, we need to ensure the old logger // is used. For this, we pause after pinning the logger until dependencies // have probably been loaded. 
+ const int kWaitForDepsSeconds = 1; rocksdb::SyncPoint::GetInstance()->SetCallBack( "AutoRollLogger::Flush:PinnedLogger", [&](void* arg) { + MutexLock ml{&flush_thread_mutex}; + while (flush_thread.get_id() == std::thread::id()) { + flush_thread_cv.Wait(); + } if (std::this_thread::get_id() == flush_thread.get_id()) { - sleep(2); + Env::Default()->SleepForMicroseconds(kWaitForDepsSeconds * 1000 * 1000); + sleep(1); } }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - flush_thread = std::thread([&]() { auto_roll_logger->Flush(); }); - sleep(1); + { + MutexLock ml{&flush_thread_mutex}; + flush_thread = std::thread([&]() { auto_roll_logger->Flush(); }); + flush_thread_cv.Signal(); + } + RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size, kSampleMessage + ":LogFlushWhileRolling"); flush_thread.join(); From d37d348da87ea0e87edf158edfa62fd20a325ad3 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 19 Feb 2016 12:29:54 -0800 Subject: [PATCH 127/195] This addresses build issues on Windows https://github.com/facebook/rocksdb/issues/1002 --- db/auto_roll_logger.h | 1 + util/thread_local_test.cc | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/db/auto_roll_logger.h b/db/auto_roll_logger.h index 6bf79c5c6..0e4974d4d 100644 --- a/db/auto_roll_logger.h +++ b/db/auto_roll_logger.h @@ -14,6 +14,7 @@ #include "port/port.h" #include "port/util_logger.h" #include "util/sync_point.h" +#include "util/mutexlock.h" namespace rocksdb { diff --git a/util/thread_local_test.cc b/util/thread_local_test.cc index f531dfd0c..3fc5bc288 100644 --- a/util/thread_local_test.cc +++ b/util/thread_local_test.cc @@ -3,7 +3,7 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. -#include +#include #include #include @@ -496,9 +496,15 @@ TEST_F(ThreadLocalTest, DISABLED_MainThreadDiesFirst) { // Triggers the initialization of singletons. 
Env::Default(); - pthread_t t; - pthread_create(&t, nullptr, &AccessThreadLocal, nullptr); - TEST_SYNC_POINT("MainThreadDiesFirst:End"); + + try { + std::thread th(&AccessThreadLocal, nullptr); + th.detach(); + TEST_SYNC_POINT("MainThreadDiesFirst:End"); + } catch (const std::system_error& ex) { + std::cerr << "Start thread: " << ex.code() << std::endl; + ASSERT_TRUE(false); + } } } // namespace rocksdb From c7f1a8a468e9de32e0d0abee858f3b6e5f3a2120 Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 19 Feb 2016 13:47:19 -0800 Subject: [PATCH 128/195] Fix LITE build thread_local_test Summary: Recent change break thread_local_test by introducing exception, which is disabled in LITE build. Fix it by disabling exception handling in LITE build. Test Plan: Build with both of LITE and non-LITE Reviewers: anthony, IslamAbdelRahman, yhchiang, kradhakrishnan, andrewkr Reviewed By: andrewkr Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54513 --- util/thread_local_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/util/thread_local_test.cc b/util/thread_local_test.cc index 3fc5bc288..737a2654f 100644 --- a/util/thread_local_test.cc +++ b/util/thread_local_test.cc @@ -497,14 +497,18 @@ TEST_F(ThreadLocalTest, DISABLED_MainThreadDiesFirst) { // Triggers the initialization of singletons. Env::Default(); +#ifndef ROCKSDB_LITE try { +#endif // ROCKSDB_LITE std::thread th(&AccessThreadLocal, nullptr); th.detach(); TEST_SYNC_POINT("MainThreadDiesFirst:End"); +#ifndef ROCKSDB_LITE } catch (const std::system_error& ex) { std::cerr << "Start thread: " << ex.code() << std::endl; ASSERT_TRUE(false); } +#endif // ROCKSDB_LITE } } // namespace rocksdb From 9ea2968d26546e456faed9eb6b62cd9ce58126dc Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 19 Feb 2016 14:20:34 -0800 Subject: [PATCH 129/195] Implement ConsistentChildrenAttribute by using default implementation for now as it works. 
--- port/win/env_win.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/port/win/env_win.cc b/port/win/env_win.cc index b125e1dce..f0825651b 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -1415,11 +1415,6 @@ class WinEnv : public Env { return status; } - virtual Status GetChildrenFileAttributes( - const std::string& dir, std::vector* result) override { - return Status::NotSupported("Not supported in WinEnv"); - } - virtual Status CreateDir(const std::string& name) override { Status result; From 79ca039eb44546ddb71c1f6ba6626dc614a662a7 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Fri, 19 Feb 2016 14:42:24 -0800 Subject: [PATCH 130/195] Relax the check condition of prefix_extractor in CheckOptionsCompatibility Summary: Relax the check condition of prefix_extractor in CheckOptionsCompatibility by allowing changing value from non-nullptr to nullptr or nullptr to non-nullptr. Test Plan: options_test options_util_test Reviewers: sdong, anthony, IslamAbdelRahman, kradhakrishnan, gunnarku Reviewed By: gunnarku Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54477 --- util/options_helper.cc | 12 +++++++++--- util/options_helper.h | 18 +++++++++-------- util/options_parser.cc | 10 +++++++++- util/options_test.cc | 18 +++++++++++++---- utilities/options/options_util_test.cc | 27 ++++++++++++++++++++++++-- 5 files changed, 67 insertions(+), 18 deletions(-) diff --git a/util/options_helper.cc b/util/options_helper.cc index f2929a740..44b57f48b 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -805,6 +805,7 @@ Status ParseColumnFamilyOption(const std::string& name, } switch (opt_info.verification) { case OptionVerificationType::kByName: + case OptionVerificationType::kByNameAllowNull: return Status::NotSupported( "Deserializing the specified CF option " + name + " is not supported"); @@ -985,6 +986,7 @@ Status ParseDBOption(const std::string& name, } switch (opt_info.verification) { case 
OptionVerificationType::kByName: + case OptionVerificationType::kByNameAllowNull: return Status::NotSupported( "Deserializing the specified DB option " + name + " is not supported"); @@ -1082,6 +1084,8 @@ Status GetBlockBasedTableOptionsFromMap( // the old API, where everything is // parsable. (iter->second.verification != OptionVerificationType::kByName && + iter->second.verification != + OptionVerificationType::kByNameAllowNull && iter->second.verification != OptionVerificationType::kDeprecated)) { return Status::InvalidArgument("Can't parse BlockBasedTableOptions:", o.first + " " + error_message); @@ -1116,10 +1120,12 @@ Status GetPlainTableOptionsFromMap( if (error_message != "") { const auto iter = plain_table_type_info.find(o.first); if (iter == plain_table_type_info.end() || - !input_strings_escaped ||// !input_strings_escaped indicates - // the old API, where everything is - // parsable. + !input_strings_escaped || // !input_strings_escaped indicates + // the old API, where everything is + // parsable. (iter->second.verification != OptionVerificationType::kByName && + iter->second.verification != + OptionVerificationType::kByNameAllowNull && iter->second.verification != OptionVerificationType::kDeprecated)) { return Status::InvalidArgument("Can't parse PlainTableOptions:", o.first + " " + error_message); diff --git a/util/options_helper.h b/util/options_helper.h index 1c8b585d6..fc7e2c2e2 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -98,13 +98,15 @@ enum class OptionType { enum class OptionVerificationType { kNormal, - kByName, // The option is pointer typed so we can only verify - // based on it's name. - kDeprecated // The option is no longer used in rocksdb. The RocksDB - // OptionsParser will still accept this option if it - // happen to exists in some Options file. However, the - // parser will not include it in serialization and - // verification processes. 
+ kByName, // The option is pointer typed so we can only verify + // based on it's name. + kByNameAllowNull, // Same as kByName, but it also allows the case + // where one of them is a nullptr. + kDeprecated // The option is no longer used in rocksdb. The RocksDB + // OptionsParser will still accept this option if it + // happen to exists in some Options file. However, the + // parser will not include it in serialization and + // verification processes. }; // A struct for storing constant option information such as option name, @@ -433,7 +435,7 @@ static std::unordered_map cf_options_type_info = { OptionVerificationType::kByName}}, {"prefix_extractor", {offsetof(struct ColumnFamilyOptions, prefix_extractor), - OptionType::kSliceTransform, OptionVerificationType::kByName}}, + OptionType::kSliceTransform, OptionVerificationType::kByNameAllowNull}}, {"memtable_factory", {offsetof(struct ColumnFamilyOptions, memtable_factory), OptionType::kMemTableRepFactory, OptionVerificationType::kByName}}, diff --git a/util/options_parser.cc b/util/options_parser.cc index e5689fdc4..e01529bff 100644 --- a/util/options_parser.cc +++ b/util/options_parser.cc @@ -510,6 +510,7 @@ bool AreEqualOptions( const std::unordered_map* opt_map) { const char* offset1 = opt1 + type_info.offset; const char* offset2 = opt2 + type_info.offset; + static const std::string kNullptrString = "nullptr"; switch (type_info.type) { case OptionType::kBoolean: return (*reinterpret_cast(offset1) == @@ -557,7 +558,8 @@ bool AreEqualOptions( offset1) == *reinterpret_cast(offset2)); default: - if (type_info.verification == OptionVerificationType::kByName) { + if (type_info.verification == OptionVerificationType::kByName || + type_info.verification == OptionVerificationType::kByNameAllowNull) { std::string value1; bool result = SerializeSingleOptionHelper(offset1, type_info.type, &value1); @@ -571,6 +573,12 @@ bool AreEqualOptions( if (iter == opt_map->end()) { return true; } else { + if (type_info.verification == 
+ OptionVerificationType::kByNameAllowNull) { + if (iter->second == kNullptrString || value1 == kNullptrString) { + return true; + } + } return (value1 == iter->second); } } diff --git a/util/options_test.cc b/util/options_test.cc index 0bde6d618..02f128d69 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -1308,10 +1308,10 @@ TEST_F(OptionsSanityCheckTest, SanityCheck) { // prefix_extractor { - // change the prefix extractor and expect only pass when - // sanity-level == kSanityLevelNone + // Okay to change prefix_extractor form nullptr to non-nullptr + ASSERT_EQ(opts.prefix_extractor.get(), nullptr); opts.prefix_extractor.reset(NewCappedPrefixTransform(10)); - ASSERT_NOK(SanityCheckCFOptions(opts, kSanityLevelLooselyCompatible)); + ASSERT_OK(SanityCheckCFOptions(opts, kSanityLevelLooselyCompatible)); ASSERT_OK(SanityCheckCFOptions(opts, kSanityLevelNone)); // persist the change @@ -1338,11 +1338,21 @@ TEST_F(OptionsSanityCheckTest, SanityCheck) { // expect pass only in kSanityLevelNone ASSERT_NOK(SanityCheckCFOptions(opts, kSanityLevelLooselyCompatible)); ASSERT_OK(SanityCheckCFOptions(opts, kSanityLevelNone)); + + // Change prefix extractor from non-nullptr to nullptr + opts.prefix_extractor.reset(); + // expect pass as it's safe to change prefix_extractor + // from non-null to null + ASSERT_OK(SanityCheckCFOptions(opts, kSanityLevelLooselyCompatible)); + ASSERT_OK(SanityCheckCFOptions(opts, kSanityLevelNone)); } + // persist the change + ASSERT_OK(PersistCFOptions(opts)); + ASSERT_OK(SanityCheckCFOptions(opts, kSanityLevelExactMatch)); // table_factory { - for (int tb = 2; tb >= 0; --tb) { + for (int tb = 0; tb <= 2; ++tb) { // change the table factory opts.table_factory.reset(test::RandomTableFactory(&rnd, tb)); ASSERT_NOK(SanityCheckCFOptions(opts, kSanityLevelLooselyCompatible)); diff --git a/utilities/options/options_util_test.cc b/utilities/options/options_util_test.cc index e93d8a837..94ddbc408 100644 --- 
a/utilities/options/options_util_test.cc +++ b/utilities/options/options_util_test.cc @@ -173,8 +173,9 @@ TEST_F(OptionsUtilTest, SanityCheck) { (i == 0) ? kDefaultColumnFamilyName : test::RandomName(&rnd_, 10); cf_descs.back().options.table_factory.reset(NewBlockBasedTableFactory()); + // Assign non-null values to prefix_extractors except the first cf. cf_descs.back().options.prefix_extractor.reset( - test::RandomSliceTransform(&rnd_)); + i != 0 ? test::RandomSliceTransform(&rnd_) : nullptr); cf_descs.back().options.merge_operator.reset( test::RandomMergeOperator(&rnd_)); } @@ -223,9 +224,10 @@ TEST_F(OptionsUtilTest, SanityCheck) { std::shared_ptr prefix_extractor = cf_descs[1].options.prefix_extractor; + // It's okay to set prefix_extractor to nullptr. ASSERT_NE(prefix_extractor, nullptr); cf_descs[1].options.prefix_extractor.reset(); - ASSERT_NOK( + ASSERT_OK( CheckOptionsCompatibility(dbname_, Env::Default(), db_opt, cf_descs)); cf_descs[1].options.prefix_extractor.reset(new DummySliceTransform()); @@ -237,6 +239,27 @@ TEST_F(OptionsUtilTest, SanityCheck) { CheckOptionsCompatibility(dbname_, Env::Default(), db_opt, cf_descs)); } + // prefix extractor nullptr case + { + std::shared_ptr prefix_extractor = + cf_descs[0].options.prefix_extractor; + + // It's okay to set prefix_extractor to nullptr. 
+ ASSERT_EQ(prefix_extractor, nullptr); + cf_descs[0].options.prefix_extractor.reset(); + ASSERT_OK( + CheckOptionsCompatibility(dbname_, Env::Default(), db_opt, cf_descs)); + + // It's okay to change prefix_extractor from nullptr to non-nullptr + cf_descs[0].options.prefix_extractor.reset(new DummySliceTransform()); + ASSERT_OK( + CheckOptionsCompatibility(dbname_, Env::Default(), db_opt, cf_descs)); + + cf_descs[0].options.prefix_extractor = prefix_extractor; + ASSERT_OK( + CheckOptionsCompatibility(dbname_, Env::Default(), db_opt, cf_descs)); + } + // comparator { test::SimpleSuffixReverseComparator comparator; From eef63ef807eaf13b55d07d044ac35152371963ff Mon Sep 17 00:00:00 2001 From: Mike Kolupaev Date: Mon, 22 Feb 2016 13:54:58 -0800 Subject: [PATCH 131/195] Fixed CompactFiles() spuriously failing or corrupting DB Summary: We started getting two kinds of crashes since we started using `DB::CompactFiles()`: (1) `CompactFiles()` fails saying something like "/data/logdevice/4440/shard12/012302.sst: No such file or directory", and presumably makes DB read-only, (2) DB fails to open saying "Corruption: Can't access /267000.sst: IO error: /data/logdevice/4440/shard1/267000.sst: No such file or directory". AFAICT, both can be explained by background thread deleting compaction output as "obsolete" while it's being written, before it's committed to manifest. If it ends up committed to the manifest, we get (2); if compaction notices the disappearance and fails, we get (1). The internal tasks t10068021 and t10134177 have some details about the investigation that led to this. 
Test Plan: `make -j check`; the new test fails to reopen the DB without the fix Reviewers: yhchiang Reviewed By: yhchiang Subscribers: dhruba, sdong Differential Revision: https://reviews.facebook.net/D54561 --- db/compact_files_test.cc | 60 ++++++++++++++++++++++++++++++++++++++-- db/db_impl.cc | 7 +++++ 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/db/compact_files_test.cc b/db/compact_files_test.cc index 1b8c5b942..794defb11 100644 --- a/db/compact_files_test.cc +++ b/db/compact_files_test.cc @@ -7,6 +7,7 @@ #include #include +#include #include #include "rocksdb/db.h" @@ -142,9 +143,6 @@ TEST_F(CompactFilesTest, ObsoleteFiles) { } auto l0_files = collector->GetFlushedFiles(); - CompactionOptions compact_opt; - compact_opt.compression = kNoCompression; - compact_opt.output_file_size_limit = kWriteBufferSize * 5; ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1)); // verify all compaction input files are deleted @@ -154,6 +152,62 @@ TEST_F(CompactFilesTest, ObsoleteFiles) { delete db; } +TEST_F(CompactFilesTest, CapturingPendingFiles) { + Options options; + options.create_if_missing = true; + // Disable RocksDB background compaction. + options.compaction_style = kCompactionStyleNone; + // Always do full scans for obsolete files (needed to reproduce the issue). + options.delete_obsolete_files_period_micros = 0; + + // Add listener. + FlushedFileCollector* collector = new FlushedFileCollector(); + options.listeners.emplace_back(collector); + + DB* db = nullptr; + DestroyDB(db_name_, options); + Status s = DB::Open(options, db_name_, &db); + assert(s.ok()); + assert(db); + + // Create 5 files. 
+ for (int i = 0; i < 5; ++i) { + db->Put(WriteOptions(), "key" + ToString(i), "value"); + db->Flush(FlushOptions()); + } + + auto l0_files = collector->GetFlushedFiles(); + EXPECT_EQ(5, l0_files.size()); + + rocksdb::SyncPoint::GetInstance()->LoadDependency({ + {"CompactFilesImpl:2", "CompactFilesTest.CapturingPendingFiles:0"}, + {"CompactFilesTest.CapturingPendingFiles:1", "CompactFilesImpl:3"}, + }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + // Start compacting files. + std::thread compaction_thread( + [&] { EXPECT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1)); }); + + // In the meantime flush another file. + TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:0"); + db->Put(WriteOptions(), "key5", "value"); + db->Flush(FlushOptions()); + TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:1"); + + compaction_thread.join(); + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + + delete db; + + // Make sure we can reopen the DB. + s = DB::Open(options, db_name_, &db); + ASSERT_TRUE(s.ok()); + assert(db); + delete db; +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/db/db_impl.cc b/db/db_impl.cc index afe1a9c9d..d29df88c1 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1819,6 +1819,9 @@ Status DBImpl::CompactFilesImpl( std::vector snapshot_seqs = snapshots_.GetAll(&earliest_write_conflict_snapshot); + auto pending_outputs_inserted_elem = + CaptureCurrentFileNumberInPendingOutputs(); + assert(is_snapshot_supported_ || snapshots_.empty()); CompactionJob compaction_job( job_context->job_id, c.get(), db_options_, env_options_, versions_.get(), @@ -1846,6 +1849,8 @@ Status DBImpl::CompactFilesImpl( TEST_SYNC_POINT("CompactFilesImpl:0"); TEST_SYNC_POINT("CompactFilesImpl:1"); compaction_job.Run(); + TEST_SYNC_POINT("CompactFilesImpl:2"); + TEST_SYNC_POINT("CompactFilesImpl:3"); mutex_.Lock(); Status status = compaction_job.Install(*c->mutable_cf_options()); @@ -1855,6 +1860,8 @@ Status 
DBImpl::CompactFilesImpl( } c->ReleaseCompactionFiles(s); + ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem); + if (status.ok()) { // Done } else if (status.IsShutdownInProgress()) { From 291ae4c206b3e5f05108dcec671448496ed66a68 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Mon, 22 Feb 2016 16:33:26 -0800 Subject: [PATCH 132/195] Revert "Revert "Fixed the bug when both whole_key_filtering and prefix_extractor are set."" Summary: This reverts commit 73c31377bbcd300061245138dbaf782fedada9ba, which mistakenly reverts 73c31377bbcd300061245138dbaf782fedada9ba that fixes a bug when both whole_key_filtering and prefix_extractor are set Test Plan: revert the patch Reviewers: anthony, IslamAbdelRahman, rven, kradhakrishnan, sdong Reviewed By: sdong Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D52707 --- table/block_based_filter_block.cc | 44 ++++++----------- table/block_based_filter_block.h | 5 +- table/table_test.cc | 82 +++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 29 deletions(-) diff --git a/table/block_based_filter_block.cc b/table/block_based_filter_block.cc index bc0a8c3f4..e65ee280d 100644 --- a/table/block_based_filter_block.cc +++ b/table/block_based_filter_block.cc @@ -19,18 +19,6 @@ namespace rocksdb { namespace { -bool SamePrefix(const SliceTransform* prefix_extractor, const Slice& key1, - const Slice& key2) { - if (!prefix_extractor->InDomain(key1) && !prefix_extractor->InDomain(key2)) { - return true; - } else if (!prefix_extractor->InDomain(key1) || - !prefix_extractor->InDomain(key2)) { - return false; - } else { - return (prefix_extractor->Transform(key1) == - prefix_extractor->Transform(key2)); - } -} void AppendItem(std::string* props, const std::string& key, const std::string& value) { @@ -77,7 +65,9 @@ BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder( const BlockBasedTableOptions& table_opt) : policy_(table_opt.filter_policy.get()), 
prefix_extractor_(prefix_extractor), - whole_key_filtering_(table_opt.whole_key_filtering) { + whole_key_filtering_(table_opt.whole_key_filtering), + prev_prefix_start_(0), + prev_prefix_size_(0) { assert(policy_); } @@ -90,14 +80,13 @@ void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) { } void BlockBasedFilterBlockBuilder::Add(const Slice& key) { - added_to_start_ = 0; - if (whole_key_filtering_) { - AddKey(key); - added_to_start_ = 1; - } if (prefix_extractor_ && prefix_extractor_->InDomain(key)) { AddPrefix(key); } + + if (whole_key_filtering_) { + AddKey(key); + } } // Add key to filter if needed @@ -110,19 +99,16 @@ inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) { inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) { // get slice for most recently added entry Slice prev; - if (start_.size() > added_to_start_) { - size_t prev_start = start_[start_.size() - 1 - added_to_start_]; - const char* base = entries_.data() + prev_start; - size_t length = entries_.size() - prev_start; - prev = Slice(base, length); + if (prev_prefix_size_ > 0) { + prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_); } - // this assumes prefix(prefix(key)) == prefix(key), as the last - // entry in entries_ may be either a key or prefix, and we use - // prefix(last entry) to get the prefix of the last key. - if (prev.size() == 0 || !SamePrefix(prefix_extractor_, key, prev)) { - Slice prefix = prefix_extractor_->Transform(key); + Slice prefix = prefix_extractor_->Transform(key); + // insert prefix only when it's different from the previous prefix. 
+ if (prev.size() == 0 || prefix != prev) { start_.push_back(entries_.size()); + prev_prefix_start_ = entries_.size(); + prev_prefix_size_ = prefix.size(); entries_.append(prefix.data(), prefix.size()); } } @@ -168,6 +154,8 @@ void BlockBasedFilterBlockBuilder::GenerateFilter() { tmp_entries_.clear(); entries_.clear(); start_.clear(); + prev_prefix_start_ = 0; + prev_prefix_size_ = 0; } BlockBasedFilterBlockReader::BlockBasedFilterBlockReader( diff --git a/table/block_based_filter_block.h b/table/block_based_filter_block.h index 92c8c0da8..a97309f2e 100644 --- a/table/block_based_filter_block.h +++ b/table/block_based_filter_block.h @@ -55,9 +55,12 @@ class BlockBasedFilterBlockBuilder : public FilterBlockBuilder { const SliceTransform* prefix_extractor_; bool whole_key_filtering_; + size_t prev_prefix_start_; // the position of the last appended prefix + // to "entries_". + size_t prev_prefix_size_; // the length of the last appended prefix to + // "entries_". std::string entries_; // Flattened entry contents std::vector start_; // Starting index in entries_ of each entry - uint32_t added_to_start_; // To indicate if key is added std::string result_; // Filter data computed so far std::vector tmp_entries_; // policy_->CreateFilter() argument std::vector filter_offsets_; diff --git a/table/table_test.cc b/table/table_test.cc index a234d5cdc..3cc7d0dc7 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -2344,6 +2344,88 @@ TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) { ASSERT_EQ(kv_iter, kvmap.end()); } +class PrefixTest : public testing::Test { + public: + PrefixTest() : testing::Test() {} + ~PrefixTest() {} +}; + +namespace { +// A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest +class TestPrefixExtractor : public rocksdb::SliceTransform { + public: + ~TestPrefixExtractor() override{}; + const char* Name() const override { return "TestPrefixExtractor"; } + + rocksdb::Slice Transform(const rocksdb::Slice& src) 
const override { + assert(IsValid(src)); + return rocksdb::Slice(src.data(), 3); + } + + bool InDomain(const rocksdb::Slice& src) const override { + assert(IsValid(src)); + return true; + } + + bool InRange(const rocksdb::Slice& dst) const override { return true; } + + bool IsValid(const rocksdb::Slice& src) const { + if (src.size() != 4) { + return false; + } + if (src[0] != '[') { + return false; + } + if (src[1] < '0' || src[1] > '9') { + return false; + } + if (src[2] != ']') { + return false; + } + if (src[3] < '0' || src[3] > '9') { + return false; + } + return true; + } +}; +} // namespace + +TEST_F(PrefixTest, PrefixAndWholeKeyTest) { + rocksdb::Options options; + options.compaction_style = rocksdb::kCompactionStyleUniversal; + options.num_levels = 20; + options.create_if_missing = true; + options.optimize_filters_for_hits = false; + options.target_file_size_base = 268435456; + options.prefix_extractor = std::make_shared(); + rocksdb::BlockBasedTableOptions bbto; + bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10)); + bbto.block_size = 262144; + + bbto.whole_key_filtering = true; + + const std::string kDBPath = test::TmpDir() + "/prefix_test"; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + DestroyDB(kDBPath, options); + rocksdb::DB* db; + ASSERT_OK(rocksdb::DB::Open(options, kDBPath, &db)); + + // Create a bunch of keys with 10 filters. + for (int i = 0; i < 10; i++) { + std::string prefix = "[" + std::to_string(i) + "]"; + for (int j = 0; j < 10; j++) { + std::string key = prefix + std::to_string(j); + db->Put(rocksdb::WriteOptions(), key, "1"); + } + } + + // Trigger compaction. + db->CompactRange(CompactRangeOptions(), nullptr, nullptr); + delete db; + // In the second round, turn whole_key_filtering off and expect + // rocksdb still works. 
+} + } // namespace rocksdb int main(int argc, char** argv) { From b0469166564cb4a686ef33f8209513fdd8606be8 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 22 Feb 2016 21:32:19 -0800 Subject: [PATCH 133/195] Redo SyncPoints for flush while rolling test Summary: There was a race condition in the test where the rolling thread acquired the mutex before the flush thread pinned the logger. Rather than add more complicated synchronization to fix it, I followed Siying's suggestion to use SyncPoint in the test code. Comments in the LoadDependency() invocation explain the reason for each of the sync points. Test Plan: Ran test 1000 times for tsan/asan. Will wait for all sandcastle tests to finish before committing since this is a tricky test. Reviewers: IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54615 --- db/auto_roll_logger.cc | 2 -- db/auto_roll_logger.h | 2 +- db/auto_roll_logger_test.cc | 68 ++++++++++++++++--------------------- util/posix_logger.h | 3 +- 4 files changed, 31 insertions(+), 44 deletions(-) diff --git a/db/auto_roll_logger.cc b/db/auto_roll_logger.cc index 853637ff3..8118b2377 100644 --- a/db/auto_roll_logger.cc +++ b/db/auto_roll_logger.cc @@ -77,8 +77,6 @@ void AutoRollLogger::Logv(const char* format, va_list ap) { if ((kLogFileTimeToRoll > 0 && LogExpired()) || (kMaxLogFileSize > 0 && logger_->GetLogFileSize() >= kMaxLogFileSize)) { RollLogFile(); - TEST_SYNC_POINT_CALLBACK("AutoRollLogger::Logv:BeforeResetLogger", - logger_.get()); Status s = ResetLogger(); if (!s.ok()) { // can't really log the error if creating a new LOG file failed diff --git a/db/auto_roll_logger.h b/db/auto_roll_logger.h index 0e4974d4d..a43a98a8f 100644 --- a/db/auto_roll_logger.h +++ b/db/auto_roll_logger.h @@ -72,7 +72,7 @@ class AutoRollLogger : public Logger { // pin down the current logger_ instance before releasing the mutex. 
logger = logger_; } - TEST_SYNC_POINT_CALLBACK("AutoRollLogger::Flush:PinnedLogger", nullptr); + TEST_SYNC_POINT("AutoRollLogger::Flush:PinnedLogger"); if (logger) { logger->Flush(); } diff --git a/db/auto_roll_logger_test.cc b/db/auto_roll_logger_test.cc index 814a29b08..60c89a186 100644 --- a/db/auto_roll_logger_test.cc +++ b/db/auto_roll_logger_test.cc @@ -13,7 +13,6 @@ #include #include "db/auto_roll_logger.h" #include "port/port.h" -#include "util/mutexlock.h" #include "util/sync_point.h" #include "util/testharness.h" #include "rocksdb/db.h" @@ -275,51 +274,42 @@ TEST_F(AutoRollLoggerTest, LogFlushWhileRolling) { AutoRollLogger* auto_roll_logger = dynamic_cast(logger.get()); ASSERT_TRUE(auto_roll_logger); - - // The test is split into two parts, with the below callback happening between - // them: - // (1) Before ResetLogger() is reached, the log rolling test code occasionally - // invokes PosixLogger::Flush(). For this part, dependencies should not be - // enforced. - // (2) After ResetLogger() has begun, any calls to PosixLogger::Flush() will - // be from threads other than the log rolling thread. We want to only - // enforce dependencies for this part. - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "AutoRollLogger::Logv:BeforeResetLogger", [&](void* arg) { - rocksdb::SyncPoint::GetInstance()->LoadDependency({ - {"PosixLogger::Flush:1", - "AutoRollLogger::ResetLogger:BeforeNewLogger"}, - {"AutoRollLogger::ResetLogger:AfterNewLogger", - "PosixLogger::Flush:2"}, - }); - }); - - port::Mutex flush_thread_mutex; - port::CondVar flush_thread_cv{&flush_thread_mutex}; std::thread flush_thread; - // Additionally, to exercise the edge case, we need to ensure the old logger - // is used. For this, we pause after pinning the logger until dependencies - // have probably been loaded. 
- const int kWaitForDepsSeconds = 1; + + rocksdb::SyncPoint::GetInstance()->LoadDependency({ + // Need to pin the old logger before beginning the roll, as rolling grabs + // the mutex, which would prevent us from accessing the old logger. + {"AutoRollLogger::Flush:PinnedLogger", + "AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit"}, + // Need to finish the flush thread init before this callback because the + // callback accesses flush_thread.get_id() in order to apply certain sync + // points only to the flush thread. + {"AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit", + "AutoRollLoggerTest::LogFlushWhileRolling:FlushCallbackBegin"}, + // Need to reset logger at this point in Flush() to exercise a race + // condition case, which is executing the flush with the pinned (old) + // logger after the roll has cut over to a new logger. + {"AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback1", + "AutoRollLogger::ResetLogger:BeforeNewLogger"}, + {"AutoRollLogger::ResetLogger:AfterNewLogger", + "AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback2"}, + }); rocksdb::SyncPoint::GetInstance()->SetCallBack( - "AutoRollLogger::Flush:PinnedLogger", [&](void* arg) { - MutexLock ml{&flush_thread_mutex}; - while (flush_thread.get_id() == std::thread::id()) { - flush_thread_cv.Wait(); - } + "PosixLogger::Flush:BeginCallback", [&](void* arg) { + TEST_SYNC_POINT( + "AutoRollLoggerTest::LogFlushWhileRolling:FlushCallbackBegin"); if (std::this_thread::get_id() == flush_thread.get_id()) { - Env::Default()->SleepForMicroseconds(kWaitForDepsSeconds * 1000 * 1000); - sleep(1); + TEST_SYNC_POINT( + "AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback1"); + TEST_SYNC_POINT( + "AutoRollLoggerTest::LogFlushWhileRolling:FlushCallback2"); } }); - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - { - MutexLock ml{&flush_thread_mutex}; - flush_thread = std::thread([&]() { auto_roll_logger->Flush(); }); - flush_thread_cv.Signal(); - } + 
flush_thread = std::thread([&]() { auto_roll_logger->Flush(); }); + TEST_SYNC_POINT( + "AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit"); RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size, kSampleMessage + ":LogFlushWhileRolling"); flush_thread.join(); diff --git a/util/posix_logger.h b/util/posix_logger.h index 9fde5f046..61bb9e38a 100644 --- a/util/posix_logger.h +++ b/util/posix_logger.h @@ -57,8 +57,7 @@ class PosixLogger : public Logger { fclose(file_); } virtual void Flush() override { - TEST_SYNC_POINT("PosixLogger::Flush:1"); - TEST_SYNC_POINT("PosixLogger::Flush:2"); + TEST_SYNC_POINT_CALLBACK("PosixLogger::Flush:BeginCallback", nullptr); if (flush_pending_) { flush_pending_ = false; fflush(file_); From 2568985ab3dfece60bcd8fb61c0bf7b7c7dd0db6 Mon Sep 17 00:00:00 2001 From: Yi Wu Date: Tue, 23 Feb 2016 10:26:24 -0800 Subject: [PATCH 134/195] IOStatsContext::ToString() add option to exclude zero counters Summary: similar to D52809 add option to exclude zero counters. Test Plan: [yiwu@dev4504.prn1 ~/rocksdb] ./iostats_context_test [==========] Running 1 test from 1 test case. [----------] Global test environment set-up. [----------] 1 test from IOStatsContextTest [ RUN ] IOStatsContextTest.ToString [ OK ] IOStatsContextTest.ToString (0 ms) [----------] 1 test from IOStatsContextTest (0 ms total) [----------] Global test environment tear-down [==========] 1 test from 1 test case ran. (0 ms total) [ PASSED ] 1 test. 
Reviewers: anthony, yhchiang, andrewkr, IslamAbdelRahman, kradhakrishnan, sdong Reviewed By: sdong Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54591 --- CMakeLists.txt | 1 + Makefile | 6 +++++- include/rocksdb/iostats_context.h | 2 +- src.mk | 1 + util/iostats_context.cc | 29 ++++++++++++++++------------- util/iostats_context_test.cc | 29 +++++++++++++++++++++++++++++ 6 files changed, 53 insertions(+), 15 deletions(-) create mode 100644 util/iostats_context_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 399ae5c58..baed60308 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -391,6 +391,7 @@ set(TESTS util/file_reader_writer_test.cc util/heap_test.cc util/histogram_test.cc + util/iostats_context_test.cc util/memenv_test.cc util/mock_env_test.cc util/options_test.cc diff --git a/Makefile b/Makefile index 66e7ac334..0e3f11e65 100644 --- a/Makefile +++ b/Makefile @@ -340,7 +340,8 @@ TESTS = \ compact_on_deletion_collector_test \ compaction_job_stats_test \ transaction_test \ - ldb_cmd_test + ldb_cmd_test \ + iostats_context_test SUBSET := $(shell echo $(TESTS) |sed s/^.*$(ROCKSDBTESTS_START)/$(ROCKSDBTESTS_START)/) @@ -1045,6 +1046,9 @@ ldb_cmd_test: tools/ldb_cmd_test.o $(LIBOBJECTS) $(TESTHARNESS) ldb: tools/ldb.o $(LIBOBJECTS) $(AM_LINK) +iostats_context_test: util/iostats_context_test.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) + #------------------------------------------------- # make install related stuff INSTALL_PATH ?= /usr/local diff --git a/include/rocksdb/iostats_context.h b/include/rocksdb/iostats_context.h index 10ea420cf..632fe44c8 100644 --- a/include/rocksdb/iostats_context.h +++ b/include/rocksdb/iostats_context.h @@ -18,7 +18,7 @@ struct IOStatsContext { // reset all io-stats counter to zero void Reset(); - std::string ToString() const; + std::string ToString(bool exclude_zero_counters = false) const; // the thread pool id uint64_t thread_pool_id; diff 
--git a/src.mk b/src.mk index 8c6216771..f98075028 100644 --- a/src.mk +++ b/src.mk @@ -269,6 +269,7 @@ TEST_BENCH_SOURCES = \ utilities/transactions/transaction_test.cc \ utilities/ttl/ttl_test.cc \ utilities/write_batch_with_index/write_batch_with_index_test.cc \ + util/iostats_context_test.cc \ util/log_write_bench.cc \ util/memenv_test.cc \ util/mock_env_test.cc \ diff --git a/util/iostats_context.cc b/util/iostats_context.cc index 666fddcd1..a3c72db96 100644 --- a/util/iostats_context.cc +++ b/util/iostats_context.cc @@ -31,21 +31,24 @@ void IOStatsContext::Reset() { logger_nanos = 0; } -#define IOSTATS_CONTEXT_OUTPUT(counter) #counter << " = " << counter << ", " +#define IOSTATS_CONTEXT_OUTPUT(counter) \ + if (!exclude_zero_counters || counter > 0) { \ + ss << #counter << " = " << counter << ", "; \ + } -std::string IOStatsContext::ToString() const { +std::string IOStatsContext::ToString(bool exclude_zero_counters) const { std::ostringstream ss; - ss << IOSTATS_CONTEXT_OUTPUT(thread_pool_id) - << IOSTATS_CONTEXT_OUTPUT(bytes_read) - << IOSTATS_CONTEXT_OUTPUT(bytes_written) - << IOSTATS_CONTEXT_OUTPUT(open_nanos) - << IOSTATS_CONTEXT_OUTPUT(allocate_nanos) - << IOSTATS_CONTEXT_OUTPUT(write_nanos) - << IOSTATS_CONTEXT_OUTPUT(read_nanos) - << IOSTATS_CONTEXT_OUTPUT(range_sync_nanos) - << IOSTATS_CONTEXT_OUTPUT(fsync_nanos) - << IOSTATS_CONTEXT_OUTPUT(prepare_write_nanos) - << IOSTATS_CONTEXT_OUTPUT(logger_nanos); + IOSTATS_CONTEXT_OUTPUT(thread_pool_id); + IOSTATS_CONTEXT_OUTPUT(bytes_read); + IOSTATS_CONTEXT_OUTPUT(bytes_written); + IOSTATS_CONTEXT_OUTPUT(open_nanos); + IOSTATS_CONTEXT_OUTPUT(allocate_nanos); + IOSTATS_CONTEXT_OUTPUT(write_nanos); + IOSTATS_CONTEXT_OUTPUT(read_nanos); + IOSTATS_CONTEXT_OUTPUT(range_sync_nanos); + IOSTATS_CONTEXT_OUTPUT(fsync_nanos); + IOSTATS_CONTEXT_OUTPUT(prepare_write_nanos); + IOSTATS_CONTEXT_OUTPUT(logger_nanos); return ss.str(); } diff --git a/util/iostats_context_test.cc b/util/iostats_context_test.cc new file mode 
100644 index 000000000..a2884f8a6 --- /dev/null +++ b/util/iostats_context_test.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include "rocksdb/iostats_context.h" +#include "util/testharness.h" + +namespace rocksdb { + +TEST(IOStatsContextTest, ToString) { + iostats_context.Reset(); + iostats_context.bytes_read = 12345; + + std::string zero_included = iostats_context.ToString(); + ASSERT_NE(std::string::npos, zero_included.find("= 0")); + ASSERT_NE(std::string::npos, zero_included.find("= 12345")); + + std::string zero_excluded = iostats_context.ToString(true); + ASSERT_EQ(std::string::npos, zero_excluded.find("= 0")); + ASSERT_NE(std::string::npos, zero_excluded.find("= 12345")); +} + +} // namespace rocksdb + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From 38201b35996c2979edcdfb27fc7cbd3156503a6e Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 22 Feb 2016 17:29:28 -0800 Subject: [PATCH 135/195] Fix assert failure when DBImpl::SyncWAL() conflicts with log rolling Summary: DBImpl::SyncWAL() releases db mutex before calling DBImpl::MarkLogsSynced(), while inside DBImpl::MarkLogsSynced() we assert there is none or one outstanding log file. However, a memtable switch can happen in between and causing two or outstanding logs there, failing the assert. The diff adds a unit test that repros the issue and fix the assert so that the unit test passes. Test Plan: Run the new tests. 
Reviewers: anthony, kolmike, yhchiang, IslamAbdelRahman, kradhakrishnan, andrewkr Reviewed By: andrewkr Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54621 --- db/column_family_test.cc | 30 ++++++++++++++++++++++++++++++ db/db_impl.cc | 6 +++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/db/column_family_test.cc b/db/column_family_test.cc index fddbaf518..1fe7601b1 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -2542,6 +2542,36 @@ TEST_F(ColumnFamilyTest, CompactionSpeedupTwoColumnFamilies) { cfd->RecalculateWriteStallConditions(mutable_cf_options); ASSERT_EQ(2, dbfull()->BGCompactionsAllowed()); } + +TEST_F(ColumnFamilyTest, LogSyncConflictFlush) { + Open(); + CreateColumnFamiliesAndReopen({"one", "two"}); + + Put(0, "", ""); + Put(1, "foo", "bar"); + + rocksdb::SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::SyncWAL:BeforeMarkLogsSynced:1", + "ColumnFamilyTest::LogSyncConflictFlush:1"}, + {"ColumnFamilyTest::LogSyncConflictFlush:2", + "DBImpl::SyncWAL:BeforeMarkLogsSynced:2"}}); + + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + std::thread thread([&] { db_->SyncWAL(); }); + + TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:1"); + Flush(1); + Put(1, "foo", "bar"); + Flush(1); + + TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:2"); + + thread.join(); + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + Close(); +} } // namespace rocksdb int main(int argc, char** argv) { diff --git a/db/db_impl.cc b/db/db_impl.cc index d29df88c1..683006388 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -2202,6 +2202,9 @@ Status DBImpl::SyncWAL() { status = directories_.GetWalDir()->Fsync(); } + TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:1"); + TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:2"); + { InstrumentedMutexLock l(&mutex_); MarkLogsSynced(current_log_number, need_log_dir_sync, status); @@ -2229,7 +2232,8 @@ void 
DBImpl::MarkLogsSynced( ++it; } } - assert(logs_.empty() || (logs_.size() == 1 && !logs_[0].getting_synced)); + assert(logs_.empty() || logs_[0].number > up_to || + (logs_.size() == 1 && !logs_[0].getting_synced)); log_sync_cv_.SignalAll(); } From 82f15fb15d9cce7cc0190aba04533afffdcd5f5d Mon Sep 17 00:00:00 2001 From: sdong Date: Tue, 23 Feb 2016 14:54:05 -0800 Subject: [PATCH 136/195] Add test to make sure DropColumnFamily doesn't impact existing iterators Summary: Add a test case in ColumnFamilyTest.ReadDroppedColumnFamily to make sure existing iterator is not impacted by column family dropping. Test Plan: N/A Reviewers: igor, yhchiang, anthony, andrewkr, kradhakrishnan, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54657 --- db/column_family_test.cc | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/db/column_family_test.cc b/db/column_family_test.cc index 1fe7601b1..6e05a7da9 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -1952,13 +1952,27 @@ TEST_F(ColumnFamilyTest, ReadDroppedColumnFamily) { PutRandomData(1, kKeysNum, 100); PutRandomData(2, kKeysNum, 100); - if (iter == 0) { - // Drop CF two - ASSERT_OK(db_->DropColumnFamily(handles_[2])); - } else { - // delete CF two - delete handles_[2]; - handles_[2] = nullptr; + { + std::unique_ptr iterator( + db_->NewIterator(ReadOptions(), handles_[2])); + iterator->SeekToFirst(); + + if (iter == 0) { + // Drop CF two + ASSERT_OK(db_->DropColumnFamily(handles_[2])); + } else { + // delete CF two + delete handles_[2]; + handles_[2] = nullptr; + } + // Make sure iterator created can still be used. 
+ int count = 0; + for (; iterator->Valid(); iterator->Next()) { + ASSERT_OK(iterator->status()); + ++count; + } + ASSERT_OK(iterator->status()); + ASSERT_EQ(count, kKeysNum); } // Add bunch more data to other CFs From 69c98f043185f49ee1d83465b2eafe2afdef7c98 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 24 Feb 2016 10:32:11 -0800 Subject: [PATCH 137/195] Reorder instance variables in backup test for proper destruction order Summary: As titled. This fixes the tsan error caused by logger_ being used in backup_engine_'s destructor. It does not fix the transient unit test failure, which is caused by MANIFEST file changing while backup is happening. Test Plan: verified the tsan error no longer happens on either success or failure. $ COMPILE_WITH_TSAN=1 make -j32 backupable_db_test $ while ./backupable_db_test --gtest_filter=BackupableDBTest.CorruptionsTest ; do : ; done Reviewers: sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54669 --- utilities/backupable/backupable_db_test.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index cc70f69b5..a53ae2df9 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -544,6 +544,10 @@ class BackupableDBTest : public testing::Test { std::string dbname_; std::string backupdir_; + // logger_ must be above backup_engine_ such that the engine's destructor, + // which uses a raw pointer to the logger, executes first. 
+ std::shared_ptr logger_; + // envs Env* env_; unique_ptr mock_env_; @@ -558,7 +562,6 @@ class BackupableDBTest : public testing::Test { // options Options options_; - std::shared_ptr logger_; protected: unique_ptr backupable_options_; From cd3fe675a97fa2b615686ad53669e436f295e185 Mon Sep 17 00:00:00 2001 From: agiardullo Date: Thu, 25 Feb 2016 17:29:39 -0800 Subject: [PATCH 138/195] Remove stale TODO Summary: This was fixed by 0c2bd5cb Test Plan: n/a Reviewers: gabijs Reviewed By: gabijs Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54753 --- include/rocksdb/utilities/transaction_db.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h index 243b7a143..35b06d899 100644 --- a/include/rocksdb/utilities/transaction_db.h +++ b/include/rocksdb/utilities/transaction_db.h @@ -92,8 +92,6 @@ struct TransactionOptions { // will never relinquish any locks it holds. This could prevent keys from // being // written by other writers. - // - // TODO(agiardullo): Improve performance of checking expiration time. int64_t expiration = -1; }; From 8800975fb0f333ff20bb567e94c0a3fda3df9c1d Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 25 Feb 2016 15:42:26 -0800 Subject: [PATCH 139/195] Make DBTestUniversalCompaction.IncreaseUniversalCompactionNumLevels more robust Summary: Based on thread scheduling, DBTestUniversalCompaction.IncreaseUniversalCompactionNumLevels can fail to flush enough files to trigger expected compactions. Fix it by waiting for flush after inserting each key. There are failrue reported: db/db_universal_compaction_test.cc:1134: Failure Expected: (NumTableFilesAtLevel(options.num_levels - 1, 1)) > (0), actual: 0 vs 0 but I can't repro it. Try to fix the bug and see whether it goes away. Test Plan: Run the test multiple time. 
Reviewers: IslamAbdelRahman, anthony, andrewkr, kradhakrishnan, yhchiang Reviewed By: yhchiang Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54747 --- db/db_universal_compaction_test.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 9459e9761..82f11502b 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -1103,16 +1103,11 @@ TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) { for (int i = 0; i <= max_key1; i++) { // each value is 10K ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); + dbfull()->TEST_WaitForFlushMemTable(handles_[1]); } ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); - int non_level0_num_files = 0; - for (int i = 1; i < options.num_levels; i++) { - non_level0_num_files += NumTableFilesAtLevel(i, 1); - } - ASSERT_EQ(non_level0_num_files, 0); - // Stage 2: reopen with universal compaction, num_levels=4 options.compaction_style = kCompactionStyleUniversal; options.num_levels = 4; @@ -1125,6 +1120,7 @@ TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) { for (int i = max_key1 + 1; i <= max_key2; i++) { // each value is 10K ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); + dbfull()->TEST_WaitForFlushMemTable(handles_[1]); } ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); @@ -1155,6 +1151,7 @@ TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) { for (int i = max_key2 + 1; i <= max_key3; i++) { // each value is 10K ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); + dbfull()->TEST_WaitForFlushMemTable(handles_[1]); } ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); From 21f17aaa60deb3c77e487f079621a6993efddd60 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 26 Feb 2016 18:03:07 -0500 Subject: [PATCH 140/195] Modified Makefile and build_tools/build_detect_platform to compile on Linux 
s390x. --- Makefile | 2 ++ build_tools/build_detect_platform | 2 ++ 2 files changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 0e3f11e65..8f351b75e 100644 --- a/Makefile +++ b/Makefile @@ -86,10 +86,12 @@ ifneq ($(DEBUG_LEVEL), 2) OPT += -O2 -fno-omit-frame-pointer ifeq (,$(findstring ppc64,$(MACHINE))) # ppc64[le] doesn't support -momit-leaf-frame-pointer ifneq ($(MACHINE),armv7l) +ifneq ($(MACHINE),s390x) OPT += -momit-leaf-frame-pointer endif endif endif +endif # if we're compiling for release, compile without debug code (-DNDEBUG) and # don't treat warnings as errors diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 5832b03a9..00fae78c5 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -360,6 +360,8 @@ fi if test "$USE_SSE"; then # if Intel SSE instruction set is supported, set USE_SSE=1 COMMON_FLAGS="$COMMON_FLAGS -msse -msse4.2 " +elif [ "$TARGET_ARCHITECTURE" = s390x ]; then + COMMON_FLAGS="$COMMON_FLAGS -march=z10 " elif test -z "$PORTABLE"; then if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then # Tune for this POWER processor, treating '+' models as base models From 7ca731b12ce69596d7b9fec4a72d44ca8a3913c3 Mon Sep 17 00:00:00 2001 From: James Page Date: Mon, 29 Feb 2016 09:03:24 +0000 Subject: [PATCH 141/195] build: Improve -momit-leaf-frame-pointer usage -momit-leaf-frame-pointer is only supported on certain archs. Detect this automatically based on which flags the compiler understands. 
--- Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 0e3f11e65..018184c75 100644 --- a/Makefile +++ b/Makefile @@ -84,12 +84,11 @@ endif # compile with -O2 if debug level is not 2 ifneq ($(DEBUG_LEVEL), 2) OPT += -O2 -fno-omit-frame-pointer -ifeq (,$(findstring ppc64,$(MACHINE))) # ppc64[le] doesn't support -momit-leaf-frame-pointer -ifneq ($(MACHINE),armv7l) +# Skip for archs that don't support -momit-leaf-frame-pointer +ifeq (,$(shell $(CXX) -fsyntax-only -momit-leaf-frame-pointer -xc /dev/null 2>&1)) OPT += -momit-leaf-frame-pointer endif endif -endif # if we're compiling for release, compile without debug code (-DNDEBUG) and # don't treat warnings as errors From 3373c81fa800fe377491d34bfecd72c5d702c433 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 29 Feb 2016 15:02:52 -0500 Subject: [PATCH 142/195] Modify build_tools/build_detect_platform to detect and set -march=z10 on Linux s390x. --- build_tools/build_detect_platform | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 00fae78c5..10a4d497f 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -360,13 +360,13 @@ fi if test "$USE_SSE"; then # if Intel SSE instruction set is supported, set USE_SSE=1 COMMON_FLAGS="$COMMON_FLAGS -msse -msse4.2 " -elif [ "$TARGET_ARCHITECTURE" = s390x ]; then - COMMON_FLAGS="$COMMON_FLAGS -march=z10 " elif test -z "$PORTABLE"; then if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then # Tune for this POWER processor, treating '+' models as base models POWER=`LD_SHOW_AUXV=1 /bin/true | grep AT_PLATFORM | grep -E -o power[0-9]+` COMMON_FLAGS="$COMMON_FLAGS -mcpu=$POWER -mtune=$POWER " + elif test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then + COMMON_FLAGS="$COMMON_FLAGS -march=z10 " else COMMON_FLAGS="$COMMON_FLAGS -march=native " fi From 69c471bd9b665cecbf58aee7f3dc4d31a8e9ff29 Mon 
Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 29 Feb 2016 12:56:55 -0800 Subject: [PATCH 143/195] Handle concurrent manifest update and backup creation Summary: Fixed two related race conditions in backup creation. (1) CreateNewBackup() uses DB::DisableFileDeletions() to prevent table files from being deleted while it is copying; however, the MANIFEST file could still rotate during this time. The fix is to stop deleting the old manifest in the rotation logic. It will be deleted safely later when PurgeObsoleteFiles() runs (can only happen when file deletions are enabled). (2) CreateNewBackup() did not account for the CURRENT file being mutable. This is significant because the files returned by GetLiveFiles() contain a particular manifest filename, but the manifest to which CURRENT refers can change at any time. This causes problems when CURRENT changes between the call to GetLiveFiles() and when it's copied to the backup directory. To workaround this, I manually forge a CURRENT file referring to the manifest filename returned in GetLiveFiles(). (2) also applies to the checkpointing code, so let me know if this approach is good and I'll make the same change there. Test Plan: new test for roll manifest during backup creation. running the test before this change: $ ./backupable_db_test --gtest_filter=BackupableDBTest.ChangeManifestDuringBackupCreation ... IO error: /tmp/rocksdbtest-9383/backupable_db/MANIFEST-000001: No such file or directory running the test after this change: $ ./backupable_db_test --gtest_filter=BackupableDBTest.ChangeManifestDuringBackupCreation ... 
[ RUN ] BackupableDBTest.ChangeManifestDuringBackupCreation [ OK ] BackupableDBTest.ChangeManifestDuringBackupCreation (2836 ms) Reviewers: IslamAbdelRahman, anthony, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54711 --- db/deletefile_test.cc | 1 + db/version_set.cc | 12 +- utilities/backupable/backupable_db.cc | 314 ++++++++++++--------- utilities/backupable/backupable_db_test.cc | 28 ++ 4 files changed, 214 insertions(+), 141 deletions(-) diff --git a/db/deletefile_test.cc b/db/deletefile_test.cc index 3cc060db9..57fafa5e7 100644 --- a/db/deletefile_test.cc +++ b/db/deletefile_test.cc @@ -37,6 +37,7 @@ class DeleteFileTest : public testing::Test { DeleteFileTest() { db_ = nullptr; env_ = Env::Default(); + options_.delete_obsolete_files_period_micros = 0; // always do full purge options_.enable_thread_tracking = true; options_.write_buffer_size = 1024*1024*1000; options_.target_file_size_base = 1024*1024*1000; diff --git a/db/version_set.cc b/db/version_set.cc index 4cf493f91..b5658ea38 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2252,15 +2252,8 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, if (s.ok() && new_descriptor_log) { s = SetCurrentFile(env_, dbname_, pending_manifest_file_number_, db_options_->disableDataSync ? nullptr : db_directory); - if (s.ok() && pending_manifest_file_number_ > manifest_file_number_) { - // delete old manifest file - Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log, - "Deleting manifest %" PRIu64 " current manifest %" PRIu64 "\n", - manifest_file_number_, pending_manifest_file_number_); - // we don't care about an error here, PurgeObsoleteFiles will take care - // of it later - env_->DeleteFile(DescriptorFileName(dbname_, manifest_file_number_)); - } + // Leave the old file behind since PurgeObsoleteFiles will take care of it + // later. It's unsafe to delete now since file deletion may be disabled. 
} if (s.ok()) { @@ -2275,6 +2268,7 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, } LogFlush(db_options_->info_log); + TEST_SYNC_POINT("VersionSet::LogAndApply:WriteManifestDone"); mu->Lock(); } diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index b8cb46c7c..024b789ca 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -20,10 +20,11 @@ #include "rocksdb/rate_limiter.h" #include "rocksdb/transaction_log.h" #include "port/port.h" +#include "util/sync_point.h" #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS -#endif +#endif // __STDC_FORMAT_MACROS #include #include @@ -39,8 +40,6 @@ #include #include #include -#include "port/port.h" - namespace rocksdb { @@ -269,43 +268,59 @@ class BackupEngineImpl : public BackupEngine { } Status PutLatestBackupFileContents(uint32_t latest_backup); - // if size_limit == 0, there is no size limit, copy everything - Status CopyFile(const std::string& src, const std::string& dst, Env* src_env, - Env* dst_env, bool sync, RateLimiter* rate_limiter, - uint64_t* size = nullptr, uint32_t* checksum_value = nullptr, - uint64_t size_limit = 0, - std::function progress_callback = []() {}); + + // If size_limit == 0, there is no size limit, copy everything. + // + // Exactly one of src and contents must be non-empty. + // + // @param src If non-empty, the file is copied from this pathname. + // @param contents If non-empty, the file will be created with these contents. 
+ Status CopyOrCreateFile(const std::string& src, const std::string& dst, + const std::string& contents, Env* src_env, + Env* dst_env, bool sync, RateLimiter* rate_limiter, + uint64_t* size = nullptr, + uint32_t* checksum_value = nullptr, + uint64_t size_limit = 0, + std::function progress_callback = []() {}); Status CalculateChecksum(const std::string& src, Env* src_env, uint64_t size_limit, uint32_t* checksum_value); - struct CopyResult { + struct CopyOrCreateResult { uint64_t size; uint32_t checksum_value; Status status; }; - struct CopyWorkItem { + + // Exactly one of src_path and contents must be non-empty. If src_path is + // non-empty, the file is copied from this pathname. Otherwise, if contents is + // non-empty, the file will be created at dst_path with these contents. + struct CopyOrCreateWorkItem { std::string src_path; std::string dst_path; + std::string contents; Env* src_env; Env* dst_env; bool sync; RateLimiter* rate_limiter; uint64_t size_limit; - std::promise result; + std::promise result; std::function progress_callback; - CopyWorkItem() {} - CopyWorkItem(const CopyWorkItem&) = delete; - CopyWorkItem& operator=(const CopyWorkItem&) = delete; + CopyOrCreateWorkItem() {} + CopyOrCreateWorkItem(const CopyOrCreateWorkItem&) = delete; + CopyOrCreateWorkItem& operator=(const CopyOrCreateWorkItem&) = delete; - CopyWorkItem(CopyWorkItem&& o) ROCKSDB_NOEXCEPT { *this = std::move(o); } + CopyOrCreateWorkItem(CopyOrCreateWorkItem&& o) ROCKSDB_NOEXCEPT { + *this = std::move(o); + } - CopyWorkItem& operator=(CopyWorkItem&& o) ROCKSDB_NOEXCEPT { + CopyOrCreateWorkItem& operator=(CopyOrCreateWorkItem&& o) ROCKSDB_NOEXCEPT { src_path = std::move(o.src_path); dst_path = std::move(o.dst_path); + contents = std::move(o.contents); src_env = o.src_env; dst_env = o.dst_env; sync = o.sync; @@ -316,12 +331,14 @@ class BackupEngineImpl : public BackupEngine { return *this; } - CopyWorkItem(std::string _src_path, std::string _dst_path, Env* _src_env, - Env* _dst_env, bool 
_sync, RateLimiter* _rate_limiter, - uint64_t _size_limit, - std::function _progress_callback = []() {}) + CopyOrCreateWorkItem(std::string _src_path, std::string _dst_path, + std::string _contents, Env* _src_env, Env* _dst_env, + bool _sync, RateLimiter* _rate_limiter, + uint64_t _size_limit, + std::function _progress_callback = []() {}) : src_path(std::move(_src_path)), dst_path(std::move(_dst_path)), + contents(std::move(_contents)), src_env(_src_env), dst_env(_dst_env), sync(_sync), @@ -330,21 +347,23 @@ class BackupEngineImpl : public BackupEngine { progress_callback(_progress_callback) {} }; - struct BackupAfterCopyWorkItem { - std::future result; + struct BackupAfterCopyOrCreateWorkItem { + std::future result; bool shared; bool needed_to_copy; Env* backup_env; std::string dst_path_tmp; std::string dst_path; std::string dst_relative; - BackupAfterCopyWorkItem() {} + BackupAfterCopyOrCreateWorkItem() {} - BackupAfterCopyWorkItem(BackupAfterCopyWorkItem&& o) ROCKSDB_NOEXCEPT { + BackupAfterCopyOrCreateWorkItem(BackupAfterCopyOrCreateWorkItem&& o) + ROCKSDB_NOEXCEPT { *this = std::move(o); } - BackupAfterCopyWorkItem& operator=(BackupAfterCopyWorkItem&& o) ROCKSDB_NOEXCEPT { + BackupAfterCopyOrCreateWorkItem& operator=( + BackupAfterCopyOrCreateWorkItem&& o) ROCKSDB_NOEXCEPT { result = std::move(o.result); shared = o.shared; needed_to_copy = o.needed_to_copy; @@ -355,10 +374,11 @@ class BackupEngineImpl : public BackupEngine { return *this; } - BackupAfterCopyWorkItem(std::future&& _result, bool _shared, - bool _needed_to_copy, Env* _backup_env, - std::string _dst_path_tmp, std::string _dst_path, - std::string _dst_relative) + BackupAfterCopyOrCreateWorkItem(std::future&& _result, + bool _shared, bool _needed_to_copy, + Env* _backup_env, std::string _dst_path_tmp, + std::string _dst_path, + std::string _dst_relative) : result(std::move(_result)), shared(_shared), needed_to_copy(_needed_to_copy), @@ -368,18 +388,20 @@ class BackupEngineImpl : public BackupEngine 
{ dst_relative(std::move(_dst_relative)) {} }; - struct RestoreAfterCopyWorkItem { - std::future result; + struct RestoreAfterCopyOrCreateWorkItem { + std::future result; uint32_t checksum_value; - RestoreAfterCopyWorkItem() {} - RestoreAfterCopyWorkItem(std::future&& _result, - uint32_t _checksum_value) + RestoreAfterCopyOrCreateWorkItem() {} + RestoreAfterCopyOrCreateWorkItem(std::future&& _result, + uint32_t _checksum_value) : result(std::move(_result)), checksum_value(_checksum_value) {} - RestoreAfterCopyWorkItem(RestoreAfterCopyWorkItem&& o) ROCKSDB_NOEXCEPT { + RestoreAfterCopyOrCreateWorkItem(RestoreAfterCopyOrCreateWorkItem&& o) + ROCKSDB_NOEXCEPT { *this = std::move(o); } - RestoreAfterCopyWorkItem& operator=(RestoreAfterCopyWorkItem&& o) ROCKSDB_NOEXCEPT { + RestoreAfterCopyOrCreateWorkItem& operator=( + RestoreAfterCopyOrCreateWorkItem&& o) ROCKSDB_NOEXCEPT { result = std::move(o.result); checksum_value = o.checksum_value; return *this; @@ -388,17 +410,27 @@ class BackupEngineImpl : public BackupEngine { bool initialized_; std::mutex byte_report_mutex_; - channel files_to_copy_; + channel files_to_copy_or_create_; std::vector threads_; + // Adds a file to the backup work queue to be copied or created if it doesn't + // already exist. + // + // Exactly one of src_dir and contents must be non-empty. + // + // @param src_dir If non-empty, the file in this directory named fname will be + // copied. + // @param fname Name of destination file and, in case of copy, source file. + // @param contents If non-empty, the file will be created with these contents. 
Status AddBackupFileWorkItem( std::unordered_set& live_dst_paths, - std::vector& backup_items_to_finish, + std::vector& backup_items_to_finish, BackupID backup_id, bool shared, const std::string& src_dir, - const std::string& src_fname, // starts with "/" + const std::string& fname, // starts with "/" RateLimiter* rate_limiter, uint64_t size_limit = 0, bool shared_checksum = false, - std::function progress_callback = []() {}); + std::function progress_callback = []() {}, + const std::string& contents = std::string()); // backup state data BackupID latest_backup_id_; @@ -451,7 +483,7 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env, read_only_(read_only) {} BackupEngineImpl::~BackupEngineImpl() { - files_to_copy_.sendEof(); + files_to_copy_or_create_.sendEof(); for (auto& t : threads_) { t.join(); } @@ -571,17 +603,18 @@ Status BackupEngineImpl::Initialize() { } } - // set up threads perform copies from files_to_copy_ in the background + // set up threads perform copies from files_to_copy_or_create_ in the + // background for (int t = 0; t < options_.max_background_operations; t++) { threads_.emplace_back([&]() { - CopyWorkItem work_item; - while (files_to_copy_.read(work_item)) { - CopyResult result; - result.status = - CopyFile(work_item.src_path, work_item.dst_path, work_item.src_env, - work_item.dst_env, work_item.sync, work_item.rate_limiter, - &result.size, &result.checksum_value, work_item.size_limit, - work_item.progress_callback); + CopyOrCreateWorkItem work_item; + while (files_to_copy_or_create_.read(work_item)) { + CopyOrCreateResult result; + result.status = CopyOrCreateFile( + work_item.src_path, work_item.dst_path, work_item.contents, + work_item.src_env, work_item.dst_env, work_item.sync, + work_item.rate_limiter, &result.size, &result.checksum_value, + work_item.size_limit, work_item.progress_callback); work_item.result.set_value(std::move(result)); } }); @@ -616,6 +649,8 @@ Status BackupEngineImpl::CreateNewBackup( db->EnableFileDeletions(false); 
return s; } + TEST_SYNC_POINT("BackupEngineImpl::CreateNewBackup:SavedLiveFiles1"); + TEST_SYNC_POINT("BackupEngineImpl::CreateNewBackup:SavedLiveFiles2"); BackupID new_backup_id = latest_backup_id_ + 1; assert(backups_.find(new_backup_id) == backups_.end()); @@ -650,8 +685,9 @@ Status BackupEngineImpl::CreateNewBackup( std::unordered_set live_dst_paths; live_dst_paths.reserve(live_files.size() + live_wal_files.size()); - std::vector backup_items_to_finish; - // Add a CopyWorkItem to the channel for each live file + std::vector backup_items_to_finish; + // Add a CopyOrCreateWorkItem to the channel for each live file + std::string manifest_fname, current_fname; for (size_t i = 0; s.ok() && i < live_files.size(); ++i) { uint64_t number; FileType type; @@ -663,6 +699,15 @@ Status BackupEngineImpl::CreateNewBackup( // we should only get sst, manifest and current files here assert(type == kTableFile || type == kDescriptorFile || type == kCurrentFile); + if (type == kCurrentFile) { + // We will craft the current file manually to ensure it's consistent with + // the manifest number. This is necessary because current's file contents + // can change during backup. + current_fname = live_files[i]; + continue; + } else if (type == kDescriptorFile) { + manifest_fname = live_files[i]; + } // rules: // * if it's kTableFile, then it's shared @@ -675,7 +720,15 @@ Status BackupEngineImpl::CreateNewBackup( options_.share_files_with_checksum && type == kTableFile, progress_callback); } - // Add a CopyWorkItem to the channel for each WAL file + if (s.ok() && !current_fname.empty() && !manifest_fname.empty()) { + // Write the current file with the manifest filename as its contents. 
+ s = AddBackupFileWorkItem( + live_dst_paths, backup_items_to_finish, new_backup_id, + false /* shared */, "" /* src_dir */, CurrentFileName(""), + rate_limiter.get(), 0 /* size_limit */, false /* shared_checksum */, + progress_callback, manifest_fname.substr(1) + "\n"); + } + // Add a CopyOrCreateWorkItem to the channel for each WAL file for (size_t i = 0; s.ok() && i < live_wal_files.size(); ++i) { if (live_wal_files[i]->Type() == kAliveLogFile) { // we only care about live log files @@ -938,7 +991,7 @@ Status BackupEngineImpl::RestoreDBFromBackup( copy_file_buffer_size_ = rate_limiter->GetSingleBurstBytes(); } Status s; - std::vector restore_items_to_finish; + std::vector restore_items_to_finish; for (const auto& file_info : backup->GetFiles()) { const std::string &file = file_info->filename; std::string dst; @@ -968,18 +1021,15 @@ Status BackupEngineImpl::RestoreDBFromBackup( "/" + dst; Log(options_.info_log, "Restoring %s to %s\n", file.c_str(), dst.c_str()); - CopyWorkItem copy_work_item(GetAbsolutePath(file), - dst, - backup_env_, - db_env_, - false, - rate_limiter.get(), - 0 /* size_limit */); - RestoreAfterCopyWorkItem after_copy_work_item( - copy_work_item.result.get_future(), - file_info->checksum_value); - files_to_copy_.write(std::move(copy_work_item)); - restore_items_to_finish.push_back(std::move(after_copy_work_item)); + CopyOrCreateWorkItem copy_or_create_work_item( + GetAbsolutePath(file), dst, "" /* contents */, backup_env_, db_env_, + false, rate_limiter.get(), 0 /* size_limit */); + RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item( + copy_or_create_work_item.result.get_future(), + file_info->checksum_value); + files_to_copy_or_create_.write(std::move(copy_or_create_work_item)); + restore_items_to_finish.push_back( + std::move(after_copy_or_create_work_item)); } Status item_status; for (auto& item : restore_items_to_finish) { @@ -1078,12 +1128,12 @@ Status BackupEngineImpl::PutLatestBackupFileContents(uint32_t latest_backup) { 
return s; } -Status BackupEngineImpl::CopyFile(const std::string& src, - const std::string& dst, Env* src_env, - Env* dst_env, bool sync, - RateLimiter* rate_limiter, uint64_t* size, - uint32_t* checksum_value, uint64_t size_limit, - std::function progress_callback) { +Status BackupEngineImpl::CopyOrCreateFile( + const std::string& src, const std::string& dst, const std::string& contents, + Env* src_env, Env* dst_env, bool sync, RateLimiter* rate_limiter, + uint64_t* size, uint32_t* checksum_value, uint64_t size_limit, + std::function progress_callback) { + assert(src.empty() != contents.empty()); Status s; unique_ptr dst_file; unique_ptr src_file; @@ -1102,9 +1152,9 @@ Status BackupEngineImpl::CopyFile(const std::string& src, size_limit = std::numeric_limits::max(); } - s = src_env->NewSequentialFile(src, &src_file, env_options); - if (s.ok()) { - s = dst_env->NewWritableFile(dst, &dst_file, env_options); + s = dst_env->NewWritableFile(dst, &dst_file, env_options); + if (s.ok() && !src.empty()) { + s = src_env->NewSequentialFile(src, &src_file, env_options); } if (!s.ok()) { return s; @@ -1112,19 +1162,28 @@ Status BackupEngineImpl::CopyFile(const std::string& src, unique_ptr dest_writer( new WritableFileWriter(std::move(dst_file), env_options)); - unique_ptr src_reader( - new SequentialFileReader(std::move(src_file))); - unique_ptr buf(new char[copy_file_buffer_size_]); - Slice data; + unique_ptr src_reader; + unique_ptr buf; + if (!src.empty()) { + src_reader.reset(new SequentialFileReader(std::move(src_file))); + buf.reset(new char[copy_file_buffer_size_]); + } + Slice data; uint64_t processed_buffer_size = 0; do { if (stop_backup_.load(std::memory_order_acquire)) { return Status::Incomplete("Backup stopped"); } - size_t buffer_to_read = (copy_file_buffer_size_ < size_limit) ? - copy_file_buffer_size_ : size_limit; - s = src_reader->Read(buffer_to_read, &data, buf.get()); + if (!src.empty()) { + size_t buffer_to_read = (copy_file_buffer_size_ < size_limit) + ? 
copy_file_buffer_size_ + : size_limit; + s = src_reader->Read(buffer_to_read, &data, buf.get()); + processed_buffer_size += buffer_to_read; + } else { + data = contents; + } size_limit -= data.size(); if (!s.ok()) { @@ -1135,38 +1194,38 @@ Status BackupEngineImpl::CopyFile(const std::string& src, *size += data.size(); } if (checksum_value != nullptr) { - *checksum_value = crc32c::Extend(*checksum_value, data.data(), - data.size()); + *checksum_value = + crc32c::Extend(*checksum_value, data.data(), data.size()); } s = dest_writer->Append(data); if (rate_limiter != nullptr) { rate_limiter->Request(data.size(), Env::IO_LOW); } - processed_buffer_size += buffer_to_read; if (processed_buffer_size > options_.callback_trigger_interval_size) { processed_buffer_size -= options_.callback_trigger_interval_size; std::lock_guard lock(byte_report_mutex_); progress_callback(); } - } while (s.ok() && data.size() > 0 && size_limit > 0); + } while (s.ok() && contents.empty() && data.size() > 0 && size_limit > 0); if (s.ok() && sync) { s = dest_writer->Sync(false); } - return s; } -// src_fname will always start with "/" +// fname will always start with "/" Status BackupEngineImpl::AddBackupFileWorkItem( std::unordered_set& live_dst_paths, - std::vector& backup_items_to_finish, + std::vector& backup_items_to_finish, BackupID backup_id, bool shared, const std::string& src_dir, - const std::string& src_fname, RateLimiter* rate_limiter, - uint64_t size_limit, bool shared_checksum, - std::function progress_callback) { - assert(src_fname.size() > 0 && src_fname[0] == '/'); - std::string dst_relative = src_fname.substr(1); + const std::string& fname, RateLimiter* rate_limiter, uint64_t size_limit, + bool shared_checksum, std::function progress_callback, + const std::string& contents) { + assert(!fname.empty() && fname[0] == '/'); + assert(contents.empty() != src_dir.empty()); + + std::string dst_relative = fname.substr(1); std::string dst_relative_tmp; Status s; uint64_t size; @@ -1174,12 
+1233,10 @@ Status BackupEngineImpl::AddBackupFileWorkItem( if (shared && shared_checksum) { // add checksum and file length to the file name - s = CalculateChecksum(src_dir + src_fname, - db_env_, - size_limit, + s = CalculateChecksum(src_dir + fname, db_env_, size_limit, &checksum_value); if (s.ok()) { - s = db_env_->GetFileSize(src_dir + src_fname, &size); + s = db_env_->GetFileSize(src_dir + fname, &size); } if (!s.ok()) { return s; @@ -1218,12 +1275,14 @@ Status BackupEngineImpl::AddBackupFileWorkItem( } } - if (shared && (same_path || file_exists)) { + if (!contents.empty()) { + need_to_copy = false; + } else if (shared && (same_path || file_exists)) { need_to_copy = false; if (shared_checksum) { Log(options_.info_log, "%s already present, with checksum %u and size %" PRIu64, - src_fname.c_str(), checksum_value, size); + fname.c_str(), checksum_value, size); } else if (backuped_file_infos_.find(dst_relative) == backuped_file_infos_.end() && !same_path) { // file already exists, but it's not referenced by any backup. overwrite @@ -1231,48 +1290,39 @@ Status BackupEngineImpl::AddBackupFileWorkItem( Log(options_.info_log, "%s already present, but not referenced by any backup. 
We will " "overwrite the file.", - src_fname.c_str()); + fname.c_str()); need_to_copy = true; backup_env_->DeleteFile(dst_path); } else { // the file is present and referenced by a backup - db_env_->GetFileSize(src_dir + src_fname, &size); // Ignore error + db_env_->GetFileSize(src_dir + fname, &size); // Ignore error Log(options_.info_log, "%s already present, calculate checksum", - src_fname.c_str()); - s = CalculateChecksum(src_dir + src_fname, db_env_, size_limit, + fname.c_str()); + s = CalculateChecksum(src_dir + fname, db_env_, size_limit, &checksum_value); } } live_dst_paths.insert(dst_path); - if (need_to_copy) { - Log(options_.info_log, "Copying %s to %s", src_fname.c_str(), - dst_path_tmp.c_str()); - CopyWorkItem copy_work_item(src_dir + src_fname, dst_path_tmp, db_env_, - backup_env_, options_.sync, rate_limiter, - size_limit, progress_callback); - BackupAfterCopyWorkItem after_copy_work_item( - copy_work_item.result.get_future(), - shared, - need_to_copy, - backup_env_, - dst_path_tmp, - dst_path, - dst_relative); - files_to_copy_.write(std::move(copy_work_item)); - backup_items_to_finish.push_back(std::move(after_copy_work_item)); + if (!contents.empty() || need_to_copy) { + Log(options_.info_log, "Copying %s to %s", fname.c_str(), + dst_path_tmp.c_str()); + CopyOrCreateWorkItem copy_or_create_work_item( + src_dir.empty() ? 
"" : src_dir + fname, dst_path_tmp, contents, db_env_, + backup_env_, options_.sync, rate_limiter, size_limit, + progress_callback); + BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item( + copy_or_create_work_item.result.get_future(), shared, need_to_copy, + backup_env_, dst_path_tmp, dst_path, dst_relative); + files_to_copy_or_create_.write(std::move(copy_or_create_work_item)); + backup_items_to_finish.push_back(std::move(after_copy_or_create_work_item)); } else { - std::promise promise_result; - BackupAfterCopyWorkItem after_copy_work_item( - promise_result.get_future(), - shared, - need_to_copy, - backup_env_, - dst_path_tmp, - dst_path, - dst_relative); - backup_items_to_finish.push_back(std::move(after_copy_work_item)); - CopyResult result; + std::promise promise_result; + BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item( + promise_result.get_future(), shared, need_to_copy, backup_env_, + dst_path_tmp, dst_path, dst_relative); + backup_items_to_finish.push_back(std::move(after_copy_or_create_work_item)); + CopyOrCreateResult result; result.status = s; result.size = size; result.checksum_value = checksum_value; diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index a53ae2df9..95810533e 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -24,6 +24,7 @@ #include "util/random.h" #include "util/mutexlock.h" #include "util/string_util.h" +#include "util/sync_point.h" #include "util/testutil.h" #include "util/mock_env.h" #include "utilities/backupable/backupable_db_testutil.h" @@ -1317,6 +1318,33 @@ TEST_F(BackupableDBTest, EnvFailures) { } } +// Verify manifest can roll while a backup is being created with the old +// manifest. 
+TEST_F(BackupableDBTest, ChangeManifestDuringBackupCreation) { + DestroyDB(dbname_, Options()); + options_.max_manifest_file_size = 0; // always rollover manifest for file add + OpenDBAndBackupEngine(true); + FillDB(db_.get(), 0, 100); + + rocksdb::SyncPoint::GetInstance()->LoadDependency({ + {"BackupEngineImpl::CreateNewBackup:SavedLiveFiles1", + "VersionSet::LogAndApply:WriteManifest"}, + {"VersionSet::LogAndApply:WriteManifestDone", + "BackupEngineImpl::CreateNewBackup:SavedLiveFiles2"}, + }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + std::thread flush_thread{[this]() { ASSERT_OK(db_->Flush(FlushOptions())); }}; + + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); + + flush_thread.join(); + CloseDBAndBackupEngine(); + DestroyDB(dbname_, Options()); + AssertBackupConsistency(0, 0, 100); + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + // see https://github.com/facebook/rocksdb/issues/921 TEST_F(BackupableDBTest, Issue921Test) { BackupEngine* backup_engine; From 1f5954147bd89ae7d60eb1d74e95b4eefc668fb8 Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 26 Feb 2016 17:13:39 -0800 Subject: [PATCH 144/195] Introduce Iterator::GetProperty() and replace Iterator::IsKeyPinned() Summary: Add Iterator::GetProperty(), a way for users to communicate with iterator, and turn Iterator::IsKeyPinned() with it. As a follow-up, I'll ask a property as the version number attached to the iterator Test Plan: Rerun existing tests and add a negative test case. 
Reviewers: yhchiang, andrewkr, kradhakrishnan, anthony, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54783 --- HISTORY.md | 1 + db/db_iter.cc | 25 +++++++++++++++----- db/db_iter.h | 3 ++- db/db_test.cc | 48 +++++++++++++++++++++++++++++++++----- include/rocksdb/iterator.h | 19 ++++++++------- include/rocksdb/options.h | 5 ++-- table/iterator.cc | 11 +++++++++ 7 files changed, 89 insertions(+), 23 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index abf86a430..6561e520e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,7 @@ ## Unreleased ### New Features * Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification. +* Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned" ## 4.5.0 (2/5/2016) ### Public API Changes diff --git a/db/db_iter.cc b/db/db_iter.cc index afffc4dcc..c051a3928 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -136,9 +136,21 @@ class DBIter: public Iterator { } return s; } - virtual bool IsKeyPinned() const override { - assert(valid_); - return iter_pinned_ && saved_key_.IsKeyPinned(); + + virtual Status GetProperty(std::string prop_name, + std::string* prop) override { + if (prop == nullptr) { + return Status::InvalidArgument("prop is nullptr"); + } + if (prop_name == "rocksdb.iterator.is.key.pinned") { + if (valid_) { + *prop = (iter_pinned_ && saved_key_.IsKeyPinned()) ? 
"1" : "0"; + } else { + *prop = "Iterator is not valid."; + } + return Status::OK(); + } + return Status::InvalidArgument("Undentified property."); } virtual void Next() override; @@ -850,12 +862,13 @@ inline Slice ArenaWrappedDBIter::key() const { return db_iter_->key(); } inline Slice ArenaWrappedDBIter::value() const { return db_iter_->value(); } inline Status ArenaWrappedDBIter::status() const { return db_iter_->status(); } inline Status ArenaWrappedDBIter::PinData() { return db_iter_->PinData(); } +inline Status ArenaWrappedDBIter::GetProperty(std::string prop_name, + std::string* prop) { + return db_iter_->GetProperty(prop_name, prop); +} inline Status ArenaWrappedDBIter::ReleasePinnedData() { return db_iter_->ReleasePinnedData(); } -inline bool ArenaWrappedDBIter::IsKeyPinned() const { - return db_iter_->IsKeyPinned(); -} void ArenaWrappedDBIter::RegisterCleanup(CleanupFunction function, void* arg1, void* arg2) { db_iter_->RegisterCleanup(function, arg1, arg2); diff --git a/db/db_iter.h b/db/db_iter.h index 23bedb660..4060c6408 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -9,6 +9,7 @@ #pragma once #include +#include #include "rocksdb/db.h" #include "rocksdb/iterator.h" #include "db/dbformat.h" @@ -66,7 +67,7 @@ class ArenaWrappedDBIter : public Iterator { void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); virtual Status PinData(); virtual Status ReleasePinnedData(); - virtual bool IsKeyPinned() const override; + virtual Status GetProperty(std::string prop_name, std::string* prop) override; private: DBIter* db_iter_; diff --git a/db/db_test.cc b/db/db_test.cc index c9c2a6392..08e4edd93 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -632,6 +632,27 @@ TEST_F(DBTest, ReadFromPersistedTier) { } while (ChangeOptions(kSkipHashCuckoo)); } +TEST_F(DBTest, IteratorProperty) { + // The test needs to be changed if kPersistedTier is supported in iterator. 
+ Options options = CurrentOptions(); + CreateAndReopenWithCF({"pikachu"}, options); + Put(1, "1", "2"); + ReadOptions ropt; + ropt.pin_data = false; + { + unique_ptr iter(db_->NewIterator(ropt, handles_[1])); + iter->SeekToFirst(); + std::string prop_value; + ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value)); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_EQ("0", prop_value); + iter->Next(); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_EQ("Iterator is not valid.", prop_value); + } + Close(); +} + TEST_F(DBTest, PersistedTierOnIterator) { // The test needs to be changed if kPersistedTier is supported in iterator. Options options = CurrentOptions(); @@ -9789,7 +9810,10 @@ TEST_F(DBTest, PinnedDataIteratorRandomized) { ASSERT_EQ(true_data.lower_bound(k), true_data.end()); continue; } - ASSERT_TRUE(iter->IsKeyPinned()); + std::string prop_value; + ASSERT_OK( + iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_EQ("1", prop_value); keys_slices.push_back(iter->key()); true_keys.push_back(true_data.lower_bound(k)->first); } @@ -9804,7 +9828,10 @@ TEST_F(DBTest, PinnedDataIteratorRandomized) { printf("Testing iterating forward on all keys\n"); std::vector all_keys; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_TRUE(iter->IsKeyPinned()); + std::string prop_value; + ASSERT_OK( + iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_EQ("1", prop_value); all_keys.push_back(iter->key()); } ASSERT_EQ(all_keys.size(), true_data.size()); @@ -9822,7 +9849,10 @@ TEST_F(DBTest, PinnedDataIteratorRandomized) { printf("Testing iterating backward on all keys\n"); std::vector all_keys; for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { - ASSERT_TRUE(iter->IsKeyPinned()); + std::string prop_value; + ASSERT_OK( + iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_EQ("1", prop_value); 
all_keys.push_back(iter->key()); } ASSERT_EQ(all_keys.size(), true_data.size()); @@ -9893,7 +9923,9 @@ TEST_F(DBTest, PinnedDataIteratorMultipleFiles) { std::vector> results; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_TRUE(iter->IsKeyPinned()); + std::string prop_value; + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } @@ -9946,7 +9978,9 @@ TEST_F(DBTest, PinnedDataIteratorMergeOperator) { std::vector> results; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_TRUE(iter->IsKeyPinned()); + std::string prop_value; + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } @@ -10001,7 +10035,9 @@ TEST_F(DBTest, PinnedDataIteratorReadAfterUpdate) { std::vector> results; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_TRUE(iter->IsKeyPinned()); + std::string prop_value; + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index ca08c35bf..4d9b9b89a 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -19,6 +19,7 @@ #ifndef STORAGE_ROCKSDB_INCLUDE_ITERATOR_H_ #define STORAGE_ROCKSDB_INCLUDE_ITERATOR_H_ +#include #include "rocksdb/slice.h" #include "rocksdb/status.h" @@ -95,14 +96,16 @@ class Iterator : public Cleanable { // satisfied without doing some IO, then this returns Status::Incomplete(). virtual Status status() const = 0; - // If true, this means that the Slice returned by key() is valid as long - // as the iterator is not deleted and ReleasePinnedData() is not called. 
- // - // IsKeyPinned() is guaranteed to always return true if - // - Iterator created with ReadOptions::pin_data = true - // - DB tables were created with BlockBasedTableOptions::use_delta_encoding - // set to false. - virtual bool IsKeyPinned() const { return false; } + // Property "rocksdb.iterator.is.key.pinned": + // If returning "1", this means that the Slice returned by key() is valid + // as long as the iterator is not deleted and ReleasePinnedData() is not + // called. + // It is guaranteed to always return "1" if + // - Iterator created with ReadOptions::pin_data = true + // - DB tables were created with + // BlockBasedTableOptions::use_delta_encoding + // set to false. + virtual Status GetProperty(std::string prop_name, std::string* prop); private: // No copying allowed diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index c0fe0b81a..9dbb5bbe8 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1399,8 +1399,9 @@ struct ReadOptions { // Keep the blocks loaded by the iterator pinned in memory as long as the // iterator is not deleted, If used when reading from tables created with - // BlockBasedTableOptions::use_delta_encoding = false, Iterator::IsKeyPinned() - // is guaranteed to return true. + // BlockBasedTableOptions::use_delta_encoding = false, + // Iterator's property "rocksdb.iterator.is.key.pinned" is guaranteed to + // return 1. 
// Default: false bool pin_data; diff --git a/table/iterator.cc b/table/iterator.cc index d99a8301f..0b53b41aa 100644 --- a/table/iterator.cc +++ b/table/iterator.cc @@ -46,6 +46,17 @@ void Cleanable::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { c->arg2 = arg2; } +Status Iterator::GetProperty(std::string prop_name, std::string* prop) { + if (prop == nullptr) { + return Status::InvalidArgument("prop is nullptr"); + } + if (prop_name == "rocksdb.iterator.is.key.pinned") { + *prop = "0"; + return Status::OK(); + } + return Status::InvalidArgument("Undentified property."); +} + namespace { class EmptyIterator : public Iterator { public: From 5ea9aa3c146f77e45563f30f346aa3e8f66082c3 Mon Sep 17 00:00:00 2001 From: agiardullo Date: Tue, 2 Feb 2016 19:19:17 -0800 Subject: [PATCH 145/195] TransactionDB:ReinitializeTransaction Summary: Add function to reinitialize a transaction object so that it can be reused. This is an optimization so users can potentially avoid reallocating transaction objects. 
Test Plan: added tests Reviewers: yhchiang, kradhakrishnan, IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: jkedgar, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53835 --- include/rocksdb/utilities/transaction_db.h | 14 ++- utilities/transactions/transaction_base.cc | 19 +++- utilities/transactions/transaction_base.h | 6 +- utilities/transactions/transaction_db_impl.cc | 23 ++++- utilities/transactions/transaction_db_impl.h | 7 +- utilities/transactions/transaction_impl.cc | 29 +++++- utilities/transactions/transaction_impl.h | 9 +- utilities/transactions/transaction_test.cc | 91 ++++++++++++++++++- 8 files changed, 176 insertions(+), 22 deletions(-) diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h index 35b06d899..ff29bc57a 100644 --- a/include/rocksdb/utilities/transaction_db.h +++ b/include/rocksdb/utilities/transaction_db.h @@ -111,14 +111,18 @@ class TransactionDB : public StackableDB { virtual ~TransactionDB() {} - // Starts a new Transaction. Passing set_snapshot=true has the same effect - // as calling Transaction::SetSnapshot(). + // Starts a new Transaction. // - // Caller should delete the returned transaction after calling - // Transaction::Commit() or Transaction::Rollback(). + // Caller is responsible for deleting the returned transaction when no + // longer needed. + // + // If old_txn is not null, BeginTransaction will reuse this Transaction + // handle instead of allocating a new one. This is an optimization to avoid + // extra allocations when repeatedly creating transactions. 
virtual Transaction* BeginTransaction( const WriteOptions& write_options, - const TransactionOptions& txn_options = TransactionOptions()) = 0; + const TransactionOptions& txn_options = TransactionOptions(), + Transaction* old_txn = nullptr) = 0; protected: // To Create an TransactionDB, call Open() diff --git a/utilities/transactions/transaction_base.cc b/utilities/transactions/transaction_base.cc index 2754d38cb..72d12c607 100644 --- a/utilities/transactions/transaction_base.cc +++ b/utilities/transactions/transaction_base.cc @@ -24,7 +24,10 @@ TransactionBaseImpl::TransactionBaseImpl(DB* db, start_time_(db_->GetEnv()->NowMicros()), write_batch_(cmp_, 0, true) {} -TransactionBaseImpl::~TransactionBaseImpl() {} +TransactionBaseImpl::~TransactionBaseImpl() { + // Release snapshot if snapshot is set + SetSnapshotInternal(nullptr); +} void TransactionBaseImpl::Clear() { save_points_.reset(nullptr); @@ -35,12 +38,22 @@ void TransactionBaseImpl::Clear() { num_merges_ = 0; } +void TransactionBaseImpl::Reinitialize(const WriteOptions& write_options) { + Clear(); + write_options_ = write_options; + start_time_ = db_->GetEnv()->NowMicros(); +} + void TransactionBaseImpl::SetSnapshot() { assert(dynamic_cast(db_) != nullptr); auto db_impl = reinterpret_cast(db_); const Snapshot* snapshot = db_impl->GetSnapshotForWriteConflictBoundary(); + SetSnapshotInternal(snapshot); +} + +void TransactionBaseImpl::SetSnapshotInternal(const Snapshot* snapshot) { // Set a custom deleter for the snapshot_ SharedPtr as the snapshot needs to // be released, not deleted when it is no longer referenced. 
snapshot_.reset(snapshot, std::bind(&TransactionBaseImpl::ReleaseSnapshot, @@ -493,7 +506,9 @@ WriteBatchBase* TransactionBaseImpl::GetBatchForWrite() { } void TransactionBaseImpl::ReleaseSnapshot(const Snapshot* snapshot, DB* db) { - db->ReleaseSnapshot(snapshot); + if (snapshot != nullptr) { + db->ReleaseSnapshot(snapshot); + } } void TransactionBaseImpl::UndoGetForUpdate(ColumnFamilyHandle* column_family, diff --git a/utilities/transactions/transaction_base.h b/utilities/transactions/transaction_base.h index cb8ca2483..86903ea1f 100644 --- a/utilities/transactions/transaction_base.h +++ b/utilities/transactions/transaction_base.h @@ -32,6 +32,8 @@ class TransactionBaseImpl : public Transaction { // Remove pending operations queued in this transaction. virtual void Clear(); + void Reinitialize(const WriteOptions& write_options); + // Called before executing Put, Merge, Delete, and GetForUpdate. If TryLock // returns non-OK, the Put/Merge/Delete/GetForUpdate will be failed. // untracked will be true if called from PutUntracked, DeleteUntracked, or @@ -240,7 +242,7 @@ class TransactionBaseImpl : public Transaction { const Comparator* cmp_; // Stores that time the txn was constructed, in microseconds. - const uint64_t start_time_; + uint64_t start_time_; // Stores the current snapshot that was was set by SetSnapshot or null if // no snapshot is currently set. 
@@ -306,6 +308,8 @@ class TransactionBaseImpl : public Transaction { bool read_only, bool untracked = false); WriteBatchBase* GetBatchForWrite(); + + void SetSnapshotInternal(const Snapshot* snapshot); }; } // namespace rocksdb diff --git a/utilities/transactions/transaction_db_impl.cc b/utilities/transactions/transaction_db_impl.cc index bc5b9e596..b02d7bd25 100644 --- a/utilities/transactions/transaction_db_impl.cc +++ b/utilities/transactions/transaction_db_impl.cc @@ -31,10 +31,14 @@ TransactionDBImpl::TransactionDBImpl(DB* db, new TransactionDBMutexFactoryImpl())) {} Transaction* TransactionDBImpl::BeginTransaction( - const WriteOptions& write_options, const TransactionOptions& txn_options) { - Transaction* txn = new TransactionImpl(this, write_options, txn_options); - - return txn; + const WriteOptions& write_options, const TransactionOptions& txn_options, + Transaction* old_txn) { + if (old_txn != nullptr) { + ReinitializeTransaction(old_txn, write_options, txn_options); + return old_txn; + } else { + return new TransactionImpl(this, write_options, txn_options); + } } TransactionDBOptions TransactionDBImpl::ValidateTxnDBOptions( @@ -173,7 +177,7 @@ void TransactionDBImpl::UnLock(TransactionImpl* txn, uint32_t cfh_id, Transaction* TransactionDBImpl::BeginInternalTransaction( const WriteOptions& options) { TransactionOptions txn_options; - Transaction* txn = BeginTransaction(options, txn_options); + Transaction* txn = BeginTransaction(options, txn_options, nullptr); assert(dynamic_cast(txn) != nullptr); auto txn_impl = reinterpret_cast(txn); @@ -302,5 +306,14 @@ bool TransactionDBImpl::TryStealingExpiredTransactionLocks( return tx.TryStealingLocks(); } +void TransactionDBImpl::ReinitializeTransaction( + Transaction* txn, const WriteOptions& write_options, + const TransactionOptions& txn_options) { + assert(dynamic_cast(txn) != nullptr); + auto txn_impl = reinterpret_cast(txn); + + txn_impl->Reinitialize(write_options, txn_options); +} + } // namespace rocksdb 
#endif // ROCKSDB_LITE diff --git a/utilities/transactions/transaction_db_impl.h b/utilities/transactions/transaction_db_impl.h index ace218d19..7b7d646a1 100644 --- a/utilities/transactions/transaction_db_impl.h +++ b/utilities/transactions/transaction_db_impl.h @@ -26,7 +26,8 @@ class TransactionDBImpl : public TransactionDB { ~TransactionDBImpl() {} Transaction* BeginTransaction(const WriteOptions& write_options, - const TransactionOptions& txn_options) override; + const TransactionOptions& txn_options, + Transaction* old_txn) override; using StackableDB::Put; virtual Status Put(const WriteOptions& options, @@ -78,6 +79,10 @@ class TransactionDBImpl : public TransactionDB { bool TryStealingExpiredTransactionLocks(TransactionID tx_id); private: + void ReinitializeTransaction( + Transaction* txn, const WriteOptions& write_options, + const TransactionOptions& txn_options = TransactionOptions()); + const TransactionDBOptions txn_db_options_; TransactionLockMgr lock_mgr_; diff --git a/utilities/transactions/transaction_impl.cc b/utilities/transactions/transaction_impl.cc index 7cda2cd0e..33393751d 100644 --- a/utilities/transactions/transaction_impl.cc +++ b/utilities/transactions/transaction_impl.cc @@ -39,21 +39,34 @@ TransactionImpl::TransactionImpl(TransactionDB* txn_db, const TransactionOptions& txn_options) : TransactionBaseImpl(txn_db->GetBaseDB(), write_options), txn_db_impl_(nullptr), - txn_id_(GenTxnID()), - expiration_time_(txn_options.expiration >= 0 - ? 
start_time_ + txn_options.expiration * 1000 - : 0), - lock_timeout_(txn_options.lock_timeout * 1000), + txn_id_(0), + expiration_time_(0), + lock_timeout_(0), exec_status_(STARTED) { txn_db_impl_ = dynamic_cast(txn_db); assert(txn_db_impl_); + Initialize(txn_options); +} + +void TransactionImpl::Initialize(const TransactionOptions& txn_options) { + txn_id_ = GenTxnID(); + + exec_status_ = STARTED; + + lock_timeout_ = txn_options.lock_timeout * 1000; if (lock_timeout_ < 0) { // Lock timeout not set, use default lock_timeout_ = txn_db_impl_->GetTxnDBOptions().transaction_lock_timeout * 1000; } + if (txn_options.expiration >= 0) { + expiration_time_ = start_time_ + txn_options.expiration * 1000; + } else { + expiration_time_ = 0; + } + if (txn_options.set_snapshot) { SetSnapshot(); } @@ -74,6 +87,12 @@ void TransactionImpl::Clear() { TransactionBaseImpl::Clear(); } +void TransactionImpl::Reinitialize(const WriteOptions& write_options, + const TransactionOptions& txn_options) { + TransactionBaseImpl::Reinitialize(write_options); + Initialize(txn_options); +} + bool TransactionImpl::IsExpired() const { if (expiration_time_ > 0) { if (db_->GetEnv()->NowMicros() >= expiration_time_) { diff --git a/utilities/transactions/transaction_impl.h b/utilities/transactions/transaction_impl.h index 01521f172..8a8ed6531 100644 --- a/utilities/transactions/transaction_impl.h +++ b/utilities/transactions/transaction_impl.h @@ -38,6 +38,9 @@ class TransactionImpl : public TransactionBaseImpl { virtual ~TransactionImpl(); + void Reinitialize(const WriteOptions& write_options, + const TransactionOptions& txn_options); + Status Commit() override; Status CommitBatch(WriteBatch* batch); @@ -82,11 +85,11 @@ class TransactionImpl : public TransactionBaseImpl { static std::atomic txn_id_counter_; // Unique ID for this transaction - const TransactionID txn_id_; + TransactionID txn_id_; // If non-zero, this transaction should not be committed after this time (in // microseconds according to 
Env->NowMicros()) - const uint64_t expiration_time_; + uint64_t expiration_time_; // Timeout in microseconds when locking a key or -1 if there is no timeout. int64_t lock_timeout_; @@ -96,6 +99,8 @@ class TransactionImpl : public TransactionBaseImpl { void Clear() override; + void Initialize(const TransactionOptions& txn_options); + Status ValidateSnapshot(ColumnFamilyHandle* column_family, const Slice& key, SequenceNumber prev_seqno, SequenceNumber* new_seqno); diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index fec6c974f..809dc9506 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -448,7 +448,6 @@ TEST_F(TransactionTest, FlushTest2) { s = txn->Delete("S"); // Should fail after encountering a write to S in SST file - fprintf(stderr, "%" ROCKSDB_PRIszt " %s\n", n, s.ToString().c_str()); ASSERT_TRUE(s.IsBusy()); // Write a bunch of keys to db to force a compaction @@ -1210,6 +1209,96 @@ TEST_F(TransactionTest, ExpiredTransaction) { delete txn2; } +TEST_F(TransactionTest, ReinitializeTest) { + WriteOptions write_options; + ReadOptions read_options; + TransactionOptions txn_options; + string value; + Status s; + + // Set txn expiration timeout to 0 microseconds (expires instantly) + txn_options.expiration = 0; + Transaction* txn1 = db->BeginTransaction(write_options, txn_options); + + // Reinitialize transaction to no long expire + txn_options.expiration = -1; + db->BeginTransaction(write_options, txn_options, txn1); + + s = txn1->Put("Z", "z"); + ASSERT_OK(s); + + // Should commit since not expired + s = txn1->Commit(); + ASSERT_OK(s); + + db->BeginTransaction(write_options, txn_options, txn1); + + s = txn1->Put("Z", "zz"); + ASSERT_OK(s); + + // Reinitilize txn1 and verify that Z gets unlocked + db->BeginTransaction(write_options, txn_options, txn1); + + Transaction* txn2 = db->BeginTransaction(write_options, txn_options, nullptr); + s = txn2->Put("Z", 
"zzz"); + ASSERT_OK(s); + s = txn2->Commit(); + ASSERT_OK(s); + delete txn2; + + s = db->Get(read_options, "Z", &value); + ASSERT_OK(s); + ASSERT_EQ(value, "zzz"); + + // Verify snapshots get reinitialized correctly + txn1->SetSnapshot(); + s = txn1->Put("Z", "zzzz"); + ASSERT_OK(s); + + s = txn1->Commit(); + ASSERT_OK(s); + + s = db->Get(read_options, "Z", &value); + ASSERT_OK(s); + ASSERT_EQ(value, "zzzz"); + + db->BeginTransaction(write_options, txn_options, txn1); + const Snapshot* snapshot = txn1->GetSnapshot(); + ASSERT_TRUE(snapshot); + + txn_options.set_snapshot = true; + db->BeginTransaction(write_options, txn_options, txn1); + snapshot = txn1->GetSnapshot(); + ASSERT_TRUE(snapshot); + + s = txn1->Put("Z", "a"); + ASSERT_OK(s); + + txn1->Rollback(); + + s = txn1->Put("Y", "y"); + ASSERT_OK(s); + + txn_options.set_snapshot = false; + db->BeginTransaction(write_options, txn_options, txn1); + snapshot = txn1->GetSnapshot(); + + s = txn1->Put("X", "x"); + ASSERT_OK(s); + + s = txn1->Commit(); + ASSERT_OK(s); + + s = db->Get(read_options, "Z", &value); + ASSERT_OK(s); + ASSERT_EQ(value, "zzzz"); + + s = db->Get(read_options, "Y", &value); + ASSERT_TRUE(s.IsNotFound()); + + delete txn1; +} + TEST_F(TransactionTest, Rollback) { WriteOptions write_options; ReadOptions read_options; From b5b1db167a3437aca27c9ca85da6ade040c46484 Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 29 Feb 2016 15:15:23 -0800 Subject: [PATCH 146/195] Recompute compaction score after scheduling manual compaction Summary: After we made manual compaction runnable concurrently with automaticallly compaction, we need to run ComputeCompactionScore() to prepare a coming compaction picking call before the compaction finishes. Test Plan: Run existing tests. 
Reviewers: yhchiang, IslamAbdelRahman, andrewkr, kradhakrishnan, anthony, igor Reviewed By: igor Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54891 --- db/compaction_picker.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index 7d43b76d1..dad59a0a3 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -612,6 +612,17 @@ Compaction* CompactionPicker::CompactRange( if (input_level == 0) { level0_compactions_in_progress_.insert(compaction); } + + // Creating a compaction influences the compaction score because the score + // takes running compactions into account (by skipping files that are already + // being compacted). Since we just changed compaction score, we recalculate it + // here + { // this piece of code recomputes compaction score + CompactionOptionsFIFO dummy_compaction_options_fifo; + vstorage->ComputeCompactionScore(mutable_cf_options, + dummy_compaction_options_fifo); + } + return compaction; } From 432f3adf2c0f751bbd5c83a31c7c24d076e6b798 Mon Sep 17 00:00:00 2001 From: sdong Date: Tue, 1 Mar 2016 09:34:50 -0800 Subject: [PATCH 147/195] Add DB Property "rocksdb.current_version_number" Summary: Add a DB Property "rocksdb.current_version_number" for users to monitor version changes and stale iterators. Test Plan: Add a unit test. 
Reviewers: andrewkr, yhchiang, kradhakrishnan, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54927 --- db/db_properties_test.cc | 12 ++++++++++++ db/internal_stats.cc | 11 +++++++++++ db/internal_stats.h | 2 ++ include/rocksdb/db.h | 4 ++++ 4 files changed, 29 insertions(+) diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index 5af94569f..dbfdac970 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -90,6 +90,18 @@ TEST_F(DBPropertiesTest, Empty) { } while (ChangeOptions()); } +TEST_F(DBPropertiesTest, CurrentVersionNumber) { + uint64_t v1, v2, v3; + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.current_version_number", &v1)); + Put("12345678", ""); + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.current_version_number", &v2)); + Flush(); + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.current_version_number", &v3)); + + ASSERT_EQ(v1, v2); + ASSERT_GT(v3, v2); +} + TEST_F(DBPropertiesTest, GetAggregatedIntPropertyTest) { const int kKeySize = 100; const int kValueSize = 500; diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 1ec795c9c..4fa38bb1f 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -132,6 +132,7 @@ static const std::string is_file_deletions_enabled = static const std::string num_snapshots = "num-snapshots"; static const std::string oldest_snapshot_time = "oldest-snapshot-time"; static const std::string num_live_versions = "num-live-versions"; +static const std::string current_version_number = "current_version_number"; static const std::string estimate_live_data_size = "estimate-live-data-size"; static const std::string base_level = "base-level"; static const std::string total_sst_files_size = "total-sst-files-size"; @@ -191,6 +192,8 @@ const std::string DB::Properties::kOldestSnapshotTime = rocksdb_prefix + oldest_snapshot_time; const std::string DB::Properties::kNumLiveVersions = rocksdb_prefix + 
num_live_versions; +const std::string DB::Properties::kCurrentVersionNumber = + rocksdb_prefix + current_version_number; const std::string DB::Properties::kEstimateLiveDataSize = rocksdb_prefix + estimate_live_data_size; const std::string DB::Properties::kTotalSstFilesSize = @@ -254,6 +257,8 @@ const std::unordered_mapGetSuperVersionNumber(); + return true; +} + bool InternalStats::HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db, Version* version) { *value = db->IsFileDeletionsEnabled(); diff --git a/db/internal_stats.h b/db/internal_stats.h index 958731a58..3d5580228 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -328,6 +328,8 @@ class InternalStats { bool HandleNumSnapshots(uint64_t* value, DBImpl* db, Version* version); bool HandleOldestSnapshotTime(uint64_t* value, DBImpl* db, Version* version); bool HandleNumLiveVersions(uint64_t* value, DBImpl* db, Version* version); + bool HandleCurrentVersionNumber(uint64_t* value, DBImpl* db, + Version* version); bool HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db, Version* version); bool HandleBaseLevel(uint64_t* value, DBImpl* db, Version* version); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index cbcb1f211..9a0022dfa 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -444,6 +444,9 @@ class DB { // by iterators or unfinished compactions. static const std::string kNumLiveVersions; + // "rocksdb.current-version-number" - returns number of curent LSM version. + static const std::string kCurrentVersionNumber; + // "rocksdb.estimate-live-data-size" - returns an estimate of the amount of // live data in bytes. 
static const std::string kEstimateLiveDataSize; @@ -504,6 +507,7 @@ class DB { // "rocksdb.num-snapshots" // "rocksdb.oldest-snapshot-time" // "rocksdb.num-live-versions" + // "rocksdb.current_version_number" // "rocksdb.estimate-live-data-size" // "rocksdb.total-sst-files-size" // "rocksdb.base-level" From 6743135ea1bac252007ce9f83fb38f80f975ede1 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Tue, 1 Mar 2016 12:05:29 -0800 Subject: [PATCH 148/195] Fix DB::AddFile() issue when PurgeObsoleteFiles() is called Summary: In some situations the DB will scan all existing files in the DB path and delete the ones that are Obsolete. If this happen during adding an external sst file. this could cause the file to be deleted while we are adding it. This diff fix this issue Test Plan: unit test to reproduce the bug existing unit tests Reviewers: sdong, yhchiang, andrewkr Reviewed By: andrewkr Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D54627 --- db/db_impl.cc | 12 +++++++++++- db/db_test.cc | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 683006388..fc8c531c4 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3587,8 +3587,16 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family, return Status::InvalidArgument( "Non zero sequence numbers are not supported"); } + // Generate a location for the new table - meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, file_info->file_size); + std::list::iterator pending_outputs_inserted_elem; + { + InstrumentedMutexLock l(&mutex_); + pending_outputs_inserted_elem = CaptureCurrentFileNumberInPendingOutputs(); + meta.fd = + FileDescriptor(versions_->NewFileNumber(), 0, file_info->file_size); + } + std::string db_fname = TableFileName( db_options_.db_paths, meta.fd.GetNumber(), meta.fd.GetPathId()); @@ -3601,6 +3609,7 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family, } else { status = 
CopyFile(env_, file_info->file_path, db_fname, 0); } + TEST_SYNC_POINT("DBImpl::AddFile:FileCopied"); if (!status.ok()) { return status; } @@ -3664,6 +3673,7 @@ Status DBImpl::AddFile(ColumnFamilyHandle* column_family, delete InstallSuperVersionAndScheduleWork(cfd, nullptr, mutable_cf_options); } + ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem); } if (!status.ok()) { diff --git a/db/db_test.cc b/db/db_test.cc index 08e4edd93..de9bf8209 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -9409,6 +9409,58 @@ TEST_F(DBTest, AddExternalSstFile) { kSkipFIFOCompaction)); } +// This test reporduce a bug that can happen in some cases if the DB started +// purging obsolete files when we are adding an external sst file. +// This situation may result in deleting the file while it's being added. +TEST_F(DBTest, AddExternalSstFilePurgeObsoleteFilesBug) { + std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/"; + env_->CreateDir(sst_files_folder); + Options options = CurrentOptions(); + options.env = env_; + const ImmutableCFOptions ioptions(options); + + SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator); + + // file1.sst (0 => 500) + std::string sst_file_path = sst_files_folder + "file1.sst"; + Status s = sst_file_writer.Open(sst_file_path); + ASSERT_OK(s); + for (int i = 0; i < 500; i++) { + std::string k = Key(i); + s = sst_file_writer.Add(k, k + "_val"); + ASSERT_OK(s); + } + + ExternalSstFileInfo sst_file_info; + s = sst_file_writer.Finish(&sst_file_info); + ASSERT_OK(s); + + options.delete_obsolete_files_period_micros = 0; + options.disable_auto_compactions = true; + DestroyAndReopen(options); + + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::AddFile:FileCopied", [&](void* arg) { + ASSERT_OK(Put("aaa", "bbb")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("aaa", "xxx")); + ASSERT_OK(Flush()); + db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + }); + 
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + s = db_->AddFile(sst_file_path); + ASSERT_OK(s); + + for (int i = 0; i < 500; i++) { + std::string k = Key(i); + std::string v = k + "_val"; + ASSERT_EQ(Get(k), v); + } + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + TEST_F(DBTest, AddExternalSstFileNoCopy) { std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/"; env_->CreateDir(sst_files_folder); From 74b660702e095d3a6f6da2954efb3f7be9105f78 Mon Sep 17 00:00:00 2001 From: sdong Date: Tue, 1 Mar 2016 12:56:21 -0800 Subject: [PATCH 149/195] Rename iterator property "rocksdb.iterator.is.key.pinned" => "rocksdb.iterator.is-key-pinned" Summary: Rename iterator property to folow property naming convention. Test Plan: Run all existing tests. Reviewers: andrewkr, anthony, yhchiang, kradhakrishnan, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54957 --- db/db_iter.cc | 2 +- db/db_test.cc | 16 ++++++++-------- include/rocksdb/iterator.h | 2 +- include/rocksdb/options.h | 2 +- table/iterator.cc | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index c051a3928..4f4317f9a 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -142,7 +142,7 @@ class DBIter: public Iterator { if (prop == nullptr) { return Status::InvalidArgument("prop is nullptr"); } - if (prop_name == "rocksdb.iterator.is.key.pinned") { + if (prop_name == "rocksdb.iterator.is-key-pinned") { if (valid_) { *prop = (iter_pinned_ && saved_key_.IsKeyPinned()) ? 
"1" : "0"; } else { diff --git a/db/db_test.cc b/db/db_test.cc index de9bf8209..d68087d13 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -644,10 +644,10 @@ TEST_F(DBTest, IteratorProperty) { iter->SeekToFirst(); std::string prop_value; ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value)); - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("0", prop_value); iter->Next(); - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("Iterator is not valid.", prop_value); } Close(); @@ -9864,7 +9864,7 @@ TEST_F(DBTest, PinnedDataIteratorRandomized) { } std::string prop_value; ASSERT_OK( - iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); keys_slices.push_back(iter->key()); true_keys.push_back(true_data.lower_bound(k)->first); @@ -9882,7 +9882,7 @@ TEST_F(DBTest, PinnedDataIteratorRandomized) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { std::string prop_value; ASSERT_OK( - iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); all_keys.push_back(iter->key()); } @@ -9903,7 +9903,7 @@ TEST_F(DBTest, PinnedDataIteratorRandomized) { for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { std::string prop_value; ASSERT_OK( - iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); all_keys.push_back(iter->key()); } @@ -9976,7 +9976,7 @@ TEST_F(DBTest, PinnedDataIteratorMultipleFiles) { std::vector> results; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { std::string prop_value; - 
ASSERT_OK(iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } @@ -10031,7 +10031,7 @@ TEST_F(DBTest, PinnedDataIteratorMergeOperator) { std::vector> results; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { std::string prop_value; - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } @@ -10088,7 +10088,7 @@ TEST_F(DBTest, PinnedDataIteratorReadAfterUpdate) { std::vector> results; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { std::string prop_value; - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is.key.pinned", &prop_value)); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index 4d9b9b89a..2cd107f43 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -96,7 +96,7 @@ class Iterator : public Cleanable { // satisfied without doing some IO, then this returns Status::Incomplete(). virtual Status status() const = 0; - // Property "rocksdb.iterator.is.key.pinned": + // Property "rocksdb.iterator.is-key-pinned": // If returning "1", this means that the Slice returned by key() is valid // as long as the iterator is not deleted and ReleasePinnedData() is not // called. 
diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 9dbb5bbe8..6ace73bb6 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1400,7 +1400,7 @@ struct ReadOptions { // Keep the blocks loaded by the iterator pinned in memory as long as the // iterator is not deleted, If used when reading from tables created with // BlockBasedTableOptions::use_delta_encoding = false, - // Iterator's property "rocksdb.iterator.is.key.pinned" is guaranteed to + // Iterator's property "rocksdb.iterator.is-key-pinned" is guaranteed to // return 1. // Default: false bool pin_data; diff --git a/table/iterator.cc b/table/iterator.cc index 0b53b41aa..09f7f8e68 100644 --- a/table/iterator.cc +++ b/table/iterator.cc @@ -50,7 +50,7 @@ Status Iterator::GetProperty(std::string prop_name, std::string* prop) { if (prop == nullptr) { return Status::InvalidArgument("prop is nullptr"); } - if (prop_name == "rocksdb.iterator.is.key.pinned") { + if (prop_name == "rocksdb.iterator.is-key-pinned") { *prop = "0"; return Status::OK(); } From 4572a2d8c08d70705728d42edef522502b410701 Mon Sep 17 00:00:00 2001 From: sdong Date: Tue, 1 Mar 2016 13:47:36 -0800 Subject: [PATCH 150/195] Update current version to 4.6 Summary: 4.5 is already cut, we can now increase the version in 4.6. Test Plan: Not needed. Reviewers: anthony, kradhakrishnan, andrewkr, yhchiang, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54963 --- include/rocksdb/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rocksdb/version.h b/include/rocksdb/version.h index 0c98df903..8f6b899e1 100644 --- a/include/rocksdb/version.h +++ b/include/rocksdb/version.h @@ -5,7 +5,7 @@ #pragma once #define ROCKSDB_MAJOR 4 -#define ROCKSDB_MINOR 5 +#define ROCKSDB_MINOR 6 #define ROCKSDB_PATCH 0 // Do not use these. 
We made the mistake of declaring macros starting with From 12fd9b1868764441717c328ee6081e0dcc0d430b Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 29 Feb 2016 15:02:32 -0800 Subject: [PATCH 151/195] Change BlockBasedTableOptions.format_version default to 2 Summary: BlockBasedTableOptions.format_version = 2 uses better encoding format. Now it's the time to make it default. Test Plan: Run all existing tests. Reviewers: igor, yhchiang, anthony, kradhakrishnan, andrewkr, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: MarkCallaghan, leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54879 --- HISTORY.md | 2 ++ include/rocksdb/table.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 6561e520e..567d40d08 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,7 @@ # Rocksdb Change Log ## Unreleased +### Public API Changes +* Change default of BlockBasedTableOptions.format_version to 2. It means default DB created by 4.6 or up cannot be opened by RocksDB version 3.9 or earlier. ### New Features * Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification. * Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned" diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 157d4274c..cb4d850e8 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -166,7 +166,7 @@ struct BlockBasedTableOptions { // this. // This option only affects newly written tables. When reading exising tables, // the information about version is read from the footer. - uint32_t format_version = 0; + uint32_t format_version = 2; }; // Table Properties that are specific to block-based table properties. 
From f8e90e8753c70f06d22a6e2669349e97e18d7bff Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 1 Mar 2016 19:33:33 -0800 Subject: [PATCH 152/195] Get file attributes in bulk for VerifyBackup and CreateNewBackup Summary: For VerifyBackup(), backup files can be spread across "shared/", "shared_checksum/", and "private/" subdirectories, so we have to bulk get all three. For CreateNewBackup(), we make two separate bulk calls: one for the data files and one for WAL files. There is also a new helper function, ExtendPathnameToSizeBytes(), that translates the file attributes vector to a map. I decided to leave GetChildrenFileAttributes()'s (from D53781) return type as vector to keep it consistent with GetChildren(). Depends on D53781. Test Plan: verified relevant unit tests $ ./backupable_db_test Reviewers: IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53919 --- utilities/backupable/backupable_db.cc | 147 +++++++++++++++------ utilities/backupable/backupable_db_test.cc | 22 +++ 2 files changed, 126 insertions(+), 43 deletions(-) diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index 024b789ca..63665e9a6 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -117,6 +117,12 @@ class BackupEngineImpl : public BackupEngine { private: void DeleteChildren(const std::string& dir, uint32_t file_type_filter = 0); + // Extends the "result" map with pathname->size mappings for the contents of + // "dir". Pathnames are prefixed with "dir". 
+ Status InsertPathnameToSizeBytes( + const std::string& dir, + std::unordered_map* result); + struct FileInfo { FileInfo(const std::string& fname, uint64_t sz, uint32_t checksum) : refs(0), filename(fname), size(sz), checksum_value(checksum) {} @@ -179,8 +185,10 @@ class BackupEngineImpl : public BackupEngine { return files_; } - Status LoadFromFile(const std::string& backup_dir, - bool use_size_in_file_name); + // @param abs_path_to_size Pre-fetched file sizes (bytes). + Status LoadFromFile( + const std::string& backup_dir, bool use_size_in_file_name, + const std::unordered_map& abs_path_to_size); Status StoreToFile(bool sync); std::string GetInfoString() { @@ -427,7 +435,7 @@ class BackupEngineImpl : public BackupEngine { std::vector& backup_items_to_finish, BackupID backup_id, bool shared, const std::string& src_dir, const std::string& fname, // starts with "/" - RateLimiter* rate_limiter, uint64_t size_limit = 0, + RateLimiter* rate_limiter, uint64_t size_bytes, uint64_t size_limit = 0, bool shared_checksum = false, std::function progress_callback = []() {}, const std::string& contents = std::string()); @@ -573,10 +581,19 @@ Status BackupEngineImpl::Initialize() { return s; } } else { // Load data from storage + std::unordered_map abs_path_to_size; + for (const auto& rel_dir : + {GetSharedFileRel(), GetSharedFileWithChecksumRel()}) { + const auto abs_dir = GetAbsolutePath(rel_dir); + InsertPathnameToSizeBytes(abs_dir, &abs_path_to_size); + } // load the backups if any for (auto& backup : backups_) { + InsertPathnameToSizeBytes( + GetAbsolutePath(GetPrivateFileRel(backup.first)), &abs_path_to_size); Status s = backup.second->LoadFromFile( - options_.backup_dir, options_.use_file_size_in_file_name); + options_.backup_dir, options_.use_file_size_in_file_name, + abs_path_to_size); if (!s.ok()) { Log(options_.info_log, "Backup %u corrupted -- %s", backup.first, s.ToString().c_str()); @@ -685,6 +702,12 @@ Status BackupEngineImpl::CreateNewBackup( std::unordered_set 
live_dst_paths; live_dst_paths.reserve(live_files.size() + live_wal_files.size()); + // Pre-fetch sizes for data files + std::unordered_map data_path_to_size; + if (s.ok()) { + s = InsertPathnameToSizeBytes(db->GetName(), &data_path_to_size); + } + std::vector backup_items_to_finish; // Add a CopyOrCreateWorkItem to the channel for each live file std::string manifest_fname, current_fname; @@ -709,13 +732,19 @@ Status BackupEngineImpl::CreateNewBackup( manifest_fname = live_files[i]; } + auto data_path_to_size_iter = + data_path_to_size.find(db->GetName() + live_files[i]); + uint64_t size_bytes = data_path_to_size_iter == data_path_to_size.end() + ? port::kMaxUint64 + : data_path_to_size_iter->second; + // rules: // * if it's kTableFile, then it's shared // * if it's kDescriptorFile, limit the size to manifest_file_size s = AddBackupFileWorkItem( live_dst_paths, backup_items_to_finish, new_backup_id, options_.share_table_files && type == kTableFile, db->GetName(), - live_files[i], rate_limiter.get(), + live_files[i], rate_limiter.get(), size_bytes, (type == kDescriptorFile) ? 
manifest_file_size : 0, options_.share_files_with_checksum && type == kTableFile, progress_callback); @@ -725,21 +754,37 @@ Status BackupEngineImpl::CreateNewBackup( s = AddBackupFileWorkItem( live_dst_paths, backup_items_to_finish, new_backup_id, false /* shared */, "" /* src_dir */, CurrentFileName(""), - rate_limiter.get(), 0 /* size_limit */, false /* shared_checksum */, - progress_callback, manifest_fname.substr(1) + "\n"); + rate_limiter.get(), manifest_fname.size(), 0 /* size_limit */, + false /* shared_checksum */, progress_callback, + manifest_fname.substr(1) + "\n"); } + + // Pre-fetch sizes for WAL files + std::unordered_map wal_path_to_size; + if (s.ok()) { + if (db->GetOptions().wal_dir != "") { + s = InsertPathnameToSizeBytes(db->GetOptions().wal_dir, + &wal_path_to_size); + } else { + wal_path_to_size = std::move(data_path_to_size); + } + } + // Add a CopyOrCreateWorkItem to the channel for each WAL file for (size_t i = 0; s.ok() && i < live_wal_files.size(); ++i) { + auto wal_path_to_size_iter = + wal_path_to_size.find(live_wal_files[i]->PathName()); + uint64_t size_bytes = wal_path_to_size_iter == wal_path_to_size.end() + ? 
port::kMaxUint64 + : wal_path_to_size_iter->second; if (live_wal_files[i]->Type() == kAliveLogFile) { // we only care about live log files // copy the file into backup_dir/files// - s = AddBackupFileWorkItem(live_dst_paths, - backup_items_to_finish, - new_backup_id, - false, /* not shared */ + s = AddBackupFileWorkItem(live_dst_paths, backup_items_to_finish, + new_backup_id, false, /* not shared */ db->GetOptions().wal_dir, live_wal_files[i]->PathName(), - rate_limiter.get()); + rate_limiter.get(), size_bytes); } } @@ -1070,21 +1115,20 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id) { Log(options_.info_log, "Verifying backup id %u\n", backup_id); - uint64_t size; - Status result; - std::string file_path; + std::unordered_map curr_abs_path_to_size; + for (const auto& rel_dir : {GetPrivateFileRel(backup_id), GetSharedFileRel(), + GetSharedFileWithChecksumRel()}) { + const auto abs_dir = GetAbsolutePath(rel_dir); + InsertPathnameToSizeBytes(abs_dir, &curr_abs_path_to_size); + } + for (const auto& file_info : backup->GetFiles()) { - const std::string& file = file_info->filename; - file_path = GetAbsolutePath(file); - result = backup_env_->FileExists(file_path); - if (!result.ok()) { - return result; + const auto abs_path = GetAbsolutePath(file_info->filename); + if (curr_abs_path_to_size.find(abs_path) == curr_abs_path_to_size.end()) { + return Status::NotFound("File missing: " + abs_path); } - result = backup_env_->GetFileSize(file_path, &size); - if (!result.ok()) { - return result; - } else if (size != file_info->size) { - return Status::Corruption("File corrupted: " + file); + if (file_info->size != curr_abs_path_to_size[abs_path]) { + return Status::Corruption("File corrupted: " + abs_path); } } return Status::OK(); @@ -1219,30 +1263,29 @@ Status BackupEngineImpl::AddBackupFileWorkItem( std::unordered_set& live_dst_paths, std::vector& backup_items_to_finish, BackupID backup_id, bool shared, const std::string& src_dir, - const std::string& fname, 
RateLimiter* rate_limiter, uint64_t size_limit, - bool shared_checksum, std::function progress_callback, - const std::string& contents) { + const std::string& fname, RateLimiter* rate_limiter, uint64_t size_bytes, + uint64_t size_limit, bool shared_checksum, + std::function progress_callback, const std::string& contents) { assert(!fname.empty() && fname[0] == '/'); assert(contents.empty() != src_dir.empty()); std::string dst_relative = fname.substr(1); std::string dst_relative_tmp; Status s; - uint64_t size; uint32_t checksum_value = 0; if (shared && shared_checksum) { // add checksum and file length to the file name s = CalculateChecksum(src_dir + fname, db_env_, size_limit, &checksum_value); - if (s.ok()) { - s = db_env_->GetFileSize(src_dir + fname, &size); - } if (!s.ok()) { - return s; + return s; } - dst_relative = GetSharedFileWithChecksum(dst_relative, checksum_value, - size); + if (size_bytes == port::kMaxUint64) { + return Status::NotFound("File missing: " + src_dir + fname); + } + dst_relative = + GetSharedFileWithChecksum(dst_relative, checksum_value, size_bytes); dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true); dst_relative = GetSharedFileWithChecksumRel(dst_relative, false); } else if (shared) { @@ -1282,7 +1325,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem( if (shared_checksum) { Log(options_.info_log, "%s already present, with checksum %u and size %" PRIu64, - fname.c_str(), checksum_value, size); + fname.c_str(), checksum_value, size_bytes); } else if (backuped_file_infos_.find(dst_relative) == backuped_file_infos_.end() && !same_path) { // file already exists, but it's not referenced by any backup. 
overwrite @@ -1295,7 +1338,6 @@ Status BackupEngineImpl::AddBackupFileWorkItem( backup_env_->DeleteFile(dst_path); } else { // the file is present and referenced by a backup - db_env_->GetFileSize(src_dir + fname, &size); // Ignore error Log(options_.info_log, "%s already present, calculate checksum", fname.c_str()); s = CalculateChecksum(src_dir + fname, db_env_, size_limit, @@ -1324,7 +1366,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem( backup_items_to_finish.push_back(std::move(after_copy_or_create_work_item)); CopyOrCreateResult result; result.status = s; - result.size = size; + result.size = size_bytes; result.checksum_value = checksum_value; promise_result.set_value(std::move(result)); } @@ -1390,6 +1432,22 @@ void BackupEngineImpl::DeleteChildren(const std::string& dir, } } +Status BackupEngineImpl::InsertPathnameToSizeBytes( + const std::string& dir, std::unordered_map* result) { + assert(result != nullptr); + std::vector files_attrs; + Status status = backup_env_->GetChildrenFileAttributes(dir, &files_attrs); + if (!status.ok()) { + return status; + } + const bool slash_needed = dir.empty() || dir.back() != '/'; + for (const auto& file_attrs : files_attrs) { + result->emplace(dir + (slash_needed ? "/" : "") + file_attrs.name, + file_attrs.size_bytes); + } + return Status::OK(); +} + Status BackupEngineImpl::GarbageCollect() { assert(!read_only_); Log(options_.info_log, "Starting garbage collection"); @@ -1561,8 +1619,9 @@ bool TEST_GetFileSizeFromBackupFileName(const std::string full_name, // // // ... 
-Status BackupEngineImpl::BackupMeta::LoadFromFile(const std::string& backup_dir, - bool use_size_in_file_name) { +Status BackupEngineImpl::BackupMeta::LoadFromFile( + const std::string& backup_dir, bool use_size_in_file_name, + const std::unordered_map& abs_path_to_size) { assert(Empty()); Status s; unique_ptr backup_meta_file; @@ -1606,9 +1665,11 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(const std::string& backup_dir, } else { if (!use_size_in_file_name || !GetFileSizeFromBackupFileName(filename, &size)) { - s = env_->GetFileSize(backup_dir + "/" + filename, &size); - if (!s.ok()) { - return s; + std::string abs_path = backup_dir + "/" + filename; + try { + size = abs_path_to_size.at(abs_path); + } catch (std::out_of_range& e) { + return Status::NotFound("Size missing for pathname: " + abs_path); } } } diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 95810533e..0379cdd07 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -232,6 +232,23 @@ class TestEnv : public EnvWrapper { return EnvWrapper::GetChildren(dir, r); } + // Some test cases do not actually create the test files (e.g., see + // DummyDB::live_files_) - for those cases, we mock those files' attributes + // so CreateNewBackup() can get their attributes. 
+ void SetFilenamesForMockedAttrs(const std::vector& filenames) { + filenames_for_mocked_attrs_ = filenames; + } + Status GetChildrenFileAttributes( + const std::string& dir, std::vector* r) override { + if (filenames_for_mocked_attrs_.size() > 0) { + for (const auto& filename : filenames_for_mocked_attrs_) { + r->push_back({dir + filename, 10 /* size_bytes */}); + } + return Status::OK(); + } + return EnvWrapper::GetChildrenFileAttributes(dir, r); + } + void SetCreateDirIfMissingFailure(bool fail) { create_dir_if_missing_failure_ = fail; } @@ -255,6 +272,7 @@ class TestEnv : public EnvWrapper { port::Mutex mutex_; bool dummy_sequential_file_ = false; std::vector written_files_; + std::vector filenames_for_mocked_attrs_; uint64_t limit_written_files_ = 1000000; uint64_t limit_delete_files_ = 1000000; @@ -780,6 +798,7 @@ TEST_F(BackupableDBTest, NoDoubleCopy) { dummy_db_->live_files_ = { "/00010.sst", "/00011.sst", "/CURRENT", "/MANIFEST-01" }; dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}}; + test_backup_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_); ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); std::vector should_have_written = { "/shared/00010.sst.tmp", "/shared/00011.sst.tmp", @@ -796,6 +815,7 @@ TEST_F(BackupableDBTest, NoDoubleCopy) { dummy_db_->live_files_ = { "/00010.sst", "/00015.sst", "/CURRENT", "/MANIFEST-01" }; dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}}; + test_backup_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_); ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); // should not open 00010.sst - it's already there should_have_written = { @@ -846,6 +866,7 @@ TEST_F(BackupableDBTest, DifferentEnvs) { dummy_db_->live_files_ = { "/00010.sst", "/00011.sst", "/CURRENT", "/MANIFEST-01" }; dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}}; + test_backup_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_); 
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); CloseDBAndBackupEngine(); @@ -857,6 +878,7 @@ TEST_F(BackupableDBTest, DifferentEnvs) { CloseDBAndBackupEngine(); DestroyDB(dbname_, Options()); + test_backup_env_->SetFilenamesForMockedAttrs({}); AssertBackupConsistency(0, 0, 100, 500); } From deb08b8226a6e54038c48d62394c585a94a7f181 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 2 Mar 2016 10:34:14 -0800 Subject: [PATCH 153/195] Add parsing of missing DB options Summary: There are a few options in struct DBOptions that aren't handled by options_helper.cc. Add those missing options so they can be used by GetDBOptionsFromString() and friends. Test Plan: Updated options_test.cc, reran all tests. Reviewers: sdong, yhchiang Reviewed By: yhchiang Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54603 --- util/options_helper.cc | 24 +++++++++++++++++++ util/options_helper.h | 52 +++++++++++++++++++++++++++++++++++++++--- util/options_parser.cc | 9 ++++++++ util/options_test.cc | 20 ++++++++-------- 4 files changed, 91 insertions(+), 14 deletions(-) diff --git a/util/options_helper.cc b/util/options_helper.cc index 44b57f48b..679d0a1b4 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -361,6 +361,18 @@ bool ParseOptionHelper(char* opt_address, const OptionType& opt_type, return ParseEnum( encoding_type_string_map, value, reinterpret_cast(opt_address)); + case OptionType::kWALRecoveryMode: + return ParseEnum( + wal_recovery_mode_string_map, value, + reinterpret_cast(opt_address)); + case OptionType::kAccessHint: + return ParseEnum( + access_hint_string_map, value, + reinterpret_cast(opt_address)); + case OptionType::kInfoLogLevel: + return ParseEnum( + info_log_level_string_map, value, + reinterpret_cast(opt_address)); default: return false; } @@ -498,6 +510,18 @@ bool SerializeSingleOptionHelper(const char* opt_address, return SerializeEnum( encoding_type_string_map, *reinterpret_cast(opt_address), 
value); + case OptionType::kWALRecoveryMode: + return SerializeEnum( + wal_recovery_mode_string_map, + *reinterpret_cast(opt_address), value); + case OptionType::kAccessHint: + return SerializeEnum( + access_hint_string_map, + *reinterpret_cast(opt_address), value); + case OptionType::kInfoLogLevel: + return SerializeEnum( + info_log_level_string_map, + *reinterpret_cast(opt_address), value); default: return false; } diff --git a/util/options_helper.h b/util/options_helper.h index fc7e2c2e2..b0864442c 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -93,6 +93,9 @@ enum class OptionType { kFlushBlockPolicyFactory, kChecksumType, kEncodingType, + kWALRecoveryMode, + kAccessHint, + kInfoLogLevel, kUnknown }; @@ -143,10 +146,7 @@ Status GetColumnFamilyOptionsFromMapInternal( static std::unordered_map db_options_type_info = { /* // not yet supported - AccessHint access_hint_on_compaction_start; Env* env; - InfoLogLevel info_log_level; - WALRecoveryMode wal_recovery_mode; std::shared_ptr row_cache; std::shared_ptr delete_scheduler; std::shared_ptr info_log; @@ -289,6 +289,30 @@ static std::unordered_map db_options_type_info = { OptionVerificationType::kNormal}}, {"stats_dump_period_sec", {offsetof(struct DBOptions, stats_dump_period_sec), OptionType::kUInt, + OptionVerificationType::kNormal}}, + {"fail_if_options_file_error", + {offsetof(struct DBOptions, fail_if_options_file_error), + OptionType::kBoolean, OptionVerificationType::kNormal}}, + {"allow_concurrent_memtable_write", + {offsetof(struct DBOptions, allow_concurrent_memtable_write), + OptionType::kBoolean, OptionVerificationType::kNormal}}, + {"wal_recovery_mode", + {offsetof(struct DBOptions, wal_recovery_mode), + OptionType::kWALRecoveryMode, OptionVerificationType::kNormal}}, + {"enable_write_thread_adaptive_yield", + {offsetof(struct DBOptions, enable_write_thread_adaptive_yield), + OptionType::kBoolean, OptionVerificationType::kNormal}}, + {"write_thread_slow_yield_usec", + {offsetof(struct 
DBOptions, write_thread_slow_yield_usec), + OptionType::kUInt64T, OptionVerificationType::kNormal}}, + {"write_thread_max_yield_usec", + {offsetof(struct DBOptions, write_thread_max_yield_usec), + OptionType::kUInt64T, OptionVerificationType::kNormal}}, + {"access_hint_on_compaction_start", + {offsetof(struct DBOptions, access_hint_on_compaction_start), + OptionType::kAccessHint, OptionVerificationType::kNormal}}, + {"info_log_level", + {offsetof(struct DBOptions, info_log_level), OptionType::kInfoLogLevel, OptionVerificationType::kNormal}}}; static std::unordered_map cf_options_type_info = { @@ -558,6 +582,28 @@ static std::unordered_map {"kCompactionStyleFIFO", kCompactionStyleFIFO}, {"kCompactionStyleNone", kCompactionStyleNone}}; +static std::unordered_map wal_recovery_mode_string_map = { + {"kTolerateCorruptedTailRecords", + WALRecoveryMode::kTolerateCorruptedTailRecords}, + {"kAbsoluteConsistency", WALRecoveryMode::kAbsoluteConsistency}, + {"kPointInTimeRecovery", WALRecoveryMode::kPointInTimeRecovery}, + {"kSkipAnyCorruptedRecords", WALRecoveryMode::kSkipAnyCorruptedRecords}}; + +static std::unordered_map + access_hint_string_map = {{"NONE", DBOptions::AccessHint::NONE}, + {"NORMAL", DBOptions::AccessHint::NORMAL}, + {"SEQUENTIAL", DBOptions::AccessHint::SEQUENTIAL}, + {"WILLNEED", DBOptions::AccessHint::WILLNEED}}; + +static std::unordered_map info_log_level_string_map = + {{"DEBUG_LEVEL", InfoLogLevel::DEBUG_LEVEL}, + {"INFO_LEVEL", InfoLogLevel::INFO_LEVEL}, + {"WARN_LEVEL", InfoLogLevel::WARN_LEVEL}, + {"ERROR_LEVEL", InfoLogLevel::ERROR_LEVEL}, + {"FATAL_LEVEL", InfoLogLevel::FATAL_LEVEL}, + {"HEADER_LEVEL", InfoLogLevel::HEADER_LEVEL}}; + } // namespace rocksdb #endif // !ROCKSDB_LITE diff --git a/util/options_parser.cc b/util/options_parser.cc index e01529bff..0c368c646 100644 --- a/util/options_parser.cc +++ b/util/options_parser.cc @@ -557,6 +557,15 @@ bool AreEqualOptions( *reinterpret_cast( offset1) == *reinterpret_cast(offset2)); + case 
OptionType::kWALRecoveryMode: + return (*reinterpret_cast(offset1) == + *reinterpret_cast(offset2)); + case OptionType::kAccessHint: + return (*reinterpret_cast(offset1) == + *reinterpret_cast(offset2)); + case OptionType::kInfoLogLevel: + return (*reinterpret_cast(offset1) == + *reinterpret_cast(offset2)); default: if (type_info.verification == OptionVerificationType::kByName || type_info.verification == OptionVerificationType::kByNameAllowNull) { diff --git a/util/options_test.cc b/util/options_test.cc index 02f128d69..f36da9282 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -1653,16 +1653,6 @@ TEST_F(OptionsParserTest, DBOptionsAllFieldsSettable) { options = new (options_ptr) DBOptions(); FillWithSpecialChar(options_ptr, sizeof(DBOptions), kDBOptionsBlacklist); - // Following options are not settable through GetDBOptionsFromString(): - options->fail_if_options_file_error = false; - options->allow_concurrent_memtable_write = false; - options->wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - options->enable_write_thread_adaptive_yield = true; - options->write_thread_slow_yield_usec = true; - options->write_thread_max_yield_usec = 1000u; - options->access_hint_on_compaction_start = DBOptions::AccessHint::NONE; - options->info_log_level = InfoLogLevel::DEBUG_LEVEL; - char* new_options_ptr = new char[sizeof(DBOptions)]; DBOptions* new_options = new (new_options_ptr) DBOptions(); FillWithSpecialChar(new_options_ptr, sizeof(DBOptions), kDBOptionsBlacklist); @@ -1714,7 +1704,15 @@ TEST_F(OptionsParserTest, DBOptionsAllFieldsSettable) { "allow_mmap_reads=false;" "max_log_file_size=4607;" "random_access_max_buffer_size=1048576;" - "advise_random_on_open=true;", + "advise_random_on_open=true;" + "fail_if_options_file_error=false;" + "allow_concurrent_memtable_write=true;" + "wal_recovery_mode=kPointInTimeRecovery;" + "enable_write_thread_adaptive_yield=true;" + "write_thread_slow_yield_usec=5;" + "write_thread_max_yield_usec=1000;" + 
"access_hint_on_compaction_start=NONE;" + "info_log_level=DEBUG_LEVEL;", new_options)); ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions), From 19ea40f8b67514b6eda63b4a0cf5ac100c69d039 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 2 Mar 2016 11:21:26 -0800 Subject: [PATCH 154/195] Subcompaction boundary keys should not terminate after an empty level Summary: Now we skip to add boundary keys to subcompaction candidates since we see an empty level. This makes subcompaction almost disabled for universal compaction. We should consider all files instead. Test Plan: Run existing tests. Reviewers: IslamAbdelRahman, andrewkr, yhchiang Reviewed By: yhchiang Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D55005 --- db/compaction_job.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/compaction_job.cc b/db/compaction_job.cc index bcf803e14..b04541adb 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -363,7 +363,7 @@ void CompactionJob::GenSubcompactionBoundaries() { size_t num_files = flevel->num_files; if (num_files == 0) { - break; + continue; } if (lvl == 0) { From e79ad9e18434b3422b78f60129ec6e77bdefb5fb Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 29 Feb 2016 18:38:03 -0800 Subject: [PATCH 155/195] Add Iterator Property rocksdb.iterator.version_number Summary: We want to provide a way to detect whether an iterator is stale and needs to be recreated. Add a iterator property to return version number. Test Plan: Add two unit tests for it. 
Reviewers: IslamAbdelRahman, yhchiang, anthony, kradhakrishnan, andrewkr Reviewed By: andrewkr Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54921 --- CMakeLists.txt | 1 + Makefile | 4 + db/db_impl.cc | 16 ++-- db/db_impl_readonly.cc | 16 ++-- db/db_iter.cc | 26 ++++-- db/db_iter.h | 16 ++-- db/db_iter_test.cc | 186 ++++++++++++++++++------------------- db/db_tailing_iter_test.cc | 45 +++++++++ db/db_test2.cc | 63 +++++++++++++ db/forward_iterator.cc | 16 +++- db/forward_iterator.h | 2 + include/rocksdb/iterator.h | 2 + table/internal_iterator.h | 5 + 13 files changed, 267 insertions(+), 131 deletions(-) create mode 100644 db/db_test2.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index baed60308..eda0d703b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -337,6 +337,7 @@ set(TESTS db/db_table_properties_test.cc db/db_tailing_iter_test.cc db/db_test.cc + db/db_test2.cc db/db_universal_compaction_test.cc db/db_wal_test.cc db/dbformat_test.cc diff --git a/Makefile b/Makefile index 018184c75..cfe70bea9 100644 --- a/Makefile +++ b/Makefile @@ -245,6 +245,7 @@ BENCHTOOLOBJECTS = $(BENCH_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL) TESTS = \ db_test \ + db_test2 \ db_iter_test \ db_log_iter_test \ db_compaction_filter_test \ @@ -790,6 +791,9 @@ slice_transform_test: util/slice_transform_test.o $(LIBOBJECTS) $(TESTHARNESS) db_test: db/db_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) +db_test2: db/db_test2.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + db_log_iter_test: db/db_log_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) diff --git a/db/db_impl.cc b/db/db_impl.cc index fc8c531c4..eb46811ae 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3922,8 +3922,8 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options, env_, *cfd->ioptions(), cfd->user_comparator(), iter, kMaxSequenceNumber, sv->mutable_cf_options.max_sequential_skip_in_iterations, - 
read_options.iterate_upper_bound, read_options.prefix_same_as_start, - read_options.pin_data); + sv->version_number, read_options.iterate_upper_bound, + read_options.prefix_same_as_start, read_options.pin_data); #endif } else { SequenceNumber latest_snapshot = versions_->LastSequence(); @@ -3980,8 +3980,8 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options, ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator( env_, *cfd->ioptions(), cfd->user_comparator(), snapshot, sv->mutable_cf_options.max_sequential_skip_in_iterations, - read_options.iterate_upper_bound, read_options.prefix_same_as_start, - read_options.pin_data); + sv->version_number, read_options.iterate_upper_bound, + read_options.prefix_same_as_start, read_options.pin_data); InternalIterator* internal_iter = NewInternalIterator(read_options, cfd, sv, db_iter->GetArena()); @@ -4034,8 +4034,8 @@ Status DBImpl::NewIterators( iterators->push_back(NewDBIterator( env_, *cfd->ioptions(), cfd->user_comparator(), iter, kMaxSequenceNumber, - sv->mutable_cf_options.max_sequential_skip_in_iterations, nullptr, - false, read_options.pin_data)); + sv->mutable_cf_options.max_sequential_skip_in_iterations, + sv->version_number, nullptr, false, read_options.pin_data)); } #endif } else { @@ -4054,8 +4054,8 @@ Status DBImpl::NewIterators( ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator( env_, *cfd->ioptions(), cfd->user_comparator(), snapshot, - sv->mutable_cf_options.max_sequential_skip_in_iterations, nullptr, - false, read_options.pin_data); + sv->mutable_cf_options.max_sequential_skip_in_iterations, + sv->version_number, nullptr, false, read_options.pin_data); InternalIterator* internal_iter = NewInternalIterator(read_options, cfd, sv, db_iter->GetArena()); db_iter->SetIterUnderDBIter(internal_iter); diff --git a/db/db_impl_readonly.cc b/db/db_impl_readonly.cc index d96362f79..57c14df14 100644 --- a/db/db_impl_readonly.cc +++ b/db/db_impl_readonly.cc @@ -54,10 +54,11 @@ Iterator* 
DBImplReadOnly::NewIterator(const ReadOptions& read_options, auto db_iter = NewArenaWrappedDbIterator( env_, *cfd->ioptions(), cfd->user_comparator(), (read_options.snapshot != nullptr - ? reinterpret_cast( - read_options.snapshot)->number_ + ? reinterpret_cast(read_options.snapshot) + ->number_ : latest_snapshot), - super_version->mutable_cf_options.max_sequential_skip_in_iterations); + super_version->mutable_cf_options.max_sequential_skip_in_iterations, + super_version->version_number); auto internal_iter = NewInternalIterator( read_options, cfd, super_version, db_iter->GetArena()); db_iter->SetIterUnderDBIter(internal_iter); @@ -81,10 +82,11 @@ Status DBImplReadOnly::NewIterators( auto* db_iter = NewArenaWrappedDbIterator( env_, *cfd->ioptions(), cfd->user_comparator(), (read_options.snapshot != nullptr - ? reinterpret_cast( - read_options.snapshot)->number_ - : latest_snapshot), - sv->mutable_cf_options.max_sequential_skip_in_iterations); + ? reinterpret_cast(read_options.snapshot) + ->number_ + : latest_snapshot), + sv->mutable_cf_options.max_sequential_skip_in_iterations, + sv->version_number); auto* internal_iter = NewInternalIterator( read_options, cfd, sv, db_iter->GetArena()); db_iter->SetIterUnderDBIter(internal_iter); diff --git a/db/db_iter.cc b/db/db_iter.cc index 4f4317f9a..bfde03d17 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -13,18 +13,19 @@ #include #include -#include "db/filename.h" #include "db/dbformat.h" +#include "db/filename.h" #include "port/port.h" #include "rocksdb/env.h" -#include "rocksdb/options.h" #include "rocksdb/iterator.h" #include "rocksdb/merge_operator.h" +#include "rocksdb/options.h" #include "table/internal_iterator.h" #include "util/arena.h" #include "util/logging.h" #include "util/mutexlock.h" #include "util/perf_context_imp.h" +#include "util/string_util.h" namespace rocksdb { @@ -61,7 +62,7 @@ class DBIter: public Iterator { DBIter(Env* env, const ImmutableCFOptions& ioptions, const Comparator* cmp, 
InternalIterator* iter, SequenceNumber s, bool arena_mode, - uint64_t max_sequential_skip_in_iterations, + uint64_t max_sequential_skip_in_iterations, uint64_t version_number, const Slice* iterate_upper_bound = nullptr, bool prefix_same_as_start = false) : arena_mode_(arena_mode), @@ -75,6 +76,7 @@ class DBIter: public Iterator { valid_(false), current_entry_is_merged_(false), statistics_(ioptions.statistics), + version_number_(version_number), iterate_upper_bound_(iterate_upper_bound), prefix_same_as_start_(prefix_same_as_start), iter_pinned_(false) { @@ -142,7 +144,13 @@ class DBIter: public Iterator { if (prop == nullptr) { return Status::InvalidArgument("prop is nullptr"); } - if (prop_name == "rocksdb.iterator.is-key-pinned") { + if (prop_name == "rocksdb.iterator.version-number") { + // First try to pass the value returned from inner iterator. + if (!iter_->GetProperty(prop_name, prop).ok()) { + *prop = ToString(version_number_); + } + return Status::OK(); + } else if (prop_name == "rocksdb.iterator.is-key-pinned") { if (valid_) { *prop = (iter_pinned_ && saved_key_.IsKeyPinned()) ? 
"1" : "0"; } else { @@ -198,6 +206,7 @@ class DBIter: public Iterator { bool current_entry_is_merged_; Statistics* statistics_; uint64_t max_skip_; + uint64_t version_number_; const Slice* iterate_upper_bound_; IterKey prefix_start_; bool prefix_same_as_start_; @@ -830,12 +839,13 @@ Iterator* NewDBIterator(Env* env, const ImmutableCFOptions& ioptions, InternalIterator* internal_iter, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, + uint64_t version_number, const Slice* iterate_upper_bound, bool prefix_same_as_start, bool pin_data) { DBIter* db_iter = new DBIter(env, ioptions, user_key_comparator, internal_iter, sequence, - false, max_sequential_skip_in_iterations, iterate_upper_bound, - prefix_same_as_start); + false, max_sequential_skip_in_iterations, version_number, + iterate_upper_bound, prefix_same_as_start); if (pin_data) { db_iter->PinData(); } @@ -877,7 +887,7 @@ void ArenaWrappedDBIter::RegisterCleanup(CleanupFunction function, void* arg1, ArenaWrappedDBIter* NewArenaWrappedDbIterator( Env* env, const ImmutableCFOptions& ioptions, const Comparator* user_key_comparator, const SequenceNumber& sequence, - uint64_t max_sequential_skip_in_iterations, + uint64_t max_sequential_skip_in_iterations, uint64_t version_number, const Slice* iterate_upper_bound, bool prefix_same_as_start, bool pin_data) { ArenaWrappedDBIter* iter = new ArenaWrappedDBIter(); @@ -885,7 +895,7 @@ ArenaWrappedDBIter* NewArenaWrappedDbIterator( auto mem = arena->AllocateAligned(sizeof(DBIter)); DBIter* db_iter = new (mem) DBIter(env, ioptions, user_key_comparator, nullptr, sequence, - true, max_sequential_skip_in_iterations, + true, max_sequential_skip_in_iterations, version_number, iterate_upper_bound, prefix_same_as_start); iter->SetDBIter(db_iter); diff --git a/db/db_iter.h b/db/db_iter.h index 4060c6408..f239d2984 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -25,14 +25,12 @@ class InternalIterator; // Return a new iterator that converts internal keys 
(yielded by // "*internal_iter") that were live at the specified "sequence" number // into appropriate user keys. -extern Iterator* NewDBIterator(Env* env, const ImmutableCFOptions& options, - const Comparator* user_key_comparator, - InternalIterator* internal_iter, - const SequenceNumber& sequence, - uint64_t max_sequential_skip_in_iterations, - const Slice* iterate_upper_bound = nullptr, - bool prefix_same_as_start = false, - bool pin_data = false); +extern Iterator* NewDBIterator( + Env* env, const ImmutableCFOptions& options, + const Comparator* user_key_comparator, InternalIterator* internal_iter, + const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, + uint64_t version_number, const Slice* iterate_upper_bound = nullptr, + bool prefix_same_as_start = false, bool pin_data = false); // A wrapper iterator which wraps DB Iterator and the arena, with which the DB // iterator is supposed be allocated. This class is used as an entry point of @@ -78,7 +76,7 @@ class ArenaWrappedDBIter : public Iterator { extern ArenaWrappedDBIter* NewArenaWrappedDbIterator( Env* env, const ImmutableCFOptions& options, const Comparator* user_key_comparator, const SequenceNumber& sequence, - uint64_t max_sequential_skip_in_iterations, + uint64_t max_sequential_skip_in_iterations, uint64_t version_number, const Slice* iterate_upper_bound = nullptr, bool prefix_same_as_start = false, bool pin_data = false); diff --git a/db/db_iter_test.cc b/db/db_iter_test.cc index 023a0a09c..3b3030110 100644 --- a/db/db_iter_test.cc +++ b/db/db_iter_test.cc @@ -181,10 +181,9 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { internal_iter->AddPut("b", "val_b"); internal_iter->Finish(); - std::unique_ptr db_iter( - NewDBIterator(env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, 10, - options.max_sequential_skip_in_iterations)); + std::unique_ptr db_iter(NewDBIterator( + env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, + 10, 
options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -215,7 +214,7 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 10, options.max_sequential_skip_in_iterations)); + 10, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -241,7 +240,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 10, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound)); + 10, options.max_sequential_skip_in_iterations, 0, + ro.iterate_upper_bound)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -273,7 +273,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 10, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound)); + 10, options.max_sequential_skip_in_iterations, 0, + ro.iterate_upper_bound)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -308,7 +309,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 10, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound)); + 10, options.max_sequential_skip_in_iterations, 0, + ro.iterate_upper_bound)); db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); @@ -337,7 +339,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 7, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound)); + 7, options.max_sequential_skip_in_iterations, 0, + ro.iterate_upper_bound)); SetPerfLevel(kEnableCount); ASSERT_TRUE(GetPerfLevel() == kEnableCount); @@ 
-374,7 +377,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 4, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound)); + 4, options.max_sequential_skip_in_iterations, 0, + ro.iterate_upper_bound)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -399,7 +403,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 10, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound)); + 10, options.max_sequential_skip_in_iterations, 0, + ro.iterate_upper_bound)); db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); @@ -421,7 +426,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 10, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound)); + 10, options.max_sequential_skip_in_iterations, 0, + ro.iterate_upper_bound)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -456,7 +462,8 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 7, options.max_sequential_skip_in_iterations, ro.iterate_upper_bound)); + 7, options.max_sequential_skip_in_iterations, 0, + ro.iterate_upper_bound)); SetPerfLevel(kEnableCount); ASSERT_TRUE(GetPerfLevel() == kEnableCount); @@ -482,10 +489,9 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { internal_iter->AddPut("b", "val_b"); internal_iter->Finish(); - std::unique_ptr db_iter( - NewDBIterator(env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, 10, - options.max_sequential_skip_in_iterations)); + std::unique_ptr db_iter(NewDBIterator( + env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, + 10, 
options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); @@ -524,10 +530,9 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { internal_iter->AddPut("b", "val_b"); internal_iter->Finish(); - std::unique_ptr db_iter( - NewDBIterator(env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, 2, - options.max_sequential_skip_in_iterations)); + std::unique_ptr db_iter(NewDBIterator( + env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, + 2, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); @@ -555,10 +560,9 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { internal_iter->AddPut("c", "val_c"); internal_iter->Finish(); - std::unique_ptr db_iter( - NewDBIterator(env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, 10, - options.max_sequential_skip_in_iterations)); + std::unique_ptr db_iter(NewDBIterator( + env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, + 10, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); @@ -583,10 +587,9 @@ TEST_F(DBIteratorTest, DBIteratorEmpty) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->Finish(); - std::unique_ptr db_iter( - NewDBIterator(env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, 0, - options.max_sequential_skip_in_iterations)); + std::unique_ptr db_iter(NewDBIterator( + env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, + 0, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); } @@ -595,10 +598,9 @@ TEST_F(DBIteratorTest, DBIteratorEmpty) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->Finish(); - std::unique_ptr db_iter( - NewDBIterator(env_, 
ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, 0, - options.max_sequential_skip_in_iterations)); + std::unique_ptr db_iter(NewDBIterator( + env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, + 0, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToFirst(); ASSERT_TRUE(!db_iter->Valid()); } @@ -617,10 +619,9 @@ TEST_F(DBIteratorTest, DBIteratorUseSkipCountSkips) { } internal_iter->Finish(); - std::unique_ptr db_iter( - NewDBIterator(env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, 2, - options.max_sequential_skip_in_iterations)); + std::unique_ptr db_iter(NewDBIterator( + env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 2, + options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); @@ -659,9 +660,8 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) { options.statistics = rocksdb::CreateDBStatistics(); std::unique_ptr db_iter(NewDBIterator( - env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, i + 2, - options.max_sequential_skip_in_iterations)); + env_, ImmutableCFOptions(options), BytewiseComparator(), + internal_iter, i + 2, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -695,9 +695,8 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) { internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( - env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, i + 2, - options.max_sequential_skip_in_iterations)); + env_, ImmutableCFOptions(options), BytewiseComparator(), + internal_iter, i + 2, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -724,9 +723,8 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) { internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( - env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, 202, 
- options.max_sequential_skip_in_iterations)); + env_, ImmutableCFOptions(options), BytewiseComparator(), + internal_iter, 202, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -756,10 +754,9 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) { } internal_iter->AddPut("c", "200"); internal_iter->Finish(); - std::unique_ptr db_iter( - NewDBIterator(env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, i, - options.max_sequential_skip_in_iterations)); + std::unique_ptr db_iter(NewDBIterator( + env_, ImmutableCFOptions(options), BytewiseComparator(), + internal_iter, i, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); @@ -773,10 +770,9 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) { } internal_iter->AddPut("c", "200"); internal_iter->Finish(); - std::unique_ptr db_iter( - NewDBIterator(env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, 200, - options.max_sequential_skip_in_iterations)); + std::unique_ptr db_iter(NewDBIterator( + env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, + 200, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); @@ -809,9 +805,8 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) { internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( - env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, i + 2, - options.max_sequential_skip_in_iterations)); + env_, ImmutableCFOptions(options), BytewiseComparator(), + internal_iter, i + 2, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -844,9 +839,8 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) { internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( - env_, ImmutableCFOptions(options), - BytewiseComparator(), internal_iter, i + 2, - 
options.max_sequential_skip_in_iterations)); + env_, ImmutableCFOptions(options), BytewiseComparator(), + internal_iter, i + 2, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -887,7 +881,7 @@ TEST_F(DBIteratorTest, DBIterator1) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 1, - options.max_sequential_skip_in_iterations)); + options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -913,7 +907,7 @@ TEST_F(DBIteratorTest, DBIterator2) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 0, - options.max_sequential_skip_in_iterations)); + options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -936,7 +930,7 @@ TEST_F(DBIteratorTest, DBIterator3) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 2, - options.max_sequential_skip_in_iterations)); + options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -958,7 +952,7 @@ TEST_F(DBIteratorTest, DBIterator4) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 4, - options.max_sequential_skip_in_iterations)); + options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -987,7 +981,7 @@ TEST_F(DBIteratorTest, DBIterator5) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 0, options.max_sequential_skip_in_iterations)); + 0, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); 
ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1009,7 +1003,7 @@ TEST_F(DBIteratorTest, DBIterator5) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 1, options.max_sequential_skip_in_iterations)); + 1, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1031,7 +1025,7 @@ TEST_F(DBIteratorTest, DBIterator5) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 2, options.max_sequential_skip_in_iterations)); + 2, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1053,7 +1047,7 @@ TEST_F(DBIteratorTest, DBIterator5) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 3, options.max_sequential_skip_in_iterations)); + 3, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1075,7 +1069,7 @@ TEST_F(DBIteratorTest, DBIterator5) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 4, options.max_sequential_skip_in_iterations)); + 4, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1097,7 +1091,7 @@ TEST_F(DBIteratorTest, DBIterator5) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 5, options.max_sequential_skip_in_iterations)); + 5, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1119,7 +1113,7 @@ TEST_F(DBIteratorTest, DBIterator5) { 
std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 6, options.max_sequential_skip_in_iterations)); + 6, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1145,7 +1139,7 @@ TEST_F(DBIteratorTest, DBIterator6) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 0, options.max_sequential_skip_in_iterations)); + 0, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1167,7 +1161,7 @@ TEST_F(DBIteratorTest, DBIterator6) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 1, options.max_sequential_skip_in_iterations)); + 1, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1189,7 +1183,7 @@ TEST_F(DBIteratorTest, DBIterator6) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 2, options.max_sequential_skip_in_iterations)); + 2, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1211,7 +1205,7 @@ TEST_F(DBIteratorTest, DBIterator6) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 3, options.max_sequential_skip_in_iterations)); + 3, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); } @@ -1229,7 +1223,7 @@ TEST_F(DBIteratorTest, DBIterator6) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 4, options.max_sequential_skip_in_iterations)); + 4, 
options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1251,7 +1245,7 @@ TEST_F(DBIteratorTest, DBIterator6) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 5, options.max_sequential_skip_in_iterations)); + 5, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1273,7 +1267,7 @@ TEST_F(DBIteratorTest, DBIterator6) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 6, options.max_sequential_skip_in_iterations)); + 6, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1311,7 +1305,7 @@ TEST_F(DBIteratorTest, DBIterator7) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 0, options.max_sequential_skip_in_iterations)); + 0, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1345,7 +1339,7 @@ TEST_F(DBIteratorTest, DBIterator7) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 2, options.max_sequential_skip_in_iterations)); + 2, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -1385,7 +1379,7 @@ TEST_F(DBIteratorTest, DBIterator7) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 4, options.max_sequential_skip_in_iterations)); + 4, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -1425,7 +1419,7 @@ TEST_F(DBIteratorTest, DBIterator7) { std::unique_ptr 
db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 5, options.max_sequential_skip_in_iterations)); + 5, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -1470,7 +1464,7 @@ TEST_F(DBIteratorTest, DBIterator7) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 6, options.max_sequential_skip_in_iterations)); + 6, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -1516,7 +1510,7 @@ TEST_F(DBIteratorTest, DBIterator7) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 7, options.max_sequential_skip_in_iterations)); + 7, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -1556,7 +1550,7 @@ TEST_F(DBIteratorTest, DBIterator7) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 9, options.max_sequential_skip_in_iterations)); + 9, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -1602,7 +1596,7 @@ TEST_F(DBIteratorTest, DBIterator7) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 13, options.max_sequential_skip_in_iterations)); + 13, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -1649,7 +1643,7 @@ TEST_F(DBIteratorTest, DBIterator7) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 14, options.max_sequential_skip_in_iterations)); + 14, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -1678,7 +1672,7 @@ TEST_F(DBIteratorTest, DBIterator8) { std::unique_ptr db_iter(NewDBIterator( 
env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 10, options.max_sequential_skip_in_iterations)); + 10, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); @@ -1707,7 +1701,7 @@ TEST_F(DBIteratorTest, DBIterator9) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 10, options.max_sequential_skip_in_iterations)); + 10, options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); @@ -1754,7 +1748,7 @@ TEST_F(DBIteratorTest, DBIterator10) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 10, options.max_sequential_skip_in_iterations)); + 10, options.max_sequential_skip_in_iterations, 0)); db_iter->Seek("c"); ASSERT_TRUE(db_iter->Valid()); @@ -1778,9 +1772,9 @@ TEST_F(DBIteratorTest, SeekToLastOccurrenceSeq0) { internal_iter->AddPut("b", "2"); internal_iter->Finish(); - std::unique_ptr db_iter(NewDBIterator( - env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, - 10, 0 /* force seek */)); + std::unique_ptr db_iter( + NewDBIterator(env_, ImmutableCFOptions(options), BytewiseComparator(), + internal_iter, 10, 0 /* force seek */, 0)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1807,7 +1801,7 @@ TEST_F(DBIteratorTest, DBIterator11) { std::unique_ptr db_iter(NewDBIterator( env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter, 1, - options.max_sequential_skip_in_iterations)); + options.max_sequential_skip_in_iterations, 0)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); @@ -1832,7 +1826,7 @@ TEST_F(DBIteratorTest, DBIterator12) { std::unique_ptr db_iter( NewDBIterator(env_, ImmutableCFOptions(options), BytewiseComparator(), - internal_iter, 10, 
0)); + internal_iter, 10, 0, 0)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); @@ -1874,7 +1868,7 @@ class DBIterWithMergeIterTest : public testing::Test { db_iter_.reset(NewDBIterator(env_, ImmutableCFOptions(options_), BytewiseComparator(), merge_iter, 8 /* read data earlier than seqId 8 */, - 3 /* max iterators before reseek */)); + 3 /* max iterators before reseek */, 0)); } Env* env_; diff --git a/db/db_tailing_iter_test.cc b/db/db_tailing_iter_test.cc index 195424386..16edb64aa 100644 --- a/db/db_tailing_iter_test.cc +++ b/db/db_tailing_iter_test.cc @@ -654,6 +654,51 @@ TEST_F(DBTestTailingIterator, ManagedTailingIteratorSeekToSame) { ASSERT_EQ(found, iter->key().ToString()); } +TEST_F(DBTestTailingIterator, ForwardIteratorVersionProperty) { + Options options = CurrentOptions(); + options.write_buffer_size = 1000; + + ReadOptions read_options; + read_options.tailing = true; + + Put("foo", "bar"); + + uint64_t v1, v2, v3, v4; + { + std::unique_ptr iter(db_->NewIterator(read_options)); + iter->Seek("foo"); + std::string prop_value; + ASSERT_OK( + iter->GetProperty("rocksdb.iterator.version-number", &prop_value)); + v1 = static_cast(std::atoi(prop_value.c_str())); + + Put("foo1", "bar1"); + Flush(); + + ASSERT_OK( + iter->GetProperty("rocksdb.iterator.version-number", &prop_value)); + v2 = static_cast(std::atoi(prop_value.c_str())); + + iter->Seek("f"); + + ASSERT_OK( + iter->GetProperty("rocksdb.iterator.version-number", &prop_value)); + v3 = static_cast(std::atoi(prop_value.c_str())); + + ASSERT_EQ(v1, v2); + ASSERT_GT(v3, v2); + } + + { + std::unique_ptr iter(db_->NewIterator(read_options)); + iter->Seek("foo"); + std::string prop_value; + ASSERT_OK( + iter->GetProperty("rocksdb.iterator.version-number", &prop_value)); + v4 = static_cast(std::atoi(prop_value.c_str())); + } + ASSERT_EQ(v3, v4); +} } // namespace rocksdb #endif // !defined(ROCKSDB_LITE) diff --git a/db/db_test2.cc b/db/db_test2.cc new file 
mode 100644 index 000000000..c2b1770c7 --- /dev/null +++ b/db/db_test2.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#include +#include "db/db_test_util.h" +#include "port/stack_trace.h" + +namespace rocksdb { + +class DBTest2 : public DBTestBase { + public: + DBTest2() : DBTestBase("/db_test2") {} +}; + +TEST_F(DBTest2, IteratorPropertyVersionNumber) { + Put("", ""); + Iterator* iter1 = db_->NewIterator(ReadOptions()); + std::string prop_value; + ASSERT_OK(iter1->GetProperty("rocksdb.iterator.version-number", &prop_value)); + uint64_t version_number1 = + static_cast(std::atoi(prop_value.c_str())); + + Put("", ""); + Flush(); + + Iterator* iter2 = db_->NewIterator(ReadOptions()); + ASSERT_OK(iter2->GetProperty("rocksdb.iterator.version-number", &prop_value)); + uint64_t version_number2 = + static_cast(std::atoi(prop_value.c_str())); + + ASSERT_GT(version_number2, version_number1); + + Put("", ""); + + Iterator* iter3 = db_->NewIterator(ReadOptions()); + ASSERT_OK(iter3->GetProperty("rocksdb.iterator.version-number", &prop_value)); + uint64_t version_number3 = + static_cast(std::atoi(prop_value.c_str())); + + ASSERT_EQ(version_number2, version_number3); + + iter1->SeekToFirst(); + ASSERT_OK(iter1->GetProperty("rocksdb.iterator.version-number", &prop_value)); + uint64_t version_number1_new = + static_cast(std::atoi(prop_value.c_str())); + ASSERT_EQ(version_number1, version_number1_new); + + delete iter1; + delete iter2; + delete iter3; +} +} // namespace rocksdb + +int main(int 
argc, char** argv) { + rocksdb::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/db/forward_iterator.cc b/db/forward_iterator.cc index 7af8e18af..35439c75e 100644 --- a/db/forward_iterator.cc +++ b/db/forward_iterator.cc @@ -10,15 +10,16 @@ #include #include -#include "db/job_context.h" +#include "db/column_family.h" #include "db/db_impl.h" #include "db/db_iter.h" -#include "db/column_family.h" +#include "db/dbformat.h" +#include "db/job_context.h" #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "table/merger.h" -#include "db/dbformat.h" +#include "util/string_util.h" #include "util/sync_point.h" namespace rocksdb { @@ -471,6 +472,15 @@ Status ForwardIterator::status() const { return immutable_status_; } +Status ForwardIterator::GetProperty(std::string prop_name, std::string* prop) { + assert(prop != nullptr); + if (prop_name == "rocksdb.iterator.version-number") { + *prop = ToString(sv_->version_number); + return Status::OK(); + } + return Status::InvalidArgument(); +} + void ForwardIterator::RebuildIterators(bool refresh_sv) { // Clean up Cleanup(refresh_sv); diff --git a/db/forward_iterator.h b/db/forward_iterator.h index 16a726b08..b5beeceef 100644 --- a/db/forward_iterator.h +++ b/db/forward_iterator.h @@ -71,6 +71,8 @@ class ForwardIterator : public InternalIterator { virtual Slice key() const override; virtual Slice value() const override; virtual Status status() const override; + virtual Status GetProperty(std::string prop_name, std::string* prop) override; + bool TEST_CheckDeletedIters(int* deleted_iters, int* num_iters); private: diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index 2cd107f43..7838d8912 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -105,6 +105,8 @@ class Iterator : public Cleanable { // - DB tables were created with // BlockBasedTableOptions::use_delta_encoding // set to 
false. + // Property "rocksdb.iterator.version-number": + // Number of LSM version used by the iterator. virtual Status GetProperty(std::string prop_name, std::string* prop); private: diff --git a/table/internal_iterator.h b/table/internal_iterator.h index 0be1c4ce2..d487b3124 100644 --- a/table/internal_iterator.h +++ b/table/internal_iterator.h @@ -6,6 +6,7 @@ #pragma once +#include #include "rocksdb/iterator.h" #include "rocksdb/status.h" @@ -78,6 +79,10 @@ class InternalIterator : public Cleanable { // set to false. virtual bool IsKeyPinned() const { return false; } + virtual Status GetProperty(std::string prop_name, std::string* prop) { + return Status::NotSupported(""); + } + private: // No copying allowed InternalIterator(const InternalIterator&) = delete; From ef204df7efac05ed83961783d94ce429f7aa4aac Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 2 Mar 2016 16:26:47 -0800 Subject: [PATCH 156/195] Compaction always needs to be removed from level0_compactions_in_progress_ for universal compaction Summary: We always put compaction to level0_compactions_in_progress_ for universal compaction, so we should also remove it. The bug causes assert failure when running manual compaction. Test Plan: TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom,compact --subcompactions=16 --compaction_style=1 always fails on my host. After the fix, it doesn't fail any more. Reviewers: IslamAbdelRahman, andrewkr, kradhakrishnan, yhchiang Reviewed By: yhchiang Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D55017 --- db/compaction_picker.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index dad59a0a3..91b9a2b5a 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -141,7 +141,8 @@ CompactionPicker::~CompactionPicker() {} // Delete this compaction from the list of running compactions. 
void CompactionPicker::ReleaseCompactionFiles(Compaction* c, Status status) { - if (c->start_level() == 0) { + if (c->start_level() == 0 || + ioptions_.compaction_style == kCompactionStyleUniversal) { level0_compactions_in_progress_.erase(c); } if (!status.ok()) { From 501927ffc4f64b808d708a3f86631378fef7e282 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 3 Mar 2016 13:32:20 -0800 Subject: [PATCH 157/195] [backupable db] Remove file size embedded in name workaround Summary: Now that we get sizes efficiently, we no longer need the workaround to embed file size in filename. Test Plan: $ ./backupable_db_test Reviewers: sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D55035 --- include/rocksdb/utilities/backupable_db.h | 9 --- utilities/backupable/backupable_db.cc | 71 +++---------------- utilities/backupable/backupable_db_test.cc | 45 +----------- utilities/backupable/backupable_db_testutil.h | 15 ---- 4 files changed, 10 insertions(+), 130 deletions(-) delete mode 100644 utilities/backupable/backupable_db_testutil.h diff --git a/include/rocksdb/utilities/backupable_db.h b/include/rocksdb/utilities/backupable_db.h index 640f1d390..06caa5bb0 100644 --- a/include/rocksdb/utilities/backupable_db.h +++ b/include/rocksdb/utilities/backupable_db.h @@ -88,14 +88,6 @@ struct BackupableDBOptions { // *turn it on only if you know what you're doing* bool share_files_with_checksum; - // Try to use the file size in file name instead of getting size from HDFS, - // if the file is generated with options.share_files_with_checksum = true. - // This is a temporary solution to reduce the backupable Db open latency when - // There are too many sst files. Will remove the option after we have a - // permanent solution. 
- // Default: false - bool use_file_size_in_file_name; - // Up to this many background threads will copy files for CreateNewBackup() // and RestoreDBFromBackup() // Default: 1 @@ -125,7 +117,6 @@ struct BackupableDBOptions { backup_rate_limit(_backup_rate_limit), restore_rate_limit(_restore_rate_limit), share_files_with_checksum(false), - use_file_size_in_file_name(false), max_background_operations(_max_background_operations), callback_trigger_interval_size(_callback_trigger_interval_size) { assert(share_table_files || !share_files_with_checksum); diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index 63665e9a6..cb85edc20 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -187,7 +187,7 @@ class BackupEngineImpl : public BackupEngine { // @param abs_path_to_size Pre-fetched file sizes (bytes). Status LoadFromFile( - const std::string& backup_dir, bool use_size_in_file_name, + const std::string& backup_dir, const std::unordered_map& abs_path_to_size); Status StoreToFile(bool sync); @@ -591,9 +591,8 @@ Status BackupEngineImpl::Initialize() { for (auto& backup : backups_) { InsertPathnameToSizeBytes( GetAbsolutePath(GetPrivateFileRel(backup.first)), &abs_path_to_size); - Status s = backup.second->LoadFromFile( - options_.backup_dir, options_.use_file_size_in_file_name, - abs_path_to_size); + Status s = + backup.second->LoadFromFile(options_.backup_dir, abs_path_to_size); if (!s.ok()) { Log(options_.info_log, "Backup %u corrupted -- %s", backup.first, s.ToString().c_str()); @@ -1563,55 +1562,6 @@ Status BackupEngineImpl::BackupMeta::Delete(bool delete_meta) { return s; } -namespace { -bool ParseStrToUint64(const std::string& str, uint64_t* out) { - try { - unsigned long ul = std::stoul(str); - *out = static_cast(ul); - return true; - } catch (const std::invalid_argument&) { - return false; - } catch (const std::out_of_range&) { - return false; - } -} - -// Parse file name in the 
format of -// "shared_checksum/__.sst, and fill `size` with -// the parsed part. -// Will also accept only name part, or a file path in URL format. -// if file name doesn't have the extension of "sst", or doesn't have '_' as a -// part of the file name, or we can't parse a number from the sub string -// between the last '_' and '.', return false. -bool GetFileSizeFromBackupFileName(const std::string full_name, - uint64_t* size) { - auto dot_pos = full_name.find_last_of('.'); - if (dot_pos == std::string::npos) { - return false; - } - if (full_name.substr(dot_pos + 1) != "sst") { - return false; - } - auto last_underscore_pos = full_name.find_last_of('_'); - if (last_underscore_pos == std::string::npos) { - return false; - } - if (dot_pos <= last_underscore_pos + 2) { - return false; - } - return ParseStrToUint64(full_name.substr(last_underscore_pos + 1, - dot_pos - last_underscore_pos - 1), - size); -} -} // namespace - -namespace test { -bool TEST_GetFileSizeFromBackupFileName(const std::string full_name, - uint64_t* size) { - return GetFileSizeFromBackupFileName(full_name, size); -} -} // namespace test - // each backup meta file is of the format: // // @@ -1620,7 +1570,7 @@ bool TEST_GetFileSizeFromBackupFileName(const std::string full_name, // // ... 
Status BackupEngineImpl::BackupMeta::LoadFromFile( - const std::string& backup_dir, bool use_size_in_file_name, + const std::string& backup_dir, const std::unordered_map& abs_path_to_size) { assert(Empty()); Status s; @@ -1663,14 +1613,11 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile( if (file_info) { size = file_info->size; } else { - if (!use_size_in_file_name || - !GetFileSizeFromBackupFileName(filename, &size)) { - std::string abs_path = backup_dir + "/" + filename; - try { - size = abs_path_to_size.at(abs_path); - } catch (std::out_of_range& e) { - return Status::NotFound("Size missing for pathname: " + abs_path); - } + std::string abs_path = backup_dir + "/" + filename; + try { + size = abs_path_to_size.at(abs_path); + } catch (std::out_of_range& e) { + return Status::NotFound("Size missing for pathname: " + abs_path); } } diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 0379cdd07..ce34d7d1a 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -27,7 +27,6 @@ #include "util/sync_point.h" #include "util/testutil.h" #include "util/mock_env.h" -#include "utilities/backupable/backupable_db_testutil.h" namespace rocksdb { @@ -596,8 +595,7 @@ class BackupableDBTestWithParam : public BackupableDBTest, public testing::WithParamInterface { public: BackupableDBTestWithParam() { - backupable_options_->share_files_with_checksum = - backupable_options_->use_file_size_in_file_name = GetParam(); + backupable_options_->share_files_with_checksum = GetParam(); } }; @@ -746,47 +744,6 @@ TEST_P(BackupableDBTestWithParam, OnlineIntegrationTest) { INSTANTIATE_TEST_CASE_P(BackupableDBTestWithParam, BackupableDBTestWithParam, ::testing::Bool()); -TEST_F(BackupableDBTest, GetFileSizeFromBackupFileName) { - uint64_t size = 0; - - ASSERT_TRUE(test::TEST_GetFileSizeFromBackupFileName( - "shared_checksum/6580354_1874793674_65806675.sst", &size)); - ASSERT_EQ(65806675u, 
size); - - ASSERT_TRUE(test::TEST_GetFileSizeFromBackupFileName( - "hdfs://a.b:80/a/b/shared_checksum/6580354_1874793674_85806675.sst", - &size)); - ASSERT_EQ(85806675u, size); - - ASSERT_TRUE(test::TEST_GetFileSizeFromBackupFileName( - "6580354_1874793674_65806665.sst", &size)); - ASSERT_EQ(65806665u, size); - - ASSERT_TRUE(test::TEST_GetFileSizeFromBackupFileName( - "private/66/6580354_1874793674_65806666.sst", &size)); - ASSERT_EQ(65806666u, size); - - ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName( - "shared_checksum/6580354.sst", &size)); - - ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName( - "private/368/6592388.log", &size)); - - ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName( - "private/68/MANIFEST-6586581", &size)); - - ASSERT_TRUE( - !test::TEST_GetFileSizeFromBackupFileName("private/68/CURRENT", &size)); - - ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName( - "shared_checksum/6580354_1874793674_65806675.log", &size)); - - ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName( - "shared_checksum/6580354_1874793674_65806675", &size)); - - ASSERT_TRUE(!test::TEST_GetFileSizeFromBackupFileName("meta/368", &size)); -} - // this will make sure that backup does not copy the same file twice TEST_F(BackupableDBTest, NoDoubleCopy) { OpenDBAndBackupEngine(true, true); diff --git a/utilities/backupable/backupable_db_testutil.h b/utilities/backupable/backupable_db_testutil.h deleted file mode 100644 index efe4acdf2..000000000 --- a/utilities/backupable/backupable_db_testutil.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. An additional grant -// of patent rights can be found in the PATENTS file in the same directory. 
-#pragma once -#ifndef ROCKSDB_LITE -#include - -namespace rocksdb { -namespace test { -extern bool TEST_GetFileSizeFromBackupFileName(const std::string full_name, - uint64_t* size); -} // namespace test -} // namespace rocksdb -#endif // ROCKSDB_LITE From 58ecd91326bdc011efe5fd0be11795b6cbca74b4 Mon Sep 17 00:00:00 2001 From: SherlockNoMad Date: Thu, 3 Mar 2016 15:08:24 -0800 Subject: [PATCH 158/195] Fix Windows build --- utilities/backupable/backupable_db.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index cb85edc20..dd544871d 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -1616,7 +1616,7 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile( std::string abs_path = backup_dir + "/" + filename; try { size = abs_path_to_size.at(abs_path); - } catch (std::out_of_range& e) { + } catch (std::out_of_range&) { return Status::NotFound("Size missing for pathname: " + abs_path); } } From dfe96c72c3a8a99e446c4b0613aed1374355ab9c Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Thu, 3 Mar 2016 18:25:07 -0800 Subject: [PATCH 159/195] Fix WriteLevel0TableForRecovery file delete protection Summary: The call to ``` CaptureCurrentFileNumberInPendingOutputs() ``` should be before ``` versions_->NewFileNumber() ``` Right now we are not actually protecting the file from being deleted Test Plan: make check Reviewers: sdong, anthony, yhchiang Reviewed By: yhchiang Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D54645 --- db/db_impl.cc | 2 +- db/db_impl.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index eb46811ae..153b605a8 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1386,9 +1386,9 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, mutex_.AssertHeld(); const uint64_t start_micros = env_->NowMicros(); FileMetaData 
meta; - meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0); auto pending_outputs_inserted_elem = CaptureCurrentFileNumberInPendingOutputs(); + meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0); ReadOptions ro; ro.total_order_seek = true; Arena arena; diff --git a/db/db_impl.h b/db/db_impl.h index e0d10e99e..f068fe084 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -494,10 +494,11 @@ class DBImpl : public DB { // Background process needs to call // auto x = CaptureCurrentFileNumberInPendingOutputs() + // auto file_num = versions_->NewFileNumber(); // // ReleaseFileNumberFromPendingOutputs(x) - // This will protect any temporary files created while is - // executing from being deleted. + // This will protect any file with number `file_num` or greater from being + // deleted while is running. // ----------- // This function will capture current file number and append it to // pending_outputs_. This will prevent any background process to delete any From 68189f7e1bd531ef687e820ab9a2e6577a3283ce Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Fri, 4 Mar 2016 12:32:11 -0800 Subject: [PATCH 160/195] Update benchmarks used to measure subcompaction performance Summary: After closely working with Mark, Siying, and Yueh-Hsuan this set of changes reflects the updates needed to measure RocksDB subcompaction performance in a correct manner. The essence of the benchmark is executing `fillrandom` followed by `compact` with the correct set of options for various number of subcompactions specified. Test Plan: Tested internally to verify correctness and reliability. 
Reviewers: sdong, yhchiang, MarkCallaghan Reviewed By: MarkCallaghan Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D55089 --- tools/benchmark.sh | 100 +++++++++++++++++++++++++++++++-------- tools/run_flash_bench.sh | 6 +-- 2 files changed, 82 insertions(+), 24 deletions(-) diff --git a/tools/benchmark.sh b/tools/benchmark.sh index 518577d08..7d3e9d550 100755 --- a/tools/benchmark.sh +++ b/tools/benchmark.sh @@ -8,6 +8,14 @@ if [ $# -ne 1 ]; then exit 0 fi +# Make it easier to run only the compaction test. Getting valid data requires +# a number of iterations and having an ability to run the test separately from +# rest of the benchmarks helps. +if [ "$COMPACTION_TEST" == "1" -a "$1" != "universal_compaction" ]; then + echo "Skipping $1 because it's not a compaction test." + exit 0 +fi + # size constants K=1024 M=$((1024 * K)) @@ -104,6 +112,23 @@ params_bulkload="$const_params --max_background_compactions=16 --max_background_ --level0_slowdown_writes_trigger=$((10 * M)) \ --level0_stop_writes_trigger=$((10 * M))" +# +# Tune values for level and universal compaction. +# For universal compaction, these level0_* options mean total sorted of runs in +# LSM. In level-based compaction, it means number of L0 files. +# +params_level_compact="$const_params --max_background_compactions=16 \ + --max_background_flushes=7 \ + --level0_file_num_compaction_trigger=4 \ + --level0_slowdown_writes_trigger=16 \ + --level0_stop_writes_trigger=20" + +params_univ_compact="$const_params --max_background_compactions=16 \ + --max_background_flushes=7 \ + --level0_file_num_compaction_trigger=8 \ + --level0_slowdown_writes_trigger=16 \ + --level0_stop_writes_trigger=20" + function summarize_result { test_out=$1 test_name=$2 @@ -162,29 +187,64 @@ function run_bulkload { eval $cmd } -function run_univ_compaction_worker { - # Worker function intended to be called from run_univ_compaction. 
- echo -e "\nCompacting ...\n" +function run_manual_compaction_worker { + # This runs with a vector memtable and the WAL disabled to load faster. + # It is still crash safe and the client can discover where to restart a + # load after a crash. I think this is a good way to load. + echo "Bulk loading $num_keys random keys for manual compaction." - compact_output_file=$output_dir/benchmark_univ_compact_sub_$3.t${num_threads}.s${syncval}.log + fillrandom_output_file=$output_dir/benchmark_man_compact_fillrandom_$3.log + man_compact_output_log=$output_dir/benchmark_man_compact_$3.log - # The essence of the command is borrowed from run_change overwrite with - # compaction specific options being added. - cmd="./db_bench --benchmarks=overwrite \ - --use_existing_db=1 \ - --sync=$syncval \ - $params_w \ + if [ "$2" == "1" ]; then + extra_params=$params_univ_compact + else + extra_params=$params_level_compact + fi + + # Make sure that fillrandom uses the same compaction options as compact. + cmd="./db_bench --benchmarks=fillrandom \ + --use_existing_db=0 \ + --disable_auto_compactions=0 \ + --sync=0 \ + $extra_params \ --threads=$num_threads \ - --merge_operator=\"put\" \ - --seed=$( date +%s ) \ --compaction_measure_io_stats=$1 \ --compaction_style=$2 \ --subcompactions=$3 \ - 2>&1 | tee -a $compact_output_file" - echo $cmd | tee $compact_output_file + --memtablerep=vector \ + --disable_wal=1 \ + --seed=$( date +%s ) \ + 2>&1 | tee -a $fillrandom_output_file" + + echo $cmd | tee $fillrandom_output_file eval $cmd - summarize_result $compact_output_file univ_compact_sub_comp_$3 overwrite + summarize_result $fillrandom_output_file man_compact_fillrandom_$3 fillrandom + + echo "Compacting with $3 subcompactions specified ..." + + # This is the part we're really interested in. Given that compact benchmark + # doesn't output regular statistics then we'll just use the time command to + # measure how long this step takes. 
+ cmd="{ \ + time ./db_bench --benchmarks=compact \ + --use_existing_db=1 \ + --disable_auto_compactions=0 \ + --sync=0 \ + $extra_params \ + --threads=$num_threads \ + --compaction_measure_io_stats=$1 \ + --compaction_style=$2 \ + --subcompactions=$3 \ + ;} + 2>&1 | tee -a $man_compact_output_log" + + echo $cmd | tee $man_compact_output_log + eval $cmd + + # Can't use summarize_result here. One way to analyze the results is to run + # "grep real" on the resulting log files. } function run_univ_compaction { @@ -198,14 +258,16 @@ function run_univ_compaction { # by allowing the usage of { 1, 2, 4, 8, 16 } threads for different runs. subcompactions=("1" "2" "4" "8" "16") - # Have a separate suffix for each experiment so that separate results will be - # persisted. + # Do the real work of running various experiments. + + # Run the compaction benchmark which is based on bulkload. It pretty much + # consists of running manual compaction with different number of subcompaction + # threads. log_suffix=1 - # Do the real work of running various experiments. for ((i=0; i < ${#subcompactions[@]}; i++)) do - run_univ_compaction_worker $io_stats $compaction_style ${subcompactions[$i]} $log_suffix + run_manual_compaction_worker $io_stats $compaction_style ${subcompactions[$i]} $log_suffix ((log_suffix++)) done } diff --git a/tools/run_flash_bench.sh b/tools/run_flash_bench.sh index 8fe0b34ad..fc2c9470f 100755 --- a/tools/run_flash_bench.sh +++ b/tools/run_flash_bench.sh @@ -267,7 +267,7 @@ done ###### Universal compaction tests. # Use a single thread to reduce the variability in the benchmark. 
-env $ARGS NUM_THREADS=1 ./tools/benchmark.sh universal_compaction +env $ARGS COMPACTION_TEST=1 NUM_THREADS=1 ./tools/benchmark.sh universal_compaction if [[ $skip_low_pri_tests != 1 ]]; then echo bulkload > $output_dir/report2.txt @@ -355,8 +355,4 @@ if [[ $skip_low_pri_tests != 1 ]]; then grep revrangewhilemerg $output_dir/report.txt >> $output_dir/report2.txt fi -echo universal_compaction >> $output_dir/report2.txt -head -1 $output_dir/report.txt >> $output_dir/report2.txt -grep univ_compact $output_dir/report.txt >> $output_dir/report2.txt - cat $output_dir/report2.txt From a7d4eb2f34d3177b9c062474626503a40c530e34 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Fri, 4 Mar 2016 14:24:52 -0800 Subject: [PATCH 161/195] Fix a bug where flush does not happen when a manual compaction is running Summary: Currently, when rocksdb tries to run manual compaction to refit data into a level, there's a ReFitLevel() process that requires no bg work is currently running. When RocksDB plans to ReFitLevel(), it will do the following: 1. pause scheduling new bg work. 2. wait until all bg work finished 3. do the ReFitLevel() 4. unpause scheduling new bg work. However, as it pause scheduling new bg work at step one and waiting for all bg work finished in step 2, RocksDB will stop flushing until all bg work is done (which could take a long time.) This patch fix this issue by changing the way ReFitLevel() pause the background work: 1. pause scheduling compaction. 2. wait until all bg work finished. 3. pause scheduling flush 4. do ReFitLevel() 5. unpause both flush and compaction. The major difference is that. We only pause scheduling compaction in step 1 and wait for all bg work finished in step 2. This prevent flush being blocked for a long time. Although there's a very rare case that ReFitLevel() might be in starvation in step 2, but it's less likely the case as flush typically finish very fast. Test Plan: existing test. 
Reviewers: anthony, IslamAbdelRahman, kradhakrishnan, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D55029 --- db/db_impl.cc | 13 ++++++++++++- db/db_impl.h | 5 ++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 153b605a8..ebcbe1a4e 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -282,6 +282,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname) #endif // ROCKSDB_LITE event_logger_(db_options_.info_log.get()), bg_work_paused_(0), + bg_compaction_paused_(0), refitting_level_(false), opened_successfully_(false) { env_->GetAbsolutePath(dbname, &db_absolute_path_); @@ -1889,10 +1890,11 @@ Status DBImpl::CompactFilesImpl( Status DBImpl::PauseBackgroundWork() { InstrumentedMutexLock guard_lock(&mutex_); - bg_work_paused_++; + bg_compaction_paused_++; while (bg_compaction_scheduled_ > 0 || bg_flush_scheduled_ > 0) { bg_cv_.Wait(); } + bg_work_paused_++; return Status::OK(); } @@ -1902,7 +1904,11 @@ Status DBImpl::ContinueBackgroundWork() { return Status::InvalidArgument(); } assert(bg_work_paused_ > 0); + assert(bg_compaction_paused_ > 0); + bg_compaction_paused_--; bg_work_paused_--; + // It's sufficient to check just bg_work_paused_ here since + // bg_work_paused_ is always no greater than bg_compaction_paused_ if (bg_work_paused_ == 0) { MaybeScheduleFlushOrCompaction(); } @@ -2485,6 +2491,11 @@ void DBImpl::MaybeScheduleFlushOrCompaction() { } } + if (bg_compaction_paused_ > 0) { + // we paused the background compaction + return; + } + if (HasExclusiveManualCompaction()) { // only manual compactions are allowed to run. 
don't schedule automatic // compactions diff --git a/db/db_impl.h b/db/db_impl.h index f068fe084..b9efb775b 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -844,9 +844,12 @@ class DBImpl : public DB { // Unified interface for logging events EventLogger event_logger_; - // A value of >0 temporarily disables scheduling of background work + // A value of > 0 temporarily disables scheduling of background work int bg_work_paused_; + // A value of > 0 temporarily disables scheduling of background compaction + int bg_compaction_paused_; + // Guard against multiple concurrent refitting bool refitting_level_; From bf1c4089db50abe1f19bf0d92efff6b0afb430d3 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Fri, 4 Mar 2016 16:03:31 -0800 Subject: [PATCH 162/195] Use pure if-then check instead of assert in EraseColumnFamilyInfo Summary: Use pure if-then check instead of assert in EraseColumnFamilyInfo when the specified column family does not found in the cf_info_map_. So the second deletion will be no op instead of crash. Test Plan: existing test. Reviewers: sdong, anthony, kradhakrishnan, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D55023 --- util/thread_status_updater.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/util/thread_status_updater.cc b/util/thread_status_updater.cc index 375b2f321..a3f9a9afc 100644 --- a/util/thread_status_updater.cc +++ b/util/thread_status_updater.cc @@ -3,11 +3,11 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
+#include "util/thread_status_updater.h" #include #include "rocksdb/env.h" #include "port/likely.h" #include "util/mutexlock.h" -#include "util/thread_status_updater.h" namespace rocksdb { @@ -246,7 +246,9 @@ void ThreadStatusUpdater::EraseColumnFamilyInfo(const void* cf_key) { // a consistent view of global column family table (cf_info_map). std::lock_guard lck(thread_list_mutex_); auto cf_pair = cf_info_map_.find(cf_key); - assert(cf_pair != cf_info_map_.end()); + if (cf_pair == cf_info_map_.end()) { + return; + } auto* cf_info = cf_pair->second.get(); assert(cf_info); @@ -278,7 +280,9 @@ void ThreadStatusUpdater::EraseDatabaseInfo(const void* db_key) { size_t result __attribute__((unused)) = 0; for (auto cf_key : db_pair->second) { auto cf_pair = cf_info_map_.find(cf_key); - assert(cf_pair != cf_info_map_.end()); + if (cf_pair == cf_info_map_.end()) { + continue; + } cf_pair->second.reset(); result = cf_info_map_.erase(cf_key); assert(result); From 294bdf9ee21e242f151a4983913adb3f4783789d Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 3 Mar 2016 13:18:56 -0800 Subject: [PATCH 163/195] Change Property name from "rocksdb.current_version_number" to "rocksdb.current-super-version-number" Summary: I realized I again is wrong about the naming convention. Let me change it to the correct one. Test Plan: Run unit tests. 
Reviewers: IslamAbdelRahman, kradhakrishnan, yhchiang, andrewkr Reviewed By: andrewkr Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D55041 --- db/db_iter.cc | 2 +- db/db_properties_test.cc | 9 ++++++--- db/db_tailing_iter_test.cc | 16 ++++++++-------- db/db_test2.cc | 12 ++++++++---- db/forward_iterator.cc | 2 +- db/internal_stats.cc | 13 +++++++------ db/internal_stats.h | 4 ++-- include/rocksdb/db.h | 9 ++++++--- include/rocksdb/iterator.h | 5 +++-- 9 files changed, 42 insertions(+), 30 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index bfde03d17..07c8d9e0f 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -144,7 +144,7 @@ class DBIter: public Iterator { if (prop == nullptr) { return Status::InvalidArgument("prop is nullptr"); } - if (prop_name == "rocksdb.iterator.version-number") { + if (prop_name == "rocksdb.iterator.super-version-number") { // First try to pass the value returned from inner iterator. if (!iter_->GetProperty(prop_name, prop).ok()) { *prop = ToString(version_number_); diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index dbfdac970..60e04cfad 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -92,11 +92,14 @@ TEST_F(DBPropertiesTest, Empty) { TEST_F(DBPropertiesTest, CurrentVersionNumber) { uint64_t v1, v2, v3; - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.current_version_number", &v1)); + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.current-super-version-number", &v1)); Put("12345678", ""); - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.current_version_number", &v2)); + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.current-super-version-number", &v2)); Flush(); - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.current_version_number", &v3)); + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.current-super-version-number", &v3)); ASSERT_EQ(v1, v2); ASSERT_GT(v3, v2); diff --git a/db/db_tailing_iter_test.cc b/db/db_tailing_iter_test.cc index 
16edb64aa..bfb62926e 100644 --- a/db/db_tailing_iter_test.cc +++ b/db/db_tailing_iter_test.cc @@ -668,21 +668,21 @@ TEST_F(DBTestTailingIterator, ForwardIteratorVersionProperty) { std::unique_ptr iter(db_->NewIterator(read_options)); iter->Seek("foo"); std::string prop_value; - ASSERT_OK( - iter->GetProperty("rocksdb.iterator.version-number", &prop_value)); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.super-version-number", + &prop_value)); v1 = static_cast(std::atoi(prop_value.c_str())); Put("foo1", "bar1"); Flush(); - ASSERT_OK( - iter->GetProperty("rocksdb.iterator.version-number", &prop_value)); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.super-version-number", + &prop_value)); v2 = static_cast(std::atoi(prop_value.c_str())); iter->Seek("f"); - ASSERT_OK( - iter->GetProperty("rocksdb.iterator.version-number", &prop_value)); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.super-version-number", + &prop_value)); v3 = static_cast(std::atoi(prop_value.c_str())); ASSERT_EQ(v1, v2); @@ -693,8 +693,8 @@ TEST_F(DBTestTailingIterator, ForwardIteratorVersionProperty) { std::unique_ptr iter(db_->NewIterator(read_options)); iter->Seek("foo"); std::string prop_value; - ASSERT_OK( - iter->GetProperty("rocksdb.iterator.version-number", &prop_value)); + ASSERT_OK(iter->GetProperty("rocksdb.iterator.super-version-number", + &prop_value)); v4 = static_cast(std::atoi(prop_value.c_str())); } ASSERT_EQ(v3, v4); diff --git a/db/db_test2.cc b/db/db_test2.cc index c2b1770c7..1764131ac 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -21,7 +21,8 @@ TEST_F(DBTest2, IteratorPropertyVersionNumber) { Put("", ""); Iterator* iter1 = db_->NewIterator(ReadOptions()); std::string prop_value; - ASSERT_OK(iter1->GetProperty("rocksdb.iterator.version-number", &prop_value)); + ASSERT_OK( + iter1->GetProperty("rocksdb.iterator.super-version-number", &prop_value)); uint64_t version_number1 = static_cast(std::atoi(prop_value.c_str())); @@ -29,7 +30,8 @@ TEST_F(DBTest2, 
IteratorPropertyVersionNumber) { Flush(); Iterator* iter2 = db_->NewIterator(ReadOptions()); - ASSERT_OK(iter2->GetProperty("rocksdb.iterator.version-number", &prop_value)); + ASSERT_OK( + iter2->GetProperty("rocksdb.iterator.super-version-number", &prop_value)); uint64_t version_number2 = static_cast(std::atoi(prop_value.c_str())); @@ -38,14 +40,16 @@ TEST_F(DBTest2, IteratorPropertyVersionNumber) { Put("", ""); Iterator* iter3 = db_->NewIterator(ReadOptions()); - ASSERT_OK(iter3->GetProperty("rocksdb.iterator.version-number", &prop_value)); + ASSERT_OK( + iter3->GetProperty("rocksdb.iterator.super-version-number", &prop_value)); uint64_t version_number3 = static_cast(std::atoi(prop_value.c_str())); ASSERT_EQ(version_number2, version_number3); iter1->SeekToFirst(); - ASSERT_OK(iter1->GetProperty("rocksdb.iterator.version-number", &prop_value)); + ASSERT_OK( + iter1->GetProperty("rocksdb.iterator.super-version-number", &prop_value)); uint64_t version_number1_new = static_cast(std::atoi(prop_value.c_str())); ASSERT_EQ(version_number1, version_number1_new); diff --git a/db/forward_iterator.cc b/db/forward_iterator.cc index 35439c75e..bbca88f9c 100644 --- a/db/forward_iterator.cc +++ b/db/forward_iterator.cc @@ -474,7 +474,7 @@ Status ForwardIterator::status() const { Status ForwardIterator::GetProperty(std::string prop_name, std::string* prop) { assert(prop != nullptr); - if (prop_name == "rocksdb.iterator.version-number") { + if (prop_name == "rocksdb.iterator.super-version-number") { *prop = ToString(sv_->version_number); return Status::OK(); } diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 4fa38bb1f..a554f0b85 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -132,7 +132,8 @@ static const std::string is_file_deletions_enabled = static const std::string num_snapshots = "num-snapshots"; static const std::string oldest_snapshot_time = "oldest-snapshot-time"; static const std::string num_live_versions = "num-live-versions"; -static const 
std::string current_version_number = "current_version_number"; +static const std::string current_version_number = + "current-super-version-number"; static const std::string estimate_live_data_size = "estimate-live-data-size"; static const std::string base_level = "base-level"; static const std::string total_sst_files_size = "total-sst-files-size"; @@ -192,7 +193,7 @@ const std::string DB::Properties::kOldestSnapshotTime = rocksdb_prefix + oldest_snapshot_time; const std::string DB::Properties::kNumLiveVersions = rocksdb_prefix + num_live_versions; -const std::string DB::Properties::kCurrentVersionNumber = +const std::string DB::Properties::kCurrentSuperVersionNumber = rocksdb_prefix + current_version_number; const std::string DB::Properties::kEstimateLiveDataSize = rocksdb_prefix + estimate_live_data_size; @@ -257,8 +258,8 @@ const std::unordered_mapGetSuperVersionNumber(); return true; } diff --git a/db/internal_stats.h b/db/internal_stats.h index 3d5580228..03b2bd882 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -328,8 +328,8 @@ class InternalStats { bool HandleNumSnapshots(uint64_t* value, DBImpl* db, Version* version); bool HandleOldestSnapshotTime(uint64_t* value, DBImpl* db, Version* version); bool HandleNumLiveVersions(uint64_t* value, DBImpl* db, Version* version); - bool HandleCurrentVersionNumber(uint64_t* value, DBImpl* db, - Version* version); + bool HandleCurrentSuperVersionNumber(uint64_t* value, DBImpl* db, + Version* version); bool HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db, Version* version); bool HandleBaseLevel(uint64_t* value, DBImpl* db, Version* version); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 9a0022dfa..d05c04ea1 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -444,8 +444,11 @@ class DB { // by iterators or unfinished compactions. static const std::string kNumLiveVersions; - // "rocksdb.current-version-number" - returns number of curent LSM version. 
- static const std::string kCurrentVersionNumber; + // "rocksdb.current-super-version-number" - returns number of curent LSM + // version. It is a uint64_t integer number, incremented after there is + // any change to the LSM tree. The number is not preserved after restarting + // the DB. After DB restart, it will start from 0 again. + static const std::string kCurrentSuperVersionNumber; // "rocksdb.estimate-live-data-size" - returns an estimate of the amount of // live data in bytes. @@ -507,7 +510,7 @@ class DB { // "rocksdb.num-snapshots" // "rocksdb.oldest-snapshot-time" // "rocksdb.num-live-versions" - // "rocksdb.current_version_number" + // "rocksdb.current-super-version-number" // "rocksdb.estimate-live-data-size" // "rocksdb.total-sst-files-size" // "rocksdb.base-level" diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index 7838d8912..7da37ec33 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -105,8 +105,9 @@ class Iterator : public Cleanable { // - DB tables were created with // BlockBasedTableOptions::use_delta_encoding // set to false. - // Property "rocksdb.iterator.version-number": - // Number of LSM version used by the iterator. + // Property "rocksdb.iterator.super-version-number": + // LSM version used by the iterator. The same format as DB Property + // kCurrentSuperVersionNumber. See its comment for more information. 
virtual Status GetProperty(std::string prop_name, std::string* prop); private: From 171c8e80b1ebc1537e1f44c1c1f82d9f0ee0037d Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Mon, 7 Mar 2016 12:57:19 -0800 Subject: [PATCH 164/195] Update dependencies / Fix Clang Summary: Update dependencies and fix broken clang symlink version changes ``` zstd 0.4.7 => 0.5.1 jemalloc 4.0.3 => master ``` Test Plan: make check -j64 USE_CLANG=1 make check -j64 ROCKSDB_FBCODE_BUILD_WITH_481=1 make check -j64 Reviewers: sdong, yhchiang, anthony, andrewkr Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D55179 --- build_tools/dependencies.sh | 14 +++++++------- build_tools/dependencies_4.8.1.sh | 14 +++++++------- build_tools/update_dependencies.sh | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/build_tools/dependencies.sh b/build_tools/dependencies.sh index a7d9a6531..f5ef15c3a 100644 --- a/build_tools/dependencies.sh +++ b/build_tools/dependencies.sh @@ -1,16 +1,16 @@ GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.9.x/centos6-native/1317bc4/ -CLANG_BASE=/mnt/vol/engshare/fbcode/third-party2/clang/3.7.1/centos6-native/9d9ecb9/ -LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/5710d6a0fb0d12820aac0bffcd7fcb8646e7fff7/4.9.x/gcc-4.9-glibc-2.20/024dbc3 -GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0600c95b31226b5e535614c590677d87c62d8016/2.20/gcc-4.9-glibc-2.20/500e281 +CLANG_BASE=/mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/9d9ecb9/ +LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/ea2fd1278810d3af2ea52218d2767e09d786dbd0/4.9.x/gcc-4.9-glibc-2.20/024dbc3 +GLIBC_BASE=/mnt/gvfs/third-party2/glibc/f5484f168c0e4d19823d41df052c5870c6e575a4/2.20/gcc-4.9-glibc-2.20/500e281 SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/cbf6f1f209e5bd160bdc5d971744e039f36b1566/1.1.3/gcc-4.9-glibc-2.20/e9936bf ZLIB_BASE=/mnt/gvfs/third-party2/zlib/6d39cb54708049f527e713ad19f2aadb9d3667e8/1.2.8/gcc-4.9-glibc-2.20/e9936bf 
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/2ddd45f0853bfc8bb1c27f0f447236a1a26c338a/1.0.6/gcc-4.9-glibc-2.20/e9936bf LZ4_BASE=/mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf -ZSTD_BASE=/mnt/gvfs/third-party2/zstd/69d56740ffb89d8bc81ded8ec428c01a813ea948/0.4.7/gcc-4.9-glibc-2.20/e9936bf +ZSTD_BASE=/mnt/gvfs/third-party2/zstd/cb6c4880fcb4fee471574ba6af63a3882155a16a/0.5.1/gcc-4.9-glibc-2.20/e9936bf GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/c7275a4ceae0aca0929e56964a31dafc53c1ee96/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a -JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/5f0be745ddc0f86f22c8c8bb64b6b1163c93df91/4.0.3/gcc-4.9-glibc-2.20/a6c5e1e +JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/40791a3fef9206a77f2c4bc51f8169e5bf10d68e/master/gcc-4.9-glibc-2.20/a6c5e1e NUMA_BASE=/mnt/gvfs/third-party2/numa/ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73/2.0.8/gcc-4.9-glibc-2.20/e9936bf -LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/121f1a75c4414683aea8c70b761bfaf187f7c1a3/trunk/gcc-4.9-glibc-2.20/12266b1 +LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/303048f72efc92ae079e62dfc84823401aecfd94/trunk/gcc-4.9-glibc-2.20/12266b1 KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.9-glibc-2.20/da39a3e -BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/75670d0d8ef4891fd1ec2a7513ef01cd002c823b/2.25/centos6-native/da39a3e +BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/a5b8152b2a15ce8a98808cf954fbccec825a97bc/2.25/centos6-native/da39a3e VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/af85c56f424cd5edfc2c97588299b44ecdec96bb/3.10.0/gcc-4.9-glibc-2.20/e9936bf diff --git a/build_tools/dependencies_4.8.1.sh b/build_tools/dependencies_4.8.1.sh index 33c092ecb..845f765d0 100644 --- a/build_tools/dependencies_4.8.1.sh +++ b/build_tools/dependencies_4.8.1.sh @@ -1,16 +1,16 @@ GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.8.1/centos6-native/cc6c9dc/ 
-CLANG_BASE=/mnt/vol/engshare/fbcode/third-party2/clang/3.7.1/centos6-native/9d9ecb9/ -LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/5710d6a0fb0d12820aac0bffcd7fcb8646e7fff7/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc -GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0600c95b31226b5e535614c590677d87c62d8016/2.17/gcc-4.8.1-glibc-2.17/99df8fc +CLANG_BASE=/mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/9d9ecb9/ +LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/ea2fd1278810d3af2ea52218d2767e09d786dbd0/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc +GLIBC_BASE=/mnt/gvfs/third-party2/glibc/f5484f168c0e4d19823d41df052c5870c6e575a4/2.17/gcc-4.8.1-glibc-2.17/99df8fc SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/cbf6f1f209e5bd160bdc5d971744e039f36b1566/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a ZLIB_BASE=/mnt/gvfs/third-party2/zlib/6d39cb54708049f527e713ad19f2aadb9d3667e8/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/2ddd45f0853bfc8bb1c27f0f447236a1a26c338a/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a LZ4_BASE=/mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.8.1-glibc-2.17/c3f970a -ZSTD_BASE=/mnt/gvfs/third-party2/zstd/69d56740ffb89d8bc81ded8ec428c01a813ea948/0.4.7/gcc-4.8.1-glibc-2.17/c3f970a +ZSTD_BASE=/mnt/gvfs/third-party2/zstd/cb6c4880fcb4fee471574ba6af63a3882155a16a/0.5.1/gcc-4.8.1-glibc-2.17/c3f970a GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/c7275a4ceae0aca0929e56964a31dafc53c1ee96/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a -JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/5f0be745ddc0f86f22c8c8bb64b6b1163c93df91/4.0.3/gcc-4.8.1-glibc-2.17/8d31e51 +JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/40791a3fef9206a77f2c4bc51f8169e5bf10d68e/master/gcc-4.8.1-glibc-2.17/8d31e51 NUMA_BASE=/mnt/gvfs/third-party2/numa/ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a -LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/121f1a75c4414683aea8c70b761bfaf187f7c1a3/trunk/gcc-4.8.1-glibc-2.17/675d945 
+LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/303048f72efc92ae079e62dfc84823401aecfd94/trunk/gcc-4.8.1-glibc-2.17/675d945 KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.8.1-glibc-2.17/da39a3e -BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/75670d0d8ef4891fd1ec2a7513ef01cd002c823b/2.25/centos6-native/da39a3e +BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/a5b8152b2a15ce8a98808cf954fbccec825a97bc/2.25/centos6-native/da39a3e VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/af85c56f424cd5edfc2c97588299b44ecdec96bb/3.8.1/gcc-4.8.1-glibc-2.17/c3f970a diff --git a/build_tools/update_dependencies.sh b/build_tools/update_dependencies.sh index 4f8033398..9959700cf 100755 --- a/build_tools/update_dependencies.sh +++ b/build_tools/update_dependencies.sh @@ -65,7 +65,7 @@ echo "Writing dependencies to $OUTPUT" # Compilers locations GCC_BASE=`ls -d1 $TP2_LATEST/gcc/4.9.x/centos6-native/*/ | head -n1` -CLANG_BASE=`ls -d1 $TP2_LATEST/clang/3.7.1/centos6-native/*/ | head -n1` +CLANG_BASE=`ls -d1 /mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/*/ | head -n1` log_variable GCC_BASE log_variable CLANG_BASE @@ -102,7 +102,7 @@ echo "Writing 4.8.1 dependencies to $OUTPUT" # Compilers locations GCC_BASE=`ls -d1 $TP2_LATEST/gcc/4.8.1/centos6-native/*/ | head -n1` -CLANG_BASE=`ls -d1 $TP2_LATEST/clang/3.7.1/centos6-native/*/ | head -n1` +CLANG_BASE=`ls -d1 /mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/*/ | head -n1` log_variable GCC_BASE log_variable CLANG_BASE From 200080ed72008e613011374a03ec8367bb1d652f Mon Sep 17 00:00:00 2001 From: agiardullo Date: Thu, 3 Mar 2016 15:36:26 -0800 Subject: [PATCH 165/195] Improve snapshot handling for Transaction reinitialization Summary: Previously, reusing a transaction (by passing it as an argument to BeginTransaction) would not clear the transaction's snapshot. 
This is not a clear, well-definited behavior. Test Plan: improved test Reviewers: sdong, IslamAbdelRahman, horuff, jkedgar Reviewed By: jkedgar Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D55053 --- utilities/transactions/transaction_base.cc | 10 ++++++++-- utilities/transactions/transaction_base.h | 6 +++--- utilities/transactions/transaction_db_impl.cc | 2 +- utilities/transactions/transaction_impl.cc | 6 ++++-- utilities/transactions/transaction_impl.h | 2 +- utilities/transactions/transaction_test.cc | 15 ++++++++------- 6 files changed, 25 insertions(+), 16 deletions(-) diff --git a/utilities/transactions/transaction_base.cc b/utilities/transactions/transaction_base.cc index 72d12c607..01bab827a 100644 --- a/utilities/transactions/transaction_base.cc +++ b/utilities/transactions/transaction_base.cc @@ -22,7 +22,8 @@ TransactionBaseImpl::TransactionBaseImpl(DB* db, write_options_(write_options), cmp_(GetColumnFamilyUserComparator(db->DefaultColumnFamily())), start_time_(db_->GetEnv()->NowMicros()), - write_batch_(cmp_, 0, true) {} + write_batch_(cmp_, 0, true), + indexing_enabled_(true) {} TransactionBaseImpl::~TransactionBaseImpl() { // Release snapshot if snapshot is set @@ -38,10 +39,15 @@ void TransactionBaseImpl::Clear() { num_merges_ = 0; } -void TransactionBaseImpl::Reinitialize(const WriteOptions& write_options) { +void TransactionBaseImpl::Reinitialize(DB* db, + const WriteOptions& write_options) { Clear(); + ClearSnapshot(); + db_ = db; write_options_ = write_options; start_time_ = db_->GetEnv()->NowMicros(); + indexing_enabled_ = true; + cmp_ = GetColumnFamilyUserComparator(db_->DefaultColumnFamily()); } void TransactionBaseImpl::SetSnapshot() { diff --git a/utilities/transactions/transaction_base.h b/utilities/transactions/transaction_base.h index 86903ea1f..db33b6f65 100644 --- a/utilities/transactions/transaction_base.h +++ b/utilities/transactions/transaction_base.h @@ -32,7 +32,7 @@ class TransactionBaseImpl 
: public Transaction { // Remove pending operations queued in this transaction. virtual void Clear(); - void Reinitialize(const WriteOptions& write_options); + void Reinitialize(DB* db, const WriteOptions& write_options); // Called before executing Put, Merge, Delete, and GetForUpdate. If TryLock // returns non-OK, the Put/Merge/Delete/GetForUpdate will be failed. @@ -235,7 +235,7 @@ class TransactionBaseImpl : public Transaction { // Sets a snapshot if SetSnapshotOnNextOperation() has been called. void SetSnapshotIfNeeded(); - DB* const db_; + DB* db_; WriteOptions write_options_; @@ -294,7 +294,7 @@ class TransactionBaseImpl : public Transaction { // WriteBatchWithIndex. // If false, future Put/Merge/Deletes will be inserted directly into the // underlying WriteBatch and not indexed in the WriteBatchWithIndex. - bool indexing_enabled_ = true; + bool indexing_enabled_; // SetSnapshotOnNextOperation() has been called and the snapshot has not yet // been reset. diff --git a/utilities/transactions/transaction_db_impl.cc b/utilities/transactions/transaction_db_impl.cc index b02d7bd25..ef03f3454 100644 --- a/utilities/transactions/transaction_db_impl.cc +++ b/utilities/transactions/transaction_db_impl.cc @@ -312,7 +312,7 @@ void TransactionDBImpl::ReinitializeTransaction( assert(dynamic_cast(txn) != nullptr); auto txn_impl = reinterpret_cast(txn); - txn_impl->Reinitialize(write_options, txn_options); + txn_impl->Reinitialize(this, write_options, txn_options); } } // namespace rocksdb diff --git a/utilities/transactions/transaction_impl.cc b/utilities/transactions/transaction_impl.cc index 33393751d..8f80433a8 100644 --- a/utilities/transactions/transaction_impl.cc +++ b/utilities/transactions/transaction_impl.cc @@ -70,6 +70,7 @@ void TransactionImpl::Initialize(const TransactionOptions& txn_options) { if (txn_options.set_snapshot) { SetSnapshot(); } + if (expiration_time_ > 0) { txn_db_impl_->InsertExpirableTransaction(txn_id_, this); } @@ -87,9 +88,10 @@ void 
TransactionImpl::Clear() { TransactionBaseImpl::Clear(); } -void TransactionImpl::Reinitialize(const WriteOptions& write_options, +void TransactionImpl::Reinitialize(TransactionDB* txn_db, + const WriteOptions& write_options, const TransactionOptions& txn_options) { - TransactionBaseImpl::Reinitialize(write_options); + TransactionBaseImpl::Reinitialize(txn_db->GetBaseDB(), write_options); Initialize(txn_options); } diff --git a/utilities/transactions/transaction_impl.h b/utilities/transactions/transaction_impl.h index 8a8ed6531..cb02e2834 100644 --- a/utilities/transactions/transaction_impl.h +++ b/utilities/transactions/transaction_impl.h @@ -38,7 +38,7 @@ class TransactionImpl : public TransactionBaseImpl { virtual ~TransactionImpl(); - void Reinitialize(const WriteOptions& write_options, + void Reinitialize(TransactionDB* txn_db, const WriteOptions& write_options, const TransactionOptions& txn_options); Status Commit() override; diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 809dc9506..6f40e5e6a 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -1222,7 +1222,7 @@ TEST_F(TransactionTest, ReinitializeTest) { // Reinitialize transaction to no long expire txn_options.expiration = -1; - db->BeginTransaction(write_options, txn_options, txn1); + txn1 = db->BeginTransaction(write_options, txn_options, txn1); s = txn1->Put("Z", "z"); ASSERT_OK(s); @@ -1231,13 +1231,13 @@ TEST_F(TransactionTest, ReinitializeTest) { s = txn1->Commit(); ASSERT_OK(s); - db->BeginTransaction(write_options, txn_options, txn1); + txn1 = db->BeginTransaction(write_options, txn_options, txn1); s = txn1->Put("Z", "zz"); ASSERT_OK(s); // Reinitilize txn1 and verify that Z gets unlocked - db->BeginTransaction(write_options, txn_options, txn1); + txn1 = db->BeginTransaction(write_options, txn_options, txn1); Transaction* txn2 = db->BeginTransaction(write_options, txn_options, 
nullptr); s = txn2->Put("Z", "zzz"); @@ -1262,12 +1262,12 @@ TEST_F(TransactionTest, ReinitializeTest) { ASSERT_OK(s); ASSERT_EQ(value, "zzzz"); - db->BeginTransaction(write_options, txn_options, txn1); + txn1 = db->BeginTransaction(write_options, txn_options, txn1); const Snapshot* snapshot = txn1->GetSnapshot(); - ASSERT_TRUE(snapshot); + ASSERT_FALSE(snapshot); txn_options.set_snapshot = true; - db->BeginTransaction(write_options, txn_options, txn1); + txn1 = db->BeginTransaction(write_options, txn_options, txn1); snapshot = txn1->GetSnapshot(); ASSERT_TRUE(snapshot); @@ -1280,8 +1280,9 @@ TEST_F(TransactionTest, ReinitializeTest) { ASSERT_OK(s); txn_options.set_snapshot = false; - db->BeginTransaction(write_options, txn_options, txn1); + txn1 = db->BeginTransaction(write_options, txn_options, txn1); snapshot = txn1->GetSnapshot(); + ASSERT_FALSE(snapshot); s = txn1->Put("X", "x"); ASSERT_OK(s); From badd6b784680a39d7b1a0bbaa83086666055901d Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Mon, 7 Mar 2016 15:56:16 -0800 Subject: [PATCH 166/195] Ignore db_test2 Summary: Add db_test2 to .gitignore Test Plan: make sure db_test2 dont show in "git status" Reviewers: sdong, yhchiang, andrewkr, anthony Reviewed By: anthony Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D55191 --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6a92b5d53..62a8bf1bc 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ unity.a tags rocksdb_dump rocksdb_undump +db_test2 java/out java/target From 2200295ee1a2fbe24f107a7f2b366a88fab192a7 Mon Sep 17 00:00:00 2001 From: agiardullo Date: Thu, 3 Mar 2016 16:33:26 -0800 Subject: [PATCH 167/195] optimistic transactions support for reinitialization Summary: Extend optimization in D53835 to optimistic transactions for completeness. 
Test Plan: added test Reviewers: sdong, IslamAbdelRahman, horuff, jkedgar Reviewed By: horuff Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D55059 --- .../utilities/optimistic_transaction_db.h | 16 ++-- .../optimistic_transaction_db_impl.cc | 25 ++++-- .../optimistic_transaction_db_impl.h | 11 ++- .../optimistic_transaction_impl.cc | 12 +++ .../optimistic_transaction_impl.h | 6 ++ .../optimistic_transaction_test.cc | 84 +++++++++++++++++++ 6 files changed, 138 insertions(+), 16 deletions(-) diff --git a/include/rocksdb/utilities/optimistic_transaction_db.h b/include/rocksdb/utilities/optimistic_transaction_db.h index 42ebe191f..b2c2f99a8 100644 --- a/include/rocksdb/utilities/optimistic_transaction_db.h +++ b/include/rocksdb/utilities/optimistic_transaction_db.h @@ -43,15 +43,19 @@ class OptimisticTransactionDB { virtual ~OptimisticTransactionDB() {} - // Starts a new Transaction. Passing set_snapshot=true has the same effect - // as calling SetSnapshot(). + // Starts a new Transaction. // - // Caller should delete the returned transaction after calling - // Commit() or Rollback(). + // Caller is responsible for deleting the returned transaction when no + // longer needed. + // + // If old_txn is not null, BeginTransaction will reuse this Transaction + // handle instead of allocating a new one. This is an optimization to avoid + // extra allocations when repeatedly creating transactions. 
virtual Transaction* BeginTransaction( const WriteOptions& write_options, - const OptimisticTransactionOptions& - txn_options = OptimisticTransactionOptions()) = 0; + const OptimisticTransactionOptions& txn_options = + OptimisticTransactionOptions(), + Transaction* old_txn = nullptr) = 0; // Return the underlying Database that was opened virtual DB* GetBaseDB() = 0; diff --git a/utilities/transactions/optimistic_transaction_db_impl.cc b/utilities/transactions/optimistic_transaction_db_impl.cc index d54173d3d..190440242 100644 --- a/utilities/transactions/optimistic_transaction_db_impl.cc +++ b/utilities/transactions/optimistic_transaction_db_impl.cc @@ -5,11 +5,11 @@ #ifndef ROCKSDB_LITE +#include "utilities/transactions/optimistic_transaction_db_impl.h" + #include #include -#include "utilities/transactions/optimistic_transaction_db_impl.h" - #include "db/db_impl.h" #include "rocksdb/db.h" #include "rocksdb/options.h" @@ -20,11 +20,13 @@ namespace rocksdb { Transaction* OptimisticTransactionDBImpl::BeginTransaction( const WriteOptions& write_options, - const OptimisticTransactionOptions& txn_options) { - Transaction* txn = - new OptimisticTransactionImpl(this, write_options, txn_options); - - return txn; + const OptimisticTransactionOptions& txn_options, Transaction* old_txn) { + if (old_txn != nullptr) { + ReinitializeTransaction(old_txn, write_options, txn_options); + return old_txn; + } else { + return new OptimisticTransactionImpl(this, write_options, txn_options); + } } Status OptimisticTransactionDB::Open(const Options& options, @@ -76,5 +78,14 @@ Status OptimisticTransactionDB::Open( return s; } +void OptimisticTransactionDBImpl::ReinitializeTransaction( + Transaction* txn, const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options) { + assert(dynamic_cast(txn) != nullptr); + auto txn_impl = reinterpret_cast(txn); + + txn_impl->Reinitialize(this, write_options, txn_options); +} + } // namespace rocksdb #endif // ROCKSDB_LITE diff 
--git a/utilities/transactions/optimistic_transaction_db_impl.h b/utilities/transactions/optimistic_transaction_db_impl.h index 72f186188..e426a21be 100644 --- a/utilities/transactions/optimistic_transaction_db_impl.h +++ b/utilities/transactions/optimistic_transaction_db_impl.h @@ -19,14 +19,19 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB { ~OptimisticTransactionDBImpl() {} - Transaction* BeginTransaction( - const WriteOptions& write_options, - const OptimisticTransactionOptions& txn_options) override; + Transaction* BeginTransaction(const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options, + Transaction* old_txn) override; DB* GetBaseDB() override { return db_.get(); } private: std::unique_ptr db_; + + void ReinitializeTransaction(Transaction* txn, + const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options = + OptimisticTransactionOptions()); }; } // namespace rocksdb diff --git a/utilities/transactions/optimistic_transaction_impl.cc b/utilities/transactions/optimistic_transaction_impl.cc index 5cb1a8f8d..2647b3dd7 100644 --- a/utilities/transactions/optimistic_transaction_impl.cc +++ b/utilities/transactions/optimistic_transaction_impl.cc @@ -28,11 +28,23 @@ OptimisticTransactionImpl::OptimisticTransactionImpl( OptimisticTransactionDB* txn_db, const WriteOptions& write_options, const OptimisticTransactionOptions& txn_options) : TransactionBaseImpl(txn_db->GetBaseDB(), write_options), txn_db_(txn_db) { + Initialize(txn_options); +} + +void OptimisticTransactionImpl::Initialize( + const OptimisticTransactionOptions& txn_options) { if (txn_options.set_snapshot) { SetSnapshot(); } } +void OptimisticTransactionImpl::Reinitialize( + OptimisticTransactionDB* txn_db, const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options) { + TransactionBaseImpl::Reinitialize(txn_db->GetBaseDB(), write_options); + Initialize(txn_options); +} + 
OptimisticTransactionImpl::~OptimisticTransactionImpl() { } diff --git a/utilities/transactions/optimistic_transaction_impl.h b/utilities/transactions/optimistic_transaction_impl.h index cbd167505..4876a100d 100644 --- a/utilities/transactions/optimistic_transaction_impl.h +++ b/utilities/transactions/optimistic_transaction_impl.h @@ -34,6 +34,10 @@ class OptimisticTransactionImpl : public TransactionBaseImpl { virtual ~OptimisticTransactionImpl(); + void Reinitialize(OptimisticTransactionDB* txn_db, + const WriteOptions& write_options, + const OptimisticTransactionOptions& txn_options); + Status Commit() override; void Rollback() override; @@ -47,6 +51,8 @@ class OptimisticTransactionImpl : public TransactionBaseImpl { friend class OptimisticTransactionCallback; + void Initialize(const OptimisticTransactionOptions& txn_options); + // Returns OK if it is safe to commit this transaction. Returns Status::Busy // if there are read or write conflicts that would prevent us from committing // OR if we can not determine whether there would be any such conflicts. 
diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index 991771757..b672b8722 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -1267,6 +1267,90 @@ TEST_F(OptimisticTransactionTest, UndoGetForUpdateTest) { delete txn1; } +TEST_F(OptimisticTransactionTest, ReinitializeTest) { + WriteOptions write_options; + ReadOptions read_options; + OptimisticTransactionOptions txn_options; + string value; + Status s; + + Transaction* txn1 = txn_db->BeginTransaction(write_options, txn_options); + + txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); + + s = txn1->Put("Z", "z"); + ASSERT_OK(s); + + s = txn1->Commit(); + ASSERT_OK(s); + + txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); + + s = txn1->Put("Z", "zz"); + ASSERT_OK(s); + + // Reinitilize txn1 and verify that zz is not written + txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); + + s = txn1->Commit(); + ASSERT_OK(s); + s = db->Get(read_options, "Z", &value); + ASSERT_OK(s); + ASSERT_EQ(value, "z"); + + // Verify snapshots get reinitialized correctly + txn1->SetSnapshot(); + s = txn1->Put("Z", "zzzz"); + ASSERT_OK(s); + + s = txn1->Commit(); + ASSERT_OK(s); + + s = db->Get(read_options, "Z", &value); + ASSERT_OK(s); + ASSERT_EQ(value, "zzzz"); + + const Snapshot* snapshot = txn1->GetSnapshot(); + ASSERT_TRUE(snapshot); + + txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); + snapshot = txn1->GetSnapshot(); + ASSERT_FALSE(snapshot); + + txn_options.set_snapshot = true; + txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); + snapshot = txn1->GetSnapshot(); + ASSERT_TRUE(snapshot); + + s = txn1->Put("Z", "a"); + ASSERT_OK(s); + + txn1->Rollback(); + + s = txn1->Put("Y", "y"); + ASSERT_OK(s); + + txn_options.set_snapshot = false; + txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); 
+ snapshot = txn1->GetSnapshot(); + ASSERT_FALSE(snapshot); + + s = txn1->Put("X", "x"); + ASSERT_OK(s); + + s = txn1->Commit(); + ASSERT_OK(s); + + s = db->Get(read_options, "Z", &value); + ASSERT_OK(s); + ASSERT_EQ(value, "zzzz"); + + s = db->Get(read_options, "Y", &value); + ASSERT_TRUE(s.IsNotFound()); + + delete txn1; +} + } // namespace rocksdb int main(int argc, char** argv) { From aa3f02d50cdaff11a2fdb25f585994610d2f2108 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Tue, 8 Mar 2016 16:46:41 -0800 Subject: [PATCH 168/195] Improve comment in compaction.h and compaction_picker.h Summary: ReleaseCompactionFiles must be called when DB mutex is held, but the documentation is mission. Test Plan: no code change Reviewers: anthony, IslamAbdelRahman, kradhakrishnan, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54987 --- db/compaction.h | 2 ++ db/compaction_picker.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/db/compaction.h b/db/compaction.h index dc8712363..729c4edaf 100644 --- a/db/compaction.h +++ b/db/compaction.h @@ -138,6 +138,8 @@ class Compaction { // Clear all files to indicate that they are not being compacted // Delete this compaction from the list of running compactions. 
+ // + // Requirement: DB mutex held void ReleaseCompactionFiles(Status status); // Returns the summary of the compaction in "output" with maximum "len" diff --git a/db/compaction_picker.h b/db/compaction_picker.h index b8611b4cb..0503c8692 100644 --- a/db/compaction_picker.h +++ b/db/compaction_picker.h @@ -83,6 +83,8 @@ class CompactionPicker { #endif // ROCKSDB_LITE // Free up the files that participated in a compaction + // + // Requirement: DB mutex held void ReleaseCompactionFiles(Compaction* c, Status status); // Returns true if any one of the specified files are being compacted From f0161c37b08663eff4ae08bb24311e18c4b581a6 Mon Sep 17 00:00:00 2001 From: Alexander Fenster Date: Thu, 10 Mar 2016 13:34:42 -0800 Subject: [PATCH 169/195] formatting fix --- tools/ldb_cmd.cc | 47 +++++++++++++++++++++++++---------------------- tools/ldb_cmd.h | 2 +- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index bec67a377..2b377e032 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -1742,15 +1742,17 @@ Options BatchPutCommand::PrepareOptionsForOpenDB() { // ---------------------------------------------------------------------------- ScanCommand::ScanCommand(const vector& params, - const map& options, const vector& flags) : - LDBCommand(options, flags, true, - BuildCmdLineOptions({ARG_TTL, ARG_NO_VALUE, ARG_HEX, ARG_KEY_HEX, ARG_TO, - ARG_VALUE_HEX, ARG_FROM, ARG_TIMESTAMP, - ARG_MAX_KEYS, ARG_TTL_START, ARG_TTL_END})), - start_key_specified_(false), - end_key_specified_(false), - max_keys_scanned_(-1), - no_value_(false) { + const map& options, + const vector& flags) + : LDBCommand(options, flags, true, + BuildCmdLineOptions( + {ARG_TTL, ARG_NO_VALUE, ARG_HEX, ARG_KEY_HEX, + ARG_TO, ARG_VALUE_HEX, ARG_FROM, ARG_TIMESTAMP, + ARG_MAX_KEYS, ARG_TTL_START, ARG_TTL_END})), + start_key_specified_(false), + end_key_specified_(false), + max_keys_scanned_(-1), + no_value_(false) { map::const_iterator itr = 
options.find(ARG_FROM); if (itr != options.end()) { @@ -1769,7 +1771,8 @@ ScanCommand::ScanCommand(const vector& params, end_key_specified_ = true; } - vector::const_iterator vitr = std::find(flags.begin(), flags.end(), ARG_NO_VALUE); + vector::const_iterator vitr = + std::find(flags.begin(), flags.end(), ARG_NO_VALUE); if (vitr != flags.end()) { no_value_ = true; } @@ -1858,20 +1861,20 @@ void ScanCommand::DoCommand() { } if (no_value_) { - fprintf(stdout, "%.*s\n", - static_cast(key_slice.size()), key_slice.data()); + fprintf(stdout, "%.*s\n", static_cast(key_slice.size()), + key_slice.data()); } else { - Slice val_slice = it->value(); - std::string formatted_value; - if (is_value_hex_) { - formatted_value = "0x" + val_slice.ToString(true /* hex */); - val_slice = formatted_value; - } - fprintf(stdout, "%.*s : %.*s\n", - static_cast(key_slice.size()), key_slice.data(), - static_cast(val_slice.size()), val_slice.data()); + Slice val_slice = it->value(); + std::string formatted_value; + if (is_value_hex_) { + formatted_value = "0x" + val_slice.ToString(true /* hex */); + val_slice = formatted_value; + } + fprintf(stdout, "%.*s : %.*s\n", static_cast(key_slice.size()), + key_slice.data(), static_cast(val_slice.size()), + val_slice.data()); } - + num_keys_scanned++; if (max_keys_scanned_ >= 0 && num_keys_scanned >= max_keys_scanned_) { break; diff --git a/tools/ldb_cmd.h b/tools/ldb_cmd.h index 2c1c5e4b1..b21be4a3d 100644 --- a/tools/ldb_cmd.h +++ b/tools/ldb_cmd.h @@ -378,7 +378,7 @@ private: */ bool StringToBool(string val) { std::transform(val.begin(), val.end(), val.begin(), - [](char ch) -> char { return (char)::tolower(ch); }); + [](char ch)->char { return (char)::tolower(ch); }); if (val == "true") { return true; From 765597fa7891f9171ba6965b0971b97d85f6032a Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Thu, 10 Mar 2016 14:34:28 -0800 Subject: [PATCH 170/195] Update compaction score right after CompactFiles forms a compaction Summary: This is a 
follow-up patch of https://reviews.facebook.net/D54891. As the information about files being compacted will also be used when making compaction decision, it is necessary to update the compaction score when a compaction plan has been made but not yet execute. This patch adds a missing call to update the compaction score in CompactFiles(). Test Plan: compact_files_test Reviewers: sdong, IslamAbdelRahman, kradhakrishnan, yiwu, andrewkr Reviewed By: andrewkr Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D55227 --- db/db_impl.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/db/db_impl.cc b/db/db_impl.cc index ebcbe1a4e..675669ad2 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1844,6 +1844,17 @@ Status DBImpl::CompactFilesImpl( // support for CompactFiles, we should have CompactFiles API // pass a pointer of CompactionJobStats as the out-value // instead of using EventListener. + + // Creating a compaction influences the compaction score because the score + // takes running compactions into account (by skipping files that are already + // being compacted). Since we just changed compaction score, we recalculate it + // here. + { + CompactionOptionsFIFO dummy_compaction_options_fifo; + version->storage_info()->ComputeCompactionScore( + *c->mutable_cf_options(), dummy_compaction_options_fifo); + } + compaction_job.Prepare(); mutex_.Unlock(); From f71fc77b7c58e8863900d7fa469106470bbcaef9 Mon Sep 17 00:00:00 2001 From: Yi Wu Date: Thu, 10 Mar 2016 17:35:19 -0800 Subject: [PATCH 171/195] Cache to have an option to fail Cache::Insert() when full Summary: Cache to have an option to fail Cache::Insert() when full. Update call sites to check status and handle error. I totally have no idea what's correct behavior of all the call sites when they encounter error. Please let me know if you see something wrong or more unit test is needed. Test Plan: make check -j32, see tests pass. 
Reviewers: anthony, yhchiang, andrewkr, IslamAbdelRahman, kradhakrishnan, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D54705 --- CMakeLists.txt | 1 + HISTORY.md | 1 + Makefile | 4 + db/db_block_cache_test.cc | 237 +++++++++++++++++++++++++++++ db/table_cache.cc | 13 +- include/rocksdb/cache.h | 35 ++++- include/rocksdb/statistics.h | 14 +- table/block_based_table_builder.cc | 6 +- table/block_based_table_reader.cc | 92 ++++++----- util/cache.cc | 117 ++++++++++---- util/cache_bench.cc | 6 +- util/cache_test.cc | 81 ++++++++-- 12 files changed, 504 insertions(+), 103 deletions(-) create mode 100644 db/db_block_cache_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index eda0d703b..16219b96f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -338,6 +338,7 @@ set(TESTS db/db_tailing_iter_test.cc db/db_test.cc db/db_test2.cc + db/db_block_cache_test.cc db/db_universal_compaction_test.cc db/db_wal_test.cc db/dbformat_test.cc diff --git a/HISTORY.md b/HISTORY.md index 567d40d08..64c89e8ee 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,7 @@ ## Unreleased ### Public API Changes * Change default of BlockBasedTableOptions.format_version to 2. It means default DB created by 4.6 or up cannot be opened by RocksDB version 3.9 or earlier. +* Added strict_capacity_limit option to NewLRUCache. If the flag is set to true, insert to cache will fail if no enough capacity can be free. Signiture of Cache::Insert() is updated accordingly. ### New Features * Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification. 
* Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned" diff --git a/Makefile b/Makefile index cfe70bea9..a1e321f83 100644 --- a/Makefile +++ b/Makefile @@ -246,6 +246,7 @@ BENCHTOOLOBJECTS = $(BENCH_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL) TESTS = \ db_test \ db_test2 \ + db_block_cache_test \ db_iter_test \ db_log_iter_test \ db_compaction_filter_test \ @@ -794,6 +795,9 @@ db_test: db/db_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) db_test2: db/db_test2.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) +db_block_cache_test: db/db_block_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + db_log_iter_test: db/db_log_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc new file mode 100644 index 000000000..18fb5b2ee --- /dev/null +++ b/db/db_block_cache_test.cc @@ -0,0 +1,237 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+#include +#include "db/db_test_util.h" +#include "port/stack_trace.h" + +namespace rocksdb { + +static uint64_t TestGetTickerCount(const Options& options, + Tickers ticker_type) { + return options.statistics->getTickerCount(ticker_type); +} + +class DBBlockCacheTest : public DBTestBase { + private: + size_t miss_count_ = 0; + size_t hit_count_ = 0; + size_t insert_count_ = 0; + size_t failure_count_ = 0; + size_t compressed_miss_count_ = 0; + size_t compressed_hit_count_ = 0; + size_t compressed_insert_count_ = 0; + size_t compressed_failure_count_ = 0; + + public: + const size_t kNumBlocks = 10; + const size_t kValueSize = 100; + + DBBlockCacheTest() : DBTestBase("/db_block_cache_test") {} + + BlockBasedTableOptions GetTableOptions() { + BlockBasedTableOptions table_options; + // Set a small enough block size so that each key-value get its own block. + table_options.block_size = 1; + return table_options; + } + + Options GetOptions(const BlockBasedTableOptions& table_options) { + Options options = CurrentOptions(); + options.create_if_missing = true; + // options.compression = kNoCompression; + options.statistics = rocksdb::CreateDBStatistics(); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + return options; + } + + void InitTable(const Options& options) { + std::string value(kValueSize, 'a'); + for (size_t i = 0; i < kNumBlocks; i++) { + ASSERT_OK(Put(ToString(i), value.c_str())); + } + } + + void RecordCacheCounters(const Options& options) { + miss_count_ = TestGetTickerCount(options, BLOCK_CACHE_MISS); + hit_count_ = TestGetTickerCount(options, BLOCK_CACHE_HIT); + insert_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD); + failure_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES); + compressed_miss_count_ = + TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS); + compressed_hit_count_ = + TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT); + compressed_insert_count_ = + TestGetTickerCount(options, 
BLOCK_CACHE_COMPRESSED_ADD); + compressed_failure_count_ = + TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES); + } + + void CheckCacheCounters(const Options& options, size_t expected_misses, + size_t expected_hits, size_t expected_inserts, + size_t expected_failures) { + size_t new_miss_count = TestGetTickerCount(options, BLOCK_CACHE_MISS); + size_t new_hit_count = TestGetTickerCount(options, BLOCK_CACHE_HIT); + size_t new_insert_count = TestGetTickerCount(options, BLOCK_CACHE_ADD); + size_t new_failure_count = + TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES); + ASSERT_EQ(miss_count_ + expected_misses, new_miss_count); + ASSERT_EQ(hit_count_ + expected_hits, new_hit_count); + ASSERT_EQ(insert_count_ + expected_inserts, new_insert_count); + ASSERT_EQ(failure_count_ + expected_failures, new_failure_count); + miss_count_ = new_miss_count; + hit_count_ = new_hit_count; + insert_count_ = new_insert_count; + failure_count_ = new_failure_count; + } + + void CheckCompressedCacheCounters(const Options& options, + size_t expected_misses, + size_t expected_hits, + size_t expected_inserts, + size_t expected_failures) { + size_t new_miss_count = + TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS); + size_t new_hit_count = + TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT); + size_t new_insert_count = + TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD); + size_t new_failure_count = + TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES); + ASSERT_EQ(compressed_miss_count_ + expected_misses, new_miss_count); + ASSERT_EQ(compressed_hit_count_ + expected_hits, new_hit_count); + ASSERT_EQ(compressed_insert_count_ + expected_inserts, new_insert_count); + ASSERT_EQ(compressed_failure_count_ + expected_failures, new_failure_count); + compressed_miss_count_ = new_miss_count; + compressed_hit_count_ = new_hit_count; + compressed_insert_count_ = new_insert_count; + compressed_failure_count_ = new_failure_count; + } +}; + 
+TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) { + ReadOptions read_options; + auto table_options = GetTableOptions(); + auto options = GetOptions(table_options); + InitTable(options); + + std::shared_ptr cache = NewLRUCache(0, 0, false); + table_options.block_cache = cache; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + Reopen(options); + RecordCacheCounters(options); + + std::vector> iterators(kNumBlocks - 1); + Iterator* iter = nullptr; + + // Load blocks into cache. + for (size_t i = 0; i < kNumBlocks - 1; i++) { + iter = db_->NewIterator(read_options); + iter->Seek(ToString(i)); + ASSERT_OK(iter->status()); + CheckCacheCounters(options, 1, 0, 1, 0); + iterators[i].reset(iter); + } + size_t usage = cache->GetUsage(); + ASSERT_LT(0, usage); + cache->SetCapacity(usage); + ASSERT_EQ(usage, cache->GetPinnedUsage()); + + // Test with strict capacity limit. + cache->SetStrictCapacityLimit(true); + iter = db_->NewIterator(read_options); + iter->Seek(ToString(kNumBlocks - 1)); + ASSERT_TRUE(iter->status().IsIncomplete()); + CheckCacheCounters(options, 1, 0, 0, 1); + delete iter; + iter = nullptr; + + // Release interators and access cache again. 
+ for (size_t i = 0; i < kNumBlocks - 1; i++) { + iterators[i].reset(); + CheckCacheCounters(options, 0, 0, 0, 0); + } + ASSERT_EQ(0, cache->GetPinnedUsage()); + for (size_t i = 0; i < kNumBlocks - 1; i++) { + iter = db_->NewIterator(read_options); + iter->Seek(ToString(i)); + ASSERT_OK(iter->status()); + CheckCacheCounters(options, 0, 1, 0, 0); + iterators[i].reset(iter); + } +} + +TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) { + ReadOptions read_options; + auto table_options = GetTableOptions(); + auto options = GetOptions(table_options); + InitTable(options); + + std::shared_ptr cache = NewLRUCache(0, 0, false); + std::shared_ptr compressed_cache = NewLRUCache(0, 0, false); + table_options.block_cache = cache; + table_options.block_cache_compressed = compressed_cache; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + Reopen(options); + RecordCacheCounters(options); + + std::vector> iterators(kNumBlocks - 1); + Iterator* iter = nullptr; + + // Load blocks into cache. + for (size_t i = 0; i < kNumBlocks - 1; i++) { + iter = db_->NewIterator(read_options); + iter->Seek(ToString(i)); + ASSERT_OK(iter->status()); + CheckCacheCounters(options, 1, 0, 1, 0); + CheckCompressedCacheCounters(options, 1, 0, 1, 0); + iterators[i].reset(iter); + } + size_t usage = cache->GetUsage(); + ASSERT_LT(0, usage); + ASSERT_EQ(usage, cache->GetPinnedUsage()); + size_t compressed_usage = compressed_cache->GetUsage(); + ASSERT_LT(0, compressed_usage); + // Compressed block cache cannot be pinned. + ASSERT_EQ(0, compressed_cache->GetPinnedUsage()); + + // Set strict capacity limit flag. Now block will only load into compressed + // block cache. 
+ cache->SetCapacity(usage); + cache->SetStrictCapacityLimit(true); + ASSERT_EQ(usage, cache->GetPinnedUsage()); + // compressed_cache->SetCapacity(compressed_usage); + compressed_cache->SetCapacity(0); + // compressed_cache->SetStrictCapacityLimit(true); + iter = db_->NewIterator(read_options); + iter->Seek(ToString(kNumBlocks - 1)); + ASSERT_TRUE(iter->status().IsIncomplete()); + CheckCacheCounters(options, 1, 0, 0, 1); + CheckCompressedCacheCounters(options, 1, 0, 1, 0); + delete iter; + iter = nullptr; + + // Clear strict capacity limit flag. This time we shall hit compressed block + // cache. + cache->SetStrictCapacityLimit(false); + iter = db_->NewIterator(read_options); + iter->Seek(ToString(kNumBlocks - 1)); + ASSERT_OK(iter->status()); + CheckCacheCounters(options, 1, 0, 1, 0); + CheckCompressedCacheCounters(options, 0, 1, 0, 0); + delete iter; + iter = nullptr; +} + +} // namespace rocksdb + +int main(int argc, char** argv) { + rocksdb::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/db/table_cache.cc b/db/table_cache.cc index 53e35d3a0..2a4621b7e 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -143,8 +143,12 @@ Status TableCache::FindTable(const EnvOptions& env_options, // We do not cache error results so that if the error is transient, // or somebody repairs the file, we recover automatically. } else { - *handle = cache_->Insert(key, table_reader.release(), 1, - &DeleteEntry); + s = cache_->Insert(key, table_reader.get(), 1, &DeleteEntry, + handle); + if (s.ok()) { + // Release ownership of table reader. 
+ table_reader.release(); + } } } return s; @@ -285,9 +289,8 @@ Status TableCache::Get(const ReadOptions& options, size_t charge = row_cache_key.Size() + row_cache_entry->size() + sizeof(std::string); void* row_ptr = new std::string(std::move(*row_cache_entry)); - auto row_handle = ioptions_.row_cache->Insert( - row_cache_key.GetKey(), row_ptr, charge, &DeleteEntry); - ioptions_.row_cache->Release(row_handle); + ioptions_.row_cache->Insert(row_cache_key.GetKey(), row_ptr, charge, + &DeleteEntry); } #endif // ROCKSDB_LITE diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 30d9c67d3..327270e34 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -25,6 +25,7 @@ #include #include #include "rocksdb/slice.h" +#include "rocksdb/status.h" namespace rocksdb { @@ -33,12 +34,15 @@ using std::shared_ptr; class Cache; // Create a new cache with a fixed size capacity. The cache is sharded -// to 2^numShardBits shards, by hash of the key. The total capacity +// to 2^num_shard_bits shards, by hash of the key. The total capacity // is divided and evenly assigned to each shard. // -// The functions without parameter numShardBits uses default value, which is 4 +// The parameter num_shard_bits defaults to 4, and strict_capacity_limit +// defaults to false. extern shared_ptr NewLRUCache(size_t capacity); -extern shared_ptr NewLRUCache(size_t capacity, int numShardBits); +extern shared_ptr NewLRUCache(size_t capacity, int num_shard_bits); +extern shared_ptr NewLRUCache(size_t capacity, int num_shard_bits, + bool strict_capacity_limit); class Cache { public: @@ -55,15 +59,22 @@ class Cache { // Insert a mapping from key->value into the cache and assign it // the specified charge against the total cache capacity. + // If strict_capacity_limit is true and cache reaches its full capacity, + // return Status::Incomplete. // - // Returns a handle that corresponds to the mapping. 
The caller - // must call this->Release(handle) when the returned mapping is no - // longer needed. + // If handle is not nullptr, returns a handle that corresponds to the + // mapping. The caller must call this->Release(handle) when the returned + // mapping is no longer needed. In case of error caller is responsible to + // cleanup the value (i.e. calling "deleter"). + // + // If handle is nullptr, it is as if Release is called immediately after + // insert. In case of error value will be cleanup. // // When the inserted entry is no longer needed, the key and // value will be passed to "deleter". - virtual Handle* Insert(const Slice& key, void* value, size_t charge, - void (*deleter)(const Slice& key, void* value)) = 0; + virtual Status Insert(const Slice& key, void* value, size_t charge, + void (*deleter)(const Slice& key, void* value), + Handle** handle = nullptr) = 0; // If the cache has no mapping for "key", returns nullptr. // @@ -100,6 +111,14 @@ class Cache { // purge the released entries from the cache in order to lower the usage virtual void SetCapacity(size_t capacity) = 0; + // Set whether to return error on insertion when cache reaches its full + // capacity. + virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0; + + // Set whether to return error on insertion when cache reaches its full + // capacity. + virtual bool HasStrictCapacityLimit() const = 0; + // returns the maximum configured capacity of the cache virtual size_t GetCapacity() const = 0; diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index c16c3a7ca..c832516da 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -33,6 +33,8 @@ enum Tickers : uint32_t { BLOCK_CACHE_HIT, // # of blocks added to block cache. BLOCK_CACHE_ADD, + // # of failures when adding blocks to block cache. + BLOCK_CACHE_ADD_FAILURES, // # of times cache miss when accessing index block from block cache. 
BLOCK_CACHE_INDEX_MISS, // # of times cache hit when accessing index block from block cache. @@ -140,8 +142,12 @@ enum Tickers : uint32_t { GET_UPDATES_SINCE_CALLS, BLOCK_CACHE_COMPRESSED_MISS, // miss in the compressed block cache BLOCK_CACHE_COMPRESSED_HIT, // hit in the compressed block cache - WAL_FILE_SYNCED, // Number of times WAL sync is done - WAL_FILE_BYTES, // Number of bytes written to WAL + // Number of blocks added to comopressed block cache + BLOCK_CACHE_COMPRESSED_ADD, + // Number of failures when adding blocks to compressed block cache + BLOCK_CACHE_COMPRESSED_ADD_FAILURES, + WAL_FILE_SYNCED, // Number of times WAL sync is done + WAL_FILE_BYTES, // Number of bytes written to WAL // Writes can be processed by requesting thread or by the thread at the // head of the writers queue. @@ -176,6 +182,7 @@ const std::vector> TickersNameMap = { {BLOCK_CACHE_MISS, "rocksdb.block.cache.miss"}, {BLOCK_CACHE_HIT, "rocksdb.block.cache.hit"}, {BLOCK_CACHE_ADD, "rocksdb.block.cache.add"}, + {BLOCK_CACHE_ADD_FAILURES, "rocksdb.block.cache.add.failures"}, {BLOCK_CACHE_INDEX_MISS, "rocksdb.block.cache.index.miss"}, {BLOCK_CACHE_INDEX_HIT, "rocksdb.block.cache.index.hit"}, {BLOCK_CACHE_FILTER_MISS, "rocksdb.block.cache.filter.miss"}, @@ -227,6 +234,9 @@ const std::vector> TickersNameMap = { {GET_UPDATES_SINCE_CALLS, "rocksdb.getupdatessince.calls"}, {BLOCK_CACHE_COMPRESSED_MISS, "rocksdb.block.cachecompressed.miss"}, {BLOCK_CACHE_COMPRESSED_HIT, "rocksdb.block.cachecompressed.hit"}, + {BLOCK_CACHE_COMPRESSED_ADD, "rocksdb.block.cachecompressed.add"}, + {BLOCK_CACHE_COMPRESSED_ADD_FAILURES, + "rocksdb.block.cachecompressed.add.failures"}, {WAL_FILE_SYNCED, "rocksdb.wal.synced"}, {WAL_FILE_BYTES, "rocksdb.wal.bytes"}, {WRITE_DONE_BY_SELF, "rocksdb.write.self"}, diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index 1484acb51..47d74bc5f 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -703,7 
+703,6 @@ Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents, if (type != kNoCompression && block_cache_compressed != nullptr) { - Cache::Handle* cache_handle = nullptr; size_t size = block_contents.size(); std::unique_ptr ubuf(new char[size + 1]); @@ -723,9 +722,8 @@ Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents, (end - r->compressed_cache_key_prefix)); // Insert into compressed block cache. - cache_handle = block_cache_compressed->Insert( - key, block, block->usable_size(), &DeleteCachedBlock); - block_cache_compressed->Release(cache_handle); + block_cache_compressed->Insert(key, block, block->usable_size(), + &DeleteCachedBlock); // Invalidate OS cache. r->file->InvalidateCache(static_cast(r->offset), size); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 42c5aa494..cbaf90a90 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -740,11 +740,16 @@ Status BlockBasedTable::GetDataBlockFromCache( assert(block->value->compression_type() == kNoCompression); if (block_cache != nullptr && block->value->cachable() && read_options.fill_cache) { - block->cache_handle = block_cache->Insert(block_cache_key, block->value, - block->value->usable_size(), - &DeleteCachedEntry); - assert(reinterpret_cast( - block_cache->Value(block->cache_handle)) == block->value); + s = block_cache->Insert( + block_cache_key, block->value, block->value->usable_size(), + &DeleteCachedEntry, &(block->cache_handle)); + if (s.ok()) { + RecordTick(statistics, BLOCK_CACHE_ADD); + } else { + RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); + delete block->value; + block->value = nullptr; + } } } @@ -784,27 +789,37 @@ Status BlockBasedTable::PutDataBlockToCache( // Release the hold on the compressed cache entry immediately. 
if (block_cache_compressed != nullptr && raw_block != nullptr && raw_block->cachable()) { - auto cache_handle = block_cache_compressed->Insert( - compressed_block_cache_key, raw_block, raw_block->usable_size(), - &DeleteCachedEntry); - block_cache_compressed->Release(cache_handle); - RecordTick(statistics, BLOCK_CACHE_COMPRESSED_MISS); - // Avoid the following code to delete this cached block. - raw_block = nullptr; + s = block_cache_compressed->Insert(compressed_block_cache_key, raw_block, + raw_block->usable_size(), + &DeleteCachedEntry); + if (s.ok()) { + // Avoid the following code to delete this cached block. + raw_block = nullptr; + RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD); + } else { + RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD_FAILURES); + } } delete raw_block; // insert into uncompressed block cache assert((block->value->compression_type() == kNoCompression)); if (block_cache != nullptr && block->value->cachable()) { - block->cache_handle = block_cache->Insert(block_cache_key, block->value, - block->value->usable_size(), - &DeleteCachedEntry); - RecordTick(statistics, BLOCK_CACHE_ADD); - RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, - block->value->usable_size()); - assert(reinterpret_cast(block_cache->Value(block->cache_handle)) == - block->value); + s = block_cache->Insert(block_cache_key, block->value, + block->value->usable_size(), + &DeleteCachedEntry, &(block->cache_handle)); + if (s.ok()) { + assert(block->cache_handle != nullptr); + RecordTick(statistics, BLOCK_CACHE_ADD); + RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, + block->value->usable_size()); + assert(reinterpret_cast( + block_cache->Value(block->cache_handle)) == block->value); + } else { + RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); + delete block->value; + block->value = nullptr; + } } return s; @@ -891,10 +906,17 @@ BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( filter = ReadFilter(rep_, &filter_size); if (filter != nullptr) { assert(filter_size > 
0); - cache_handle = block_cache->Insert(key, filter, filter_size, - &DeleteCachedEntry); - RecordTick(statistics, BLOCK_CACHE_ADD); - RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, filter_size); + Status s = block_cache->Insert(key, filter, filter_size, + &DeleteCachedEntry, + &cache_handle); + if (s.ok()) { + RecordTick(statistics, BLOCK_CACHE_ADD); + RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, filter_size); + } else { + RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); + delete filter; + return CachableEntry(); + } } } @@ -937,10 +959,18 @@ InternalIterator* BlockBasedTable::NewIndexIterator( // Create index reader and put it in the cache. Status s; s = CreateIndexReader(&index_reader); + if (s.ok()) { + s = block_cache->Insert(key, index_reader, index_reader->usable_size(), + &DeleteCachedEntry, &cache_handle); + } - if (!s.ok()) { + if (s.ok()) { + RecordTick(statistics, BLOCK_CACHE_ADD); + RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, + index_reader->usable_size()); + } else { + RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); // make sure if something goes wrong, index_reader shall remain intact. - assert(index_reader == nullptr); if (input_iter != nullptr) { input_iter->SetStatus(s); return input_iter; @@ -949,12 +979,6 @@ InternalIterator* BlockBasedTable::NewIndexIterator( } } - cache_handle = - block_cache->Insert(key, index_reader, index_reader->usable_size(), - &DeleteCachedEntry); - RecordTick(statistics, BLOCK_CACHE_ADD); - RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, - index_reader->usable_size()); } assert(cache_handle); @@ -1036,7 +1060,7 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator( } // Didn't get any data from block caches. 
- if (block.value == nullptr) { + if (s.ok() && block.value == nullptr) { if (no_io) { // Could not read from block_cache and can't do IO if (input_iter != nullptr) { @@ -1055,7 +1079,7 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator( } InternalIterator* iter; - if (block.value != nullptr) { + if (s.ok() && block.value != nullptr) { iter = block.value->NewIterator(&rep->internal_comparator, input_iter); if (block.cache_handle != nullptr) { iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, diff --git a/util/cache.cc b/util/cache.cc index 078b10e1a..6015644f6 100644 --- a/util/cache.cc +++ b/util/cache.cc @@ -196,10 +196,13 @@ class LRUCache { // free the needed space void SetCapacity(size_t capacity); + // Set the flag to reject insertion if cache if full. + void SetStrictCapacityLimit(bool strict_capacity_limit); + // Like Cache methods, but with an extra "hash" parameter. - Cache::Handle* Insert(const Slice& key, uint32_t hash, - void* value, size_t charge, - void (*deleter)(const Slice& key, void* value)); + Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge, + void (*deleter)(const Slice& key, void* value), + Cache::Handle** handle); Cache::Handle* Lookup(const Slice& key, uint32_t hash); void Release(Cache::Handle* handle); void Erase(const Slice& key, uint32_t hash); @@ -245,6 +248,9 @@ class LRUCache { // Memory size for entries residing only in the LRU list size_t lru_usage_; + // Whether to reject insertion if cache reaches its full capacity. + bool strict_capacity_limit_; + // mutex_ protects the following state. // We don't count mutex_ as the cache's internal state so semantically we // don't mind mutex_ invoking the non-const actions. 
@@ -336,6 +342,11 @@ void LRUCache::SetCapacity(size_t capacity) { } } +void LRUCache::SetStrictCapacityLimit(bool strict_capacity_limit) { + MutexLock l(&mutex_); + strict_capacity_limit_ = strict_capacity_limit; +} + Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) { MutexLock l(&mutex_); LRUHandle* e = table_.Lookup(key, hash); @@ -350,6 +361,9 @@ Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) { } void LRUCache::Release(Cache::Handle* handle) { + if (handle == nullptr) { + return; + } LRUHandle* e = reinterpret_cast(handle); bool last_reference = false; { @@ -383,15 +397,16 @@ void LRUCache::Release(Cache::Handle* handle) { } } -Cache::Handle* LRUCache::Insert( - const Slice& key, uint32_t hash, void* value, size_t charge, - void (*deleter)(const Slice& key, void* value)) { - +Status LRUCache::Insert(const Slice& key, uint32_t hash, void* value, + size_t charge, + void (*deleter)(const Slice& key, void* value), + Cache::Handle** handle) { // Allocate the memory here outside of the mutex // If the cache is full, we'll have to release it // It shouldn't happen very often though. LRUHandle* e = reinterpret_cast( new char[sizeof(LRUHandle) - 1 + key.size()]); + Status s; autovector last_reference_list; e->value = value; @@ -399,7 +414,9 @@ Cache::Handle* LRUCache::Insert( e->charge = charge; e->key_length = key.size(); e->hash = hash; - e->refs = 2; // One from LRUCache, one for the returned handle + e->refs = (handle == nullptr + ? 
1 + : 2); // One from LRUCache, one for the returned handle e->next = e->prev = nullptr; e->in_cache = true; memcpy(e->key_data, key.data(), key.size()); @@ -411,20 +428,36 @@ Cache::Handle* LRUCache::Insert( // is freed or the lru list is empty EvictFromLRU(charge, &last_reference_list); - // insert into the cache - // note that the cache might get larger than its capacity if not enough - // space was freed - LRUHandle* old = table_.Insert(e); - usage_ += e->charge; - if (old != nullptr) { - old->in_cache = false; - if (Unref(old)) { - usage_ -= old->charge; - // old is on LRU because it's in cache and its reference count - // was just 1 (Unref returned 0) - LRU_Remove(old); - last_reference_list.push_back(old); + if (strict_capacity_limit_ && usage_ - lru_usage_ + charge > capacity_) { + if (handle == nullptr) { + last_reference_list.push_back(e); + } else { + delete[] reinterpret_cast(e); + *handle = nullptr; } + s = Status::Incomplete("Insert failed due to LRU cache being full."); + } else { + // insert into the cache + // note that the cache might get larger than its capacity if not enough + // space was freed + LRUHandle* old = table_.Insert(e); + usage_ += e->charge; + if (old != nullptr) { + old->in_cache = false; + if (Unref(old)) { + usage_ -= old->charge; + // old is on LRU because it's in cache and its reference count + // was just 1 (Unref returned 0) + LRU_Remove(old); + last_reference_list.push_back(old); + } + } + if (handle == nullptr) { + LRU_Append(e); + } else { + *handle = reinterpret_cast(e); + } + s = Status::OK(); } } @@ -434,7 +467,7 @@ Cache::Handle* LRUCache::Insert( entry->Free(); } - return reinterpret_cast(e); + return s; } void LRUCache::Erase(const Slice& key, uint32_t hash) { @@ -472,6 +505,7 @@ class ShardedLRUCache : public Cache { uint64_t last_id_; int num_shard_bits_; size_t capacity_; + bool strict_capacity_limit_; static inline uint32_t HashSlice(const Slice& s) { return Hash(s.data(), s.size(), 0); @@ -483,13 +517,18 @@ 
class ShardedLRUCache : public Cache { } public: - ShardedLRUCache(size_t capacity, int num_shard_bits) - : last_id_(0), num_shard_bits_(num_shard_bits), capacity_(capacity) { + ShardedLRUCache(size_t capacity, int num_shard_bits, + bool strict_capacity_limit) + : last_id_(0), + num_shard_bits_(num_shard_bits), + capacity_(capacity), + strict_capacity_limit_(strict_capacity_limit) { int num_shards = 1 << num_shard_bits_; shards_ = new LRUCache[num_shards]; const size_t per_shard = (capacity + (num_shards - 1)) / num_shards; for (int s = 0; s < num_shards; s++) { shards_[s].SetCapacity(per_shard); + shards_[s].SetStrictCapacityLimit(strict_capacity_limit); } } virtual ~ShardedLRUCache() { @@ -504,11 +543,19 @@ class ShardedLRUCache : public Cache { } capacity_ = capacity; } - virtual Handle* Insert(const Slice& key, void* value, size_t charge, - void (*deleter)(const Slice& key, - void* value)) override { + virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override { + int num_shards = 1 << num_shard_bits_; + for (int s = 0; s < num_shards; s++) { + shards_[s].SetStrictCapacityLimit(strict_capacity_limit); + } + strict_capacity_limit_ = strict_capacity_limit; + } + virtual Status Insert(const Slice& key, void* value, size_t charge, + void (*deleter)(const Slice& key, void* value), + Handle** handle) override { const uint32_t hash = HashSlice(key); - return shards_[Shard(hash)].Insert(key, hash, value, charge, deleter); + return shards_[Shard(hash)].Insert(key, hash, value, charge, deleter, + handle); } virtual Handle* Lookup(const Slice& key) override { const uint32_t hash = HashSlice(key); @@ -531,6 +578,10 @@ class ShardedLRUCache : public Cache { } virtual size_t GetCapacity() const override { return capacity_; } + virtual bool HasStrictCapacityLimit() const override { + return strict_capacity_limit_; + } + virtual size_t GetUsage() const override { // We will not lock the cache when getting the usage from shards. 
int num_shards = 1 << num_shard_bits_; @@ -569,14 +620,20 @@ class ShardedLRUCache : public Cache { } // end anonymous namespace shared_ptr NewLRUCache(size_t capacity) { - return NewLRUCache(capacity, kNumShardBits); + return NewLRUCache(capacity, kNumShardBits, false); } shared_ptr NewLRUCache(size_t capacity, int num_shard_bits) { + return NewLRUCache(capacity, num_shard_bits, false); +} + +shared_ptr NewLRUCache(size_t capacity, int num_shard_bits, + bool strict_capacity_limit) { if (num_shard_bits >= 20) { return nullptr; // the cache cannot be sharded into too many fine pieces } - return std::make_shared(capacity, num_shard_bits); + return std::make_shared(capacity, num_shard_bits, + strict_capacity_limit); } } // namespace rocksdb diff --git a/util/cache_bench.cc b/util/cache_bench.cc index 0e0d70d62..266c9e1c5 100644 --- a/util/cache_bench.cc +++ b/util/cache_bench.cc @@ -142,8 +142,7 @@ class CacheBench { // Cast uint64* to be char*, data would be copied to cache Slice key(reinterpret_cast(&rand_key), 8); // do insert - auto handle = cache_->Insert(key, new char[10], 1, &deleter); - cache_->Release(handle); + cache_->Insert(key, new char[10], 1, &deleter); } } @@ -221,8 +220,7 @@ class CacheBench { int32_t prob_op = thread->rnd.Uniform(100); if (prob_op >= 0 && prob_op < FLAGS_insert_percent) { // do insert - auto handle = cache_->Insert(key, new char[10], 1, &deleter); - cache_->Release(handle); + cache_->Insert(key, new char[10], 1, &deleter); } else if (prob_op -= FLAGS_insert_percent && prob_op < FLAGS_lookup_percent) { // do lookup diff --git a/util/cache_test.cc b/util/cache_test.cc index d49cd4fdf..3df71c098 100644 --- a/util/cache_test.cc +++ b/util/cache_test.cc @@ -73,8 +73,8 @@ class CacheTest : public testing::Test { } void Insert(shared_ptr cache, int key, int value, int charge = 1) { - cache->Release(cache->Insert(EncodeKey(key), EncodeValue(value), charge, - &CacheTest::Deleter)); + cache->Insert(EncodeKey(key), EncodeValue(value), charge, + 
&CacheTest::Deleter); } void Erase(shared_ptr cache, int key) { @@ -118,14 +118,12 @@ TEST_F(CacheTest, UsageTest) { auto cache = NewLRUCache(kCapacity, 8); size_t usage = 0; - const char* value = "abcdef"; + char value[10] = "abcdef"; // make sure everything will be cached for (int i = 1; i < 100; ++i) { std::string key(i, 'a'); auto kv_size = key.size() + 5; - cache->Release( - cache->Insert(key, (void*)value, kv_size, dumbDeleter) - ); + cache->Insert(key, reinterpret_cast(value), kv_size, dumbDeleter); usage += kv_size; ASSERT_EQ(usage, cache->GetUsage()); } @@ -133,9 +131,8 @@ TEST_F(CacheTest, UsageTest) { // make sure the cache will be overloaded for (uint64_t i = 1; i < kCapacity; ++i) { auto key = ToString(i); - cache->Release( - cache->Insert(key, (void*)value, key.size() + 5, dumbDeleter) - ); + cache->Insert(key, reinterpret_cast(value), key.size() + 5, + dumbDeleter); } // the usage should be close to the capacity @@ -149,7 +146,7 @@ TEST_F(CacheTest, PinnedUsageTest) { auto cache = NewLRUCache(kCapacity, 8); size_t pinned_usage = 0; - const char* value = "abcdef"; + char value[10] = "abcdef"; std::forward_list unreleased_handles; @@ -158,7 +155,9 @@ TEST_F(CacheTest, PinnedUsageTest) { for (int i = 1; i < 100; ++i) { std::string key(i, 'a'); auto kv_size = key.size() + 5; - auto handle = cache->Insert(key, (void*)value, kv_size, dumbDeleter); + Cache::Handle* handle; + cache->Insert(key, reinterpret_cast(value), kv_size, dumbDeleter, + &handle); pinned_usage += kv_size; ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); if (i % 2 == 0) { @@ -182,8 +181,8 @@ TEST_F(CacheTest, PinnedUsageTest) { // check that overloading the cache does not change the pinned usage for (uint64_t i = 1; i < 2 * kCapacity; ++i) { auto key = ToString(i); - cache->Release( - cache->Insert(key, (void*)value, key.size() + 5, dumbDeleter)); + cache->Insert(key, reinterpret_cast(value), key.size() + 5, + dumbDeleter); } ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); @@ -408,7 
+407,8 @@ TEST_F(CacheTest, SetCapacity) { // Insert 5 entries, but not releasing. for (size_t i = 0; i < 5; i++) { std::string key = ToString(i+1); - handles[i] = cache->Insert(key, new Value(i+1), 1, &deleter); + Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]); + ASSERT_TRUE(s.ok()); } ASSERT_EQ(5U, cache->GetCapacity()); ASSERT_EQ(5U, cache->GetUsage()); @@ -422,7 +422,8 @@ TEST_F(CacheTest, SetCapacity) { // and usage should be 7 for (size_t i = 5; i < 10; i++) { std::string key = ToString(i+1); - handles[i] = cache->Insert(key, new Value(i+1), 1, &deleter); + Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]); + ASSERT_TRUE(s.ok()); } ASSERT_EQ(10U, cache->GetCapacity()); ASSERT_EQ(10U, cache->GetUsage()); @@ -441,6 +442,53 @@ TEST_F(CacheTest, SetCapacity) { } } +TEST_F(CacheTest, SetStrictCapacityLimit) { + // test1: set the flag to false. Insert more keys than capacity. See if they + // all go through. + std::shared_ptr cache = NewLRUCache(5, 0, false); + std::vector handles(10); + Status s; + for (size_t i = 0; i < 10; i++) { + std::string key = ToString(i + 1); + s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]); + ASSERT_TRUE(s.ok()); + ASSERT_NE(nullptr, handles[i]); + } + + // test2: set the flag to true. Insert and check if it fails. + std::string extra_key = "extra"; + Value* extra_value = new Value(0); + cache->SetStrictCapacityLimit(true); + Cache::Handle* handle; + s = cache->Insert(extra_key, extra_value, 1, &deleter, &handle); + ASSERT_TRUE(s.IsIncomplete()); + ASSERT_EQ(nullptr, handle); + + for (size_t i = 0; i < 10; i++) { + cache->Release(handles[i]); + } + + // test3: init with flag being true. 
+ std::shared_ptr cache2 = NewLRUCache(5, 0, true); + for (size_t i = 0; i < 5; i++) { + std::string key = ToString(i + 1); + s = cache2->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]); + ASSERT_TRUE(s.ok()); + ASSERT_NE(nullptr, handles[i]); + } + s = cache2->Insert(extra_key, extra_value, 1, &deleter, &handle); + ASSERT_TRUE(s.IsIncomplete()); + ASSERT_EQ(nullptr, handle); + // test insert without handle + s = cache2->Insert(extra_key, extra_value, 1, &deleter); + ASSERT_TRUE(s.IsIncomplete()); + ASSERT_EQ(5, cache->GetUsage()); + + for (size_t i = 0; i < 5; i++) { + cache2->Release(handles[i]); + } +} + TEST_F(CacheTest, OverCapacity) { size_t n = 10; @@ -452,7 +500,8 @@ TEST_F(CacheTest, OverCapacity) { // Insert n+1 entries, but not releasing. for (size_t i = 0; i < n + 1; i++) { std::string key = ToString(i+1); - handles[i] = cache->Insert(key, new Value(i+1), 1, &deleter); + Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]); + ASSERT_TRUE(s.ok()); } // Guess what's in the cache now? 
From d9620239d270f1fa9d4e8bebb67fe026651974f3 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 10 Mar 2016 18:16:21 -0800 Subject: [PATCH 172/195] Cleanup stale manifests outside of full purge Summary: - Keep track of obsolete manifests in VersionSet - Updated FindObsoleteFiles() to put obsolete manifests in the JobContext for later use by PurgeObsoleteFiles() - Added test case that verifies a stale manifest is deleted by a non-full purge Test Plan: $ ./backupable_db_test --gtest_filter=BackupableDBTest.ChangeManifestDuringBackupCreation Reviewers: IslamAbdelRahman, yoshinorim, sdong Reviewed By: sdong Subscribers: andrewkr, leveldb, dhruba Differential Revision: https://reviews.facebook.net/D55269 --- db/db_impl.cc | 10 +++++++--- db/job_context.h | 9 ++++++--- db/version_set.cc | 5 +++++ db/version_set.h | 3 +++ utilities/backupable/backupable_db_test.cc | 15 ++++++++++++++- 5 files changed, 35 insertions(+), 7 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 675669ad2..4153d14ef 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -572,6 +572,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, // Get obsolete files. This function will also update the list of // pending files in VersionSet(). versions_->GetObsoleteFiles(&job_context->sst_delete_files, + &job_context->manifest_delete_files, job_context->min_pending_output); // store the current filenum, lognum, etc @@ -689,9 +690,9 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) { } auto candidate_files = state.full_scan_candidate_files; - candidate_files.reserve(candidate_files.size() + - state.sst_delete_files.size() + - state.log_delete_files.size()); + candidate_files.reserve( + candidate_files.size() + state.sst_delete_files.size() + + state.log_delete_files.size() + state.manifest_delete_files.size()); // We may ignore the dbname when generating the file names. 
const char* kDumbDbName = ""; for (auto file : state.sst_delete_files) { @@ -707,6 +708,9 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) { 0); } } + for (const auto& filename : state.manifest_delete_files) { + candidate_files.emplace_back(filename, 0); + } // dedup state.candidate_files so we don't try to delete the same // file twice diff --git a/db/job_context.h b/db/job_context.h index ce71103de..0f24136a3 100644 --- a/db/job_context.h +++ b/db/job_context.h @@ -22,9 +22,9 @@ class MemTable; struct JobContext { inline bool HaveSomethingToDelete() const { return full_scan_candidate_files.size() || sst_delete_files.size() || - log_delete_files.size() || new_superversion != nullptr || - superversions_to_free.size() > 0 || memtables_to_free.size() > 0 || - logs_to_free.size() > 0; + log_delete_files.size() || manifest_delete_files.size() || + new_superversion != nullptr || superversions_to_free.size() > 0 || + memtables_to_free.size() > 0 || logs_to_free.size() > 0; } // Structure to store information for candidate files to delete. @@ -56,6 +56,9 @@ struct JobContext { // a list of log files that we need to delete std::vector log_delete_files; + // a list of manifest files that we need to delete + std::vector manifest_delete_files; + // a list of memtables to be free autovector memtables_to_free; diff --git a/db/version_set.cc b/db/version_set.cc index b5658ea38..fd53a4adb 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2254,6 +2254,8 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, db_options_->disableDataSync ? nullptr : db_directory); // Leave the old file behind since PurgeObsoleteFiles will take care of it // later. It's unsafe to delete now since file deletion may be disabled. 
+ obsolete_manifests_.emplace_back( + DescriptorFileName("", manifest_file_number_)); } if (s.ok()) { @@ -3388,7 +3390,10 @@ void VersionSet::GetLiveFilesMetaData(std::vector* metadata) { } void VersionSet::GetObsoleteFiles(std::vector* files, + std::vector* manifest_filenames, uint64_t min_pending_output) { + assert(manifest_filenames->empty()); + obsolete_manifests_.swap(*manifest_filenames); std::vector pending_files; for (auto f : obsolete_files_) { if (f->fd.GetNumber() < min_pending_output) { diff --git a/db/version_set.h b/db/version_set.h index 52bf964e7..d9ff91732 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -697,6 +698,7 @@ class VersionSet { void GetLiveFilesMetaData(std::vector *metadata); void GetObsoleteFiles(std::vector* files, + std::vector* manifest_filenames, uint64_t min_pending_output); ColumnFamilySet* GetColumnFamilySet() { return column_family_set_.get(); } @@ -758,6 +760,7 @@ class VersionSet { uint64_t manifest_file_size_; std::vector obsolete_files_; + std::vector obsolete_manifests_; // env options for all reads and writes except compactions const EnvOptions& env_options_; diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index ce34d7d1a..0a23f4df0 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -13,6 +13,7 @@ #include #include +#include "db/db_impl.h" #include "db/filename.h" #include "port/port.h" #include "port/stack_trace.h" @@ -1318,10 +1319,22 @@ TEST_F(BackupableDBTest, ChangeManifestDuringBackupCreation) { ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); flush_thread.join(); + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + + // The last manifest roll would've already been cleaned up by the full scan + // that happens when CreateNewBackup invokes EnableFileDeletions. 
We need to + // trigger another roll to verify non-full scan purges stale manifests. + DBImpl* db_impl = reinterpret_cast(db_.get()); + std::string prev_manifest_path = + DescriptorFileName(dbname_, db_impl->TEST_Current_Manifest_FileNo()); + FillDB(db_.get(), 0, 100); + ASSERT_OK(env_->FileExists(prev_manifest_path)); + ASSERT_OK(db_->Flush(FlushOptions())); + ASSERT_TRUE(env_->FileExists(prev_manifest_path).IsNotFound()); + CloseDBAndBackupEngine(); DestroyDB(dbname_, Options()); AssertBackupConsistency(0, 0, 100); - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } // see https://github.com/facebook/rocksdb/issues/921 From e8e6cf0173849ee323cf6972121a179d085505b1 Mon Sep 17 00:00:00 2001 From: Baris Yazici Date: Fri, 11 Mar 2016 11:11:45 -0800 Subject: [PATCH 173/195] fix: handle_fatal_signal (sig=6) in std::vector >::_M_range_check | c++/4.8.2/bits/stl_vector.h:794 #174 Summary: Fix for https://github.com/facebook/mysql-5.6/issues/174 When there is no old files to purge, vector.at(i) function was crashing if (old_info_log_file_count != 0 && old_info_log_file_count >= db_options_.keep_log_file_num) { std::sort(old_info_log_files.begin(), old_info_log_files.end()); size_t end = old_info_log_file_count - db_options_.keep_log_file_num; for (unsigned int i = 0; i <= end; i++) { std::string& to_delete = old_info_log_files.at(i); Added check to old_info_log_file_count be non zero. Test Plan: run existing tests Reviewers: gunnarku, vasilep, sdong, yhchiang Reviewed By: yhchiang Subscribers: andrewkr, webscalesql-eng, dhruba Differential Revision: https://reviews.facebook.net/D55245 --- db/db_impl.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 4153d14ef..9db1e11e0 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -829,7 +829,8 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) { // Delete old info log files. 
size_t old_info_log_file_count = old_info_log_files.size(); - if (old_info_log_file_count >= db_options_.keep_log_file_num) { + if (old_info_log_file_count != 0 && + old_info_log_file_count >= db_options_.keep_log_file_num) { std::sort(old_info_log_files.begin(), old_info_log_files.end()); size_t end = old_info_log_file_count - db_options_.keep_log_file_num; for (unsigned int i = 0; i <= end; i++) { From 790252805d7fa18d5e7ff04cceb0053ad8590c85 Mon Sep 17 00:00:00 2001 From: agiardullo Date: Thu, 3 Mar 2016 11:20:25 -0800 Subject: [PATCH 174/195] Add multithreaded transaction test Summary: Refactored db_bench transaction stress tests so that they can be called from unit tests as well. Test Plan: run new unit test as well as db_bench Reviewers: yhchiang, IslamAbdelRahman, sdong Reviewed By: IslamAbdelRahman Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D55203 --- src.mk | 1 + tools/db_bench_tool.cc | 211 +++------------- util/transaction_test_util.cc | 237 ++++++++++++++++++ util/transaction_test_util.h | 111 ++++++++ .../optimistic_transaction_test.cc | 140 +++++------ utilities/transactions/transaction_test.cc | 69 +++++ 6 files changed, 521 insertions(+), 248 deletions(-) create mode 100644 util/transaction_test_util.cc create mode 100644 util/transaction_test_util.h diff --git a/src.mk b/src.mk index f98075028..37114717d 100644 --- a/src.mk +++ b/src.mk @@ -101,6 +101,7 @@ LIB_SOURCES = \ util/env_posix.cc \ util/io_posix.cc \ util/thread_posix.cc \ + util/transaction_test_util.cc \ util/sst_file_manager_impl.cc \ util/file_util.cc \ util/file_reader_writer.cc \ diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 65ce703f1..4f00375f8 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -36,33 +36,34 @@ #include "db/db_impl.h" #include "db/version_set.h" -#include "rocksdb/options.h" +#include "hdfs/env_hdfs.h" +#include "port/port.h" +#include "port/stack_trace.h" #include 
"rocksdb/cache.h" #include "rocksdb/db.h" #include "rocksdb/env.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/write_batch.h" -#include "rocksdb/slice.h" #include "rocksdb/filter_policy.h" -#include "rocksdb/rate_limiter.h" -#include "rocksdb/slice_transform.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/options.h" #include "rocksdb/perf_context.h" +#include "rocksdb/rate_limiter.h" +#include "rocksdb/slice.h" +#include "rocksdb/slice_transform.h" #include "rocksdb/utilities/flashcache.h" +#include "rocksdb/utilities/optimistic_transaction_db.h" #include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/transaction_db.h" -#include "rocksdb/utilities/optimistic_transaction_db.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "util/crc32c.h" +#include "rocksdb/write_batch.h" #include "util/compression.h" +#include "util/crc32c.h" #include "util/histogram.h" #include "util/mutexlock.h" #include "util/random.h" -#include "util/string_util.h" #include "util/statistics.h" +#include "util/string_util.h" #include "util/testutil.h" +#include "util/transaction_test_util.h" #include "util/xxhash.h" -#include "hdfs/env_hdfs.h" #include "utilities/merge_operators.h" #ifdef OS_WIN @@ -3763,18 +3764,22 @@ class Benchmark { ReadOptions options(FLAGS_verify_checksum, true); Duration duration(FLAGS_duration, readwrites_); ReadOptions read_options(FLAGS_verify_checksum, true); - std::string value; - DB* db = db_.db; - uint64_t transactions_done = 0; - uint64_t transactions_aborted = 0; - Status s; uint64_t num_prefix_ranges = FLAGS_transaction_sets; + uint64_t transactions_done = 0; if (num_prefix_ranges == 0 || num_prefix_ranges > 9999) { fprintf(stderr, "invalid value for transaction_sets\n"); abort(); } + TransactionOptions txn_options; + txn_options.lock_timeout = FLAGS_transaction_lock_timeout; + txn_options.set_snapshot = FLAGS_transaction_set_snapshot; + + RandomTransactionInserter inserter(&thread->rand, write_options_, + 
read_options, FLAGS_num, + num_prefix_ranges); + if (FLAGS_num_multi_db > 1) { fprintf(stderr, "Cannot run RandomTransaction benchmark with " @@ -3783,126 +3788,26 @@ class Benchmark { } while (!duration.Done(1)) { - Transaction* txn = nullptr; - WriteBatch* batch = nullptr; + bool success; + // RandomTransactionInserter will attempt to insert a key for each + // # of FLAGS_transaction_sets if (FLAGS_optimistic_transaction_db) { - txn = db_.opt_txn_db->BeginTransaction(write_options_); - assert(txn); + success = inserter.OptimisticTransactionDBInsert(db_.opt_txn_db); } else if (FLAGS_transaction_db) { TransactionDB* txn_db = reinterpret_cast(db_.db); - - TransactionOptions txn_options; - txn_options.lock_timeout = FLAGS_transaction_lock_timeout; - - txn = txn_db->BeginTransaction(write_options_, txn_options); - assert(txn); + success = inserter.TransactionDBInsert(txn_db, txn_options); } else { - batch = new WriteBatch(); + success = inserter.DBInsert(db_.db); } - if (txn && FLAGS_transaction_set_snapshot) { - txn->SetSnapshot(); - } - - // pick a random number to use to increment a key in each set - uint64_t incr = (thread->rand.Next() % 100) + 1; - - bool failed = false; - // For each set, pick a key at random and increment it - for (uint8_t i = 0; i < num_prefix_ranges; i++) { - uint64_t int_value; - char prefix_buf[5]; - - // key format: [SET#][random#] - std::string rand_key = ToString(thread->rand.Next() % FLAGS_num); - Slice base_key(rand_key); - - // Pad prefix appropriately so we can iterate over each set - snprintf(prefix_buf, sizeof(prefix_buf), "%04d", i + 1); - std::string full_key = std::string(prefix_buf) + base_key.ToString(); - Slice key(full_key); - - if (txn) { - s = txn->GetForUpdate(read_options, key, &value); - } else { - s = db->Get(read_options, key, &value); - } - - if (s.ok()) { - int_value = std::stoull(value); - - if (int_value == 0 || int_value == ULONG_MAX) { - fprintf(stderr, "Get returned unexpected value: %s\n", - value.c_str()); - 
abort(); - } - } else if (s.IsNotFound()) { - int_value = 0; - } else if (!(s.IsBusy() || s.IsTimedOut() || s.IsTryAgain())) { - fprintf(stderr, "Get returned an unexpected error: %s\n", - s.ToString().c_str()); - abort(); - } else { - failed = true; - break; - } - - if (FLAGS_transaction_sleep > 0) { - FLAGS_env->SleepForMicroseconds(thread->rand.Next() % - FLAGS_transaction_sleep); - } - - std::string sum = ToString(int_value + incr); - if (txn) { - s = txn->Put(key, sum); - if (!s.ok()) { - // Since we did a GetForUpdate, Put should not fail. - fprintf(stderr, "Put returned an unexpected error: %s\n", - s.ToString().c_str()); - abort(); - } - } else { - batch->Put(key, sum); - } - } - - if (txn) { - if (failed) { - transactions_aborted++; - txn->Rollback(); - s = Status::OK(); - } else { - s = txn->Commit(); - } - } else { - s = db->Write(write_options_, batch); - } - - if (!s.ok()) { - failed = true; - - // Ideally, we'd want to run this stress test with enough concurrency - // on a small enough set of keys that we get some failed transactions - // due to conflicts. 
- if (FLAGS_optimistic_transaction_db && - (s.IsBusy() || s.IsTimedOut() || s.IsTryAgain())) { - transactions_aborted++; - } else if (FLAGS_transaction_db && s.IsExpired()) { - transactions_aborted++; - } else { - fprintf(stderr, "Unexpected write error: %s\n", s.ToString().c_str()); - abort(); - } - } - - delete txn; - delete batch; - - if (!failed) { - thread->stats.FinishedOps(nullptr, db, 1, kOthers); + if (!success) { + fprintf(stderr, "Unexpected error: %s\n", + inserter.GetLastStatus().ToString().c_str()); + abort(); } + thread->stats.FinishedOps(nullptr, db_.db, 1, kOthers); transactions_done++; } @@ -3910,7 +3815,7 @@ class Benchmark { if (FLAGS_optimistic_transaction_db || FLAGS_transaction_db) { snprintf(msg, sizeof(msg), "( transactions:%" PRIu64 " aborts:%" PRIu64 ")", - transactions_done, transactions_aborted); + transactions_done, inserter.GetFailureCount()); } else { snprintf(msg, sizeof(msg), "( batches:%" PRIu64 " )", transactions_done); } @@ -3930,50 +3835,14 @@ class Benchmark { return; } - uint64_t prev_total = 0; + Status s = + RandomTransactionInserter::Verify(db_.db, FLAGS_transaction_sets); - // For each set of keys with the same prefix, sum all the values - for (uint32_t i = 0; i < FLAGS_transaction_sets; i++) { - char prefix_buf[5]; - snprintf(prefix_buf, sizeof(prefix_buf), "%04u", i + 1); - uint64_t total = 0; - - Iterator* iter = db_.db->NewIterator(ReadOptions()); - - for (iter->Seek(Slice(prefix_buf, 4)); iter->Valid(); iter->Next()) { - Slice key = iter->key(); - - // stop when we reach a different prefix - if (key.ToString().compare(0, 4, prefix_buf) != 0) { - break; - } - - Slice value = iter->value(); - uint64_t int_value = std::stoull(value.ToString()); - if (int_value == 0 || int_value == ULONG_MAX) { - fprintf(stderr, "Iter returned unexpected value: %s\n", - value.ToString().c_str()); - abort(); - } - - total += int_value; - } - delete iter; - - if (i > 0) { - if (total != prev_total) { - fprintf(stderr, - 
"RandomTransactionVerify found inconsistent totals. " - "Set[%" PRIu32 "]: %" PRIu64 ", Set[%" PRIu32 "]: %" PRIu64 - " \n", - i - 1, prev_total, i, total); - abort(); - } - } - prev_total = total; + if (s.ok()) { + fprintf(stdout, "RandomTransactionVerify Success.\n"); + } else { + fprintf(stdout, "RandomTransactionVerify FAILED!!\n"); } - - fprintf(stdout, "RandomTransactionVerify Success!\n"); } #endif // ROCKSDB_LITE diff --git a/util/transaction_test_util.cc b/util/transaction_test_util.cc new file mode 100644 index 000000000..7ec990374 --- /dev/null +++ b/util/transaction_test_util.cc @@ -0,0 +1,237 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +#ifndef ROCKSDB_LITE + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#include "util/transaction_test_util.h" + +#include +#include + +#include "rocksdb/db.h" +#include "rocksdb/utilities/optimistic_transaction_db.h" +#include "rocksdb/utilities/transaction.h" +#include "rocksdb/utilities/transaction_db.h" +#include "util/random.h" +#include "util/string_util.h" + +namespace rocksdb { + +RandomTransactionInserter::RandomTransactionInserter( + Random64* rand, const WriteOptions& write_options, + const ReadOptions& read_options, uint64_t num_keys, uint16_t num_sets) + : rand_(rand), + write_options_(write_options), + read_options_(read_options), + num_keys_(num_keys), + num_sets_(num_sets) {} + +RandomTransactionInserter::~RandomTransactionInserter() { + if (txn_ != nullptr) { + delete txn_; + } + if (optimistic_txn_ != nullptr) { + delete optimistic_txn_; + } +} + +bool RandomTransactionInserter::TransactionDBInsert( + TransactionDB* db, const TransactionOptions& txn_options) { + txn_ = db->BeginTransaction(write_options_, txn_options, txn_); + 
+ return DoInsert(nullptr, txn_, false); +} + +bool RandomTransactionInserter::OptimisticTransactionDBInsert( + OptimisticTransactionDB* db, + const OptimisticTransactionOptions& txn_options) { + optimistic_txn_ = + db->BeginTransaction(write_options_, txn_options, optimistic_txn_); + + return DoInsert(nullptr, optimistic_txn_, true); +} + +bool RandomTransactionInserter::DBInsert(DB* db) { + return DoInsert(db, nullptr, false); +} + +bool RandomTransactionInserter::DoInsert(DB* db, Transaction* txn, + bool is_optimistic) { + Status s; + WriteBatch batch; + std::string value; + + // pick a random number to use to increment a key in each set + uint64_t incr = (rand_->Next() % 100) + 1; + + bool unexpected_error = false; + + // For each set, pick a key at random and increment it + for (uint8_t i = 0; i < num_sets_; i++) { + uint64_t int_value = 0; + char prefix_buf[5]; + // prefix_buf needs to be large enough to hold a uint16 in string form + + // key format: [SET#][random#] + std::string rand_key = ToString(rand_->Next() % num_keys_); + Slice base_key(rand_key); + + // Pad prefix appropriately so we can iterate over each set + snprintf(prefix_buf, sizeof(prefix_buf), "%.4u", i + 1); + std::string full_key = std::string(prefix_buf) + base_key.ToString(); + Slice key(full_key); + + if (txn != nullptr) { + s = txn->GetForUpdate(read_options_, key, &value); + } else { + s = db->Get(read_options_, key, &value); + } + + if (s.ok()) { + // Found key, parse its value + int_value = std::stoull(value); + + if (int_value == 0 || int_value == ULONG_MAX) { + unexpected_error = true; + fprintf(stderr, "Get returned unexpected value: %s\n", value.c_str()); + s = Status::Corruption(); + } + } else if (s.IsNotFound()) { + // Have not yet written to this key, so assume its value is 0 + int_value = 0; + s = Status::OK(); + } else { + // Optimistic transactions should never return non-ok status here. + // Non-optimistic transactions may return write-coflict/timeout errors. 
+ if (is_optimistic || !(s.IsBusy() || s.IsTimedOut() || s.IsTryAgain())) { + fprintf(stderr, "Get returned an unexpected error: %s\n", + s.ToString().c_str()); + unexpected_error = true; + } + break; + } + + if (s.ok()) { + // Increment key + std::string sum = ToString(int_value + incr); + if (txn != nullptr) { + s = txn->Put(key, sum); + if (!s.ok()) { + // Since we did a GetForUpdate, Put should not fail. + fprintf(stderr, "Put returned an unexpected error: %s\n", + s.ToString().c_str()); + unexpected_error = true; + } + } else { + batch.Put(key, sum); + } + } + } + + if (s.ok()) { + if (txn != nullptr) { + s = txn->Commit(); + + if (!s.ok()) { + if (is_optimistic) { + // Optimistic transactions can have write-conflict errors on commit. + // Any other error is unexpected. + if (!(s.IsBusy() || s.IsTimedOut() || s.IsTryAgain())) { + unexpected_error = true; + } + } else { + // Non-optimistic transactions should only fail due to expiration + // or write failures. For testing purproses, we do not expect any + // write failures. 
+ if (!s.IsExpired()) { + unexpected_error = true; + } + } + + if (unexpected_error) { + fprintf(stderr, "Commit returned an unexpected error: %s\n", + s.ToString().c_str()); + } + } + + } else { + s = db->Write(write_options_, &batch); + if (!s.ok()) { + unexpected_error = true; + fprintf(stderr, "Write returned an unexpected error: %s\n", + s.ToString().c_str()); + } + } + } else { + if (txn != nullptr) { + txn->Rollback(); + } + } + + if (s.ok()) { + success_count_++; + } else { + failure_count_++; + } + + last_status_ = s; + + // return success if we didn't get any unexpected errors + return !unexpected_error; +} + +Status RandomTransactionInserter::Verify(DB* db, uint16_t num_sets) { + uint64_t prev_total = 0; + + // For each set of keys with the same prefix, sum all the values + for (uint32_t i = 0; i < num_sets; i++) { + char prefix_buf[5]; + snprintf(prefix_buf, sizeof(prefix_buf), "%.4u", i + 1); + uint64_t total = 0; + + Iterator* iter = db->NewIterator(ReadOptions()); + + for (iter->Seek(Slice(prefix_buf, 4)); iter->Valid(); iter->Next()) { + Slice key = iter->key(); + + // stop when we reach a different prefix + if (key.ToString().compare(0, 4, prefix_buf) != 0) { + break; + } + + Slice value = iter->value(); + uint64_t int_value = std::stoull(value.ToString()); + if (int_value == 0 || int_value == ULONG_MAX) { + fprintf(stderr, "Iter returned unexpected value: %s\n", + value.ToString().c_str()); + return Status::Corruption(); + } + + total += int_value; + } + delete iter; + + if (i > 0) { + if (total != prev_total) { + fprintf(stderr, + "RandomTransactionVerify found inconsistent totals. 
" + "Set[%" PRIu32 "]: %" PRIu64 ", Set[%" PRIu32 "]: %" PRIu64 + " \n", + i - 1, prev_total, i, total); + return Status::Corruption(); + } + } + prev_total = total; + } + + return Status::OK(); +} + +} // namespace rocksdb + +#endif // ROCKSDB_LITE diff --git a/util/transaction_test_util.h b/util/transaction_test_util.h new file mode 100644 index 000000000..c9885fc5f --- /dev/null +++ b/util/transaction_test_util.h @@ -0,0 +1,111 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#ifndef ROCKSDB_LITE + +#include "rocksdb/options.h" +#include "rocksdb/utilities/optimistic_transaction_db.h" +#include "rocksdb/utilities/transaction_db.h" + +namespace rocksdb { + +class DB; +class Random64; + +// Utility class for stress testing transactions. Can be used to write many +// transactions in parallel and then validate that the data written is logically +// consistent. This class assumes the input DB is initially empty. +// +// Each call to TransactionDBInsert()/OptimisticTransactionDBInsert() will +// increment the value of a key in #num_sets sets of keys. Regardless of +// whether the transaction succeeds, the total sum of values of keys in each +// set is an invariant that should remain equal. +// +// After calling TransactionDBInsert()/OptimisticTransactionDBInsert() many +// times, Verify() can be called to validate that the invariant holds. +// +// To test writing Transaction in parallel, multiple threads can create a +// RandomTransactionInserter with similar arguments using the same DB. +class RandomTransactionInserter { + public: + // num_keys is the number of keys in each set. + // num_sets is the number of sets of keys. 
+ explicit RandomTransactionInserter( + Random64* rand, const WriteOptions& write_options = WriteOptions(), + const ReadOptions& read_options = ReadOptions(), uint64_t num_keys = 1000, + uint16_t num_sets = 3); + + ~RandomTransactionInserter(); + + // Increment a key in each set using a Transaction on a TransactionDB. + // + // Returns true if the transaction succeeded OR if any error encountered was + // expected (eg a write-conflict). Error status may be obtained by calling + // GetLastStatus(); + bool TransactionDBInsert( + TransactionDB* db, + const TransactionOptions& txn_options = TransactionOptions()); + + // Increment a key in each set using a Transaction on an + // OptimisticTransactionDB + // + // Returns true if the transaction succeeded OR if any error encountered was + // expected (eg a write-conflict). Error status may be obtained by calling + // GetLastStatus(); + bool OptimisticTransactionDBInsert( + OptimisticTransactionDB* db, + const OptimisticTransactionOptions& txn_options = + OptimisticTransactionOptions()); + // Increment a key in each set without using a transaction. If this function + // is called in parallel, then Verify() may fail. + // + // Returns true if the write succeeds. + // Error status may be obtained by calling GetLastStatus(). + bool DBInsert(DB* db); + + // Returns OK if Invariant is true. + static Status Verify(DB* db, uint16_t num_sets); + + // Returns the status of the previous Insert operation + Status GetLastStatus() { return last_status_; } + + // Returns the number of successfully written calls to + // TransactionDBInsert/OptimisticTransactionDBInsert/DBInsert + uint64_t GetSuccessCount() { return success_count_; } + + // Returns the number of calls to + // TransactionDBInsert/OptimisticTransactionDBInsert/DBInsert that did not + // write any data. 
+ uint64_t GetFailureCount() { return failure_count_; } + + private: + // Input options + Random64* rand_; + const WriteOptions write_options_; + const ReadOptions read_options_; + const uint64_t num_keys_; + const uint16_t num_sets_; + + // Number of successful insert batches performed + uint64_t success_count_ = 0; + + // Number of failed insert batches attempted + uint64_t failure_count_ = 0; + + // Status returned by most recent insert operation + Status last_status_; + + // optimization: re-use allocated transaction objects. + Transaction* txn_ = nullptr; + Transaction* optimistic_txn_ = nullptr; + + bool DoInsert(DB* db, Transaction* txn, bool is_optimistic); +}; + +} // namespace rocksdb + +#endif // ROCKSDB_LITE diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index b672b8722..688f3d11a 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -6,12 +6,16 @@ #ifndef ROCKSDB_LITE #include +#include #include "rocksdb/db.h" -#include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/optimistic_transaction_db.h" +#include "rocksdb/utilities/transaction.h" +#include "util/crc32c.h" #include "util/logging.h" +#include "util/random.h" #include "util/testharness.h" +#include "util/transaction_test_util.h" using std::string; @@ -1267,88 +1271,70 @@ TEST_F(OptimisticTransactionTest, UndoGetForUpdateTest) { delete txn1; } -TEST_F(OptimisticTransactionTest, ReinitializeTest) { +namespace { +Status OptimisticTransactionStressTestInserter(OptimisticTransactionDB* db, + const size_t num_transactions, + const size_t num_sets, + const size_t num_keys_per_set) { + size_t seed = std::hash()(std::this_thread::get_id()); + Random64 _rand(seed); WriteOptions write_options; ReadOptions read_options; OptimisticTransactionOptions txn_options; - string value; - Status s; - - Transaction* txn1 = 
txn_db->BeginTransaction(write_options, txn_options); - - txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); - - s = txn1->Put("Z", "z"); - ASSERT_OK(s); - - s = txn1->Commit(); - ASSERT_OK(s); - - txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); - - s = txn1->Put("Z", "zz"); - ASSERT_OK(s); - - // Reinitilize txn1 and verify that zz is not written - txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); - - s = txn1->Commit(); - ASSERT_OK(s); - s = db->Get(read_options, "Z", &value); - ASSERT_OK(s); - ASSERT_EQ(value, "z"); - - // Verify snapshots get reinitialized correctly - txn1->SetSnapshot(); - s = txn1->Put("Z", "zzzz"); - ASSERT_OK(s); - - s = txn1->Commit(); - ASSERT_OK(s); - - s = db->Get(read_options, "Z", &value); - ASSERT_OK(s); - ASSERT_EQ(value, "zzzz"); - - const Snapshot* snapshot = txn1->GetSnapshot(); - ASSERT_TRUE(snapshot); - - txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); - snapshot = txn1->GetSnapshot(); - ASSERT_FALSE(snapshot); - txn_options.set_snapshot = true; - txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); - snapshot = txn1->GetSnapshot(); - ASSERT_TRUE(snapshot); - s = txn1->Put("Z", "a"); + RandomTransactionInserter inserter(&_rand, write_options, read_options, + num_keys_per_set, num_sets); + + for (size_t t = 0; t < num_transactions; t++) { + bool success = inserter.OptimisticTransactionDBInsert(db, txn_options); + if (!success) { + // unexpected failure + return inserter.GetLastStatus(); + } + } + + // Make sure at least some of the transactions succeeded. It's ok if + // some failed due to write-conflicts. + if (inserter.GetFailureCount() > num_transactions / 2) { + return Status::TryAgain("Too many transactions failed! 
" + + std::to_string(inserter.GetFailureCount()) + " / " + + std::to_string(num_transactions)); + } + + return Status::OK(); +} +} // namespace + +TEST_F(OptimisticTransactionTest, OptimisticTransactionStressTest) { + const size_t num_threads = 4; + const size_t num_transactions_per_thread = 10000; + const size_t num_sets = 3; + const size_t num_keys_per_set = 100; + // Setting the key-space to be 100 keys should cause enough write-conflicts + // to make this test interesting. + + std::vector threads; + + std::function call_inserter = [&] { + ASSERT_OK(OptimisticTransactionStressTestInserter( + txn_db, num_transactions_per_thread, num_sets, num_keys_per_set)); + }; + + // Create N threads that use RandomTransactionInserter to write + // many transactions. + for (uint32_t i = 0; i < num_threads; i++) { + threads.emplace_back(call_inserter); + } + + // Wait for all threads to run + for (auto& t : threads) { + t.join(); + } + + // Verify that data is consistent + Status s = RandomTransactionInserter::Verify(db, num_sets); ASSERT_OK(s); - - txn1->Rollback(); - - s = txn1->Put("Y", "y"); - ASSERT_OK(s); - - txn_options.set_snapshot = false; - txn1 = txn_db->BeginTransaction(write_options, txn_options, txn1); - snapshot = txn1->GetSnapshot(); - ASSERT_FALSE(snapshot); - - s = txn1->Put("X", "x"); - ASSERT_OK(s); - - s = txn1->Commit(); - ASSERT_OK(s); - - s = db->Get(read_options, "Z", &value); - ASSERT_OK(s); - ASSERT_EQ(value, "zzzz"); - - s = db->Get(read_options, "Y", &value); - ASSERT_TRUE(s.IsNotFound()); - - delete txn1; } } // namespace rocksdb diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 6f40e5e6a..f9bb7d96d 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -6,6 +6,7 @@ #ifndef ROCKSDB_LITE #include +#include #include "db/db_impl.h" #include "rocksdb/db.h" @@ -14,9 +15,11 @@ #include "rocksdb/utilities/transaction_db.h" #include 
"table/mock_table.h" #include "util/logging.h" +#include "util/random.h" #include "util/sync_point.h" #include "util/testharness.h" #include "util/testutil.h" +#include "util/transaction_test_util.h" #include "utilities/merge_operators.h" #include "utilities/merge_operators/string_append/stringappend.h" @@ -2980,6 +2983,72 @@ TEST_F(TransactionTest, ExpiredTransactionDataRace1) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } +namespace { +Status TransactionStressTestInserter(TransactionDB* db, + const size_t num_transactions, + const size_t num_sets, + const size_t num_keys_per_set) { + size_t seed = std::hash()(std::this_thread::get_id()); + Random64 _rand(seed); + WriteOptions write_options; + ReadOptions read_options; + TransactionOptions txn_options; + txn_options.set_snapshot = true; + + RandomTransactionInserter inserter(&_rand, write_options, read_options, + num_keys_per_set, num_sets); + + for (size_t t = 0; t < num_transactions; t++) { + bool success = inserter.TransactionDBInsert(db, txn_options); + if (!success) { + // unexpected failure + return inserter.GetLastStatus(); + } + } + + // Make sure at least some of the transactions succeeded. It's ok if + // some failed due to write-conflicts. + if (inserter.GetFailureCount() > num_transactions / 2) { + return Status::TryAgain("Too many transactions failed! " + + std::to_string(inserter.GetFailureCount()) + " / " + + std::to_string(num_transactions)); + } + + return Status::OK(); +} +} // namespace + +TEST_F(TransactionTest, TransactionStressTest) { + const size_t num_threads = 4; + const size_t num_transactions_per_thread = 10000; + const size_t num_sets = 3; + const size_t num_keys_per_set = 100; + // Setting the key-space to be 100 keys should cause enough write-conflicts + // to make this test interesting. 
+ + std::vector threads; + + std::function call_inserter = [&] { + ASSERT_OK(TransactionStressTestInserter(db, num_transactions_per_thread, + num_sets, num_keys_per_set)); + }; + + // Create N threads that use RandomTransactionInserter to write + // many transactions. + for (uint32_t i = 0; i < num_threads; i++) { + threads.emplace_back(call_inserter); + } + + // Wait for all threads to run + for (auto& t : threads) { + t.join(); + } + + // Verify that data is consistent + Status s = RandomTransactionInserter::Verify(db, num_sets); + ASSERT_OK(s); +} + } // namespace rocksdb int main(int argc, char** argv) { From 54f6b9e162c49cb6eb5a799e845fa66a7bc345b3 Mon Sep 17 00:00:00 2001 From: SherlockNoMad Date: Fri, 11 Mar 2016 16:54:25 -0800 Subject: [PATCH 175/195] Histogram Concurrency Improvement and Time-Windowing Support --- CMakeLists.txt | 1 + src.mk | 1 + tools/db_bench_tool.cc | 12 +- util/histogram.cc | 224 +++++++++++++++++++++++++----------- util/histogram.h | 113 ++++++++++++++---- util/histogram_test.cc | 202 +++++++++++++++++++++++++++----- util/histogram_windowing.cc | 193 +++++++++++++++++++++++++++++++ util/histogram_windowing.h | 80 +++++++++++++ 8 files changed, 700 insertions(+), 126 deletions(-) create mode 100644 util/histogram_windowing.cc create mode 100644 util/histogram_windowing.h diff --git a/CMakeLists.txt b/CMakeLists.txt index eda0d703b..4eea5cfff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -207,6 +207,7 @@ set(SOURCES util/filter_policy.cc util/hash.cc util/histogram.cc + util/histogram_windowing.cc util/instrumented_mutex.cc util/iostats_context.cc tools/ldb_cmd.cc diff --git a/src.mk b/src.mk index f98075028..aaca3bcdb 100644 --- a/src.mk +++ b/src.mk @@ -107,6 +107,7 @@ LIB_SOURCES = \ util/filter_policy.cc \ util/hash.cc \ util/histogram.cc \ + util/histogram_windowing.cc \ util/instrumented_mutex.cc \ util/iostats_context.cc \ utilities/backupable/backupable_db.cc \ diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc 
index 65ce703f1..2bb63a1c4 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -1205,7 +1205,7 @@ class Stats { uint64_t bytes_; uint64_t last_op_finish_; uint64_t last_report_finish_; - std::unordered_map, std::hash> hist_; std::string message_; bool exclude_from_merge_; @@ -1242,7 +1242,7 @@ class Stats { for (auto it = other.hist_.begin(); it != other.hist_.end(); ++it) { auto this_it = hist_.find(it->first); if (this_it != hist_.end()) { - this_it->second.Merge(other.hist_.at(it->first)); + this_it->second->Merge(*(other.hist_.at(it->first))); } else { hist_.insert({ it->first, it->second }); } @@ -1316,10 +1316,10 @@ class Stats { if (hist_.find(op_type) == hist_.end()) { - HistogramImpl hist_temp; - hist_.insert({op_type, hist_temp}); + auto hist_temp = std::make_shared(); + hist_.insert({op_type, std::move(hist_temp)}); } - hist_[op_type].Add(micros); + hist_[op_type]->Add(micros); if (micros > 20000 && !FLAGS_stats_interval) { fprintf(stderr, "long op: %" PRIu64 " micros%30s\r", micros, ""); @@ -1452,7 +1452,7 @@ class Stats { for (auto it = hist_.begin(); it != hist_.end(); ++it) { fprintf(stdout, "Microseconds per %s:\n%s\n", OperationTypeString[it->first].c_str(), - it->second.ToString().c_str()); + it->second->ToString().c_str()); } } if (FLAGS_report_file_operations) { diff --git a/util/histogram.cc b/util/histogram.cc index 4b5013a55..d052abb33 100644 --- a/util/histogram.cc +++ b/util/histogram.cc @@ -7,11 +7,15 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#include "util/histogram.h" +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif +#include #include #include #include +#include "util/histogram.h" #include "port/port.h" namespace rocksdb { @@ -73,90 +77,126 @@ namespace { const HistogramBucketMapper bucketMapper; } -void HistogramImpl::Clear() { - min_ = static_cast(bucketMapper.LastValue()); - max_ = 0; - num_ = 0; - sum_ = 0; - sum_squares_ = 0; - memset(buckets_, 0, sizeof buckets_); +HistogramStat::HistogramStat() + : num_buckets_(bucketMapper.BucketCount()) { + assert(num_buckets_ == sizeof(buckets_) / sizeof(*buckets_)); + Clear(); } -bool HistogramImpl::Empty() { return num_ == 0; } +void HistogramStat::Clear() { + min_.store(bucketMapper.LastValue(), std::memory_order_relaxed); + max_.store(0, std::memory_order_relaxed); + num_.store(0, std::memory_order_relaxed); + sum_.store(0, std::memory_order_relaxed); + sum_squares_.store(0, std::memory_order_relaxed); + for (unsigned int b = 0; b < num_buckets_; b++) { + buckets_[b].store(0, std::memory_order_relaxed); + } +}; -void HistogramImpl::Add(uint64_t value) { +bool HistogramStat::Empty() const { return num() == 0; } + +void HistogramStat::Add(uint64_t value) { + // This function is designed to be lock free, as it's in the critical path + // of any operation. Each individual value is atomic and the order of updates + // by concurrent threads is tolerable. 
const size_t index = bucketMapper.IndexForValue(value); - buckets_[index] += 1; - if (min_ > value) min_ = static_cast(value); - if (max_ < value) max_ = static_cast(value); - num_++; - sum_ += value; - sum_squares_ += (value * value); + assert(index < num_buckets_ && index >= 0); + buckets_[index].fetch_add(1, std::memory_order_relaxed); + + uint64_t old_min = min(); + while (value < old_min && !min_.compare_exchange_weak(old_min, value)) {} + + uint64_t old_max = max(); + while (value > old_max && !max_.compare_exchange_weak(old_max, value)) {} + + num_.fetch_add(1, std::memory_order_relaxed); + sum_.fetch_add(value, std::memory_order_relaxed); + sum_squares_.fetch_add(value * value, std::memory_order_relaxed); } -void HistogramImpl::Merge(const HistogramImpl& other) { - if (other.min_ < min_) min_ = other.min_; - if (other.max_ > max_) max_ = other.max_; - num_ += other.num_; - sum_ += other.sum_; - sum_squares_ += other.sum_squares_; - for (unsigned int b = 0; b < bucketMapper.BucketCount(); b++) { - buckets_[b] += other.buckets_[b]; +void HistogramStat::Merge(const HistogramStat& other) { + // This function needs to be performned with the outer lock acquired + // However, atomic operation on every member is still need, since Add() + // requires no lock and value update can still happen concurrently + uint64_t old_min = min(); + uint64_t other_min = other.min(); + while (other_min < old_min && + !min_.compare_exchange_weak(old_min, other_min)) {} + + uint64_t old_max = max(); + uint64_t other_max = other.max(); + while (other_max > old_max && + !max_.compare_exchange_weak(old_max, other_max)) {} + + num_.fetch_add(other.num(), std::memory_order_relaxed); + sum_.fetch_add(other.sum(), std::memory_order_relaxed); + sum_squares_.fetch_add(other.sum_squares(), std::memory_order_relaxed); + for (unsigned int b = 0; b < num_buckets_; b++) { + buckets_[b].fetch_add(other.bucket_at(b), std::memory_order_relaxed); } } -double HistogramImpl::Median() const { +double 
HistogramStat::Median() const { return Percentile(50.0); } -double HistogramImpl::Percentile(double p) const { - double threshold = num_ * (p / 100.0); - double sum = 0; - for (unsigned int b = 0; b < bucketMapper.BucketCount(); b++) { - sum += buckets_[b]; - if (sum >= threshold) { +double HistogramStat::Percentile(double p) const { + double threshold = num() * (p / 100.0); + uint64_t cumulative_sum = 0; + for (unsigned int b = 0; b < num_buckets_; b++) { + uint64_t bucket_value = bucket_at(b); + cumulative_sum += bucket_value; + if (cumulative_sum >= threshold) { // Scale linearly within this bucket - double left_point = - static_cast((b == 0) ? 0 : bucketMapper.BucketLimit(b-1)); - double right_point = - static_cast(bucketMapper.BucketLimit(b)); - double left_sum = sum - buckets_[b]; - double right_sum = sum; + uint64_t left_point = (b == 0) ? 0 : bucketMapper.BucketLimit(b-1); + uint64_t right_point = bucketMapper.BucketLimit(b); + uint64_t left_sum = cumulative_sum - bucket_value; + uint64_t right_sum = cumulative_sum; double pos = 0; - double right_left_diff = right_sum - left_sum; + uint64_t right_left_diff = right_sum - left_sum; if (right_left_diff != 0) { - pos = (threshold - left_sum) / (right_sum - left_sum); + pos = (threshold - left_sum) / right_left_diff; } double r = left_point + (right_point - left_point) * pos; - if (r < min_) r = min_; - if (r > max_) r = max_; + uint64_t cur_min = min(); + uint64_t cur_max = max(); + if (r < cur_min) r = static_cast(cur_min); + if (r > cur_max) r = static_cast(cur_max); return r; } } - return max_; + return static_cast(max()); } -double HistogramImpl::Average() const { - if (num_ == 0.0) return 0; - return sum_ / num_; +double HistogramStat::Average() const { + uint64_t cur_num = num(); + uint64_t cur_sum = sum(); + if (cur_num == 0) return 0; + return static_cast(cur_sum) / static_cast(cur_num); } -double HistogramImpl::StandardDeviation() const { - if (num_ == 0.0) return 0; - double variance = (sum_squares_ * 
num_ - sum_ * sum_) / (num_ * num_); +double HistogramStat::StandardDeviation() const { + uint64_t cur_num = num(); + uint64_t cur_sum = sum(); + uint64_t cur_sum_squares = sum_squares(); + if (cur_num == 0) return 0; + double variance = + static_cast(cur_sum_squares * cur_num - cur_sum * cur_sum) / + static_cast(cur_num * cur_num); return sqrt(variance); } - -std::string HistogramImpl::ToString() const { +std::string HistogramStat::ToString() const { + uint64_t cur_num = num(); std::string r; char buf[200]; snprintf(buf, sizeof(buf), - "Count: %.0f Average: %.4f StdDev: %.2f\n", - num_, Average(), StandardDeviation()); + "Count: %" PRIu64 " Average: %.4f StdDev: %.2f\n", + cur_num, Average(), StandardDeviation()); r.append(buf); snprintf(buf, sizeof(buf), - "Min: %.4f Median: %.4f Max: %.4f\n", - (num_ == 0.0 ? 0.0 : min_), Median(), max_); + "Min: %" PRIu64 " Median: %.4f Max: %" PRIu64 "\n", + (cur_num == 0 ? 0 : min()), Median(), (cur_num == 0 ? 0 : max())); r.append(buf); snprintf(buf, sizeof(buf), "Percentiles: " @@ -165,30 +205,30 @@ std::string HistogramImpl::ToString() const { Percentile(99.99)); r.append(buf); r.append("------------------------------------------------------\n"); - const double mult = 100.0 / num_; - double sum = 0; - for (unsigned int b = 0; b < bucketMapper.BucketCount(); b++) { - if (buckets_[b] <= 0.0) continue; - sum += buckets_[b]; + const double mult = 100.0 / cur_num; + uint64_t cumulative_sum = 0; + for (unsigned int b = 0; b < num_buckets_; b++) { + uint64_t bucket_value = bucket_at(b); + if (bucket_value <= 0.0) continue; + cumulative_sum += bucket_value; snprintf(buf, sizeof(buf), - "[ %7lu, %7lu ) %8lu %7.3f%% %7.3f%% ", - // left - (unsigned long)((b == 0) ? 
0 : bucketMapper.BucketLimit(b-1)), - (unsigned long)bucketMapper.BucketLimit(b), // right - (unsigned long)buckets_[b], // count - (mult * buckets_[b]), // percentage - (mult * sum)); // cumulative percentage + "[ %7" PRIu64 ", %7" PRIu64 " ) %8" PRIu64 " %7.3f%% %7.3f%% ", + (b == 0) ? 0 : bucketMapper.BucketLimit(b-1), // left + bucketMapper.BucketLimit(b), // right + bucket_value, // count + (mult * bucket_value), // percentage + (mult * cumulative_sum)); // cumulative percentage r.append(buf); // Add hash marks based on percentage; 20 marks for 100%. - int marks = static_cast(20*(buckets_[b] / num_) + 0.5); + size_t marks = static_cast(mult * bucket_value / 5 + 0.5); r.append(marks, '#'); r.push_back('\n'); } return r; } -void HistogramImpl::Data(HistogramData * const data) const { +void HistogramStat::Data(HistogramData * const data) const { assert(data); data->median = Median(); data->percentile95 = Percentile(95); @@ -197,4 +237,52 @@ void HistogramImpl::Data(HistogramData * const data) const { data->standard_deviation = StandardDeviation(); } +void HistogramImpl::Clear() { + std::lock_guard lock(mutex_); + stats_.Clear(); +} + +bool HistogramImpl::Empty() const { + return stats_.Empty(); +} + +void HistogramImpl::Add(uint64_t value) { + stats_.Add(value); +} + +void HistogramImpl::Merge(const Histogram& other) { + if (strcmp(Name(), other.Name()) == 0) { + Merge(dynamic_cast(other)); + } +} + +void HistogramImpl::Merge(const HistogramImpl& other) { + std::lock_guard lock(mutex_); + stats_.Merge(other.stats_); +} + +double HistogramImpl::Median() const { + return stats_.Median(); +} + +double HistogramImpl::Percentile(double p) const { + return stats_.Percentile(p); +} + +double HistogramImpl::Average() const { + return stats_.Average(); +} + +double HistogramImpl::StandardDeviation() const { + return stats_.StandardDeviation(); +} + +std::string HistogramImpl::ToString() const { + return stats_.ToString(); +} + +void HistogramImpl::Data(HistogramData * 
const data) const { + stats_.Data(data); +} + } // namespace levedb diff --git a/util/histogram.h b/util/histogram.h index 2b6cd8bab..84c3e94fe 100644 --- a/util/histogram.h +++ b/util/histogram.h @@ -14,8 +14,7 @@ #include #include #include - -#include +#include namespace rocksdb { @@ -25,7 +24,7 @@ class HistogramBucketMapper { HistogramBucketMapper(); // converts a value to the bucket index. - size_t IndexForValue(const uint64_t value) const; + size_t IndexForValue(uint64_t value) const; // number of buckets required. size_t BucketCount() const { @@ -52,33 +51,99 @@ class HistogramBucketMapper { std::map valueIndexMap_; }; -class HistogramImpl { +struct HistogramStat { + HistogramStat(); + ~HistogramStat() {} + + HistogramStat(const HistogramStat&) = delete; + HistogramStat& operator=(const HistogramStat&) = delete; + + void Clear(); + bool Empty() const; + void Add(uint64_t value); + void Merge(const HistogramStat& other); + + inline uint64_t min() const { return min_.load(std::memory_order_relaxed); } + inline uint64_t max() const { return max_.load(std::memory_order_relaxed); } + inline uint64_t num() const { return num_.load(std::memory_order_relaxed); } + inline uint64_t sum() const { return sum_.load(std::memory_order_relaxed); } + inline uint64_t sum_squares() const { + return sum_squares_.load(std::memory_order_relaxed); + } + inline uint64_t bucket_at(size_t b) const { + return buckets_[b].load(std::memory_order_relaxed); + } + + double Median() const; + double Percentile(double p) const; + double Average() const; + double StandardDeviation() const; + void Data(HistogramData* const data) const; + std::string ToString() const; + + // To be able to use HistogramStat as thread local variable, it + // cannot have dynamic allocated member. 
That's why we're + // using manually values from BucketMapper + std::atomic_uint_fast64_t min_; + std::atomic_uint_fast64_t max_; + std::atomic_uint_fast64_t num_; + std::atomic_uint_fast64_t sum_; + std::atomic_uint_fast64_t sum_squares_; + std::atomic_uint_fast64_t buckets_[138]; // 138==BucketMapper::BucketCount() + const uint64_t num_buckets_; +}; + +class Histogram { +public: + Histogram() {} + virtual ~Histogram() {}; + + virtual void Clear() = 0; + virtual bool Empty() const = 0; + virtual void Add(uint64_t value) = 0; + virtual void Merge(const Histogram&) = 0; + + virtual std::string ToString() const = 0; + virtual const char* Name() const = 0; + virtual uint64_t min() const = 0; + virtual uint64_t max() const = 0; + virtual uint64_t num() const = 0; + virtual double Median() const = 0; + virtual double Percentile(double p) const = 0; + virtual double Average() const = 0; + virtual double StandardDeviation() const = 0; + virtual void Data(HistogramData* const data) const = 0; +}; + +class HistogramImpl : public Histogram { public: - HistogramImpl() { memset(buckets_, 0, sizeof(buckets_)); } - virtual void Clear(); - virtual bool Empty(); - virtual void Add(uint64_t value); + HistogramImpl() { Clear(); } + + HistogramImpl(const HistogramImpl&) = delete; + HistogramImpl& operator=(const HistogramImpl&) = delete; + + virtual void Clear() override; + virtual bool Empty() const override; + virtual void Add(uint64_t value) override; + virtual void Merge(const Histogram& other) override; void Merge(const HistogramImpl& other); - virtual std::string ToString() const; - - virtual double Median() const; - virtual double Percentile(double p) const; - virtual double Average() const; - virtual double StandardDeviation() const; - virtual void Data(HistogramData * const data) const; + virtual std::string ToString() const override; + virtual const char* Name() const override { return "HistogramImpl"; } + virtual uint64_t min() const override { return stats_.min(); } + 
virtual uint64_t max() const override { return stats_.max(); } + virtual uint64_t num() const override { return stats_.num(); } + virtual double Median() const override; + virtual double Percentile(double p) const override; + virtual double Average() const override; + virtual double StandardDeviation() const override; + virtual void Data(HistogramData* const data) const override; virtual ~HistogramImpl() {} private: - // To be able to use HistogramImpl as thread local variable, its constructor - // has to be static. That's why we're using manually values from BucketMapper - double min_ = 1000000000; // this is BucketMapper:LastValue() - double max_ = 0; - double num_ = 0; - double sum_ = 0; - double sum_squares_ = 0; - uint64_t buckets_[138]; // this is BucketMapper::BucketCount() + HistogramStat stats_; + std::mutex mutex_; }; -} // namespace rocksdb +} // namespace rocksdb \ No newline at end of file diff --git a/util/histogram_test.cc b/util/histogram_test.cc index b9657db06..ce363ff6b 100644 --- a/util/histogram_test.cc +++ b/util/histogram_test.cc @@ -4,56 +4,202 @@ // of patent rights can be found in the PATENTS file in the same directory. // #include "util/histogram.h" - +#include "util/histogram_windowing.h" #include "util/testharness.h" namespace rocksdb { class HistogramTest : public testing::Test {}; -TEST_F(HistogramTest, BasicOperation) { - HistogramImpl histogram; - for (uint64_t i = 1; i <= 100; i++) { - histogram.Add(i); - } - - { - double median = histogram.Median(); - // ASSERT_LE(median, 50); - ASSERT_GT(median, 0); - } - - { - double percentile100 = histogram.Percentile(100.0); - ASSERT_LE(percentile100, 100.0); - ASSERT_GT(percentile100, 0.0); - double percentile99 = histogram.Percentile(99.0); - double percentile85 = histogram.Percentile(85.0); - ASSERT_LE(percentile99, 99.0); - ASSERT_TRUE(percentile99 >= percentile85); - } - - ASSERT_EQ(histogram.Average(), 50.5); // avg is acurately calculated. 
+namespace { + const double kIota = 0.1; + const HistogramBucketMapper bucketMapper; + Env* env = Env::Default(); } -TEST_F(HistogramTest, EmptyHistogram) { - HistogramImpl histogram; +void PopulateHistogram(Histogram& histogram, + uint64_t low, uint64_t high, uint64_t loop = 1) { + for (; loop > 0; loop--) { + for (uint64_t i = low; i <= high; i++) { + histogram.Add(i); + } + } +} + +void BasicOperation(Histogram& histogram) { + PopulateHistogram(histogram, 1, 100, 10); + + HistogramData data; + histogram.Data(&data); + + ASSERT_LE(std::fabs(histogram.Percentile(100.0) - 100.0), kIota); + ASSERT_LE(std::fabs(data.percentile99 - 99.0), kIota); + ASSERT_LE(std::fabs(data.percentile95 - 95.0), kIota); + ASSERT_LE(std::fabs(data.median - 50.0), kIota); + ASSERT_EQ(data.average, 50.5); // avg is acurately calculated. + ASSERT_LT(std::fabs(data.standard_deviation- 28.86), kIota); //sd is ~= 28.86 +} + +void MergeHistogram(Histogram& histogram, Histogram& other) { + PopulateHistogram(histogram, 1, 100); + PopulateHistogram(other, 101, 200); + histogram.Merge(other); + + HistogramData data; + histogram.Data(&data); + + ASSERT_LE(std::fabs(histogram.Percentile(100.0) - 200.0), kIota); + ASSERT_LE(std::fabs(data.percentile99 - 198.0), kIota); + ASSERT_LE(std::fabs(data.percentile95 - 190.0), kIota); + ASSERT_LE(std::fabs(data.median - 100.0), kIota); + ASSERT_EQ(data.average, 100.5); // avg is acurately calculated. 
+ ASSERT_LT(std::fabs(data.standard_deviation - 57.73), kIota); //sd is ~= 57.73 +} + +void EmptyHistogram(Histogram& histogram) { + ASSERT_EQ(histogram.min(), bucketMapper.LastValue()); + ASSERT_EQ(histogram.max(), 0); + ASSERT_EQ(histogram.num(), 0); ASSERT_EQ(histogram.Median(), 0.0); ASSERT_EQ(histogram.Percentile(85.0), 0.0); ASSERT_EQ(histogram.Average(), 0.0); + ASSERT_EQ(histogram.StandardDeviation(), 0.0); } -TEST_F(HistogramTest, ClearHistogram) { - HistogramImpl histogram; +void ClearHistogram(Histogram& histogram) { for (uint64_t i = 1; i <= 100; i++) { histogram.Add(i); } histogram.Clear(); + ASSERT_TRUE(histogram.Empty()); ASSERT_EQ(histogram.Median(), 0); ASSERT_EQ(histogram.Percentile(85.0), 0); ASSERT_EQ(histogram.Average(), 0); } +TEST_F(HistogramTest, BasicOperation) { + HistogramImpl histogram; + BasicOperation(histogram); + + HistogramWindowingImpl histogramWindowing; + BasicOperation(histogramWindowing); +} + +TEST_F(HistogramTest, MergeHistogram) { + HistogramImpl histogram; + HistogramImpl other; + MergeHistogram(histogram, other); + + HistogramWindowingImpl histogramWindowing; + HistogramWindowingImpl otherWindowing; + MergeHistogram(histogramWindowing, otherWindowing); +} + +TEST_F(HistogramTest, EmptyHistogram) { + HistogramImpl histogram; + EmptyHistogram(histogram); + + HistogramWindowingImpl histogramWindowing; + EmptyHistogram(histogramWindowing); +} + +TEST_F(HistogramTest, ClearHistogram) { + HistogramImpl histogram; + ClearHistogram(histogram); + + HistogramWindowingImpl histogramWindowing; + ClearHistogram(histogramWindowing); +} + +TEST_F(HistogramTest, HistogramWindowingExpire) { + uint64_t num_windows = 3; + int micros_per_window = 1000000; + uint64_t min_num_per_window = 0; + + HistogramWindowingImpl + histogramWindowing(num_windows, micros_per_window, min_num_per_window); + + PopulateHistogram(histogramWindowing, 1, 1, 100); + env->SleepForMicroseconds(micros_per_window); + ASSERT_EQ(histogramWindowing.num(), 100); + 
ASSERT_EQ(histogramWindowing.min(), 1); + ASSERT_EQ(histogramWindowing.max(), 1); + ASSERT_EQ(histogramWindowing.Average(), 1); + + PopulateHistogram(histogramWindowing, 2, 2, 100); + env->SleepForMicroseconds(micros_per_window); + ASSERT_EQ(histogramWindowing.num(), 200); + ASSERT_EQ(histogramWindowing.min(), 1); + ASSERT_EQ(histogramWindowing.max(), 2); + ASSERT_EQ(histogramWindowing.Average(), 1.5); + + PopulateHistogram(histogramWindowing, 3, 3, 100); + env->SleepForMicroseconds(micros_per_window); + ASSERT_EQ(histogramWindowing.num(), 300); + ASSERT_EQ(histogramWindowing.min(), 1); + ASSERT_EQ(histogramWindowing.max(), 3); + ASSERT_EQ(histogramWindowing.Average(), 2.0); + + // dropping oldest window with value 1, remaining 2 ~ 4 + PopulateHistogram(histogramWindowing, 4, 4, 100); + env->SleepForMicroseconds(micros_per_window); + ASSERT_EQ(histogramWindowing.num(), 300); + ASSERT_EQ(histogramWindowing.min(), 2); + ASSERT_EQ(histogramWindowing.max(), 4); + ASSERT_EQ(histogramWindowing.Average(), 3.0); + + // dropping oldest window with value 2, remaining 3 ~ 5 + PopulateHistogram(histogramWindowing, 5, 5, 100); + env->SleepForMicroseconds(micros_per_window); + ASSERT_EQ(histogramWindowing.num(), 300); + ASSERT_EQ(histogramWindowing.min(), 3); + ASSERT_EQ(histogramWindowing.max(), 5); + ASSERT_EQ(histogramWindowing.Average(), 4.0); +} + +TEST_F(HistogramTest, HistogramWindowingMerge) { + uint64_t num_windows = 3; + int micros_per_window = 1000000; + uint64_t min_num_per_window = 0; + + HistogramWindowingImpl + histogramWindowing(num_windows, micros_per_window, min_num_per_window); + HistogramWindowingImpl + otherWindowing(num_windows, micros_per_window, min_num_per_window); + + PopulateHistogram(histogramWindowing, 1, 1, 100); + PopulateHistogram(otherWindowing, 1, 1, 100); + env->SleepForMicroseconds(micros_per_window); + + PopulateHistogram(histogramWindowing, 2, 2, 100); + PopulateHistogram(otherWindowing, 2, 2, 100); + 
env->SleepForMicroseconds(micros_per_window); + + PopulateHistogram(histogramWindowing, 3, 3, 100); + PopulateHistogram(otherWindowing, 3, 3, 100); + env->SleepForMicroseconds(micros_per_window); + + histogramWindowing.Merge(otherWindowing); + ASSERT_EQ(histogramWindowing.num(), 600); + ASSERT_EQ(histogramWindowing.min(), 1); + ASSERT_EQ(histogramWindowing.max(), 3); + ASSERT_EQ(histogramWindowing.Average(), 2.0); + + // dropping oldest window with value 1, remaining 2 ~ 4 + PopulateHistogram(histogramWindowing, 4, 4, 100); + env->SleepForMicroseconds(micros_per_window); + ASSERT_EQ(histogramWindowing.num(), 500); + ASSERT_EQ(histogramWindowing.min(), 2); + ASSERT_EQ(histogramWindowing.max(), 4); + + // dropping oldest window with value 2, remaining 3 ~ 5 + PopulateHistogram(histogramWindowing, 5, 5, 100); + env->SleepForMicroseconds(micros_per_window); + ASSERT_EQ(histogramWindowing.num(), 400); + ASSERT_EQ(histogramWindowing.min(), 3); + ASSERT_EQ(histogramWindowing.max(), 5); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/util/histogram_windowing.cc b/util/histogram_windowing.cc new file mode 100644 index 000000000..091338558 --- /dev/null +++ b/util/histogram_windowing.cc @@ -0,0 +1,193 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#include "util/histogram.h" +#include "util/histogram_windowing.h" + +#include + +namespace rocksdb { + +namespace { + const HistogramBucketMapper bucketMapper; +} + +HistogramWindowingImpl::HistogramWindowingImpl() { + env_ = Env::Default(); + window_stats_.reset(new HistogramStat[num_windows_]); + Clear(); +} + +HistogramWindowingImpl::HistogramWindowingImpl( + uint64_t num_windows, + uint64_t micros_per_window, + uint64_t min_num_per_window) : + num_windows_(num_windows), + micros_per_window_(micros_per_window), + min_num_per_window_(min_num_per_window) { + env_ = Env::Default(); + window_stats_.reset(new HistogramStat[num_windows_]); + Clear(); +} + +HistogramWindowingImpl::~HistogramWindowingImpl(){ + window_stats_.release(); +} + +void HistogramWindowingImpl::Clear() { + std::lock_guard lock(mutex_); + + stats_.Clear(); + for (size_t i = 0; i < num_windows_; i++) { + window_stats_[i].Clear(); + } + current_window_.store(0, std::memory_order_relaxed); + last_swap_time_.store(env_->NowMicros(), std::memory_order_relaxed); +} + +bool HistogramWindowingImpl::Empty() const { return stats_.Empty(); } + +// This function is designed to be lock free, as it's in the critical path +// of any operation. +// Each individual value is atomic, it is just that some samples can go +// in the older bucket which is tolerable. 
+void HistogramWindowingImpl::Add(uint64_t value){ + TimerTick(); + + // Parent (global) member update + stats_.Add(value); + + // Current window update + window_stats_[current_window()].Add(value); +} + +void HistogramWindowingImpl::Merge(const Histogram& other) { + if (strcmp(Name(), other.Name()) == 0) { + Merge(dynamic_cast<const HistogramWindowingImpl&>(other)); + } +} + +void HistogramWindowingImpl::Merge(const HistogramWindowingImpl& other) { + std::lock_guard<std::mutex> lock(mutex_); + stats_.Merge(other.stats_); + + if (stats_.num_buckets_ != other.stats_.num_buckets_ || + micros_per_window_ != other.micros_per_window_) { + return; + } + + uint64_t cur_window = current_window(); + uint64_t other_cur_window = other.current_window(); + // going backwards for alignment + for (unsigned int i = 0; + i < std::min(num_windows_, other.num_windows_); i++) { + uint64_t window_index = + (cur_window + num_windows_ - i) % num_windows_; + uint64_t other_window_index = + (other_cur_window + other.num_windows_ - i) % other.num_windows_; + + window_stats_[window_index].Merge(other.window_stats_[other_window_index]); + } +} + +std::string HistogramWindowingImpl::ToString() const { + return stats_.ToString(); +} + +double HistogramWindowingImpl::Median() const { + return Percentile(50.0); +} + +double HistogramWindowingImpl::Percentile(double p) const { + // Retry 3 times in total + for (int retry = 0; retry < 3; retry++) { + uint64_t start_num = stats_.num(); + double result = stats_.Percentile(p); + // Detect if swap buckets or Clear() was called during calculation + if (stats_.num() >= start_num) { + return result; + } + } + return 0.0; +} + +double HistogramWindowingImpl::Average() const { + return stats_.Average(); +} + +double HistogramWindowingImpl::StandardDeviation() const { + return stats_.StandardDeviation(); +} + +void HistogramWindowingImpl::Data(HistogramData * const data) const { + stats_.Data(data); +} + +void HistogramWindowingImpl::TimerTick() { + uint64_t curr_time = env_->NowMicros(); + if 
(curr_time - last_swap_time() > micros_per_window_ && + window_stats_[current_window()].num() >= min_num_per_window_) { + SwapHistoryBucket(); + } +} + +void HistogramWindowingImpl::SwapHistoryBucket() { + // Threads executing Add() would be competing for this mutex, the first one + // who got the metex would take care of the bucket swap, other threads + // can skip this. + // If mutex is held by Merge() or Clear(), next Add() will take care of the + // swap, if needed. + if (mutex_.try_lock()) { + last_swap_time_.store(env_->NowMicros(), std::memory_order_relaxed); + + uint64_t next_window = (current_window() + 1) % num_windows_; + + // subtract next buckets from totals and swap to next buckets + HistogramStat& stats_to_drop = window_stats_[next_window]; + + if (!stats_to_drop.Empty()) { + for (size_t b = 0; b < stats_.num_buckets_; b++){ + stats_.buckets_[b].fetch_sub( + stats_to_drop.bucket_at(b), std::memory_order_relaxed); + } + + if (stats_.min() == stats_to_drop.min()) { + uint64_t new_min = bucketMapper.LastValue(); + for (unsigned int i = 1; i < num_windows_; i++) { + uint64_t m = window_stats_[(next_window + i) % num_windows_].min(); + if (m < new_min) new_min = m; + } + stats_.min_.store(new_min, std::memory_order_relaxed); + } + + if (stats_.max() == stats_to_drop.max()) { + uint64_t new_max = 0; + for (unsigned int i = 1; i < num_windows_; i++) { + uint64_t m = window_stats_[(next_window + i) % num_windows_].max(); + if (m > new_max) new_max = m; + } + stats_.max_.store(new_max, std::memory_order_relaxed); + } + + stats_.num_.fetch_sub(stats_to_drop.num(), std::memory_order_relaxed); + stats_.sum_.fetch_sub(stats_to_drop.sum(), std::memory_order_relaxed); + stats_.sum_squares_.fetch_sub( + stats_to_drop.sum_squares(), std::memory_order_relaxed); + + stats_to_drop.Clear(); + } + + // advance to next window bucket + current_window_.store(next_window, std::memory_order_relaxed); + + mutex_.unlock(); + } +} + +} // namespace rocksdb diff --git 
a/util/histogram_windowing.h b/util/histogram_windowing.h new file mode 100644 index 000000000..cdcf1ba8a --- /dev/null +++ b/util/histogram_windowing.h @@ -0,0 +1,80 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include "util/histogram.h" +#include "rocksdb/env.h" + +namespace rocksdb { + +class HistogramWindowingImpl : public Histogram +{ +public: + HistogramWindowingImpl(); + HistogramWindowingImpl(uint64_t num_windows, + uint64_t micros_per_window, + uint64_t min_num_per_window); + + HistogramWindowingImpl(const HistogramImpl&) = delete; + HistogramWindowingImpl& operator=(const HistogramImpl&) = delete; + + ~HistogramWindowingImpl(); + + virtual void Clear() override; + virtual bool Empty() const override; + virtual void Add(uint64_t value) override; + virtual void Merge(const Histogram& other) override; + void Merge(const HistogramWindowingImpl& other); + + virtual std::string ToString() const override; + virtual const char* Name() const override { return "HistogramWindowingImpl"; } + virtual uint64_t min() const override { return stats_.min(); } + virtual uint64_t max() const override { return stats_.max(); } + virtual uint64_t num() const override { return stats_.num(); } + virtual double Median() const override; + virtual double Percentile(double p) const override; + virtual double Average() const override; + virtual double StandardDeviation() const override; + virtual void Data(HistogramData* const data) const override; + +private: + void TimerTick(); + void 
SwapHistoryBucket(); + inline uint64_t current_window() const { + return current_window_.load(std::memory_order_relaxed); + } + inline uint64_t last_swap_time() const{ + return last_swap_time_.load(std::memory_order_relaxed); + } + + Env* env_; + std::mutex mutex_; + + // Aggregated stats over windows_stats_, all the computation is done + // upon aggregated values + HistogramStat stats_; + + // This is a circular array representing the latest N time-windows. + // Each entry stores a time-window of data. Expiration is done + // on window-based. + std::unique_ptr<HistogramStat[]> window_stats_; + + std::atomic_uint_fast64_t current_window_; + std::atomic_uint_fast64_t last_swap_time_; + + // Following parameters are configuable + uint64_t num_windows_ = 5; + uint64_t micros_per_window_ = 60000000; + // By default, don't care about the number of values in current window + // when decide whether to swap windows or not. + uint64_t min_num_per_window_ = 0; +}; + +} // namespace rocksdb \ No newline at end of file From 580fede347abaccfc019b207e7f592893ab17914 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Fri, 11 Mar 2016 19:01:12 -0800 Subject: [PATCH 176/195] Aggregate hot Iterator counters in LocalStatistics (DBIter::Next perf regression) Summary: This patch bump the counters in the frequent code path DBIter::Next() / DBIter::Prev() in a local data members and send them to Statistics when the iterator is destroyed A better solution will be to have thread_local implementation for Statistics New performance ``` readseq : 0.035 micros/op 28597881 ops/sec; 3163.7 MB/s 1,851,568,819 stalled-cycles-frontend # 31.29% frontend cycles idle [49.86%] 884,929,823 stalled-cycles-backend # 14.95% backend cycles idle [50.21%] readreverse : 0.071 micros/op 14077393 ops/sec; 1557.3 MB/s 3,239,575,993 stalled-cycles-frontend # 27.36% frontend cycles idle [49.96%] 1,558,253,983 stalled-cycles-backend # 13.16% backend cycles idle [50.14%] ``` Existing performance ``` readreverse : 0.174 micros/op 
5732342 ops/sec; 634.1 MB/s 20,570,209,389 stalled-cycles-frontend # 70.71% frontend cycles idle [50.01%] 18,422,816,837 stalled-cycles-backend # 63.33% backend cycles idle [50.04%] readseq : 0.119 micros/op 8400537 ops/sec; 929.3 MB/s 15,634,225,844 stalled-cycles-frontend # 79.07% frontend cycles idle [49.96%] 14,227,427,453 stalled-cycles-backend # 71.95% backend cycles idle [50.09%] ``` Test Plan: unit tests Reviewers: yhchiang, sdong, igor Reviewed By: sdong Subscribers: andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55107 --- HISTORY.md | 1 + db/db_iter.cc | 60 ++++++++++++++++++++++++++++++------- db/db_test.cc | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 10 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 64c89e8ee..d525c1170 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,7 @@ ### Public API Changes * Change default of BlockBasedTableOptions.format_version to 2. It means default DB created by 4.6 or up cannot be opened by RocksDB version 3.9 or earlier. * Added strict_capacity_limit option to NewLRUCache. If the flag is set to true, insert to cache will fail if no enough capacity can be free. Signiture of Cache::Insert() is updated accordingly. +* Tickers [NUMBER_DB_NEXT, NUMBER_DB_PREV, NUMBER_DB_NEXT_FOUND, NUMBER_DB_PREV_FOUND, ITER_BYTES_READ] are not updated immediately. The are updated when the Iterator is deleted. ### New Features * Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification. 
* Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned" diff --git a/db/db_iter.cc b/db/db_iter.cc index 07c8d9e0f..256b65447 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -60,6 +60,44 @@ class DBIter: public Iterator { kReverse }; + // LocalStatistics contain Statistics counters that will be aggregated per + // each iterator instance and then will be sent to the global statistics when + // the iterator is destroyed. + // + // The purpose of this approach is to avoid perf regression happening + // when multiple threads bump the atomic counters from a DBIter::Next(). + struct LocalStatistics { + explicit LocalStatistics() { ResetCounters(); } + + void ResetCounters() { + next_count_ = 0; + next_found_count_ = 0; + prev_count_ = 0; + prev_found_count_ = 0; + bytes_read_ = 0; + } + + void BumpGlobalStatistics(Statistics* global_statistics) { + RecordTick(global_statistics, NUMBER_DB_NEXT, next_count_); + RecordTick(global_statistics, NUMBER_DB_NEXT_FOUND, next_found_count_); + RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_); + RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_); + RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_); + ResetCounters(); + } + + // Map to Tickers::NUMBER_DB_NEXT + uint64_t next_count_; + // Map to Tickers::NUMBER_DB_NEXT_FOUND + uint64_t next_found_count_; + // Map to Tickers::NUMBER_DB_PREV + uint64_t prev_count_; + // Map to Tickers::NUMBER_DB_PREV_FOUND + uint64_t prev_found_count_; + // Map to Tickers::ITER_BYTES_READ + uint64_t bytes_read_; + }; + DBIter(Env* env, const ImmutableCFOptions& ioptions, const Comparator* cmp, InternalIterator* iter, SequenceNumber s, bool arena_mode, uint64_t max_sequential_skip_in_iterations, uint64_t version_number, @@ -86,6 +124,7 @@ class DBIter: public Iterator { } virtual ~DBIter() { RecordTick(statistics_, NO_ITERATORS, -1); + local_stats_.BumpGlobalStatistics(statistics_); if 
(!arena_mode_) { delete iter_; } else { @@ -213,6 +252,7 @@ class DBIter: public Iterator { bool iter_pinned_; // List of operands for merge operator. std::deque<std::string> merge_operands_; + LocalStatistics local_stats_; // No copying allowed DBIter(const DBIter&); @@ -250,6 +290,9 @@ void DBIter::Next() { PERF_COUNTER_ADD(internal_key_skipped_count, 1); } + if (statistics_ != nullptr) { + local_stats_.next_count_++; + } // Now we point to the next internal position, for both of merge and // not merge cases. if (!iter_->Valid()) { @@ -257,18 +300,15 @@ return; } FindNextUserEntry(true /* skipping the current user key */); - if (statistics_ != nullptr) { - RecordTick(statistics_, NUMBER_DB_NEXT); - if (valid_) { - RecordTick(statistics_, NUMBER_DB_NEXT_FOUND); - RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size()); - } - } if (valid_ && prefix_extractor_ && prefix_same_as_start_ && prefix_extractor_->Transform(saved_key_.GetKey()) .compare(prefix_start_.GetKey()) != 0) { valid_ = false; } + if (statistics_ != nullptr && valid_) { + local_stats_.next_found_count_++; + local_stats_.bytes_read_ += (key().size() + value().size()); + } } // PRE: saved_key_ has the current user key if skipping @@ -436,10 +476,10 @@ void DBIter::Prev() { } PrevInternal(); if (statistics_ != nullptr) { - RecordTick(statistics_, NUMBER_DB_PREV); + local_stats_.prev_count_++; if (valid_) { - RecordTick(statistics_, NUMBER_DB_PREV_FOUND); - RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size()); + local_stats_.prev_found_count_++; + local_stats_.bytes_read_ += (key().size() + value().size()); } } if (valid_ && prefix_extractor_ && prefix_same_as_start_ && diff --git a/db/db_test.cc b/db/db_test.cc index d68087d13..4b42296c9 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -10611,6 +10611,88 @@ TEST_F(DBTest, PrefixExtractorBlockFilter) { delete iter; } +TEST_F(DBTest, IteratorWithLocalStatistics) { + Options options = CurrentOptions(); + 
options.statistics = rocksdb::CreateDBStatistics(); + DestroyAndReopen(options); + + Random rnd(301); + for (int i = 0; i < 1000; i++) { + // Key 10 bytes / Value 10 bytes + ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10))); + } + + std::atomic<uint64_t> total_next(0); + std::atomic<uint64_t> total_next_found(0); + std::atomic<uint64_t> total_prev(0); + std::atomic<uint64_t> total_prev_found(0); + std::atomic<uint64_t> total_bytes(0); + + std::vector<std::thread> threads; + std::function<void()> reader_func_next = [&]() { + Iterator* iter = db_->NewIterator(ReadOptions()); + + iter->SeekToFirst(); + // Seek will bump ITER_BYTES_READ + total_bytes += iter->key().size(); + total_bytes += iter->value().size(); + while (true) { + iter->Next(); + total_next++; + + if (!iter->Valid()) { + break; + } + total_next_found++; + total_bytes += iter->key().size(); + total_bytes += iter->value().size(); + } + + delete iter; + }; + + std::function<void()> reader_func_prev = [&]() { + Iterator* iter = db_->NewIterator(ReadOptions()); + + iter->SeekToLast(); + // Seek will bump ITER_BYTES_READ + total_bytes += iter->key().size(); + total_bytes += iter->value().size(); + while (true) { + iter->Prev(); + total_prev++; + + if (!iter->Valid()) { + break; + } + total_prev_found++; + total_bytes += iter->key().size(); + total_bytes += iter->value().size(); + } + + delete iter; + }; + + for (int i = 0; i < 10; i++) { + threads.emplace_back(reader_func_next); + } + for (int i = 0; i < 15; i++) { + threads.emplace_back(reader_func_prev); + } + + for (auto& t : threads) { + t.join(); + } + + ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), total_next); + ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND), + total_next_found); + ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), total_prev); + ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND), + total_prev_found); + ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), total_bytes); +} + #ifndef ROCKSDB_LITE class BloomStatsTestWithParam : public DBTest, From 
fd664a27b5174a0b6197ddbbdad28c5f4db7db22 Mon Sep 17 00:00:00 2001 From: SherlockNoMad Date: Fri, 11 Mar 2016 22:56:25 -0800 Subject: [PATCH 177/195] Fix Build Error --- util/histogram_test.cc | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/util/histogram_test.cc b/util/histogram_test.cc index ce363ff6b..47d9c65ba 100644 --- a/util/histogram_test.cc +++ b/util/histogram_test.cc @@ -3,6 +3,8 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. // +#include <math.h> + +#include "util/histogram.h" #include "util/histogram_windowing.h" #include "util/testharness.h" @@ -32,12 +34,12 @@ void BasicOperation(Histogram& histogram) { HistogramData data; histogram.Data(&data); - ASSERT_LE(std::fabs(histogram.Percentile(100.0) - 100.0), kIota); - ASSERT_LE(std::fabs(data.percentile99 - 99.0), kIota); - ASSERT_LE(std::fabs(data.percentile95 - 95.0), kIota); - ASSERT_LE(std::fabs(data.median - 50.0), kIota); + ASSERT_LE(fabs(histogram.Percentile(100.0) - 100.0), kIota); + ASSERT_LE(fabs(data.percentile99 - 99.0), kIota); + ASSERT_LE(fabs(data.percentile95 - 95.0), kIota); + ASSERT_LE(fabs(data.median - 50.0), kIota); ASSERT_EQ(data.average, 50.5); // avg is acurately calculated. 
- ASSERT_LT(std::fabs(data.standard_deviation- 28.86), kIota); //sd is ~= 28.86 + ASSERT_LT(fabs(data.standard_deviation- 28.86), kIota); //sd is ~= 28.86 } void MergeHistogram(Histogram& histogram, Histogram& other) { @@ -48,12 +50,12 @@ void MergeHistogram(Histogram& histogram, Histogram& other) { HistogramData data; histogram.Data(&data); - ASSERT_LE(std::fabs(histogram.Percentile(100.0) - 200.0), kIota); - ASSERT_LE(std::fabs(data.percentile99 - 198.0), kIota); - ASSERT_LE(std::fabs(data.percentile95 - 190.0), kIota); - ASSERT_LE(std::fabs(data.median - 100.0), kIota); + ASSERT_LE(fabs(histogram.Percentile(100.0) - 200.0), kIota); + ASSERT_LE(fabs(data.percentile99 - 198.0), kIota); + ASSERT_LE(fabs(data.percentile95 - 190.0), kIota); + ASSERT_LE(fabs(data.median - 100.0), kIota); ASSERT_EQ(data.average, 100.5); // avg is acurately calculated. - ASSERT_LT(std::fabs(data.standard_deviation - 57.73), kIota); //sd is ~= 57.73 + ASSERT_LT(fabs(data.standard_deviation - 57.73), kIota); //sd is ~= 57.73 } void EmptyHistogram(Histogram& histogram) { From 08304c0867dba9d9c4ae6ce18bf5317d89f85f51 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Sat, 12 Mar 2016 13:50:20 -0800 Subject: [PATCH 178/195] Expose RepairDB as ldb command Summary: This will make it easier for admins and devs to use RepairDB. 
Test Plan: Tried deleting the manifest and verified it recovers: $ ldb --create_if_missing --db=/tmp/test_db put ok ok $ rm -f /tmp/test_db/MANIFEST-000001 $ ./ldb --db=/tmp/test_db repair $ ldb --db=/tmp/test_db get ok ok Reviewers: yhchiang, sdong, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55359 --- tools/ldb_cmd.cc | 25 +++++++++++++++++++++++++ tools/ldb_cmd.h | 15 +++++++++++++++ tools/ldb_tool.cc | 1 + 3 files changed, 41 insertions(+) diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 44571a879..be743955d 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -191,6 +191,8 @@ LDBCommand* LDBCommand::SelectCommand( return new InternalDumpCommand(cmdParams, option_map, flags); } else if (cmd == CheckConsistencyCommand::Name()) { return new CheckConsistencyCommand(cmdParams, option_map, flags); + } else if (cmd == RepairCommand::Name()) { + return new RepairCommand(cmdParams, option_map, flags); } return nullptr; } @@ -2144,6 +2146,29 @@ void CheckConsistencyCommand::DoCommand() { // ---------------------------------------------------------------------------- +RepairCommand::RepairCommand(const vector<string>& params, + const map<string, string>& options, + const vector<string>& flags) + : LDBCommand(options, flags, false, BuildCmdLineOptions({})) {} + +void RepairCommand::Help(string& ret) { + ret.append(" "); + ret.append(RepairCommand::Name()); + ret.append("\n"); +} + +void RepairCommand::DoCommand() { + Options options = PrepareOptionsForOpenDB(); + Status status = RepairDB(db_path_, options); + if (status.ok()) { + printf("OK\n"); + } else { + exec_state_ = LDBCommandExecuteResult::Failed(status.ToString()); + } +} + +// ---------------------------------------------------------------------------- + namespace { void DumpSstFile(std::string filename, bool output_hex, bool show_properties) { diff --git a/tools/ldb_cmd.h b/tools/ldb_cmd.h index af6d6019c..cc3814c2f 100644 --- 
a/tools/ldb_cmd.h +++ b/tools/ldb_cmd.h @@ -883,6 +883,21 @@ public: static void Help(string& ret); }; +class RepairCommand : public LDBCommand { + public: + static string Name() { return "repair"; } + + RepairCommand(const vector<string>& params, + const map<string, string>& options, + const vector<string>& flags); + + virtual void DoCommand() override; + + virtual bool NoDBOpen() override { return true; } + + static void Help(string& ret); +}; + } // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/tools/ldb_tool.cc b/tools/ldb_tool.cc index 4e24997af..7ec56f115 100644 --- a/tools/ldb_tool.cc +++ b/tools/ldb_tool.cc @@ -77,6 +77,7 @@ public: ListColumnFamiliesCommand::Help(ret); DBFileDumperCommand::Help(ret); InternalDumpCommand::Help(ret); + RepairCommand::Help(ret); fprintf(stderr, "%s\n", ret.c_str()); } From 0267655dad2f9e607ec65b2a7d405f647b99948a Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Sat, 12 Mar 2016 13:51:57 -0800 Subject: [PATCH 179/195] Update change log for 4.6 release Summary: as titled Test Plan: N/A Reviewers: sdong, kradhakrishnan, anthony, yhchiang, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55323 --- HISTORY.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index d525c1170..757f14f9a 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,9 +1,10 @@ # Rocksdb Change Log -## Unreleased +## 4.6.0 (3/10/2016) ### Public API Changes * Change default of BlockBasedTableOptions.format_version to 2. It means default DB created by 4.6 or up cannot be opened by RocksDB version 3.9 or earlier. * Added strict_capacity_limit option to NewLRUCache. If the flag is set to true, insert to cache will fail if no enough capacity can be free. Signiture of Cache::Insert() is updated accordingly. * Tickers [NUMBER_DB_NEXT, NUMBER_DB_PREV, NUMBER_DB_NEXT_FOUND, NUMBER_DB_PREV_FOUND, ITER_BYTES_READ] are not updated immediately. 
The are updated when the Iterator is deleted. +* Add monotonically increasing counter (DB property "rocksdb.current-super-version-number") that increments upon any change to the LSM tree. ### New Features * Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification. * Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned" From b2ae5950ba5240483ea83ad89f132269638b7373 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 10 Mar 2016 15:16:11 -0800 Subject: [PATCH 180/195] Index Reader should not be reused after DB restart Summary: In block based table reader, wow we put index reader to block cache, which can be retrieved after DB restart. However, index reader may reference internal comparator, which can be destroyed after DB restarts, causing problems. Fix it by making cache key identical per table reader. Test Plan: Add a new test which failed with out the commit but now pass. Reviewers: IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: maro, yhchiang, kradhakrishnan, leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55287 --- db/db_test2.cc | 19 +++++++++++++++++++ table/block_based_table_reader.cc | 28 ++++++++++++++++++++-------- table/block_based_table_reader.h | 2 +- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/db/db_test2.cc b/db/db_test2.cc index 1764131ac..3d9820b65 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -58,6 +58,25 @@ TEST_F(DBTest2, IteratorPropertyVersionNumber) { delete iter2; delete iter3; } + +TEST_F(DBTest2, CacheIndexAndFilterWithDBRestart) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = rocksdb::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.cache_index_and_filter_blocks = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(20)); + options.table_factory.reset(new 
BlockBasedTableFactory(table_options)); + CreateAndReopenWithCF({"pikachu"}, options); + + Put(1, "a", "begin"); + Put(1, "z", "end"); + ASSERT_OK(Flush(1)); + TryReopenWithColumnFamilies({"default", "pikachu"}, options); + + std::string value; + value = Get(1, "a"); +} } // namespace rocksdb int main(int argc, char** argv) { diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index cbaf90a90..e48eea694 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -98,17 +98,23 @@ void ReleaseCachedEntry(void* arg, void* h) { cache->Release(handle); } -Slice GetCacheKey(const char* cache_key_prefix, size_t cache_key_prefix_size, - const BlockHandle& handle, char* cache_key) { +Slice GetCacheKeyFromOffset(const char* cache_key_prefix, + size_t cache_key_prefix_size, uint64_t offset, + char* cache_key) { assert(cache_key != nullptr); assert(cache_key_prefix_size != 0); assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize); memcpy(cache_key, cache_key_prefix, cache_key_prefix_size); - char* end = - EncodeVarint64(cache_key + cache_key_prefix_size, handle.offset()); + char* end = EncodeVarint64(cache_key + cache_key_prefix_size, offset); return Slice(cache_key, static_cast(end - cache_key)); } +Slice GetCacheKey(const char* cache_key_prefix, size_t cache_key_prefix_size, + const BlockHandle& handle, char* cache_key) { + return GetCacheKeyFromOffset(cache_key_prefix, cache_key_prefix_size, + handle.offset(), cache_key); +} + Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key, Tickers block_cache_miss_ticker, Tickers block_cache_hit_ticker, @@ -359,6 +365,8 @@ struct BlockBasedTable::Rep { size_t cache_key_prefix_size = 0; char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize]; size_t compressed_cache_key_prefix_size = 0; + uint64_t dummy_index_reader_offset = + 0; // ID that is unique for the block cache. 
// Footer contains the fixed table information Footer footer; @@ -415,13 +423,16 @@ struct BlockBasedTable::CachableEntry { }; // Helper function to setup the cache key's prefix for the Table. -void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep) { +void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep, uint64_t file_size) { assert(kMaxCacheKeyPrefixSize >= 10); rep->cache_key_prefix_size = 0; rep->compressed_cache_key_prefix_size = 0; if (rep->table_options.block_cache != nullptr) { GenerateCachePrefix(rep->table_options.block_cache.get(), rep->file->file(), &rep->cache_key_prefix[0], &rep->cache_key_prefix_size); + // Create dummy offset of index reader which is beyond the file size. + rep->dummy_index_reader_offset = + file_size + rep->table_options.block_cache->NewId(); } if (rep->table_options.block_cache_compressed != nullptr) { GenerateCachePrefix(rep->table_options.block_cache_compressed.get(), @@ -510,7 +521,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, rep->footer = footer; rep->index_type = table_options.index_type; rep->hash_index_allow_collision = table_options.hash_index_allow_collision; - SetupCacheKeyPrefix(rep); + SetupCacheKeyPrefix(rep, file_size); unique_ptr new_table(new BlockBasedTable(rep)); // Read meta index @@ -935,8 +946,9 @@ InternalIterator* BlockBasedTable::NewIndexIterator( bool no_io = read_options.read_tier == kBlockCacheTier; Cache* block_cache = rep_->table_options.block_cache.get(); char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; - auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, - rep_->footer.index_handle(), cache_key); + auto key = + GetCacheKeyFromOffset(rep_->cache_key_prefix, rep_->cache_key_prefix_size, + rep_->dummy_index_reader_offset, cache_key); Statistics* statistics = rep_->ioptions.statistics; auto cache_handle = GetEntryFromCache(block_cache, key, BLOCK_CACHE_INDEX_MISS, diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 
a35b8ae41..600ca18a3 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -207,7 +207,7 @@ class BlockBasedTable : public TableReader { // Create the filter from the filter block. static FilterBlockReader* ReadFilter(Rep* rep, size_t* filter_size = nullptr); - static void SetupCacheKeyPrefix(Rep* rep); + static void SetupCacheKeyPrefix(Rep* rep, uint64_t file_size); explicit BlockBasedTable(Rep* rep) : rep_(rep), compaction_optimized_(false) {} From 5bd3da1c59af19cd68f6580138d46d6da23083c9 Mon Sep 17 00:00:00 2001 From: Edouard A Date: Mon, 14 Mar 2016 23:48:28 +0100 Subject: [PATCH 181/195] Added quasardb to the USERS.md file --- USERS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/USERS.md b/USERS.md index 929f4da8c..c873ca386 100644 --- a/USERS.md +++ b/USERS.md @@ -52,3 +52,7 @@ Pinterest's Object Retrieval System uses RocksDB for storage: https://www.youtub ## VWO, Wingify [VWO's](https://vwo.com/) Smart Code checker and URL helper uses RocksDB to store all the URLs where VWO's Smart Code is installed. + +## quasardb +[quasardb](https://www.quasardb.net) is a high-performance, distributed, transactional key-value database that integrates well with in-memory analytics engines such as Apache Spark. +quasardb uses a heavily tuned RocksDB as its persistence layer. \ No newline at end of file From 1a2cc27e013b561c9d3c8b81384d14443822057f Mon Sep 17 00:00:00 2001 From: Dhruba Borthakur Date: Mon, 14 Mar 2016 15:04:40 -0700 Subject: [PATCH 182/195] ColumnFamilyOptions SanitizeOptions is buggy on 32-bit platforms. Summary: The pre-existing code is trying to clamp between 65,536 and 0, resulting in clamping to 65,536, resulting in very small buffers, resulting in ShouldFlushNow() being true quite easily, resulting in assertion failing and database performance being "not what it should be". 
https://github.com/facebook/rocksdb/issues/1018 Test Plan: make check Reviewers: sdong, andrewkr, IslamAbdelRahman, yhchiang, igor Reviewed By: igor Subscribers: leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55455 --- db/column_family.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index 6972b52e9..cde308d8c 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -147,13 +147,10 @@ ColumnFamilyOptions SanitizeOptions(const DBOptions& db_options, const ColumnFamilyOptions& src) { ColumnFamilyOptions result = src; result.comparator = icmp; -#ifdef OS_MACOSX - // TODO(icanadi) make write_buffer_size uint64_t instead of size_t - ClipToRange(&result.write_buffer_size, ((size_t)64) << 10, ((size_t)1) << 30); -#else - ClipToRange(&result.write_buffer_size, - ((size_t)64) << 10, ((size_t)64) << 30); -#endif + size_t clamp_max = std::conditional< + sizeof(size_t) == 4, std::integral_constant, + std::integral_constant>::type::value; + ClipToRange(&result.write_buffer_size, ((size_t)64) << 10, clamp_max); // if user sets arena_block_size, we trust user to use this value. 
Otherwise, // calculate a proper value from writer_buffer_size; if (result.arena_block_size <= 0) { From 58379bfb5579c6359e88bf422e0155351454fe3f Mon Sep 17 00:00:00 2001 From: SherlockNoMad Date: Mon, 14 Mar 2016 21:41:13 -0700 Subject: [PATCH 183/195] remove division from histogramwidowing impl --- util/histogram_windowing.cc | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/util/histogram_windowing.cc b/util/histogram_windowing.cc index 091338558..9d8f5429a 100644 --- a/util/histogram_windowing.cc +++ b/util/histogram_windowing.cc @@ -146,7 +146,9 @@ void HistogramWindowingImpl::SwapHistoryBucket() { if (mutex_.try_lock()) { last_swap_time_.store(env_->NowMicros(), std::memory_order_relaxed); - uint64_t next_window = (current_window() + 1) % num_windows_; + uint64_t curr_window = current_window(); + uint64_t next_window = (curr_window == num_windows_ - 1) ? + 0 : curr_window + 1; // subtract next buckets from totals and swap to next buckets HistogramStat& stats_to_drop = window_stats_[next_window]; @@ -159,18 +161,22 @@ void HistogramWindowingImpl::SwapHistoryBucket() { if (stats_.min() == stats_to_drop.min()) { uint64_t new_min = bucketMapper.LastValue(); - for (unsigned int i = 1; i < num_windows_; i++) { - uint64_t m = window_stats_[(next_window + i) % num_windows_].min(); - if (m < new_min) new_min = m; + for (unsigned int i = 0; i < num_windows_; i++) { + if (i != next_window) { + uint64_t m = window_stats_[i].min(); + if (m < new_min) new_min = m; + } } stats_.min_.store(new_min, std::memory_order_relaxed); } if (stats_.max() == stats_to_drop.max()) { uint64_t new_max = 0; - for (unsigned int i = 1; i < num_windows_; i++) { - uint64_t m = window_stats_[(next_window + i) % num_windows_].max(); - if (m > new_max) new_max = m; + for (unsigned int i = 0; i < num_windows_; i++) { + if (i != next_window) { + uint64_t m = window_stats_[i].max(); + if (m > new_max) new_max = m; + } } stats_.max_.store(new_max, 
std::memory_order_relaxed); } From 697fab820a4ebec5768a2775bbfe9f87715eaaec Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Mon, 14 Mar 2016 23:09:04 -0700 Subject: [PATCH 184/195] Updates to RocksDB subcompaction benchmarking script Summary: Set of updates to the subcompaction benchmark script which are based on our internal discussions. The intent behind the changes is to make sure that the scripts will correctly reflect how we're doing the actual benchmarking. Test Plan: Tested by exercising the full set of compaction benchmarks and validating the execution and consistency of results. Reviewers: MarkCallaghan, sdong, yhchiang Reviewed By: yhchiang Subscribers: andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55461 --- tools/benchmark.sh | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/tools/benchmark.sh b/tools/benchmark.sh index 7d3e9d550..57e3ee575 100755 --- a/tools/benchmark.sh +++ b/tools/benchmark.sh @@ -4,7 +4,7 @@ if [ $# -ne 1 ]; then echo -n "./benchmark.sh [bulkload/fillseq/overwrite/filluniquerandom/" echo "readrandom/readwhilewriting/readwhilemerging/updaterandom/" - echo "mergerandom/randomtransaction]" + echo "mergerandom/randomtransaction/compact]" exit 0 fi @@ -117,14 +117,14 @@ params_bulkload="$const_params --max_background_compactions=16 --max_background_ # For universal compaction, these level0_* options mean total sorted of runs in # LSM. In level-based compaction, it means number of L0 files. 
# -params_level_compact="$const_params --max_background_compactions=16 \ - --max_background_flushes=7 \ +params_level_compact="$const_params \ + --max_background_flushes=4 \ --level0_file_num_compaction_trigger=4 \ --level0_slowdown_writes_trigger=16 \ --level0_stop_writes_trigger=20" -params_univ_compact="$const_params --max_background_compactions=16 \ - --max_background_flushes=7 \ +params_univ_compact="$const_params \ + --max_background_flushes=4 \ --level0_file_num_compaction_trigger=8 \ --level0_slowdown_writes_trigger=16 \ --level0_stop_writes_trigger=20" @@ -187,6 +187,14 @@ function run_bulkload { eval $cmd } +# +# Parameter description: +# +# $1 - 1 if I/O statistics should be collected. +# $2 - compaction type to use (level=0, universal=1). +# $3 - number of subcompactions. +# $4 - number of maximum background compactions. +# function run_manual_compaction_worker { # This runs with a vector memtable and the WAL disabled to load faster. # It is still crash safe and the client can discover where to restart a @@ -214,6 +222,7 @@ function run_manual_compaction_worker { --subcompactions=$3 \ --memtablerep=vector \ --disable_wal=1 \ + --max_background_compactions=$4 \ --seed=$( date +%s ) \ 2>&1 | tee -a $fillrandom_output_file" @@ -237,6 +246,7 @@ function run_manual_compaction_worker { --compaction_measure_io_stats=$1 \ --compaction_style=$2 \ --subcompactions=$3 \ + --max_background_compactions=$4 \ ;} 2>&1 | tee -a $man_compact_output_log" @@ -254,21 +264,19 @@ function run_univ_compaction { # Values: kCompactionStyleLevel = 0x0, kCompactionStyleUniversal = 0x1. compaction_style=1 - # Get the basic understanding about impact of scaling out the subcompactions - # by allowing the usage of { 1, 2, 4, 8, 16 } threads for different runs. - subcompactions=("1" "2" "4" "8" "16") + # Define a set of benchmarks. + subcompactions=(1 2 4 8 16) + max_background_compactions=(16 16 8 4 2) - # Do the real work of running various experiments. 
+ i=0 + total=${#subcompactions[@]} - # Run the compaction benchmark which is based on bulkload. It pretty much - # consists of running manual compaction with different number of subcompaction - # threads. - log_suffix=1 - - for ((i=0; i < ${#subcompactions[@]}; i++)) + # Execute a set of benchmarks to cover variety of scenarios. + while [ "$i" -lt "$total" ] do - run_manual_compaction_worker $io_stats $compaction_style ${subcompactions[$i]} $log_suffix - ((log_suffix++)) + run_manual_compaction_worker $io_stats $compaction_style ${subcompactions[$i]} \ + ${max_background_compactions[$i]} + ((i++)) done } From 6b03f93d4fd9e6e3f41dba818483eaeac2acec2e Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Tue, 15 Mar 2016 10:30:10 -0700 Subject: [PATCH 185/195] Fix the build break on Ubuntu 15.10 when gcc 5.2.1 is used Summary: Using gcc 5.2.1 to compile RocksDB on Ubuntu 15.10 results in a warning about unused variable. Warning is treated as an error and all of it results in a build break. Fix the issue and make sure that everything compiles with that particular configuration. P.S. Lint complains about a non-ASCII character used in the source code ("Wagner-Fischer"). Fix this as well. Test Plan: # Everything compiles cleanly on the system which exhibited the problem. # `make clean && make -j 16` on CentOS. # `make clean && USE_CLANG=1 make -j 16` on CentOS. Reviewers: sdong, igor Reviewed By: igor Subscribers: andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55503 --- third-party/gtest-1.7.0/fused-src/gtest/gtest.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third-party/gtest-1.7.0/fused-src/gtest/gtest.h b/third-party/gtest-1.7.0/fused-src/gtest/gtest.h index 2756b47d5..e3f0cfb95 100644 --- a/third-party/gtest-1.7.0/fused-src/gtest/gtest.h +++ b/third-party/gtest-1.7.0/fused-src/gtest/gtest.h @@ -7682,7 +7682,7 @@ namespace edit_distance { // Returns the optimal edits to go from 'left' to 'right'. 
// All edits cost the same, with replace having lower priority than // add/remove. -// Simple implementation of the Wagner–Fischer algorithm. +// Simple implementation of the Wagner-Fischer algorithm. // See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm enum EditType { kMatch, kAdd, kRemove, kReplace }; GTEST_API_ std::vector CalculateOptimalEdits( @@ -17586,7 +17586,7 @@ internal::CartesianProductHolder10()); \ return 0; \ } \ - static int gtest_registering_dummy_; \ + static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \ GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \ }; \ From f11b0df121ae934e92faaa2ee4907c29bfccd44b Mon Sep 17 00:00:00 2001 From: SherlockNoMad Date: Tue, 15 Mar 2016 10:57:33 -0700 Subject: [PATCH 186/195] Fix AppVeyor build error --- CMakeLists.txt | 1 + tools/db_bench_tool.cc | 5 +++-- util/transaction_test_util.h | 1 + utilities/transactions/optimistic_transaction_test.cc | 3 ++- utilities/transactions/transaction_test.cc | 3 ++- 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 16219b96f..bfe6531e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -238,6 +238,7 @@ set(SOURCES util/thread_status_updater.cc util/thread_status_util.cc util/thread_status_util_debug.cc + util/transaction_test_util.cc util/xfunc.cc util/xxhash.cc utilities/backupable/backupable_db.cc diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 4f00375f8..e90bbc490 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -3764,7 +3764,7 @@ class Benchmark { ReadOptions options(FLAGS_verify_checksum, true); Duration duration(FLAGS_duration, readwrites_); ReadOptions read_options(FLAGS_verify_checksum, true); - uint64_t num_prefix_ranges = FLAGS_transaction_sets; + uint16_t num_prefix_ranges = static_cast(FLAGS_transaction_sets); uint64_t transactions_done = 0; if (num_prefix_ranges == 0 || num_prefix_ranges > 9999) { @@ -3836,7 +3836,8 @@ 
class Benchmark { } Status s = - RandomTransactionInserter::Verify(db_.db, FLAGS_transaction_sets); + RandomTransactionInserter::Verify(db_.db, + static_cast(FLAGS_transaction_sets)); if (s.ok()) { fprintf(stdout, "RandomTransactionVerify Success.\n"); diff --git a/util/transaction_test_util.h b/util/transaction_test_util.h index c9885fc5f..97c62841f 100644 --- a/util/transaction_test_util.h +++ b/util/transaction_test_util.h @@ -8,6 +8,7 @@ #ifndef ROCKSDB_LITE #include "rocksdb/options.h" +#include "port/port.h" #include "rocksdb/utilities/optimistic_transaction_db.h" #include "rocksdb/utilities/transaction_db.h" diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index 688f3d11a..fd90f2423 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -1284,7 +1284,8 @@ Status OptimisticTransactionStressTestInserter(OptimisticTransactionDB* db, txn_options.set_snapshot = true; RandomTransactionInserter inserter(&_rand, write_options, read_options, - num_keys_per_set, num_sets); + num_keys_per_set, + static_cast(num_sets)); for (size_t t = 0; t < num_transactions; t++) { bool success = inserter.OptimisticTransactionDBInsert(db, txn_options); diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index f9bb7d96d..f7a1f2ed8 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -2996,7 +2996,8 @@ Status TransactionStressTestInserter(TransactionDB* db, txn_options.set_snapshot = true; RandomTransactionInserter inserter(&_rand, write_options, read_options, - num_keys_per_set, num_sets); + num_keys_per_set, + static_cast(num_sets)); for (size_t t = 0; t < num_transactions; t++) { bool success = inserter.TransactionDBInsert(db, txn_options); From f76b260eff1086968fc7813112d9993525cd6dd6 Mon Sep 17 00:00:00 2001 From: SherlockNoMad Date: 
Tue, 15 Mar 2016 11:38:15 -0700 Subject: [PATCH 187/195] Fix FB internal CI build failure --- util/histogram.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/histogram.cc b/util/histogram.cc index d052abb33..1e63c39b3 100644 --- a/util/histogram.cc +++ b/util/histogram.cc @@ -101,7 +101,7 @@ void HistogramStat::Add(uint64_t value) { // of any operation. Each individual value is atomic and the order of updates // by concurrent threads is tolerable. const size_t index = bucketMapper.IndexForValue(value); - assert(index < num_buckets_ && index >= 0); + assert(index < num_buckets_); buckets_[index].fetch_add(1, std::memory_order_relaxed); uint64_t old_min = min(); From 3ff98bd209cae4b1d6751ee7d1bd02629003f7f3 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Tue, 15 Mar 2016 12:17:40 -0700 Subject: [PATCH 188/195] Fix no compression test Summary: DBBlockCacheTest.TestWithCompressedBlockCache is depending on compression using snappy, so this test fail when snappy is not available block this test when we don't have snappy https://ci-builds.fb.com/view/rocksdb/job/rocksdb_no_compression/833/console Test Plan: run the test when compression libraries are not avaliable Reviewers: sdong, yiwu Reviewed By: yiwu Subscribers: andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55413 --- db/db_block_cache_test.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 18fb5b2ee..939cf44e0 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -166,10 +166,12 @@ TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) { } } +#ifdef SNAPPY TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) { ReadOptions read_options; auto table_options = GetTableOptions(); auto options = GetOptions(table_options); + options.compression = CompressionType::kSnappyCompression; InitTable(options); std::shared_ptr cache = NewLRUCache(0, 0, false); @@ -227,6 +229,7 @@ 
TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) { delete iter; iter = nullptr; } +#endif } // namespace rocksdb From 3d29f914665ade3c5e657006e2506ec27d71327d Mon Sep 17 00:00:00 2001 From: Edouard Alligand Date: Wed, 16 Mar 2016 15:37:55 +0100 Subject: [PATCH 189/195] Improve documentation of the allow_os_buffer parameter. --- include/rocksdb/options.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 6ace73bb6..61a0c1ff1 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1041,7 +1041,23 @@ struct DBOptions { // large amounts of data (such as xfs's allocsize option). size_t manifest_preallocation_size; - // Data being read from file storage may be buffered in the OS + // Hint the OS that it should not buffer disk I/O. Enabling this + // parameter may improve performance but increases pressure on the + // system cache. + // + // The exact behavior of this parameter is platform dependent. + // + // On POSIX systems, after RocksDB reads data from disk it will + // mark the pages as "unneeded". The operating system may - or may not + // - evict these pages from memory, reducing pressure on the system + // cache. If the disk block is requested again this can result in + // additional disk I/O. + // + // On WINDOWS system, files will be opened in "unbuffered I/O" mode + // which means that data read from the disk will not be cached or + // bufferized. The hardware buffer of the devices may however still + // be used. Memory mapped files are not impacted by this parameter. + // // Default: true bool allow_os_buffer; From 02e62ebbc88891969d59fc315be7e0f112e56d89 Mon Sep 17 00:00:00 2001 From: Edouard A Date: Wed, 16 Mar 2016 22:57:57 +0100 Subject: [PATCH 190/195] Fixes warnings and ensure correct int behavior on 32-bit platforms. 
--- db/managed_iterator.cc | 5 +++-- db/memtable_list.cc | 4 ++-- db/version_set.cc | 4 ++-- table/block_based_table_factory.cc | 6 +++--- util/arena.cc | 2 +- util/random.h | 2 +- util/testutil.cc | 2 +- 7 files changed, 13 insertions(+), 12 deletions(-) diff --git a/db/managed_iterator.cc b/db/managed_iterator.cc index ceb7ba40b..1d47f933d 100644 --- a/db/managed_iterator.cc +++ b/db/managed_iterator.cc @@ -79,7 +79,7 @@ ManagedIterator::ManagedIterator(DBImpl* db, const ReadOptions& read_options, release_supported_(true) { read_options_.managed = false; if ((!read_options_.tailing) && (read_options_.snapshot == nullptr)) { - assert(read_options_.snapshot = db_->GetSnapshot()); + assert(nullptr != (read_options_.snapshot = db_->GetSnapshot())); snapshot_created_ = true; } cfh_.SetCFD(cfd); @@ -210,7 +210,8 @@ void ManagedIterator::RebuildIterator() { void ManagedIterator::UpdateCurrent() { assert(mutable_iter_ != nullptr); - if (!(valid_ = mutable_iter_->Valid())) { + valid_ = mutable_iter_->Valid(); + if (!valid_) { status_ = mutable_iter_->status(); return; } diff --git a/db/memtable_list.cc b/db/memtable_list.cc index 9668f8396..9c1d3632b 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -345,8 +345,8 @@ Status MemTableList::InstallMemtableFlushResults( imm_flush_needed.store(true, std::memory_order_release); } ++mem_id; - } while (!current_->memlist_.empty() && (m = current_->memlist_.back()) && - m->file_number_ == file_number); + } while (!current_->memlist_.empty() && (nullptr != (m = current_->memlist_.back())) && + (m->file_number_ == file_number)); } commit_in_progress_ = false; return s; diff --git a/db/version_set.cc b/db/version_set.cc index fd53a4adb..1da447540 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -90,8 +90,8 @@ class FilePicker { const Comparator* user_comparator, const InternalKeyComparator* internal_comparator) : num_levels_(num_levels), - curr_level_(-1), - hit_file_level_(-1), + curr_level_(static_cast(-1)), + 
hit_file_level_(static_cast(-1)), search_left_bound_(0), search_right_bound_(FileIndexer::kLevelMaxIndex), #ifndef NDEBUG diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index 4cfff93a3..75917232d 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -110,7 +110,7 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const { snprintf(buffer, kBufferSize, " flush_block_policy_factory: %s (%p)\n", table_options_.flush_block_policy_factory->Name(), - table_options_.flush_block_policy_factory.get()); + static_cast(table_options_.flush_block_policy_factory.get())); ret.append(buffer); snprintf(buffer, kBufferSize, " cache_index_and_filter_blocks: %d\n", table_options_.cache_index_and_filter_blocks); @@ -128,7 +128,7 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const { table_options_.no_block_cache); ret.append(buffer); snprintf(buffer, kBufferSize, " block_cache: %p\n", - table_options_.block_cache.get()); + static_cast(table_options_.block_cache.get())); ret.append(buffer); if (table_options_.block_cache) { snprintf(buffer, kBufferSize, " block_cache_size: %" ROCKSDB_PRIszt "\n", @@ -136,7 +136,7 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const { ret.append(buffer); } snprintf(buffer, kBufferSize, " block_cache_compressed: %p\n", - table_options_.block_cache_compressed.get()); + static_cast(table_options_.block_cache_compressed.get())); ret.append(buffer); if (table_options_.block_cache_compressed) { snprintf(buffer, kBufferSize, diff --git a/util/arena.cc b/util/arena.cc index 8bb482dbf..77eec10e7 100644 --- a/util/arena.cc +++ b/util/arena.cc @@ -26,7 +26,7 @@ const size_t Arena::kInlineSize; #endif const size_t Arena::kMinBlockSize = 4096; -const size_t Arena::kMaxBlockSize = 2 << 30; +const size_t Arena::kMaxBlockSize = 2u << 30; static const int kAlignUnit = sizeof(void*); size_t OptimizeBlockSize(size_t block_size) { diff --git 
a/util/random.h b/util/random.h index 57d5bd65b..7428454d8 100644 --- a/util/random.h +++ b/util/random.h @@ -102,7 +102,7 @@ class Random64 { // return "base" random bits. The effect is to pick a number in the // range [0,2^max_log-1] with exponential bias towards smaller numbers. uint64_t Skewed(int max_log) { - return Uniform(1 << Uniform(max_log + 1)); + return Uniform(uint64_t(1) << Uniform(max_log + 1)); } }; diff --git a/util/testutil.cc b/util/testutil.cc index 527402706..b8190faf7 100644 --- a/util/testutil.cc +++ b/util/testutil.cc @@ -113,7 +113,7 @@ class Uint64ComparatorImpl : public Comparator { }; } // namespace -static port::OnceType once = LEVELDB_ONCE_INIT; +static port::OnceType once; static const Comparator* uint64comp; static void InitModule() { From 72224104d339e19e708296f8a10f2ba7dcea88ee Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 17 Mar 2016 10:07:21 -0700 Subject: [PATCH 191/195] Forge current file for checkpoint Summary: This fixes a similar issue as D54711: "CURRENT" file can mutate between GetLiveFiles() and copy to the tmp directory, in which case it would reference the wrong manifest filename. To fix this, I forge the "CURRENT" file such that it simply contains the filename for the manifest returned by GetLiveFiles(). 
- Changed CreateCheckpoint() to forge current file - Added CreateFile() utility function - Added test case that rolls manifest during checkpoint creation Test Plan: $ ./checkpoint_test Reviewers: sdong, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D55065 --- util/file_util.cc | 16 +++++++++ util/file_util.h | 3 ++ utilities/checkpoint/checkpoint.cc | 19 ++++++++++- utilities/checkpoint/checkpoint_test.cc | 44 +++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/util/file_util.cc b/util/file_util.cc index 0748a4cf9..c14309da2 100644 --- a/util/file_util.cc +++ b/util/file_util.cc @@ -66,6 +66,22 @@ Status CopyFile(Env* env, const std::string& source, return Status::OK(); } +// Utility function to create a file with the provided contents +Status CreateFile(Env* env, const std::string& destination, + const std::string& contents) { + const EnvOptions soptions; + Status s; + unique_ptr dest_writer; + + unique_ptr destfile; + s = env->NewWritableFile(destination, &destfile, soptions); + if (!s.ok()) { + return s; + } + dest_writer.reset(new WritableFileWriter(std::move(destfile), soptions)); + return dest_writer->Append(Slice(contents)); +} + Status DeleteSSTFile(const DBOptions* db_options, const std::string& fname, uint32_t path_id) { // TODO(tec): support sst_file_manager for multiple path_ids diff --git a/util/file_util.h b/util/file_util.h index b5cb0cf66..5b2320e33 100644 --- a/util/file_util.h +++ b/util/file_util.h @@ -16,6 +16,9 @@ namespace rocksdb { extern Status CopyFile(Env* env, const std::string& source, const std::string& destination, uint64_t size = 0); +extern Status CreateFile(Env* env, const std::string& destination, + const std::string& contents); + extern Status DeleteSSTFile(const DBOptions* db_options, const std::string& fname, uint32_t path_id); diff --git a/utilities/checkpoint/checkpoint.cc b/utilities/checkpoint/checkpoint.cc 
index dd41d9cdf..b8543bb5b 100644 --- a/utilities/checkpoint/checkpoint.cc +++ b/utilities/checkpoint/checkpoint.cc @@ -24,6 +24,7 @@ #include "rocksdb/env.h" #include "rocksdb/transaction_log.h" #include "util/file_util.h" +#include "util/sync_point.h" #include "port/port.h" namespace rocksdb { @@ -76,7 +77,9 @@ Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir) { s = db_->DisableFileDeletions(); if (s.ok()) { // this will return live_files prefixed with "/" - s = db_->GetLiveFiles(live_files, &manifest_file_size, true); + s = db_->GetLiveFiles(live_files, &manifest_file_size); + TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:SavedLiveFiles1"); + TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:SavedLiveFiles2"); } // if we have more than one column family, we need to also get WAL files if (s.ok()) { @@ -98,6 +101,7 @@ Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir) { s = db_->GetEnv()->CreateDir(full_private_path); // copy/hard link live_files + std::string manifest_fname, current_fname; for (size_t i = 0; s.ok() && i < live_files.size(); ++i) { uint64_t number; FileType type; @@ -110,6 +114,15 @@ Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir) { assert(type == kTableFile || type == kDescriptorFile || type == kCurrentFile); assert(live_files[i].size() > 0 && live_files[i][0] == '/'); + if (type == kCurrentFile) { + // We will craft the current file manually to ensure it's consistent with + // the manifest number. This is necessary because current's file contents + // can change during checkpoint creation. + current_fname = live_files[i]; + continue; + } else if (type == kDescriptorFile) { + manifest_fname = live_files[i]; + } std::string src_fname = live_files[i]; // rules: @@ -132,6 +145,10 @@ Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir) { (type == kDescriptorFile) ? 
manifest_file_size : 0); } } + if (s.ok() && !current_fname.empty() && !manifest_fname.empty()) { + s = CreateFile(db_->GetEnv(), full_private_path + current_fname, + manifest_fname.substr(1) + "\n"); + } Log(db_->GetOptions().info_log, "Number of log files %" ROCKSDB_PRIszt, live_wal_files.size()); diff --git a/utilities/checkpoint/checkpoint_test.cc b/utilities/checkpoint/checkpoint_test.cc index 42d180bba..3336e5af5 100644 --- a/utilities/checkpoint/checkpoint_test.cc +++ b/utilities/checkpoint/checkpoint_test.cc @@ -346,6 +346,50 @@ TEST_F(DBTest, CheckpointCF) { ASSERT_OK(DestroyDB(snapshot_name, options)); } +TEST_F(DBTest, CurrentFileModifiedWhileCheckpointing) { + const std::string kSnapshotName = test::TmpDir(env_) + "/snapshot"; + ASSERT_OK(DestroyDB(kSnapshotName, CurrentOptions())); + env_->DeleteDir(kSnapshotName); + + Options options = CurrentOptions(); + options.max_manifest_file_size = 0; // always rollover manifest for file add + Reopen(options); + + rocksdb::SyncPoint::GetInstance()->LoadDependency( + {// Get past the flush in the checkpoint thread before adding any keys to + // the db so the checkpoint thread won't hit the WriteManifest + // syncpoints. + {"DBImpl::GetLiveFiles:1", + "DBTest::CurrentFileModifiedWhileCheckpointing:PrePut"}, + // Roll the manifest during checkpointing right after live files are + // snapshotted. 
+ {"CheckpointImpl::CreateCheckpoint:SavedLiveFiles1", + "VersionSet::LogAndApply:WriteManifest"}, + {"VersionSet::LogAndApply:WriteManifestDone", + "CheckpointImpl::CreateCheckpoint:SavedLiveFiles2"}}); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + std::thread t([&]() { + Checkpoint* checkpoint; + ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); + ASSERT_OK(checkpoint->CreateCheckpoint(kSnapshotName)); + delete checkpoint; + }); + TEST_SYNC_POINT("DBTest::CurrentFileModifiedWhileCheckpointing:PrePut"); + ASSERT_OK(Put("Default", "Default1")); + ASSERT_OK(Flush()); + t.join(); + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + + DB* snapshotDB; + // Successful Open() implies that CURRENT pointed to the manifest in the + // checkpoint. + ASSERT_OK(DB::Open(options, kSnapshotName, &snapshotDB)); + delete snapshotDB; + snapshotDB = nullptr; +} + } // namespace rocksdb int main(int argc, char** argv) { From 90aff0c444b21445395c89adf67f24617f340658 Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Thu, 17 Mar 2016 10:14:23 -0700 Subject: [PATCH 192/195] Update --max_write_buffer_number for compaction benchmarks Summary: For compactions benchmarks (both level and universal) we'll use `--max_write_buffer_number=4`. For all the other benchmarks which don't customize the value of `--max_background_flushes` we'll continue using `--max_write_buffer_number=8`. 
Test Plan: To validate basic correctness and command-line options: ``` cd ~/rocksdb NKEYS=10000000 ./tools/run_flash_bench.sh ``` Reviewers: MarkCallaghan Reviewed By: MarkCallaghan Subscribers: andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55497 --- tools/benchmark.sh | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/benchmark.sh b/tools/benchmark.sh index 57e3ee575..e4729e7f3 100755 --- a/tools/benchmark.sh +++ b/tools/benchmark.sh @@ -79,7 +79,6 @@ const_params=" --hard_rate_limit=3 \ --rate_limit_delay_max_milliseconds=1000000 \ --write_buffer_size=$((128 * M)) \ - --max_write_buffer_number=8 \ --target_file_size_base=$((128 * M)) \ --max_bytes_for_level_base=$((1 * G)) \ \ @@ -106,8 +105,16 @@ if [ $duration -gt 0 ]; then const_params="$const_params --duration=$duration" fi -params_w="$const_params $l0_config --max_background_compactions=16 --max_background_flushes=7" -params_bulkload="$const_params --max_background_compactions=16 --max_background_flushes=7 \ +params_w="$const_params \ + $l0_config \ + --max_background_compactions=16 \ + --max_write_buffer_number=8 \ + --max_background_flushes=7" + +params_bulkload="$const_params \ + --max_background_compactions=16 \ + --max_write_buffer_number=8 \ + --max_background_flushes=7 \ --level0_file_num_compaction_trigger=$((10 * M)) \ --level0_slowdown_writes_trigger=$((10 * M)) \ --level0_stop_writes_trigger=$((10 * M))" @@ -119,12 +126,14 @@ params_bulkload="$const_params --max_background_compactions=16 --max_background_ # params_level_compact="$const_params \ --max_background_flushes=4 \ + --max_write_buffer_number=4 \ --level0_file_num_compaction_trigger=4 \ --level0_slowdown_writes_trigger=16 \ --level0_stop_writes_trigger=20" params_univ_compact="$const_params \ --max_background_flushes=4 \ + --max_write_buffer_number=4 \ --level0_file_num_compaction_trigger=8 \ --level0_slowdown_writes_trigger=16 \ --level0_stop_writes_trigger=20" From 
2ca0994cf74f6c6553cba6055ae879ba54f3bfca Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 17 Mar 2016 11:25:20 -0700 Subject: [PATCH 193/195] Latest versions of Jemalloc library do not require je_init()/je_unint() calls. #ifdef in the source code and make this a default build option. --- port/win/port_win.cc | 4 ++++ thirdparty.inc | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/port/win/port_win.cc b/port/win/port_win.cc index 66b0f90e9..dd87c3577 100644 --- a/port/win/port_win.cc +++ b/port/win/port_win.cc @@ -233,6 +233,8 @@ int GetMaxOpenFiles() { return -1; } #include "jemalloc/jemalloc.h" +#ifndef JEMALLOC_NON_INIT + namespace rocksdb { namespace port { @@ -278,6 +280,8 @@ JEMALLOC_SECTION(".CRT$XCT") JEMALLOC_ATTR(used) static const void( } // extern "C" +#endif // JEMALLOC_NON_INIT + // Global operators to be replaced by a linker void* operator new(size_t size) { diff --git a/thirdparty.inc b/thirdparty.inc index 46da30c52..e10bdaa4e 100644 --- a/thirdparty.inc +++ b/thirdparty.inc @@ -8,6 +8,7 @@ set(USE_SNAPPY_DEFAULT 0) # SNAPPY is disabled by default, enable with -D set(USE_LZ4_DEFAULT 0) # LZ4 is disabled by default, enable with -DLZ4=1 cmake command line agrument set(USE_ZLIB_DEFAULT 0) # ZLIB is disabled by default, enable with -DZLIB=1 cmake command line agrument set(USE_JEMALLOC_DEFAULT 0) # JEMALLOC is disabled by default, enable with -DJEMALLOC=1 cmake command line agrument +set(USE_JENONINIT_DEFAULT 1) # Default is enabled do not call je_init/je_uninit as the newer versions do not have it disable with -DJENONINIT=0 # # This example assumes all the libraries locate in directories under THIRDPARTY_HOME environment variable @@ -208,7 +209,7 @@ endif () if (${USE_JEMALLOC} EQUAL 1) message(STATUS "JEMALLOC library is enabled") - set(JEMALLOC_CXX_FLAGS -DJEMALLOC) + set(JEMALLOC_CXX_FLAGS "-DJEMALLOC -DJEMALLOC_EXPORT= ") if(DEFINED ENV{JEMALLOC_INCLUDE}) set(JEMALLOC_INCLUDE $ENV{JEMALLOC_INCLUDE}) @@ 
-228,6 +229,18 @@ if (${USE_JEMALLOC} EQUAL 1) include_directories(${JEMALLOC_INCLUDE}) set (THIRDPARTY_LIBS ${THIRDPARTY_LIBS} ${JEMALLOC_LIBS}) set (ARTIFACT_SUFFIX "_je") + + set(USE_JENONINIT USE_JENONINIT_DEFAULT) + + if(JENONINIT) + set(USE_JENONINIT ${JENONINIT}) + endif() + + if(${USE_JENONINIT} EQUAL 1) + add_definitions(-DJEMALLOC_NON_INIT) + message(STATUS "JEMALLOC NONINIT version") + endif() + else () set (ARTIFACT_SUFFIX "") message(STATUS "JEMALLOC library is disabled") From 522de4f59e6314698286cf29d8a325a284d81778 Mon Sep 17 00:00:00 2001 From: Marton Trencseni Date: Thu, 17 Mar 2016 22:40:01 +0000 Subject: [PATCH 194/195] Adding pin_l0_filter_and_index_blocks_in_cache feature. Summary: When a block based table file is opened, if prefetch_index_and_filter is true, it will prefetch the index and filter blocks, putting them into the block cache. What this feature adds: when a L0 block based table file is opened, if pin_l0_filter_and_index_blocks_in_cache is true in the options (and prefetch_index_and_filter is true), then the filter and index blocks aren't released back to the block cache at the end of BlockBasedTableReader::Open(). Instead the table reader takes ownership of them, hence pinning them, ie. the LRU cache will never push them out. Meanwhile in the table reader, further accesses will not hit the block cache, thus avoiding lock contention. When the table reader is destroyed, it releases the pinned blocks (if there were any). This has to happen before the cache is destroyed, so I had to introduce a TableReader::Close(), to guarantee the order of destruction. Test Plan: Added two unit tests for this. Existing unit tests run fine (default is pin_l0_filter_and_index_blocks_in_cache=false). DISABLE_JEMALLOC=1 OPT=-g make all valgrind_check -j32 Mac: OK. Linux: with D55287 patched in it's OK. 
Reviewers: sdong Reviewed By: sdong Subscribers: andrewkr, leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54801 --- db/builder.cc | 5 +- db/builder.h | 2 +- db/c.cc | 5 + db/column_family.h | 2 + db/db_test.cc | 86 ++++++++++++++++ db/flush_job.cc | 16 +-- db/table_cache.cc | 21 ++-- db/table_cache.h | 11 +- db/version_set.cc | 33 ++++-- examples/rocksdb_option_file_example.ini | 1 + include/rocksdb/c.h | 3 + include/rocksdb/table.h | 6 ++ java/rocksjni/table.cc | 5 +- table/block_based_table_factory.cc | 12 ++- table/block_based_table_reader.cc | 122 +++++++++++++++++------ table/block_based_table_reader.h | 9 +- table/table_builder.h | 7 +- table/table_reader.h | 2 + table/table_test.cc | 2 +- tools/benchmark.sh | 1 + tools/db_bench_tool.cc | 5 + util/options_helper.h | 4 + util/options_test.cc | 4 +- util/testutil.cc | 1 + 24 files changed, 295 insertions(+), 70 deletions(-) diff --git a/db/builder.cc b/db/builder.cc index ae6015003..317c9b054 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -63,7 +63,7 @@ Status BuildTable( const CompressionType compression, const CompressionOptions& compression_opts, bool paranoid_file_checks, InternalStats* internal_stats, const Env::IOPriority io_priority, - TableProperties* table_properties) { + TableProperties* table_properties, int level) { // Reports the IOStats for flush for every following bytes. const size_t kReportFlushIOStatsEvery = 1048576; Status s; @@ -149,7 +149,8 @@ Status BuildTable( ReadOptions(), env_options, internal_comparator, meta->fd, nullptr, (internal_stats == nullptr) ? 
nullptr : internal_stats->GetFileReadHist(0), - false)); + false /* for_compaction */, nullptr /* arena */, + false /* skip_filter */, level)); s = it->status(); if (s.ok() && paranoid_file_checks) { for (it->SeekToFirst(); it->Valid(); it->Next()) { diff --git a/db/builder.h b/db/builder.h index b4b72b7d7..1eba6da9c 100644 --- a/db/builder.h +++ b/db/builder.h @@ -61,6 +61,6 @@ extern Status BuildTable( const CompressionOptions& compression_opts, bool paranoid_file_checks, InternalStats* internal_stats, const Env::IOPriority io_priority = Env::IO_HIGH, - TableProperties* table_properties = nullptr); + TableProperties* table_properties = nullptr, int level = -1); } // namespace rocksdb diff --git a/db/c.cc b/db/c.cc index 85e911491..9f49aba23 100644 --- a/db/c.cc +++ b/db/c.cc @@ -1288,6 +1288,11 @@ void rocksdb_block_based_options_set_cache_index_and_filter_blocks( options->rep.cache_index_and_filter_blocks = v; } +void rocksdb_block_based_options_set_pin_l0_filter_and_index_blocks_in_cache( + rocksdb_block_based_table_options_t* options, unsigned char v) { + options->rep.pin_l0_filter_and_index_blocks_in_cache = v; +} + void rocksdb_block_based_options_set_skip_table_builder_flush( rocksdb_block_based_table_options_t* options, unsigned char v) { options->rep.skip_table_builder_flush = v; diff --git a/db/column_family.h b/db/column_family.h index ce5f409c4..1a4036e60 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -465,6 +465,8 @@ class ColumnFamilySet { // Don't call while iterating over ColumnFamilySet void FreeDeadColumnFamilies(); + Cache* get_table_cache() { return table_cache_; } + private: friend class ColumnFamilyData; // helper function that gets called from cfd destructor diff --git a/db/db_test.cc b/db/db_test.cc index 4b42296c9..aeb097f76 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -424,6 +424,92 @@ TEST_F(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) { TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); } +TEST_F(DBTest, 
IndexAndFilterBlocksOfNewTableAddedToCacheWithPinning) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = rocksdb::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.cache_index_and_filter_blocks = true; + table_options.pin_l0_filter_and_index_blocks_in_cache = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(20)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + CreateAndReopenWithCF({"pikachu"}, options); + + ASSERT_OK(Put(1, "key", "val")); + // Create a new table. + ASSERT_OK(Flush(1)); + + // index/filter blocks added to block cache right after table creation. + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); + + // only index/filter were added + ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); + + std::string value; + // Miss and hit count should remain the same, they're all pinned. + db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); + + // Miss and hit count should remain the same, they're all pinned. 
+ value = Get(1, "key"); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); +} + +TEST_F(DBTest, MultiLevelIndexAndFilterBlocksCachedWithPinning) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = rocksdb::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.cache_index_and_filter_blocks = true; + table_options.pin_l0_filter_and_index_blocks_in_cache = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(20)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + CreateAndReopenWithCF({"pikachu"}, options); + + Put(1, "a", "begin"); + Put(1, "z", "end"); + ASSERT_OK(Flush(1)); + // move this table to L1 + dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); + TryReopenWithColumnFamilies({"default", "pikachu"}, options); + // create new table at L0 + Put(1, "a2", "begin2"); + Put(1, "z2", "end2"); + ASSERT_OK(Flush(1)); + + // get base cache values + uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); + uint64_t fh = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); + uint64_t im = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS); + uint64_t ih = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT); + + std::string value; + // this should be read from L0 + // so cache values don't change + value = Get(1, "a2"); + ASSERT_EQ(fm, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + ASSERT_EQ(im, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); + ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); + + // should be read from L1; the block cache survives the reopen, and during + // the BlockBasedTableReader::Open() of the 
table we try to fetch it, we + // will see one hit from there, and then the Get() results in another hit + value = Get(1, "a"); + ASSERT_EQ(fm, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(fh + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); +} + TEST_F(DBTest, ParanoidFileChecks) { Options options = CurrentOptions(); options.create_if_missing = true; diff --git a/db/flush_job.cc b/db/flush_job.cc index b83f9dbe6..b4e5b307f 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -234,14 +234,14 @@ Status FlushJob::WriteLevel0Table(const autovector& mems, TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:output_compression", &output_compression_); - s = BuildTable(dbname_, db_options_.env, *cfd_->ioptions(), env_options_, - cfd_->table_cache(), iter.get(), meta, - cfd_->internal_comparator(), - cfd_->int_tbl_prop_collector_factories(), cfd_->GetID(), - existing_snapshots_, earliest_write_conflict_snapshot_, - output_compression_, cfd_->ioptions()->compression_opts, - mutable_cf_options_.paranoid_file_checks, - cfd_->internal_stats(), Env::IO_HIGH, &table_properties_); + s = BuildTable( + dbname_, db_options_.env, *cfd_->ioptions(), env_options_, + cfd_->table_cache(), iter.get(), meta, cfd_->internal_comparator(), + cfd_->int_tbl_prop_collector_factories(), cfd_->GetID(), + existing_snapshots_, earliest_write_conflict_snapshot_, + output_compression_, cfd_->ioptions()->compression_opts, + mutable_cf_options_.paranoid_file_checks, cfd_->internal_stats(), + Env::IO_HIGH, &table_properties_, 0 /* level */); info.table_properties = table_properties_; LogFlush(db_options_.info_log); } diff --git a/db/table_cache.cc b/db/table_cache.cc index 2a4621b7e..be6b5c324 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -88,7 +88,7 @@ Status TableCache::GetTableReader( const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, bool sequential_mode, bool record_read_stats, HistogramImpl* 
file_read_hist, - unique_ptr* table_reader, bool skip_filters) { + unique_ptr* table_reader, bool skip_filters, int level) { std::string fname = TableFileName(ioptions_.db_paths, fd.GetNumber(), fd.GetPathId()); unique_ptr file; @@ -109,7 +109,7 @@ Status TableCache::GetTableReader( file_read_hist)); s = ioptions_.table_factory->NewTableReader( TableReaderOptions(ioptions_, env_options, internal_comparator, - skip_filters), + skip_filters, level), std::move(file_reader), fd.GetFileSize(), table_reader); TEST_SYNC_POINT("TableCache::GetTableReader:0"); } @@ -120,7 +120,8 @@ Status TableCache::FindTable(const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, Cache::Handle** handle, const bool no_io, bool record_read_stats, - HistogramImpl* file_read_hist, bool skip_filters) { + HistogramImpl* file_read_hist, bool skip_filters, + int level) { PERF_TIMER_GUARD(find_table_nanos); Status s; uint64_t number = fd.GetNumber(); @@ -136,7 +137,7 @@ Status TableCache::FindTable(const EnvOptions& env_options, unique_ptr table_reader; s = GetTableReader(env_options, internal_comparator, fd, false /* sequential mode */, record_read_stats, - file_read_hist, &table_reader, skip_filters); + file_read_hist, &table_reader, skip_filters, level); if (!s.ok()) { assert(table_reader == nullptr); RecordTick(ioptions_.statistics, NO_FILE_ERRORS); @@ -158,7 +159,7 @@ InternalIterator* TableCache::NewIterator( const ReadOptions& options, const EnvOptions& env_options, const InternalKeyComparator& icomparator, const FileDescriptor& fd, TableReader** table_reader_ptr, HistogramImpl* file_read_hist, - bool for_compaction, Arena* arena, bool skip_filters) { + bool for_compaction, Arena* arena, bool skip_filters, int level) { PERF_TIMER_GUARD(new_table_iterator_nanos); if (table_reader_ptr != nullptr) { @@ -173,7 +174,8 @@ InternalIterator* TableCache::NewIterator( unique_ptr table_reader_unique_ptr; Status s = GetTableReader( env_options, 
icomparator, fd, /* sequential mode */ true, - /* record stats */ false, nullptr, &table_reader_unique_ptr); + /* record stats */ false, nullptr, &table_reader_unique_ptr, + false /* skip_filters */, level); if (!s.ok()) { return NewErrorInternalIterator(s, arena); } @@ -184,7 +186,7 @@ InternalIterator* TableCache::NewIterator( Status s = FindTable(env_options, icomparator, fd, &handle, options.read_tier == kBlockCacheTier /* no_io */, !for_compaction /* record read_stats */, - file_read_hist, skip_filters); + file_read_hist, skip_filters, level); if (!s.ok()) { return NewErrorInternalIterator(s, arena); } @@ -216,7 +218,7 @@ Status TableCache::Get(const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, const Slice& k, GetContext* get_context, HistogramImpl* file_read_hist, - bool skip_filters) { + bool skip_filters, int level) { TableReader* t = fd.table_reader; Status s; Cache::Handle* handle = nullptr; @@ -265,7 +267,8 @@ Status TableCache::Get(const ReadOptions& options, if (!t) { s = FindTable(env_options_, internal_comparator, fd, &handle, options.read_tier == kBlockCacheTier /* no_io */, - true /* record_read_stats */, file_read_hist, skip_filters); + true /* record_read_stats */, file_read_hist, skip_filters, + level); if (s.ok()) { t = GetTableReaderFromHandle(handle); } diff --git a/db/table_cache.h b/db/table_cache.h index f8416e0b4..499b9dbe5 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -45,34 +45,37 @@ class TableCache { // the cache and should not be deleted, and is valid for as long as the // returned iterator is live. 
// @param skip_filters Disables loading/accessing the filter block + // @param level The level this table is at, -1 for "not set / don't know" InternalIterator* NewIterator( const ReadOptions& options, const EnvOptions& toptions, const InternalKeyComparator& internal_comparator, const FileDescriptor& file_fd, TableReader** table_reader_ptr = nullptr, HistogramImpl* file_read_hist = nullptr, bool for_compaction = false, - Arena* arena = nullptr, bool skip_filters = false); + Arena* arena = nullptr, bool skip_filters = false, int level = -1); // If a seek to internal key "k" in specified file finds an entry, // call (*handle_result)(arg, found_key, found_value) repeatedly until // it returns false. // @param skip_filters Disables loading/accessing the filter block + // @param level The level this table is at, -1 for "not set / don't know" Status Get(const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileDescriptor& file_fd, const Slice& k, GetContext* get_context, HistogramImpl* file_read_hist = nullptr, - bool skip_filters = false); + bool skip_filters = false, int level = -1); // Evict any entry for the specified file number static void Evict(Cache* cache, uint64_t file_number); // Find table reader // @param skip_filters Disables loading/accessing the filter block + // @param level == -1 means not specified Status FindTable(const EnvOptions& toptions, const InternalKeyComparator& internal_comparator, const FileDescriptor& file_fd, Cache::Handle**, const bool no_io = false, bool record_read_stats = true, HistogramImpl* file_read_hist = nullptr, - bool skip_filters = false); + bool skip_filters = false, int level = -1); // Get TableReader from a cache handle. 
TableReader* GetTableReaderFromHandle(Cache::Handle* handle); @@ -106,7 +109,7 @@ class TableCache { const FileDescriptor& fd, bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist, unique_ptr* table_reader, - bool skip_filters = false); + bool skip_filters = false, int level = -1); const ImmutableCFOptions& ioptions_; const EnvOptions& env_options_; diff --git a/db/version_set.cc b/db/version_set.cc index 1da447540..167586d71 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -91,6 +91,7 @@ class FilePicker { const InternalKeyComparator* internal_comparator) : num_levels_(num_levels), curr_level_(static_cast(-1)), + returned_file_level_(static_cast(-1)), hit_file_level_(static_cast(-1)), search_left_bound_(0), search_right_bound_(FileIndexer::kLevelMaxIndex), @@ -117,6 +118,8 @@ class FilePicker { } } + int GetCurrentLevel() { return returned_file_level_; } + FdWithKeyRange* GetNextFile() { while (!search_ended_) { // Loops over different levels. while (curr_index_in_curr_level_ < curr_file_level_->num_files) { @@ -189,6 +192,7 @@ class FilePicker { } prev_file_ = f; #endif + returned_file_level_ = curr_level_; if (curr_level_ > 0 && cmp_largest < 0) { // No more files to search in this level. 
search_ended_ = !PrepareNextLevel(); @@ -215,6 +219,7 @@ class FilePicker { private: unsigned int num_levels_; unsigned int curr_level_; + unsigned int returned_file_level_; unsigned int hit_file_level_; int32_t search_left_bound_; int32_t search_right_bound_; @@ -485,7 +490,7 @@ class LevelFileIteratorState : public TwoLevelIteratorState { const EnvOptions& env_options, const InternalKeyComparator& icomparator, HistogramImpl* file_read_hist, bool for_compaction, - bool prefix_enabled, bool skip_filters) + bool prefix_enabled, bool skip_filters, int level) : TwoLevelIteratorState(prefix_enabled), table_cache_(table_cache), read_options_(read_options), @@ -493,7 +498,8 @@ class LevelFileIteratorState : public TwoLevelIteratorState { icomparator_(icomparator), file_read_hist_(file_read_hist), for_compaction_(for_compaction), - skip_filters_(skip_filters) {} + skip_filters_(skip_filters), + level_(level) {} InternalIterator* NewSecondaryIterator(const Slice& meta_handle) override { if (meta_handle.size() != sizeof(FileDescriptor)) { @@ -505,7 +511,7 @@ class LevelFileIteratorState : public TwoLevelIteratorState { return table_cache_->NewIterator( read_options_, env_options_, icomparator_, *fd, nullptr /* don't need reference to table*/, file_read_hist_, - for_compaction_, nullptr /* arena */, skip_filters_); + for_compaction_, nullptr /* arena */, skip_filters_, level_); } } @@ -521,6 +527,7 @@ class LevelFileIteratorState : public TwoLevelIteratorState { HistogramImpl* file_read_hist_; bool for_compaction_; bool skip_filters_; + int level_; }; // A wrapper of version builder which references the current version in @@ -788,7 +795,8 @@ void Version::AddIterators(const ReadOptions& read_options, const auto& file = storage_info_.LevelFilesBrief(0).files[i]; merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator( read_options, soptions, cfd_->internal_comparator(), file.fd, nullptr, - cfd_->internal_stats()->GetFileReadHist(0), false, arena)); + 
cfd_->internal_stats()->GetFileReadHist(0), false, arena, + false /* skip_filters */, 0 /* level */)); } // For levels > 0, we can use a concatenating iterator that sequentially @@ -803,7 +811,7 @@ void Version::AddIterators(const ReadOptions& read_options, cfd_->internal_stats()->GetFileReadHist(level), false /* for_compaction */, cfd_->ioptions()->prefix_extractor != nullptr, - IsFilterSkipped(level)); + IsFilterSkipped(level), level); mem = arena->AllocateAligned(sizeof(LevelFileNumIterator)); auto* first_level_iter = new (mem) LevelFileNumIterator( cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level)); @@ -908,7 +916,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, read_options, *internal_comparator(), f->fd, ikey, &get_context, cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()), IsFilterSkipped(static_cast(fp.GetHitFileLevel()), - fp.IsHitFileLastInLevel())); + fp.IsHitFileLastInLevel()), + fp.GetCurrentLevel()); // TODO: examine the behavior for corrupted key if (!status->ok()) { return; @@ -2054,9 +2063,16 @@ VersionSet::VersionSet(const std::string& dbname, const DBOptions* db_options, env_options_(storage_options), env_options_compactions_(env_options_) {} +void CloseTables(void* ptr, size_t) { + TableReader* table_reader = reinterpret_cast(ptr); + table_reader->Close(); +} + VersionSet::~VersionSet() { // we need to delete column_family_set_ because its destructor depends on // VersionSet + column_family_set_->get_table_cache()->ApplyToAllCacheEntries(&CloseTables, + false); column_family_set_.reset(); for (auto file : obsolete_files_) { delete file; @@ -3267,7 +3283,8 @@ InternalIterator* VersionSet::MakeInputIterator(Compaction* c) { read_options, env_options_compactions_, cfd->internal_comparator(), flevel->files[i].fd, nullptr, nullptr, /* no per level latency histogram*/ - true /* for compaction */); + true /* for_compaction */, nullptr /* arena */, + false /* skip_filters */, (int)which /* level 
*/); } } else { // Create concatenating iterator for the files from this level @@ -3277,7 +3294,7 @@ InternalIterator* VersionSet::MakeInputIterator(Compaction* c) { cfd->internal_comparator(), nullptr /* no per level latency histogram */, true /* for_compaction */, false /* prefix enabled */, - false /* skip_filters */), + false /* skip_filters */, (int)which /* level */), new LevelFileNumIterator(cfd->internal_comparator(), c->input_levels(which))); } diff --git a/examples/rocksdb_option_file_example.ini b/examples/rocksdb_option_file_example.ini index 838afe8eb..7dc070429 100644 --- a/examples/rocksdb_option_file_example.ini +++ b/examples/rocksdb_option_file_example.ini @@ -138,6 +138,7 @@ block_size=8192 block_restart_interval=16 cache_index_and_filter_blocks=false + pin_l0_filter_and_index_blocks_in_cache=false index_type=kBinarySearch hash_index_allow_collision=true flush_block_policy_factory=FlushBlockBySizePolicyFactory diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 4bb870e20..6e52d20af 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -451,6 +451,9 @@ extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_cache_index_and_filter_blocks( rocksdb_block_based_table_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_pin_l0_filter_and_index_blocks_in_cache( + rocksdb_block_based_table_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_skip_table_builder_flush( rocksdb_block_based_table_options_t* options, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_block_based_table_factory( diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index cb4d850e8..8aba3a153 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -64,6 +64,12 @@ struct BlockBasedTableOptions { // block during table initialization. 
bool cache_index_and_filter_blocks = false; + // if cache_index_and_filter_blocks is true and the below is true, then + // filter and index blocks are stored in the cache, but a reference is + // held in the "table reader" object so the blocks are pinned and only + // evicted from cache when the table reader is freed. + bool pin_l0_filter_and_index_blocks_in_cache = false; + // The index type that will be used for this table. enum IndexType : char { // A space efficient index block that is optimized for diff --git a/java/rocksjni/table.cc b/java/rocksjni/table.cc index 97aef9888..204d1ba38 100644 --- a/java/rocksjni/table.cc +++ b/java/rocksjni/table.cc @@ -38,13 +38,14 @@ jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle( /* * Class: org_rocksdb_BlockBasedTableConfig * Method: newTableFactoryHandle - * Signature: (ZJIJIIZIZZJIBBI)J + * Signature: (ZJIJIIZIZZZJIBBI)J */ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle( JNIEnv* env, jobject jobj, jboolean no_block_cache, jlong block_cache_size, jint block_cache_num_shardbits, jlong block_size, jint block_size_deviation, jint block_restart_interval, jboolean whole_key_filtering, jlong jfilterPolicy, jboolean cache_index_and_filter_blocks, + jboolean pin_l0_filter_and_index_blocks_in_cache, jboolean hash_index_allow_collision, jlong block_cache_compressed_size, jint block_cache_compressd_num_shard_bits, jbyte jchecksum_type, jbyte jindex_type, jint jformat_version) { @@ -70,6 +71,8 @@ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle( options.filter_policy = *pFilterPolicy; } options.cache_index_and_filter_blocks = cache_index_and_filter_blocks; + options.pin_l0_filter_and_index_blocks_in_cache = + pin_l0_filter_and_index_blocks_in_cache; options.hash_index_allow_collision = hash_index_allow_collision; if (block_cache_compressed_size > 0) { if (block_cache_compressd_num_shard_bits > 0) { diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc 
index 75917232d..c2617b168 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -64,7 +64,7 @@ Status BlockBasedTableFactory::NewTableReader( table_reader_options.ioptions, table_reader_options.env_options, table_options_, table_reader_options.internal_comparator, std::move(file), file_size, table_reader, prefetch_enabled, - table_reader_options.skip_filters); + table_reader_options.skip_filters, table_reader_options.level); } TableBuilder* BlockBasedTableFactory::NewTableBuilder( @@ -94,6 +94,12 @@ Status BlockBasedTableFactory::SanitizeOptions( return Status::InvalidArgument("Enable cache_index_and_filter_blocks, " ", but block cache is disabled"); } + if (table_options_.pin_l0_filter_and_index_blocks_in_cache && + table_options_.no_block_cache) { + return Status::InvalidArgument( + "Enable pin_l0_filter_and_index_blocks_in_cache, " + ", but block cache is disabled"); + } if (!BlockBasedTableSupportedVersion(table_options_.format_version)) { return Status::InvalidArgument( "Unsupported BlockBasedTable format_version. Please check " @@ -115,6 +121,10 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const { snprintf(buffer, kBufferSize, " cache_index_and_filter_blocks: %d\n", table_options_.cache_index_and_filter_blocks); ret.append(buffer); + snprintf(buffer, kBufferSize, + " pin_l0_filter_and_index_blocks_in_cache: %d\n", + table_options_.pin_l0_filter_and_index_blocks_in_cache); + ret.append(buffer); snprintf(buffer, kBufferSize, " index_type: %d\n", table_options_.index_type); ret.append(buffer); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index e48eea694..0f9cf185c 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -340,6 +340,28 @@ class HashIndexReader : public IndexReader { BlockContents prefixes_contents_; }; +// CachableEntry represents the entries that *may* be fetched from block cache. 
+// field `value` is the item we want to get. +// field `cache_handle` is the cache handle to the block cache. If the value +// was not read from cache, `cache_handle` will be nullptr. +template +struct BlockBasedTable::CachableEntry { + CachableEntry(TValue* _value, Cache::Handle* _cache_handle) + : value(_value), cache_handle(_cache_handle) {} + CachableEntry() : CachableEntry(nullptr, nullptr) {} + void Release(Cache* cache) { + if (cache_handle) { + cache->Release(cache_handle); + value = nullptr; + cache_handle = nullptr; + } + } + bool IsSet() const { return cache_handle != nullptr; } + + TValue* value = nullptr; + // if the entry is from the cache, cache_handle will be populated. + Cache::Handle* cache_handle = nullptr; +}; struct BlockBasedTable::Rep { Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options, @@ -394,34 +416,21 @@ struct BlockBasedTable::Rep { // and compatible with existing code, we introduce a wrapper that allows // block to extract prefix without knowing if a key is internal or not. unique_ptr internal_prefix_transform; + + // only used in level 0 files: + // when pin_l0_filter_and_index_blocks_in_cache is true, we do use the + // LRU cache, but we always keep the filter & idndex block's handle checked + // out here (=we don't call Release()), plus the parsed out objects + // the LRU cache will never push flush them out, hence they're pinned + CachableEntry filter_entry; + CachableEntry index_entry; }; BlockBasedTable::~BlockBasedTable() { + Close(); delete rep_; } -// CachableEntry represents the entries that *may* be fetched from block cache. -// field `value` is the item we want to get. -// field `cache_handle` is the cache handle to the block cache. If the value -// was not read from cache, `cache_handle` will be nullptr. 
-template -struct BlockBasedTable::CachableEntry { - CachableEntry(TValue* _value, Cache::Handle* _cache_handle) - : value(_value), cache_handle(_cache_handle) {} - CachableEntry() : CachableEntry(nullptr, nullptr) {} - void Release(Cache* cache) { - if (cache_handle) { - cache->Release(cache_handle); - value = nullptr; - cache_handle = nullptr; - } - } - - TValue* value = nullptr; - // if the entry is from the cache, cache_handle will be populated. - Cache::Handle* cache_handle = nullptr; -}; - // Helper function to setup the cache key's prefix for the Table. void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep, uint64_t file_size) { assert(kMaxCacheKeyPrefixSize >= 10); @@ -498,7 +507,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, uint64_t file_size, unique_ptr* table_reader, const bool prefetch_index_and_filter, - const bool skip_filters) { + const bool skip_filters, const int level) { table_reader->reset(); Footer footer; @@ -594,14 +603,33 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, assert(table_options.block_cache != nullptr); // Hack: Call NewIndexIterator() to implicitly add index to the // block_cache + + // if pin_l0_filter_and_index_blocks_in_cache is true and this is + // a level0 file, then we will pass in this pointer to rep->index + // to NewIndexIterator(), which will save the index block in there + // else it's a nullptr and nothing special happens + CachableEntry* index_entry = nullptr; + if (rep->table_options.pin_l0_filter_and_index_blocks_in_cache && + level == 0) { + index_entry = &rep->index_entry; + } unique_ptr iter( - new_table->NewIndexIterator(ReadOptions())); + new_table->NewIndexIterator(ReadOptions(), nullptr, index_entry)); s = iter->status(); if (s.ok()) { // Hack: Call GetFilter() to implicitly add filter to the block_cache auto filter_entry = new_table->GetFilter(); - filter_entry.Release(table_options.block_cache.get()); + // if pin_l0_filter_and_index_blocks_in_cache is true, and this 
is + // a level0 file, then save it in rep_->filter_entry; it will be + // released in the destructor only, hence it will be pinned in the + // cache until this reader is alive + if (rep->table_options.pin_l0_filter_and_index_blocks_in_cache && + level == 0) { + rep->filter_entry = filter_entry; + } else { + filter_entry.Release(table_options.block_cache.get()); + } } } else { // If we don't use block cache for index/filter blocks access, we'll @@ -886,6 +914,11 @@ BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( return {rep_->filter.get(), nullptr /* cache handle */}; } + // we have a pinned filter block + if (rep_->filter_entry.IsSet()) { + return rep_->filter_entry; + } + PERF_TIMER_GUARD(read_filter_block_nanos); Cache* block_cache = rep_->table_options.block_cache.get(); @@ -935,12 +968,19 @@ BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( } InternalIterator* BlockBasedTable::NewIndexIterator( - const ReadOptions& read_options, BlockIter* input_iter) { + const ReadOptions& read_options, BlockIter* input_iter, + CachableEntry* index_entry) { // index reader has already been pre-populated. 
if (rep_->index_reader) { return rep_->index_reader->NewIterator( input_iter, read_options.total_order_seek); } + // we have a pinned index block + if (rep_->index_entry.IsSet()) { + return rep_->index_entry.value->NewIterator(input_iter, + read_options.total_order_seek); + } + PERF_TIMER_GUARD(read_index_block_nanos); bool no_io = read_options.read_tier == kBlockCacheTier; @@ -996,7 +1036,15 @@ InternalIterator* BlockBasedTable::NewIndexIterator( assert(cache_handle); auto* iter = index_reader->NewIterator( input_iter, read_options.total_order_seek); - iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, cache_handle); + + // the caller would like to take ownership of the index block + // don't call RegisterCleanup() in this case, the caller will take care of it + if (index_entry != nullptr) { + *index_entry = {index_reader, cache_handle}; + } else { + iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, cache_handle); + } + return iter; } @@ -1224,7 +1272,13 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) { RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL); } - filter_entry.Release(rep_->table_options.block_cache.get()); + // if rep_->filter_entry is not set, we should call Release(); otherwise + // don't call, in this case we have a local copy in rep_->filter_entry, + // it's pinned to the cache and will be released in the destructor + if (!rep_->filter_entry.IsSet()) { + filter_entry.Release(rep_->table_options.block_cache.get()); + } + return may_match; } @@ -1324,7 +1378,12 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, } } - filter_entry.Release(rep_->table_options.block_cache.get()); + // if rep_->filter_entry is not set, we should call Release(); otherwise + // don't call, in this case we have a local copy in rep_->filter_entry, + // it's pinned to the cache and will be released in the destructor + if (!rep_->filter_entry.IsSet()) { + filter_entry.Release(rep_->table_options.block_cache.get()); + 
} return s; } @@ -1612,6 +1671,11 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) { return s; } +void BlockBasedTable::Close() { + rep_->filter_entry.Release(rep_->table_options.block_cache.get()); + rep_->index_entry.Release(rep_->table_options.block_cache.get()); +} + Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) { out_file->Append( "Index Details:\n" diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 600ca18a3..6a88d9d9a 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -76,7 +76,7 @@ class BlockBasedTable : public TableReader { unique_ptr&& file, uint64_t file_size, unique_ptr* table_reader, bool prefetch_index_and_filter = true, - bool skip_filters = false); + bool skip_filters = false, int level = -1); bool PrefixMayMatch(const Slice& internal_key); @@ -119,6 +119,8 @@ class BlockBasedTable : public TableReader { // convert SST file to a human readable form Status DumpTable(WritableFile* out_file) override; + void Close() override; + ~BlockBasedTable(); bool TEST_filter_block_preloaded() const; @@ -155,8 +157,9 @@ class BlockBasedTable : public TableReader { // 2. index is not present in block cache. // 3. We disallowed any io to be performed, that is, read_options == // kBlockCacheTier - InternalIterator* NewIndexIterator(const ReadOptions& read_options, - BlockIter* input_iter = nullptr); + InternalIterator* NewIndexIterator( + const ReadOptions& read_options, BlockIter* input_iter = nullptr, + CachableEntry* index_entry = nullptr); // Read block cache from block caches (if set): block_cache and // block_cache_compressed. 
diff --git a/table/table_builder.h b/table/table_builder.h index ed79bed0e..274245f08 100644 --- a/table/table_builder.h +++ b/table/table_builder.h @@ -29,17 +29,20 @@ struct TableReaderOptions { TableReaderOptions(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options, const InternalKeyComparator& _internal_comparator, - bool _skip_filters = false) + bool _skip_filters = false, int _level = -1) : ioptions(_ioptions), env_options(_env_options), internal_comparator(_internal_comparator), - skip_filters(_skip_filters) {} + skip_filters(_skip_filters), + level(_level) {} const ImmutableCFOptions& ioptions; const EnvOptions& env_options; const InternalKeyComparator& internal_comparator; // This is only used for BlockBasedTable (reader) bool skip_filters; + // what level this table/file is on, -1 for "not set, don't know" + int level; }; struct TableBuilderOptions { diff --git a/table/table_reader.h b/table/table_reader.h index 5751ab03f..c047bf8cb 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -91,6 +91,8 @@ class TableReader { virtual Status DumpTable(WritableFile* out_file) { return Status::NotSupported("DumpTable() not supported"); } + + virtual void Close() {} }; } // namespace rocksdb diff --git a/table/table_test.cc b/table/table_test.cc index 3cc7d0dc7..424ca005e 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -1715,7 +1715,7 @@ TEST_F(BlockBasedTableTest, FilterBlockInBlockCache) { ImmutableCFOptions ioptions3(options); // Generate table without filter policy c3.Finish(options, ioptions3, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); + GetPlainInternalComparator(options.comparator), &keys, &kvmap); // Open table with filter policy table_options.filter_policy.reset(NewBloomFilterPolicy(1)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); diff --git a/tools/benchmark.sh b/tools/benchmark.sh index e4729e7f3..d28aeb271 100755 --- a/tools/benchmark.sh +++ 
b/tools/benchmark.sh @@ -74,6 +74,7 @@ const_params=" --level_compaction_dynamic_level_bytes=true \ --bytes_per_sync=$((8 * M)) \ --cache_index_and_filter_blocks=0 \ + --pin_l0_filter_and_index_blocks_in_cache=1 \ --benchmark_write_rate_limit=$(( 1024 * 1024 * $mb_written_per_sec )) \ \ --hard_rate_limit=3 \ diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index e3e11e17b..2e1a83237 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -340,6 +340,9 @@ DEFINE_int64(cache_size, -1, "Number of bytes to use as a cache of uncompressed" DEFINE_bool(cache_index_and_filter_blocks, false, "Cache index/filter blocks in block cache."); +DEFINE_bool(pin_l0_filter_and_index_blocks_in_cache, false, + "Pin index/filter blocks of L0 files in block cache."); + DEFINE_int32(block_size, static_cast(rocksdb::BlockBasedTableOptions().block_size), "Number of bytes in a block."); @@ -2511,6 +2514,8 @@ class Benchmark { } block_based_options.cache_index_and_filter_blocks = FLAGS_cache_index_and_filter_blocks; + block_based_options.pin_l0_filter_and_index_blocks_in_cache = + FLAGS_pin_l0_filter_and_index_blocks_in_cache; block_based_options.block_cache = cache_; block_based_options.block_cache_compressed = compressed_cache_; block_based_options.block_size = FLAGS_block_size; diff --git a/util/options_helper.h b/util/options_helper.h index b0864442c..5c33e36ff 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -491,6 +491,10 @@ static std::unordered_mapUniform(2); + opt.pin_l0_filter_and_index_blocks_in_cache = rnd->Uniform(2); opt.index_type = rnd->Uniform(2) ? BlockBasedTableOptions::kBinarySearch : BlockBasedTableOptions::kHashSearch; opt.hash_index_allow_collision = rnd->Uniform(2); From 44756260aeae05847614022ad8382686333adf70 Mon Sep 17 00:00:00 2001 From: Marton Trencseni Date: Fri, 18 Mar 2016 06:13:54 +0000 Subject: [PATCH 195/195] Reset block cache in failing unit test. 
Test Plan: make -j40 check OPT=-g, on both /tmp and /dev/shm Reviewers: sdong Reviewed By: sdong Subscribers: andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55701 --- db/db_test.cc | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index aeb097f76..64cba350e 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -481,6 +481,10 @@ TEST_F(DBTest, MultiLevelIndexAndFilterBlocksCachedWithPinning) { ASSERT_OK(Flush(1)); // move this table to L1 dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); + + // reset block cache + table_options.block_cache = NewLRUCache(64 * 1024); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); TryReopenWithColumnFamilies({"default", "pikachu"}, options); // create new table at L0 Put(1, "a2", "begin2"); @@ -502,12 +506,13 @@ TEST_F(DBTest, MultiLevelIndexAndFilterBlocksCachedWithPinning) { ASSERT_EQ(im, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); - // should be read from L1; the block cache survives the reopen, and during - // the BlockBasedTableReader::Open() of the table we try to fetch it, we - // will see one hit from there, and then the Get() results in another hit + // this should be read from L1 + // the file is opened, prefetching results in a cache filter miss + // the block is loaded and added to the cache, + // then the get results in a cache hit for L1 value = Get(1, "a"); - ASSERT_EQ(fm, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(fh + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); } TEST_F(DBTest, ParanoidFileChecks) {