diff --git a/CMakeLists.txt b/CMakeLists.txt index 2490d8208..0de146867 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -342,6 +342,9 @@ set(TESTS db/db_test.cc db/db_test2.cc db/db_block_cache_test.cc + db/db_bloom_filter_test.cc + db/db_iterator_test.cc + db/db_sst_test.cc db/db_universal_compaction_test.cc db/db_wal_test.cc db/dbformat_test.cc diff --git a/Makefile b/Makefile index 8325ba36a..cd5e3a5dd 100644 --- a/Makefile +++ b/Makefile @@ -258,12 +258,15 @@ TESTS = \ db_test \ db_test2 \ db_block_cache_test \ + db_bloom_filter_test \ db_iter_test \ db_log_iter_test \ db_compaction_filter_test \ db_compaction_test \ db_dynamic_level_test \ db_inplace_update_test \ + db_iterator_test \ + db_sst_test \ db_tailing_iter_test \ db_universal_compaction_test \ db_wal_test \ @@ -872,6 +875,9 @@ db_test2: db/db_test2.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) db_block_cache_test: db/db_block_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) +db_bloom_filter_test: db/db_bloom_filter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + db_log_iter_test: db/db_log_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) @@ -887,6 +893,12 @@ db_dynamic_level_test: db/db_dynamic_level_test.o db/db_test_util.o $(LIBOBJECTS db_inplace_update_test: db/db_inplace_update_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) +db_iterator_test: db/db_iterator_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + +db_sst_test: db/db_sst_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + db_tailing_iter_test: db/db_tailing_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 939cf44e0..1e2831e5b 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -12,11 +12,6 @@ namespace rocksdb { -static uint64_t TestGetTickerCount(const Options& options, - Tickers ticker_type) { - return options.statistics->getTickerCount(ticker_type); -} - class DBBlockCacheTest : public DBTestBase { private: size_t miss_count_ = 0; @@ -229,6 +224,222 @@ TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) { delete iter; iter = nullptr; } + +// Make sure that when options.block_cache is set, after a new table is +// created its index/filter blocks are added to block cache. +TEST_F(DBBlockCacheTest, IndexAndFilterBlocksOfNewTableAddedToCache) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = rocksdb::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.cache_index_and_filter_blocks = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(20)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + CreateAndReopenWithCF({"pikachu"}, options); + + ASSERT_OK(Put(1, "key", "val")); + // Create a new table. + ASSERT_OK(Flush(1)); + + // index/filter blocks added to block cache right after table creation. + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(2, /* only index/filter were added */ + TestGetTickerCount(options, BLOCK_CACHE_ADD)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); + uint64_t int_num; + ASSERT_TRUE( + dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); + ASSERT_EQ(int_num, 0U); + + // Make sure filter block is in cache. 
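+  // (KeyMayExist internally reads at kBlockCacheTier, so the cached filter
+  // block is probed without any file I/O.)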
+  std::string value;
+  ReadOptions ropt;
+  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
+
+  // Miss count should remain the same.
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+
+  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+
+  // Make sure index block is in cache.
+  auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT);
+  value = Get(1, "key");
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
+  ASSERT_EQ(index_block_hit + 1,
+            TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
+
+  value = Get(1, "key");
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
+  ASSERT_EQ(index_block_hit + 2,
+            TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
+}
+
+TEST_F(DBBlockCacheTest, ParanoidFileChecks) {
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  options.statistics = rocksdb::CreateDBStatistics();
+  options.level0_file_num_compaction_trigger = 2;
+  options.paranoid_file_checks = true;
+  BlockBasedTableOptions table_options;
+  table_options.cache_index_and_filter_blocks = false;
+  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
+  options.table_factory.reset(new BlockBasedTableFactory(table_options));
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  ASSERT_OK(Put(1, "1_key", "val"));
+  ASSERT_OK(Put(1, "9_key", "val"));
+  // Create a new table.
+  ASSERT_OK(Flush(1));
+  ASSERT_EQ(1, /* read and cache data block */
+            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+  ASSERT_OK(Put(1, "1_key2", "val2"));
+  ASSERT_OK(Put(1, "9_key2", "val2"));
+  // Create a new SST file. This will further trigger a compaction
+  // and generate another file.
+  ASSERT_OK(Flush(1));
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ(3, /* 3 files created in total up to now */
+            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+  // After disabling options.paranoid_file_checks, no further block
+  // is added after generating a new file.
+  ASSERT_OK(
+      dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}}));
+
+  ASSERT_OK(Put(1, "1_key3", "val3"));
+  ASSERT_OK(Put(1, "9_key3", "val3"));
+  ASSERT_OK(Flush(1));
+  ASSERT_OK(Put(1, "1_key4", "val4"));
+  ASSERT_OK(Put(1, "9_key4", "val4"));
+  ASSERT_OK(Flush(1));
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ(3, /* still 3 files created in total */
+            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+}
+
+TEST_F(DBBlockCacheTest, CompressedCache) {
+  if (!Snappy_Supported()) {
+    return;
+  }
+  int num_iter = 80;
+
+  // Run this test four iterations.
+  // Iteration 1: only an uncompressed block cache
+  // Iteration 2: only a compressed block cache
+  // Iteration 3: both block cache and compressed cache
+  // Iteration 4: both block cache and compressed cache, but DB is not
+  // compressed
+  for (int iter = 0; iter < 4; iter++) {
+    Options options = CurrentOptions();
+    options.write_buffer_size = 64 * 1024;  // small write buffer
+    options.statistics = rocksdb::CreateDBStatistics();
+
+    BlockBasedTableOptions table_options;
+    switch (iter) {
+      case 0:
+        // only uncompressed block cache
+        table_options.block_cache = NewLRUCache(8 * 1024);
+        table_options.block_cache_compressed = nullptr;
+        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+        break;
+      case 1:
+        // no block cache, only compressed cache
+        table_options.no_block_cache = true;
+        table_options.block_cache = nullptr;
+        table_options.block_cache_compressed = NewLRUCache(8 * 1024);
+        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+        break;
+      case 2:
+        // both compressed and uncompressed block cache
+        table_options.block_cache = NewLRUCache(1024);
+        table_options.block_cache_compressed = NewLRUCache(8 * 1024);
+        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+        break;
+      case 3:
+        // both block cache and compressed cache, but DB is not compressed
+        // also, make block cache sizes bigger, to trigger block cache hits
+        table_options.block_cache = NewLRUCache(1024 * 1024);
+        table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024);
+        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+        options.compression = kNoCompression;
+        break;
+      default:
+        ASSERT_TRUE(false);
+    }
+    CreateAndReopenWithCF({"pikachu"}, options);
+    // default column family doesn't have block cache
+    Options no_block_cache_opts;
+    no_block_cache_opts.statistics = options.statistics;
+    no_block_cache_opts = CurrentOptions(no_block_cache_opts);
+    BlockBasedTableOptions table_options_no_bc;
+    table_options_no_bc.no_block_cache = true;
+    no_block_cache_opts.table_factory.reset(
+        NewBlockBasedTableFactory(table_options_no_bc));
+    ReopenWithColumnFamilies(
+        {"default", "pikachu"},
+        std::vector<Options>({no_block_cache_opts, options}));
+
+    Random rnd(301);
+
+    // Write 80 values of ~1KB each (a new random string every 4 values)
+    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
+    std::vector<std::string> values;
+    std::string str;
+    for (int i = 0; i < num_iter; i++) {
+      if (i % 4 == 0) {  // high compression ratio
+        str = RandomString(&rnd, 1000);
+      }
+      values.push_back(str);
+      ASSERT_OK(Put(1, Key(i), values[i]));
+    }
+
+    // flush all data from memtable so that reads are from block cache
+    ASSERT_OK(Flush(1));
+
+    for (int i = 0; i < num_iter; i++) {
+      ASSERT_EQ(Get(1, Key(i)), values[i]);
+    }
+
+    // check that we triggered the appropriate code paths in the cache
+    switch (iter) {
+      case 0:
+        // only uncompressed block cache
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
+        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
+        break;
+      case 1:
+        // no block cache, only compressed cache
+        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
+        break;
+      case 2:
+        // both compressed and uncompressed block cache
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
+        break;
+      case 3:
+        // both compressed and uncompressed block cache
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_HIT), 0);
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
+        // compressed doesn't have any hits since blocks are not compressed on
+        // storage
+        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT), 0);
+        break;
+      default:
+        ASSERT_TRUE(false);
+    }
+
+    options.create_if_missing = true;
+    DestroyAndReopen(options);
+  }
+}
+
 #endif
 
 }  // namespace rocksdb
diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc
new file mode 100644
index 000000000..b9a86c31f
--- /dev/null
+++ b/db/db_bloom_filter_test.cc
@@ -0,0 +1,1047 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/db_test_util.h"
+#include "port/stack_trace.h"
+#include "rocksdb/perf_context.h"
+
+namespace rocksdb {
+
+// DB tests related to bloom filter.
+
+class DBBloomFilterTest : public DBTestBase {
+ public:
+  DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {}
+};
+
+// KeyMayExist can lead to a few false positives, but not false negatives.
+// To make the test deterministic, use a much larger number of bits per key
+// (20) than bits in the key, so that false positives are eliminated.
+TEST_F(DBBloomFilterTest, KeyMayExist) {
+  do {
+    ReadOptions ropts;
+    std::string value;
+    anon::OptionsOverride options_override;
+    options_override.filter_policy.reset(NewBloomFilterPolicy(20));
+    Options options = CurrentOptions(options_override);
+    options.statistics = rocksdb::CreateDBStatistics();
+    CreateAndReopenWithCF({"pikachu"}, options);
+
+    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
+
+    ASSERT_OK(Put(1, "a", "b"));
+    bool value_found = false;
+    ASSERT_TRUE(
+        db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
+    ASSERT_TRUE(value_found);
+    ASSERT_EQ("b", value);
+
+    ASSERT_OK(Flush(1));
+    value.clear();
+
+    uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    ASSERT_TRUE(
+        db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
+    ASSERT_TRUE(!value_found);
+    // assert that no new files were opened and no new blocks were
+    // read into block cache.
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+    ASSERT_OK(Delete(1, "a"));
+
+    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+    ASSERT_OK(Flush(1));
+    dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
+                                true /* disallow trivial move */);
+
+    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+    ASSERT_OK(Delete(1, "c"));
+
+    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value));
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+    // KeyMayExist function only checks data in block caches, which is not used
+    // by plain table format.
+  } while (
+      ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction));
+}
+
+// A delete is skipped for a key if KeyMayExist(key) returns false.
+// Tests WriteBatch consistency and proper delete behaviour.
+TEST_F(DBBloomFilterTest, FilterDeletes) {
+  do {
+    anon::OptionsOverride options_override;
+    options_override.filter_policy.reset(NewBloomFilterPolicy(20));
+    Options options = CurrentOptions(options_override);
+    options.filter_deletes = true;
+    CreateAndReopenWithCF({"pikachu"}, options);
+    WriteBatch batch;
+
+    batch.Delete(handles_[1], "a");
+    dbfull()->Write(WriteOptions(), &batch);
+    ASSERT_EQ(AllEntriesFor("a", 1), "[ ]");  // Delete skipped
+    batch.Clear();
+
+    batch.Put(handles_[1], "a", "b");
+    batch.Delete(handles_[1], "a");
+    dbfull()->Write(WriteOptions(), &batch);
+    ASSERT_EQ(Get(1, "a"), "NOT_FOUND");
+    ASSERT_EQ(AllEntriesFor("a", 1), "[ DEL, b ]");  // Delete issued
+    batch.Clear();
+
+    batch.Delete(handles_[1], "c");
+    batch.Put(handles_[1], "c", "d");
+    dbfull()->Write(WriteOptions(), &batch);
+    ASSERT_EQ(Get(1, "c"), "d");
+    ASSERT_EQ(AllEntriesFor("c", 1), "[ d ]");  // Delete skipped
+    batch.Clear();
+
+    ASSERT_OK(Flush(1));  // A stray Flush
+
+    batch.Delete(handles_[1], "c");
+    dbfull()->Write(WriteOptions(), &batch);
+    ASSERT_EQ(AllEntriesFor("c", 1), "[ DEL, d ]");  // Delete issued
+    batch.Clear();
+  } while (ChangeCompactOptions());
+}
+
+TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
+  Options options = last_options_;
+  options.prefix_extractor.reset(NewFixedPrefixTransform(8));
+  options.statistics = rocksdb::CreateDBStatistics();
+  BlockBasedTableOptions bbto;
+  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
+  bbto.whole_key_filtering = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  DestroyAndReopen(options);
+
+  WriteOptions wo;
+  ReadOptions ro;
+  FlushOptions fo;
+  fo.wait = true;
+  std::string value;
+
+  ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo"));
+  ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
+  ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));
+
+  dbfull()->Flush(fo);
+
+  ASSERT_EQ("foo", Get("barbarbar"));
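+  // BLOOM_FILTER_USEFUL counts reads where a bloom filter check let an SST
+  // file be skipped entirely; lookups whose 8-byte prefix is present in the
+  // file leave it unchanged.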
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+  ASSERT_EQ("foo2", Get("barbarbar2"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+  ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+
+  ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+
+  ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
+}
+
+TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
+  Options options = last_options_;
+  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+  options.statistics = rocksdb::CreateDBStatistics();
+
+  BlockBasedTableOptions bbto;
+  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
+  bbto.whole_key_filtering = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  DestroyAndReopen(options);
+
+  WriteOptions wo;
+  ReadOptions ro;
+  FlushOptions fo;
+  fo.wait = true;
+  std::string value;
+
+  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
+  // Need to insert some keys to make sure files are not filtered out by key
+  // ranges.
+  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+  dbfull()->Flush(fo);
+
+  Reopen(options);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+
+  // Reopen with whole key filtering enabled and prefix extractor
+  // NULL. Bloom filter should be off for both whole key and
+  // prefix bloom.
+  bbto.whole_key_filtering = true;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  options.prefix_extractor.reset();
+  Reopen(options);
+
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  // Write DB with only full key filtering.
+  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
+  // Need to insert some keys to make sure files are not filtered out by key
+  // ranges.
+  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+
+  // Reopen with both whole key filtering off and prefix extractor enabled.
+  // Still no bloom filter should be used.
+  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+  bbto.whole_key_filtering = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  Reopen(options);
+
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+
+  // Try to create a DB with mixed files:
+  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
+  // Need to insert some keys to make sure files are not filtered out by key
+  // ranges.
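+  // (A file whose [smallest, largest] key range already excludes the lookup
+  // key is skipped before its bloom filter is ever consulted, which would
+  // hide the filter behavior this test measures.)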
+  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+
+  options.prefix_extractor.reset();
+  bbto.whole_key_filtering = true;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  Reopen(options);
+
+  // Try to create a DB with mixed files.
+  ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar"));
+  // In this case we need to insert some keys to make sure files are
+  // not filtered out by key ranges.
+  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+  Flush();
+
+  // Now we have two files:
+  // File 1: An older file with prefix bloom.
+  // File 2: A newer file with whole key bloom filter.
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
+  ASSERT_EQ("bar", Get("barfoo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
+
+  // Reopen with the same setting: only whole key is used
+  Reopen(options);
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
+  ASSERT_EQ("bar", Get("barfoo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
+
+  // Restart with both filters allowed
+  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+  bbto.whole_key_filtering = true;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  Reopen(options);
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
+  // File 1 will have it filtered out.
+  // File 2 will not, as prefix `foo` exists in the file.
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+  ASSERT_EQ("bar", Get("barfoo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+
+  // Restart with only prefix bloom allowed.
+  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+  bbto.whole_key_filtering = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  Reopen(options);
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
+  ASSERT_EQ("bar", Get("barfoo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
+}
+
+TEST_F(DBBloomFilterTest, BloomFilter) {
+  do {
+    Options options = CurrentOptions();
+    env_->count_random_reads_ = true;
+    options.env = env_;
+    // ChangeCompactOptions() only changes compaction style, which does not
+    // trigger reset of table_factory
+    BlockBasedTableOptions table_options;
+    table_options.no_block_cache = true;
+    table_options.filter_policy.reset(NewBloomFilterPolicy(10));
+    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+    CreateAndReopenWithCF({"pikachu"}, options);
+
+    // Populate multiple layers
+    const int N = 10000;
+    for (int i = 0; i < N; i++) {
+      ASSERT_OK(Put(1, Key(i), Key(i)));
+    }
+    Compact(1, "a", "z");
+    for (int i = 0; i < N; i += 100) {
+      ASSERT_OK(Put(1, Key(i), Key(i)));
+    }
+    Flush(1);
+
+    // Prevent auto compactions triggered by seeks
+    env_->delay_sstable_sync_.store(true, std::memory_order_release);
+
+    // Lookup present keys. Should rarely read from small sstable.
+    env_->random_read_counter_.Reset();
+    for (int i = 0; i < N; i++) {
+      ASSERT_EQ(Key(i), Get(1, Key(i)));
+    }
+    int reads = env_->random_read_counter_.Read();
+    fprintf(stderr, "%d present => %d reads\n", N, reads);
+    ASSERT_GE(reads, N);
+    ASSERT_LE(reads, N + 2 * N / 100);
+
+    // Lookup missing keys. Should rarely read from either sstable.
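+    // (At ~10 bits per key a bloom filter's false positive rate is roughly
+    // 1%, which is what the 3 * N / 100 read bound below allows for.)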
+ env_->random_read_counter_.Reset(); + for (int i = 0; i < N; i++) { + ASSERT_EQ("NOT_FOUND", Get(1, Key(i) + ".missing")); + } + reads = env_->random_read_counter_.Read(); + fprintf(stderr, "%d missing => %d reads\n", N, reads); + ASSERT_LE(reads, 3 * N / 100); + + env_->delay_sstable_sync_.store(false, std::memory_order_release); + Close(); + } while (ChangeCompactOptions()); +} + +TEST_F(DBBloomFilterTest, BloomFilterRate) { + while (ChangeFilterOptions()) { + Options options = CurrentOptions(); + options.statistics = rocksdb::CreateDBStatistics(); + CreateAndReopenWithCF({"pikachu"}, options); + + const int maxKey = 10000; + for (int i = 0; i < maxKey; i++) { + ASSERT_OK(Put(1, Key(i), Key(i))); + } + // Add a large key to make the file contain wide range + ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); + Flush(1); + + // Check if they can be found + for (int i = 0; i < maxKey; i++) { + ASSERT_EQ(Key(i), Get(1, Key(i))); + } + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + + // Check if filter is useful + for (int i = 0; i < maxKey; i++) { + ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); + } + ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98); + } +} + +TEST_F(DBBloomFilterTest, BloomFilterCompatibility) { + Options options = CurrentOptions(); + options.statistics = rocksdb::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + // Create with block based filter + CreateAndReopenWithCF({"pikachu"}, options); + + const int maxKey = 10000; + for (int i = 0; i < maxKey; i++) { + ASSERT_OK(Put(1, Key(i), Key(i))); + } + ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); + Flush(1); + + // Check db with full filter + table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ReopenWithColumnFamilies({"default", "pikachu"}, options); + + // Check if they can be found + for (int i = 0; i < maxKey; i++) { + ASSERT_EQ(Key(i), Get(1, Key(i))); + } + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); +} + +TEST_F(DBBloomFilterTest, BloomFilterReverseCompatibility) { + Options options = CurrentOptions(); + options.statistics = rocksdb::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + // Create with full filter + CreateAndReopenWithCF({"pikachu"}, options); + + const int maxKey = 10000; + for (int i = 0; i < maxKey; i++) { + ASSERT_OK(Put(1, Key(i), Key(i))); + } + ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); + Flush(1); + + // Check db with block_based filter + table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ReopenWithColumnFamilies({"default", "pikachu"}, options); + + // Check if they can be found + for (int i = 0; i < maxKey; i++) { + ASSERT_EQ(Key(i), Get(1, Key(i))); + } + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); +} + +namespace { +// A wrapped bloom over default FilterPolicy +class WrappedBloom : public FilterPolicy { + public: + explicit WrappedBloom(int bits_per_key) + : filter_(NewBloomFilterPolicy(bits_per_key)), counter_(0) {} + + ~WrappedBloom() { delete filter_; } + + const char* 
Name() const override { return "WrappedRocksDbFilterPolicy"; }
+
+  void CreateFilter(const rocksdb::Slice* keys, int n,
+                    std::string* dst) const override {
+    std::unique_ptr<rocksdb::Slice[]> user_keys(new rocksdb::Slice[n]);
+    for (int i = 0; i < n; ++i) {
+      user_keys[i] = convertKey(keys[i]);
+    }
+    return filter_->CreateFilter(user_keys.get(), n, dst);
+  }
+
+  bool KeyMayMatch(const rocksdb::Slice& key,
+                   const rocksdb::Slice& filter) const override {
+    counter_++;
+    return filter_->KeyMayMatch(convertKey(key), filter);
+  }
+
+  uint32_t GetCounter() { return counter_; }
+
+ private:
+  const FilterPolicy* filter_;
+  mutable uint32_t counter_;
+
+  rocksdb::Slice convertKey(const rocksdb::Slice& key) const { return key; }
+};
+}  // namespace
+
+TEST_F(DBBloomFilterTest, BloomFilterWrapper) {
+  Options options = CurrentOptions();
+  options.statistics = rocksdb::CreateDBStatistics();
+
+  BlockBasedTableOptions table_options;
+  WrappedBloom* policy = new WrappedBloom(10);
+  table_options.filter_policy.reset(policy);
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  const int maxKey = 10000;
+  for (int i = 0; i < maxKey; i++) {
+    ASSERT_OK(Put(1, Key(i), Key(i)));
+  }
+  // Add a large key to make the file contain a wide key range
+  ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
+  ASSERT_EQ(0U, policy->GetCounter());
+  Flush(1);
+
+  // Check if they can be found
+  for (int i = 0; i < maxKey; i++) {
+    ASSERT_EQ(Key(i), Get(1, Key(i)));
+  }
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+  ASSERT_EQ(1U * maxKey, policy->GetCounter());
+
+  // Check if filter is useful
+  for (int i = 0; i < maxKey; i++) {
+    ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333)));
+  }
+  ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98);
+  ASSERT_EQ(2U * maxKey, policy->GetCounter());
+}
+
+class SliceTransformLimitedDomain : public SliceTransform {
+  const char* Name() const override { return "SliceTransformLimitedDomain"; }
+
+  Slice Transform(const Slice& src) const override {
+    return Slice(src.data(), 5);
+  }
+
+  bool InDomain(const Slice& src) const override {
+    // prefix will be x????
+    return src.size() >= 5 && src[0] == 'x';
+  }
+
+  bool InRange(const Slice& dst) const override {
+    // prefix will be x????
+    return dst.size() == 5 && dst[0] == 'x';
+  }
+};
+
+TEST_F(DBBloomFilterTest, PrefixExtractorFullFilter) {
+  BlockBasedTableOptions bbto;
+  // Full Filter Block
+  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
+  bbto.whole_key_filtering = false;
+
+  Options options = CurrentOptions();
+  options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  DestroyAndReopen(options);
+
+  ASSERT_OK(Put("x1111_AAAA", "val1"));
+  ASSERT_OK(Put("x1112_AAAA", "val2"));
+  ASSERT_OK(Put("x1113_AAAA", "val3"));
+  ASSERT_OK(Put("x1114_AAAA", "val4"));
+  // Not in domain, won't be added to filter
+  ASSERT_OK(Put("zzzzz_AAAA", "val5"));
+
+  ASSERT_OK(Flush());
+
+  ASSERT_EQ(Get("x1111_AAAA"), "val1");
+  ASSERT_EQ(Get("x1112_AAAA"), "val2");
+  ASSERT_EQ(Get("x1113_AAAA"), "val3");
+  ASSERT_EQ(Get("x1114_AAAA"), "val4");
+  // Was not added to filter but rocksdb will try to read it from the filter
+  ASSERT_EQ(Get("zzzzz_AAAA"), "val5");
+}
+
+TEST_F(DBBloomFilterTest, PrefixExtractorBlockFilter) {
+  BlockBasedTableOptions bbto;
+  // Block Filter Block
+  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
+
+  Options options = CurrentOptions();
+  options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  DestroyAndReopen(options);
+
+  ASSERT_OK(Put("x1113_AAAA", "val3"));
+  ASSERT_OK(Put("x1114_AAAA", "val4"));
+  // Not in domain, won't be added to filter
+  ASSERT_OK(Put("zzzzz_AAAA", "val1"));
+  ASSERT_OK(Put("zzzzz_AAAB", "val2"));
+  ASSERT_OK(Put("zzzzz_AAAC", "val3"));
+  ASSERT_OK(Put("zzzzz_AAAD", "val4"));
+
+  ASSERT_OK(Flush());
+
+  std::vector<std::string> iter_res;
+  auto iter = db_->NewIterator(ReadOptions());
+  // Seek to a key that was not in Domain
+  for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) {
+    iter_res.emplace_back(iter->value().ToString());
+  }
+
+  std::vector<std::string> expected_res = {"val1", "val2", "val3", "val4"};
+  ASSERT_EQ(iter_res, expected_res);
+  delete iter;
+}
+
+#ifndef ROCKSDB_LITE
+class BloomStatsTestWithParam
+    : public DBBloomFilterTest,
+      public testing::WithParamInterface<std::tuple<bool, bool>> {
+ public:
+  BloomStatsTestWithParam() {
+    use_block_table_ = std::get<0>(GetParam());
+    use_block_based_builder_ = std::get<1>(GetParam());
+
+    options_.create_if_missing = true;
+    options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
+    options_.memtable_prefix_bloom_bits = 8 * 1024;
+    if (use_block_table_) {
+      BlockBasedTableOptions table_options;
+      table_options.hash_index_allow_collision = false;
+      table_options.filter_policy.reset(
+          NewBloomFilterPolicy(10, use_block_based_builder_));
+      options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
+    } else {
+      PlainTableOptions table_options;
+      options_.table_factory.reset(NewPlainTableFactory(table_options));
+    }
+
+    perf_context.Reset();
+    DestroyAndReopen(options_);
+  }
+
+  ~BloomStatsTestWithParam() {
+    perf_context.Reset();
+    Destroy(options_);
+  }
+
+  // Required if inheriting from testing::WithParamInterface<>
+  static void SetUpTestCase() {}
+  static void TearDownTestCase() {}
+
+  bool use_block_table_;
+  bool use_block_based_builder_;
+  Options options_;
+};
+
+// 1 Insert 2 K-V pairs into DB
+// 2 Call Get() for both keys - expect memtable bloom hit stat to be 2
+// 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1
+// 4 Call Flush() to create SST
+// 5 Call Get() for both keys - expect SST bloom hit stat to be 2
+// 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1
+// Test both: block and plain SST
+TEST_P(BloomStatsTestWithParam, BloomStatsTest) {
+  std::string key1("AAAA");
+  std::string key2("RXDB");  // not in DB
+  std::string key3("ZBRA");
+  std::string value1("Value1");
+  std::string value3("Value3");
+
+  ASSERT_OK(Put(key1, value1, WriteOptions()));
+  ASSERT_OK(Put(key3, value3, WriteOptions()));
+
+  // check memtable bloom stats
+  ASSERT_EQ(value1, Get(key1));
+  ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
+  ASSERT_EQ(value3, Get(key3));
+  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
+  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
+
+  ASSERT_EQ("NOT_FOUND", Get(key2));
+  ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
+  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
+
+  // sanity checks
+  ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
+  ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
+
+  Flush();
+
+  // sanity checks
+  ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
+  ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
+
+  // check SST bloom stats
+  // NOTE: hits per get differs because of code path differences
+  // in BlockBasedTable::Get()
+  int hits_per_get = use_block_table_ && !use_block_based_builder_ ? 2 : 1;
+  ASSERT_EQ(value1, Get(key1));
+  ASSERT_EQ(hits_per_get, perf_context.bloom_sst_hit_count);
+  ASSERT_EQ(value3, Get(key3));
+  ASSERT_EQ(2 * hits_per_get, perf_context.bloom_sst_hit_count);
+
+  ASSERT_EQ("NOT_FOUND", Get(key2));
+  ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
+}
+
+// Same scenario as in BloomStatsTest but using an iterator
+TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
+  std::string key1("AAAA");
+  std::string key2("RXDB");  // not in DB
+  std::string key3("ZBRA");
+  std::string value1("Value1");
+  std::string value3("Value3");
+
+  ASSERT_OK(Put(key1, value1, WriteOptions()));
+  ASSERT_OK(Put(key3, value3, WriteOptions()));
+
+  unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions()));
+
+  // check memtable bloom stats
+  iter->Seek(key1);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ(value1, iter->value().ToString());
+  ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
+  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
+
+  iter->Seek(key3);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ(value3, iter->value().ToString());
+  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
+  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
+
+  iter->Seek(key2);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(!iter->Valid());
+  ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
+  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
+
+  Flush();
+
+  iter.reset(dbfull()->NewIterator(ReadOptions()));
+
+  // Check SST bloom stats
+  iter->Seek(key1);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ(value1, iter->value().ToString());
+  ASSERT_EQ(1, perf_context.bloom_sst_hit_count);
+
+  iter->Seek(key3);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ(value3, iter->value().ToString());
+  ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
+
+  iter->Seek(key2);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(!iter->Valid());
+  ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
+  ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
+}
+
+INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam,
+                        ::testing::Values(std::make_tuple(true, true),
+                                          std::make_tuple(true, false),
+                                          std::make_tuple(false, false)));
+
+namespace {
+void
PrefixScanInit(DBBloomFilterTest* dbtest) { + char buf[100]; + std::string keystr; + const int small_range_sstfiles = 5; + const int big_range_sstfiles = 5; + + // Generate 11 sst files with the following prefix ranges. + // GROUP 0: [0,10] (level 1) + // GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6] (level 0) + // GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10] (level 0) + // + // A seek with the previous API would do 11 random I/Os (to all the + // files). With the new API and a prefix filter enabled, we should + // only do 2 random I/O, to the 2 files containing the key. + + // GROUP 0 + snprintf(buf, sizeof(buf), "%02d______:start", 0); + keystr = std::string(buf); + ASSERT_OK(dbtest->Put(keystr, keystr)); + snprintf(buf, sizeof(buf), "%02d______:end", 10); + keystr = std::string(buf); + ASSERT_OK(dbtest->Put(keystr, keystr)); + dbtest->Flush(); + dbtest->dbfull()->CompactRange(CompactRangeOptions(), nullptr, + nullptr); // move to level 1 + + // GROUP 1 + for (int i = 1; i <= small_range_sstfiles; i++) { + snprintf(buf, sizeof(buf), "%02d______:start", i); + keystr = std::string(buf); + ASSERT_OK(dbtest->Put(keystr, keystr)); + snprintf(buf, sizeof(buf), "%02d______:end", i + 1); + keystr = std::string(buf); + ASSERT_OK(dbtest->Put(keystr, keystr)); + dbtest->Flush(); + } + + // GROUP 2 + for (int i = 1; i <= big_range_sstfiles; i++) { + snprintf(buf, sizeof(buf), "%02d______:start", 0); + keystr = std::string(buf); + ASSERT_OK(dbtest->Put(keystr, keystr)); + snprintf(buf, sizeof(buf), "%02d______:end", small_range_sstfiles + i + 1); + keystr = std::string(buf); + ASSERT_OK(dbtest->Put(keystr, keystr)); + dbtest->Flush(); + } +} +} // namespace + +TEST_F(DBBloomFilterTest, PrefixScan) { + XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, + kSkipNoPrefix); + while (ChangeFilterOptions()) { + int count; + Slice prefix; + Slice key; + char buf[100]; + Iterator* iter; + snprintf(buf, sizeof(buf), "03______:"); + prefix = Slice(buf, 8); + key = Slice(buf, 9); + ASSERT_EQ(key.difference_offset(prefix), 8); + ASSERT_EQ(prefix.difference_offset(key), 8); + // db configs + env_->count_random_reads_ = true; + Options options = CurrentOptions(); + options.env = env_; + options.prefix_extractor.reset(NewFixedPrefixTransform(8)); + options.disable_auto_compactions = true; + options.max_background_compactions = 2; + options.create_if_missing = true; + options.memtable_factory.reset(NewHashSkipListRepFactory(16)); + + BlockBasedTableOptions table_options; + table_options.no_block_cache = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); + table_options.whole_key_filtering = false; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + // 11 RAND I/Os + DestroyAndReopen(options); + PrefixScanInit(this); + count = 0; + env_->random_read_counter_.Reset(); + iter = db_->NewIterator(ReadOptions()); + for (iter->Seek(prefix); iter->Valid(); iter->Next()) { + if (!iter->key().starts_with(prefix)) { + break; + } + count++; + } + ASSERT_OK(iter->status()); + delete iter; + ASSERT_EQ(count, 2); + ASSERT_EQ(env_->random_read_counter_.Read(), 2); + Close(); + } // end of while + XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, 0); +} + +TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { + Options options = CurrentOptions(); + options.write_buffer_size = 64 * 1024; + options.arena_block_size = 4 * 1024; + options.target_file_size_base = 64 * 1024; + options.level0_file_num_compaction_trigger = 2; + options.level0_slowdown_writes_trigger = 2; 
+  options.level0_stop_writes_trigger = 4;
+  options.max_bytes_for_level_base = 256 * 1024;
+  options.max_write_buffer_number = 2;
+  options.max_background_compactions = 8;
+  options.max_background_flushes = 8;
+  options.compression = kNoCompression;
+  options.compaction_style = kCompactionStyleLevel;
+  options.level_compaction_dynamic_level_bytes = true;
+  BlockBasedTableOptions bbto;
+  bbto.cache_index_and_filter_blocks = true;
+  bbto.filter_policy.reset(NewBloomFilterPolicy(10, true));
+  bbto.whole_key_filtering = true;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  options.optimize_filters_for_hits = true;
+  options.statistics = rocksdb::CreateDBStatistics();
+  CreateAndReopenWithCF({"mypikachu"}, options);
+
+  int numkeys = 200000;
+
+  // Generate randomly shuffled keys, so the updates are almost
+  // random.
+  std::vector<int> keys;
+  keys.reserve(numkeys);
+  for (int i = 0; i < numkeys; i += 2) {
+    keys.push_back(i);
+  }
+  std::random_shuffle(std::begin(keys), std::end(keys));
+
+  int num_inserted = 0;
+  for (int key : keys) {
+    ASSERT_OK(Put(1, Key(key), "val"));
+    if (++num_inserted % 1000 == 0) {
+      dbfull()->TEST_WaitForFlushMemTable();
+      dbfull()->TEST_WaitForCompact();
+    }
+  }
+  ASSERT_OK(Put(1, Key(0), "val"));
+  ASSERT_OK(Put(1, Key(numkeys), "val"));
+  ASSERT_OK(Flush(1));
+  dbfull()->TEST_WaitForCompact();
+
+  if (NumTableFilesAtLevel(0, 1) == 0) {
+    // No Level 0 file. Create one.
+    ASSERT_OK(Put(1, Key(0), "val"));
+    ASSERT_OK(Put(1, Key(numkeys), "val"));
+    ASSERT_OK(Flush(1));
+    dbfull()->TEST_WaitForCompact();
+  }
+
+  for (int i = 1; i < numkeys; i += 2) {
+    ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND");
+  }
+
+  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
+  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
+  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
+
+  // Now we have three sorted runs, L0, L5 and L6, with most files in L6
+  // having no bloom filter. Most keys will be checked against bloom filters
+  // twice.
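+  // (optimize_filters_for_hits skips generating bloom filters for the
+  // bottommost level, so lookups for missing keys consult filters only in
+  // L0 and L5 before reading from L6.)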
+ ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2); + ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2); + + for (int i = 0; i < numkeys; i += 2) { + ASSERT_EQ(Get(1, Key(i)), "val"); + } + + // Part 2 (read path): rewrite last level with blooms, then verify they get + // cached only if !optimize_filters_for_hits + options.disable_auto_compactions = true; + options.num_levels = 9; + options.optimize_filters_for_hits = false; + options.statistics = CreateDBStatistics(); + bbto.block_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + ReopenWithColumnFamilies({"default", "mypikachu"}, options); + MoveFilesToLevel(7 /* level */, 1 /* column family index */); + + std::string value = Get(1, Key(0)); + uint64_t prev_cache_filter_hits = + TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); + value = Get(1, Key(0)); + ASSERT_EQ(prev_cache_filter_hits + 1, + TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + + // Now that we know the filter blocks exist in the last level files, see if + // filter caching is skipped for this optimization + options.optimize_filters_for_hits = true; + options.statistics = CreateDBStatistics(); + bbto.block_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + ReopenWithColumnFamilies({"default", "mypikachu"}, options); + + value = Get(1, Key(0)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + ASSERT_EQ(2 /* index and data block */, + TestGetTickerCount(options, BLOCK_CACHE_ADD)); + + // Check filter block ignored for files preloaded during DB::Open() + options.max_open_files = -1; + options.statistics = CreateDBStatistics(); + bbto.block_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + ReopenWithColumnFamilies({"default", "mypikachu"}, options); + + uint64_t prev_cache_filter_misses = + TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); + prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); + Get(1, Key(0)); + ASSERT_EQ(prev_cache_filter_misses, + TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(prev_cache_filter_hits, + TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + + // Check filter block ignored for file trivially-moved to bottom level + bbto.block_cache.reset(); + options.max_open_files = 100; // setting > -1 makes it not preload all files + options.statistics = CreateDBStatistics(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + ReopenWithColumnFamilies({"default", "mypikachu"}, options); + + ASSERT_OK(Put(1, Key(numkeys + 1), "val")); + ASSERT_OK(Flush(1)); + + int32_t trivial_move = 0; + int32_t non_trivial_move = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:TrivialMove", + [&](void* arg) { trivial_move++; }); + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:NonTrivial", + [&](void* arg) { non_trivial_move++; }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + CompactRangeOptions compact_options; + compact_options.bottommost_level_compaction = + BottommostLevelCompaction::kSkip; + compact_options.change_level = true; + compact_options.target_level = 7; + db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); + + ASSERT_EQ(trivial_move, 1); + ASSERT_EQ(non_trivial_move, 0); + + prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); + 
prev_cache_filter_misses =
+      TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
+  value = Get(1, Key(numkeys + 1));
+  ASSERT_EQ(prev_cache_filter_hits,
+            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+  ASSERT_EQ(prev_cache_filter_misses,
+            TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+
+  // Check filter block not cached for iterator
+  bbto.block_cache.reset();
+  options.statistics = CreateDBStatistics();
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+
+  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions(), handles_[1]));
+  iter->SeekToFirst();
+  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+  ASSERT_EQ(2 /* index and data block */,
+            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+}
+
+#endif  // ROCKSDB_LITE
+
+}  // namespace rocksdb
+
+int main(int argc, char** argv) {
+  rocksdb::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc
index 677cacb22..c794407e9 100644
--- a/db/db_compaction_test.cc
+++ b/db/db_compaction_test.cc
@@ -40,34 +40,6 @@ class DBCompactionTestWithParam
 };
 
 namespace {
-class OnFileDeletionListener : public EventListener {
- public:
-  OnFileDeletionListener() :
-      matched_count_(0),
-      expected_file_name_("") {}
-
-  void SetExpectedFileName(
-      const std::string file_name) {
-    expected_file_name_ = file_name;
-  }
-
-  void VerifyMatchedCount(size_t expected_value) {
-    ASSERT_EQ(matched_count_, expected_value);
-  }
-
-  void OnTableFileDeleted(
-      const TableFileDeletionInfo& info) override {
-    if (expected_file_name_ != "") {
-      ASSERT_EQ(expected_file_name_, info.file_path);
-      expected_file_name_ = "";
-      matched_count_++;
-    }
-  }
-
- private:
-  size_t matched_count_;
-  std::string expected_file_name_;
-};
 
 class FlushedFileCollector : public EventListener {
  public:
diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc
new file mode 100644
index 000000000..94bdbcd13
--- /dev/null
+++ b/db/db_iterator_test.cc
@@ -0,0 +1,1319 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/db_test_util.h"
+#include "port/stack_trace.h"
+#include "rocksdb/perf_context.h"
+
+namespace rocksdb {
+
+class DBIteratorTest : public DBTestBase {
+ public:
+  DBIteratorTest() : DBTestBase("/db_iterator_test") {}
+};
+
+TEST_F(DBIteratorTest, IteratorProperty) {
+  // The test needs to be changed if kPersistedTier is supported in iterator.
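+  // ("rocksdb.iterator.is-key-pinned" returns "1" when the key Slice points
+  // into memory pinned by the iterator rather than into a local copy; with
+  // ropt.pin_data == false it is expected to be "0".)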
+  Options options = CurrentOptions();
+  CreateAndReopenWithCF({"pikachu"}, options);
+  Put(1, "1", "2");
+  ReadOptions ropt;
+  ropt.pin_data = false;
+  {
+    unique_ptr<Iterator> iter(db_->NewIterator(ropt, handles_[1]));
+    iter->SeekToFirst();
+    std::string prop_value;
+    ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
+    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+    ASSERT_EQ("0", prop_value);
+    iter->Next();
+    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+    ASSERT_EQ("Iterator is not valid.", prop_value);
+  }
+  Close();
+}
+
+TEST_F(DBIteratorTest, PersistedTierOnIterator) {
+  // The test needs to be changed if kPersistedTier is supported in iterator.
+  Options options = CurrentOptions();
+  CreateAndReopenWithCF({"pikachu"}, options);
+  ReadOptions ropt;
+  ropt.read_tier = kPersistedTier;
+
+  auto* iter = db_->NewIterator(ropt, handles_[1]);
+  ASSERT_TRUE(iter->status().IsNotSupported());
+  delete iter;
+
+  std::vector<Iterator*> iters;
+  ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
+  Close();
+}
+
+TEST_F(DBIteratorTest, NonBlockingIteration) {
+  do {
+    ReadOptions non_blocking_opts, regular_opts;
+    Options options = CurrentOptions();
+    options.statistics = rocksdb::CreateDBStatistics();
+    non_blocking_opts.read_tier = kBlockCacheTier;
+    CreateAndReopenWithCF({"pikachu"}, options);
+    // write one kv to the database.
+    ASSERT_OK(Put(1, "a", "b"));
+
+    // scan using non-blocking iterator. We should find it because
+    // it is in memtable.
+    Iterator* iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    int count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_OK(iter->status());
+      count++;
+    }
+    ASSERT_EQ(count, 1);
+    delete iter;
+
+    // flush memtable to storage. Now, the key should not be in the
+    // memtable nor in the block cache.
+    ASSERT_OK(Flush(1));
+
+    // verify that a non-blocking iterator does not find any
+    // kvs. Neither does it do any IOs to storage.
+    uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      count++;
+    }
+    ASSERT_EQ(count, 0);
+    ASSERT_TRUE(iter->status().IsIncomplete());
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+    delete iter;
+
+    // read in the specified block via a regular get
+    ASSERT_EQ(Get(1, "a"), "b");
+
+    // verify that we can find it via a non-blocking scan
+    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_OK(iter->status());
+      count++;
+    }
+    ASSERT_EQ(count, 1);
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+    delete iter;
+
+    // This test verifies block cache behaviors, which is not used by plain
+    // table format.
+    // Exclude kHashCuckoo as it does not support iteration currently
+  } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo |
+                         kSkipMmapReads));
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBIteratorTest, ManagedNonBlockingIteration) {
+  do {
+    ReadOptions non_blocking_opts, regular_opts;
+    Options options = CurrentOptions();
+    options.statistics = rocksdb::CreateDBStatistics();
+    non_blocking_opts.read_tier = kBlockCacheTier;
+    non_blocking_opts.managed = true;
+    CreateAndReopenWithCF({"pikachu"}, options);
+    // write one kv to the database.
+    ASSERT_OK(Put(1, "a", "b"));
+
+    // scan using non-blocking iterator. We should find it because
+    // it is in memtable.
+    Iterator* iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    int count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_OK(iter->status());
+      count++;
+    }
+    ASSERT_EQ(count, 1);
+    delete iter;
+
+    // flush memtable to storage. Now, the key should not be in the
+    // memtable nor in the block cache.
+    ASSERT_OK(Flush(1));
+
+    // verify that a non-blocking iterator does not find any
+    // kvs. Neither does it do any IOs to storage.
+    int64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    int64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      count++;
+    }
+    ASSERT_EQ(count, 0);
+    ASSERT_TRUE(iter->status().IsIncomplete());
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+    delete iter;
+
+    // read in the specified block via a regular get
+    ASSERT_EQ(Get(1, "a"), "b");
+
+    // verify that we can find it via a non-blocking scan
+    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_OK(iter->status());
+      count++;
+    }
+    ASSERT_EQ(count, 1);
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+    delete iter;
+
+    // This test verifies block cache behaviors, which is not used by plain
+    // table format.
+ // Exclude kHashCuckoo as it does not support iteration currently + } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo | + kSkipMmapReads)); +} +#endif // ROCKSDB_LITE + +TEST_F(DBIteratorTest, IterSeekBeforePrev) { + ASSERT_OK(Put("a", "b")); + ASSERT_OK(Put("c", "d")); + dbfull()->Flush(FlushOptions()); + ASSERT_OK(Put("0", "f")); + ASSERT_OK(Put("1", "h")); + dbfull()->Flush(FlushOptions()); + ASSERT_OK(Put("2", "j")); + auto iter = db_->NewIterator(ReadOptions()); + iter->Seek(Slice("c")); + iter->Prev(); + iter->Seek(Slice("a")); + iter->Prev(); + delete iter; +} + +namespace { +std::string MakeLongKey(size_t length, char c) { + return std::string(length, c); +} +} // namespace + +TEST_F(DBIteratorTest, IterLongKeys) { + ASSERT_OK(Put(MakeLongKey(20, 0), "0")); + ASSERT_OK(Put(MakeLongKey(32, 2), "2")); + ASSERT_OK(Put("a", "b")); + dbfull()->Flush(FlushOptions()); + ASSERT_OK(Put(MakeLongKey(50, 1), "1")); + ASSERT_OK(Put(MakeLongKey(127, 3), "3")); + ASSERT_OK(Put(MakeLongKey(64, 4), "4")); + auto iter = db_->NewIterator(ReadOptions()); + + // Create a key that needs to be skipped for Seq too new + iter->Seek(MakeLongKey(20, 0)); + ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4"); + delete iter; + + iter = db_->NewIterator(ReadOptions()); + iter->Seek(MakeLongKey(50, 1)); + ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); + delete iter; +} + +TEST_F(DBIteratorTest, IterNextWithNewerSeq) { + ASSERT_OK(Put("0", "0")); + dbfull()->Flush(FlushOptions()); + ASSERT_OK(Put("a", "b")); + ASSERT_OK(Put("c", "d")); + ASSERT_OK(Put("d", "e")); + auto iter = db_->NewIterator(ReadOptions()); + + // Create a key that needs to be skipped for Seq too new + for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; + i++) { + ASSERT_OK(Put("b", "f")); + } + + iter->Seek(Slice("a")); + ASSERT_EQ(IterStatus(iter), "a->b"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "c->d"); + delete iter; +} + +TEST_F(DBIteratorTest, IterPrevWithNewerSeq) { + ASSERT_OK(Put("0", "0")); + dbfull()->Flush(FlushOptions()); + ASSERT_OK(Put("a", "b")); + ASSERT_OK(Put("c", "d")); + ASSERT_OK(Put("d", "e")); + auto iter = db_->NewIterator(ReadOptions()); + + // Create a key that needs to be skipped for Seq too new + for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; + i++) { + ASSERT_OK(Put("b", "f")); + } + + iter->Seek(Slice("d")); + ASSERT_EQ(IterStatus(iter), "d->e"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "c->d"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "a->b"); + + iter->Prev(); + delete iter; +} + +TEST_F(DBIteratorTest, IterPrevWithNewerSeq2) { + ASSERT_OK(Put("0", "0")); + dbfull()->Flush(FlushOptions()); + ASSERT_OK(Put("a", "b")); + ASSERT_OK(Put("c", "d")); + ASSERT_OK(Put("d", "e")); + auto iter = db_->NewIterator(ReadOptions()); + iter->Seek(Slice("c")); + ASSERT_EQ(IterStatus(iter), "c->d"); + + // Create a key that needs to be skipped for Seq too new + for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; + i++) { + ASSERT_OK(Put("b", 
"f")); + } + + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "a->b"); + + iter->Prev(); + delete iter; +} + +TEST_F(DBIteratorTest, IterEmpty) { + do { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); + + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->Seek("foo"); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + delete iter; + } while (ChangeCompactOptions()); +} + +TEST_F(DBIteratorTest, IterSingle) { + do { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + ASSERT_OK(Put(1, "a", "va")); + Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); + + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->Seek(""); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->Seek("a"); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->Seek("b"); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + delete iter; + } while (ChangeCompactOptions()); +} + +TEST_F(DBIteratorTest, IterMulti) { + do { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + ASSERT_OK(Put(1, "a", "va")); + ASSERT_OK(Put(1, "b", "vb")); + ASSERT_OK(Put(1, "c", "vc")); + Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); + + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->Seek(""); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Seek("a"); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Seek("ax"); + ASSERT_EQ(IterStatus(iter), "b->vb"); + + iter->Seek("b"); + ASSERT_EQ(IterStatus(iter), "b->vb"); + iter->Seek("z"); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + // Switch from reverse to forward + iter->SeekToLast(); + iter->Prev(); + iter->Prev(); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + + // Switch from forward to reverse + iter->SeekToFirst(); + iter->Next(); + iter->Next(); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + + // Make sure iter stays at snapshot + ASSERT_OK(Put(1, "a", "va2")); + ASSERT_OK(Put(1, "a2", "va3")); + ASSERT_OK(Put(1, "b", "vb2")); + ASSERT_OK(Put(1, "c", "vc2")); + ASSERT_OK(Delete(1, "b")); + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + 
iter->Next();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+    iter->SeekToLast();
+    ASSERT_EQ(IterStatus(iter), "c->vc");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "b->vb");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    delete iter;
+  } while (ChangeCompactOptions());
+}
+
+// Check that we can skip over a run of user keys
+// by using reseek rather than sequential scan
+TEST_F(DBIteratorTest, IterReseek) {
+  anon::OptionsOverride options_override;
+  options_override.skip_policy = kSkipNoSnapshot;
+  Options options = CurrentOptions(options_override);
+  options.max_sequential_skip_in_iterations = 3;
+  options.create_if_missing = true;
+  options.statistics = rocksdb::CreateDBStatistics();
+  DestroyAndReopen(options);
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  // insert three versions of the same userkey "a" and verify that
+  // reseek is not invoked. For each of these test cases,
+  // verify that we can find the next key "b".
+  ASSERT_OK(Put(1, "a", "zero"));
+  ASSERT_OK(Put(1, "a", "one"));
+  ASSERT_OK(Put(1, "a", "two"));
+  ASSERT_OK(Put(1, "b", "bone"));
+  Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+  iter->SeekToFirst();
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+  ASSERT_EQ(IterStatus(iter), "a->two");
+  iter->Next();
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+  ASSERT_EQ(IterStatus(iter), "b->bone");
+  delete iter;
+
+  // insert a fourth version of the same userkey and verify that
+  // reseek is still not invoked (the skip count does not yet exceed
+  // max_sequential_skip_in_iterations).
+  ASSERT_OK(Put(1, "a", "three"));
+  iter = db_->NewIterator(ReadOptions(), handles_[1]);
+  iter->SeekToFirst();
+  ASSERT_EQ(IterStatus(iter), "a->three");
+  iter->Next();
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+  ASSERT_EQ(IterStatus(iter), "b->bone");
+  delete iter;
+
+  // insert a fifth version of the same userkey and verify that
+  // reseek is now invoked.
+  ASSERT_OK(Put(1, "a", "four"));
+  iter = db_->NewIterator(ReadOptions(), handles_[1]);
+  iter->SeekToFirst();
+  ASSERT_EQ(IterStatus(iter), "a->four");
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+  iter->Next();
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
+  ASSERT_EQ(IterStatus(iter), "b->bone");
+  delete iter;
+
+  // Testing reverse iterator
+  // At this point, we have five versions of "a" and one version of "b".
+  // The reseek statistic is already at 1.
+  int num_reseeks = static_cast<int>(
+      TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
+
+  // Insert another version of b and assert that reseek is not invoked
+  ASSERT_OK(Put(1, "b", "btwo"));
+  iter = db_->NewIterator(ReadOptions(), handles_[1]);
+  iter->SeekToLast();
+  ASSERT_EQ(IterStatus(iter), "b->btwo");
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
+            num_reseeks);
+  iter->Prev();
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
+            num_reseeks + 1);
+  ASSERT_EQ(IterStatus(iter), "a->four");
+  delete iter;
+
+  // insert two more versions of b. This makes a total of four versions
+  // of b and five versions of a.
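+  // With max_sequential_skip_in_iterations set to 3 above, stepping over
+  // either run of versions should exhaust the skip budget and force a reseek.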
+ ASSERT_OK(Put(1, "b", "bthree")); + ASSERT_OK(Put(1, "b", "bfour")); + iter = db_->NewIterator(ReadOptions(), handles_[1]); + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "b->bfour"); + ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), + num_reseeks + 2); + iter->Prev(); + + // the previous Prev call should have invoked reseek + ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), + num_reseeks + 3); + ASSERT_EQ(IterStatus(iter), "a->four"); + delete iter; +} + +TEST_F(DBIteratorTest, IterSmallAndLargeMix) { + do { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + ASSERT_OK(Put(1, "a", "va")); + ASSERT_OK(Put(1, "b", std::string(100000, 'b'))); + ASSERT_OK(Put(1, "c", "vc")); + ASSERT_OK(Put(1, "d", std::string(100000, 'd'))); + ASSERT_OK(Put(1, "e", std::string(100000, 'e'))); + + Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); + + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + delete iter; + } while (ChangeCompactOptions()); +} + +TEST_F(DBIteratorTest, IterMultiWithDelete) { + do { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + ASSERT_OK(Put(1, "ka", "va")); + ASSERT_OK(Put(1, "kb", "vb")); + ASSERT_OK(Put(1, "kc", "vc")); + ASSERT_OK(Delete(1, "kb")); + ASSERT_EQ("NOT_FOUND", Get(1, "kb")); + + Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); + iter->Seek("kc"); + ASSERT_EQ(IterStatus(iter), "kc->vc"); + if (!CurrentOptions().merge_operator) { + // TODO: merge operator does not support backward iteration yet + if (kPlainTableAllBytesPrefix != option_config_ && + kBlockBasedTableWithWholeKeyHashIndex != option_config_ && + kHashLinkList != option_config_) { + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "ka->va"); + } + } + delete iter; + } while (ChangeOptions()); +} + +TEST_F(DBIteratorTest, IterPrevMaxSkip) { + do { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + for (int i = 0; i < 2; i++) { + ASSERT_OK(Put(1, "key1", "v1")); + ASSERT_OK(Put(1, "key2", "v2")); + ASSERT_OK(Put(1, "key3", "v3")); + ASSERT_OK(Put(1, "key4", "v4")); + ASSERT_OK(Put(1, "key5", "v5")); + } + + VerifyIterLast("key5->v5", 1); + + ASSERT_OK(Delete(1, "key5")); + VerifyIterLast("key4->v4", 1); + + ASSERT_OK(Delete(1, "key4")); + VerifyIterLast("key3->v3", 1); + + ASSERT_OK(Delete(1, "key3")); + VerifyIterLast("key2->v2", 1); + + ASSERT_OK(Delete(1, "key2")); + VerifyIterLast("key1->v1", 1); + + ASSERT_OK(Delete(1, "key1")); + VerifyIterLast("(invalid)", 1); + } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast)); +} + +TEST_F(DBIteratorTest, IterWithSnapshot) { + anon::OptionsOverride options_override; + options_override.skip_policy = kSkipNoSnapshot; + do { + CreateAndReopenWithCF({"pikachu"}, 
CurrentOptions(options_override)); + ASSERT_OK(Put(1, "key1", "val1")); + ASSERT_OK(Put(1, "key2", "val2")); + ASSERT_OK(Put(1, "key3", "val3")); + ASSERT_OK(Put(1, "key4", "val4")); + ASSERT_OK(Put(1, "key5", "val5")); + + const Snapshot* snapshot = db_->GetSnapshot(); + ReadOptions options; + options.snapshot = snapshot; + Iterator* iter = db_->NewIterator(options, handles_[1]); + + // Put more values after the snapshot + ASSERT_OK(Put(1, "key100", "val100")); + ASSERT_OK(Put(1, "key101", "val101")); + + iter->Seek("key5"); + ASSERT_EQ(IterStatus(iter), "key5->val5"); + if (!CurrentOptions().merge_operator) { + // TODO: merge operator does not support backward iteration yet + if (kPlainTableAllBytesPrefix != option_config_ && + kBlockBasedTableWithWholeKeyHashIndex != option_config_ && + kHashLinkList != option_config_) { + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "key4->val4"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "key3->val3"); + + iter->Next(); + ASSERT_EQ(IterStatus(iter), "key4->val4"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "key5->val5"); + } + iter->Next(); + ASSERT_TRUE(!iter->Valid()); + } + db_->ReleaseSnapshot(snapshot); + delete iter; + // skip as HashCuckooRep does not support snapshot + } while (ChangeOptions(kSkipHashCuckoo)); +} + +TEST_F(DBIteratorTest, IteratorPinsRef) { + do { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + Put(1, "foo", "hello"); + + // Get iterator that will yield the current contents of the DB. + Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); + + // Write to force compactions + Put(1, "foo", "newvalue1"); + for (int i = 0; i < 100; i++) { + // 100K values + ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v'))); + } + Put(1, "foo", "newvalue2"); + + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("foo", iter->key().ToString()); + ASSERT_EQ("hello", iter->value().ToString()); + iter->Next(); + ASSERT_TRUE(!iter->Valid()); + delete iter; + } while (ChangeCompactOptions()); +} + +TEST_F(DBIteratorTest, DBIteratorBoundTest) { + Options options = CurrentOptions(); + options.env = env_; + options.create_if_missing = true; + + options.prefix_extractor = nullptr; + DestroyAndReopen(options); + ASSERT_OK(Put("a", "0")); + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(Put("foo1", "bar1")); + ASSERT_OK(Put("g1", "0")); + + // testing basic case with no iterate_upper_bound and no prefix_extractor + { + ReadOptions ro; + ro.iterate_upper_bound = nullptr; + + std::unique_ptr iter(db_->NewIterator(ro)); + + iter->Seek("foo"); + + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key().compare(Slice("foo")), 0); + + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key().compare(Slice("foo1")), 0); + + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key().compare(Slice("g1")), 0); + } + + // testing iterate_upper_bound and forward iterator + // to make sure it stops at bound + { + ReadOptions ro; + // iterate_upper_bound points beyond the last expected entry + Slice prefix("foo2"); + ro.iterate_upper_bound = &prefix; + + std::unique_ptr iter(db_->NewIterator(ro)); + + iter->Seek("foo"); + + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key().compare(Slice("foo")), 0); + + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key().compare(("foo1")), 0); + + iter->Next(); + // should stop here... 
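+    // ("g1" sorts at or above the exclusive upper bound "foo2", so the
+    // iterator must become invalid rather than surface it)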
+    ASSERT_TRUE(!iter->Valid());
+  }
+  // Testing SeekToLast with iterate_upper_bound set
+  {
+    ReadOptions ro;
+
+    Slice prefix("foo");
+    ro.iterate_upper_bound = &prefix;
+
+    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
+
+    iter->SeekToLast();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("a")), 0);
+  }
+
+  // prefix is the first letter of the key
+  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
+
+  DestroyAndReopen(options);
+  ASSERT_OK(Put("a", "0"));
+  ASSERT_OK(Put("foo", "bar"));
+  ASSERT_OK(Put("foo1", "bar1"));
+  ASSERT_OK(Put("g1", "0"));
+
+  // testing with iterate_upper_bound and prefix_extractor
+  // Seek target and iterate_upper_bound are not in the same prefix;
+  // iteration should still terminate cleanly at the bound
+  {
+    ReadOptions ro;
+    Slice upper_bound("g");
+    ro.iterate_upper_bound = &upper_bound;
+
+    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
+
+    iter->Seek("foo");
+
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("foo", iter->key().ToString());
+
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("foo1", iter->key().ToString());
+
+    iter->Next();
+    ASSERT_TRUE(!iter->Valid());
+  }
+
+  // testing that iterate_upper_bound prevents iterating over deleted items
+  // once the bound has been reached
+  {
+    options.prefix_extractor = nullptr;
+    DestroyAndReopen(options);
+    ASSERT_OK(Put("a", "0"));
+    ASSERT_OK(Put("b", "0"));
+    ASSERT_OK(Put("b1", "0"));
+    ASSERT_OK(Put("c", "0"));
+    ASSERT_OK(Put("d", "0"));
+    ASSERT_OK(Put("e", "0"));
+    ASSERT_OK(Delete("c"));
+    ASSERT_OK(Delete("d"));
+
+    // base case with no bound
+    ReadOptions ro;
+    ro.iterate_upper_bound = nullptr;
+
+    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
+
+    iter->Seek("b");
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("b")), 0);
+
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("b1")), 0);
+
+    perf_context.Reset();
+    iter->Next();
+
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 2);
+
+    // now testing with iterate_upper_bound
+    Slice prefix("c");
+    ro.iterate_upper_bound = &prefix;
+
+    iter.reset(db_->NewIterator(ro));
+
+    perf_context.Reset();
+
+    iter->Seek("b");
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("b")), 0);
+
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("b1")), 0);
+
+    iter->Next();
+    // the iteration should stop as soon as the bound key is reached,
+    // even though the key is deleted;
+    // hence internal_delete_skipped_count should be 0
+    ASSERT_TRUE(!iter->Valid());
+    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 0);
+  }
+}
+
+// TODO(3.13): fix the issue of Seek() + Prev() which might not necessarily
+// return the biggest key which is smaller than the seek key.
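+// (PrevAfterMerge below is a minimal regression test for that pattern with
+// a merge operator in the mix.)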
+TEST_F(DBIteratorTest, PrevAfterMerge) {
+  Options options;
+  options.create_if_missing = true;
+  options.merge_operator = MergeOperators::CreatePutOperator();
+  DestroyAndReopen(options);
+
+  // write three entries with different keys using Merge()
+  WriteOptions wopts;
+  db_->Merge(wopts, "1", "data1");
+  db_->Merge(wopts, "2", "data2");
+  db_->Merge(wopts, "3", "data3");
+
+  std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
+
+  it->Seek("2");
+  ASSERT_TRUE(it->Valid());
+  ASSERT_EQ("2", it->key().ToString());
+
+  it->Prev();
+  ASSERT_TRUE(it->Valid());
+  ASSERT_EQ("1", it->key().ToString());
+}
+
+TEST_F(DBIteratorTest, PinnedDataIteratorRandomized) {
+  enum TestConfig {
+    NORMAL,
+    CLOSE_AND_OPEN,
+    COMPACT_BEFORE_READ,
+    FLUSH_EVERY_1000,
+    MAX
+  };
+
+  // Generate Random data
+  Random rnd(301);
+
+  int puts = 100000;
+  int key_pool = static_cast<int>(puts * 0.7);
+  int key_size = 100;
+  int val_size = 1000;
+  int seeks_percentage = 20;   // 20% of keys will be used to test seek()
+  int delete_percentage = 20;  // 20% of keys will be deleted
+  int merge_percentage = 20;   // 20% of keys will be added using Merge()
+
+  for (int run_config = 0; run_config < TestConfig::MAX; run_config++) {
+    Options options = CurrentOptions();
+    BlockBasedTableOptions table_options;
+    table_options.use_delta_encoding = false;
+    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+    options.merge_operator = MergeOperators::CreatePutOperator();
+    DestroyAndReopen(options);
+
+    std::vector<std::string> generated_keys(key_pool);
+    for (int i = 0; i < key_pool; i++) {
+      generated_keys[i] = RandomString(&rnd, key_size);
+    }
+
+    std::map<std::string, std::string> true_data;
+    std::vector<std::string> random_keys;
+    std::vector<std::string> deleted_keys;
+    for (int i = 0; i < puts; i++) {
+      auto& k = generated_keys[rnd.Next() % key_pool];
+      auto v = RandomString(&rnd, val_size);
+
+      // Insert data to true_data map and to DB
+      true_data[k] = v;
+      if (rnd.OneIn(static_cast<int>(100.0 / merge_percentage))) {
+        ASSERT_OK(db_->Merge(WriteOptions(), k, v));
+      } else {
+        ASSERT_OK(Put(k, v));
+      }
+
+      // Pick random keys to be used to test Seek()
+      if (rnd.OneIn(static_cast<int>(100.0 / seeks_percentage))) {
+        random_keys.push_back(k);
+      }
+
+      // Delete some random keys
+      if (rnd.OneIn(static_cast<int>(100.0 / delete_percentage))) {
+        deleted_keys.push_back(k);
+        true_data.erase(k);
+        ASSERT_OK(Delete(k));
+      }
+
+      if (run_config == TestConfig::FLUSH_EVERY_1000) {
+        if (i && i % 1000 == 0) {
+          Flush();
+        }
+      }
+    }
+
+    if (run_config == TestConfig::CLOSE_AND_OPEN) {
+      Close();
+      Reopen(options);
+    } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
+      db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+    }
+
+    ReadOptions ro;
+    ro.pin_data = true;
+    auto iter = db_->NewIterator(ro);
+
+    {
+      // Test Seek to random keys
+      printf("Testing seek on %zu keys\n", random_keys.size());
+      std::vector<Slice> keys_slices;
+      std::vector<std::string> true_keys;
+      for (auto& k : random_keys) {
+        iter->Seek(k);
+        if (!iter->Valid()) {
+          ASSERT_EQ(true_data.lower_bound(k), true_data.end());
+          continue;
+        }
+        std::string prop_value;
+        ASSERT_OK(
+            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+        ASSERT_EQ("1", prop_value);
+        keys_slices.push_back(iter->key());
+        true_keys.push_back(true_data.lower_bound(k)->first);
+      }
+
+      for (size_t i = 0; i < keys_slices.size(); i++) {
+        ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
+      }
+    }
+
+    {
+      // Test iterating all data forward
+      printf("Testing iterating forward on all keys\n");
+      std::vector<Slice> all_keys;
+      for (iter->SeekToFirst(); 
iter->Valid(); iter->Next()) { + std::string prop_value; + ASSERT_OK( + iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); + ASSERT_EQ("1", prop_value); + all_keys.push_back(iter->key()); + } + ASSERT_EQ(all_keys.size(), true_data.size()); + + // Verify that all keys slices are valid + auto data_iter = true_data.begin(); + for (size_t i = 0; i < all_keys.size(); i++) { + ASSERT_EQ(all_keys[i].ToString(), data_iter->first); + data_iter++; + } + } + + { + // Test iterating all data backward + printf("Testing iterating backward on all keys\n"); + std::vector all_keys; + for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { + std::string prop_value; + ASSERT_OK( + iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); + ASSERT_EQ("1", prop_value); + all_keys.push_back(iter->key()); + } + ASSERT_EQ(all_keys.size(), true_data.size()); + + // Verify that all keys slices are valid (backward) + auto data_iter = true_data.rbegin(); + for (size_t i = 0; i < all_keys.size(); i++) { + ASSERT_EQ(all_keys[i].ToString(), data_iter->first); + data_iter++; + } + } + + delete iter; + } +} + +#ifndef ROCKSDB_LITE +TEST_F(DBIteratorTest, PinnedDataIteratorMultipleFiles) { + Options options = CurrentOptions(); + BlockBasedTableOptions table_options; + table_options.use_delta_encoding = false; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.disable_auto_compactions = true; + options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb + DestroyAndReopen(options); + + std::map true_data; + + // Generate 4 sst files in L2 + Random rnd(301); + for (int i = 1; i <= 1000; i++) { + std::string k = Key(i * 3); + std::string v = RandomString(&rnd, 100); + ASSERT_OK(Put(k, v)); + true_data[k] = v; + if (i % 250 == 0) { + ASSERT_OK(Flush()); + } + } + ASSERT_EQ(FilesPerLevel(0), "4"); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_EQ(FilesPerLevel(0), "0,4"); + + // Generate 4 sst files in L0 + for (int i = 1; i <= 1000; i++) { + std::string k = Key(i * 2); + std::string v = RandomString(&rnd, 100); + ASSERT_OK(Put(k, v)); + true_data[k] = v; + if (i % 250 == 0) { + ASSERT_OK(Flush()); + } + } + ASSERT_EQ(FilesPerLevel(0), "4,4"); + + // Add some keys/values in memtables + for (int i = 1; i <= 1000; i++) { + std::string k = Key(i); + std::string v = RandomString(&rnd, 100); + ASSERT_OK(Put(k, v)); + true_data[k] = v; + } + ASSERT_EQ(FilesPerLevel(0), "4,4"); + + ReadOptions ro; + ro.pin_data = true; + auto iter = db_->NewIterator(ro); + + std::vector> results; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + std::string prop_value; + ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); + ASSERT_EQ("1", prop_value); + results.emplace_back(iter->key(), iter->value().ToString()); + } + + ASSERT_EQ(results.size(), true_data.size()); + auto data_iter = true_data.begin(); + for (size_t i = 0; i < results.size(); i++, data_iter++) { + auto& kv = results[i]; + ASSERT_EQ(kv.first, data_iter->first); + ASSERT_EQ(kv.second, data_iter->second); + } + + delete iter; +} +#endif + +TEST_F(DBIteratorTest, PinnedDataIteratorMergeOperator) { + Options options = CurrentOptions(); + BlockBasedTableOptions table_options; + table_options.use_delta_encoding = false; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.merge_operator = MergeOperators::CreateUInt64AddOperator(); + DestroyAndReopen(options); + + std::string numbers[7]; + for (int val = 0; val <= 6; val++) { + 
PutFixed64(numbers + val, val);
+  }
+
+  // +1 all keys in range [ 0 => 999]
+  for (int i = 0; i < 1000; i++) {
+    WriteOptions wo;
+    ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
+  }
+
+  // +2 all keys divisible by 2 in range [ 0 => 999]
+  for (int i = 0; i < 1000; i += 2) {
+    WriteOptions wo;
+    ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
+  }
+
+  // +3 all keys divisible by 5 in range [ 0 => 999]
+  for (int i = 0; i < 1000; i += 5) {
+    WriteOptions wo;
+    ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
+  }
+
+  ReadOptions ro;
+  ro.pin_data = true;
+  auto iter = db_->NewIterator(ro);
+
+  std::vector<std::pair<Slice, std::string>> results;
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    std::string prop_value;
+    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+    ASSERT_EQ("1", prop_value);
+    results.emplace_back(iter->key(), iter->value().ToString());
+  }
+
+  ASSERT_EQ(results.size(), 1000);
+  for (size_t i = 0; i < results.size(); i++) {
+    auto& kv = results[i];
+    ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
+    int expected_val = 1;
+    if (i % 2 == 0) {
+      expected_val += 2;
+    }
+    if (i % 5 == 0) {
+      expected_val += 3;
+    }
+    ASSERT_EQ(kv.second, numbers[expected_val]);
+  }
+
+  delete iter;
+}
+
+TEST_F(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
+  Options options = CurrentOptions();
+  BlockBasedTableOptions table_options;
+  table_options.use_delta_encoding = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  options.write_buffer_size = 100000;
+  DestroyAndReopen(options);
+
+  Random rnd(301);
+
+  std::map<std::string, std::string> true_data;
+  for (int i = 0; i < 1000; i++) {
+    std::string k = RandomString(&rnd, 10);
+    std::string v = RandomString(&rnd, 1000);
+    ASSERT_OK(Put(k, v));
+    true_data[k] = v;
+  }
+
+  ReadOptions ro;
+  ro.pin_data = true;
+  auto iter = db_->NewIterator(ro);
+
+  // Delete 50% of the keys and update the other 50%
+  for (auto& kv : true_data) {
+    if (rnd.OneIn(2)) {
+      ASSERT_OK(Delete(kv.first));
+    } else {
+      std::string new_val = RandomString(&rnd, 1000);
+      ASSERT_OK(Put(kv.first, new_val));
+    }
+  }
+
+  std::vector<std::pair<Slice, std::string>> results;
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    std::string prop_value;
+    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+    ASSERT_EQ("1", prop_value);
+    results.emplace_back(iter->key(), iter->value().ToString());
+  }
+
+  auto data_iter = true_data.begin();
+  for (size_t i = 0; i < results.size(); i++, data_iter++) {
+    auto& kv = results[i];
+    ASSERT_EQ(kv.first, data_iter->first);
+    ASSERT_EQ(kv.second, data_iter->second);
+  }
+
+  delete iter;
+}
+
+TEST_F(DBIteratorTest, IteratorWithLocalStatistics) {
+  Options options = CurrentOptions();
+  options.statistics = rocksdb::CreateDBStatistics();
+  DestroyAndReopen(options);
+
+  Random rnd(301);
+  for (int i = 0; i < 1000; i++) {
+    // Key 10 bytes / Value 10 bytes
+    ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10)));
+  }
+
+  std::atomic<uint64_t> total_next(0);
+  std::atomic<uint64_t> total_next_found(0);
+  std::atomic<uint64_t> total_prev(0);
+  std::atomic<uint64_t> total_prev_found(0);
+  std::atomic<uint64_t> total_bytes(0);
+
+  std::vector<std::thread> threads;
+  std::function<void()> reader_func_next = [&]() {
+    Iterator* iter = db_->NewIterator(ReadOptions());
+
+    iter->SeekToFirst();
+    // Seek will bump ITER_BYTES_READ
+    total_bytes += iter->key().size();
+    total_bytes += iter->value().size();
+    while (true) {
+      iter->Next();
+      total_next++;
+
+      if (!iter->Valid()) {
+        break;
+      }
+      total_next_found++;
+      total_bytes += iter->key().size();
+      total_bytes += iter->value().size();
+    }
+
+    
delete iter; + }; + + std::function reader_func_prev = [&]() { + Iterator* iter = db_->NewIterator(ReadOptions()); + + iter->SeekToLast(); + // Seek will bump ITER_BYTES_READ + total_bytes += iter->key().size(); + total_bytes += iter->value().size(); + while (true) { + iter->Prev(); + total_prev++; + + if (!iter->Valid()) { + break; + } + total_prev_found++; + total_bytes += iter->key().size(); + total_bytes += iter->value().size(); + } + + delete iter; + }; + + for (int i = 0; i < 10; i++) { + threads.emplace_back(reader_func_next); + } + for (int i = 0; i < 15; i++) { + threads.emplace_back(reader_func_prev); + } + + for (auto& t : threads) { + t.join(); + } + + ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), total_next); + ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND), + total_next_found); + ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), total_prev); + ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND), + total_prev_found); + ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), total_bytes); +} + +} // namespace rocksdb + +int main(int argc, char** argv) { + rocksdb::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc new file mode 100644 index 000000000..1ec125cb1 --- /dev/null +++ b/db/db_sst_test.cc @@ -0,0 +1,1300 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/db_test_util.h" +#include "port/stack_trace.h" +#include "rocksdb/sst_file_manager.h" +#include "rocksdb/sst_file_writer.h" +#include "util/sst_file_manager_impl.h" + +namespace rocksdb { + +class DBSSTTest : public DBTestBase { + public: + DBSSTTest() : DBTestBase("/db_sst_test") {} +}; + +TEST_F(DBSSTTest, DontDeletePendingOutputs) { + Options options; + options.env = env_; + options.create_if_missing = true; + DestroyAndReopen(options); + + // Every time we write to a table file, call FOF/POF with full DB scan. 
This + // will make sure our pending_outputs_ protection work correctly + std::function purge_obsolete_files_function = [&]() { + JobContext job_context(0); + dbfull()->TEST_LockMutex(); + dbfull()->FindObsoleteFiles(&job_context, true /*force*/); + dbfull()->TEST_UnlockMutex(); + dbfull()->PurgeObsoleteFiles(job_context); + job_context.Clean(); + }; + + env_->table_write_callback_ = &purge_obsolete_files_function; + + for (int i = 0; i < 2; ++i) { + ASSERT_OK(Put("a", "begin")); + ASSERT_OK(Put("z", "end")); + ASSERT_OK(Flush()); + } + + // If pending output guard does not work correctly, PurgeObsoleteFiles() will + // delete the file that Compaction is trying to create, causing this: error + // db/db_test.cc:975: IO error: + // /tmp/rocksdbtest-1552237650/db_test/000009.sst: No such file or directory + Compact("a", "b"); +} + +#ifndef ROCKSDB_LITE +TEST_F(DBSSTTest, DontDeleteMovedFile) { + // This test triggers move compaction and verifies that the file is not + // deleted when it's part of move compaction + Options options = CurrentOptions(); + options.env = env_; + options.create_if_missing = true; + options.max_bytes_for_level_base = 1024 * 1024; // 1 MB + options.level0_file_num_compaction_trigger = + 2; // trigger compaction when we have 2 files + DestroyAndReopen(options); + + Random rnd(301); + // Create two 1MB sst files + for (int i = 0; i < 2; ++i) { + // Create 1MB sst file + for (int j = 0; j < 100; ++j) { + ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024))); + } + ASSERT_OK(Flush()); + } + // this should execute both L0->L1 and L1->(move)->L2 compactions + dbfull()->TEST_WaitForCompact(); + ASSERT_EQ("0,0,1", FilesPerLevel(0)); + + // If the moved file is actually deleted (the move-safeguard in + // ~Version::Version() is not there), we get this failure: + // Corruption: Can't access /000009.sst + Reopen(options); +} + +// This reproduces a bug where we don't delete a file because when it was +// supposed to be deleted, it was blocked by pending_outputs +// Consider: +// 1. current file_number is 13 +// 2. compaction (1) starts, blocks deletion of all files starting with 13 +// (pending outputs) +// 3. file 13 is created by compaction (2) +// 4. file 13 is consumed by compaction (3) and file 15 was created. Since file +// 13 has no references, it is put into VersionSet::obsolete_files_ +// 5. FindObsoleteFiles() gets file 13 from VersionSet::obsolete_files_. File 13 +// is deleted from obsolete_files_ set. +// 6. PurgeObsoleteFiles() tries to delete file 13, but this file is blocked by +// pending outputs since compaction (1) is still running. It is not deleted and +// it is not present in obsolete_files_ anymore. Therefore, we never delete it. 
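+// The test below recreates this window by stalling a flush while compactions
+// retire the file, then verifies that the file is still deleted in the end.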
+TEST_F(DBSSTTest, DeleteObsoleteFilesPendingOutputs) { + Options options = CurrentOptions(); + options.env = env_; + options.write_buffer_size = 2 * 1024 * 1024; // 2 MB + options.max_bytes_for_level_base = 1024 * 1024; // 1 MB + options.level0_file_num_compaction_trigger = + 2; // trigger compaction when we have 2 files + options.max_background_flushes = 2; + options.max_background_compactions = 2; + + OnFileDeletionListener* listener = new OnFileDeletionListener(); + options.listeners.emplace_back(listener); + + Reopen(options); + + Random rnd(301); + // Create two 1MB sst files + for (int i = 0; i < 2; ++i) { + // Create 1MB sst file + for (int j = 0; j < 100; ++j) { + ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024))); + } + ASSERT_OK(Flush()); + } + // this should execute both L0->L1 and L1->(move)->L2 compactions + dbfull()->TEST_WaitForCompact(); + ASSERT_EQ("0,0,1", FilesPerLevel(0)); + + test::SleepingBackgroundTask blocking_thread; + port::Mutex mutex_; + bool already_blocked(false); + + // block the flush + std::function block_first_time = [&]() { + bool blocking = false; + { + MutexLock l(&mutex_); + if (!already_blocked) { + blocking = true; + already_blocked = true; + } + } + if (blocking) { + blocking_thread.DoSleep(); + } + }; + env_->table_write_callback_ = &block_first_time; + // Create 1MB sst file + for (int j = 0; j < 256; ++j) { + ASSERT_OK(Put(Key(j), RandomString(&rnd, 10 * 1024))); + } + // this should trigger a flush, which is blocked with block_first_time + // pending_file is protecting all the files created after + + ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr)); + + ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); + std::vector metadata; + db_->GetLiveFilesMetaData(&metadata); + ASSERT_EQ(metadata.size(), 1U); + auto file_on_L2 = metadata[0].name; + listener->SetExpectedFileName(dbname_ + file_on_L2); + + ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr, + true /* disallow trivial move */)); + ASSERT_EQ("0,0,0,0,1", FilesPerLevel(0)); + + // finish the flush! 
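+  // Waking the sleeping callback lets the stalled flush complete, which
+  // should also release the pending-output protection it was holding.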
+  blocking_thread.WakeUp();
+  blocking_thread.WaitUntilDone();
+  dbfull()->TEST_WaitForFlushMemTable();
+  ASSERT_EQ("1,0,0,0,1", FilesPerLevel(0));
+
+  metadata.clear();
+  db_->GetLiveFilesMetaData(&metadata);
+  ASSERT_EQ(metadata.size(), 2U);
+
+  // This file should have been deleted during the last compaction
+  ASSERT_EQ(Status::NotFound(), env_->FileExists(dbname_ + file_on_L2));
+  listener->VerifyMatchedCount(1);
+}
+
+#endif  // ROCKSDB_LITE
+
+TEST_F(DBSSTTest, DBWithSstFileManager) {
+  std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
+  auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
+
+  int files_added = 0;
+  int files_deleted = 0;
+  int files_moved = 0;
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "SstFileManagerImpl::OnAddFile", [&](void* arg) { files_added++; });
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "SstFileManagerImpl::OnDeleteFile", [&](void* arg) { files_deleted++; });
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "SstFileManagerImpl::OnMoveFile", [&](void* arg) { files_moved++; });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  Options options = CurrentOptions();
+  options.sst_file_manager = sst_file_manager;
+  DestroyAndReopen(options);
+
+  Random rnd(301);
+  for (int i = 0; i < 25; i++) {
+    GenerateNewRandomFile(&rnd);
+    ASSERT_OK(Flush());
+    dbfull()->TEST_WaitForFlushMemTable();
+    dbfull()->TEST_WaitForCompact();
+    // Verify that we are tracking all sst files in dbname_
+    ASSERT_EQ(sfm->GetTrackedFiles(), GetAllSSTFiles());
+  }
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+  auto files_in_db = GetAllSSTFiles();
+  // Verify that we are tracking all sst files in dbname_
+  ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
+  // Verify the total files size
+  uint64_t total_files_size = 0;
+  for (auto& file_to_size : files_in_db) {
+    total_files_size += file_to_size.second;
+  }
+  ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
+  // We flushed at least 25 files
+  ASSERT_GE(files_added, 25);
+  // Compaction must have deleted some files
+  ASSERT_GT(files_deleted, 0);
+  // No files were moved
+  ASSERT_EQ(files_moved, 0);
+
+  Close();
+  Reopen(options);
+  ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
+  ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
+
+  // Verify that we track all the files again after the DB is closed and opened
+  Close();
+  sst_file_manager.reset(NewSstFileManager(env_));
+  options.sst_file_manager = sst_file_manager;
+  sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
+
+  Reopen(options);
+  ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
+  ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
+
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBSSTTest, RateLimitedDelete) {
+  rocksdb::SyncPoint::GetInstance()->LoadDependency({
+      {"DBSSTTest::RateLimitedDelete:1",
+       "DeleteScheduler::BackgroundEmptyTrash"},
+  });
+
+  std::vector<uint64_t> penalties;
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DeleteScheduler::BackgroundEmptyTrash:Wait",
+      [&](void* arg) { penalties.push_back(*(static_cast<uint64_t*>(arg))); });
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.env = env_;
+
+  std::string trash_dir = test::TmpDir(env_) + "/trash";
+  int64_t rate_bytes_per_sec = 1024 * 10;  // 10 KB / sec
+  Status s;
+  options.sst_file_manager.reset(NewSstFileManager(
+      env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s));
+  ASSERT_OK(s);
+  auto sfm =
+      static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
+
+  Destroy(last_options_);
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  ASSERT_OK(TryReopen(options));
+  // Create 4 files in L0
+  for (char v = 'a'; v <= 'd'; v++) {
+    ASSERT_OK(Put("Key2", DummyString(1024, v)));
+    ASSERT_OK(Put("Key3", DummyString(1024, v)));
+    ASSERT_OK(Put("Key4", DummyString(1024, v)));
+    ASSERT_OK(Put("Key1", DummyString(1024, v)));
+    ASSERT_OK(Put("Key4", DummyString(1024, v)));
+    ASSERT_OK(Flush());
+  }
+  // We created 4 sst files in L0
+  ASSERT_EQ("4", FilesPerLevel(0));
+
+  std::vector<LiveFileMetaData> metadata;
+  db_->GetLiveFilesMetaData(&metadata);
+
+  // Compaction will move the 4 files in L0 to trash and create 1 L1 file
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+  ASSERT_EQ("0,1", FilesPerLevel(0));
+
+  uint64_t delete_start_time = env_->NowMicros();
+  // Hold BackgroundEmptyTrash
+  TEST_SYNC_POINT("DBSSTTest::RateLimitedDelete:1");
+  sfm->WaitForEmptyTrash();
+  uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time;
+
+  uint64_t total_files_size = 0;
+  uint64_t expected_penalty = 0;
+  ASSERT_EQ(penalties.size(), metadata.size());
+  for (size_t i = 0; i < metadata.size(); i++) {
+    total_files_size += metadata[i].size;
+    expected_penalty = ((total_files_size * 1000000) / rate_bytes_per_sec);
+    ASSERT_EQ(expected_penalty, penalties[i]);
+  }
+  ASSERT_GT(time_spent_deleting, expected_penalty * 0.9);
+
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+}
+
+// Create a DB with 2 db_paths, and generate multiple files in the 2
+// db_paths using CompactRangeOptions, make sure that files that were
+// deleted from first db_path were deleted using DeleteScheduler and
+// files in the second path were not.
+TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
+  int bg_delete_file = 0;
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DeleteScheduler::DeleteTrashFile:DeleteFile",
+      [&](void* arg) { bg_delete_file++; });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.db_paths.emplace_back(dbname_, 1024 * 100);
+  options.db_paths.emplace_back(dbname_ + "_2", 1024 * 100);
+  options.env = env_;
+
+  std::string trash_dir = test::TmpDir(env_) + "/trash";
+  int64_t rate_bytes_per_sec = 1024 * 1024;  // 1 MB / sec
+  Status s;
+  options.sst_file_manager.reset(NewSstFileManager(
+      env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s));
+  ASSERT_OK(s);
+  auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
+
+  DestroyAndReopen(options);
+
+  // Create 4 files in L0
+  for (int i = 0; i < 4; i++) {
+    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A')));
+    ASSERT_OK(Flush());
+  }
+  // We created 4 sst files in L0
+  ASSERT_EQ("4", FilesPerLevel(0));
+  // Compaction will delete files from L0 in first db path and generate a new
+  // file in L1 in second db path
+  CompactRangeOptions compact_options;
+  compact_options.target_path_id = 1;
+  Slice begin("Key0");
+  Slice end("Key3");
+  ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
+  ASSERT_EQ("0,1", FilesPerLevel(0));
+
+  // Create 4 files in L0
+  for (int i = 4; i < 8; i++) {
+    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B')));
+    ASSERT_OK(Flush());
+  }
+  ASSERT_EQ("4,1", FilesPerLevel(0));
+
+  // Compaction will delete files from L0 in first db path and generate a new
+  // file in L1 in second db path
+  begin = "Key4";
+  end = "Key7";
+  ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
+  
ASSERT_EQ("0,2", FilesPerLevel(0)); + + sfm->WaitForEmptyTrash(); + ASSERT_EQ(bg_delete_file, 8); + + compact_options.bottommost_level_compaction = + BottommostLevelCompaction::kForce; + ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); + ASSERT_EQ("0,1", FilesPerLevel(0)); + + sfm->WaitForEmptyTrash(); + ASSERT_EQ(bg_delete_file, 8); + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + +TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) { + int bg_delete_file = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DeleteScheduler::DeleteTrashFile:DeleteFile", + [&](void* arg) { bg_delete_file++; }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + options.env = env_; + DestroyAndReopen(options); + + // Create 4 files in L0 + for (int i = 0; i < 4; i++) { + ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'))); + ASSERT_OK(Flush()); + } + // We created 4 sst files in L0 + ASSERT_EQ("4", FilesPerLevel(0)); + + // Close DB and destroy it using DeleteScheduler + Close(); + std::string trash_dir = test::TmpDir(env_) + "/trash"; + int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec + Status s; + options.sst_file_manager.reset(NewSstFileManager( + env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s)); + ASSERT_OK(s); + ASSERT_OK(DestroyDB(dbname_, options)); + + auto sfm = static_cast(options.sst_file_manager.get()); + sfm->WaitForEmptyTrash(); + // We have deleted the 4 sst files in the delete_scheduler + ASSERT_EQ(bg_delete_file, 4); +} +#endif // ROCKSDB_LITE + +TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) { + std::shared_ptr sst_file_manager(NewSstFileManager(env_)); + auto sfm = static_cast(sst_file_manager.get()); + + Options options = CurrentOptions(); + options.sst_file_manager = sst_file_manager; + options.disable_auto_compactions = true; + DestroyAndReopen(options); + + Random rnd(301); + + // Generate a file containing 100 keys. + for (int i = 0; i < 100; i++) { + ASSERT_OK(Put(Key(i), RandomString(&rnd, 50))); + } + ASSERT_OK(Flush()); + + uint64_t first_file_size = 0; + auto files_in_db = GetAllSSTFiles(&first_file_size); + ASSERT_EQ(sfm->GetTotalSize(), first_file_size); + + // Set the maximum allowed space usage to the current total size + sfm->SetMaxAllowedSpaceUsage(first_file_size + 1); + + ASSERT_OK(Put("key1", "val1")); + // This flush will cause bg_error_ and will fail + ASSERT_NOK(Flush()); +} + +TEST_F(DBSSTTest, DBWithMaxSpaceAllowedRandomized) { + // This test will set a maximum allowed space for the DB, then it will + // keep filling the DB until the limit is reached and bg_error_ is set. + // When bg_error_ is set we will verify that the DB size is greater + // than the limit. 
+ + std::vector max_space_limits_mbs = {1, 2, 4, 8, 10}; + + bool bg_error_set = false; + uint64_t total_sst_files_size = 0; + + int reached_max_space_on_flush = 0; + int reached_max_space_on_compaction = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached", + [&](void* arg) { + bg_error_set = true; + GetAllSSTFiles(&total_sst_files_size); + reached_max_space_on_flush++; + }); + + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached", + [&](void* arg) { + bg_error_set = true; + GetAllSSTFiles(&total_sst_files_size); + reached_max_space_on_compaction++; + }); + + for (auto limit_mb : max_space_limits_mbs) { + bg_error_set = false; + total_sst_files_size = 0; + rocksdb::SyncPoint::GetInstance()->ClearTrace(); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + std::shared_ptr sst_file_manager(NewSstFileManager(env_)); + auto sfm = static_cast(sst_file_manager.get()); + + Options options = CurrentOptions(); + options.sst_file_manager = sst_file_manager; + options.write_buffer_size = 1024 * 512; // 512 Kb + DestroyAndReopen(options); + Random rnd(301); + + sfm->SetMaxAllowedSpaceUsage(limit_mb * 1024 * 1024); + + int keys_written = 0; + uint64_t estimated_db_size = 0; + while (true) { + auto s = Put(RandomString(&rnd, 10), RandomString(&rnd, 50)); + if (!s.ok()) { + break; + } + keys_written++; + // Check the estimated db size vs the db limit just to make sure we + // dont run into an infinite loop + estimated_db_size = keys_written * 60; // ~60 bytes per key + ASSERT_LT(estimated_db_size, limit_mb * 1024 * 1024 * 2); + } + ASSERT_TRUE(bg_error_set); + ASSERT_GE(total_sst_files_size, limit_mb * 1024 * 1024); + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + } + + ASSERT_GT(reached_max_space_on_flush, 0); + ASSERT_GT(reached_max_space_on_compaction, 0); +} + +#ifndef ROCKSDB_LITE +TEST_F(DBSSTTest, OpenDBWithInfiniteMaxOpenFiles) { + // Open DB with infinite max open files + // - First iteration use 1 thread to open files + // - Second iteration use 5 threads to open files + for (int iter = 0; iter < 2; iter++) { + Options options; + options.create_if_missing = true; + options.write_buffer_size = 100000; + options.disable_auto_compactions = true; + options.max_open_files = -1; + if (iter == 0) { + options.max_file_opening_threads = 1; + } else { + options.max_file_opening_threads = 5; + } + options = CurrentOptions(options); + DestroyAndReopen(options); + + // Create 12 Files in L0 (then move then to L2) + for (int i = 0; i < 12; i++) { + std::string k = "L2_" + Key(i); + ASSERT_OK(Put(k, k + std::string(1000, 'a'))); + ASSERT_OK(Flush()); + } + CompactRangeOptions compact_options; + compact_options.change_level = true; + compact_options.target_level = 2; + db_->CompactRange(compact_options, nullptr, nullptr); + + // Create 12 Files in L0 + for (int i = 0; i < 12; i++) { + std::string k = "L0_" + Key(i); + ASSERT_OK(Put(k, k + std::string(1000, 'a'))); + ASSERT_OK(Flush()); + } + Close(); + + // Reopening the DB will load all exisitng files + Reopen(options); + ASSERT_EQ("12,0,12", FilesPerLevel(0)); + std::vector> files; + dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files); + + for (const auto& level : files) { + for (const auto& file : level) { + ASSERT_TRUE(file.table_reader_handle != nullptr); + } + } + + for (int i = 0; i < 12; i++) { + ASSERT_EQ(Get("L0_" + Key(i)), "L0_" + Key(i) + std::string(1000, 'a')); + ASSERT_EQ(Get("L2_" + 
Key(i)), "L2_" + Key(i) + std::string(1000, 'a')); + } + } +} + +TEST_F(DBSSTTest, GetTotalSstFilesSize) { + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + options.compression = kNoCompression; + DestroyAndReopen(options); + // Generate 5 files in L0 + for (int i = 0; i < 5; i++) { + for (int j = 0; j < 10; j++) { + std::string val = "val_file_" + ToString(i); + ASSERT_OK(Put(Key(j), val)); + } + Flush(); + } + ASSERT_EQ("5", FilesPerLevel(0)); + + std::vector live_files_meta; + dbfull()->GetLiveFilesMetaData(&live_files_meta); + ASSERT_EQ(live_files_meta.size(), 5); + uint64_t single_file_size = live_files_meta[0].size; + + uint64_t live_sst_files_size = 0; + uint64_t total_sst_files_size = 0; + for (const auto& file_meta : live_files_meta) { + live_sst_files_size += file_meta.size; + } + + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", + &total_sst_files_size)); + // Live SST files = 5 + // Total SST files = 5 + ASSERT_EQ(live_sst_files_size, 5 * single_file_size); + ASSERT_EQ(total_sst_files_size, 5 * single_file_size); + + // hold current version + std::unique_ptr iter1(dbfull()->NewIterator(ReadOptions())); + + // Compact 5 files into 1 file in L0 + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_EQ("0,1", FilesPerLevel(0)); + + live_files_meta.clear(); + dbfull()->GetLiveFilesMetaData(&live_files_meta); + ASSERT_EQ(live_files_meta.size(), 1); + + live_sst_files_size = 0; + total_sst_files_size = 0; + for (const auto& file_meta : live_files_meta) { + live_sst_files_size += file_meta.size; + } + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", + &total_sst_files_size)); + // Live SST files = 1 (compacted file) + // Total SST files = 6 (5 original files + compacted file) + ASSERT_EQ(live_sst_files_size, 1 * single_file_size); + ASSERT_EQ(total_sst_files_size, 6 * single_file_size); + + // hold current version + std::unique_ptr iter2(dbfull()->NewIterator(ReadOptions())); + + // Delete all keys and compact, this will delete all live files + for (int i = 0; i < 10; i++) { + ASSERT_OK(Delete(Key(i))); + } + Flush(); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_EQ("", FilesPerLevel(0)); + + live_files_meta.clear(); + dbfull()->GetLiveFilesMetaData(&live_files_meta); + ASSERT_EQ(live_files_meta.size(), 0); + + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", + &total_sst_files_size)); + // Live SST files = 0 + // Total SST files = 6 (5 original files + compacted file) + ASSERT_EQ(total_sst_files_size, 6 * single_file_size); + + iter1.reset(); + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", + &total_sst_files_size)); + // Live SST files = 0 + // Total SST files = 1 (compacted file) + ASSERT_EQ(total_sst_files_size, 1 * single_file_size); + + iter2.reset(); + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", + &total_sst_files_size)); + // Live SST files = 0 + // Total SST files = 0 + ASSERT_EQ(total_sst_files_size, 0); +} + +TEST_F(DBSSTTest, GetTotalSstFilesSizeVersionsFilesShared) { + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + options.compression = kNoCompression; + DestroyAndReopen(options); + // Generate 5 files in L0 + for (int i = 0; i < 5; i++) { + ASSERT_OK(Put(Key(i), "val")); + Flush(); + } + ASSERT_EQ("5", FilesPerLevel(0)); + + std::vector live_files_meta; + dbfull()->GetLiveFilesMetaData(&live_files_meta); + 
ASSERT_EQ(live_files_meta.size(), 5); + uint64_t single_file_size = live_files_meta[0].size; + + uint64_t live_sst_files_size = 0; + uint64_t total_sst_files_size = 0; + for (const auto& file_meta : live_files_meta) { + live_sst_files_size += file_meta.size; + } + + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", + &total_sst_files_size)); + + // Live SST files = 5 + // Total SST files = 5 + ASSERT_EQ(live_sst_files_size, 5 * single_file_size); + ASSERT_EQ(total_sst_files_size, 5 * single_file_size); + + // hold current version + std::unique_ptr iter1(dbfull()->NewIterator(ReadOptions())); + + // Compaction will do trivial move from L0 to L1 + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_EQ("0,5", FilesPerLevel(0)); + + live_files_meta.clear(); + dbfull()->GetLiveFilesMetaData(&live_files_meta); + ASSERT_EQ(live_files_meta.size(), 5); + + live_sst_files_size = 0; + total_sst_files_size = 0; + for (const auto& file_meta : live_files_meta) { + live_sst_files_size += file_meta.size; + } + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", + &total_sst_files_size)); + // Live SST files = 5 + // Total SST files = 5 (used in 2 version) + ASSERT_EQ(live_sst_files_size, 5 * single_file_size); + ASSERT_EQ(total_sst_files_size, 5 * single_file_size); + + // hold current version + std::unique_ptr iter2(dbfull()->NewIterator(ReadOptions())); + + // Delete all keys and compact, this will delete all live files + for (int i = 0; i < 5; i++) { + ASSERT_OK(Delete(Key(i))); + } + Flush(); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_EQ("", FilesPerLevel(0)); + + live_files_meta.clear(); + dbfull()->GetLiveFilesMetaData(&live_files_meta); + ASSERT_EQ(live_files_meta.size(), 0); + + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", + &total_sst_files_size)); + // Live SST files = 0 + // Total SST files = 5 (used in 2 version) + ASSERT_EQ(total_sst_files_size, 5 * single_file_size); + + iter1.reset(); + iter2.reset(); + + ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", + &total_sst_files_size)); + // Live SST files = 0 + // Total SST files = 0 + ASSERT_EQ(total_sst_files_size, 0); +} + +TEST_F(DBSSTTest, AddExternalSstFile) { + do { + std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/"; + env_->CreateDir(sst_files_folder); + Options options = CurrentOptions(); + options.env = env_; + const ImmutableCFOptions ioptions(options); + + SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator); + + // file1.sst (0 => 99) + std::string file1 = sst_files_folder + "file1.sst"; + ASSERT_OK(sst_file_writer.Open(file1)); + for (int k = 0; k < 100; k++) { + ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val")); + } + ExternalSstFileInfo file1_info; + Status s = sst_file_writer.Finish(&file1_info); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(file1_info.file_path, file1); + ASSERT_EQ(file1_info.num_entries, 100); + ASSERT_EQ(file1_info.smallest_key, Key(0)); + ASSERT_EQ(file1_info.largest_key, Key(99)); + // sst_file_writer already finished, cannot add this value + s = sst_file_writer.Add(Key(100), "bad_val"); + ASSERT_FALSE(s.ok()) << s.ToString(); + + // file2.sst (100 => 199) + std::string file2 = sst_files_folder + "file2.sst"; + ASSERT_OK(sst_file_writer.Open(file2)); + for (int k = 100; k < 200; k++) { + ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val")); + } + // Cannot add this key because it's not after last added 
key + s = sst_file_writer.Add(Key(99), "bad_val"); + ASSERT_FALSE(s.ok()) << s.ToString(); + ExternalSstFileInfo file2_info; + s = sst_file_writer.Finish(&file2_info); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(file2_info.file_path, file2); + ASSERT_EQ(file2_info.num_entries, 100); + ASSERT_EQ(file2_info.smallest_key, Key(100)); + ASSERT_EQ(file2_info.largest_key, Key(199)); + + // file3.sst (195 => 299) + // This file values overlap with file2 values + std::string file3 = sst_files_folder + "file3.sst"; + ASSERT_OK(sst_file_writer.Open(file3)); + for (int k = 195; k < 300; k++) { + ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap")); + } + ExternalSstFileInfo file3_info; + s = sst_file_writer.Finish(&file3_info); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(file3_info.file_path, file3); + ASSERT_EQ(file3_info.num_entries, 105); + ASSERT_EQ(file3_info.smallest_key, Key(195)); + ASSERT_EQ(file3_info.largest_key, Key(299)); + + // file4.sst (30 => 39) + // This file values overlap with file1 values + std::string file4 = sst_files_folder + "file4.sst"; + ASSERT_OK(sst_file_writer.Open(file4)); + for (int k = 30; k < 40; k++) { + ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap")); + } + ExternalSstFileInfo file4_info; + s = sst_file_writer.Finish(&file4_info); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(file4_info.file_path, file4); + ASSERT_EQ(file4_info.num_entries, 10); + ASSERT_EQ(file4_info.smallest_key, Key(30)); + ASSERT_EQ(file4_info.largest_key, Key(39)); + + // file5.sst (400 => 499) + std::string file5 = sst_files_folder + "file5.sst"; + ASSERT_OK(sst_file_writer.Open(file5)); + for (int k = 400; k < 500; k++) { + ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val")); + } + ExternalSstFileInfo file5_info; + s = sst_file_writer.Finish(&file5_info); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(file5_info.file_path, file5); + ASSERT_EQ(file5_info.num_entries, 100); + ASSERT_EQ(file5_info.smallest_key, Key(400)); + ASSERT_EQ(file5_info.largest_key, Key(499)); + + // Cannot create an empty sst file + std::string file_empty = sst_files_folder + "file_empty.sst"; + ExternalSstFileInfo file_empty_info; + s = sst_file_writer.Finish(&file_empty_info); + ASSERT_NOK(s); + + DestroyAndReopen(options); + // Add file using file path + s = db_->AddFile(file1); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); + for (int k = 0; k < 100; k++) { + ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); + } + + // Add file while holding a snapshot will fail + const Snapshot* s1 = db_->GetSnapshot(); + if (s1 != nullptr) { + ASSERT_NOK(db_->AddFile(&file2_info)); + db_->ReleaseSnapshot(s1); + } + // We can add the file after releaseing the snapshot + ASSERT_OK(db_->AddFile(&file2_info)); + + ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); + for (int k = 0; k < 200; k++) { + ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); + } + + // This file have overlapping values with the exisitng data + s = db_->AddFile(file3); + ASSERT_FALSE(s.ok()) << s.ToString(); + + // This file have overlapping values with the exisitng data + s = db_->AddFile(&file4_info); + ASSERT_FALSE(s.ok()) << s.ToString(); + + // Overwrite values of keys divisible by 5 + for (int k = 0; k < 200; k += 5) { + ASSERT_OK(Put(Key(k), Key(k) + "_val_new")); + } + ASSERT_NE(db_->GetLatestSequenceNumber(), 0U); + + // Key range of file5 (400 => 499) dont overlap with any keys in DB + ASSERT_OK(db_->AddFile(file5)); + + // Make sure values are correct before and after 
+    DestroyAndReopen(options);
+    // Add file using file path
+    s = db_->AddFile(file1);
+    ASSERT_TRUE(s.ok()) << s.ToString();
+    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
+    for (int k = 0; k < 100; k++) {
+      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
+    }
+
+    // Adding a file while holding a snapshot will fail
+    const Snapshot* s1 = db_->GetSnapshot();
+    if (s1 != nullptr) {
+      ASSERT_NOK(db_->AddFile(&file2_info));
+      db_->ReleaseSnapshot(s1);
+    }
+    // We can add the file after releasing the snapshot
+    ASSERT_OK(db_->AddFile(&file2_info));
+
+    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
+    for (int k = 0; k < 200; k++) {
+      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
+    }
+
+    // This file has overlapping values with the existing data
+    s = db_->AddFile(file3);
+    ASSERT_FALSE(s.ok()) << s.ToString();
+
+    // This file has overlapping values with the existing data
+    s = db_->AddFile(&file4_info);
+    ASSERT_FALSE(s.ok()) << s.ToString();
+
+    // Overwrite values of keys divisible by 5
+    for (int k = 0; k < 200; k += 5) {
+      ASSERT_OK(Put(Key(k), Key(k) + "_val_new"));
+    }
+    ASSERT_NE(db_->GetLatestSequenceNumber(), 0U);
+
+    // Key range of file5 (400 => 499) doesn't overlap with any keys in the DB
+    ASSERT_OK(db_->AddFile(file5));
+
+    // Make sure values are correct before and after flush/compaction
+    for (int i = 0; i < 2; i++) {
+      for (int k = 0; k < 200; k++) {
+        std::string value = Key(k) + "_val";
+        if (k % 5 == 0) {
+          value += "_new";
+        }
+        ASSERT_EQ(Get(Key(k)), value);
+      }
+      for (int k = 400; k < 500; k++) {
+        std::string value = Key(k) + "_val";
+        ASSERT_EQ(Get(Key(k)), value);
+      }
+      ASSERT_OK(Flush());
+      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+    }
+
+    Close();
+    options.disable_auto_compactions = true;
+    Reopen(options);
+
+    // Delete keys in range (400 => 499)
+    for (int k = 400; k < 500; k++) {
+      ASSERT_OK(Delete(Key(k)));
+    }
+    // We deleted range (400 => 499) but cannot add file5 because
+    // of the range tombstones
+    ASSERT_NOK(db_->AddFile(file5));
+
+    // Compacting the DB will remove the tombstones
+    ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+    // Now we can add the file
+    ASSERT_OK(db_->AddFile(file5));
+
+    // Verify values of file5 in DB
+    for (int k = 400; k < 500; k++) {
+      std::string value = Key(k) + "_val";
+      ASSERT_EQ(Get(Key(k)), value);
+    }
+  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
+                         kSkipFIFOCompaction));
+}
+
+// This test reproduces a bug that can happen in some cases if the DB starts
+// purging obsolete files while we are adding an external SST file.
+// This situation may result in the file being deleted while it's being added.
+TEST_F(DBSSTTest, AddExternalSstFilePurgeObsoleteFilesBug) {
+  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
+  env_->CreateDir(sst_files_folder);
+  Options options = CurrentOptions();
+  options.env = env_;
+  const ImmutableCFOptions ioptions(options);
+
+  SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
+
+  // file1.sst (0 => 500)
+  std::string sst_file_path = sst_files_folder + "file1.sst";
+  Status s = sst_file_writer.Open(sst_file_path);
+  ASSERT_OK(s);
+  for (int i = 0; i < 500; i++) {
+    std::string k = Key(i);
+    s = sst_file_writer.Add(k, k + "_val");
+    ASSERT_OK(s);
+  }
+
+  ExternalSstFileInfo sst_file_info;
+  s = sst_file_writer.Finish(&sst_file_info);
+  ASSERT_OK(s);
+
+  options.delete_obsolete_files_period_micros = 0;
+  options.disable_auto_compactions = true;
+  DestroyAndReopen(options);
+
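+  // Setting delete_obsolete_files_period_micros to 0 removes the usual
+  // rate limit on purging obsolete files, so a purge can run right after
+  // every flush/compaction. The callback below fires in the middle of
+  // AddFile(), just after the external file has been copied, and injects
+  // flushes plus a compaction at that point to provoke the race described
+  // in the comment above.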
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::AddFile:FileCopied", [&](void* arg) {
+        ASSERT_OK(Put("aaa", "bbb"));
+        ASSERT_OK(Flush());
+        ASSERT_OK(Put("aaa", "xxx"));
+        ASSERT_OK(Flush());
+        db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+      });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  s = db_->AddFile(sst_file_path);
+  ASSERT_OK(s);
+
+  for (int i = 0; i < 500; i++) {
+    std::string k = Key(i);
+    std::string v = k + "_val";
+    ASSERT_EQ(Get(k), v);
+  }
+
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+}
+
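+// The second parameter of DB::AddFile() selects between moving and copying
+// the external file, as the FileExists() assertions below verify: with
+// move_file=true the source file is gone after a successful ingest, with
+// move_file=false it is left in place, and a failed ingest leaves the
+// source untouched either way.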
+TEST_F(DBSSTTest, AddExternalSstFileNoCopy) {
+  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
+  env_->CreateDir(sst_files_folder);
+  Options options = CurrentOptions();
+  options.env = env_;
+  const ImmutableCFOptions ioptions(options);
+
+  SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
+
+  // file1.sst (0 => 99)
+  std::string file1 = sst_files_folder + "file1.sst";
+  ASSERT_OK(sst_file_writer.Open(file1));
+  for (int k = 0; k < 100; k++) {
+    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
+  }
+  ExternalSstFileInfo file1_info;
+  Status s = sst_file_writer.Finish(&file1_info);
+  ASSERT_TRUE(s.ok()) << s.ToString();
+  ASSERT_EQ(file1_info.file_path, file1);
+  ASSERT_EQ(file1_info.num_entries, 100);
+  ASSERT_EQ(file1_info.smallest_key, Key(0));
+  ASSERT_EQ(file1_info.largest_key, Key(99));
+
+  // file2.sst (100 => 299)
+  std::string file2 = sst_files_folder + "file2.sst";
+  ASSERT_OK(sst_file_writer.Open(file2));
+  for (int k = 100; k < 300; k++) {
+    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
+  }
+  ExternalSstFileInfo file2_info;
+  s = sst_file_writer.Finish(&file2_info);
+  ASSERT_TRUE(s.ok()) << s.ToString();
+  ASSERT_EQ(file2_info.file_path, file2);
+  ASSERT_EQ(file2_info.num_entries, 200);
+  ASSERT_EQ(file2_info.smallest_key, Key(100));
+  ASSERT_EQ(file2_info.largest_key, Key(299));
+
+  // file3.sst (110 => 124) .. overlap with file2.sst
+  std::string file3 = sst_files_folder + "file3.sst";
+  ASSERT_OK(sst_file_writer.Open(file3));
+  for (int k = 110; k < 125; k++) {
+    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
+  }
+  ExternalSstFileInfo file3_info;
+  s = sst_file_writer.Finish(&file3_info);
+  ASSERT_TRUE(s.ok()) << s.ToString();
+  ASSERT_EQ(file3_info.file_path, file3);
+  ASSERT_EQ(file3_info.num_entries, 15);
+  ASSERT_EQ(file3_info.smallest_key, Key(110));
+  ASSERT_EQ(file3_info.largest_key, Key(124));
+
+  s = db_->AddFile(&file1_info, true /* move file */);
+  ASSERT_TRUE(s.ok()) << s.ToString();
+  ASSERT_EQ(Status::NotFound(), env_->FileExists(file1));
+
+  s = db_->AddFile(&file2_info, false /* copy file */);
+  ASSERT_TRUE(s.ok()) << s.ToString();
+  ASSERT_OK(env_->FileExists(file2));
+
+  // This file has overlapping values with the existing data
+  s = db_->AddFile(&file3_info, true /* move file */);
+  ASSERT_FALSE(s.ok()) << s.ToString();
+  ASSERT_OK(env_->FileExists(file3));
+
+  for (int k = 0; k < 300; k++) {
+    ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
+  }
+}
+
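+// Concurrency contract checked below: when two threads race to add the same
+// external file, exactly one AddFile() call succeeds and the other fails,
+// so files_added must end up equal to num_files even though every file is
+// submitted twice.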
+TEST_F(DBSSTTest, AddExternalSstFileMultiThreaded) {
+  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
+  // Bulk load 10 files, every file contains 1000 keys
+  int num_files = 10;
+  int keys_per_file = 1000;
+
+  // Generate file names
+  std::vector<std::string> file_names;
+  for (int i = 0; i < num_files; i++) {
+    std::string file_name = "file_" + ToString(i) + ".sst";
+    file_names.push_back(sst_files_folder + file_name);
+  }
+
+  do {
+    env_->CreateDir(sst_files_folder);
+    Options options = CurrentOptions();
+    const ImmutableCFOptions ioptions(options);
+
+    std::atomic<int> thread_num(0);
+    std::function<void()> write_file_func = [&]() {
+      int file_idx = thread_num.fetch_add(1);
+      int range_start = file_idx * keys_per_file;
+      int range_end = range_start + keys_per_file;
+
+      SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
+
+      ASSERT_OK(sst_file_writer.Open(file_names[file_idx]));
+
+      for (int k = range_start; k < range_end; k++) {
+        ASSERT_OK(sst_file_writer.Add(Key(k), Key(k)));
+      }
+
+      Status s = sst_file_writer.Finish();
+      ASSERT_TRUE(s.ok()) << s.ToString();
+    };
+    // Write num_files files in parallel
+    std::vector<std::thread> sst_writer_threads;
+    for (int i = 0; i < num_files; ++i) {
+      sst_writer_threads.emplace_back(write_file_func);
+    }
+
+    for (auto& t : sst_writer_threads) {
+      t.join();
+    }
+
+    fprintf(stderr, "Wrote %d files (%d keys)\n", num_files,
+            num_files * keys_per_file);
+
+    thread_num.store(0);
+    std::atomic<int> files_added(0);
+    std::function<void()> load_file_func = [&]() {
+      // We intentionally add every file twice, and assert that it was added
+      // only once and the other add failed
+      int thread_id = thread_num.fetch_add(1);
+      int file_idx = thread_id / 2;
+      // sometimes we use copy, sometimes link .. the result should be the same
+      bool move_file = (thread_id % 3 == 0);
+
+      Status s = db_->AddFile(file_names[file_idx], move_file);
+      if (s.ok()) {
+        files_added++;
+      }
+    };
+    // Bulk load num_files files in parallel
+    std::vector<std::thread> add_file_threads;
+    DestroyAndReopen(options);
+    for (int i = 0; i < num_files * 2; ++i) {
+      add_file_threads.emplace_back(load_file_func);
+    }
+
+    for (auto& t : add_file_threads) {
+      t.join();
+    }
+    ASSERT_EQ(files_added.load(), num_files);
+    fprintf(stderr, "Loaded %d files (%d keys)\n", num_files,
+            num_files * keys_per_file);
+
+    // Overwrite values of keys divisible by 100
+    for (int k = 0; k < num_files * keys_per_file; k += 100) {
+      std::string key = Key(k);
+      Status s = Put(key, key + "_new");
+      ASSERT_TRUE(s.ok());
+    }
+
+    for (int i = 0; i < 2; i++) {
+      // Make sure the values are correct before and after flush/compaction
+      for (int k = 0; k < num_files * keys_per_file; ++k) {
+        std::string key = Key(k);
+        std::string value = (k % 100 == 0) ? (key + "_new") : key;
+        ASSERT_EQ(Get(key), value);
+      }
+      ASSERT_OK(Flush());
+      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+    }
+
+    fprintf(stderr, "Verified %d values\n", num_files * keys_per_file);
+  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
+                         kSkipFIFOCompaction));
+}
+
+TEST_F(DBSSTTest, AddExternalSstFileOverlappingRanges) {
+  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
+  Random rnd(301);
+  do {
+    env_->CreateDir(sst_files_folder);
+    Options options = CurrentOptions();
+    DestroyAndReopen(options);
+    const ImmutableCFOptions ioptions(options);
+    SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
+
+    printf("Option config = %d\n", option_config_);
+    std::vector<std::pair<int, int>> key_ranges;
+    for (int i = 0; i < 500; i++) {
+      int range_start = rnd.Uniform(20000);
+      int keys_per_range = 10 + rnd.Uniform(41);
+
+      key_ranges.emplace_back(range_start, range_start + keys_per_range);
+    }
+
+    int memtable_add = 0;
+    int success_add_file = 0;
+    int failed_add_file = 0;
+    std::map<std::string, std::string> true_data;
+    for (size_t i = 0; i < key_ranges.size(); i++) {
+      int range_start = key_ranges[i].first;
+      int range_end = key_ranges[i].second;
+
+      Status s;
+      std::string range_val = "range_" + ToString(i);
+
+      // For 20% of ranges we use DB::Put, for 80% we use DB::AddFile
+      if (i && i % 5 == 0) {
+        // Use DB::Put to insert range (insert into memtable)
+        range_val += "_put";
+        for (int k = range_start; k <= range_end; k++) {
+          s = Put(Key(k), range_val);
+          ASSERT_OK(s);
+        }
+        memtable_add++;
+      } else {
+        // Use DB::AddFile to insert range
+        range_val += "_add_file";
+
+        // Generate the file containing the range
+        std::string file_name = sst_files_folder + env_->GenerateUniqueId();
+        ASSERT_OK(sst_file_writer.Open(file_name));
+        for (int k = range_start; k <= range_end; k++) {
+          s = sst_file_writer.Add(Key(k), range_val);
+          ASSERT_OK(s);
+        }
+        ExternalSstFileInfo file_info;
+        s = sst_file_writer.Finish(&file_info);
+        ASSERT_OK(s);
+
+        // Insert the generated file
+        s = db_->AddFile(&file_info);
+
+        auto it = true_data.lower_bound(Key(range_start));
+        if (it != true_data.end() && it->first <= Key(range_end)) {
+          // This range overlaps with data that already exists in the DB
+          ASSERT_NOK(s);
+          failed_add_file++;
+        } else {
+          ASSERT_OK(s);
+          success_add_file++;
+        }
+      }
+
+      if (s.ok()) {
+        // Update true_data map to include the new inserted data
+        for (int k = range_start; k <= range_end; k++) {
+          true_data[Key(k)] = range_val;
+        }
+      }
+
+      // Flush / Compact the DB
+      if (i && i % 50 == 0) {
+        Flush();
+      }
+      if (i && i % 75 == 0) {
+        db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+      }
+    }
+
+    printf(
+        "Total: %zu ranges\n"
+        "AddFile()|Success: %d ranges\n"
+        "AddFile()|RangeConflict: %d ranges\n"
+        "Put(): %d ranges\n",
+        key_ranges.size(), success_add_file, failed_add_file, memtable_add);
+
+    // Verify the correctness of the data
+    for (const auto& kv : true_data) {
+      ASSERT_EQ(Get(kv.first), kv.second);
+    }
+    printf("keys/values verified\n");
+  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
+                         kSkipFIFOCompaction));
+}
+
+#endif  // ROCKSDB_LITE
+
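+// ".ldb" is the table-file suffix that newer LevelDB versions use;
+// Rocks2LevelTableFileName converts an ".sst" name to that form. The test
+// below checks that a DB keeps serving all keys when some of its table
+// files carry the ".ldb" suffix instead of ".sst".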
+// 1 Create some SST files by inserting K-V pairs into DB
+// 2 Close DB and change suffix from ".sst" to ".ldb" for every other SST file
+// 3 Open DB and check if all keys can be read
+TEST_F(DBSSTTest, SSTsWithLdbSuffixHandling) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 110 << 10;  // 110KB
+  options.num_levels = 4;
+  DestroyAndReopen(options);
+
+  Random rnd(301);
+  int key_id = 0;
+  for (int i = 0; i < 10; ++i) {
+    GenerateNewFile(&rnd, &key_id, false);
+  }
+  Flush();
+  Close();
+  int const num_files = GetSstFileCount(dbname_);
+  ASSERT_GT(num_files, 0);
+
+  std::vector<std::string> filenames;
+  GetSstFiles(dbname_, &filenames);
+  int num_ldb_files = 0;
+  for (size_t i = 0; i < filenames.size(); ++i) {
+    if (i & 1) {
+      continue;
+    }
+    std::string const rdb_name = dbname_ + "/" + filenames[i];
+    std::string const ldb_name = Rocks2LevelTableFileName(rdb_name);
+    ASSERT_TRUE(env_->RenameFile(rdb_name, ldb_name).ok());
+    ++num_ldb_files;
+  }
+  ASSERT_GT(num_ldb_files, 0);
+  ASSERT_EQ(num_files, GetSstFileCount(dbname_));
+
+  Reopen(options);
+  for (int k = 0; k < key_id; ++k) {
+    ASSERT_NE("NOT_FOUND", Get(Key(k)));
+  }
+  Destroy(options);
+}
+
+}  // namespace rocksdb
+
+int main(int argc, char** argv) {
+  rocksdb::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/db/db_test.cc b/db/db_test.cc
index 70c0646f0..caa230b6a 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -39,18 +39,15 @@
 #include "rocksdb/db.h"
 #include "rocksdb/env.h"
 #include "rocksdb/experimental.h"
-#include "rocksdb/sst_file_manager.h"
 #include "rocksdb/filter_policy.h"
 #include "rocksdb/options.h"
 #include "rocksdb/perf_context.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/slice_transform.h"
 #include "rocksdb/snapshot.h"
-#include "rocksdb/sst_file_writer.h"
 #include "rocksdb/table.h"
 #include "rocksdb/table_properties.h"
 #include "rocksdb/thread_status.h"
-#include "rocksdb/wal_filter.h"
 #include "rocksdb/utilities/write_batch_with_index.h"
 #include "rocksdb/utilities/checkpoint.h"
 #include "rocksdb/utilities/optimistic_transaction_db.h"
@@ -65,8 +62,6 @@
 #include "util/compression.h"
 #include "util/mutexlock.h"
 #include "util/rate_limiter.h"
-#include "util/sst_file_manager_impl.h"
-#include "util/statistics.h"
 #include "util/sync_point.h"
 #include "util/testharness.h"
 #include "util/testutil.h"
@@ -77,30 +72,6 @@
 
 namespace rocksdb {
 
-static uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) {
-  return options.statistics->getTickerCount(ticker_type);
-}
-
-#ifndef ROCKSDB_LITE
-// A helper function that ensures the table properties returned in
-// `GetPropertiesOfAllTablesTest` is correct.
-// This test assumes entries size is different for each of the tables.
-namespace {
-
-uint64_t GetNumberOfSstFilesForColumnFamily(DB* db,
-                                            std::string column_family_name) {
-  std::vector<LiveFileMetaData> metadata;
-  db->GetLiveFilesMetaData(&metadata);
-  uint64_t result = 0;
-  for (auto& fileMetadata : metadata) {
-    result += (fileMetadata.column_family_name == column_family_name);
-  }
-  return result;
-}
-
-}  // namespace
-#endif  // ROCKSDB_LITE
-
 class DBTest : public DBTestBase {
  public:
   DBTest() : DBTestBase("/db_test") {}
@@ -369,103 +340,6 @@ TEST_F(DBTest, CompactedDB) {
   ASSERT_TRUE(status_list[5].IsNotFound());
 }
 
-// Make sure that when options.block_cache is set, after a new table is
-// created its index/filter blocks are added to block cache.
-TEST_F(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  options.statistics = rocksdb::CreateDBStatistics();
-  BlockBasedTableOptions table_options;
-  table_options.cache_index_and_filter_blocks = true;
-  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
-  options.table_factory.reset(new BlockBasedTableFactory(table_options));
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  ASSERT_OK(Put(1, "key", "val"));
-  // Create a new table.
-  ASSERT_OK(Flush(1));
-
-  // index/filter blocks added to block cache right after table creation.
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(2, /* only index/filter were added */
-            TestGetTickerCount(options, BLOCK_CACHE_ADD));
-  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
-  uint64_t int_num;
-  ASSERT_TRUE(
-      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
-  ASSERT_EQ(int_num, 0U);
-
-  // Make sure filter block is in cache.
-  std::string value;
-  ReadOptions ropt;
-  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
-
-  // Miss count should remain the same.
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-
-  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-
-  // Make sure index block is in cache.
-  auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
-  value = Get(1, "key");
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(index_block_hit + 1,
-            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-
-  value = Get(1, "key");
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(index_block_hit + 2,
-            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-}
-
-TEST_F(DBTest, ParanoidFileChecks) {
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  options.statistics = rocksdb::CreateDBStatistics();
-  options.level0_file_num_compaction_trigger = 2;
-  options.paranoid_file_checks = true;
-  BlockBasedTableOptions table_options;
-  table_options.cache_index_and_filter_blocks = false;
-  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
-  options.table_factory.reset(new BlockBasedTableFactory(table_options));
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  ASSERT_OK(Put(1, "1_key", "val"));
-  ASSERT_OK(Put(1, "9_key", "val"));
-  // Create a new table.
- ASSERT_OK(Flush(1)); - ASSERT_EQ(1, /* read and cache data block */ - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Put(1, "1_key2", "val2")); - ASSERT_OK(Put(1, "9_key2", "val2")); - // Create a new SST file. This will further trigger a compaction - // and generate another file. - ASSERT_OK(Flush(1)); - dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(3, /* Totally 3 files created up to now */ - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - // After disabling options.paranoid_file_checks. NO further block - // is added after generating a new file. - ASSERT_OK( - dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}})); - - ASSERT_OK(Put(1, "1_key3", "val3")); - ASSERT_OK(Put(1, "9_key3", "val3")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "1_key4", "val4")); - ASSERT_OK(Put(1, "9_key4", "val4")); - ASSERT_OK(Flush(1)); - dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(3, /* Totally 3 files created up to now */ - TestGetTickerCount(options, BLOCK_CACHE_ADD)); -} - TEST_F(DBTest, LevelLimitReopen) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); @@ -630,43 +504,6 @@ TEST_F(DBTest, ReadFromPersistedTier) { } while (ChangeOptions(kSkipHashCuckoo)); } -TEST_F(DBTest, IteratorProperty) { - // The test needs to be changed if kPersistedTier is supported in iterator. - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - Put(1, "1", "2"); - ReadOptions ropt; - ropt.pin_data = false; - { - unique_ptr iter(db_->NewIterator(ropt, handles_[1])); - iter->SeekToFirst(); - std::string prop_value; - ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value)); - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("0", prop_value); - iter->Next(); - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("Iterator is not valid.", prop_value); - } - Close(); -} - -TEST_F(DBTest, PersistedTierOnIterator) { - // The test needs to be changed if kPersistedTier is supported in iterator. - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - ReadOptions ropt; - ropt.read_tier = kPersistedTier; - - auto* iter = db_->NewIterator(ropt, handles_[1]); - ASSERT_TRUE(iter->status().IsNotSupported()); - delete iter; - - std::vector iters; - ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported()); - Close(); -} - TEST_F(DBTest, SingleDeleteFlush) { // Test to check whether flushing preserves a single delete hidden // behind a put. @@ -964,1076 +801,6 @@ TEST_F(DBTest, GetEncountersEmptyLevel) { } #endif // ROCKSDB_LITE -// KeyMayExist can lead to a few false positives, but not false negatives. 
-// To make test deterministic, use a much larger number of bits per key-20 than -// bits in the key, so that false positives are eliminated -TEST_F(DBTest, KeyMayExist) { - do { - ReadOptions ropts; - std::string value; - anon::OptionsOverride options_override; - options_override.filter_policy.reset(NewBloomFilterPolicy(20)); - Options options = CurrentOptions(options_override); - options.statistics = rocksdb::CreateDBStatistics(); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); - - ASSERT_OK(Put(1, "a", "b")); - bool value_found = false; - ASSERT_TRUE( - db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found)); - ASSERT_TRUE(value_found); - ASSERT_EQ("b", value); - - ASSERT_OK(Flush(1)); - value.clear(); - - uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS); - uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE( - db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found)); - ASSERT_TRUE(!value_found); - // assert that no new files were opened and no new blocks were - // read into block cache. - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Delete(1, "a")); - - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Flush(1)); - dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1], - true /* disallow trivial move */); - - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Delete(1, "c")); - - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value)); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - // KeyMayExist function only checks data in block caches, which is not used - // by plain table format. - } while ( - ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction)); -} - -TEST_F(DBTest, NonBlockingIteration) { - do { - ReadOptions non_blocking_opts, regular_opts; - Options options = CurrentOptions(); - options.statistics = rocksdb::CreateDBStatistics(); - non_blocking_opts.read_tier = kBlockCacheTier; - CreateAndReopenWithCF({"pikachu"}, options); - // write one kv to the database. - ASSERT_OK(Put(1, "a", "b")); - - // scan using non-blocking iterator. We should find it because - // it is in memtable. - Iterator* iter = db_->NewIterator(non_blocking_opts, handles_[1]); - int count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - count++; - } - ASSERT_EQ(count, 1); - delete iter; - - // flush memtable to storage. Now, the key should not be in the - // memtable neither in the block cache. - ASSERT_OK(Flush(1)); - - // verify that a non-blocking iterator does not find any - // kvs. Neither does it do any IOs to storage. 
- uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS); - uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - iter = db_->NewIterator(non_blocking_opts, handles_[1]); - count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - count++; - } - ASSERT_EQ(count, 0); - ASSERT_TRUE(iter->status().IsIncomplete()); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - delete iter; - - // read in the specified block via a regular get - ASSERT_EQ(Get(1, "a"), "b"); - - // verify that we can find it via a non-blocking scan - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - iter = db_->NewIterator(non_blocking_opts, handles_[1]); - count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - count++; - } - ASSERT_EQ(count, 1); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - delete iter; - - // This test verifies block cache behaviors, which is not used by plain - // table format. - // Exclude kHashCuckoo as it does not support iteration currently - } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo | - kSkipMmapReads)); -} - -#ifndef ROCKSDB_LITE -TEST_F(DBTest, ManagedNonBlockingIteration) { - do { - ReadOptions non_blocking_opts, regular_opts; - Options options = CurrentOptions(); - options.statistics = rocksdb::CreateDBStatistics(); - non_blocking_opts.read_tier = kBlockCacheTier; - non_blocking_opts.managed = true; - CreateAndReopenWithCF({"pikachu"}, options); - // write one kv to the database. - ASSERT_OK(Put(1, "a", "b")); - - // scan using non-blocking iterator. We should find it because - // it is in memtable. - Iterator* iter = db_->NewIterator(non_blocking_opts, handles_[1]); - int count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - count++; - } - ASSERT_EQ(count, 1); - delete iter; - - // flush memtable to storage. Now, the key should not be in the - // memtable neither in the block cache. - ASSERT_OK(Flush(1)); - - // verify that a non-blocking iterator does not find any - // kvs. Neither does it do any IOs to storage. 
- int64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS); - int64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - iter = db_->NewIterator(non_blocking_opts, handles_[1]); - count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - count++; - } - ASSERT_EQ(count, 0); - ASSERT_TRUE(iter->status().IsIncomplete()); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - delete iter; - - // read in the specified block via a regular get - ASSERT_EQ(Get(1, "a"), "b"); - - // verify that we can find it via a non-blocking scan - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - iter = db_->NewIterator(non_blocking_opts, handles_[1]); - count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - count++; - } - ASSERT_EQ(count, 1); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - delete iter; - - // This test verifies block cache behaviors, which is not used by plain - // table format. - // Exclude kHashCuckoo as it does not support iteration currently - } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo | - kSkipMmapReads)); -} -#endif // ROCKSDB_LITE - -// A delete is skipped for key if KeyMayExist(key) returns False -// Tests Writebatch consistency and proper delete behaviour -TEST_F(DBTest, FilterDeletes) { - do { - anon::OptionsOverride options_override; - options_override.filter_policy.reset(NewBloomFilterPolicy(20)); - Options options = CurrentOptions(options_override); - options.filter_deletes = true; - CreateAndReopenWithCF({"pikachu"}, options); - WriteBatch batch; - - batch.Delete(handles_[1], "a"); - dbfull()->Write(WriteOptions(), &batch); - ASSERT_EQ(AllEntriesFor("a", 1), "[ ]"); // Delete skipped - batch.Clear(); - - batch.Put(handles_[1], "a", "b"); - batch.Delete(handles_[1], "a"); - dbfull()->Write(WriteOptions(), &batch); - ASSERT_EQ(Get(1, "a"), "NOT_FOUND"); - ASSERT_EQ(AllEntriesFor("a", 1), "[ DEL, b ]"); // Delete issued - batch.Clear(); - - batch.Delete(handles_[1], "c"); - batch.Put(handles_[1], "c", "d"); - dbfull()->Write(WriteOptions(), &batch); - ASSERT_EQ(Get(1, "c"), "d"); - ASSERT_EQ(AllEntriesFor("c", 1), "[ d ]"); // Delete skipped - batch.Clear(); - - ASSERT_OK(Flush(1)); // A stray Flush - - batch.Delete(handles_[1], "c"); - dbfull()->Write(WriteOptions(), &batch); - ASSERT_EQ(AllEntriesFor("c", 1), "[ DEL, d ]"); // Delete issued - batch.Clear(); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTest, GetFilterByPrefixBloom) { - Options options = last_options_; - options.prefix_extractor.reset(NewFixedPrefixTransform(8)); - options.statistics = rocksdb::CreateDBStatistics(); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - WriteOptions wo; - ReadOptions ro; - FlushOptions fo; - fo.wait = true; - std::string value; - - ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo")); - ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2")); - ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar")); - - dbfull()->Flush(fo); - - ASSERT_EQ("foo", Get("barbarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ("foo2", Get("barbarbar2")); - 
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - - ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); -} - -TEST_F(DBTest, WholeKeyFilterProp) { - Options options = last_options_; - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - options.statistics = rocksdb::CreateDBStatistics(); - - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - WriteOptions wo; - ReadOptions ro; - FlushOptions fo; - fo.wait = true; - std::string value; - - ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); - // Needs insert some keys to make sure files are not filtered out by key - // ranges. - ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - dbfull()->Flush(fo); - - Reopen(options); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - - // Reopen with whole key filtering enabled and prefix extractor - // NULL. Bloom filter should be off for both of whole key and - // prefix bloom. - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.prefix_extractor.reset(); - Reopen(options); - - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - // Write DB with only full key filtering. - ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); - // Needs insert some keys to make sure files are not filtered out by key - // ranges. - ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); - - // Reopen with both of whole key off and prefix extractor enabled. - // Still no bloom filter should be used. - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - - // Try to create a DB with mixed files: - ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); - // Needs insert some keys to make sure files are not filtered out by key - // ranges. 
- ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); - - options.prefix_extractor.reset(); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - - // Try to create a DB with mixed files. - ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar")); - // In this case needs insert some keys to make sure files are - // not filtered out by key ranges. - ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - Flush(); - - // Now we have two files: - // File 1: An older file with prefix bloom. - // File 2: A newer file with whole bloom filter. - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); - ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); - - // Reopen with the same setting: only whole key is used - Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); - ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); - - // Restart with both filters are allowed - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); - // File 1 will has it filtered out. - // File 2 will not, as prefix `foo` exists in the file. - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); - ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); - - // Restart with only prefix bloom is allowed. 
- options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); - ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); -} - -TEST_F(DBTest, IterSeekBeforePrev) { - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - dbfull()->Flush(FlushOptions()); - ASSERT_OK(Put("0", "f")); - ASSERT_OK(Put("1", "h")); - dbfull()->Flush(FlushOptions()); - ASSERT_OK(Put("2", "j")); - auto iter = db_->NewIterator(ReadOptions()); - iter->Seek(Slice("c")); - iter->Prev(); - iter->Seek(Slice("a")); - iter->Prev(); - delete iter; -} - -namespace { -std::string MakeLongKey(size_t length, char c) { - return std::string(length, c); -} -} // namespace - -TEST_F(DBTest, IterLongKeys) { - ASSERT_OK(Put(MakeLongKey(20, 0), "0")); - ASSERT_OK(Put(MakeLongKey(32, 2), "2")); - ASSERT_OK(Put("a", "b")); - dbfull()->Flush(FlushOptions()); - ASSERT_OK(Put(MakeLongKey(50, 1), "1")); - ASSERT_OK(Put(MakeLongKey(127, 3), "3")); - ASSERT_OK(Put(MakeLongKey(64, 4), "4")); - auto iter = db_->NewIterator(ReadOptions()); - - // Create a key that needs to be skipped for Seq too new - iter->Seek(MakeLongKey(20, 0)); - ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4"); - delete iter; - - iter = db_->NewIterator(ReadOptions()); - iter->Seek(MakeLongKey(50, 1)); - ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); - delete iter; -} - -TEST_F(DBTest, IterNextWithNewerSeq) { - ASSERT_OK(Put("0", "0")); - dbfull()->Flush(FlushOptions()); - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - ASSERT_OK(Put("d", "e")); - auto iter = db_->NewIterator(ReadOptions()); - - // Create a key that needs to be skipped for Seq too new - for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; - i++) { - ASSERT_OK(Put("b", "f")); - } - - iter->Seek(Slice("a")); - ASSERT_EQ(IterStatus(iter), "a->b"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->d"); - delete iter; -} - -TEST_F(DBTest, IterPrevWithNewerSeq) { - ASSERT_OK(Put("0", "0")); - dbfull()->Flush(FlushOptions()); - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - ASSERT_OK(Put("d", "e")); - auto iter = db_->NewIterator(ReadOptions()); - - // Create a key that needs to be skipped for Seq too new - for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; - i++) { - ASSERT_OK(Put("b", "f")); - } - - iter->Seek(Slice("d")); - ASSERT_EQ(IterStatus(iter), "d->e"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "c->d"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->b"); - - iter->Prev(); - delete iter; -} - -TEST_F(DBTest, IterPrevWithNewerSeq2) { - 
ASSERT_OK(Put("0", "0")); - dbfull()->Flush(FlushOptions()); - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - ASSERT_OK(Put("d", "e")); - auto iter = db_->NewIterator(ReadOptions()); - iter->Seek(Slice("c")); - ASSERT_EQ(IterStatus(iter), "c->d"); - - // Create a key that needs to be skipped for Seq too new - for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; - i++) { - ASSERT_OK(Put("b", "f")); - } - - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->b"); - - iter->Prev(); - delete iter; -} - -TEST_F(DBTest, IterEmpty) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek("foo"); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; - } while (ChangeCompactOptions()); -} - -TEST_F(DBTest, IterSingle) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "a", "va")); - Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek(""); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek("a"); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek("b"); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; - } while (ChangeCompactOptions()); -} - -TEST_F(DBTest, IterMulti) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "a", "va")); - ASSERT_OK(Put(1, "b", "vb")); - ASSERT_OK(Put(1, "c", "vc")); - Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek(""); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Seek("a"); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Seek("ax"); - ASSERT_EQ(IterStatus(iter), "b->vb"); - - iter->Seek("b"); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Seek("z"); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - // Switch from reverse to forward - iter->SeekToLast(); - iter->Prev(); - iter->Prev(); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - - // Switch from forward to reverse - iter->SeekToFirst(); - iter->Next(); - iter->Next(); - iter->Prev(); - 
ASSERT_EQ(IterStatus(iter), "b->vb"); - - // Make sure iter stays at snapshot - ASSERT_OK(Put(1, "a", "va2")); - ASSERT_OK(Put(1, "a2", "va3")); - ASSERT_OK(Put(1, "b", "vb2")); - ASSERT_OK(Put(1, "c", "vc2")); - ASSERT_OK(Delete(1, "b")); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; - } while (ChangeCompactOptions()); -} - -// Check that we can skip over a run of user keys -// by using reseek rather than sequential scan -TEST_F(DBTest, IterReseek) { - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - Options options = CurrentOptions(options_override); - options.max_sequential_skip_in_iterations = 3; - options.create_if_missing = true; - options.statistics = rocksdb::CreateDBStatistics(); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // insert three keys with same userkey and verify that - // reseek is not invoked. For each of these test cases, - // verify that we can find the next key "b". - ASSERT_OK(Put(1, "a", "zero")); - ASSERT_OK(Put(1, "a", "one")); - ASSERT_OK(Put(1, "a", "two")); - ASSERT_OK(Put(1, "b", "bone")); - Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); - iter->SeekToFirst(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); - ASSERT_EQ(IterStatus(iter), "a->two"); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); - ASSERT_EQ(IterStatus(iter), "b->bone"); - delete iter; - - // insert a total of three keys with same userkey and verify - // that reseek is still not invoked. - ASSERT_OK(Put(1, "a", "three")); - iter = db_->NewIterator(ReadOptions(), handles_[1]); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->three"); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); - ASSERT_EQ(IterStatus(iter), "b->bone"); - delete iter; - - // insert a total of four keys with same userkey and verify - // that reseek is invoked. - ASSERT_OK(Put(1, "a", "four")); - iter = db_->NewIterator(ReadOptions(), handles_[1]); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->four"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1); - ASSERT_EQ(IterStatus(iter), "b->bone"); - delete iter; - - // Testing reverse iterator - // At this point, we have three versions of "a" and one version of "b". - // The reseek statistics is already at 1. - int num_reseeks = - (int)TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION); - - // Insert another version of b and assert that reseek is not invoked - ASSERT_OK(Put(1, "b", "btwo")); - iter = db_->NewIterator(ReadOptions(), handles_[1]); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "b->btwo"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), - num_reseeks); - iter->Prev(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), - num_reseeks + 1); - ASSERT_EQ(IterStatus(iter), "a->four"); - delete iter; - - // insert two more versions of b. 
This makes a total of 4 versions - // of b and 4 versions of a. - ASSERT_OK(Put(1, "b", "bthree")); - ASSERT_OK(Put(1, "b", "bfour")); - iter = db_->NewIterator(ReadOptions(), handles_[1]); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "b->bfour"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), - num_reseeks + 2); - iter->Prev(); - - // the previous Prev call should have invoked reseek - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), - num_reseeks + 3); - ASSERT_EQ(IterStatus(iter), "a->four"); - delete iter; -} - -TEST_F(DBTest, IterSmallAndLargeMix) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "a", "va")); - ASSERT_OK(Put(1, "b", std::string(100000, 'b'))); - ASSERT_OK(Put(1, "c", "vc")); - ASSERT_OK(Put(1, "d", std::string(100000, 'd'))); - ASSERT_OK(Put(1, "e", std::string(100000, 'e'))); - - Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; - } while (ChangeCompactOptions()); -} - -TEST_F(DBTest, IterMultiWithDelete) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "ka", "va")); - ASSERT_OK(Put(1, "kb", "vb")); - ASSERT_OK(Put(1, "kc", "vc")); - ASSERT_OK(Delete(1, "kb")); - ASSERT_EQ("NOT_FOUND", Get(1, "kb")); - - Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); - iter->Seek("kc"); - ASSERT_EQ(IterStatus(iter), "kc->vc"); - if (!CurrentOptions().merge_operator) { - // TODO: merge operator does not support backward iteration yet - if (kPlainTableAllBytesPrefix != option_config_&& - kBlockBasedTableWithWholeKeyHashIndex != option_config_ && - kHashLinkList != option_config_) { - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "ka->va"); - } - } - delete iter; - } while (ChangeOptions()); -} - -TEST_F(DBTest, IterPrevMaxSkip) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - for (int i = 0; i < 2; i++) { - ASSERT_OK(Put(1, "key1", "v1")); - ASSERT_OK(Put(1, "key2", "v2")); - ASSERT_OK(Put(1, "key3", "v3")); - ASSERT_OK(Put(1, "key4", "v4")); - ASSERT_OK(Put(1, "key5", "v5")); - } - - VerifyIterLast("key5->v5", 1); - - ASSERT_OK(Delete(1, "key5")); - VerifyIterLast("key4->v4", 1); - - ASSERT_OK(Delete(1, "key4")); - VerifyIterLast("key3->v3", 1); - - ASSERT_OK(Delete(1, "key3")); - VerifyIterLast("key2->v2", 1); - - ASSERT_OK(Delete(1, "key2")); - VerifyIterLast("key1->v1", 1); - - ASSERT_OK(Delete(1, "key1")); - VerifyIterLast("(invalid)", 1); - } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast)); -} - -TEST_F(DBTest, IterWithSnapshot) { - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - do { - 
CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override)); - ASSERT_OK(Put(1, "key1", "val1")); - ASSERT_OK(Put(1, "key2", "val2")); - ASSERT_OK(Put(1, "key3", "val3")); - ASSERT_OK(Put(1, "key4", "val4")); - ASSERT_OK(Put(1, "key5", "val5")); - - const Snapshot *snapshot = db_->GetSnapshot(); - ReadOptions options; - options.snapshot = snapshot; - Iterator* iter = db_->NewIterator(options, handles_[1]); - - // Put more values after the snapshot - ASSERT_OK(Put(1, "key100", "val100")); - ASSERT_OK(Put(1, "key101", "val101")); - - iter->Seek("key5"); - ASSERT_EQ(IterStatus(iter), "key5->val5"); - if (!CurrentOptions().merge_operator) { - // TODO: merge operator does not support backward iteration yet - if (kPlainTableAllBytesPrefix != option_config_&& - kBlockBasedTableWithWholeKeyHashIndex != option_config_ && - kHashLinkList != option_config_) { - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "key4->val4"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "key3->val3"); - - iter->Next(); - ASSERT_EQ(IterStatus(iter), "key4->val4"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "key5->val5"); - } - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - } - db_->ReleaseSnapshot(snapshot); - delete iter; - // skip as HashCuckooRep does not support snapshot - } while (ChangeOptions(kSkipHashCuckoo)); -} - -TEST_F(DBTest, Recover) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Put(1, "baz", "v5")); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_EQ("v1", Get(1, "foo")); - - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v5", Get(1, "baz")); - ASSERT_OK(Put(1, "bar", "v2")); - ASSERT_OK(Put(1, "foo", "v3")); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_EQ("v3", Get(1, "foo")); - ASSERT_OK(Put(1, "foo", "v4")); - ASSERT_EQ("v4", Get(1, "foo")); - ASSERT_EQ("v2", Get(1, "bar")); - ASSERT_EQ("v5", Get(1, "baz")); - } while (ChangeOptions()); -} - -TEST_F(DBTest, RecoverWithTableHandle) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.disable_auto_compactions = true; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Put(1, "bar", "v2")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "foo", "v3")); - ASSERT_OK(Put(1, "bar", "v4")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "big", std::string(100, 'a'))); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - - std::vector> files; - dbfull()->TEST_GetFilesMetaData(handles_[1], &files); - size_t total_files = 0; - for (const auto& level : files) { - total_files += level.size(); - } - ASSERT_EQ(total_files, 3); - for (const auto& level : files) { - for (const auto& file : level) { - if (kInfiniteMaxOpenFiles == option_config_) { - ASSERT_TRUE(file.table_reader_handle != nullptr); - } else { - ASSERT_TRUE(file.table_reader_handle == nullptr); - } - } - } - } while (ChangeOptions()); -} - -TEST_F(DBTest, IgnoreRecoveredLog) { - std::string backup_logs = dbname_ + "/backup_logs"; - - // delete old files in backup_logs directory - env_->CreateDirIfMissing(backup_logs); - std::vector old_files; - env_->GetChildren(backup_logs, &old_files); - for (auto& file : old_files) { - if (file != "." 
&& file != "..") { - env_->DeleteFile(backup_logs + "/" + file); - } - } - - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.merge_operator = MergeOperators::CreateUInt64AddOperator(); - options.wal_dir = dbname_ + "/logs"; - DestroyAndReopen(options); - - // fill up the DB - std::string one, two; - PutFixed64(&one, 1); - PutFixed64(&two, 2); - ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one))); - ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one))); - ASSERT_OK(db_->Merge(WriteOptions(), Slice("bar"), Slice(one))); - - // copy the logs to backup - std::vector logs; - env_->GetChildren(options.wal_dir, &logs); - for (auto& log : logs) { - if (log != ".." && log != ".") { - CopyFile(options.wal_dir + "/" + log, backup_logs + "/" + log); - } - } - - // recover the DB - Reopen(options); - ASSERT_EQ(two, Get("foo")); - ASSERT_EQ(one, Get("bar")); - Close(); - - // copy the logs from backup back to wal dir - for (auto& log : logs) { - if (log != ".." && log != ".") { - CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); - } - } - // this should ignore the log files, recovery should not happen again - // if the recovery happens, the same merge operator would be called twice, - // leading to incorrect results - Reopen(options); - ASSERT_EQ(two, Get("foo")); - ASSERT_EQ(one, Get("bar")); - Close(); - Destroy(options); - Reopen(options); - Close(); - - // copy the logs from backup back to wal dir - env_->CreateDirIfMissing(options.wal_dir); - for (auto& log : logs) { - if (log != ".." && log != ".") { - CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); - } - } - // assert that we successfully recovered only from logs, even though we - // destroyed the DB - Reopen(options); - ASSERT_EQ(two, Get("foo")); - ASSERT_EQ(one, Get("bar")); - - // Recovery will fail if DB directory doesn't exist. - Destroy(options); - // copy the logs from backup back to wal dir - env_->CreateDirIfMissing(options.wal_dir); - for (auto& log : logs) { - if (log != ".." 
&& log != ".") { - CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); - // we won't be needing this file no more - env_->DeleteFile(backup_logs + "/" + log); - } - } - Status s = TryReopen(options); - ASSERT_TRUE(!s.ok()); - } while (ChangeOptions(kSkipHashCuckoo)); -} - TEST_F(DBTest, CheckLock) { do { DB* localdb; @@ -2113,7 +880,8 @@ TEST_F(DBTest, FLUSH) { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); WriteOptions writeOpt = WriteOptions(); writeOpt.disableWAL = true; - SetPerfLevel(kEnableTime);; + SetPerfLevel(kEnableTime); + ; ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1")); // this will now also flush the last 2 writes ASSERT_OK(Flush(1)); @@ -2121,7 +889,7 @@ TEST_F(DBTest, FLUSH) { perf_context.Reset(); Get(1, "foo"); - ASSERT_TRUE((int) perf_context.get_from_output_files_time > 0); + ASSERT_TRUE((int)perf_context.get_from_output_files_time > 0); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("v1", Get(1, "foo")); @@ -2136,7 +904,7 @@ TEST_F(DBTest, FLUSH) { ASSERT_EQ("v2", Get(1, "bar")); perf_context.Reset(); ASSERT_EQ("v2", Get(1, "foo")); - ASSERT_TRUE((int) perf_context.get_from_output_files_time > 0); + ASSERT_TRUE((int)perf_context.get_from_output_files_time > 0); writeOpt.disableWAL = false; ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3")); @@ -2153,19 +921,6 @@ TEST_F(DBTest, FLUSH) { } while (ChangeCompactOptions()); } -TEST_F(DBTest, RecoveryWithEmptyLog) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Put(1, "foo", "v2")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v3")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_EQ("v3", Get(1, "foo")); - } while (ChangeOptions()); -} - #ifndef ROCKSDB_LITE TEST_F(DBTest, FlushSchedule) { Options options = CurrentOptions(); @@ -2212,7 +967,7 @@ TEST_F(DBTest, FlushSchedule) { TEST_F(DBTest, ManifestRollOver) { do { Options options; - options.max_manifest_file_size = 10 ; // 10 bytes + options.max_manifest_file_size = 10; // 10 bytes options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); { @@ -2255,41 +1010,12 @@ TEST_F(DBTest, IdentityAcrossRestarts) { } while (ChangeCompactOptions()); } -#ifndef ROCKSDB_LITE -TEST_F(DBTest, RecoverWithLargeLog) { - do { - { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(Put(1, "big1", std::string(200000, '1'))); - ASSERT_OK(Put(1, "big2", std::string(200000, '2'))); - ASSERT_OK(Put(1, "small3", std::string(10, '3'))); - ASSERT_OK(Put(1, "small4", std::string(10, '4'))); - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - } - - // Make sure that if we re-open with a small write buffer size that - // we flush table files in the middle of a large log file. 
- Options options; - options.write_buffer_size = 100000; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 3); - ASSERT_EQ(std::string(200000, '1'), Get(1, "big1")); - ASSERT_EQ(std::string(200000, '2'), Get(1, "big2")); - ASSERT_EQ(std::string(10, '3'), Get(1, "small3")); - ASSERT_EQ(std::string(10, '4'), Get(1, "small4")); - ASSERT_GT(NumTableFilesAtLevel(0, 1), 1); - } while (ChangeCompactOptions()); -} -#endif // ROCKSDB_LITE - namespace { class KeepFilter : public CompactionFilter { public: virtual bool Filter(int level, const Slice& key, const Slice& value, - std::string* new_value, bool* value_changed) const - override { + std::string* new_value, + bool* value_changed) const override { return false; } @@ -2348,122 +1074,6 @@ class DelayFilterFactory : public CompactionFilterFactory { } // namespace #ifndef ROCKSDB_LITE -TEST_F(DBTest, CompressedCache) { - if (!Snappy_Supported()) { - return; - } - int num_iter = 80; - - // Run this test three iterations. - // Iteration 1: only a uncompressed block cache - // Iteration 2: only a compressed block cache - // Iteration 3: both block cache and compressed cache - // Iteration 4: both block cache and compressed cache, but DB is not - // compressed - for (int iter = 0; iter < 4; iter++) { - Options options = CurrentOptions(); - options.write_buffer_size = 64*1024; // small write buffer - options.statistics = rocksdb::CreateDBStatistics(); - - BlockBasedTableOptions table_options; - switch (iter) { - case 0: - // only uncompressed block cache - table_options.block_cache = NewLRUCache(8*1024); - table_options.block_cache_compressed = nullptr; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - break; - case 1: - // no block cache, only compressed cache - table_options.no_block_cache = true; - table_options.block_cache = nullptr; - table_options.block_cache_compressed = NewLRUCache(8*1024); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - break; - case 2: - // both compressed and uncompressed block cache - table_options.block_cache = NewLRUCache(1024); - table_options.block_cache_compressed = NewLRUCache(8*1024); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - break; - case 3: - // both block cache and compressed cache, but DB is not compressed - // also, make block cache sizes bigger, to trigger block cache hits - table_options.block_cache = NewLRUCache(1024 * 1024); - table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.compression = kNoCompression; - break; - default: - ASSERT_TRUE(false); - } - CreateAndReopenWithCF({"pikachu"}, options); - // default column family doesn't have block cache - Options no_block_cache_opts; - no_block_cache_opts.statistics = options.statistics; - no_block_cache_opts = CurrentOptions(no_block_cache_opts); - BlockBasedTableOptions table_options_no_bc; - table_options_no_bc.no_block_cache = true; - no_block_cache_opts.table_factory.reset( - NewBlockBasedTableFactory(table_options_no_bc)); - ReopenWithColumnFamilies({"default", "pikachu"}, - std::vector({no_block_cache_opts, options})); - - Random rnd(301); - - // Write 8MB (80 values, each 100K) - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - std::vector values; - std::string str; - for (int i = 0; i < num_iter; i++) { - if (i % 4 == 0) { // high compression ratio - str = RandomString(&rnd, 1000); - } - 
values.push_back(str); - ASSERT_OK(Put(1, Key(i), values[i])); - } - - // flush all data from memtable so that reads are from block cache - ASSERT_OK(Flush(1)); - - for (int i = 0; i < num_iter; i++) { - ASSERT_EQ(Get(1, Key(i)), values[i]); - } - - // check that we triggered the appropriate code paths in the cache - switch (iter) { - case 0: - // only uncompressed block cache - ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0); - break; - case 1: - // no block cache, only compressed cache - ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0); - ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0); - break; - case 2: - // both compressed and uncompressed block cache - ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0); - ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0); - break; - case 3: - // both compressed and uncompressed block cache - ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0); - ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_HIT), 0); - ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0); - // compressed doesn't have any hits since blocks are not compressed on - // storage - ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT), 0); - break; - default: - ASSERT_TRUE(false); - } - - options.create_if_missing = true; - DestroyAndReopen(options); - } -} static std::string CompressibleString(Random* rnd, int len) { std::string r; @@ -2521,10 +1131,8 @@ namespace { void MinLevelHelper(DBTest* self, Options& options) { Random rnd(301); - for (int num = 0; - num < options.level0_file_num_compaction_trigger - 1; - num++) - { + for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; + num++) { std::vector values; // Write 120KB (12 values, each 10K) for (int i = 0; i < 12; i++) { @@ -2535,7 +1143,7 @@ void MinLevelHelper(DBTest* self, Options& options) { ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1); } - //generate one more file in level-0, and should trigger level-0 compaction + // generate one more file in level-0, and should trigger level-0 compaction std::vector values; for (int i = 0; i < 12; i++) { values.push_back(DBTestBase::RandomString(&rnd, 10000)); @@ -2550,8 +1158,11 @@ void MinLevelHelper(DBTest* self, Options& options) { // returns false if the calling-Test should be skipped bool MinLevelToCompress(CompressionType& type, Options& options, int wbits, int lev, int strategy) { - fprintf(stderr, "Test with compression options : window_bits = %d, level = %d, strategy = %d}\n", wbits, lev, strategy); - options.write_buffer_size = 100<<10; //100KB + fprintf(stderr, + "Test with compression options : window_bits = %d, level = %d, " + "strategy = %d}\n", + wbits, lev, strategy); + options.write_buffer_size = 100 << 10; // 100KB options.arena_block_size = 4096; options.num_levels = 3; options.level0_file_num_compaction_trigger = 3; @@ -2699,8 +1310,7 @@ static bool Between(uint64_t val, uint64_t low, uint64_t high) { bool result = (val >= low) && (val <= high); if (!result) { fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", - (unsigned long long)(val), - (unsigned long long)(low), + (unsigned long long)(val), (unsigned long long)(low), (unsigned long long)(high)); } return result; @@ -2814,7 +1424,7 @@ TEST_F(DBTest, ApproximateSizesMemTable) { TEST_F(DBTest, ApproximateSizes) { do { Options options = CurrentOptions(); - options.write_buffer_size = 100000000; // Large 
write buffer + options.write_buffer_size = 100000000; // Large write buffer options.compression = kNoCompression; options.create_if_missing = true; DestroyAndReopen(options); @@ -2907,32 +1517,6 @@ TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) { } #endif // ROCKSDB_LITE -TEST_F(DBTest, IteratorPinsRef) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - Put(1, "foo", "hello"); - - // Get iterator that will yield the current contents of the DB. - Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); - - // Write to force compactions - Put(1, "foo", "newvalue1"); - for (int i = 0; i < 100; i++) { - // 100K values - ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v'))); - } - Put(1, "foo", "newvalue2"); - - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - ASSERT_EQ("hello", iter->value().ToString()); - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - delete iter; - } while (ChangeCompactOptions()); -} - #ifndef ROCKSDB_LITE TEST_F(DBTest, Snapshot) { anon::OptionsOverride options_override; @@ -3214,7 +1798,8 @@ TEST_F(DBTest, OverlapInLevel0) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); - //Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0. + // Fill levels 1 and 2 to disable the pushing of new memtables to levels > + // 0. ASSERT_OK(Put(1, "100", "v100")); ASSERT_OK(Put(1, "999", "v999")); Flush(1); @@ -3279,8 +1864,8 @@ TEST_F(DBTest, ComparatorCheck) { new_options = CurrentOptions(); new_options.comparator = &cmp; // only the non-default column family has non-matching comparator - Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, - std::vector({options, new_options})); + Status s = TryReopenWithColumnFamilies( + {"default", "pikachu"}, std::vector({options, new_options})); ASSERT_TRUE(!s.ok()); ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos) << s.ToString(); @@ -3298,12 +1883,13 @@ TEST_F(DBTest, CustomComparator) { } virtual void FindShortestSeparator(std::string* s, const Slice& l) const override { - ToNumber(*s); // Check format - ToNumber(l); // Check format + ToNumber(*s); // Check format + ToNumber(l); // Check format } virtual void FindShortSuccessor(std::string* key) const override { - ToNumber(*key); // Check format + ToNumber(*key); // Check format } + private: static int ToNumber(const Slice& x) { // Check that there are no extra characters. @@ -3342,7 +1928,7 @@ TEST_F(DBTest, CustomComparator) { for (int run = 0; run < 2; run++) { for (int i = 0; i < 1000; i++) { char buf[100]; - snprintf(buf, sizeof(buf), "[%d]", i*10); + snprintf(buf, sizeof(buf), "[%d]", i * 10); ASSERT_OK(Put(1, buf, buf)); } Compact(1, "[0]", "[1000000]"); @@ -3577,9 +2163,8 @@ TEST_F(DBTest, ManifestWriteError) { // We iterate twice. In the second iteration, everything is the // same except the log record never makes it to the MANIFEST file. for (int iter = 0; iter < 2; iter++) { - std::atomic* error_type = (iter == 0) - ? &env_->manifest_sync_error_ - : &env_->manifest_write_error_; + std::atomic* error_type = (iter == 0) ? 
&env_->manifest_sync_error_ + : &env_->manifest_write_error_; // Insert foo=>bar mapping Options options = CurrentOptions(); @@ -3596,7 +2181,7 @@ ASSERT_EQ("bar", Get("foo")); const int last = 2; MoveFilesToLevel(2); - ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level + ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level // Merging compaction (will fail) error_type->store(true, std::memory_order_release); @@ -3679,223 +2264,11 @@ TEST_F(DBTest, PutFailsParanoid) { ASSERT_TRUE(s.ok()); } -TEST_F(DBTest, BloomFilter) { - do { - Options options = CurrentOptions(); - env_->count_random_reads_ = true; - options.env = env_; - // ChangeCompactOptions() only changes compaction style, which does not - // trigger reset of table_factory - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - CreateAndReopenWithCF({"pikachu"}, options); - - // Populate multiple layers - const int N = 10000; - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - Compact(1, "a", "z"); - for (int i = 0; i < N; i += 100) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - Flush(1); - - // Prevent auto compactions triggered by seeks - env_->delay_sstable_sync_.store(true, std::memory_order_release); - - // Lookup present keys. Should rarely read from small sstable. - env_->random_read_counter_.Reset(); - for (int i = 0; i < N; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - int reads = env_->random_read_counter_.Read(); - fprintf(stderr, "%d present => %d reads\n", N, reads); - ASSERT_GE(reads, N); - ASSERT_LE(reads, N + 2*N/100); - - // Lookup missing keys. Should rarely read from either sstable.
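For reference, the filter setup this test relies on can be wired up standalone: once a table has a filter block, a point lookup for an absent key that falls inside the file's key range is answered by the bloom filter without touching data blocks, and the BLOOM_FILTER_USEFUL ticker counts each such save. A sketch under that assumption (path and keys illustrative):

#include <cstdint>
#include <string>
#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/statistics.h"
#include "rocksdb/table.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();
  rocksdb::BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10));
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  rocksdb::DB* db;
  rocksdb::DB::Open(options, "/tmp/bloom_demo", &db);
  // Bracket the file's key range so the absent key below falls inside it.
  db->Put(rocksdb::WriteOptions(), "a", "1");
  db->Put(rocksdb::WriteOptions(), "z", "1");
  db->Flush(rocksdb::FlushOptions());  // write an SST with a filter block
  std::string value;
  db->Get(rocksdb::ReadOptions(), "m", &value);  // NotFound; filter consulted
  uint64_t useful =
      options.statistics->getTickerCount(rocksdb::BLOOM_FILTER_USEFUL);
  delete db;
  return useful > 0 ? 0 : 1;  // expect the ticker to have moved
}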
- env_->random_read_counter_.Reset(); - for (int i = 0; i < N; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, Key(i) + ".missing")); - } - reads = env_->random_read_counter_.Read(); - fprintf(stderr, "%d missing => %d reads\n", N, reads); - ASSERT_LE(reads, 3*N/100); - - env_->delay_sstable_sync_.store(false, std::memory_order_release); - Close(); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTest, BloomFilterRate) { - while (ChangeFilterOptions()) { - Options options = CurrentOptions(); - options.statistics = rocksdb::CreateDBStatistics(); - CreateAndReopenWithCF({"pikachu"}, options); - - const int maxKey = 10000; - for (int i = 0; i < maxKey; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - // Add a large key to make the file contain wide range - ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); - Flush(1); - - // Check if they can be found - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - - // Check if filter is useful - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, Key(i+33333))); - } - ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey*0.98); - } -} - -TEST_F(DBTest, BloomFilterCompatibility) { - Options options = CurrentOptions(); - options.statistics = rocksdb::CreateDBStatistics(); - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - // Create with block based filter - CreateAndReopenWithCF({"pikachu"}, options); - - const int maxKey = 10000; - for (int i = 0; i < maxKey; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); - Flush(1); - - // Check db with full filter - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - // Check if they can be found - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); -} - -TEST_F(DBTest, BloomFilterReverseCompatibility) { - Options options = CurrentOptions(); - options.statistics = rocksdb::CreateDBStatistics(); - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - // Create with full filter - CreateAndReopenWithCF({"pikachu"}, options); - - const int maxKey = 10000; - for (int i = 0; i < maxKey; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); - Flush(1); - - // Check db with block_based filter - table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - // Check if they can be found - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); -} - -namespace { -// A wrapped bloom over default FilterPolicy -class WrappedBloom : public FilterPolicy { - public: - explicit WrappedBloom(int bits_per_key) : - filter_(NewBloomFilterPolicy(bits_per_key)), - counter_(0) {} - - ~WrappedBloom() { delete filter_; } - - const char* Name() const override { return 
"WrappedRocksDbFilterPolicy"; } - - void CreateFilter(const rocksdb::Slice* keys, int n, std::string* dst) - const override { - std::unique_ptr user_keys(new rocksdb::Slice[n]); - for (int i = 0; i < n; ++i) { - user_keys[i] = convertKey(keys[i]); - } - return filter_->CreateFilter(user_keys.get(), n, dst); - } - - bool KeyMayMatch(const rocksdb::Slice& key, const rocksdb::Slice& filter) - const override { - counter_++; - return filter_->KeyMayMatch(convertKey(key), filter); - } - - uint32_t GetCounter() { return counter_; } - - private: - const FilterPolicy* filter_; - mutable uint32_t counter_; - - rocksdb::Slice convertKey(const rocksdb::Slice& key) const { - return key; - } -}; -} // namespace - -TEST_F(DBTest, BloomFilterWrapper) { - Options options = CurrentOptions(); - options.statistics = rocksdb::CreateDBStatistics(); - - BlockBasedTableOptions table_options; - WrappedBloom* policy = new WrappedBloom(10); - table_options.filter_policy.reset(policy); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - CreateAndReopenWithCF({"pikachu"}, options); - - const int maxKey = 10000; - for (int i = 0; i < maxKey; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - // Add a large key to make the file contain wide range - ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); - ASSERT_EQ(0U, policy->GetCounter()); - Flush(1); - - // Check if they can be found - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ(1U * maxKey, policy->GetCounter()); - - // Check if filter is useful - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, Key(i+33333))); - } - ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey*0.98); - ASSERT_EQ(2U * maxKey, policy->GetCounter()); -} - #ifndef ROCKSDB_LITE TEST_F(DBTest, SnapshotFiles) { do { Options options = CurrentOptions(); - options.write_buffer_size = 100000000; // Large write buffer + options.write_buffer_size = 100000000; // Large write buffer CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); @@ -3945,7 +2318,7 @@ TEST_F(DBTest, SnapshotFiles) { if (number > manifest_number) { manifest_number = number; ASSERT_GE(size, manifest_size); - size = manifest_size; // copy only valid MANIFEST data + size = manifest_size; // copy only valid MANIFEST data } } } @@ -4114,29 +2487,6 @@ TEST_F(DBTest, CompactOnFlush) { } while (ChangeCompactOptions()); } -namespace { -std::vector ListSpecificFiles( - Env* env, const std::string& path, const FileType expected_file_type) { - std::vector files; - std::vector file_numbers; - env->GetChildren(path, &files); - uint64_t number; - FileType type; - for (size_t i = 0; i < files.size(); ++i) { - if (ParseFileName(files[i], &number, &type)) { - if (type == expected_file_type) { - file_numbers.push_back(number); - } - } - } - return file_numbers; -} - -std::vector ListTableFiles(Env* env, const std::string& path) { - return ListSpecificFiles(env, path, kTableFile); -} -} // namespace - TEST_F(DBTest, FlushOneColumnFamily) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich", @@ -4160,136 +2510,10 @@ TEST_F(DBTest, FlushOneColumnFamily) { } #ifndef ROCKSDB_LITE -// In https://reviews.facebook.net/D20661 we change -// recovery behavior: previously for each log file each column family -// memtable was flushed, even it was empty. 
Now it's changed: -// we try to create the smallest number of table files by merging -// updates from multiple logs -TEST_F(DBTest, RecoverCheckFileAmountWithSmallWriteBuffer) { - Options options = CurrentOptions(); - options.write_buffer_size = 5000000; - CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options); - - // Since we will reopen DB with smaller write_buffer_size, - // each key will go to new SST file - ASSERT_OK(Put(1, Key(10), DummyString(1000000))); - ASSERT_OK(Put(1, Key(10), DummyString(1000000))); - ASSERT_OK(Put(1, Key(10), DummyString(1000000))); - ASSERT_OK(Put(1, Key(10), DummyString(1000000))); - - ASSERT_OK(Put(3, Key(10), DummyString(1))); - // Make 'dobrynia' to be flushed and new WAL file to be created - ASSERT_OK(Put(2, Key(10), DummyString(7500000))); - ASSERT_OK(Put(2, Key(1), DummyString(1))); - dbfull()->TEST_WaitForFlushMemTable(handles_[2]); - { - auto tables = ListTableFiles(env_, dbname_); - ASSERT_EQ(tables.size(), static_cast(1)); - // Make sure 'dobrynia' was flushed: check sst files amount - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), - static_cast(1)); - } - // New WAL file - ASSERT_OK(Put(1, Key(1), DummyString(1))); - ASSERT_OK(Put(1, Key(1), DummyString(1))); - ASSERT_OK(Put(3, Key(10), DummyString(1))); - ASSERT_OK(Put(3, Key(10), DummyString(1))); - ASSERT_OK(Put(3, Key(10), DummyString(1))); - - options.write_buffer_size = 4096; - options.arena_block_size = 4096; - ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"}, - options); - { - // No inserts => default is empty - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), - static_cast(0)); - // First 4 keys goes to separate SSTs + 1 more SST for 2 smaller keys - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), - static_cast(5)); - // 1 SST for big key + 1 SST for small one - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), - static_cast(2)); - // 1 SST for all keys - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), - static_cast(1)); - } -} - -// In https://reviews.facebook.net/D20661 we change -// recovery behavior: previously for each log file each column family -// memtable was flushed, even it wasn't empty. 
Now it's changed: -// we try to create the smallest number of table files by merging -// updates from multiple logs -TEST_F(DBTest, RecoverCheckFileAmount) { - Options options = CurrentOptions(); - options.write_buffer_size = 100000; - options.arena_block_size = 4 * 1024; - CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options); - - ASSERT_OK(Put(0, Key(1), DummyString(1))); - ASSERT_OK(Put(1, Key(1), DummyString(1))); - ASSERT_OK(Put(2, Key(1), DummyString(1))); - - // Make 'nikitich' memtable to be flushed - ASSERT_OK(Put(3, Key(10), DummyString(1002400))); - ASSERT_OK(Put(3, Key(1), DummyString(1))); - dbfull()->TEST_WaitForFlushMemTable(handles_[3]); - // 4 memtable are not flushed, 1 sst file - { - auto tables = ListTableFiles(env_, dbname_); - ASSERT_EQ(tables.size(), static_cast(1)); - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), - static_cast(1)); - } - // Memtable for 'nikitich' has flushed, new WAL file has opened - // 4 memtable still not flushed - - // Write to new WAL file - ASSERT_OK(Put(0, Key(1), DummyString(1))); - ASSERT_OK(Put(1, Key(1), DummyString(1))); - ASSERT_OK(Put(2, Key(1), DummyString(1))); - - // Fill up 'nikitich' one more time - ASSERT_OK(Put(3, Key(10), DummyString(1002400))); - // make it flush - ASSERT_OK(Put(3, Key(1), DummyString(1))); - dbfull()->TEST_WaitForFlushMemTable(handles_[3]); - // There are still 4 memtable not flushed, and 2 sst tables - ASSERT_OK(Put(0, Key(1), DummyString(1))); - ASSERT_OK(Put(1, Key(1), DummyString(1))); - ASSERT_OK(Put(2, Key(1), DummyString(1))); - - { - auto tables = ListTableFiles(env_, dbname_); - ASSERT_EQ(tables.size(), static_cast(2)); - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), - static_cast(2)); - } - - ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"}, - options); - { - std::vector table_files = ListTableFiles(env_, dbname_); - // Check, that records for 'default', 'dobrynia' and 'pikachu' from - // first, second and third WALs went to the same SST. 
- // So, there is 6 SSTs: three for 'nikitich', one for 'default', one for - // 'dobrynia', one for 'pikachu' - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), - static_cast(1)); - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), - static_cast(3)); - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), - static_cast(1)); - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), - static_cast(1)); - } -} - TEST_F(DBTest, SharedWriteBuffer) { Options options = CurrentOptions(); options.db_write_buffer_size = 100000; // this is the real limit - options.write_buffer_size = 500000; // this is never hit + options.write_buffer_size = 500000; // this is never hit CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options); // Trigger a flush on CF "nikitich" @@ -4455,318 +2679,7 @@ TEST_F(DBTest, PurgeInfoLogs) { } } -TEST_F(DBTest, SyncMultipleLogs) { - const uint64_t kNumBatches = 2; - const int kBatchSize = 1000; - - Options options = CurrentOptions(); - options.create_if_missing = true; - options.write_buffer_size = 4096; - Reopen(options); - - WriteBatch batch; - WriteOptions wo; - wo.sync = true; - - for (uint64_t b = 0; b < kNumBatches; b++) { - batch.Clear(); - for (int i = 0; i < kBatchSize; i++) { - batch.Put(Key(i), DummyString(128)); - } - - dbfull()->Write(wo, &batch); - } - - ASSERT_OK(dbfull()->SyncWAL()); -} - #ifndef ROCKSDB_LITE -// -// Test WAL recovery for the various modes available -// -class RecoveryTestHelper { - public: - // Number of WAL files to generate - static const int kWALFilesCount = 10; - // Starting number for the WAL file name like 00010.log - static const int kWALFileOffset = 10; - // Keys to be written per WAL file - static const int kKeysPerWALFile = 1024; - // Size of the value - static const int kValueSize = 10; - - // Create WAL files with values filled in - static void FillData(DBTest* test, Options& options, const size_t wal_count, - size_t& count) { - DBOptions& db_options = options; - - count = 0; - - shared_ptr table_cache = NewLRUCache(50000, 16); - EnvOptions env_options; - WriteBuffer write_buffer(db_options.db_write_buffer_size); - - unique_ptr versions; - unique_ptr wal_manager; - WriteController write_controller; - - versions.reset(new VersionSet(test->dbname_, &db_options, env_options, - table_cache.get(), &write_buffer, - &write_controller)); - - wal_manager.reset(new WalManager(db_options, env_options)); - - std::unique_ptr current_log_writer; - - for (size_t j = kWALFileOffset; j < wal_count + kWALFileOffset; j++) { - uint64_t current_log_number = j; - std::string fname = LogFileName(test->dbname_, current_log_number); - unique_ptr file; - ASSERT_OK(db_options.env->NewWritableFile(fname, &file, env_options)); - unique_ptr file_writer( - new WritableFileWriter(std::move(file), env_options)); - current_log_writer.reset( - new log::Writer(std::move(file_writer), current_log_number, - db_options.recycle_log_file_num > 0)); - - for (int i = 0; i < kKeysPerWALFile; i++) { - std::string key = "key" + ToString(count++); - std::string value = test->DummyString(kValueSize); - assert(current_log_writer.get() != nullptr); - uint64_t seq = versions->LastSequence() + 1; - WriteBatch batch; - batch.Put(key, value); - WriteBatchInternal::SetSequence(&batch, seq); - current_log_writer->AddRecord(WriteBatchInternal::Contents(&batch)); - versions->SetLastSequence(seq); - } - } - } - - // Recreate and fill the store with some data - static size_t FillData(DBTest* test, Options& options) { - 
options.create_if_missing = true; - test->DestroyAndReopen(options); - test->Close(); - - size_t count = 0; - FillData(test, options, kWALFilesCount, count); - return count; - } - - // Read back all the keys we wrote and return the number of keys found - static size_t GetData(DBTest* test) { - size_t count = 0; - for (size_t i = 0; i < kWALFilesCount * kKeysPerWALFile; i++) { - if (test->Get("key" + ToString(i)) != "NOT_FOUND") { - ++count; - } - } - return count; - } - - // Manuall corrupt the specified WAL - static void CorruptWAL(DBTest* test, Options& options, const double off, - const double len, const int wal_file_id, - const bool trunc = false) { - Env* env = options.env; - std::string fname = LogFileName(test->dbname_, wal_file_id); - uint64_t size; - ASSERT_OK(env->GetFileSize(fname, &size)); - ASSERT_GT(size, 0); -#ifdef OS_WIN - // Windows disk cache behaves differently. When we truncate - // the original content is still in the cache due to the original - // handle is still open. Generally, in Windows, one prohibits - // shared access to files and it is not needed for WAL but we allow - // it to induce corruption at various tests. - test->Close(); -#endif - if (trunc) { - ASSERT_EQ(0, truncate(fname.c_str(), - static_cast(size * off))); - } else { - InduceCorruption(fname, static_cast(size * off), - static_cast(size * len)); - } - } - - // Overwrite data with 'a' from offset for length len - static void InduceCorruption(const std::string& filename, size_t offset, - size_t len) { - ASSERT_GT(len, 0U); - - int fd = open(filename.c_str(), O_RDWR); - - ASSERT_GT(fd, 0); - ASSERT_EQ(offset, lseek(fd, static_cast(offset), SEEK_SET)); - - void* buf = alloca(len); - memset(buf, 'a', len); - ASSERT_EQ(len, write(fd, buf, static_cast(len))); - - close(fd); - } -}; - -// Test scope: -// - We expect to open the data store when there is incomplete trailing writes -// at the end of any of the logs -// - We do not expect to open the data store for corruption -TEST_F(DBTest, kTolerateCorruptedTailRecords) { - const int jstart = RecoveryTestHelper::kWALFileOffset; - const int jend = jstart + RecoveryTestHelper::kWALFilesCount; - - for (auto trunc : {true, false}) { /* Corruption style */ - for (int i = 0; i < 4; i++) { /* Corruption offset position */ - for (int j = jstart; j < jend; j++) { /* WAL file */ - // Fill data for testing - Options options = CurrentOptions(); - const size_t row_count = RecoveryTestHelper::FillData(this, options); - // test checksum failure or parsing - RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, - /*len%=*/.1, /*wal=*/j, trunc); - - if (trunc) { - options.wal_recovery_mode = - WALRecoveryMode::kTolerateCorruptedTailRecords; - options.create_if_missing = false; - ASSERT_OK(TryReopen(options)); - const size_t recovered_row_count = RecoveryTestHelper::GetData(this); - ASSERT_TRUE(i == 0 || recovered_row_count > 0); - ASSERT_LT(recovered_row_count, row_count); - } else { - options.wal_recovery_mode = - WALRecoveryMode::kTolerateCorruptedTailRecords; - ASSERT_NOK(TryReopen(options)); - } - } - } - } -} - -// Test scope: -// We don't expect the data store to be opened if there is any corruption -// (leading, middle or trailing -- incomplete writes or corruption) -TEST_F(DBTest, kAbsoluteConsistency) { - const int jstart = RecoveryTestHelper::kWALFileOffset; - const int jend = jstart + RecoveryTestHelper::kWALFilesCount; - - // Verify clean slate behavior - Options options = CurrentOptions(); - const size_t row_count = RecoveryTestHelper::FillData(this, 
options); - options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; - options.create_if_missing = false; - ASSERT_OK(TryReopen(options)); - ASSERT_EQ(RecoveryTestHelper::GetData(this), row_count); - - for (auto trunc : {true, false}) { /* Corruption style */ - for (int i = 0; i < 4; i++) { /* Corruption offset position */ - if (trunc && i == 0) { - continue; - } - - for (int j = jstart; j < jend; j++) { /* wal files */ - // fill with new date - RecoveryTestHelper::FillData(this, options); - // corrupt the wal - RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, - /*len%=*/.1, j, trunc); - // verify - options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; - options.create_if_missing = false; - ASSERT_NOK(TryReopen(options)); - } - } - } -} - -// Test scope: -// - We expect to open data store under all circumstances -// - We expect only data upto the point where the first error was encountered -TEST_F(DBTest, kPointInTimeRecovery) { - const int jstart = RecoveryTestHelper::kWALFileOffset; - const int jend = jstart + RecoveryTestHelper::kWALFilesCount; - const int maxkeys = - RecoveryTestHelper::kWALFilesCount * RecoveryTestHelper::kKeysPerWALFile; - - for (auto trunc : {true, false}) { /* Corruption style */ - for (int i = 0; i < 4; i++) { /* Offset of corruption */ - for (int j = jstart; j < jend; j++) { /* WAL file */ - // Fill data for testing - Options options = CurrentOptions(); - const size_t row_count = RecoveryTestHelper::FillData(this, options); - - // Corrupt the wal - RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, - /*len%=*/.1, j, trunc); - - // Verify - options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - options.create_if_missing = false; - ASSERT_OK(TryReopen(options)); - - // Probe data for invariants - size_t recovered_row_count = RecoveryTestHelper::GetData(this); - ASSERT_LT(recovered_row_count, row_count); - - bool expect_data = true; - for (size_t k = 0; k < maxkeys; ++k) { - bool found = Get("key" + ToString(i)) != "NOT_FOUND"; - if (expect_data && !found) { - expect_data = false; - } - ASSERT_EQ(found, expect_data); - } - - const size_t min = RecoveryTestHelper::kKeysPerWALFile * - (j - RecoveryTestHelper::kWALFileOffset); - ASSERT_GE(recovered_row_count, min); - if (!trunc && i != 0) { - const size_t max = RecoveryTestHelper::kKeysPerWALFile * - (j - RecoveryTestHelper::kWALFileOffset + 1); - ASSERT_LE(recovered_row_count, max); - } - } - } - } -} - -// Test scope: -// - We expect to open the data store under all scenarios -// - We expect to have recovered records past the corruption zone -TEST_F(DBTest, kSkipAnyCorruptedRecords) { - const int jstart = RecoveryTestHelper::kWALFileOffset; - const int jend = jstart + RecoveryTestHelper::kWALFilesCount; - - for (auto trunc : {true, false}) { /* Corruption style */ - for (int i = 0; i < 4; i++) { /* Corruption offset */ - for (int j = jstart; j < jend; j++) { /* wal files */ - // Fill data for testing - Options options = CurrentOptions(); - const size_t row_count = RecoveryTestHelper::FillData(this, options); - - // Corrupt the WAL - RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, - /*len%=*/.1, j, trunc); - - // Verify behavior - options.wal_recovery_mode = WALRecoveryMode::kSkipAnyCorruptedRecords; - options.create_if_missing = false; - ASSERT_OK(TryReopen(options)); - - // Probe data for invariants - size_t recovered_row_count = RecoveryTestHelper::GetData(this); - ASSERT_LT(recovered_row_count, row_count); - - if (!trunc) { - ASSERT_TRUE(i 
!= 0 || recovered_row_count > 0); - } - } - } - } -} - // Multi-threaded test: namespace { @@ -4850,8 +2763,8 @@ static void MTThreadBody(void* arg) { int unique_id = -1; for (int i = 0; i < kColumnFamilies; ++i) { int k, w, c, cf, u; - ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w, - &c, &cf, &u)) + ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w, &c, + &cf, &u)) << values[i]; ASSERT_EQ(k, key); ASSERT_GE(w, 0); @@ -5015,7 +2928,7 @@ namespace { typedef std::map KVMap; } -class ModelDB: public DB { +class ModelDB : public DB { public: class ModelSnapshot : public Snapshot { public: @@ -5109,7 +3022,7 @@ class ModelDB: public DB { if (value_found != nullptr) { *value_found = false; } - return true; // Not Supported directly + return true; // Not Supported directly } using DB::NewIterator; virtual Iterator* NewIterator(const ReadOptions& options, @@ -5150,7 +3063,7 @@ class ModelDB: public DB { } virtual void Merge(const Slice& key, const Slice& value) override { // ignore merge for now - //(*map_)[key.ToString()] = value.ToString(); + // (*map_)[key.ToString()] = value.ToString(); } virtual void Delete(const Slice& key) override { map_->erase(key.ToString()); @@ -5192,11 +3105,11 @@ class ModelDB: public DB { } using DB::CompactFiles; - virtual Status CompactFiles( - const CompactionOptions& compact_options, - ColumnFamilyHandle* column_family, - const std::vector& input_file_names, - const int output_level, const int output_path_id = -1) override { + virtual Status CompactFiles(const CompactionOptions& compact_options, + ColumnFamilyHandle* column_family, + const std::vector& input_file_names, + const int output_level, + const int output_path_id = -1) override { return Status::NotSupported("Not supported operation."); } @@ -5250,9 +3163,7 @@ class ModelDB: public DB { return ret; } - virtual Status SyncWAL() override { - return Status::OK(); - } + virtual Status SyncWAL() override { return Status::OK(); } #ifndef ROCKSDB_LITE virtual Status DisableFileDeletions() override { return Status::OK(); } @@ -5273,8 +3184,8 @@ class ModelDB: public DB { virtual Status GetUpdatesSince( rocksdb::SequenceNumber, unique_ptr*, - const TransactionLogIterator::ReadOptions& - read_options = TransactionLogIterator::ReadOptions()) override { + const TransactionLogIterator::ReadOptions& read_options = + TransactionLogIterator::ReadOptions()) override { return Status::NotSupported("Not supported in Model DB"); } @@ -5294,11 +3205,10 @@ class ModelDB: public DB { } private: - class ModelIter: public Iterator { + class ModelIter : public Iterator { public: ModelIter(const KVMap* map, bool owned) - : map_(map), owned_(owned), iter_(map_->end()) { - } + : map_(map), owned_(owned), iter_(map_->end()) {} ~ModelIter() { if (owned_) delete map_; } @@ -5341,15 +3251,13 @@ static std::string RandomKey(Random* rnd, int minimum = 0) { int len; do { len = (rnd->OneIn(3) - ? 1 // Short sometimes to encourage collisions - : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10))); + ? 1 // Short sometimes to encourage collisions + : (rnd->OneIn(100) ? 
rnd->Skewed(10) : rnd->Uniform(10))); } while (len < minimum); return test::RandomKey(rnd, len); } -static bool CompareIterators(int step, - DB* model, - DB* db, +static bool CompareIterators(int step, DB* model, DB* db, const Snapshot* model_snap, const Snapshot* db_snap) { ReadOptions options; @@ -5360,12 +3268,10 @@ static bool CompareIterators(int step, bool ok = true; int count = 0; for (miter->SeekToFirst(), dbiter->SeekToFirst(); - ok && miter->Valid() && dbiter->Valid(); - miter->Next(), dbiter->Next()) { + ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) { count++; if (miter->key().compare(dbiter->key()) != 0) { - fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", - step, + fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", step, EscapeString(miter->key()).c_str(), EscapeString(dbiter->key()).c_str()); ok = false; @@ -5374,8 +3280,7 @@ static bool CompareIterators(int step, if (miter->value().compare(dbiter->value()) != 0) { fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n", - step, - EscapeString(miter->key()).c_str(), + step, EscapeString(miter->key()).c_str(), EscapeString(miter->value()).c_str(), EscapeString(miter->value()).c_str()); ok = false; @@ -5426,80 +3331,77 @@ TEST_P(DBTestRandomized, Randomized) { Random rnd(test::RandomSeed() + GetParam()); ModelDB model(options); - const int N = 10000; - const Snapshot* model_snap = nullptr; - const Snapshot* db_snap = nullptr; - std::string k, v; - for (int step = 0; step < N; step++) { - // TODO(sanjay): Test Get() works - int p = rnd.Uniform(100); - int minimum = 0; - if (option_config_ == kHashSkipList || - option_config_ == kHashLinkList || - option_config_ == kHashCuckoo || - option_config_ == kPlainTableFirstBytePrefix || - option_config_ == kBlockBasedTableWithWholeKeyHashIndex || - option_config_ == kBlockBasedTableWithPrefixHashIndex) { - minimum = 1; - } - if (p < 45) { // Put - k = RandomKey(&rnd, minimum); - v = RandomString(&rnd, - rnd.OneIn(20) - ? 100 + rnd.Uniform(100) - : rnd.Uniform(8)); - ASSERT_OK(model.Put(WriteOptions(), k, v)); - ASSERT_OK(db_->Put(WriteOptions(), k, v)); - } else if (p < 90) { // Delete - k = RandomKey(&rnd, minimum); - ASSERT_OK(model.Delete(WriteOptions(), k)); - ASSERT_OK(db_->Delete(WriteOptions(), k)); - } else { // Multi-element batch - WriteBatch b; - const int num = rnd.Uniform(8); - for (int i = 0; i < num; i++) { - if (i == 0 || !rnd.OneIn(10)) { - k = RandomKey(&rnd, minimum); - } else { - // Periodically re-use the same key from the previous iter, so - // we have multiple entries in the write batch for the same key - } - if (rnd.OneIn(2)) { - v = RandomString(&rnd, rnd.Uniform(10)); - b.Put(k, v); - } else { - b.Delete(k); - } - } - ASSERT_OK(model.Write(WriteOptions(), &b)); - ASSERT_OK(db_->Write(WriteOptions(), &b)); - } - - if ((step % 100) == 0) { - // For DB instances that use the hash index + block-based table, the - // iterator will be invalid right when seeking a non-existent key, right - // than return a key that is close to it. 
- if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex && - option_config_ != kBlockBasedTableWithPrefixHashIndex) { - ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); - ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap)); - } - - // Save a snapshot from each DB this time that we'll use next - // time we compare things, to make sure the current state is - // preserved with the snapshot - if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); - if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); - - Reopen(options); - ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); - - model_snap = model.GetSnapshot(); - db_snap = db_->GetSnapshot(); - } + const int N = 10000; + const Snapshot* model_snap = nullptr; + const Snapshot* db_snap = nullptr; + std::string k, v; + for (int step = 0; step < N; step++) { + // TODO(sanjay): Test Get() works + int p = rnd.Uniform(100); + int minimum = 0; + if (option_config_ == kHashSkipList || option_config_ == kHashLinkList || + option_config_ == kHashCuckoo || + option_config_ == kPlainTableFirstBytePrefix || + option_config_ == kBlockBasedTableWithWholeKeyHashIndex || + option_config_ == kBlockBasedTableWithPrefixHashIndex) { + minimum = 1; } - if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); - if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); + if (p < 45) { // Put + k = RandomKey(&rnd, minimum); + v = RandomString(&rnd, + rnd.OneIn(20) ? 100 + rnd.Uniform(100) : rnd.Uniform(8)); + ASSERT_OK(model.Put(WriteOptions(), k, v)); + ASSERT_OK(db_->Put(WriteOptions(), k, v)); + } else if (p < 90) { // Delete + k = RandomKey(&rnd, minimum); + ASSERT_OK(model.Delete(WriteOptions(), k)); + ASSERT_OK(db_->Delete(WriteOptions(), k)); + } else { // Multi-element batch + WriteBatch b; + const int num = rnd.Uniform(8); + for (int i = 0; i < num; i++) { + if (i == 0 || !rnd.OneIn(10)) { + k = RandomKey(&rnd, minimum); + } else { + // Periodically re-use the same key from the previous iter, so + // we have multiple entries in the write batch for the same key + } + if (rnd.OneIn(2)) { + v = RandomString(&rnd, rnd.Uniform(10)); + b.Put(k, v); + } else { + b.Delete(k); + } + } + ASSERT_OK(model.Write(WriteOptions(), &b)); + ASSERT_OK(db_->Write(WriteOptions(), &b)); + } + + if ((step % 100) == 0) { + // For DB instances that use the hash index + block-based table, the + // iterator will be invalid right when seeking a non-existent key, right + // than return a key that is close to it. 
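The pattern behind this test, distilled: every write is mirrored into an in-memory map, and correctness is checked by walking the DB and the model in lock step. A minimal version of that comparison, assuming the DB is expected to hold exactly the model's contents:

#include <map>
#include <memory>
#include <string>
#include "rocksdb/db.h"

bool MatchesModel(rocksdb::DB* db,
                  const std::map<std::string, std::string>& model) {
  std::unique_ptr<rocksdb::Iterator> it(
      db->NewIterator(rocksdb::ReadOptions()));
  auto mit = model.begin();
  for (it->SeekToFirst(); it->Valid(); it->Next(), ++mit) {
    if (mit == model.end() || it->key().ToString() != mit->first ||
        it->value().ToString() != mit->second) {
      return false;  // DB and model diverged
    }
  }
  // Both sequences must end together, and the scan must not have errored.
  return mit == model.end() && it->status().ok();
}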
+ if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex && + option_config_ != kBlockBasedTableWithPrefixHashIndex) { + ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); + ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap)); + } + + // Save a snapshot from each DB this time that we'll use next + // time we compare things, to make sure the current state is + // preserved with the snapshot + if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); + if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); + + Reopen(options); + ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); + + model_snap = model.GetSnapshot(); + db_snap = db_->GetSnapshot(); + } + } + if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); + if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); } TEST_F(DBTest, MultiGetSimple) { @@ -5559,116 +3461,11 @@ TEST_F(DBTest, MultiGetEmpty) { cfs.push_back(handles_[0]); cfs.push_back(handles_[1]); s = db_->MultiGet(ReadOptions(), cfs, keys, &values); - ASSERT_EQ((int)s.size(), 2); + ASSERT_EQ(static_cast(s.size()), 2); ASSERT_TRUE(s[0].IsNotFound() && s[1].IsNotFound()); } while (ChangeCompactOptions()); } -#ifndef ROCKSDB_LITE -namespace { -void PrefixScanInit(DBTest *dbtest) { - char buf[100]; - std::string keystr; - const int small_range_sstfiles = 5; - const int big_range_sstfiles = 5; - - // Generate 11 sst files with the following prefix ranges. - // GROUP 0: [0,10] (level 1) - // GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6] (level 0) - // GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10] (level 0) - // - // A seek with the previous API would do 11 random I/Os (to all the - // files). With the new API and a prefix filter enabled, we should - // only do 2 random I/O, to the 2 files containing the key. 
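The configuration that turns those 11 probes into 2 is the prefix extractor plus a prefix-only bloom filter: a seek then consults only the files whose filter may contain the prefix. Wired up standalone (the path and the 8-byte prefix length are illustrative):

#include <memory>
#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"

void PrefixSeek() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Keys are bucketed by their first 8 bytes; bloom bits cover prefixes only.
  options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(8));
  rocksdb::BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10));
  table_options.whole_key_filtering = false;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  rocksdb::DB* db;
  rocksdb::DB::Open(options, "/tmp/prefix_demo", &db);
  std::unique_ptr<rocksdb::Iterator> it(
      db->NewIterator(rocksdb::ReadOptions()));
  // Files whose prefix filter rules out "03______" are never read.
  for (it->Seek("03______:");
       it->Valid() && it->key().starts_with("03______"); it->Next()) {
    // all keys sharing the prefix
  }
  delete db;
}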
- - // GROUP 0 - snprintf(buf, sizeof(buf), "%02d______:start", 0); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - snprintf(buf, sizeof(buf), "%02d______:end", 10); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - dbtest->Flush(); - dbtest->dbfull()->CompactRange(CompactRangeOptions(), nullptr, - nullptr); // move to level 1 - - // GROUP 1 - for (int i = 1; i <= small_range_sstfiles; i++) { - snprintf(buf, sizeof(buf), "%02d______:start", i); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - snprintf(buf, sizeof(buf), "%02d______:end", i+1); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - dbtest->Flush(); - } - - // GROUP 2 - for (int i = 1; i <= big_range_sstfiles; i++) { - snprintf(buf, sizeof(buf), "%02d______:start", 0); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - snprintf(buf, sizeof(buf), "%02d______:end", - small_range_sstfiles+i+1); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - dbtest->Flush(); - } -} -} // namespace - -TEST_F(DBTest, PrefixScan) { - XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, - kSkipNoPrefix); - while (ChangeFilterOptions()) { - int count; - Slice prefix; - Slice key; - char buf[100]; - Iterator* iter; - snprintf(buf, sizeof(buf), "03______:"); - prefix = Slice(buf, 8); - key = Slice(buf, 9); - ASSERT_EQ(key.difference_offset(prefix), 8); - ASSERT_EQ(prefix.difference_offset(key), 8); - // db configs - env_->count_random_reads_ = true; - Options options = CurrentOptions(); - options.env = env_; - options.prefix_extractor.reset(NewFixedPrefixTransform(8)); - options.disable_auto_compactions = true; - options.max_background_compactions = 2; - options.create_if_missing = true; - options.memtable_factory.reset(NewHashSkipListRepFactory(16)); - - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - table_options.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - // 11 RAND I/Os - DestroyAndReopen(options); - PrefixScanInit(this); - count = 0; - env_->random_read_counter_.Reset(); - iter = db_->NewIterator(ReadOptions()); - for (iter->Seek(prefix); iter->Valid(); iter->Next()) { - if (! 
iter->key().starts_with(prefix)) { - break; - } - count++; - } - ASSERT_OK(iter->status()); - delete iter; - ASSERT_EQ(count, 2); - ASSERT_EQ(env_->random_read_counter_.Read(), 2); - Close(); - } // end of while - XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, 0); -} -#endif // ROCKSDB_LITE - TEST_F(DBTest, BlockBasedTablePrefixIndexTest) { // create a DB with block prefix index BlockBasedTableOptions table_options; @@ -5677,7 +3474,6 @@ TEST_F(DBTest, BlockBasedTablePrefixIndexTest) { options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - Reopen(options); ASSERT_OK(Put("k1", "v1")); Flush(); @@ -5736,7 +3532,7 @@ TEST_P(DBTestWithParam, FIFOCompactionTest) { // second iteration -- manual compaction Options options; options.compaction_style = kCompactionStyleFIFO; - options.write_buffer_size = 100 << 10; // 100KB + options.write_buffer_size = 100 << 10; // 100KB options.arena_block_size = 4096; options.compaction_options_fifo.max_table_files_size = 500 << 10; // 500KB options.compression = kNoCompression; @@ -5788,7 +3584,7 @@ TEST_F(DBTest, SimpleWriteTimeoutTest) { */ TEST_F(DBTest, RateLimitingTest) { Options options = CurrentOptions(); - options.write_buffer_size = 1 << 20; // 1MB + options.write_buffer_size = 1 << 20; // 1MB options.level0_file_num_compaction_trigger = 2; options.target_file_size_base = 1 << 20; // 1MB options.max_bytes_for_level_base = 4 << 20; // 4MB @@ -5807,8 +3603,8 @@ TEST_F(DBTest, RateLimitingTest) { uint64_t start = env_->NowMicros(); // Write ~96M data for (int64_t i = 0; i < (96 << 10); ++i) { - ASSERT_OK(Put(RandomString(&rnd, 32), - RandomString(&rnd, (1 << 10) + 1), wo)); + ASSERT_OK( + Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo)); } uint64_t elapsed = env_->NowMicros() - start; double raw_rate = env_->bytes_written_ * 1000000.0 / elapsed; @@ -5816,15 +3612,15 @@ TEST_F(DBTest, RateLimitingTest) { // # rate limiting with 0.7 x threshold options.rate_limiter.reset( - NewGenericRateLimiter(static_cast(0.7 * raw_rate))); + NewGenericRateLimiter(static_cast(0.7 * raw_rate))); env_->bytes_written_ = 0; DestroyAndReopen(options); start = env_->NowMicros(); // Write ~96M data for (int64_t i = 0; i < (96 << 10); ++i) { - ASSERT_OK(Put(RandomString(&rnd, 32), - RandomString(&rnd, (1 << 10) + 1), wo)); + ASSERT_OK( + Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo)); } elapsed = env_->NowMicros() - start; Close(); @@ -5835,15 +3631,15 @@ TEST_F(DBTest, RateLimitingTest) { // # rate limiting with half of the raw_rate options.rate_limiter.reset( - NewGenericRateLimiter(static_cast(raw_rate / 2))); + NewGenericRateLimiter(static_cast(raw_rate / 2))); env_->bytes_written_ = 0; DestroyAndReopen(options); start = env_->NowMicros(); // Write ~96M data for (int64_t i = 0; i < (96 << 10); ++i) { - ASSERT_OK(Put(RandomString(&rnd, 32), - RandomString(&rnd, (1 << 10) + 1), wo)); + ASSERT_OK( + Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo)); } elapsed = env_->NowMicros() - start; Close(); @@ -5963,167 +3759,6 @@ TEST_F(DBTest, SanitizeNumThreads) { } } -TEST_F(DBTest, DBIteratorBoundTest) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - - options.prefix_extractor = nullptr; - DestroyAndReopen(options); - ASSERT_OK(Put("a", "0")); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("foo1", "bar1")); - ASSERT_OK(Put("g1", "0")); - - // testing basic case with no 
iterate_upper_bound and no prefix_extractor - { - ReadOptions ro; - ro.iterate_upper_bound = nullptr; - - std::unique_ptr iter(db_->NewIterator(ro)); - - iter->Seek("foo"); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo")), 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo1")), 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("g1")), 0); - } - - // testing iterate_upper_bound and forward iterator - // to make sure it stops at bound - { - ReadOptions ro; - // iterate_upper_bound points beyond the last expected entry - Slice prefix("foo2"); - ro.iterate_upper_bound = &prefix; - - std::unique_ptr iter(db_->NewIterator(ro)); - - iter->Seek("foo"); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo")), 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(("foo1")), 0); - - iter->Next(); - // should stop here... - ASSERT_TRUE(!iter->Valid()); - } - // Testing SeekToLast with iterate_upper_bound set - { - ReadOptions ro; - - Slice prefix("foo"); - ro.iterate_upper_bound = &prefix; - - std::unique_ptr iter(db_->NewIterator(ro)); - - iter->SeekToLast(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("a")), 0); - } - - // prefix is the first letter of the key - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - - DestroyAndReopen(options); - ASSERT_OK(Put("a", "0")); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("foo1", "bar1")); - ASSERT_OK(Put("g1", "0")); - - // testing with iterate_upper_bound and prefix_extractor - // Seek target and iterate_upper_bound are not is same prefix - // This should be an error - { - ReadOptions ro; - Slice upper_bound("g"); - ro.iterate_upper_bound = &upper_bound; - - std::unique_ptr iter(db_->NewIterator(ro)); - - iter->Seek("foo"); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo1", iter->key().ToString()); - - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - } - - // testing that iterate_upper_bound prevents iterating over deleted items - // if the bound has already reached - { - options.prefix_extractor = nullptr; - DestroyAndReopen(options); - ASSERT_OK(Put("a", "0")); - ASSERT_OK(Put("b", "0")); - ASSERT_OK(Put("b1", "0")); - ASSERT_OK(Put("c", "0")); - ASSERT_OK(Put("d", "0")); - ASSERT_OK(Put("e", "0")); - ASSERT_OK(Delete("c")); - ASSERT_OK(Delete("d")); - - // base case with no bound - ReadOptions ro; - ro.iterate_upper_bound = nullptr; - - std::unique_ptr iter(db_->NewIterator(ro)); - - iter->Seek("b"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("b")), 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(("b1")), 0); - - perf_context.Reset(); - iter->Next(); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(static_cast(perf_context.internal_delete_skipped_count), 2); - - // now testing with iterate_bound - Slice prefix("c"); - ro.iterate_upper_bound = &prefix; - - iter.reset(db_->NewIterator(ro)); - - perf_context.Reset(); - - iter->Seek("b"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("b")), 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(("b1")), 0); - - iter->Next(); - // the iteration should stop as soon as the the bound key is reached - // even though the key is deleted - // hence internal_delete_skipped_count should be 0 - ASSERT_TRUE(!iter->Valid()); - 
ASSERT_EQ(static_cast(perf_context.internal_delete_skipped_count), 0); - } -} - TEST_F(DBTest, WriteSingleThreadEntry) { std::vector threads; dbfull()->TEST_LockMutex(); @@ -6216,7 +3851,7 @@ TEST_F(DBTest, DynamicMemtableOptions) { // Increase buffer size ASSERT_OK(dbfull()->SetOptions({ - {"write_buffer_size", "131072"}, + {"write_buffer_size", "131072"}, })); // The existing memtable is still 64KB in size, after it becomes immutable, @@ -6262,7 +3897,7 @@ TEST_F(DBTest, DynamicMemtableOptions) { // Increase ASSERT_OK(dbfull()->SetOptions({ - {"max_write_buffer_number", "8"}, + {"max_write_buffer_number", "8"}, })); // Clean up memtable and L0 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); @@ -6275,8 +3910,8 @@ TEST_F(DBTest, DynamicMemtableOptions) { ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions())); count++; } - // Windows fails this test. Will tune in the future and figure out - // approp number +// Windows fails this test. Will tune in the future and figure out +// approp number #ifndef OS_WIN ASSERT_GT(static_cast(count), 512 * 0.8); ASSERT_LT(static_cast(count), 512 * 1.2); @@ -6285,7 +3920,7 @@ TEST_F(DBTest, DynamicMemtableOptions) { // Decrease ASSERT_OK(dbfull()->SetOptions({ - {"max_write_buffer_number", "4"}, + {"max_write_buffer_number", "4"}, })); // Clean up memtable and L0 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); @@ -6299,8 +3934,8 @@ TEST_F(DBTest, DynamicMemtableOptions) { ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions())); count++; } - // Windows fails this test. Will tune in the future and figure out - // approp number +// Windows fails this test. Will tune in the future and figure out +// approp number #ifndef OS_WIN ASSERT_GT(static_cast(count), 256 * 0.8); ASSERT_LT(static_cast(count), 266 * 1.2); @@ -6356,34 +3991,31 @@ TEST_F(DBTest, GetThreadStatus) { thread_type_counts[thread.thread_type]++; } // Verify the total number of threades - ASSERT_EQ( - thread_type_counts[ThreadStatus::HIGH_PRIORITY] + - thread_type_counts[ThreadStatus::LOW_PRIORITY], - kHighPriCounts[test] + kLowPriCounts[test]); + ASSERT_EQ(thread_type_counts[ThreadStatus::HIGH_PRIORITY] + + thread_type_counts[ThreadStatus::LOW_PRIORITY], + kHighPriCounts[test] + kLowPriCounts[test]); // Verify the number of high-priority threads - ASSERT_EQ( - thread_type_counts[ThreadStatus::HIGH_PRIORITY], - kHighPriCounts[test]); + ASSERT_EQ(thread_type_counts[ThreadStatus::HIGH_PRIORITY], + kHighPriCounts[test]); // Verify the number of low-priority threads - ASSERT_EQ( - thread_type_counts[ThreadStatus::LOW_PRIORITY], - kLowPriCounts[test]); + ASSERT_EQ(thread_type_counts[ThreadStatus::LOW_PRIORITY], + kLowPriCounts[test]); } if (i == 0) { // repeat the test with multiple column families CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options); - env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap( - handles_, true); + env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, + true); } } db_->DropColumnFamily(handles_[2]); delete handles_[2]; handles_.erase(handles_.begin() + 2); - env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap( - handles_, true); + env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, + true); Close(); - env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap( - handles_, true); + env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, + true); } TEST_F(DBTest, DisableThreadStatus) { @@ -6393,8 +4025,8 @@ TEST_F(DBTest, 
@@ -6393,8 +4025,8 @@ TEST_F(DBTest, DisableThreadStatus) {
   TryReopen(options);
   CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
   // Verify none of the column family info exists
-  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(
-      handles_, false);
+  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
+                                                                 false);
 }
 
 TEST_F(DBTest, ThreadStatusFlush) {
@@ -6608,7 +4240,7 @@ TEST_P(DBTestWithParam, PreShutdownMultipleCompaction) {
       {{"FlushJob::FlushJob()", "CompactionJob::Run():Start"},
        {"CompactionJob::Run():Start",
         "DBTest::PreShutdownMultipleCompaction:Preshutdown"},
-       {"CompactionJob::Run():Start", 
+       {"CompactionJob::Run():Start",
         "DBTest::PreShutdownMultipleCompaction:VerifyCompaction"},
        {"DBTest::PreShutdownMultipleCompaction:Preshutdown",
         "CompactionJob::Run():End"},
@@ -6696,7 +4328,7 @@ TEST_P(DBTestWithParam, PreShutdownCompactionMiddle) {
   rocksdb::SyncPoint::GetInstance()->LoadDependency(
       {{"DBTest::PreShutdownCompactionMiddle:Preshutdown",
         "CompactionJob::Run():Inprogress"},
-       {"CompactionJob::Run():Start", 
+       {"CompactionJob::Run():Start",
         "DBTest::PreShutdownCompactionMiddle:VerifyCompaction"},
        {"CompactionJob::Run():Inprogress", "CompactionJob::Run():End"},
        {"CompactionJob::Run():End",
@@ -6755,38 +4387,6 @@ TEST_F(DBTest, FlushOnDestroy) {
   CancelAllBackgroundWork(db_);
 }
 
-namespace {
-class OnFileDeletionListener : public EventListener {
- public:
-  OnFileDeletionListener() :
-      matched_count_(0),
-      expected_file_name_("") {}
-
-  void SetExpectedFileName(
-      const std::string file_name) {
-    expected_file_name_ = file_name;
-  }
-
-  void VerifyMatchedCount(size_t expected_value) {
-    ASSERT_EQ(matched_count_, expected_value);
-  }
-
-  void OnTableFileDeleted(
-      const TableFileDeletionInfo& info) override {
-    if (expected_file_name_ != "") {
-      ASSERT_EQ(expected_file_name_, info.file_path);
-      expected_file_name_ = "";
-      matched_count_++;
-    }
-  }
-
- private:
-  size_t matched_count_;
-  std::string expected_file_name_;
-};
-
-}  // namespace
-
 TEST_F(DBTest, DynamicLevelCompressionPerLevel) {
   if (!Snappy_Supported()) {
     return;
@@ -7054,10 +4654,8 @@ TEST_F(DBTest, DynamicCompactionOptions) {
   // Writing to 64KB L0 files should trigger a compaction. Since these
   // 2 L0 files have the same key range, compaction merge them and should
   // result in 2 32KB L1 files.
-  ASSERT_OK(dbfull()->SetOptions({
-    {"level0_file_num_compaction_trigger", "2"},
-    {"target_file_size_base", ToString(k32KB) }
-  }));
+  ASSERT_OK(dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
+                                  {"target_file_size_base", ToString(k32KB)}}));
 
   gen_l0_kb(0, 64, 1);
   ASSERT_EQ("1,1", FilesPerLevel());
@@ -7076,9 +4674,8 @@ TEST_F(DBTest, DynamicCompactionOptions) {
   // Increase level base size to 256KB and write enough data that will
   // fill L1 and L2. L1 size should be around 256KB while L2 size should be
   // around 256KB x 4.
-  ASSERT_OK(dbfull()->SetOptions({
-    {"max_bytes_for_level_base", ToString(k1MB) }
-  }));
+  ASSERT_OK(
+      dbfull()->SetOptions({{"max_bytes_for_level_base", ToString(k1MB)}}));
 
   // writing 96 x 64KB => 6 * 1024KB
   // (L1 + L2) = (1 + 4) * 1024KB
@@ -7097,10 +4694,9 @@ TEST_F(DBTest, DynamicCompactionOptions) {
   // max_bytes_for_level_base. Now, reduce both multiplier and level base,
   // After filling enough data that can fit in L1 - L3, we should see L1 size
   // reduces to 128KB from 256KB which was asserted previously. Same for L2.
- ASSERT_OK(dbfull()->SetOptions({ - {"max_bytes_for_level_multiplier", "2"}, - {"max_bytes_for_level_base", ToString(k128KB) } - })); + ASSERT_OK( + dbfull()->SetOptions({{"max_bytes_for_level_multiplier", "2"}, + {"max_bytes_for_level_base", ToString(k128KB)}})); // writing 20 x 64KB = 10 x 128KB // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB @@ -7108,8 +4704,7 @@ TEST_F(DBTest, DynamicCompactionOptions) { gen_l0_kb(i, 64, 32); } dbfull()->TEST_WaitForCompact(); - uint64_t total_size = - SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3); + uint64_t total_size = SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3); ASSERT_TRUE(total_size < k128KB * 7 * 1.5); // Test level0_stop_writes_trigger. @@ -7144,9 +4739,7 @@ TEST_F(DBTest, DynamicCompactionOptions) { // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0. // Block compaction thread again. Perform the put and memtable flushes // until we see the stop after 6 memtable flushes. - ASSERT_OK(dbfull()->SetOptions({ - {"level0_stop_writes_trigger", "6"} - })); + ASSERT_OK(dbfull()->SetOptions({{"level0_stop_writes_trigger", "6"}})); dbfull()->TEST_FlushMemTable(true); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(NumTableFilesAtLevel(0), 0); @@ -7175,9 +4768,7 @@ TEST_F(DBTest, DynamicCompactionOptions) { // 4 L0 files and compaction should be triggered. If auto compaction is // disabled, then TEST_WaitForCompact will be waiting for nothing. Number of // L0 files do not change after the call. - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "true"} - })); + ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "true"}})); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(NumTableFilesAtLevel(0), 0); @@ -7191,9 +4782,7 @@ TEST_F(DBTest, DynamicCompactionOptions) { // Enable auto compaction and perform the same test, # of L0 files should be // reduced after compaction. - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "false"} - })); + ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}})); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(NumTableFilesAtLevel(0), 0); @@ -7301,17 +4890,14 @@ TEST_F(DBTest, DynamicMiscOptions) { // No reseek assert_reseek_count(100, 0); - ASSERT_OK(dbfull()->SetOptions({ - {"max_sequential_skip_in_iterations", "4"} - })); + ASSERT_OK(dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "4"}})); // Clear memtable and make new option effective dbfull()->TEST_FlushMemTable(true); // Trigger reseek assert_reseek_count(200, 1); - ASSERT_OK(dbfull()->SetOptions({ - {"max_sequential_skip_in_iterations", "16"} - })); + ASSERT_OK( + dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "16"}})); // Clear memtable and make new option effective dbfull()->TEST_FlushMemTable(true); // No reseek @@ -7319,249 +4905,6 @@ TEST_F(DBTest, DynamicMiscOptions) { } #endif // ROCKSDB_LITE -TEST_F(DBTest, DontDeletePendingOutputs) { - Options options; - options.env = env_; - options.create_if_missing = true; - DestroyAndReopen(options); - - // Every time we write to a table file, call FOF/POF with full DB scan. 
This - // will make sure our pending_outputs_ protection work correctly - std::function purge_obsolete_files_function = [&]() { - JobContext job_context(0); - dbfull()->TEST_LockMutex(); - dbfull()->FindObsoleteFiles(&job_context, true /*force*/); - dbfull()->TEST_UnlockMutex(); - dbfull()->PurgeObsoleteFiles(job_context); - job_context.Clean(); - }; - - env_->table_write_callback_ = &purge_obsolete_files_function; - - for (int i = 0; i < 2; ++i) { - ASSERT_OK(Put("a", "begin")); - ASSERT_OK(Put("z", "end")); - ASSERT_OK(Flush()); - } - - // If pending output guard does not work correctly, PurgeObsoleteFiles() will - // delete the file that Compaction is trying to create, causing this: error - // db/db_test.cc:975: IO error: - // /tmp/rocksdbtest-1552237650/db_test/000009.sst: No such file or directory - Compact("a", "b"); -} - -#ifndef ROCKSDB_LITE -TEST_F(DBTest, DontDeleteMovedFile) { - // This test triggers move compaction and verifies that the file is not - // deleted when it's part of move compaction - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.max_bytes_for_level_base = 1024 * 1024; // 1 MB - options.level0_file_num_compaction_trigger = - 2; // trigger compaction when we have 2 files - DestroyAndReopen(options); - - Random rnd(301); - // Create two 1MB sst files - for (int i = 0; i < 2; ++i) { - // Create 1MB sst file - for (int j = 0; j < 100; ++j) { - ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024))); - } - ASSERT_OK(Flush()); - } - // this should execute both L0->L1 and L1->(move)->L2 compactions - dbfull()->TEST_WaitForCompact(); - ASSERT_EQ("0,0,1", FilesPerLevel(0)); - - // If the moved file is actually deleted (the move-safeguard in - // ~Version::Version() is not there), we get this failure: - // Corruption: Can't access /000009.sst - Reopen(options); -} - -TEST_F(DBTest, OptimizeFiltersForHits) { - Options options = CurrentOptions(); - options.write_buffer_size = 64 * 1024; - options.arena_block_size = 4 * 1024; - options.target_file_size_base = 64 * 1024; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 4; - options.max_bytes_for_level_base = 256 * 1024; - options.max_write_buffer_number = 2; - options.max_background_compactions = 8; - options.max_background_flushes = 8; - options.compression = kNoCompression; - options.compaction_style = kCompactionStyleLevel; - options.level_compaction_dynamic_level_bytes = true; - BlockBasedTableOptions bbto; - bbto.cache_index_and_filter_blocks = true; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, true)); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.optimize_filters_for_hits = true; - options.statistics = rocksdb::CreateDBStatistics(); - CreateAndReopenWithCF({"mypikachu"}, options); - - int numkeys = 200000; - - // Generate randomly shuffled keys, so the updates are almost - // random. 
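As a reviewer aid for the options block above: optimize_filters_for_hits tells RocksDB not to build or load bloom filters for the bottommost level, trading extra reads on misses that reach that level for filter memory. A minimal, self-contained sketch of just that wiring (function name illustrative, not part of this patch):

#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/table.h"

rocksdb::Options OptionsWithBottomLevelFiltersSkipped() {
  rocksdb::Options options;
  rocksdb::BlockBasedTableOptions bbto;
  bbto.cache_index_and_filter_blocks = true;
  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
  bbto.whole_key_filtering = true;
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbto));
  // Skip bloom filters for the largest (bottommost) level.
  options.optimize_filters_for_hits = true;
  return options;
}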
- std::vector keys; - keys.reserve(numkeys); - for (int i = 0; i < numkeys; i += 2) { - keys.push_back(i); - } - std::random_shuffle(std::begin(keys), std::end(keys)); - - int num_inserted = 0; - for (int key : keys) { - ASSERT_OK(Put(1, Key(key), "val")); - if (++num_inserted % 1000 == 0) { - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); - } - } - ASSERT_OK(Put(1, Key(0), "val")); - ASSERT_OK(Put(1, Key(numkeys), "val")); - ASSERT_OK(Flush(1)); - dbfull()->TEST_WaitForCompact(); - - if (NumTableFilesAtLevel(0, 1) == 0) { - // No Level 0 file. Create one. - ASSERT_OK(Put(1, Key(0), "val")); - ASSERT_OK(Put(1, Key(numkeys), "val")); - ASSERT_OK(Flush(1)); - dbfull()->TEST_WaitForCompact(); - } - - for (int i = 1; i < numkeys; i += 2) { - ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND"); - } - - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0)); - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1)); - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); - - // Now we have three sorted run, L0, L5 and L6 with most files in L6 have - // no bloom filter. Most keys be checked bloom filters twice. - ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2); - ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2); - - for (int i = 0; i < numkeys; i += 2) { - ASSERT_EQ(Get(1, Key(i)), "val"); - } - - // Part 2 (read path): rewrite last level with blooms, then verify they get - // cached only if !optimize_filters_for_hits - options.disable_auto_compactions = true; - options.num_levels = 9; - options.optimize_filters_for_hits = false; - options.statistics = CreateDBStatistics(); - bbto.block_cache.reset(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - MoveFilesToLevel(7 /* level */, 1 /* column family index */); - - std::string value = Get(1, Key(0)); - uint64_t prev_cache_filter_hits = - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - value = Get(1, Key(0)); - ASSERT_EQ(prev_cache_filter_hits + 1, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - - // Now that we know the filter blocks exist in the last level files, see if - // filter caching is skipped for this optimization - options.optimize_filters_for_hits = true; - options.statistics = CreateDBStatistics(); - bbto.block_cache.reset(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - value = Get(1, Key(0)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - ASSERT_EQ(2 /* index and data block */, - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - // Check filter block ignored for files preloaded during DB::Open() - options.max_open_files = -1; - options.statistics = CreateDBStatistics(); - bbto.block_cache.reset(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - uint64_t prev_cache_filter_misses = - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - Get(1, Key(0)); - ASSERT_EQ(prev_cache_filter_misses, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(prev_cache_filter_hits, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - - // Check filter block ignored for file trivially-moved to bottom level - 
bbto.block_cache.reset(); - options.max_open_files = 100; // setting > -1 makes it not preload all files - options.statistics = CreateDBStatistics(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - ASSERT_OK(Put(1, Key(numkeys + 1), "val")); - ASSERT_OK(Flush(1)); - - int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* arg) { trivial_move++; }); - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* arg) { non_trivial_move++; }); - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - - CompactRangeOptions compact_options; - compact_options.bottommost_level_compaction = - BottommostLevelCompaction::kSkip; - compact_options.change_level = true; - compact_options.target_level = 7; - db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); - - ASSERT_EQ(trivial_move, 1); - ASSERT_EQ(non_trivial_move, 0); - - prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - prev_cache_filter_misses = - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - value = Get(1, Key(numkeys + 1)); - ASSERT_EQ(prev_cache_filter_hits, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - ASSERT_EQ(prev_cache_filter_misses, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - - // Check filter block not cached for iterator - bbto.block_cache.reset(); - options.statistics = CreateDBStatistics(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - std::unique_ptr iter(db_->NewIterator(ReadOptions(), handles_[1])); - iter->SeekToFirst(); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - ASSERT_EQ(2 /* index and data block */, - TestGetTickerCount(options, BLOCK_CACHE_ADD)); -} -#endif // ROCKSDB_LITE - TEST_F(DBTest, L0L1L2AndUpHitCounter) { Options options = CurrentOptions(); options.write_buffer_size = 32 * 1024; @@ -7665,113 +5008,13 @@ TEST_F(DBTest, MutexWaitStats) { options.statistics->stats_level_ = StatsLevel::kAll; CreateAndReopenWithCF({"pikachu"}, options); const uint64_t kMutexWaitDelay = 100; - ThreadStatusUtil::TEST_SetStateDelay( - ThreadStatus::STATE_MUTEX_WAIT, kMutexWaitDelay); + ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, + kMutexWaitDelay); ASSERT_OK(Put("hello", "rocksdb")); - ASSERT_GE(TestGetTickerCount( - options, DB_MUTEX_WAIT_MICROS), kMutexWaitDelay); - ThreadStatusUtil::TEST_SetStateDelay( - ThreadStatus::STATE_MUTEX_WAIT, 0); + ASSERT_GE(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), kMutexWaitDelay); + ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0); } -#ifndef ROCKSDB_LITE -// This reproduces a bug where we don't delete a file because when it was -// supposed to be deleted, it was blocked by pending_outputs -// Consider: -// 1. current file_number is 13 -// 2. compaction (1) starts, blocks deletion of all files starting with 13 -// (pending outputs) -// 3. file 13 is created by compaction (2) -// 4. file 13 is consumed by compaction (3) and file 15 was created. Since file -// 13 has no references, it is put into VersionSet::obsolete_files_ -// 5. FindObsoleteFiles() gets file 13 from VersionSet::obsolete_files_. File 13 -// is deleted from obsolete_files_ set. -// 6. 
PurgeObsoleteFiles() tries to delete file 13, but this file is blocked by -// pending outputs since compaction (1) is still running. It is not deleted and -// it is not present in obsolete_files_ anymore. Therefore, we never delete it. -TEST_F(DBTest, DeleteObsoleteFilesPendingOutputs) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 2 * 1024 * 1024; // 2 MB - options.max_bytes_for_level_base = 1024 * 1024; // 1 MB - options.level0_file_num_compaction_trigger = - 2; // trigger compaction when we have 2 files - options.max_background_flushes = 2; - options.max_background_compactions = 2; - - OnFileDeletionListener* listener = new OnFileDeletionListener(); - options.listeners.emplace_back(listener); - - Reopen(options); - - Random rnd(301); - // Create two 1MB sst files - for (int i = 0; i < 2; ++i) { - // Create 1MB sst file - for (int j = 0; j < 100; ++j) { - ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024))); - } - ASSERT_OK(Flush()); - } - // this should execute both L0->L1 and L1->(move)->L2 compactions - dbfull()->TEST_WaitForCompact(); - ASSERT_EQ("0,0,1", FilesPerLevel(0)); - - test::SleepingBackgroundTask blocking_thread; - port::Mutex mutex_; - bool already_blocked(false); - - // block the flush - std::function block_first_time = [&]() { - bool blocking = false; - { - MutexLock l(&mutex_); - if (!already_blocked) { - blocking = true; - already_blocked = true; - } - } - if (blocking) { - blocking_thread.DoSleep(); - } - }; - env_->table_write_callback_ = &block_first_time; - // Create 1MB sst file - for (int j = 0; j < 256; ++j) { - ASSERT_OK(Put(Key(j), RandomString(&rnd, 10 * 1024))); - } - // this should trigger a flush, which is blocked with block_first_time - // pending_file is protecting all the files created after - - ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr)); - - ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(metadata.size(), 1U); - auto file_on_L2 = metadata[0].name; - listener->SetExpectedFileName(dbname_ + file_on_L2); - - ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr, - true /* disallow trivial move */)); - ASSERT_EQ("0,0,0,0,1", FilesPerLevel(0)); - - // finish the flush! 
- blocking_thread.WakeUp(); - blocking_thread.WaitUntilDone(); - dbfull()->TEST_WaitForFlushMemTable(); - ASSERT_EQ("1,0,0,0,1", FilesPerLevel(0)); - - metadata.clear(); - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(metadata.size(), 2U); - - // This file should have been deleted during last compaction - ASSERT_EQ(Status::NotFound(), env_->FileExists(dbname_ + file_on_L2)); - listener->VerifyMatchedCount(1); -} -#endif // ROCKSDB_LITE - TEST_F(DBTest, CloseSpeedup) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleLevel; @@ -7988,7 +5231,8 @@ TEST_F(DBTest, SuggestCompactRangeTest) { } static bool IsManual(CompactionFilterFactory* compaction_filter_factory) { return reinterpret_cast( - compaction_filter_factory)->saved_context.is_manual_compaction; + compaction_filter_factory) + ->saved_context.is_manual_compaction; } CompactionFilter::Context saved_context; }; @@ -8019,7 +5263,7 @@ TEST_F(DBTest, SuggestCompactRangeTest) { GenerateNewRandomFile(&rnd); ASSERT_EQ("0,4", FilesPerLevel(0)); ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual( - options.compaction_filter_factory.get())); + options.compaction_filter_factory.get())); GenerateNewRandomFile(&rnd); ASSERT_EQ("1,4", FilesPerLevel(0)); @@ -8209,8 +5453,8 @@ TEST_F(DBTest, LargeBatchWithColumnFamilies) { for (int pass = 1; pass <= 3; pass++) { WriteBatch batch; size_t write_size = 1024 * 1024 * (5 + i); - fprintf(stderr, "prepare: %" ROCKSDB_PRIszt " MB, pass:%d\n", (write_size / 1024 / 1024), - pass); + fprintf(stderr, "prepare: %" ROCKSDB_PRIszt " MB, pass:%d\n", + (write_size / 1024 / 1024), pass); for (;;) { std::string data(3000, j++ % 127 + 20); data += ToString(j); @@ -8219,7 +5463,8 @@ TEST_F(DBTest, LargeBatchWithColumnFamilies) { break; } } - fprintf(stderr, "write: %" ROCKSDB_PRIszt " MB\n", (batch.GetDataSize() / 1024 / 1024)); + fprintf(stderr, "write: %" ROCKSDB_PRIszt " MB\n", + (batch.GetDataSize() / 1024 / 1024)); ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); fprintf(stderr, "done\n"); } @@ -8256,11 +5501,11 @@ TEST_F(DBTest, FlushesInParallelWithCompactRange) { } if (iter == 1) { - rocksdb::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::RunManualCompaction()::1", - "DBTest::FlushesInParallelWithCompactRange:1"}, - {"DBTest::FlushesInParallelWithCompactRange:2", - "DBImpl::RunManualCompaction()::2"}}); + rocksdb::SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::RunManualCompaction()::1", + "DBTest::FlushesInParallelWithCompactRange:1"}, + {"DBTest::FlushesInParallelWithCompactRange:2", + "DBImpl::RunManualCompaction()::2"}}); } else { rocksdb::SyncPoint::GetInstance()->LoadDependency( {{"CompactionJob::Run():Start", @@ -8564,7 +5809,7 @@ TEST_F(DBTest, LastWriteBufferDelay) { TEST_F(DBTest, FailWhenCompressionNotSupportedTest) { CompressionType compressions[] = {kZlibCompression, kBZip2Compression, - kLZ4Compression, kLZ4HCCompression}; + kLZ4Compression, kLZ4HCCompression}; for (int iter = 0; iter < 4; ++iter) { if (!CompressionTypeSupported(compressions[iter])) { // not supported, we should fail the Open() @@ -8603,35 +5848,10 @@ TEST_F(DBTest, RowCache) { } #endif // ROCKSDB_LITE -// TODO(3.13): fix the issue of Seek() + Prev() which might not necessary -// return the biggest key which is smaller than the seek key. 
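The TODO above refers to the common Seek() + Prev() idiom for locating the greatest key not larger than a target. A short sketch of that idiom, assuming db is an open rocksdb::DB* and the target key "2" is illustrative:

std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(rocksdb::ReadOptions()));
it->Seek("2");                    // first key >= "2"
if (!it->Valid()) {
  it->SeekToLast();               // every key in the DB is smaller than "2"
} else if (it->key().compare("2") > 0) {
  it->Prev();                     // step back to the biggest key < "2"
}
// it now points at the greatest key <= "2", if one exists

The removed test below exercises exactly the case the TODO flags: whether Prev() after a Seek() on merged data really lands on the biggest smaller key.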
-TEST_F(DBTest, PrevAfterMerge) { - Options options; - options.create_if_missing = true; - options.merge_operator = MergeOperators::CreatePutOperator(); - DestroyAndReopen(options); - - // write three entries with different keys using Merge() - WriteOptions wopts; - db_->Merge(wopts, "1", "data1"); - db_->Merge(wopts, "2", "data2"); - db_->Merge(wopts, "3", "data3"); - - std::unique_ptr it(db_->NewIterator(ReadOptions())); - - it->Seek("2"); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("2", it->key().ToString()); - - it->Prev(); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("1", it->key().ToString()); -} - TEST_F(DBTest, DeletingOldWalAfterDrop) { rocksdb::SyncPoint::GetInstance()->LoadDependency( - { { "Test:AllowFlushes", "DBImpl::BGWorkFlush" }, - { "DBImpl::BGWorkFlush:done", "Test:WaitForFlush"} }); + {{"Test:AllowFlushes", "DBImpl::BGWorkFlush"}, + {"DBImpl::BGWorkFlush:done", "Test:WaitForFlush"}}); rocksdb::SyncPoint::GetInstance()->ClearTrace(); rocksdb::SyncPoint::GetInstance()->DisableProcessing(); @@ -8639,9 +5859,9 @@ TEST_F(DBTest, DeletingOldWalAfterDrop) { options.max_total_wal_size = 8192; options.compression = kNoCompression; options.write_buffer_size = 1 << 20; - options.level0_file_num_compaction_trigger = (1<<30); - options.level0_slowdown_writes_trigger = (1<<30); - options.level0_stop_writes_trigger = (1<<30); + options.level0_file_num_compaction_trigger = (1 << 30); + options.level0_slowdown_writes_trigger = (1 << 30); + options.level0_stop_writes_trigger = (1 << 30); options.disable_auto_compactions = true; DestroyAndReopen(options); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); @@ -8662,339 +5882,6 @@ TEST_F(DBTest, DeletingOldWalAfterDrop) { EXPECT_GT(lognum2, lognum1); } -TEST_F(DBTest, DBWithSstFileManager) { - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = static_cast(sst_file_manager.get()); - - int files_added = 0; - int files_deleted = 0; - int files_moved = 0; - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnAddFile", [&](void* arg) { files_added++; }); - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnDeleteFile", [&](void* arg) { files_deleted++; }); - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnMoveFile", [&](void* arg) { files_moved++; }); - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 25; i++) { - GenerateNewRandomFile(&rnd); - ASSERT_OK(Flush()); - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); - // Verify that we are tracking all sst files in dbname_ - ASSERT_EQ(sfm->GetTrackedFiles(), GetAllSSTFiles()); - } - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - auto files_in_db = GetAllSSTFiles(); - // Verify that we are tracking all sst files in dbname_ - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - // Verify the total files size - uint64_t total_files_size = 0; - for (auto& file_to_size : files_in_db) { - total_files_size += file_to_size.second; - } - ASSERT_EQ(sfm->GetTotalSize(), total_files_size); - // We flushed at least 25 files - ASSERT_GE(files_added, 25); - // Compaction must have deleted some files - ASSERT_GT(files_deleted, 0); - // No files were moved - ASSERT_EQ(files_moved, 0); - - Close(); - Reopen(options); - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - ASSERT_EQ(sfm->GetTotalSize(), total_files_size); - - 
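As background for the assertions above: an SstFileManager attached to the options keeps a live view of the SST files a DB owns and their total size, which the test reads back (via its SstFileManagerImpl handle) with GetTrackedFiles() and GetTotalSize(). A minimal sketch of the wiring, with an illustrative path:

#include "rocksdb/db.h"
#include "rocksdb/sst_file_manager.h"

std::shared_ptr<rocksdb::SstFileManager> sfm(
    rocksdb::NewSstFileManager(rocksdb::Env::Default()));
rocksdb::Options options;
options.create_if_missing = true;
options.sst_file_manager = sfm;

rocksdb::DB* db;
rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/sfm_demo", &db);
// After some flushes/compactions:
uint64_t total_bytes = sfm->GetTotalSize();  // sum over all tracked SST files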
// Verify that we track all the files again after the DB is closed and opened - Close(); - sst_file_manager.reset(NewSstFileManager(env_)); - options.sst_file_manager = sst_file_manager; - sfm = static_cast(sst_file_manager.get()); - - Reopen(options); - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - ASSERT_EQ(sfm->GetTotalSize(), total_files_size); - - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); -} - -#ifndef ROCKSDB_LITE -TEST_F(DBTest, RateLimitedDelete) { - rocksdb::SyncPoint::GetInstance()->LoadDependency({ - {"DBTest::RateLimitedDelete:1", "DeleteScheduler::BackgroundEmptyTrash"}, - }); - - std::vector penalties; - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::BackgroundEmptyTrash:Wait", - [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.env = env_; - - std::string trash_dir = test::TmpDir(env_) + "/trash"; - int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec - Status s; - options.sst_file_manager.reset(NewSstFileManager( - env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s)); - ASSERT_OK(s); - auto sfm = static_cast(options.sst_file_manager.get()); - - Destroy(last_options_); - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(TryReopen(options)); - // Create 4 files in L0 - for (char v = 'a'; v <= 'd'; v++) { - ASSERT_OK(Put("Key2", DummyString(1024, v))); - ASSERT_OK(Put("Key3", DummyString(1024, v))); - ASSERT_OK(Put("Key4", DummyString(1024, v))); - ASSERT_OK(Put("Key1", DummyString(1024, v))); - ASSERT_OK(Put("Key4", DummyString(1024, v))); - ASSERT_OK(Flush()); - } - // We created 4 sst files in L0 - ASSERT_EQ("4", FilesPerLevel(0)); - - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - - // Compaction will move the 4 files in L0 to trash and create 1 L1 file - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("0,1", FilesPerLevel(0)); - - uint64_t delete_start_time = env_->NowMicros(); - // Hold BackgroundEmptyTrash - TEST_SYNC_POINT("DBTest::RateLimitedDelete:1"); - sfm->WaitForEmptyTrash(); - uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; - - uint64_t total_files_size = 0; - uint64_t expected_penlty = 0; - ASSERT_EQ(penalties.size(), metadata.size()); - for (size_t i = 0; i < metadata.size(); i++) { - total_files_size += metadata[i].size; - expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec); - ASSERT_EQ(expected_penlty, penalties[i]); - } - ASSERT_GT(time_spent_deleting, expected_penlty * 0.9); - - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); -} - -// Create a DB with 2 db_paths, and generate multiple files in the 2 -// db_paths using CompactRangeOptions, make sure that files that were -// deleted from first db_path were deleted using DeleteScheduler and -// files in the second path were not. 
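The trash-directory variant of NewSstFileManager used below is what routes SST deletions through the rate-limited DeleteScheduler. A sketch of just that wiring (trash directory and the 1 MB/s rate mirror the test; the path is illustrative):

rocksdb::Status s;
rocksdb::Options options;
options.sst_file_manager.reset(rocksdb::NewSstFileManager(
    rocksdb::Env::Default(), nullptr /* info_log */, "/tmp/demo_trash",
    1024 * 1024 /* rate_bytes_per_sec */, false /* delete_existing_trash */,
    &s));
// s must be checked: a non-OK status means the trash directory could not be
// prepared, and deletions will not be rate limited.
assert(s.ok());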
-TEST_F(DBTest, DeleteSchedulerMultipleDBPaths) { - int bg_delete_file = 0; - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void* arg) { bg_delete_file++; }); - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.db_paths.emplace_back(dbname_, 1024 * 100); - options.db_paths.emplace_back(dbname_ + "_2", 1024 * 100); - options.env = env_; - - std::string trash_dir = test::TmpDir(env_) + "/trash"; - int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec - Status s; - options.sst_file_manager.reset(NewSstFileManager( - env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s)); - ASSERT_OK(s); - auto sfm = static_cast(options.sst_file_manager.get()); - - DestroyAndReopen(options); - - // Create 4 files in L0 - for (int i = 0; i < 4; i++) { - ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'))); - ASSERT_OK(Flush()); - } - // We created 4 sst files in L0 - ASSERT_EQ("4", FilesPerLevel(0)); - // Compaction will delete files from L0 in first db path and generate a new - // file in L1 in second db path - CompactRangeOptions compact_options; - compact_options.target_path_id = 1; - Slice begin("Key0"); - Slice end("Key3"); - ASSERT_OK(db_->CompactRange(compact_options, &begin, &end)); - ASSERT_EQ("0,1", FilesPerLevel(0)); - - // Create 4 files in L0 - for (int i = 4; i < 8; i++) { - ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B'))); - ASSERT_OK(Flush()); - } - ASSERT_EQ("4,1", FilesPerLevel(0)); - - // Compaction will delete files from L0 in first db path and generate a new - // file in L1 in second db path - begin = "Key4"; - end = "Key7"; - ASSERT_OK(db_->CompactRange(compact_options, &begin, &end)); - ASSERT_EQ("0,2", FilesPerLevel(0)); - - sfm->WaitForEmptyTrash(); - ASSERT_EQ(bg_delete_file, 8); - - compact_options.bottommost_level_compaction = - BottommostLevelCompaction::kForce; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - ASSERT_EQ("0,1", FilesPerLevel(0)); - - sfm->WaitForEmptyTrash(); - ASSERT_EQ(bg_delete_file, 8); - - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBTest, DestroyDBWithRateLimitedDelete) { - int bg_delete_file = 0; - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void* arg) { bg_delete_file++; }); - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.env = env_; - DestroyAndReopen(options); - - // Create 4 files in L0 - for (int i = 0; i < 4; i++) { - ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'))); - ASSERT_OK(Flush()); - } - // We created 4 sst files in L0 - ASSERT_EQ("4", FilesPerLevel(0)); - - // Close DB and destroy it using DeleteScheduler - Close(); - std::string trash_dir = test::TmpDir(env_) + "/trash"; - int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec - Status s; - options.sst_file_manager.reset(NewSstFileManager( - env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s)); - ASSERT_OK(s); - ASSERT_OK(DestroyDB(dbname_, options)); - - auto sfm = static_cast(options.sst_file_manager.get()); - sfm->WaitForEmptyTrash(); - // We have deleted the 4 sst files in the delete_scheduler - ASSERT_EQ(bg_delete_file, 4); -} -#endif // ROCKSDB_LITE - -TEST_F(DBTest, DBWithMaxSpaceAllowed) { - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = 
static_cast(sst_file_manager.get()); - - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - Random rnd(301); - - // Generate a file containing 100 keys. - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, 50))); - } - ASSERT_OK(Flush()); - - uint64_t first_file_size = 0; - auto files_in_db = GetAllSSTFiles(&first_file_size); - ASSERT_EQ(sfm->GetTotalSize(), first_file_size); - - // Set the maximum allowed space usage to the current total size - sfm->SetMaxAllowedSpaceUsage(first_file_size + 1); - - ASSERT_OK(Put("key1", "val1")); - // This flush will cause bg_error_ and will fail - ASSERT_NOK(Flush()); -} - -TEST_F(DBTest, DBWithMaxSpaceAllowedRandomized) { - // This test will set a maximum allowed space for the DB, then it will - // keep filling the DB until the limit is reached and bg_error_ is set. - // When bg_error_ is set we will verify that the DB size is greater - // than the limit. - - std::vector max_space_limits_mbs = {1, 2, 4, 8, 10}; - - bool bg_error_set = false; - uint64_t total_sst_files_size = 0; - - int reached_max_space_on_flush = 0; - int reached_max_space_on_compaction = 0; - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached", - [&](void* arg) { - bg_error_set = true; - GetAllSSTFiles(&total_sst_files_size); - reached_max_space_on_flush++; - }); - - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached", - [&](void* arg) { - bg_error_set = true; - GetAllSSTFiles(&total_sst_files_size); - reached_max_space_on_compaction++; - }); - - for (auto limit_mb : max_space_limits_mbs) { - bg_error_set = false; - total_sst_files_size = 0; - rocksdb::SyncPoint::GetInstance()->ClearTrace(); - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = static_cast(sst_file_manager.get()); - - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - options.write_buffer_size = 1024 * 512; // 512 Kb - DestroyAndReopen(options); - Random rnd(301); - - sfm->SetMaxAllowedSpaceUsage(limit_mb * 1024 * 1024); - - int keys_written = 0; - uint64_t estimated_db_size = 0; - while (true) { - auto s = Put(RandomString(&rnd, 10), RandomString(&rnd, 50)); - if (!s.ok()) { - break; - } - keys_written++; - // Check the estimated db size vs the db limit just to make sure we - // dont run into an infinite loop - estimated_db_size = keys_written * 60; // ~60 bytes per key - ASSERT_LT(estimated_db_size, limit_mb * 1024 * 1024 * 2); - } - ASSERT_TRUE(bg_error_set); - ASSERT_GE(total_sst_files_size, limit_mb * 1024 * 1024); - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); - } - - ASSERT_GT(reached_max_space_on_flush, 0); - ASSERT_GT(reached_max_space_on_compaction, 0); -} - TEST_F(DBTest, UnsupportedManualSync) { DestroyAndReopen(CurrentOptions()); env_->is_wal_sync_thread_safe_.store(false); @@ -9002,1112 +5889,6 @@ TEST_F(DBTest, UnsupportedManualSync) { ASSERT_TRUE(s.IsNotSupported()); } -#ifndef ROCKSDB_LITE -TEST_F(DBTest, OpenDBWithInfiniteMaxOpenFiles) { - // Open DB with infinite max open files - // - First iteration use 1 thread to open files - // - Second iteration use 5 threads to open files - for (int iter = 0; iter < 2; iter++) { - Options options; - options.create_if_missing = true; - options.write_buffer_size = 100000; - 
options.disable_auto_compactions = true; - options.max_open_files = -1; - if (iter == 0) { - options.max_file_opening_threads = 1; - } else { - options.max_file_opening_threads = 5; - } - options = CurrentOptions(options); - DestroyAndReopen(options); - - // Create 12 Files in L0 (then move then to L2) - for (int i = 0; i < 12; i++) { - std::string k = "L2_" + Key(i); - ASSERT_OK(Put(k, k + std::string(1000, 'a'))); - ASSERT_OK(Flush()); - } - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 2; - db_->CompactRange(compact_options, nullptr, nullptr); - - // Create 12 Files in L0 - for (int i = 0; i < 12; i++) { - std::string k = "L0_" + Key(i); - ASSERT_OK(Put(k, k + std::string(1000, 'a'))); - ASSERT_OK(Flush()); - } - Close(); - - // Reopening the DB will load all exisitng files - Reopen(options); - ASSERT_EQ("12,0,12", FilesPerLevel(0)); - std::vector> files; - dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files); - - for (const auto& level : files) { - for (const auto& file : level) { - ASSERT_TRUE(file.table_reader_handle != nullptr); - } - } - - for (int i = 0; i < 12; i++) { - ASSERT_EQ(Get("L0_" + Key(i)), "L0_" + Key(i) + std::string(1000, 'a')); - ASSERT_EQ(Get("L2_" + Key(i)), "L2_" + Key(i) + std::string(1000, 'a')); - } - } -} - -TEST_F(DBTest, GetTotalSstFilesSize) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.compression = kNoCompression; - DestroyAndReopen(options); - // Generate 5 files in L0 - for (int i = 0; i < 5; i++) { - for (int j = 0; j < 10; j++) { - std::string val = "val_file_" + ToString(i); - ASSERT_OK(Put(Key(j), val)); - } - Flush(); - } - ASSERT_EQ("5", FilesPerLevel(0)); - - std::vector live_files_meta; - dbfull()->GetLiveFilesMetaData(&live_files_meta); - ASSERT_EQ(live_files_meta.size(), 5); - uint64_t single_file_size = live_files_meta[0].size; - - uint64_t live_sst_files_size = 0; - uint64_t total_sst_files_size = 0; - for (const auto& file_meta : live_files_meta) { - live_sst_files_size += file_meta.size; - } - - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 5 - // Total SST files = 5 - ASSERT_EQ(live_sst_files_size, 5 * single_file_size); - ASSERT_EQ(total_sst_files_size, 5 * single_file_size); - - // hold current version - std::unique_ptr iter1(dbfull()->NewIterator(ReadOptions())); - - // Compact 5 files into 1 file in L0 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("0,1", FilesPerLevel(0)); - - live_files_meta.clear(); - dbfull()->GetLiveFilesMetaData(&live_files_meta); - ASSERT_EQ(live_files_meta.size(), 1); - - live_sst_files_size = 0; - total_sst_files_size = 0; - for (const auto& file_meta : live_files_meta) { - live_sst_files_size += file_meta.size; - } - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 1 (compacted file) - // Total SST files = 6 (5 original files + compacted file) - ASSERT_EQ(live_sst_files_size, 1 * single_file_size); - ASSERT_EQ(total_sst_files_size, 6 * single_file_size); - - // hold current version - std::unique_ptr iter2(dbfull()->NewIterator(ReadOptions())); - - // Delete all keys and compact, this will delete all live files - for (int i = 0; i < 10; i++) { - ASSERT_OK(Delete(Key(i))); - } - Flush(); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("", FilesPerLevel(0)); - - 

live_files_meta.clear(); - dbfull()->GetLiveFilesMetaData(&live_files_meta); - ASSERT_EQ(live_files_meta.size(), 0); - - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 0 - // Total SST files = 6 (5 original files + compacted file) - ASSERT_EQ(total_sst_files_size, 6 * single_file_size); - - iter1.reset(); - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 0 - // Total SST files = 1 (compacted file) - ASSERT_EQ(total_sst_files_size, 1 * single_file_size); - - iter2.reset(); - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 0 - // Total SST files = 0 - ASSERT_EQ(total_sst_files_size, 0); -} - -TEST_F(DBTest, GetTotalSstFilesSizeVersionsFilesShared) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.compression = kNoCompression; - DestroyAndReopen(options); - // Generate 5 files in L0 - for (int i = 0; i < 5; i++) { - ASSERT_OK(Put(Key(i), "val")); - Flush(); - } - ASSERT_EQ("5", FilesPerLevel(0)); - - std::vector live_files_meta; - dbfull()->GetLiveFilesMetaData(&live_files_meta); - ASSERT_EQ(live_files_meta.size(), 5); - uint64_t single_file_size = live_files_meta[0].size; - - uint64_t live_sst_files_size = 0; - uint64_t total_sst_files_size = 0; - for (const auto& file_meta : live_files_meta) { - live_sst_files_size += file_meta.size; - } - - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - - // Live SST files = 5 - // Total SST files = 5 - ASSERT_EQ(live_sst_files_size, 5 * single_file_size); - ASSERT_EQ(total_sst_files_size, 5 * single_file_size); - - // hold current version - std::unique_ptr iter1(dbfull()->NewIterator(ReadOptions())); - - // Compaction will do trivial move from L0 to L1 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("0,5", FilesPerLevel(0)); - - live_files_meta.clear(); - dbfull()->GetLiveFilesMetaData(&live_files_meta); - ASSERT_EQ(live_files_meta.size(), 5); - - live_sst_files_size = 0; - total_sst_files_size = 0; - for (const auto& file_meta : live_files_meta) { - live_sst_files_size += file_meta.size; - } - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 5 - // Total SST files = 5 (used in 2 version) - ASSERT_EQ(live_sst_files_size, 5 * single_file_size); - ASSERT_EQ(total_sst_files_size, 5 * single_file_size); - - // hold current version - std::unique_ptr iter2(dbfull()->NewIterator(ReadOptions())); - - // Delete all keys and compact, this will delete all live files - for (int i = 0; i < 5; i++) { - ASSERT_OK(Delete(Key(i))); - } - Flush(); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("", FilesPerLevel(0)); - - live_files_meta.clear(); - dbfull()->GetLiveFilesMetaData(&live_files_meta); - ASSERT_EQ(live_files_meta.size(), 0); - - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 0 - // Total SST files = 5 (used in 2 version) - ASSERT_EQ(total_sst_files_size, 5 * single_file_size); - - iter1.reset(); - iter2.reset(); - - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 0 - // Total SST files = 0 - ASSERT_EQ(total_sst_files_size, 0); -} - -TEST_F(DBTest, AddExternalSstFile) { - do 
{ - std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/"; - env_->CreateDir(sst_files_folder); - Options options = CurrentOptions(); - options.env = env_; - const ImmutableCFOptions ioptions(options); - - SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator); - - // file1.sst (0 => 99) - std::string file1 = sst_files_folder + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - for (int k = 0; k < 100; k++) { - ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file1_info; - Status s = sst_file_writer.Finish(&file1_info); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(file1_info.file_path, file1); - ASSERT_EQ(file1_info.num_entries, 100); - ASSERT_EQ(file1_info.smallest_key, Key(0)); - ASSERT_EQ(file1_info.largest_key, Key(99)); - // sst_file_writer already finished, cannot add this value - s = sst_file_writer.Add(Key(100), "bad_val"); - ASSERT_FALSE(s.ok()) << s.ToString(); - - // file2.sst (100 => 199) - std::string file2 = sst_files_folder + "file2.sst"; - ASSERT_OK(sst_file_writer.Open(file2)); - for (int k = 100; k < 200; k++) { - ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val")); - } - // Cannot add this key because it's not after last added key - s = sst_file_writer.Add(Key(99), "bad_val"); - ASSERT_FALSE(s.ok()) << s.ToString(); - ExternalSstFileInfo file2_info; - s = sst_file_writer.Finish(&file2_info); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(file2_info.file_path, file2); - ASSERT_EQ(file2_info.num_entries, 100); - ASSERT_EQ(file2_info.smallest_key, Key(100)); - ASSERT_EQ(file2_info.largest_key, Key(199)); - - // file3.sst (195 => 299) - // This file values overlap with file2 values - std::string file3 = sst_files_folder + "file3.sst"; - ASSERT_OK(sst_file_writer.Open(file3)); - for (int k = 195; k < 300; k++) { - ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file3_info; - s = sst_file_writer.Finish(&file3_info); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(file3_info.file_path, file3); - ASSERT_EQ(file3_info.num_entries, 105); - ASSERT_EQ(file3_info.smallest_key, Key(195)); - ASSERT_EQ(file3_info.largest_key, Key(299)); - - // file4.sst (30 => 39) - // This file values overlap with file1 values - std::string file4 = sst_files_folder + "file4.sst"; - ASSERT_OK(sst_file_writer.Open(file4)); - for (int k = 30; k < 40; k++) { - ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file4_info; - s = sst_file_writer.Finish(&file4_info); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(file4_info.file_path, file4); - ASSERT_EQ(file4_info.num_entries, 10); - ASSERT_EQ(file4_info.smallest_key, Key(30)); - ASSERT_EQ(file4_info.largest_key, Key(39)); - - // file5.sst (400 => 499) - std::string file5 = sst_files_folder + "file5.sst"; - ASSERT_OK(sst_file_writer.Open(file5)); - for (int k = 400; k < 500; k++) { - ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file5_info; - s = sst_file_writer.Finish(&file5_info); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(file5_info.file_path, file5); - ASSERT_EQ(file5_info.num_entries, 100); - ASSERT_EQ(file5_info.smallest_key, Key(400)); - ASSERT_EQ(file5_info.largest_key, Key(499)); - - // Cannot create an empty sst file - std::string file_empty = sst_files_folder + "file_empty.sst"; - ExternalSstFileInfo file_empty_info; - s = sst_file_writer.Finish(&file_empty_info); - ASSERT_NOK(s); - - DestroyAndReopen(options); - // Add file using 
file path - s = db_->AddFile(file1); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); - for (int k = 0; k < 100; k++) { - ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); - } - - // Add file while holding a snapshot will fail - const Snapshot* s1 = db_->GetSnapshot(); - if (s1 != nullptr) { - ASSERT_NOK(db_->AddFile(&file2_info)); - db_->ReleaseSnapshot(s1); - } - // We can add the file after releaseing the snapshot - ASSERT_OK(db_->AddFile(&file2_info)); - - ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); - for (int k = 0; k < 200; k++) { - ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); - } - - // This file have overlapping values with the exisitng data - s = db_->AddFile(file3); - ASSERT_FALSE(s.ok()) << s.ToString(); - - // This file have overlapping values with the exisitng data - s = db_->AddFile(&file4_info); - ASSERT_FALSE(s.ok()) << s.ToString(); - - // Overwrite values of keys divisible by 5 - for (int k = 0; k < 200; k += 5) { - ASSERT_OK(Put(Key(k), Key(k) + "_val_new")); - } - ASSERT_NE(db_->GetLatestSequenceNumber(), 0U); - - // Key range of file5 (400 => 499) dont overlap with any keys in DB - ASSERT_OK(db_->AddFile(file5)); - - // Make sure values are correct before and after flush/compaction - for (int i = 0; i < 2; i++) { - for (int k = 0; k < 200; k++) { - std::string value = Key(k) + "_val"; - if (k % 5 == 0) { - value += "_new"; - } - ASSERT_EQ(Get(Key(k)), value); - } - for (int k = 400; k < 500; k++) { - std::string value = Key(k) + "_val"; - ASSERT_EQ(Get(Key(k)), value); - } - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - - Close(); - options.disable_auto_compactions = true; - Reopen(options); - - // Delete keys in range (400 => 499) - for (int k = 400; k < 500; k++) { - ASSERT_OK(Delete(Key(k))); - } - // We deleted range (400 => 499) but cannot add file5 because - // of the range tombstones - ASSERT_NOK(db_->AddFile(file5)); - - // Compacting the DB will remove the tombstones - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - // Now we can add the file - ASSERT_OK(db_->AddFile(file5)); - - // Verify values of file5 in DB - for (int k = 400; k < 500; k++) { - std::string value = Key(k) + "_val"; - ASSERT_EQ(Get(Key(k)), value); - } - } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction | - kSkipFIFOCompaction)); -} - -// This test reporduce a bug that can happen in some cases if the DB started -// purging obsolete files when we are adding an external sst file. -// This situation may result in deleting the file while it's being added. 
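The repro below drives this race with the test-only SyncPoint facility. For reviewers new to it, a minimal usage sketch; the marker string is the one the test hooks, the callback body is illustrative:

rocksdb::SyncPoint::GetInstance()->SetCallBack(
    "DBImpl::AddFile:FileCopied", [](void* /* arg */) {
      // Runs synchronously each time a thread passes the marker, letting the
      // test inject flushes/compactions at an exact point inside AddFile().
    });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
// ... exercise the DB ...
rocksdb::SyncPoint::GetInstance()->DisableProcessing();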
-TEST_F(DBTest, AddExternalSstFilePurgeObsoleteFilesBug) { - std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/"; - env_->CreateDir(sst_files_folder); - Options options = CurrentOptions(); - options.env = env_; - const ImmutableCFOptions ioptions(options); - - SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator); - - // file1.sst (0 => 500) - std::string sst_file_path = sst_files_folder + "file1.sst"; - Status s = sst_file_writer.Open(sst_file_path); - ASSERT_OK(s); - for (int i = 0; i < 500; i++) { - std::string k = Key(i); - s = sst_file_writer.Add(k, k + "_val"); - ASSERT_OK(s); - } - - ExternalSstFileInfo sst_file_info; - s = sst_file_writer.Finish(&sst_file_info); - ASSERT_OK(s); - - options.delete_obsolete_files_period_micros = 0; - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::AddFile:FileCopied", [&](void* arg) { - ASSERT_OK(Put("aaa", "bbb")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("aaa", "xxx")); - ASSERT_OK(Flush()); - db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); - }); - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); - - s = db_->AddFile(sst_file_path); - ASSERT_OK(s); - - for (int i = 0; i < 500; i++) { - std::string k = Key(i); - std::string v = k + "_val"; - ASSERT_EQ(Get(k), v); - } - - rocksdb::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBTest, AddExternalSstFileNoCopy) { - std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/"; - env_->CreateDir(sst_files_folder); - Options options = CurrentOptions(); - options.env = env_; - const ImmutableCFOptions ioptions(options); - - SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator); - - // file1.sst (0 => 99) - std::string file1 = sst_files_folder + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - for (int k = 0; k < 100; k++) { - ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file1_info; - Status s = sst_file_writer.Finish(&file1_info); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(file1_info.file_path, file1); - ASSERT_EQ(file1_info.num_entries, 100); - ASSERT_EQ(file1_info.smallest_key, Key(0)); - ASSERT_EQ(file1_info.largest_key, Key(99)); - - // file2.sst (100 => 299) - std::string file2 = sst_files_folder + "file2.sst"; - ASSERT_OK(sst_file_writer.Open(file2)); - for (int k = 100; k < 300; k++) { - ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file2_info; - s = sst_file_writer.Finish(&file2_info); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(file2_info.file_path, file2); - ASSERT_EQ(file2_info.num_entries, 200); - ASSERT_EQ(file2_info.smallest_key, Key(100)); - ASSERT_EQ(file2_info.largest_key, Key(299)); - - // file3.sst (110 => 124) .. 
overlap with file2.sst - std::string file3 = sst_files_folder + "file3.sst"; - ASSERT_OK(sst_file_writer.Open(file3)); - for (int k = 110; k < 125; k++) { - ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file3_info; - s = sst_file_writer.Finish(&file3_info); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(file3_info.file_path, file3); - ASSERT_EQ(file3_info.num_entries, 15); - ASSERT_EQ(file3_info.smallest_key, Key(110)); - ASSERT_EQ(file3_info.largest_key, Key(124)); - - s = db_->AddFile(&file1_info, true /* move file */); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(Status::NotFound(), env_->FileExists(file1)); - - s = db_->AddFile(&file2_info, false /* copy file */); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_OK(env_->FileExists(file2)); - - // This file have overlapping values with the exisitng data - s = db_->AddFile(&file3_info, true /* move file */); - ASSERT_FALSE(s.ok()) << s.ToString(); - ASSERT_OK(env_->FileExists(file3)); - - for (int k = 0; k < 300; k++) { - ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); - } -} - -TEST_F(DBTest, AddExternalSstFileMultiThreaded) { - std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/"; - // Bulk load 10 files every file contain 1000 keys - int num_files = 10; - int keys_per_file = 1000; - - // Generate file names - std::vector file_names; - for (int i = 0; i < num_files; i++) { - std::string file_name = "file_" + ToString(i) + ".sst"; - file_names.push_back(sst_files_folder + file_name); - } - - do { - env_->CreateDir(sst_files_folder); - Options options = CurrentOptions(); - const ImmutableCFOptions ioptions(options); - - std::atomic thread_num(0); - std::function write_file_func = [&]() { - int file_idx = thread_num.fetch_add(1); - int range_start = file_idx * keys_per_file; - int range_end = range_start + keys_per_file; - - SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator); - - ASSERT_OK(sst_file_writer.Open(file_names[file_idx])); - - for (int k = range_start; k < range_end; k++) { - ASSERT_OK(sst_file_writer.Add(Key(k), Key(k))); - } - - Status s = sst_file_writer.Finish(); - ASSERT_TRUE(s.ok()) << s.ToString(); - }; - // Write num_files files in parallel - std::vector sst_writer_threads; - for (int i = 0; i < num_files; ++i) { - sst_writer_threads.emplace_back(write_file_func); - } - - for (auto& t : sst_writer_threads) { - t.join(); - } - - fprintf(stderr, "Wrote %d files (%d keys)\n", num_files, - num_files * keys_per_file); - - thread_num.store(0); - std::atomic files_added(0); - std::function load_file_func = [&]() { - // We intentionally add every file twice, and assert that it was added - // only once and the other add failed - int thread_id = thread_num.fetch_add(1); - int file_idx = thread_id / 2; - // sometimes we use copy, sometimes link .. 
the result should be the same - bool move_file = (thread_id % 3 == 0); - - Status s = db_->AddFile(file_names[file_idx], move_file); - if (s.ok()) { - files_added++; - } - }; - // Bulk load num_files files in parallel - std::vector add_file_threads; - DestroyAndReopen(options); - for (int i = 0; i < num_files * 2; ++i) { - add_file_threads.emplace_back(load_file_func); - } - - for (auto& t : add_file_threads) { - t.join(); - } - ASSERT_EQ(files_added.load(), num_files); - fprintf(stderr, "Loaded %d files (%d keys)\n", num_files, - num_files * keys_per_file); - - // Overwrite values of keys divisible by 100 - for (int k = 0; k < num_files * keys_per_file; k += 100) { - std::string key = Key(k); - Status s = Put(key, key + "_new"); - ASSERT_TRUE(s.ok()); - } - - for (int i = 0; i < 2; i++) { - // Make sure the values are correct before and after flush/compaction - for (int k = 0; k < num_files * keys_per_file; ++k) { - std::string key = Key(k); - std::string value = (k % 100 == 0) ? (key + "_new") : key; - ASSERT_EQ(Get(key), value); - } - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - - fprintf(stderr, "Verified %d values\n", num_files * keys_per_file); - } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction | - kSkipFIFOCompaction)); -} - -TEST_F(DBTest, AddExternalSstFileOverlappingRanges) { - std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/"; - Random rnd(301); - do { - env_->CreateDir(sst_files_folder); - Options options = CurrentOptions(); - DestroyAndReopen(options); - const ImmutableCFOptions ioptions(options); - SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator); - - printf("Option config = %d\n", option_config_); - std::vector> key_ranges; - for (int i = 0; i < 500; i++) { - int range_start = rnd.Uniform(20000); - int keys_per_range = 10 + rnd.Uniform(41); - - key_ranges.emplace_back(range_start, range_start + keys_per_range); - } - - int memtable_add = 0; - int success_add_file = 0; - int failed_add_file = 0; - std::map true_data; - for (size_t i = 0; i < key_ranges.size(); i++) { - int range_start = key_ranges[i].first; - int range_end = key_ranges[i].second; - - Status s; - std::string range_val = "range_" + ToString(i); - - // For 20% of ranges we use DB::Put, for 80% we use DB::AddFile - if (i && i % 5 == 0) { - // Use DB::Put to insert range (insert into memtable) - range_val += "_put"; - for (int k = range_start; k <= range_end; k++) { - s = Put(Key(k), range_val); - ASSERT_OK(s); - } - memtable_add++; - } else { - // Use DB::AddFile to insert range - range_val += "_add_file"; - - // Generate the file containing the range - std::string file_name = sst_files_folder + env_->GenerateUniqueId(); - ASSERT_OK(sst_file_writer.Open(file_name)); - for (int k = range_start; k <= range_end; k++) { - s = sst_file_writer.Add(Key(k), range_val); - ASSERT_OK(s); - } - ExternalSstFileInfo file_info; - s = sst_file_writer.Finish(&file_info); - ASSERT_OK(s); - - // Insert the generated file - s = db_->AddFile(&file_info); - - auto it = true_data.lower_bound(Key(range_start)); - if (it != true_data.end() && it->first <= Key(range_end)) { - // This range overlap with data already exist in DB - ASSERT_NOK(s); - failed_add_file++; - } else { - ASSERT_OK(s); - success_add_file++; - } - } - - if (s.ok()) { - // Update true_data map to include the new inserted data - for (int k = range_start; k <= range_end; k++) { - true_data[Key(k)] = range_val; - } - } - - // Flush / Compact the DB - if (i && i 
-TEST_F(DBTest, AddExternalSstFileOverlappingRanges) {
-  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
-  Random rnd(301);
-  do {
-    env_->CreateDir(sst_files_folder);
-    Options options = CurrentOptions();
-    DestroyAndReopen(options);
-    const ImmutableCFOptions ioptions(options);
-    SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
-
-    printf("Option config = %d\n", option_config_);
-    std::vector<std::pair<int, int>> key_ranges;
-    for (int i = 0; i < 500; i++) {
-      int range_start = rnd.Uniform(20000);
-      int keys_per_range = 10 + rnd.Uniform(41);
-
-      key_ranges.emplace_back(range_start, range_start + keys_per_range);
-    }
-
-    int memtable_add = 0;
-    int success_add_file = 0;
-    int failed_add_file = 0;
-    std::map<std::string, std::string> true_data;
-    for (size_t i = 0; i < key_ranges.size(); i++) {
-      int range_start = key_ranges[i].first;
-      int range_end = key_ranges[i].second;
-
-      Status s;
-      std::string range_val = "range_" + ToString(i);
-
-      // For 20% of ranges we use DB::Put, for 80% we use DB::AddFile
-      if (i && i % 5 == 0) {
-        // Use DB::Put to insert range (insert into memtable)
-        range_val += "_put";
-        for (int k = range_start; k <= range_end; k++) {
-          s = Put(Key(k), range_val);
-          ASSERT_OK(s);
-        }
-        memtable_add++;
-      } else {
-        // Use DB::AddFile to insert range
-        range_val += "_add_file";
-
-        // Generate the file containing the range
-        std::string file_name = sst_files_folder + env_->GenerateUniqueId();
-        ASSERT_OK(sst_file_writer.Open(file_name));
-        for (int k = range_start; k <= range_end; k++) {
-          s = sst_file_writer.Add(Key(k), range_val);
-          ASSERT_OK(s);
-        }
-        ExternalSstFileInfo file_info;
-        s = sst_file_writer.Finish(&file_info);
-        ASSERT_OK(s);
-
-        // Insert the generated file
-        s = db_->AddFile(&file_info);
-
-        auto it = true_data.lower_bound(Key(range_start));
-        if (it != true_data.end() && it->first <= Key(range_end)) {
-          // This range overlaps with data that already exists in the DB
-          ASSERT_NOK(s);
-          failed_add_file++;
-        } else {
-          ASSERT_OK(s);
-          success_add_file++;
-        }
-      }
-
-      if (s.ok()) {
-        // Update the true_data map to include the newly inserted data
-        for (int k = range_start; k <= range_end; k++) {
-          true_data[Key(k)] = range_val;
-        }
-      }
-
-      // Flush / Compact the DB
-      if (i && i % 50 == 0) {
-        Flush();
-      }
-      if (i && i % 75 == 0) {
-        db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-      }
-    }
-
-    printf(
-        "Total: %zu ranges\n"
-        "AddFile()|Success: %d ranges\n"
-        "AddFile()|RangeConflict: %d ranges\n"
-        "Put(): %d ranges\n",
-        key_ranges.size(), success_add_file, failed_add_file, memtable_add);
-
-    // Verify the correctness of the data
-    for (const auto& kv : true_data) {
-      ASSERT_EQ(Get(kv.first), kv.second);
-    }
-    printf("keys/values verified\n");
-  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
-                         kSkipFIFOCompaction));
-}
-
-#endif  // ROCKSDB_LITE
-
-// 1 Create some SST files by inserting K-V pairs into DB
-// 2 Close DB and change suffix from ".sst" to ".ldb" for every other SST file
-// 3 Open DB and check if all keys can be read
-TEST_F(DBTest, SSTsWithLdbSuffixHandling) {
-  Options options = CurrentOptions();
-  options.write_buffer_size = 110 << 10;  // 110KB
-  options.num_levels = 4;
-  DestroyAndReopen(options);
-
-  Random rnd(301);
-  int key_id = 0;
-  for (int i = 0; i < 10; ++i) {
-    GenerateNewFile(&rnd, &key_id, false);
-  }
-  Flush();
-  Close();
-  int const num_files = GetSstFileCount(dbname_);
-  ASSERT_GT(num_files, 0);
-
-  std::vector<std::string> filenames;
-  GetSstFiles(dbname_, &filenames);
-  int num_ldb_files = 0;
-  for (size_t i = 0; i < filenames.size(); ++i) {
-    if (i & 1) {
-      continue;
-    }
-    std::string const rdb_name = dbname_ + "/" + filenames[i];
-    std::string const ldb_name = Rocks2LevelTableFileName(rdb_name);
-    ASSERT_TRUE(env_->RenameFile(rdb_name, ldb_name).ok());
-    ++num_ldb_files;
-  }
-  ASSERT_GT(num_ldb_files, 0);
-  ASSERT_EQ(num_files, GetSstFileCount(dbname_));
-
-  Reopen(options);
-  for (int k = 0; k < key_id; ++k) {
-    ASSERT_NE("NOT_FOUND", Get(Key(k)));
-  }
-  Destroy(options);
-}
-
-TEST_F(DBTest, PinnedDataIteratorRandomized) {
-  enum TestConfig {
-    NORMAL,
-    CLOSE_AND_OPEN,
-    COMPACT_BEFORE_READ,
-    FLUSH_EVERY_1000,
-    MAX
-  };
-
-  // Generate Random data
-  Random rnd(301);
-
-  int puts = 100000;
-  int key_pool = static_cast<int>(puts * 0.7);
-  int key_size = 100;
-  int val_size = 1000;
-  int seeks_percentage = 20;   // 20% of keys will be used to test seek()
-  int delete_percentage = 20;  // 20% of keys will be deleted
-  int merge_percentage = 20;   // 20% of keys will be added using Merge()
-
-  for (int run_config = 0; run_config < TestConfig::MAX; run_config++) {
-    Options options = CurrentOptions();
-    BlockBasedTableOptions table_options;
-    table_options.use_delta_encoding = false;
-    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-    options.merge_operator = MergeOperators::CreatePutOperator();
-    DestroyAndReopen(options);
-
-    std::vector<std::string> generated_keys(key_pool);
-    for (int i = 0; i < key_pool; i++) {
-      generated_keys[i] = RandomString(&rnd, key_size);
-    }
-
-    std::map<std::string, std::string> true_data;
-    std::vector<std::string> random_keys;
-    std::vector<std::string> deleted_keys;
-    for (int i = 0; i < puts; i++) {
-      auto& k = generated_keys[rnd.Next() % key_pool];
-      auto v = RandomString(&rnd, val_size);
-
-      // Insert data into the true_data map and into the DB
-      true_data[k] = v;
-      if (rnd.OneIn(static_cast<int>(100.0 / merge_percentage))) {
-        ASSERT_OK(db_->Merge(WriteOptions(), k, v));
-      } else {
-        ASSERT_OK(Put(k, v));
-      }
-
-      // Pick random keys to be used to test Seek()
-      if (rnd.OneIn(static_cast<int>(100.0 / seeks_percentage))) {
-        random_keys.push_back(k);
-      }
-
-      // Delete some random keys
-      if (rnd.OneIn(static_cast<int>(100.0 / delete_percentage))) {
-        deleted_keys.push_back(k);
-        true_data.erase(k);
-        ASSERT_OK(Delete(k));
-      }
-
-      if (run_config == TestConfig::FLUSH_EVERY_1000) {
-        if (i && i % 1000 == 0) {
-          Flush();
-        }
-      }
-    }
-
-    if (run_config == TestConfig::CLOSE_AND_OPEN) {
-      Close();
-      Reopen(options);
-    } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
-      db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-    }
-
-    ReadOptions ro;
-    ro.pin_data = true;
-    auto iter = db_->NewIterator(ro);
-
-    {
-      // Test Seek to random keys
-      printf("Testing seek on %zu keys\n", random_keys.size());
-      std::vector<Slice> keys_slices;
-      std::vector<std::string> true_keys;
-      for (auto& k : random_keys) {
-        iter->Seek(k);
-        if (!iter->Valid()) {
-          ASSERT_EQ(true_data.lower_bound(k), true_data.end());
-          continue;
-        }
-        std::string prop_value;
-        ASSERT_OK(
-            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-        ASSERT_EQ("1", prop_value);
-        keys_slices.push_back(iter->key());
-        true_keys.push_back(true_data.lower_bound(k)->first);
-      }
-
-      for (size_t i = 0; i < keys_slices.size(); i++) {
-        ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
-      }
-    }
-
-    {
-      // Test iterating over all data forward
-      printf("Testing iterating forward on all keys\n");
-      std::vector<Slice> all_keys;
-      for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-        std::string prop_value;
-        ASSERT_OK(
-            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-        ASSERT_EQ("1", prop_value);
-        all_keys.push_back(iter->key());
-      }
-      ASSERT_EQ(all_keys.size(), true_data.size());
-
-      // Verify that all key slices are valid
-      auto data_iter = true_data.begin();
-      for (size_t i = 0; i < all_keys.size(); i++) {
-        ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
-        data_iter++;
-      }
-    }
-
-    {
-      // Test iterating over all data backward
-      printf("Testing iterating backward on all keys\n");
-      std::vector<Slice> all_keys;
-      for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
-        std::string prop_value;
-        ASSERT_OK(
-            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-        ASSERT_EQ("1", prop_value);
-        all_keys.push_back(iter->key());
-      }
-      ASSERT_EQ(all_keys.size(), true_data.size());
-
-      // Verify that all key slices are valid (backward)
-      auto data_iter = true_data.rbegin();
-      for (size_t i = 0; i < all_keys.size(); i++) {
-        ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
-        data_iter++;
-      }
-    }
-
-    delete iter;
-  }
-}
-
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, PinnedDataIteratorMultipleFiles) {
-  Options options = CurrentOptions();
-  BlockBasedTableOptions table_options;
-  table_options.use_delta_encoding = false;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  options.disable_auto_compactions = true;
-  options.write_buffer_size = 1024 * 1024 * 10;  // 10 MB
-  DestroyAndReopen(options);
-
-  std::map<std::string, std::string> true_data;
-
-  // Generate 4 sst files in L2
-  Random rnd(301);
-  for (int i = 1; i <= 1000; i++) {
-    std::string k = Key(i * 3);
-    std::string v = RandomString(&rnd, 100);
-    ASSERT_OK(Put(k, v));
-    true_data[k] = v;
-    if (i % 250 == 0) {
-      ASSERT_OK(Flush());
-    }
-  }
-  ASSERT_EQ(FilesPerLevel(0), "4");
-  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  ASSERT_EQ(FilesPerLevel(0), "0,4");
-
-  // Generate 4 sst files in L0
-  for (int i = 1; i <= 1000; i++) {
-    std::string k = Key(i * 2);
-    std::string v = RandomString(&rnd, 100);
-    ASSERT_OK(Put(k, v));
-    true_data[k] = v;
-    if (i % 250 == 0) {
-      ASSERT_OK(Flush());
-    }
-  }
-  ASSERT_EQ(FilesPerLevel(0), "4,4");
-
-  // Add some keys/values in memtables
-  for (int i = 1; i <= 1000; i++) {
-    std::string k = Key(i);
-    std::string v = RandomString(&rnd, 100);
-    ASSERT_OK(Put(k, v));
-    true_data[k] = v;
-  }
-  ASSERT_EQ(FilesPerLevel(0), "4,4");
-
-  ReadOptions ro;
-  ro.pin_data = true;
-  auto iter = db_->NewIterator(ro);
-
-  std::vector<std::pair<Slice, std::string>> results;
-  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-    std::string prop_value;
-    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-    ASSERT_EQ("1", prop_value);
-    results.emplace_back(iter->key(), iter->value().ToString());
-  }
-
-  ASSERT_EQ(results.size(), true_data.size());
-  auto data_iter = true_data.begin();
-  for (size_t i = 0; i < results.size(); i++, data_iter++) {
-    auto& kv = results[i];
-    ASSERT_EQ(kv.first, data_iter->first);
-    ASSERT_EQ(kv.second, data_iter->second);
-  }
-
-  delete iter;
-}
-#endif
-
-TEST_F(DBTest, PinnedDataIteratorMergeOperator) {
-  Options options = CurrentOptions();
-  BlockBasedTableOptions table_options;
-  table_options.use_delta_encoding = false;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  options.merge_operator = MergeOperators::CreateUInt64AddOperator();
-  DestroyAndReopen(options);
-
-  std::string numbers[7];
-  for (int val = 0; val <= 6; val++) {
-    PutFixed64(numbers + val, val);
-  }
-
-  // +1 all keys in range [0 => 999]
-  for (int i = 0; i < 1000; i++) {
-    WriteOptions wo;
-    ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
-  }
-
-  // +2 all keys divisible by 2 in range [0 => 999]
-  for (int i = 0; i < 1000; i += 2) {
-    WriteOptions wo;
-    ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
-  }
-
-  // +3 all keys divisible by 5 in range [0 => 999]
-  for (int i = 0; i < 1000; i += 5) {
-    WriteOptions wo;
-    ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
-  }
-
-  ReadOptions ro;
-  ro.pin_data = true;
-  auto iter = db_->NewIterator(ro);
-
-  std::vector<std::pair<Slice, std::string>> results;
-  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-    std::string prop_value;
-    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-    ASSERT_EQ("1", prop_value);
-    results.emplace_back(iter->key(), iter->value().ToString());
-  }
-
-  ASSERT_EQ(results.size(), 1000);
-  for (size_t i = 0; i < results.size(); i++) {
-    auto& kv = results[i];
-    ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
-    int expected_val = 1;
-    if (i % 2 == 0) {
-      expected_val += 2;
-    }
-    if (i % 5 == 0) {
-      expected_val += 3;
-    }
-    ASSERT_EQ(kv.second, numbers[expected_val]);
-  }
-
-  delete iter;
-}
-
-TEST_F(DBTest, PinnedDataIteratorReadAfterUpdate) {
-  Options options = CurrentOptions();
-  BlockBasedTableOptions table_options;
-  table_options.use_delta_encoding = false;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  options.write_buffer_size = 100000;
-  DestroyAndReopen(options);
-
-  Random rnd(301);
-
-  std::map<std::string, std::string> true_data;
-  for (int i = 0; i < 1000; i++) {
-    std::string k = RandomString(&rnd, 10);
-    std::string v = RandomString(&rnd, 1000);
-    ASSERT_OK(Put(k, v));
-    true_data[k] = v;
-  }
-
-  ReadOptions ro;
-  ro.pin_data = true;
-  auto iter = db_->NewIterator(ro);
-
-  // Delete 50% of the keys and update the other 50%
-  for (auto& kv : true_data) {
-    if (rnd.OneIn(2)) {
-      ASSERT_OK(Delete(kv.first));
-    } else {
-      std::string new_val = RandomString(&rnd, 1000);
-      ASSERT_OK(Put(kv.first, new_val));
-    }
-  }
-
-  std::vector<std::pair<Slice, std::string>> results;
-  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-    std::string prop_value;
-    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-    ASSERT_EQ("1", prop_value);
-    results.emplace_back(iter->key(), iter->value().ToString());
-  }
-
-  auto data_iter = true_data.begin();
-  for (size_t i = 0; i < results.size(); i++, data_iter++) {
-    auto& kv = results[i];
-    ASSERT_EQ(kv.first, data_iter->first);
-    ASSERT_EQ(kv.second, data_iter->second);
-  }
-
-  delete iter;
-}
-
 INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam,
                         ::testing::Combine(::testing::Values(1, 4),
                                            ::testing::Bool()));
@@ -10138,326 +5919,8 @@ TEST_F(DBTest, PauseBackgroundWorkTest) {
   ASSERT_EQ(true, done.load());
 }
 
-class SliceTransformLimitedDomain : public SliceTransform {
-  const char* Name() const override { return "SliceTransformLimitedDomain"; }
-
-  Slice Transform(const Slice& src) const override {
-    return Slice(src.data(), 5);
-  }
-
-  bool InDomain(const Slice& src) const override {
-    // prefix will be x????
-    return src.size() >= 5 && src[0] == 'x';
-  }
-
-  bool InRange(const Slice& dst) const override {
-    // prefix will be x????
-    return dst.size() == 5 && dst[0] == 'x';
-  }
-};
-
-TEST_F(DBTest, PrefixExtractorFullFilter) {
-  BlockBasedTableOptions bbto;
-  // Full Filter Block
-  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
-  bbto.whole_key_filtering = false;
-
-  Options options = CurrentOptions();
-  options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
-  DestroyAndReopen(options);
-
-  ASSERT_OK(Put("x1111_AAAA", "val1"));
-  ASSERT_OK(Put("x1112_AAAA", "val2"));
-  ASSERT_OK(Put("x1113_AAAA", "val3"));
-  ASSERT_OK(Put("x1114_AAAA", "val4"));
-  // Not in domain, won't be added to filter
-  ASSERT_OK(Put("zzzzz_AAAA", "val5"));
-
-  ASSERT_OK(Flush());
-
-  ASSERT_EQ(Get("x1111_AAAA"), "val1");
-  ASSERT_EQ(Get("x1112_AAAA"), "val2");
-  ASSERT_EQ(Get("x1113_AAAA"), "val3");
-  ASSERT_EQ(Get("x1114_AAAA"), "val4");
-  // Was not added to the filter, but RocksDB will try to read it from the
-  // filter
-  ASSERT_EQ(Get("zzzzz_AAAA"), "val5");
-}
-
-TEST_F(DBTest, PrefixExtractorBlockFilter) {
-  BlockBasedTableOptions bbto;
-  // Block Filter Block
-  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
-
-  Options options = CurrentOptions();
-  options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
-  DestroyAndReopen(options);
-
-  ASSERT_OK(Put("x1113_AAAA", "val3"));
-  ASSERT_OK(Put("x1114_AAAA", "val4"));
-  // Not in domain, won't be added to filter
-  ASSERT_OK(Put("zzzzz_AAAA", "val1"));
-  ASSERT_OK(Put("zzzzz_AAAB", "val2"));
-  ASSERT_OK(Put("zzzzz_AAAC", "val3"));
-  ASSERT_OK(Put("zzzzz_AAAD", "val4"));
-
-  ASSERT_OK(Flush());
-
-  std::vector<std::string> iter_res;
-  auto iter = db_->NewIterator(ReadOptions());
-  // Seek to a key that was not in Domain
-  for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) {
-    iter_res.emplace_back(iter->value().ToString());
-  }
-
-  std::vector<std::string> expected_res = {"val1", "val2", "val3", "val4"};
-  ASSERT_EQ(iter_res, expected_res);
-  delete iter;
-}
-
-TEST_F(DBTest, IteratorWithLocalStatistics) {
-  Options options = CurrentOptions();
-  options.statistics = rocksdb::CreateDBStatistics();
-  DestroyAndReopen(options);
-
-  Random rnd(301);
-  for (int i = 0; i < 1000; i++) {
-    // Key 10 bytes / Value 10 bytes
-    ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10)));
-  }
-
-  std::atomic<uint64_t> total_next(0);
-  std::atomic<uint64_t> total_next_found(0);
-  std::atomic<uint64_t> total_prev(0);
-  std::atomic<uint64_t> total_prev_found(0);
-  std::atomic<uint64_t> total_bytes(0);
-
-  std::vector<std::thread> threads;
-  std::function<void()> reader_func_next = [&]() {
-    Iterator* iter = db_->NewIterator(ReadOptions());
-
-    iter->SeekToFirst();
-    // Seek will bump ITER_BYTES_READ
-    total_bytes += iter->key().size();
-    total_bytes += iter->value().size();
-    while (true) {
-      iter->Next();
-      total_next++;
-
-      if (!iter->Valid()) {
-        break;
-      }
-      total_next_found++;
-      total_bytes += iter->key().size();
-      total_bytes += iter->value().size();
-    }
-
-    delete iter;
-  };
-
-  std::function<void()> reader_func_prev = [&]() {
-    Iterator* iter = db_->NewIterator(ReadOptions());
-
-    iter->SeekToLast();
-    // Seek will bump ITER_BYTES_READ
-    total_bytes += iter->key().size();
-    total_bytes += iter->value().size();
-    while (true) {
-      iter->Prev();
-      total_prev++;
-
-      if (!iter->Valid()) {
-        break;
-      }
-      total_prev_found++;
-      total_bytes += iter->key().size();
-      total_bytes += iter->value().size();
-    }
-
-    delete iter;
-  };
-
-  for (int i = 0; i < 10; i++) {
-    threads.emplace_back(reader_func_next);
-  }
-  for (int i = 0; i < 15; i++) {
-    threads.emplace_back(reader_func_prev);
-  }
-
-  for (auto& t : threads) {
-    t.join();
-  }
-
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), total_next);
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
-            total_next_found);
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), total_prev);
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
-            total_prev_found);
-  ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), total_bytes);
-}
-
-#ifndef ROCKSDB_LITE
-class BloomStatsTestWithParam
-    : public DBTest,
-      public testing::WithParamInterface<std::tuple<bool, bool>> {
- public:
-  BloomStatsTestWithParam() {
-    use_block_table_ = std::get<0>(GetParam());
-    use_block_based_builder_ = std::get<1>(GetParam());
-
-    options_.create_if_missing = true;
-    options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
-    options_.memtable_prefix_bloom_bits = 8 * 1024;
-    if (use_block_table_) {
-      BlockBasedTableOptions table_options;
-      table_options.hash_index_allow_collision = false;
-      table_options.filter_policy.reset(
-          NewBloomFilterPolicy(10, use_block_based_builder_));
-      options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
-    } else {
-      PlainTableOptions table_options;
-      options_.table_factory.reset(NewPlainTableFactory(table_options));
-    }
-
-    perf_context.Reset();
-    DestroyAndReopen(options_);
-  }
-
-  ~BloomStatsTestWithParam() {
-    perf_context.Reset();
-    Destroy(options_);
-  }
-
-  // Required if inheriting from testing::WithParamInterface<>
-  static void SetUpTestCase() {}
-  static void TearDownTestCase() {}
-
-  bool use_block_table_;
-  bool use_block_based_builder_;
-  Options options_;
-};
-
-// 1 Insert 2 K-V pairs into DB
-// 2 Call Get() for both keys - expect memtable bloom hit stat to be 2
-// 3 Call Get() for nonexistent key - expect memtable bloom miss stat to be 1
-// 4 Call Flush() to create SST
-// 5 Call Get() for both keys - expect SST bloom hit stat to be 2
-// 6 Call Get() for nonexistent key - expect SST bloom miss stat to be 1
-// Test both: block and plain SST
-TEST_P(BloomStatsTestWithParam, BloomStatsTest) {
-  std::string key1("AAAA");
-  std::string key2("RXDB");  // not in DB
-  std::string key3("ZBRA");
-  std::string value1("Value1");
-  std::string value3("Value3");
-
-  ASSERT_OK(Put(key1, value1, WriteOptions()));
-  ASSERT_OK(Put(key3, value3, WriteOptions()));
-
-  // check memtable bloom stats
-  ASSERT_EQ(value1, Get(key1));
-  ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
-  ASSERT_EQ(value3, Get(key3));
-  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
-  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
-
-  ASSERT_EQ("NOT_FOUND", Get(key2));
-  ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
-  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
-
-  // sanity checks
-  ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
-  ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
-
-  Flush();
-
-  // sanity checks
-  ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
-  ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
-
-  // check SST bloom stats
-  // NOTE: hits per get differs because of code path differences
-  // in BlockBasedTable::Get()
-  int hits_per_get = use_block_table_ && !use_block_based_builder_ ? 2 : 1;
-  ASSERT_EQ(value1, Get(key1));
-  ASSERT_EQ(hits_per_get, perf_context.bloom_sst_hit_count);
-  ASSERT_EQ(value3, Get(key3));
-  ASSERT_EQ(2 * hits_per_get, perf_context.bloom_sst_hit_count);
-
-  ASSERT_EQ("NOT_FOUND", Get(key2));
-  ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
-}
-
-// Same scenario as in BloomStatsTest but using an iterator
-TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
-  std::string key1("AAAA");
-  std::string key2("RXDB");  // not in DB
-  std::string key3("ZBRA");
-  std::string value1("Value1");
-  std::string value3("Value3");
-
-  ASSERT_OK(Put(key1, value1, WriteOptions()));
-  ASSERT_OK(Put(key3, value3, WriteOptions()));
-
-  unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions()));
-
-  // check memtable bloom stats
-  iter->Seek(key1);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(iter->Valid());
-  ASSERT_EQ(value1, iter->value().ToString());
-  ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
-  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
-
-  iter->Seek(key3);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(iter->Valid());
-  ASSERT_EQ(value3, iter->value().ToString());
-  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
-  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
-
-  iter->Seek(key2);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(!iter->Valid());
-  ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
-  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
-
-  Flush();
-
-  iter.reset(dbfull()->NewIterator(ReadOptions()));
-
-  // Check SST bloom stats
-  iter->Seek(key1);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(iter->Valid());
-  ASSERT_EQ(value1, iter->value().ToString());
-  ASSERT_EQ(1, perf_context.bloom_sst_hit_count);
-
-  iter->Seek(key3);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(iter->Valid());
-  ASSERT_EQ(value3, iter->value().ToString());
-  ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
-
-  iter->Seek(key2);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(!iter->Valid());
-  ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
-  ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
-}
-
-INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam,
-                        ::testing::Values(std::make_tuple(true, true),
-                                          std::make_tuple(true, false),
-                                          std::make_tuple(false, false)));
-#endif  // ROCKSDB_LITE
 } // namespace rocksdb
-
 int main(int argc, char** argv) {
   rocksdb::port::InstallStackTraceHandler();
   ::testing::InitGoogleTest(&argc, argv);
diff --git a/db/db_test2.cc b/db/db_test2.cc
index 7c02a6ffe..32487b1c4 100644
--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@@ -13,11 +13,6 @@
 
 namespace rocksdb {
 
-static uint64_t TestGetTickerCount(const Options& options,
-                                   Tickers ticker_type) {
-  return options.statistics->getTickerCount(ticker_type);
-}
-
 class DBTest2 : public DBTestBase {
  public:
   DBTest2() : DBTestBase("/db_test2") {}
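This hunk removes db_test2.cc's private copy of TestGetTickerCount; the db_test_util.h change further down re-homes the helper on DBTestBase so that all test files share one definition. A minimal usage sketch, illustrative only and assuming a fixture derived from DBTestBase:

    // Statistics must be installed on the options before any counters exist.
    Options options = CurrentOptions();
    options.statistics = rocksdb::CreateDBStatistics();
    Reopen(options);
    ASSERT_OK(Put("key", "val"));
    ASSERT_OK(Flush());
    uint64_t blocks_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);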
diff --git a/db/db_test_util.cc b/db/db_test_util.cc
index 1af09e286..7e2adaa78 100644
--- a/db/db_test_util.cc
+++ b/db/db_test_util.cc
@@ -1043,4 +1043,34 @@ std::unordered_map<std::string, uint64_t> DBTestBase::GetAllSSTFiles(
   return res;
 }
 
+std::vector<uint64_t> DBTestBase::ListTableFiles(Env* env,
+                                                 const std::string& path) {
+  std::vector<std::string> files;
+  std::vector<uint64_t> file_numbers;
+  env->GetChildren(path, &files);
+  uint64_t number;
+  FileType type;
+  for (size_t i = 0; i < files.size(); ++i) {
+    if (ParseFileName(files[i], &number, &type)) {
+      if (type == kTableFile) {
+        file_numbers.push_back(number);
+      }
+    }
+  }
+  return file_numbers;
+}
+
+#ifndef ROCKSDB_LITE
+uint64_t DBTestBase::GetNumberOfSstFilesForColumnFamily(
+    DB* db, std::string column_family_name) {
+  std::vector<LiveFileMetaData> metadata;
+  db->GetLiveFilesMetaData(&metadata);
+  uint64_t result = 0;
+  for (auto& fileMetadata : metadata) {
+    result += (fileMetadata.column_family_name == column_family_name);
+  }
+  return result;
+}
+#endif  // ROCKSDB_LITE
+
 } // namespace rocksdb
diff --git a/db/db_test_util.h b/db/db_test_util.h
index ca2b466e9..adda26230 100644
--- a/db/db_test_util.h
+++ b/db/db_test_util.h
@@ -39,6 +39,7 @@
 #include "rocksdb/filter_policy.h"
 #include "rocksdb/options.h"
 #include "rocksdb/slice.h"
+#include "rocksdb/statistics.h"
 #include "rocksdb/table.h"
 #include "rocksdb/utilities/checkpoint.h"
 #include "table/block_based_table_factory.h"
@@ -491,6 +492,31 @@ class SpecialEnv : public EnvWrapper {
   std::atomic<bool> is_wal_sync_thread_safe_{true};
 };
 
+class OnFileDeletionListener : public EventListener {
+ public:
+  OnFileDeletionListener() : matched_count_(0), expected_file_name_("") {}
+
+  void SetExpectedFileName(const std::string file_name) {
+    expected_file_name_ = file_name;
+  }
+
+  void VerifyMatchedCount(size_t expected_value) {
+    ASSERT_EQ(matched_count_, expected_value);
+  }
+
+  void OnTableFileDeleted(const TableFileDeletionInfo& info) override {
+    if (expected_file_name_ != "") {
+      ASSERT_EQ(expected_file_name_, info.file_path);
+      expected_file_name_ = "";
+      matched_count_++;
+    }
+  }
+
+ private:
+  size_t matched_count_;
+  std::string expected_file_name_;
+};
+
 class DBTestBase : public testing::Test {
  protected:
   // Sequence of option configurations to try
@@ -756,6 +782,17 @@ class DBTestBase : public testing::Test {
 
   std::unordered_map<std::string, uint64_t> GetAllSSTFiles(
       uint64_t* total_size = nullptr);
+
+  std::vector<uint64_t> ListTableFiles(Env* env, const std::string& path);
+
+#ifndef ROCKSDB_LITE
+  uint64_t GetNumberOfSstFilesForColumnFamily(DB* db,
+                                              std::string column_family_name);
+#endif  // ROCKSDB_LITE
+
+  uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) {
+    return options.statistics->getTickerCount(ticker_type);
+  }
 };
 
 } // namespace rocksdb
diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc
index ded8ed9b4..868039a5c 100644
--- a/db/db_universal_compaction_test.cc
+++ b/db/db_universal_compaction_test.cc
@@ -14,11 +14,6 @@
 
 namespace rocksdb {
 
-static uint64_t TestGetTickerCount(const Options& options,
-                                   Tickers ticker_type) {
-  return options.statistics->getTickerCount(ticker_type);
-}
-
 static std::string CompressibleString(Random* rnd, int len) {
   std::string r;
   test::CompressibleString(rnd, 0.8, len, &r);
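The OnFileDeletionListener added to db_test_util.h above is a plain EventListener, so a test wires it up through Options::listeners before (re)opening the DB. A hedged sketch; the SST file name below is hypothetical:

    auto listener = std::make_shared<OnFileDeletionListener>();
    Options options = CurrentOptions();
    options.listeners.emplace_back(listener);  // must be registered before open
    Reopen(options);
    listener->SetExpectedFileName(dbname_ + "/000042.sst");  // hypothetical name
    // ... delete the file, e.g. via a compaction or DB::DeleteFile() ...
    listener->VerifyMatchedCount(1);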
ASSERT_EQ("v1", Get(1, "foo")); + + ASSERT_EQ("v1", Get(1, "foo")); + ASSERT_EQ("v5", Get(1, "baz")); + ASSERT_OK(Put(1, "bar", "v2")); + ASSERT_OK(Put(1, "foo", "v3")); + + ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); + ASSERT_EQ("v3", Get(1, "foo")); + ASSERT_OK(Put(1, "foo", "v4")); + ASSERT_EQ("v4", Get(1, "foo")); + ASSERT_EQ("v2", Get(1, "bar")); + ASSERT_EQ("v5", Get(1, "baz")); + } while (ChangeOptions()); +} + +TEST_F(DBWALTest, RecoverWithTableHandle) { + do { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.disable_auto_compactions = true; + DestroyAndReopen(options); + CreateAndReopenWithCF({"pikachu"}, options); + + ASSERT_OK(Put(1, "foo", "v1")); + ASSERT_OK(Put(1, "bar", "v2")); + ASSERT_OK(Flush(1)); + ASSERT_OK(Put(1, "foo", "v3")); + ASSERT_OK(Put(1, "bar", "v4")); + ASSERT_OK(Flush(1)); + ASSERT_OK(Put(1, "big", std::string(100, 'a'))); + ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); + + std::vector> files; + dbfull()->TEST_GetFilesMetaData(handles_[1], &files); + size_t total_files = 0; + for (const auto& level : files) { + total_files += level.size(); + } + ASSERT_EQ(total_files, 3); + for (const auto& level : files) { + for (const auto& file : level) { + if (kInfiniteMaxOpenFiles == option_config_) { + ASSERT_TRUE(file.table_reader_handle != nullptr); + } else { + ASSERT_TRUE(file.table_reader_handle == nullptr); + } + } + } + } while (ChangeOptions()); +} + +TEST_F(DBWALTest, IgnoreRecoveredLog) { + std::string backup_logs = dbname_ + "/backup_logs"; + + // delete old files in backup_logs directory + env_->CreateDirIfMissing(backup_logs); + std::vector old_files; + env_->GetChildren(backup_logs, &old_files); + for (auto& file : old_files) { + if (file != "." && file != "..") { + env_->DeleteFile(backup_logs + "/" + file); + } + } + + do { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.merge_operator = MergeOperators::CreateUInt64AddOperator(); + options.wal_dir = dbname_ + "/logs"; + DestroyAndReopen(options); + + // fill up the DB + std::string one, two; + PutFixed64(&one, 1); + PutFixed64(&two, 2); + ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one))); + ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one))); + ASSERT_OK(db_->Merge(WriteOptions(), Slice("bar"), Slice(one))); + + // copy the logs to backup + std::vector logs; + env_->GetChildren(options.wal_dir, &logs); + for (auto& log : logs) { + if (log != ".." && log != ".") { + CopyFile(options.wal_dir + "/" + log, backup_logs + "/" + log); + } + } + + // recover the DB + Reopen(options); + ASSERT_EQ(two, Get("foo")); + ASSERT_EQ(one, Get("bar")); + Close(); + + // copy the logs from backup back to wal dir + for (auto& log : logs) { + if (log != ".." && log != ".") { + CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); + } + } + // this should ignore the log files, recovery should not happen again + // if the recovery happens, the same merge operator would be called twice, + // leading to incorrect results + Reopen(options); + ASSERT_EQ(two, Get("foo")); + ASSERT_EQ(one, Get("bar")); + Close(); + Destroy(options); + Reopen(options); + Close(); + + // copy the logs from backup back to wal dir + env_->CreateDirIfMissing(options.wal_dir); + for (auto& log : logs) { + if (log != ".." 
&& log != ".") { + CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); + } + } + // assert that we successfully recovered only from logs, even though we + // destroyed the DB + Reopen(options); + ASSERT_EQ(two, Get("foo")); + ASSERT_EQ(one, Get("bar")); + + // Recovery will fail if DB directory doesn't exist. + Destroy(options); + // copy the logs from backup back to wal dir + env_->CreateDirIfMissing(options.wal_dir); + for (auto& log : logs) { + if (log != ".." && log != ".") { + CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); + // we won't be needing this file no more + env_->DeleteFile(backup_logs + "/" + log); + } + } + Status s = TryReopen(options); + ASSERT_TRUE(!s.ok()); + } while (ChangeOptions(kSkipHashCuckoo)); +} + +TEST_F(DBWALTest, RecoveryWithEmptyLog) { + do { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + ASSERT_OK(Put(1, "foo", "v1")); + ASSERT_OK(Put(1, "foo", "v2")); + ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); + ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); + ASSERT_OK(Put(1, "foo", "v3")); + ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); + ASSERT_EQ("v3", Get(1, "foo")); + } while (ChangeOptions()); +} + +#ifndef ROCKSDB_LITE +TEST_F(DBWALTest, RecoverWithLargeLog) { + do { + { + Options options = CurrentOptions(); + CreateAndReopenWithCF({"pikachu"}, options); + ASSERT_OK(Put(1, "big1", std::string(200000, '1'))); + ASSERT_OK(Put(1, "big2", std::string(200000, '2'))); + ASSERT_OK(Put(1, "small3", std::string(10, '3'))); + ASSERT_OK(Put(1, "small4", std::string(10, '4'))); + ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); + } + + // Make sure that if we re-open with a small write buffer size that + // we flush table files in the middle of a large log file. + Options options; + options.write_buffer_size = 100000; + options = CurrentOptions(options); + ReopenWithColumnFamilies({"default", "pikachu"}, options); + ASSERT_EQ(NumTableFilesAtLevel(0, 1), 3); + ASSERT_EQ(std::string(200000, '1'), Get(1, "big1")); + ASSERT_EQ(std::string(200000, '2'), Get(1, "big2")); + ASSERT_EQ(std::string(10, '3'), Get(1, "small3")); + ASSERT_EQ(std::string(10, '4'), Get(1, "small4")); + ASSERT_GT(NumTableFilesAtLevel(0, 1), 1); + } while (ChangeCompactOptions()); +} + +// In https://reviews.facebook.net/D20661 we change +// recovery behavior: previously for each log file each column family +// memtable was flushed, even it was empty. 
+// In https://reviews.facebook.net/D20661 we change
+// recovery behavior: previously for each log file each column family
+// memtable was flushed, even if it was empty. Now it's changed:
+// we try to create the smallest number of table files by merging
+// updates from multiple logs
+TEST_F(DBWALTest, RecoverCheckFileAmountWithSmallWriteBuffer) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 5000000;
+  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
+
+  // Since we will reopen the DB with a smaller write_buffer_size,
+  // each key will go to a new SST file
+  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
+  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
+  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
+  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
+
+  ASSERT_OK(Put(3, Key(10), DummyString(1)));
+  // Force a flush of 'dobrynia' so a new WAL file is created
+  ASSERT_OK(Put(2, Key(10), DummyString(7500000)));
+  ASSERT_OK(Put(2, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
+  {
+    auto tables = ListTableFiles(env_, dbname_);
+    ASSERT_EQ(tables.size(), static_cast<size_t>(1));
+    // Make sure 'dobrynia' was flushed: check the number of SST files
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(1));
+  }
+  // New WAL file
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(3, Key(10), DummyString(1)));
+  ASSERT_OK(Put(3, Key(10), DummyString(1)));
+  ASSERT_OK(Put(3, Key(10), DummyString(1)));
+
+  options.write_buffer_size = 4096;
+  options.arena_block_size = 4096;
+  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
+                           options);
+  {
+    // No inserts => default is empty
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
+              static_cast<uint64_t>(0));
+    // The first 4 keys go to separate SSTs + 1 more SST for the 2 smaller keys
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
+              static_cast<uint64_t>(5));
+    // 1 SST for the big key + 1 SST for the small one
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(2));
+    // 1 SST for all keys
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(1));
+  }
+}
+
+// In https://reviews.facebook.net/D20661 we change
+// recovery behavior: previously for each log file each column family
+// memtable was flushed, even if it wasn't empty. Now it's changed:
+// we try to create the smallest number of table files by merging
+// updates from multiple logs
+TEST_F(DBWALTest, RecoverCheckFileAmount) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 100000;
+  options.arena_block_size = 4 * 1024;
+  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
+
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(2, Key(1), DummyString(1)));
+
+  // Make the 'nikitich' memtable be flushed
+  ASSERT_OK(Put(3, Key(10), DummyString(1002400)));
+  ASSERT_OK(Put(3, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
+  // 4 memtables are not flushed; 1 SST file
+  {
+    auto tables = ListTableFiles(env_, dbname_);
+    ASSERT_EQ(tables.size(), static_cast<size_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(1));
+  }
+  // The memtable for 'nikitich' has been flushed and a new WAL file opened;
+  // 4 memtables are still not flushed
+
+  // Write to the new WAL file
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(2, Key(1), DummyString(1)));
+
+  // Fill up 'nikitich' one more time
+  ASSERT_OK(Put(3, Key(10), DummyString(1002400)));
+  // make it flush
+  ASSERT_OK(Put(3, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
+  // There are still 4 memtables not flushed, and 2 SST files
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(2, Key(1), DummyString(1)));
+
+  {
+    auto tables = ListTableFiles(env_, dbname_);
+    ASSERT_EQ(tables.size(), static_cast<size_t>(2));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(2));
+  }
+
+  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
+                           options);
+  {
+    std::vector<uint64_t> table_files = ListTableFiles(env_, dbname_);
+    // Check that records for 'default', 'dobrynia' and 'pikachu' from the
+    // first, second and third WALs went to the same SST.
+    // So, there are 6 SSTs: three for 'nikitich', one for 'default', one for
+    // 'dobrynia', one for 'pikachu'
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(3));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
+              static_cast<uint64_t>(1));
+  }
+}
+
+TEST_F(DBWALTest, SyncMultipleLogs) {
+  const uint64_t kNumBatches = 2;
+  const int kBatchSize = 1000;
+
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  options.write_buffer_size = 4096;
+  Reopen(options);
+
+  WriteBatch batch;
+  WriteOptions wo;
+  wo.sync = true;
+
+  for (uint64_t b = 0; b < kNumBatches; b++) {
+    batch.Clear();
+    for (int i = 0; i < kBatchSize; i++) {
+      batch.Put(Key(i), DummyString(128));
+    }
+
+    dbfull()->Write(wo, &batch);
+  }
+
+  ASSERT_OK(dbfull()->SyncWAL());
+}
+
+//
+// Test WAL recovery for the various modes available
+//
+class RecoveryTestHelper {
+ public:
+  // Number of WAL files to generate
+  static const int kWALFilesCount = 10;
+  // Starting number for the WAL file name like 00010.log
+  static const int kWALFileOffset = 10;
+  // Keys to be written per WAL file
+  static const int kKeysPerWALFile = 1024;
+  // Size of the value
+  static const int kValueSize = 10;
+
+  // Create WAL files with values filled in
+  static void FillData(DBWALTest* test, const Options& options,
+                       const size_t wal_count, size_t* count) {
+    const DBOptions& db_options = options;
+
+    *count = 0;
+
+    shared_ptr<Cache> table_cache = NewLRUCache(50000, 16);
+    EnvOptions env_options;
+    WriteBuffer write_buffer(db_options.db_write_buffer_size);
+
+    unique_ptr<VersionSet> versions;
+    unique_ptr<WalManager> wal_manager;
+    WriteController write_controller;
+
+    versions.reset(new VersionSet(test->dbname_, &db_options, env_options,
+                                  table_cache.get(), &write_buffer,
+                                  &write_controller));
+
+    wal_manager.reset(new WalManager(db_options, env_options));
+
+    std::unique_ptr<log::Writer> current_log_writer;
+
+    for (size_t j = kWALFileOffset; j < wal_count + kWALFileOffset; j++) {
+      uint64_t current_log_number = j;
+      std::string fname = LogFileName(test->dbname_, current_log_number);
+      unique_ptr<WritableFile> file;
+      ASSERT_OK(db_options.env->NewWritableFile(fname, &file, env_options));
+      unique_ptr<WritableFileWriter> file_writer(
+          new WritableFileWriter(std::move(file), env_options));
+      current_log_writer.reset(
+          new log::Writer(std::move(file_writer), current_log_number,
+                          db_options.recycle_log_file_num > 0));
+
+      for (int i = 0; i < kKeysPerWALFile; i++) {
+        std::string key = "key" + ToString((*count)++);
+        std::string value = test->DummyString(kValueSize);
+        assert(current_log_writer.get() != nullptr);
+        uint64_t seq = versions->LastSequence() + 1;
+        WriteBatch batch;
+        batch.Put(key, value);
+        WriteBatchInternal::SetSequence(&batch, seq);
+        current_log_writer->AddRecord(WriteBatchInternal::Contents(&batch));
+        versions->SetLastSequence(seq);
+      }
+    }
+  }
+
+  // Recreate and fill the store with some data
+  static size_t FillData(DBWALTest* test, Options* options) {
+    options->create_if_missing = true;
+    test->DestroyAndReopen(*options);
+    test->Close();
+
+    size_t count = 0;
+    FillData(test, *options, kWALFilesCount, &count);
+    return count;
+  }
+
+  // Read back all the keys we wrote and return the number of keys found
+  static size_t GetData(DBWALTest* test) {
+    size_t count = 0;
+    for (size_t i = 0; i < kWALFilesCount * kKeysPerWALFile; i++) {
+      if (test->Get("key" + ToString(i)) != "NOT_FOUND") {
"NOT_FOUND") { + ++count; + } + } + return count; + } + + // Manuall corrupt the specified WAL + static void CorruptWAL(DBWALTest* test, const Options& options, + const double off, const double len, + const int wal_file_id, const bool trunc = false) { + Env* env = options.env; + std::string fname = LogFileName(test->dbname_, wal_file_id); + uint64_t size; + ASSERT_OK(env->GetFileSize(fname, &size)); + ASSERT_GT(size, 0); +#ifdef OS_WIN + // Windows disk cache behaves differently. When we truncate + // the original content is still in the cache due to the original + // handle is still open. Generally, in Windows, one prohibits + // shared access to files and it is not needed for WAL but we allow + // it to induce corruption at various tests. + test->Close(); +#endif + if (trunc) { + ASSERT_EQ(0, truncate(fname.c_str(), static_cast(size * off))); + } else { + InduceCorruption(fname, static_cast(size * off), + static_cast(size * len)); + } + } + + // Overwrite data with 'a' from offset for length len + static void InduceCorruption(const std::string& filename, size_t offset, + size_t len) { + ASSERT_GT(len, 0U); + + int fd = open(filename.c_str(), O_RDWR); + + ASSERT_GT(fd, 0); + ASSERT_EQ(offset, lseek(fd, offset, SEEK_SET)); + + void* buf = alloca(len); + memset(buf, 'a', len); + ASSERT_EQ(len, write(fd, buf, static_cast(len))); + + close(fd); + } +}; + +// Test scope: +// - We expect to open the data store when there is incomplete trailing writes +// at the end of any of the logs +// - We do not expect to open the data store for corruption +TEST_F(DBWALTest, kTolerateCorruptedTailRecords) { + const int jstart = RecoveryTestHelper::kWALFileOffset; + const int jend = jstart + RecoveryTestHelper::kWALFilesCount; + + for (auto trunc : {true, false}) { /* Corruption style */ + for (int i = 0; i < 4; i++) { /* Corruption offset position */ + for (int j = jstart; j < jend; j++) { /* WAL file */ + // Fill data for testing + Options options = CurrentOptions(); + const size_t row_count = RecoveryTestHelper::FillData(this, &options); + // test checksum failure or parsing + RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, + /*len%=*/.1, /*wal=*/j, trunc); + + if (trunc) { + options.wal_recovery_mode = + WALRecoveryMode::kTolerateCorruptedTailRecords; + options.create_if_missing = false; + ASSERT_OK(TryReopen(options)); + const size_t recovered_row_count = RecoveryTestHelper::GetData(this); + ASSERT_TRUE(i == 0 || recovered_row_count > 0); + ASSERT_LT(recovered_row_count, row_count); + } else { + options.wal_recovery_mode = + WALRecoveryMode::kTolerateCorruptedTailRecords; + ASSERT_NOK(TryReopen(options)); + } + } + } + } +} + +// Test scope: +// We don't expect the data store to be opened if there is any corruption +// (leading, middle or trailing -- incomplete writes or corruption) +TEST_F(DBWALTest, kAbsoluteConsistency) { + const int jstart = RecoveryTestHelper::kWALFileOffset; + const int jend = jstart + RecoveryTestHelper::kWALFilesCount; + + // Verify clean slate behavior + Options options = CurrentOptions(); + const size_t row_count = RecoveryTestHelper::FillData(this, &options); + options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; + options.create_if_missing = false; + ASSERT_OK(TryReopen(options)); + ASSERT_EQ(RecoveryTestHelper::GetData(this), row_count); + + for (auto trunc : {true, false}) { /* Corruption style */ + for (int i = 0; i < 4; i++) { /* Corruption offset position */ + if (trunc && i == 0) { + continue; + } + + for (int j = jstart; j < jend; j++) { /* 
+// Test scope:
+// - We expect to open the data store when there are incomplete trailing
+//   writes at the end of any of the logs
+// - We do not expect to open the data store for other corruption
+TEST_F(DBWALTest, kTolerateCorruptedTailRecords) {
+  const int jstart = RecoveryTestHelper::kWALFileOffset;
+  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
+
+  for (auto trunc : {true, false}) {        /* Corruption style */
+    for (int i = 0; i < 4; i++) {           /* Corruption offset position */
+      for (int j = jstart; j < jend; j++) { /* WAL file */
+        // Fill data for testing
+        Options options = CurrentOptions();
+        const size_t row_count = RecoveryTestHelper::FillData(this, &options);
+        // test checksum failure or parsing
+        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
+                                       /*len%=*/.1, /*wal=*/j, trunc);
+
+        if (trunc) {
+          options.wal_recovery_mode =
+              WALRecoveryMode::kTolerateCorruptedTailRecords;
+          options.create_if_missing = false;
+          ASSERT_OK(TryReopen(options));
+          const size_t recovered_row_count = RecoveryTestHelper::GetData(this);
+          ASSERT_TRUE(i == 0 || recovered_row_count > 0);
+          ASSERT_LT(recovered_row_count, row_count);
+        } else {
+          options.wal_recovery_mode =
+              WALRecoveryMode::kTolerateCorruptedTailRecords;
+          ASSERT_NOK(TryReopen(options));
+        }
+      }
+    }
+  }
+}
+
+// Test scope:
+// We don't expect the data store to be opened if there is any corruption
+// (leading, middle or trailing -- incomplete writes or corruption)
+TEST_F(DBWALTest, kAbsoluteConsistency) {
+  const int jstart = RecoveryTestHelper::kWALFileOffset;
+  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
+
+  // Verify clean-slate behavior
+  Options options = CurrentOptions();
+  const size_t row_count = RecoveryTestHelper::FillData(this, &options);
+  options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency;
+  options.create_if_missing = false;
+  ASSERT_OK(TryReopen(options));
+  ASSERT_EQ(RecoveryTestHelper::GetData(this), row_count);
+
+  for (auto trunc : {true, false}) { /* Corruption style */
+    for (int i = 0; i < 4; i++) {    /* Corruption offset position */
+      if (trunc && i == 0) {
+        continue;
+      }
+
+      for (int j = jstart; j < jend; j++) { /* WAL files */
+        // fill with new data
+        RecoveryTestHelper::FillData(this, &options);
+        // corrupt the WAL
+        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
+                                       /*len%=*/.1, j, trunc);
+        // verify
+        options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency;
+        options.create_if_missing = false;
+        ASSERT_NOK(TryReopen(options));
+      }
+    }
+  }
+}
+
+// Test scope:
+// - We expect to open the data store under all circumstances
+// - We expect only data up to the point where the first error was encountered
+TEST_F(DBWALTest, kPointInTimeRecovery) {
+  const int jstart = RecoveryTestHelper::kWALFileOffset;
+  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
+  const int maxkeys =
+      RecoveryTestHelper::kWALFilesCount * RecoveryTestHelper::kKeysPerWALFile;
+
+  for (auto trunc : {true, false}) {        /* Corruption style */
+    for (int i = 0; i < 4; i++) {           /* Offset of corruption */
+      for (int j = jstart; j < jend; j++) { /* WAL file */
+        // Fill data for testing
+        Options options = CurrentOptions();
+        const size_t row_count = RecoveryTestHelper::FillData(this, &options);
+
+        // Corrupt the WAL
+        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
+                                       /*len%=*/.1, j, trunc);
+
+        // Verify
+        options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
+        options.create_if_missing = false;
+        ASSERT_OK(TryReopen(options));
+
+        // Probe data for invariants
+        size_t recovered_row_count = RecoveryTestHelper::GetData(this);
+        ASSERT_LT(recovered_row_count, row_count);
+
+        bool expect_data = true;
+        for (int k = 0; k < maxkeys; ++k) {
+          bool found = Get("key" + ToString(k)) != "NOT_FOUND";
+          if (expect_data && !found) {
+            expect_data = false;
+          }
+          ASSERT_EQ(found, expect_data);
+        }
+
+        const size_t min = RecoveryTestHelper::kKeysPerWALFile *
+                           (j - RecoveryTestHelper::kWALFileOffset);
+        ASSERT_GE(recovered_row_count, min);
+        if (!trunc && i != 0) {
+          const size_t max = RecoveryTestHelper::kKeysPerWALFile *
+                             (j - RecoveryTestHelper::kWALFileOffset + 1);
+          ASSERT_LE(recovered_row_count, max);
+        }
+      }
+    }
+  }
+}
+
+// Test scope:
+// - We expect to open the data store under all scenarios
+// - We expect to have recovered records past the corruption zone
+TEST_F(DBWALTest, kSkipAnyCorruptedRecords) {
+  const int jstart = RecoveryTestHelper::kWALFileOffset;
+  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
+
+  for (auto trunc : {true, false}) {        /* Corruption style */
+    for (int i = 0; i < 4; i++) {           /* Corruption offset */
+      for (int j = jstart; j < jend; j++) { /* WAL files */
+        // Fill data for testing
+        Options options = CurrentOptions();
+        const size_t row_count = RecoveryTestHelper::FillData(this, &options);
+
+        // Corrupt the WAL
+        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
+                                       /*len%=*/.1, j, trunc);
+
+        // Verify behavior
+        options.wal_recovery_mode = WALRecoveryMode::kSkipAnyCorruptedRecords;
+        options.create_if_missing = false;
+        ASSERT_OK(TryReopen(options));
+
+        // Probe data for invariants
+        size_t recovered_row_count = RecoveryTestHelper::GetData(this);
+        ASSERT_LT(recovered_row_count, row_count);
+
+        if (!trunc) {
+          ASSERT_TRUE(i != 0 || recovered_row_count > 0);
+        }
+      }
+    }
+  }
+}
+
+#endif  // ROCKSDB_LITE
+
 } // namespace rocksdb
 
 int main(int argc, char** argv) {
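Outside the test harness, the four modes exercised above are selected through DBOptions::wal_recovery_mode. An application-side sketch (hedged; the path below is hypothetical):

    // kTolerateCorruptedTailRecords: drop an incomplete tail record.
    // kAbsoluteConsistency: fail Open() on any WAL damage.
    // kPointInTimeRecovery: keep everything up to the first bad record.
    // kSkipAnyCorruptedRecords: skip bad records and keep the rest.
    DB* db = nullptr;
    Options options;
    options.create_if_missing = true;
    options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
    Status s = DB::Open(options, "/path/to/db", &db);  // hypothetical path
    assert(s.ok());
    delete db;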
diff --git a/src.mk b/src.mk
index 9f3671305..c53627cef 100644
--- a/src.mk
+++ b/src.mk
@@ -198,13 +198,17 @@ TEST_BENCH_SOURCES = \
   db/dbformat_test.cc \
   db/db_iter_test.cc \
   db/db_test.cc \
+  db/db_block_cache_test.cc \
+  db/db_bloom_filter_test.cc \
   db/db_compaction_filter_test.cc \
   db/db_compaction_test.cc \
   db/db_dynamic_level_test.cc \
   db/db_inplace_update_test.cc \
+  db/db_iterator_test.cc \
   db/db_log_iter_test.cc \
-  db/db_universal_compaction_test.cc \
+  db/db_sst_test.cc \
   db/db_tailing_iter_test.cc \
+  db/db_universal_compaction_test.cc \
   db/db_wal_test.cc \
   db/db_table_properties_test.cc \
   db/deletefile_test.cc \