Readers for partition filter
Summary: This is the last split of this pull request: https://github.com/facebook/rocksdb/pull/1891 which includes the reader part as well as the tests. Closes https://github.com/facebook/rocksdb/pull/1961 Differential Revision: D4672216 Pulled By: maysamyabandeh fbshipit-source-id: 6a2b829
This commit is contained in:
parent
9ef3627fd3
commit
8b0097b49b
4
Makefile
4
Makefile
@ -354,6 +354,7 @@ TESTS = \
|
|||||||
file_reader_writer_test \
|
file_reader_writer_test \
|
||||||
block_based_filter_block_test \
|
block_based_filter_block_test \
|
||||||
full_filter_block_test \
|
full_filter_block_test \
|
||||||
|
partitioned_filter_block_test \
|
||||||
hash_table_test \
|
hash_table_test \
|
||||||
histogram_test \
|
histogram_test \
|
||||||
log_test \
|
log_test \
|
||||||
@ -1158,6 +1159,9 @@ block_based_filter_block_test: table/block_based_filter_block_test.o $(LIBOBJECT
|
|||||||
full_filter_block_test: table/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
full_filter_block_test: table/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(AM_LINK)
|
$(AM_LINK)
|
||||||
|
|
||||||
|
partitioned_filter_block_test: table/partitioned_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
|
$(AM_LINK)
|
||||||
|
|
||||||
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(AM_LINK)
|
$(AM_LINK)
|
||||||
|
|
||||||
|
@ -20,17 +20,23 @@ class DBBloomFilterTest : public DBTestBase {
|
|||||||
DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {}
|
DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {}
|
||||||
};
|
};
|
||||||
|
|
||||||
class DBBloomFilterTestWithParam : public DBTestBase,
|
class DBBloomFilterTestWithParam
|
||||||
public testing::WithParamInterface<bool> {
|
: public DBTestBase,
|
||||||
|
public testing::WithParamInterface<std::tuple<bool, bool>> {
|
||||||
|
// public testing::WithParamInterface<bool> {
|
||||||
protected:
|
protected:
|
||||||
bool use_block_based_filter_;
|
bool use_block_based_filter_;
|
||||||
|
bool partition_filters_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {}
|
DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {}
|
||||||
|
|
||||||
~DBBloomFilterTestWithParam() {}
|
~DBBloomFilterTestWithParam() {}
|
||||||
|
|
||||||
void SetUp() override { use_block_based_filter_ = GetParam(); }
|
void SetUp() override {
|
||||||
|
use_block_based_filter_ = std::get<0>(GetParam());
|
||||||
|
partition_filters_ = std::get<1>(GetParam());
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// KeyMayExist can lead to a few false positives, but not false negatives.
|
// KeyMayExist can lead to a few false positives, but not false negatives.
|
||||||
@ -43,7 +49,17 @@ TEST_P(DBBloomFilterTestWithParam, KeyMayExist) {
|
|||||||
anon::OptionsOverride options_override;
|
anon::OptionsOverride options_override;
|
||||||
options_override.filter_policy.reset(
|
options_override.filter_policy.reset(
|
||||||
NewBloomFilterPolicy(20, use_block_based_filter_));
|
NewBloomFilterPolicy(20, use_block_based_filter_));
|
||||||
|
options_override.partition_filters = partition_filters_;
|
||||||
|
options_override.index_per_partition = 2;
|
||||||
Options options = CurrentOptions(options_override);
|
Options options = CurrentOptions(options_override);
|
||||||
|
if (partition_filters_ &&
|
||||||
|
static_cast<BlockBasedTableOptions*>(
|
||||||
|
options.table_factory->GetOptions())
|
||||||
|
->index_type != BlockBasedTableOptions::kTwoLevelIndexSearch) {
|
||||||
|
// In the current implementation partitioned filters depend on partitioned
|
||||||
|
// indexes
|
||||||
|
continue;
|
||||||
|
}
|
||||||
options.statistics = rocksdb::CreateDBStatistics();
|
options.statistics = rocksdb::CreateDBStatistics();
|
||||||
CreateAndReopenWithCF({"pikachu"}, options);
|
CreateAndReopenWithCF({"pikachu"}, options);
|
||||||
|
|
||||||
@ -102,192 +118,204 @@ TEST_P(DBBloomFilterTestWithParam, KeyMayExist) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
|
TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
|
||||||
Options options = last_options_;
|
for (bool partition_filters : {true, false}) {
|
||||||
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
|
Options options = last_options_;
|
||||||
options.statistics = rocksdb::CreateDBStatistics();
|
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
|
||||||
BlockBasedTableOptions bbto;
|
options.statistics = rocksdb::CreateDBStatistics();
|
||||||
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
BlockBasedTableOptions bbto;
|
||||||
bbto.whole_key_filtering = false;
|
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
if (partition_filters) {
|
||||||
DestroyAndReopen(options);
|
bbto.partition_filters = true;
|
||||||
|
bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
|
||||||
|
}
|
||||||
|
bbto.whole_key_filtering = false;
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
WriteOptions wo;
|
WriteOptions wo;
|
||||||
ReadOptions ro;
|
ReadOptions ro;
|
||||||
FlushOptions fo;
|
FlushOptions fo;
|
||||||
fo.wait = true;
|
fo.wait = true;
|
||||||
std::string value;
|
std::string value;
|
||||||
|
|
||||||
ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo"));
|
ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo"));
|
||||||
ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
|
ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
|
||||||
ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));
|
ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));
|
||||||
|
|
||||||
dbfull()->Flush(fo);
|
dbfull()->Flush(fo);
|
||||||
|
|
||||||
ASSERT_EQ("foo", Get("barbarbar"));
|
ASSERT_EQ("foo", Get("barbarbar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
||||||
ASSERT_EQ("foo2", Get("barbarbar2"));
|
ASSERT_EQ("foo2", Get("barbarbar2"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
|
ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
||||||
|
|
||||||
ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
|
ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
|
|
||||||
ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
|
ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
|
||||||
|
|
||||||
ro.total_order_seek = true;
|
ro.total_order_seek = true;
|
||||||
ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound());
|
ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound());
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
|
TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
|
||||||
Options options = last_options_;
|
for (bool partition_filters : {true, false}) {
|
||||||
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
Options options = last_options_;
|
||||||
options.statistics = rocksdb::CreateDBStatistics();
|
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
||||||
|
options.statistics = rocksdb::CreateDBStatistics();
|
||||||
|
|
||||||
BlockBasedTableOptions bbto;
|
BlockBasedTableOptions bbto;
|
||||||
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||||
bbto.whole_key_filtering = false;
|
bbto.whole_key_filtering = false;
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
if (partition_filters) {
|
||||||
DestroyAndReopen(options);
|
bbto.partition_filters = true;
|
||||||
|
bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
|
||||||
|
}
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
WriteOptions wo;
|
WriteOptions wo;
|
||||||
ReadOptions ro;
|
ReadOptions ro;
|
||||||
FlushOptions fo;
|
FlushOptions fo;
|
||||||
fo.wait = true;
|
fo.wait = true;
|
||||||
std::string value;
|
std::string value;
|
||||||
|
|
||||||
ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
|
ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
|
||||||
// Needs insert some keys to make sure files are not filtered out by key
|
// Needs insert some keys to make sure files are not filtered out by key
|
||||||
// ranges.
|
// ranges.
|
||||||
ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
|
ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
|
||||||
ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
|
ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
|
||||||
dbfull()->Flush(fo);
|
dbfull()->Flush(fo);
|
||||||
|
|
||||||
Reopen(options);
|
Reopen(options);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
ASSERT_EQ("foo", Get("foobar"));
|
ASSERT_EQ("foo", Get("foobar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
|
|
||||||
// Reopen with whole key filtering enabled and prefix extractor
|
// Reopen with whole key filtering enabled and prefix extractor
|
||||||
// NULL. Bloom filter should be off for both of whole key and
|
// NULL. Bloom filter should be off for both of whole key and
|
||||||
// prefix bloom.
|
// prefix bloom.
|
||||||
bbto.whole_key_filtering = true;
|
bbto.whole_key_filtering = true;
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
||||||
options.prefix_extractor.reset();
|
options.prefix_extractor.reset();
|
||||||
Reopen(options);
|
Reopen(options);
|
||||||
|
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
ASSERT_EQ("foo", Get("foobar"));
|
ASSERT_EQ("foo", Get("foobar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
// Write DB with only full key filtering.
|
// Write DB with only full key filtering.
|
||||||
ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
|
ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
|
||||||
// Needs insert some keys to make sure files are not filtered out by key
|
// Needs insert some keys to make sure files are not filtered out by key
|
||||||
// ranges.
|
// ranges.
|
||||||
ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
|
ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
|
||||||
ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
|
ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
|
||||||
db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
|
db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
|
||||||
|
|
||||||
// Reopen with both of whole key off and prefix extractor enabled.
|
// Reopen with both of whole key off and prefix extractor enabled.
|
||||||
// Still no bloom filter should be used.
|
// Still no bloom filter should be used.
|
||||||
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
||||||
bbto.whole_key_filtering = false;
|
bbto.whole_key_filtering = false;
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
||||||
Reopen(options);
|
Reopen(options);
|
||||||
|
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
ASSERT_EQ("foo", Get("foobar"));
|
ASSERT_EQ("foo", Get("foobar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
|
|
||||||
// Try to create a DB with mixed files:
|
// Try to create a DB with mixed files:
|
||||||
ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
|
ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
|
||||||
// Needs insert some keys to make sure files are not filtered out by key
|
// Needs insert some keys to make sure files are not filtered out by key
|
||||||
// ranges.
|
// ranges.
|
||||||
ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
|
ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
|
||||||
ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
|
ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
|
||||||
db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
|
db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
|
||||||
|
|
||||||
options.prefix_extractor.reset();
|
options.prefix_extractor.reset();
|
||||||
bbto.whole_key_filtering = true;
|
bbto.whole_key_filtering = true;
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
||||||
Reopen(options);
|
Reopen(options);
|
||||||
|
|
||||||
// Try to create a DB with mixed files.
|
// Try to create a DB with mixed files.
|
||||||
ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar"));
|
ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar"));
|
||||||
// In this case needs insert some keys to make sure files are
|
// In this case needs insert some keys to make sure files are
|
||||||
// not filtered out by key ranges.
|
// not filtered out by key ranges.
|
||||||
ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
|
ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
|
||||||
ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
|
ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
|
||||||
Flush();
|
Flush();
|
||||||
|
|
||||||
// Now we have two files:
|
// Now we have two files:
|
||||||
// File 1: An older file with prefix bloom.
|
// File 1: An older file with prefix bloom.
|
||||||
// File 2: A newer file with whole bloom filter.
|
// File 2: A newer file with whole bloom filter.
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
|
||||||
ASSERT_EQ("foo", Get("foobar"));
|
ASSERT_EQ("foo", Get("foobar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
|
||||||
ASSERT_EQ("bar", Get("barfoo"));
|
ASSERT_EQ("bar", Get("barfoo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
|
||||||
|
|
||||||
// Reopen with the same setting: only whole key is used
|
// Reopen with the same setting: only whole key is used
|
||||||
Reopen(options);
|
Reopen(options);
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6);
|
||||||
ASSERT_EQ("foo", Get("foobar"));
|
ASSERT_EQ("foo", Get("foobar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
|
||||||
ASSERT_EQ("bar", Get("barfoo"));
|
ASSERT_EQ("bar", Get("barfoo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
|
||||||
|
|
||||||
// Restart with both filters are allowed
|
// Restart with both filters are allowed
|
||||||
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
||||||
bbto.whole_key_filtering = true;
|
bbto.whole_key_filtering = true;
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
||||||
Reopen(options);
|
Reopen(options);
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
|
||||||
// File 1 will has it filtered out.
|
// File 1 will has it filtered out.
|
||||||
// File 2 will not, as prefix `foo` exists in the file.
|
// File 2 will not, as prefix `foo` exists in the file.
|
||||||
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10);
|
||||||
ASSERT_EQ("foo", Get("foobar"));
|
ASSERT_EQ("foo", Get("foobar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
|
||||||
ASSERT_EQ("bar", Get("barfoo"));
|
ASSERT_EQ("bar", Get("barfoo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
|
||||||
|
|
||||||
// Restart with only prefix bloom is allowed.
|
// Restart with only prefix bloom is allowed.
|
||||||
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
||||||
bbto.whole_key_filtering = false;
|
bbto.whole_key_filtering = false;
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
||||||
Reopen(options);
|
Reopen(options);
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
|
||||||
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
ASSERT_EQ("NOT_FOUND", Get("bar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
|
||||||
ASSERT_EQ("foo", Get("foobar"));
|
ASSERT_EQ("foo", Get("foobar"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
|
||||||
ASSERT_EQ("bar", Get("barfoo"));
|
ASSERT_EQ("bar", Get("barfoo"));
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
|
TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
|
||||||
@ -301,6 +329,12 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
|
|||||||
table_options.no_block_cache = true;
|
table_options.no_block_cache = true;
|
||||||
table_options.filter_policy.reset(
|
table_options.filter_policy.reset(
|
||||||
NewBloomFilterPolicy(10, use_block_based_filter_));
|
NewBloomFilterPolicy(10, use_block_based_filter_));
|
||||||
|
table_options.partition_filters = partition_filters_;
|
||||||
|
if (partition_filters_) {
|
||||||
|
table_options.index_type =
|
||||||
|
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
|
||||||
|
}
|
||||||
|
table_options.index_per_partition = 2;
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
|
||||||
CreateAndReopenWithCF({"pikachu"}, options);
|
CreateAndReopenWithCF({"pikachu"}, options);
|
||||||
@ -327,7 +361,13 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
|
|||||||
int reads = env_->random_read_counter_.Read();
|
int reads = env_->random_read_counter_.Read();
|
||||||
fprintf(stderr, "%d present => %d reads\n", N, reads);
|
fprintf(stderr, "%d present => %d reads\n", N, reads);
|
||||||
ASSERT_GE(reads, N);
|
ASSERT_GE(reads, N);
|
||||||
ASSERT_LE(reads, N + 2 * N / 100);
|
if (partition_filters_) {
|
||||||
|
// Without block cache, we read an extra partition filter per each
|
||||||
|
// level*read and a partition index per each read
|
||||||
|
ASSERT_LE(reads, 4 * N + 2 * N / 100);
|
||||||
|
} else {
|
||||||
|
ASSERT_LE(reads, N + 2 * N / 100);
|
||||||
|
}
|
||||||
|
|
||||||
// Lookup present keys. Should rarely read from either sstable.
|
// Lookup present keys. Should rarely read from either sstable.
|
||||||
env_->random_read_counter_.Reset();
|
env_->random_read_counter_.Reset();
|
||||||
@ -336,7 +376,13 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
|
|||||||
}
|
}
|
||||||
reads = env_->random_read_counter_.Read();
|
reads = env_->random_read_counter_.Read();
|
||||||
fprintf(stderr, "%d missing => %d reads\n", N, reads);
|
fprintf(stderr, "%d missing => %d reads\n", N, reads);
|
||||||
ASSERT_LE(reads, 3 * N / 100);
|
if (partition_filters_) {
|
||||||
|
// With partitioned filter we read one extra filter per level per each
|
||||||
|
// missed read.
|
||||||
|
ASSERT_LE(reads, 2 * N + 3 * N / 100);
|
||||||
|
} else {
|
||||||
|
ASSERT_LE(reads, 3 * N / 100);
|
||||||
|
}
|
||||||
|
|
||||||
env_->delay_sstable_sync_.store(false, std::memory_order_release);
|
env_->delay_sstable_sync_.store(false, std::memory_order_release);
|
||||||
Close();
|
Close();
|
||||||
@ -344,7 +390,9 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithParam, DBBloomFilterTestWithParam,
|
INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithParam, DBBloomFilterTestWithParam,
|
||||||
::testing::Bool());
|
::testing::Values(std::make_tuple(true, false),
|
||||||
|
std::make_tuple(false, true),
|
||||||
|
std::make_tuple(false, false)));
|
||||||
|
|
||||||
TEST_F(DBBloomFilterTest, BloomFilterRate) {
|
TEST_F(DBBloomFilterTest, BloomFilterRate) {
|
||||||
while (ChangeFilterOptions()) {
|
while (ChangeFilterOptions()) {
|
||||||
@ -401,28 +449,13 @@ TEST_F(DBBloomFilterTest, BloomFilterCompatibility) {
|
|||||||
ASSERT_EQ(Key(i), Get(1, Key(i)));
|
ASSERT_EQ(Key(i), Get(1, Key(i)));
|
||||||
}
|
}
|
||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(DBBloomFilterTest, BloomFilterReverseCompatibility) {
|
// Check db with partitioned full filter
|
||||||
Options options = CurrentOptions();
|
table_options.partition_filters = true;
|
||||||
options.statistics = rocksdb::CreateDBStatistics();
|
table_options.index_type =
|
||||||
BlockBasedTableOptions table_options;
|
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
|
||||||
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
|
||||||
// Create with full filter
|
|
||||||
CreateAndReopenWithCF({"pikachu"}, options);
|
|
||||||
|
|
||||||
const int maxKey = 10000;
|
|
||||||
for (int i = 0; i < maxKey; i++) {
|
|
||||||
ASSERT_OK(Put(1, Key(i), Key(i)));
|
|
||||||
}
|
|
||||||
ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
|
|
||||||
Flush(1);
|
|
||||||
|
|
||||||
// Check db with block_based filter
|
|
||||||
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
|
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
||||||
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
||||||
|
|
||||||
// Check if they can be found
|
// Check if they can be found
|
||||||
@ -432,6 +465,43 @@ TEST_F(DBBloomFilterTest, BloomFilterReverseCompatibility) {
|
|||||||
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(DBBloomFilterTest, BloomFilterReverseCompatibility) {
|
||||||
|
for (bool partition_filters : {true, false}) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.statistics = rocksdb::CreateDBStatistics();
|
||||||
|
BlockBasedTableOptions table_options;
|
||||||
|
if (partition_filters) {
|
||||||
|
table_options.partition_filters = true;
|
||||||
|
table_options.index_type =
|
||||||
|
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
|
||||||
|
}
|
||||||
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
|
// Create with full filter
|
||||||
|
CreateAndReopenWithCF({"pikachu"}, options);
|
||||||
|
|
||||||
|
const int maxKey = 10000;
|
||||||
|
for (int i = 0; i < maxKey; i++) {
|
||||||
|
ASSERT_OK(Put(1, Key(i), Key(i)));
|
||||||
|
}
|
||||||
|
ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
|
||||||
|
Flush(1);
|
||||||
|
|
||||||
|
// Check db with block_based filter
|
||||||
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
||||||
|
|
||||||
|
// Check if they can be found
|
||||||
|
for (int i = 0; i < maxKey; i++) {
|
||||||
|
ASSERT_EQ(Key(i), Get(1, Key(i)));
|
||||||
|
}
|
||||||
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
// A wrapped bloom over default FilterPolicy
|
// A wrapped bloom over default FilterPolicy
|
||||||
class WrappedBloom : public FilterPolicy {
|
class WrappedBloom : public FilterPolicy {
|
||||||
@ -586,11 +656,12 @@ TEST_F(DBBloomFilterTest, PrefixExtractorBlockFilter) {
|
|||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
class BloomStatsTestWithParam
|
class BloomStatsTestWithParam
|
||||||
: public DBBloomFilterTest,
|
: public DBBloomFilterTest,
|
||||||
public testing::WithParamInterface<std::tuple<bool, bool>> {
|
public testing::WithParamInterface<std::tuple<bool, bool, bool>> {
|
||||||
public:
|
public:
|
||||||
BloomStatsTestWithParam() {
|
BloomStatsTestWithParam() {
|
||||||
use_block_table_ = std::get<0>(GetParam());
|
use_block_table_ = std::get<0>(GetParam());
|
||||||
use_block_based_builder_ = std::get<1>(GetParam());
|
use_block_based_builder_ = std::get<1>(GetParam());
|
||||||
|
partition_filters_ = std::get<2>(GetParam());
|
||||||
|
|
||||||
options_.create_if_missing = true;
|
options_.create_if_missing = true;
|
||||||
options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
|
options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
|
||||||
@ -599,10 +670,17 @@ class BloomStatsTestWithParam
|
|||||||
if (use_block_table_) {
|
if (use_block_table_) {
|
||||||
BlockBasedTableOptions table_options;
|
BlockBasedTableOptions table_options;
|
||||||
table_options.hash_index_allow_collision = false;
|
table_options.hash_index_allow_collision = false;
|
||||||
|
if (partition_filters_) {
|
||||||
|
assert(!use_block_based_builder_);
|
||||||
|
table_options.partition_filters = partition_filters_;
|
||||||
|
table_options.index_type =
|
||||||
|
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
|
||||||
|
}
|
||||||
table_options.filter_policy.reset(
|
table_options.filter_policy.reset(
|
||||||
NewBloomFilterPolicy(10, use_block_based_builder_));
|
NewBloomFilterPolicy(10, use_block_based_builder_));
|
||||||
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
} else {
|
} else {
|
||||||
|
assert(!partition_filters_); // not supported in plain table
|
||||||
PlainTableOptions table_options;
|
PlainTableOptions table_options;
|
||||||
options_.table_factory.reset(NewPlainTableFactory(table_options));
|
options_.table_factory.reset(NewPlainTableFactory(table_options));
|
||||||
}
|
}
|
||||||
@ -623,6 +701,7 @@ class BloomStatsTestWithParam
|
|||||||
|
|
||||||
bool use_block_table_;
|
bool use_block_table_;
|
||||||
bool use_block_based_builder_;
|
bool use_block_based_builder_;
|
||||||
|
bool partition_filters_;
|
||||||
Options options_;
|
Options options_;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -717,25 +796,44 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
|
|||||||
ASSERT_OK(iter->status());
|
ASSERT_OK(iter->status());
|
||||||
ASSERT_TRUE(iter->Valid());
|
ASSERT_TRUE(iter->Valid());
|
||||||
ASSERT_EQ(value1, iter->value().ToString());
|
ASSERT_EQ(value1, iter->value().ToString());
|
||||||
ASSERT_EQ(1, perf_context.bloom_sst_hit_count);
|
if (partition_filters_) {
|
||||||
|
ASSERT_EQ(0, perf_context.bloom_sst_hit_count); // no_io
|
||||||
|
ASSERT_EQ(0, perf_context.bloom_sst_miss_count); // no_io
|
||||||
|
} else {
|
||||||
|
ASSERT_EQ(1, perf_context.bloom_sst_hit_count);
|
||||||
|
}
|
||||||
|
|
||||||
iter->Seek(key3);
|
iter->Seek(key3);
|
||||||
ASSERT_OK(iter->status());
|
ASSERT_OK(iter->status());
|
||||||
ASSERT_TRUE(iter->Valid());
|
ASSERT_TRUE(iter->Valid());
|
||||||
ASSERT_EQ(value3, iter->value().ToString());
|
ASSERT_EQ(value3, iter->value().ToString());
|
||||||
ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
|
if (partition_filters_) {
|
||||||
|
ASSERT_EQ(0, perf_context.bloom_sst_hit_count); // no_io
|
||||||
|
ASSERT_EQ(0, perf_context.bloom_sst_miss_count); // no_io
|
||||||
|
} else {
|
||||||
|
ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
|
||||||
|
}
|
||||||
|
|
||||||
iter->Seek(key2);
|
iter->Seek(key2);
|
||||||
ASSERT_OK(iter->status());
|
ASSERT_OK(iter->status());
|
||||||
ASSERT_TRUE(!iter->Valid());
|
if (partition_filters_) {
|
||||||
ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
|
// iter is still valid since filter did not reject the key2
|
||||||
ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
|
ASSERT_TRUE(iter->Valid());
|
||||||
|
ASSERT_EQ(0, perf_context.bloom_sst_hit_count); // no_io
|
||||||
|
ASSERT_EQ(0, perf_context.bloom_sst_miss_count); // no_io
|
||||||
|
} else {
|
||||||
|
ASSERT_TRUE(!iter->Valid());
|
||||||
|
ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
|
||||||
|
ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam,
|
INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam,
|
||||||
::testing::Values(std::make_tuple(true, true),
|
::testing::Values(std::make_tuple(true, true, false),
|
||||||
std::make_tuple(true, false),
|
std::make_tuple(true, false, false),
|
||||||
std::make_tuple(false, false)));
|
std::make_tuple(true, false, true),
|
||||||
|
std::make_tuple(false, false,
|
||||||
|
false)));
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
void PrefixScanInit(DBBloomFilterTest* dbtest) {
|
void PrefixScanInit(DBBloomFilterTest* dbtest) {
|
||||||
|
@ -1221,7 +1221,7 @@ class PinL0IndexAndFilterBlocksTest : public DBTestBase,
|
|||||||
PinL0IndexAndFilterBlocksTest() : DBTestBase("/db_pin_l0_index_bloom_test") {}
|
PinL0IndexAndFilterBlocksTest() : DBTestBase("/db_pin_l0_index_bloom_test") {}
|
||||||
virtual void SetUp() override { infinite_max_files_ = GetParam(); }
|
virtual void SetUp() override { infinite_max_files_ = GetParam(); }
|
||||||
|
|
||||||
void CreateTwoLevels(Options* options) {
|
void CreateTwoLevels(Options* options, bool close_afterwards) {
|
||||||
if (infinite_max_files_) {
|
if (infinite_max_files_) {
|
||||||
options->max_open_files = -1;
|
options->max_open_files = -1;
|
||||||
}
|
}
|
||||||
@ -1249,6 +1249,9 @@ class PinL0IndexAndFilterBlocksTest : public DBTestBase,
|
|||||||
Put(1, "z2", "end2");
|
Put(1, "z2", "end2");
|
||||||
ASSERT_OK(Flush(1));
|
ASSERT_OK(Flush(1));
|
||||||
|
|
||||||
|
if (close_afterwards) {
|
||||||
|
Close(); // This ensures that there is no ref to block cache entries
|
||||||
|
}
|
||||||
table_options.block_cache->EraseUnRefEntries();
|
table_options.block_cache->EraseUnRefEntries();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1303,7 +1306,7 @@ TEST_P(PinL0IndexAndFilterBlocksTest,
|
|||||||
TEST_P(PinL0IndexAndFilterBlocksTest,
|
TEST_P(PinL0IndexAndFilterBlocksTest,
|
||||||
MultiLevelIndexAndFilterBlocksCachedWithPinning) {
|
MultiLevelIndexAndFilterBlocksCachedWithPinning) {
|
||||||
Options options = CurrentOptions();
|
Options options = CurrentOptions();
|
||||||
PinL0IndexAndFilterBlocksTest::CreateTwoLevels(&options);
|
PinL0IndexAndFilterBlocksTest::CreateTwoLevels(&options, false);
|
||||||
// get base cache values
|
// get base cache values
|
||||||
uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
|
uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
|
||||||
uint64_t fh = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
|
uint64_t fh = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
|
||||||
@ -1332,7 +1335,10 @@ TEST_P(PinL0IndexAndFilterBlocksTest,
|
|||||||
|
|
||||||
TEST_P(PinL0IndexAndFilterBlocksTest, DisablePrefetchingNonL0IndexAndFilter) {
|
TEST_P(PinL0IndexAndFilterBlocksTest, DisablePrefetchingNonL0IndexAndFilter) {
|
||||||
Options options = CurrentOptions();
|
Options options = CurrentOptions();
|
||||||
PinL0IndexAndFilterBlocksTest::CreateTwoLevels(&options);
|
// This ensures that db does not ref anything in the block cache, so
|
||||||
|
// EraseUnRefEntries could clear them up.
|
||||||
|
bool close_afterwards = true;
|
||||||
|
PinL0IndexAndFilterBlocksTest::CreateTwoLevels(&options, close_afterwards);
|
||||||
|
|
||||||
// Get base cache values
|
// Get base cache values
|
||||||
uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
|
uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
|
||||||
|
@ -230,6 +230,8 @@ bool DBTestBase::ChangeFilterOptions() {
|
|||||||
option_config_ = kFilter;
|
option_config_ = kFilter;
|
||||||
} else if (option_config_ == kFilter) {
|
} else if (option_config_ == kFilter) {
|
||||||
option_config_ = kFullFilterWithNewTableReaderForCompactions;
|
option_config_ = kFullFilterWithNewTableReaderForCompactions;
|
||||||
|
} else if (option_config_ == kFullFilterWithNewTableReaderForCompactions) {
|
||||||
|
option_config_ = kPartitionedFilterWithNewTableReaderForCompactions;
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -325,6 +327,14 @@ Options DBTestBase::CurrentOptions(
|
|||||||
options.new_table_reader_for_compaction_inputs = true;
|
options.new_table_reader_for_compaction_inputs = true;
|
||||||
options.compaction_readahead_size = 10 * 1024 * 1024;
|
options.compaction_readahead_size = 10 * 1024 * 1024;
|
||||||
break;
|
break;
|
||||||
|
case kPartitionedFilterWithNewTableReaderForCompactions:
|
||||||
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||||
|
table_options.partition_filters = true;
|
||||||
|
table_options.index_type =
|
||||||
|
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
|
||||||
|
options.new_table_reader_for_compaction_inputs = true;
|
||||||
|
options.compaction_readahead_size = 10 * 1024 * 1024;
|
||||||
|
break;
|
||||||
case kUncompressed:
|
case kUncompressed:
|
||||||
options.compression = kNoCompression;
|
options.compression = kNoCompression;
|
||||||
break;
|
break;
|
||||||
@ -426,6 +436,8 @@ Options DBTestBase::CurrentOptions(
|
|||||||
|
|
||||||
if (options_override.filter_policy) {
|
if (options_override.filter_policy) {
|
||||||
table_options.filter_policy = options_override.filter_policy;
|
table_options.filter_policy = options_override.filter_policy;
|
||||||
|
table_options.partition_filters = options_override.partition_filters;
|
||||||
|
table_options.index_per_partition = options_override.index_per_partition;
|
||||||
}
|
}
|
||||||
if (set_block_based_table_factory) {
|
if (set_block_based_table_factory) {
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
@ -109,6 +109,11 @@ class AtomicCounter {
|
|||||||
|
|
||||||
struct OptionsOverride {
|
struct OptionsOverride {
|
||||||
std::shared_ptr<const FilterPolicy> filter_policy = nullptr;
|
std::shared_ptr<const FilterPolicy> filter_policy = nullptr;
|
||||||
|
// These will be used only if filter_policy is set
|
||||||
|
bool partition_filters = false;
|
||||||
|
uint64_t index_per_partition = 1024;
|
||||||
|
BlockBasedTableOptions::IndexType index_type =
|
||||||
|
BlockBasedTableOptions::IndexType::kBinarySearch;
|
||||||
|
|
||||||
// Used as a bit mask of individual enums in which to skip an XF test point
|
// Used as a bit mask of individual enums in which to skip an XF test point
|
||||||
int skip_policy = 0;
|
int skip_policy = 0;
|
||||||
@ -617,6 +622,7 @@ class DBTestBase : public testing::Test {
|
|||||||
kUniversalSubcompactions = 32,
|
kUniversalSubcompactions = 32,
|
||||||
kBlockBasedTableWithIndexRestartInterval = 33,
|
kBlockBasedTableWithIndexRestartInterval = 33,
|
||||||
kBlockBasedTableWithPartitionedIndex = 34,
|
kBlockBasedTableWithPartitionedIndex = 34,
|
||||||
|
kPartitionedFilterWithNewTableReaderForCompactions = 35,
|
||||||
};
|
};
|
||||||
int option_config_;
|
int option_config_;
|
||||||
|
|
||||||
|
@ -184,8 +184,9 @@ BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
|
|||||||
num_ = (n - 5 - last_word) / 4;
|
num_ = (n - 5 - last_word) / 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BlockBasedFilterBlockReader::KeyMayMatch(const Slice& key,
|
bool BlockBasedFilterBlockReader::KeyMayMatch(
|
||||||
uint64_t block_offset) {
|
const Slice& key, uint64_t block_offset, const bool no_io,
|
||||||
|
const Slice* const const_ikey_ptr) {
|
||||||
assert(block_offset != kNotValid);
|
assert(block_offset != kNotValid);
|
||||||
if (!whole_key_filtering_) {
|
if (!whole_key_filtering_) {
|
||||||
return true;
|
return true;
|
||||||
@ -193,8 +194,9 @@ bool BlockBasedFilterBlockReader::KeyMayMatch(const Slice& key,
|
|||||||
return MayMatch(key, block_offset);
|
return MayMatch(key, block_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BlockBasedFilterBlockReader::PrefixMayMatch(const Slice& prefix,
|
bool BlockBasedFilterBlockReader::PrefixMayMatch(
|
||||||
uint64_t block_offset) {
|
const Slice& prefix, uint64_t block_offset, const bool no_io,
|
||||||
|
const Slice* const const_ikey_ptr) {
|
||||||
assert(block_offset != kNotValid);
|
assert(block_offset != kNotValid);
|
||||||
if (!prefix_extractor_) {
|
if (!prefix_extractor_) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -81,10 +81,14 @@ class BlockBasedFilterBlockReader : public FilterBlockReader {
|
|||||||
bool whole_key_filtering,
|
bool whole_key_filtering,
|
||||||
BlockContents&& contents, Statistics* statistics);
|
BlockContents&& contents, Statistics* statistics);
|
||||||
virtual bool IsBlockBased() override { return true; }
|
virtual bool IsBlockBased() override { return true; }
|
||||||
virtual bool KeyMayMatch(const Slice& key,
|
virtual bool KeyMayMatch(
|
||||||
uint64_t block_offset = kNotValid) override;
|
const Slice& key, uint64_t block_offset = kNotValid,
|
||||||
virtual bool PrefixMayMatch(const Slice& prefix,
|
const bool no_io = false,
|
||||||
uint64_t block_offset = kNotValid) override;
|
const Slice* const const_ikey_ptr = nullptr) override;
|
||||||
|
virtual bool PrefixMayMatch(
|
||||||
|
const Slice& prefix, uint64_t block_offset = kNotValid,
|
||||||
|
const bool no_io = false,
|
||||||
|
const Slice* const const_ikey_ptr = nullptr) override;
|
||||||
virtual size_t ApproximateMemoryUsage() const override;
|
virtual size_t ApproximateMemoryUsage() const override;
|
||||||
|
|
||||||
// convert this object to a human readable form
|
// convert this object to a human readable form
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
#include "table/get_context.h"
|
#include "table/get_context.h"
|
||||||
#include "table/internal_iterator.h"
|
#include "table/internal_iterator.h"
|
||||||
#include "table/meta_blocks.h"
|
#include "table/meta_blocks.h"
|
||||||
|
#include "table/partitioned_filter_block.h"
|
||||||
#include "table/persistent_cache_helper.h"
|
#include "table/persistent_cache_helper.h"
|
||||||
#include "table/sst_file_writer_collectors.h"
|
#include "table/sst_file_writer_collectors.h"
|
||||||
#include "table/two_level_iterator.h"
|
#include "table/two_level_iterator.h"
|
||||||
@ -149,7 +150,7 @@ Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key,
|
|||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
// Index that allows binary search lookup in a two-level index structure.
|
// Index that allows binary search lookup in a two-level index structure.
|
||||||
class PartitionIndexReader : public IndexReader {
|
class PartitionIndexReader : public IndexReader, public Cleanable {
|
||||||
public:
|
public:
|
||||||
// Read the partition index from the file and create an instance for
|
// Read the partition index from the file and create an instance for
|
||||||
// `PartitionIndexReader`.
|
// `PartitionIndexReader`.
|
||||||
@ -159,7 +160,8 @@ class PartitionIndexReader : public IndexReader {
|
|||||||
const Footer& footer, const BlockHandle& index_handle,
|
const Footer& footer, const BlockHandle& index_handle,
|
||||||
const ImmutableCFOptions& ioptions,
|
const ImmutableCFOptions& ioptions,
|
||||||
const Comparator* comparator, IndexReader** index_reader,
|
const Comparator* comparator, IndexReader** index_reader,
|
||||||
const PersistentCacheOptions& cache_options) {
|
const PersistentCacheOptions& cache_options,
|
||||||
|
const int level) {
|
||||||
std::unique_ptr<Block> index_block;
|
std::unique_ptr<Block> index_block;
|
||||||
auto s = ReadBlockFromFile(
|
auto s = ReadBlockFromFile(
|
||||||
file, footer, ReadOptions(), index_handle, &index_block, ioptions,
|
file, footer, ReadOptions(), index_handle, &index_block, ioptions,
|
||||||
@ -167,8 +169,9 @@ class PartitionIndexReader : public IndexReader {
|
|||||||
kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */);
|
kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */);
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
*index_reader = new PartitionIndexReader(
|
*index_reader =
|
||||||
table, comparator, std::move(index_block), ioptions.statistics);
|
new PartitionIndexReader(table, comparator, std::move(index_block),
|
||||||
|
ioptions.statistics, level);
|
||||||
}
|
}
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
@ -177,10 +180,25 @@ class PartitionIndexReader : public IndexReader {
|
|||||||
// return a two-level iterator: first level is on the partition index
|
// return a two-level iterator: first level is on the partition index
|
||||||
virtual InternalIterator* NewIterator(BlockIter* iter = nullptr,
|
virtual InternalIterator* NewIterator(BlockIter* iter = nullptr,
|
||||||
bool dont_care = true) override {
|
bool dont_care = true) override {
|
||||||
|
// Filters are already checked before seeking the index
|
||||||
|
const bool skip_filters = true;
|
||||||
|
const bool is_index = true;
|
||||||
|
Cleanable* block_cache_cleaner = nullptr;
|
||||||
|
const bool pin_cached_indexes =
|
||||||
|
level_ == 0 &&
|
||||||
|
table_->rep_->table_options.pin_l0_filter_and_index_blocks_in_cache;
|
||||||
|
if (pin_cached_indexes) {
|
||||||
|
// Keep partition indexes into the cache as long as the partition index
|
||||||
|
// reader object is alive
|
||||||
|
block_cache_cleaner = this;
|
||||||
|
}
|
||||||
return NewTwoLevelIterator(
|
return NewTwoLevelIterator(
|
||||||
new BlockBasedTable::BlockEntryIteratorState(table_, ReadOptions(),
|
new BlockBasedTable::BlockEntryIteratorState(
|
||||||
false),
|
table_, ReadOptions(), skip_filters, is_index, block_cache_cleaner),
|
||||||
index_block_->NewIterator(comparator_, iter, true));
|
index_block_->NewIterator(comparator_, nullptr, true));
|
||||||
|
// TODO(myabandeh): Update TwoLevelIterator to be able to make use of
|
||||||
|
// on-stack
|
||||||
|
// BlockIter while the state is on heap
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual size_t size() const override { return index_block_->size(); }
|
virtual size_t size() const override { return index_block_->size(); }
|
||||||
@ -195,14 +213,17 @@ class PartitionIndexReader : public IndexReader {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
PartitionIndexReader(BlockBasedTable* table, const Comparator* comparator,
|
PartitionIndexReader(BlockBasedTable* table, const Comparator* comparator,
|
||||||
std::unique_ptr<Block>&& index_block, Statistics* stats)
|
std::unique_ptr<Block>&& index_block, Statistics* stats,
|
||||||
|
const int level)
|
||||||
: IndexReader(comparator, stats),
|
: IndexReader(comparator, stats),
|
||||||
table_(table),
|
table_(table),
|
||||||
index_block_(std::move(index_block)) {
|
index_block_(std::move(index_block)),
|
||||||
|
level_(level) {
|
||||||
assert(index_block_ != nullptr);
|
assert(index_block_ != nullptr);
|
||||||
}
|
}
|
||||||
BlockBasedTable* table_;
|
BlockBasedTable* table_;
|
||||||
std::unique_ptr<Block> index_block_;
|
std::unique_ptr<Block> index_block_;
|
||||||
|
int level_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Index that allows binary search lookup for the first key of each block.
|
// Index that allows binary search lookup for the first key of each block.
|
||||||
@ -555,14 +576,28 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
|||||||
|
|
||||||
// Find filter handle and filter type
|
// Find filter handle and filter type
|
||||||
if (rep->filter_policy) {
|
if (rep->filter_policy) {
|
||||||
for (auto prefix : {kFullFilterBlockPrefix, kFilterBlockPrefix}) {
|
for (auto filter_type :
|
||||||
|
{Rep::FilterType::kFullFilter, Rep::FilterType::kPartitionedFilter,
|
||||||
|
Rep::FilterType::kBlockFilter}) {
|
||||||
|
std::string prefix;
|
||||||
|
switch (filter_type) {
|
||||||
|
case Rep::FilterType::kFullFilter:
|
||||||
|
prefix = kFullFilterBlockPrefix;
|
||||||
|
break;
|
||||||
|
case Rep::FilterType::kPartitionedFilter:
|
||||||
|
prefix = kPartitionedFilterBlockPrefix;
|
||||||
|
break;
|
||||||
|
case Rep::FilterType::kBlockFilter:
|
||||||
|
prefix = kFilterBlockPrefix;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
std::string filter_block_key = prefix;
|
std::string filter_block_key = prefix;
|
||||||
filter_block_key.append(rep->filter_policy->Name());
|
filter_block_key.append(rep->filter_policy->Name());
|
||||||
if (FindMetaBlock(meta_iter.get(), filter_block_key, &rep->filter_handle)
|
if (FindMetaBlock(meta_iter.get(), filter_block_key, &rep->filter_handle)
|
||||||
.ok()) {
|
.ok()) {
|
||||||
rep->filter_type = (prefix == kFullFilterBlockPrefix)
|
rep->filter_type = filter_type;
|
||||||
? Rep::FilterType::kFullFilter
|
|
||||||
: Rep::FilterType::kBlockFilter;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -697,6 +732,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
|||||||
if (rep->table_options.pin_l0_filter_and_index_blocks_in_cache &&
|
if (rep->table_options.pin_l0_filter_and_index_blocks_in_cache &&
|
||||||
level == 0) {
|
level == 0) {
|
||||||
rep->filter_entry = filter_entry;
|
rep->filter_entry = filter_entry;
|
||||||
|
rep->filter_entry.value->SetLevel(level);
|
||||||
} else {
|
} else {
|
||||||
filter_entry.Release(table_options.block_cache.get());
|
filter_entry.Release(table_options.block_cache.get());
|
||||||
}
|
}
|
||||||
@ -707,14 +743,19 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
|||||||
// pre-load these blocks, which will kept in member variables in Rep
|
// pre-load these blocks, which will kept in member variables in Rep
|
||||||
// and with a same life-time as this table object.
|
// and with a same life-time as this table object.
|
||||||
IndexReader* index_reader = nullptr;
|
IndexReader* index_reader = nullptr;
|
||||||
s = new_table->CreateIndexReader(&index_reader, meta_iter.get());
|
s = new_table->CreateIndexReader(&index_reader, meta_iter.get(), level);
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
rep->index_reader.reset(index_reader);
|
rep->index_reader.reset(index_reader);
|
||||||
|
|
||||||
// Set filter block
|
// Set filter block
|
||||||
if (rep->filter_policy) {
|
if (rep->filter_policy) {
|
||||||
rep->filter.reset(new_table->ReadFilter(rep));
|
const bool is_a_filter_partition = true;
|
||||||
|
rep->filter.reset(
|
||||||
|
new_table->ReadFilter(rep->filter_handle, !is_a_filter_partition));
|
||||||
|
if (rep->filter.get()) {
|
||||||
|
rep->filter->SetLevel(level);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
delete index_reader;
|
delete index_reader;
|
||||||
@ -798,7 +839,8 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
|||||||
Cache* block_cache, Cache* block_cache_compressed,
|
Cache* block_cache, Cache* block_cache_compressed,
|
||||||
const ImmutableCFOptions& ioptions, const ReadOptions& read_options,
|
const ImmutableCFOptions& ioptions, const ReadOptions& read_options,
|
||||||
BlockBasedTable::CachableEntry<Block>* block, uint32_t format_version,
|
BlockBasedTable::CachableEntry<Block>* block, uint32_t format_version,
|
||||||
const Slice& compression_dict, size_t read_amp_bytes_per_bit) {
|
const Slice& compression_dict, size_t read_amp_bytes_per_bit,
|
||||||
|
bool is_index) {
|
||||||
Status s;
|
Status s;
|
||||||
Block* compressed_block = nullptr;
|
Block* compressed_block = nullptr;
|
||||||
Cache::Handle* block_cache_compressed_handle = nullptr;
|
Cache::Handle* block_cache_compressed_handle = nullptr;
|
||||||
@ -806,9 +848,10 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
|||||||
|
|
||||||
// Lookup uncompressed cache first
|
// Lookup uncompressed cache first
|
||||||
if (block_cache != nullptr) {
|
if (block_cache != nullptr) {
|
||||||
block->cache_handle =
|
block->cache_handle = GetEntryFromCache(
|
||||||
GetEntryFromCache(block_cache, block_cache_key, BLOCK_CACHE_DATA_MISS,
|
block_cache, block_cache_key,
|
||||||
BLOCK_CACHE_DATA_HIT, statistics);
|
is_index ? BLOCK_CACHE_INDEX_MISS : BLOCK_CACHE_DATA_MISS,
|
||||||
|
is_index ? BLOCK_CACHE_INDEX_HIT : BLOCK_CACHE_DATA_HIT, statistics);
|
||||||
if (block->cache_handle != nullptr) {
|
if (block->cache_handle != nullptr) {
|
||||||
block->value =
|
block->value =
|
||||||
reinterpret_cast<Block*>(block_cache->Value(block->cache_handle));
|
reinterpret_cast<Block*>(block_cache->Value(block->cache_handle));
|
||||||
@ -860,9 +903,15 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
|||||||
&DeleteCachedEntry<Block>, &(block->cache_handle));
|
&DeleteCachedEntry<Block>, &(block->cache_handle));
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||||
RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
|
if (is_index) {
|
||||||
RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT,
|
RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
|
||||||
block->value->usable_size());
|
RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT,
|
||||||
|
block->value->usable_size());
|
||||||
|
} else {
|
||||||
|
RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
|
||||||
|
RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT,
|
||||||
|
block->value->usable_size());
|
||||||
|
}
|
||||||
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE,
|
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE,
|
||||||
block->value->usable_size());
|
block->value->usable_size());
|
||||||
} else {
|
} else {
|
||||||
@ -883,7 +932,8 @@ Status BlockBasedTable::PutDataBlockToCache(
|
|||||||
Cache* block_cache, Cache* block_cache_compressed,
|
Cache* block_cache, Cache* block_cache_compressed,
|
||||||
const ReadOptions& read_options, const ImmutableCFOptions& ioptions,
|
const ReadOptions& read_options, const ImmutableCFOptions& ioptions,
|
||||||
CachableEntry<Block>* block, Block* raw_block, uint32_t format_version,
|
CachableEntry<Block>* block, Block* raw_block, uint32_t format_version,
|
||||||
const Slice& compression_dict, size_t read_amp_bytes_per_bit) {
|
const Slice& compression_dict, size_t read_amp_bytes_per_bit, bool is_index,
|
||||||
|
Cache::Priority priority) {
|
||||||
assert(raw_block->compression_type() == kNoCompression ||
|
assert(raw_block->compression_type() == kNoCompression ||
|
||||||
block_cache_compressed != nullptr);
|
block_cache_compressed != nullptr);
|
||||||
|
|
||||||
@ -929,15 +979,21 @@ Status BlockBasedTable::PutDataBlockToCache(
|
|||||||
// insert into uncompressed block cache
|
// insert into uncompressed block cache
|
||||||
assert((block->value->compression_type() == kNoCompression));
|
assert((block->value->compression_type() == kNoCompression));
|
||||||
if (block_cache != nullptr && block->value->cachable()) {
|
if (block_cache != nullptr && block->value->cachable()) {
|
||||||
s = block_cache->Insert(block_cache_key, block->value,
|
s = block_cache->Insert(
|
||||||
block->value->usable_size(),
|
block_cache_key, block->value, block->value->usable_size(),
|
||||||
&DeleteCachedEntry<Block>, &(block->cache_handle));
|
&DeleteCachedEntry<Block>, &(block->cache_handle), priority);
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
assert(block->cache_handle != nullptr);
|
assert(block->cache_handle != nullptr);
|
||||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||||
RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
|
if (is_index) {
|
||||||
RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT,
|
RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
|
||||||
block->value->usable_size());
|
RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT,
|
||||||
|
block->value->usable_size());
|
||||||
|
} else {
|
||||||
|
RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
|
||||||
|
RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT,
|
||||||
|
block->value->usable_size());
|
||||||
|
}
|
||||||
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE,
|
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE,
|
||||||
block->value->usable_size());
|
block->value->usable_size());
|
||||||
assert(reinterpret_cast<Block*>(
|
assert(reinterpret_cast<Block*>(
|
||||||
@ -952,7 +1008,9 @@ Status BlockBasedTable::PutDataBlockToCache(
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
FilterBlockReader* BlockBasedTable::ReadFilter(Rep* rep) const {
|
FilterBlockReader* BlockBasedTable::ReadFilter(
|
||||||
|
const BlockHandle& filter_handle, const bool is_a_filter_partition) const {
|
||||||
|
auto& rep = rep_;
|
||||||
// TODO: We might want to unify with ReadBlockFromFile() if we start
|
// TODO: We might want to unify with ReadBlockFromFile() if we start
|
||||||
// requiring checksum verification in Table::Open.
|
// requiring checksum verification in Table::Open.
|
||||||
if (rep->filter_type == Rep::FilterType::kNoFilter) {
|
if (rep->filter_type == Rep::FilterType::kNoFilter) {
|
||||||
@ -960,7 +1018,7 @@ FilterBlockReader* BlockBasedTable::ReadFilter(Rep* rep) const {
|
|||||||
}
|
}
|
||||||
BlockContents block;
|
BlockContents block;
|
||||||
if (!ReadBlockContents(rep->file.get(), rep->footer, ReadOptions(),
|
if (!ReadBlockContents(rep->file.get(), rep->footer, ReadOptions(),
|
||||||
rep->filter_handle, &block, rep->ioptions,
|
filter_handle, &block, rep->ioptions,
|
||||||
false /* decompress */, Slice() /*compression dict*/,
|
false /* decompress */, Slice() /*compression dict*/,
|
||||||
rep->persistent_cache_options)
|
rep->persistent_cache_options)
|
||||||
.ok()) {
|
.ok()) {
|
||||||
@ -970,35 +1028,60 @@ FilterBlockReader* BlockBasedTable::ReadFilter(Rep* rep) const {
|
|||||||
|
|
||||||
assert(rep->filter_policy);
|
assert(rep->filter_policy);
|
||||||
|
|
||||||
if (rep->filter_type == Rep::FilterType::kBlockFilter) {
|
auto filter_type = rep->filter_type;
|
||||||
return new BlockBasedFilterBlockReader(
|
if (rep->filter_type == Rep::FilterType::kPartitionedFilter &&
|
||||||
rep->prefix_filtering ? rep->ioptions.prefix_extractor : nullptr,
|
is_a_filter_partition) {
|
||||||
rep->table_options, rep->whole_key_filtering, std::move(block),
|
filter_type = Rep::FilterType::kFullFilter;
|
||||||
rep->ioptions.statistics);
|
}
|
||||||
} else if (rep->filter_type == Rep::FilterType::kFullFilter) {
|
|
||||||
auto filter_bits_reader =
|
switch (filter_type) {
|
||||||
rep->filter_policy->GetFilterBitsReader(block.data);
|
case Rep::FilterType::kPartitionedFilter: {
|
||||||
if (filter_bits_reader != nullptr) {
|
return new PartitionedFilterBlockReader(
|
||||||
|
rep->prefix_filtering ? rep->ioptions.prefix_extractor : nullptr,
|
||||||
|
rep->whole_key_filtering, std::move(block), nullptr,
|
||||||
|
rep->ioptions.statistics, rep->internal_comparator, this);
|
||||||
|
}
|
||||||
|
|
||||||
|
case Rep::FilterType::kBlockFilter:
|
||||||
|
return new BlockBasedFilterBlockReader(
|
||||||
|
rep->prefix_filtering ? rep->ioptions.prefix_extractor : nullptr,
|
||||||
|
rep->table_options, rep->whole_key_filtering, std::move(block),
|
||||||
|
rep->ioptions.statistics);
|
||||||
|
|
||||||
|
case Rep::FilterType::kFullFilter: {
|
||||||
|
auto filter_bits_reader =
|
||||||
|
rep->filter_policy->GetFilterBitsReader(block.data);
|
||||||
|
assert(filter_bits_reader != nullptr);
|
||||||
return new FullFilterBlockReader(
|
return new FullFilterBlockReader(
|
||||||
rep->prefix_filtering ? rep->ioptions.prefix_extractor : nullptr,
|
rep->prefix_filtering ? rep->ioptions.prefix_extractor : nullptr,
|
||||||
rep->whole_key_filtering, std::move(block), filter_bits_reader,
|
rep->whole_key_filtering, std::move(block), filter_bits_reader,
|
||||||
rep->ioptions.statistics);
|
rep->ioptions.statistics);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// filter_type is either kNoFilter (exited the function at the first if),
|
default:
|
||||||
// kBlockFilter or kFullFilter. there is no way for the execution to come here
|
// filter_type is either kNoFilter (exited the function at the first if),
|
||||||
assert(false);
|
// or it must be covered in this switch block
|
||||||
return nullptr;
|
assert(false);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||||
bool no_io) const {
|
bool no_io) const {
|
||||||
|
const BlockHandle& filter_blk_handle = rep_->filter_handle;
|
||||||
|
const bool is_a_filter_partition = true;
|
||||||
|
return GetFilter(filter_blk_handle, !is_a_filter_partition, no_io);
|
||||||
|
}
|
||||||
|
|
||||||
|
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||||
|
const BlockHandle& filter_blk_handle, const bool is_a_filter_partition,
|
||||||
|
bool no_io) const {
|
||||||
// If cache_index_and_filter_blocks is false, filter should be pre-populated.
|
// If cache_index_and_filter_blocks is false, filter should be pre-populated.
|
||||||
// We will return rep_->filter anyway. rep_->filter can be nullptr if filter
|
// We will return rep_->filter anyway. rep_->filter can be nullptr if filter
|
||||||
// read fails at Open() time. We don't want to reload again since it will
|
// read fails at Open() time. We don't want to reload again since it will
|
||||||
// most probably fail again.
|
// most probably fail again.
|
||||||
if (!rep_->table_options.cache_index_and_filter_blocks) {
|
if (!is_a_filter_partition &&
|
||||||
|
!rep_->table_options.cache_index_and_filter_blocks) {
|
||||||
return {rep_->filter.get(), nullptr /* cache handle */};
|
return {rep_->filter.get(), nullptr /* cache handle */};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1008,8 +1091,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
return {nullptr /* filter */, nullptr /* cache handle */};
|
return {nullptr /* filter */, nullptr /* cache handle */};
|
||||||
}
|
}
|
||||||
|
|
||||||
// we have a pinned filter block
|
if (!is_a_filter_partition && rep_->filter_entry.IsSet()) {
|
||||||
if (rep_->filter_entry.IsSet()) {
|
|
||||||
return rep_->filter_entry;
|
return rep_->filter_entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1018,8 +1100,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
// Fetching from the cache
|
// Fetching from the cache
|
||||||
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||||
auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
|
auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
|
||||||
rep_->footer.metaindex_handle(),
|
filter_blk_handle, cache_key);
|
||||||
cache_key);
|
|
||||||
|
|
||||||
Statistics* statistics = rep_->ioptions.statistics;
|
Statistics* statistics = rep_->ioptions.statistics;
|
||||||
auto cache_handle =
|
auto cache_handle =
|
||||||
@ -1034,7 +1115,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
// Do not invoke any io.
|
// Do not invoke any io.
|
||||||
return CachableEntry<FilterBlockReader>();
|
return CachableEntry<FilterBlockReader>();
|
||||||
} else {
|
} else {
|
||||||
filter = ReadFilter(rep_);
|
filter = ReadFilter(filter_blk_handle, is_a_filter_partition);
|
||||||
if (filter != nullptr) {
|
if (filter != nullptr) {
|
||||||
assert(filter->size() > 0);
|
assert(filter->size() > 0);
|
||||||
Status s = block_cache->Insert(
|
Status s = block_cache->Insert(
|
||||||
@ -1074,7 +1155,7 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
|
|||||||
|
|
||||||
PERF_TIMER_GUARD(read_index_block_nanos);
|
PERF_TIMER_GUARD(read_index_block_nanos);
|
||||||
|
|
||||||
bool no_io = read_options.read_tier == kBlockCacheTier;
|
const bool no_io = read_options.read_tier == kBlockCacheTier;
|
||||||
Cache* block_cache = rep_->table_options.block_cache.get();
|
Cache* block_cache = rep_->table_options.block_cache.get();
|
||||||
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||||
auto key =
|
auto key =
|
||||||
@ -1153,29 +1234,36 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
|
|||||||
return iter;
|
return iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert an index iterator value (i.e., an encoded BlockHandle)
|
|
||||||
// into an iterator over the contents of the corresponding block.
|
|
||||||
// If input_iter is null, new a iterator
|
|
||||||
// If input_iter is not null, update this iter and return it
|
|
||||||
InternalIterator* BlockBasedTable::NewDataBlockIterator(
|
InternalIterator* BlockBasedTable::NewDataBlockIterator(
|
||||||
Rep* rep, const ReadOptions& ro, const Slice& index_value,
|
Rep* rep, const ReadOptions& ro, const Slice& index_value,
|
||||||
BlockIter* input_iter) {
|
BlockIter* input_iter, bool is_index) {
|
||||||
PERF_TIMER_GUARD(new_table_block_iter_nanos);
|
|
||||||
|
|
||||||
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
|
||||||
Cache* block_cache = rep->table_options.block_cache.get();
|
|
||||||
CachableEntry<Block> block;
|
|
||||||
BlockHandle handle;
|
BlockHandle handle;
|
||||||
Slice input = index_value;
|
Slice input = index_value;
|
||||||
// We intentionally allow extra stuff in index_value so that we
|
// We intentionally allow extra stuff in index_value so that we
|
||||||
// can add more features in the future.
|
// can add more features in the future.
|
||||||
Status s = handle.DecodeFrom(&input);
|
Status s = handle.DecodeFrom(&input);
|
||||||
|
return NewDataBlockIterator(rep, ro, handle, input_iter, is_index, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert an index iterator value (i.e., an encoded BlockHandle)
|
||||||
|
// into an iterator over the contents of the corresponding block.
|
||||||
|
// If input_iter is null, new a iterator
|
||||||
|
// If input_iter is not null, update this iter and return it
|
||||||
|
InternalIterator* BlockBasedTable::NewDataBlockIterator(
|
||||||
|
Rep* rep, const ReadOptions& ro, const BlockHandle& handle,
|
||||||
|
BlockIter* input_iter, bool is_index, Status s) {
|
||||||
|
PERF_TIMER_GUARD(new_table_block_iter_nanos);
|
||||||
|
|
||||||
|
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
||||||
|
Cache* block_cache = rep->table_options.block_cache.get();
|
||||||
|
CachableEntry<Block> block;
|
||||||
Slice compression_dict;
|
Slice compression_dict;
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
if (rep->compression_dict_block) {
|
if (rep->compression_dict_block) {
|
||||||
compression_dict = rep->compression_dict_block->data;
|
compression_dict = rep->compression_dict_block->data;
|
||||||
}
|
}
|
||||||
s = MaybeLoadDataBlockToCache(rep, ro, handle, compression_dict, &block);
|
s = MaybeLoadDataBlockToCache(rep, ro, handle, compression_dict, &block,
|
||||||
|
is_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Didn't get any data from block caches.
|
// Didn't get any data from block caches.
|
||||||
@ -1224,7 +1312,7 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator(
|
|||||||
|
|
||||||
Status BlockBasedTable::MaybeLoadDataBlockToCache(
|
Status BlockBasedTable::MaybeLoadDataBlockToCache(
|
||||||
Rep* rep, const ReadOptions& ro, const BlockHandle& handle,
|
Rep* rep, const ReadOptions& ro, const BlockHandle& handle,
|
||||||
Slice compression_dict, CachableEntry<Block>* block_entry) {
|
Slice compression_dict, CachableEntry<Block>* block_entry, bool is_index) {
|
||||||
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
||||||
Cache* block_cache = rep->table_options.block_cache.get();
|
Cache* block_cache = rep->table_options.block_cache.get();
|
||||||
Cache* block_cache_compressed =
|
Cache* block_cache_compressed =
|
||||||
@ -1254,7 +1342,7 @@ Status BlockBasedTable::MaybeLoadDataBlockToCache(
|
|||||||
s = GetDataBlockFromCache(
|
s = GetDataBlockFromCache(
|
||||||
key, ckey, block_cache, block_cache_compressed, rep->ioptions, ro,
|
key, ckey, block_cache, block_cache_compressed, rep->ioptions, ro,
|
||||||
block_entry, rep->table_options.format_version, compression_dict,
|
block_entry, rep->table_options.format_version, compression_dict,
|
||||||
rep->table_options.read_amp_bytes_per_bit);
|
rep->table_options.read_amp_bytes_per_bit, is_index);
|
||||||
|
|
||||||
if (block_entry->value == nullptr && !no_io && ro.fill_cache) {
|
if (block_entry->value == nullptr && !no_io && ro.fill_cache) {
|
||||||
std::unique_ptr<Block> raw_block;
|
std::unique_ptr<Block> raw_block;
|
||||||
@ -1271,7 +1359,13 @@ Status BlockBasedTable::MaybeLoadDataBlockToCache(
|
|||||||
s = PutDataBlockToCache(
|
s = PutDataBlockToCache(
|
||||||
key, ckey, block_cache, block_cache_compressed, ro, rep->ioptions,
|
key, ckey, block_cache, block_cache_compressed, ro, rep->ioptions,
|
||||||
block_entry, raw_block.release(), rep->table_options.format_version,
|
block_entry, raw_block.release(), rep->table_options.format_version,
|
||||||
compression_dict, rep->table_options.read_amp_bytes_per_bit);
|
compression_dict, rep->table_options.read_amp_bytes_per_bit,
|
||||||
|
is_index,
|
||||||
|
is_index &&
|
||||||
|
rep->table_options
|
||||||
|
.cache_index_and_filter_blocks_with_high_priority
|
||||||
|
? Cache::Priority::HIGH
|
||||||
|
: Cache::Priority::LOW);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1279,17 +1373,39 @@ Status BlockBasedTable::MaybeLoadDataBlockToCache(
|
|||||||
}
|
}
|
||||||
|
|
||||||
BlockBasedTable::BlockEntryIteratorState::BlockEntryIteratorState(
|
BlockBasedTable::BlockEntryIteratorState::BlockEntryIteratorState(
|
||||||
BlockBasedTable* table, const ReadOptions& read_options, bool skip_filters)
|
BlockBasedTable* table, const ReadOptions& read_options, bool skip_filters,
|
||||||
|
bool is_index, Cleanable* block_cache_cleaner)
|
||||||
: TwoLevelIteratorState(table->rep_->ioptions.prefix_extractor != nullptr),
|
: TwoLevelIteratorState(table->rep_->ioptions.prefix_extractor != nullptr),
|
||||||
table_(table),
|
table_(table),
|
||||||
read_options_(read_options),
|
read_options_(read_options),
|
||||||
skip_filters_(skip_filters) {}
|
skip_filters_(skip_filters),
|
||||||
|
is_index_(is_index),
|
||||||
|
block_cache_cleaner_(block_cache_cleaner) {}
|
||||||
|
|
||||||
InternalIterator*
|
InternalIterator*
|
||||||
BlockBasedTable::BlockEntryIteratorState::NewSecondaryIterator(
|
BlockBasedTable::BlockEntryIteratorState::NewSecondaryIterator(
|
||||||
const Slice& index_value) {
|
const Slice& index_value) {
|
||||||
// Return a block iterator on the index partition
|
// Return a block iterator on the index partition
|
||||||
return NewDataBlockIterator(table_->rep_, read_options_, index_value);
|
BlockHandle handle;
|
||||||
|
Slice input = index_value;
|
||||||
|
Status s = handle.DecodeFrom(&input);
|
||||||
|
auto iter = NewDataBlockIterator(table_->rep_, read_options_, handle, nullptr,
|
||||||
|
is_index_, s);
|
||||||
|
if (block_cache_cleaner_) {
|
||||||
|
uint64_t offset = handle.offset();
|
||||||
|
{
|
||||||
|
ReadLock rl(&cleaner_mu);
|
||||||
|
if (cleaner_set.find(offset) != cleaner_set.end()) {
|
||||||
|
// already have a refernce to the block cache objects
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
WriteLock wl(&cleaner_mu);
|
||||||
|
cleaner_set.insert(offset);
|
||||||
|
// Keep the data into cache until the cleaner cleansup
|
||||||
|
iter->DelegateCleanupsTo(block_cache_cleaner_);
|
||||||
|
}
|
||||||
|
return iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BlockBasedTable::BlockEntryIteratorState::PrefixMayMatch(
|
bool BlockBasedTable::BlockEntryIteratorState::PrefixMayMatch(
|
||||||
@ -1325,25 +1441,29 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
auto prefix = rep_->ioptions.prefix_extractor->Transform(user_key);
|
auto prefix = rep_->ioptions.prefix_extractor->Transform(user_key);
|
||||||
InternalKey internal_key_prefix(prefix, kMaxSequenceNumber, kTypeValue);
|
|
||||||
auto internal_prefix = internal_key_prefix.Encode();
|
|
||||||
|
|
||||||
bool may_match = true;
|
bool may_match = true;
|
||||||
Status s;
|
Status s;
|
||||||
|
|
||||||
// To prevent any io operation in this method, we set `read_tier` to make
|
|
||||||
// sure we always read index or filter only when they have already been
|
|
||||||
// loaded to memory.
|
|
||||||
ReadOptions no_io_read_options;
|
|
||||||
no_io_read_options.read_tier = kBlockCacheTier;
|
|
||||||
|
|
||||||
// First, try check with full filter
|
// First, try check with full filter
|
||||||
auto filter_entry = GetFilter(true /* no io */);
|
const bool no_io = true;
|
||||||
|
auto filter_entry = GetFilter(no_io);
|
||||||
FilterBlockReader* filter = filter_entry.value;
|
FilterBlockReader* filter = filter_entry.value;
|
||||||
if (filter != nullptr) {
|
if (filter != nullptr) {
|
||||||
if (!filter->IsBlockBased()) {
|
if (!filter->IsBlockBased()) {
|
||||||
may_match = filter->PrefixMayMatch(prefix);
|
const Slice* const const_ikey_ptr = &internal_key;
|
||||||
|
may_match =
|
||||||
|
filter->PrefixMayMatch(prefix, kNotValid, no_io, const_ikey_ptr);
|
||||||
} else {
|
} else {
|
||||||
|
InternalKey internal_key_prefix(prefix, kMaxSequenceNumber, kTypeValue);
|
||||||
|
auto internal_prefix = internal_key_prefix.Encode();
|
||||||
|
|
||||||
|
// To prevent any io operation in this method, we set `read_tier` to make
|
||||||
|
// sure we always read index or filter only when they have already been
|
||||||
|
// loaded to memory.
|
||||||
|
ReadOptions no_io_read_options;
|
||||||
|
no_io_read_options.read_tier = kBlockCacheTier;
|
||||||
|
|
||||||
// Then, try find it within each block
|
// Then, try find it within each block
|
||||||
unique_ptr<InternalIterator> iiter(NewIndexIterator(no_io_read_options));
|
unique_ptr<InternalIterator> iiter(NewIndexIterator(no_io_read_options));
|
||||||
iiter->Seek(internal_prefix);
|
iiter->Seek(internal_prefix);
|
||||||
@ -1438,20 +1558,23 @@ InternalIterator* BlockBasedTable::NewRangeTombstoneIterator(
|
|||||||
|
|
||||||
bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options,
|
bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options,
|
||||||
FilterBlockReader* filter,
|
FilterBlockReader* filter,
|
||||||
const Slice& internal_key) const {
|
const Slice& internal_key,
|
||||||
|
const bool no_io) const {
|
||||||
if (filter == nullptr || filter->IsBlockBased()) {
|
if (filter == nullptr || filter->IsBlockBased()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
Slice user_key = ExtractUserKey(internal_key);
|
Slice user_key = ExtractUserKey(internal_key);
|
||||||
|
const Slice* const const_ikey_ptr = &internal_key;
|
||||||
if (filter->whole_key_filtering()) {
|
if (filter->whole_key_filtering()) {
|
||||||
return filter->KeyMayMatch(user_key);
|
return filter->KeyMayMatch(user_key, kNotValid, no_io, const_ikey_ptr);
|
||||||
}
|
}
|
||||||
if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor &&
|
if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor &&
|
||||||
rep_->table_properties->prefix_extractor_name.compare(
|
rep_->table_properties->prefix_extractor_name.compare(
|
||||||
rep_->ioptions.prefix_extractor->Name()) == 0 &&
|
rep_->ioptions.prefix_extractor->Name()) == 0 &&
|
||||||
rep_->ioptions.prefix_extractor->InDomain(user_key) &&
|
rep_->ioptions.prefix_extractor->InDomain(user_key) &&
|
||||||
!filter->PrefixMayMatch(
|
!filter->PrefixMayMatch(
|
||||||
rep_->ioptions.prefix_extractor->Transform(user_key))) {
|
rep_->ioptions.prefix_extractor->Transform(user_key), kNotValid,
|
||||||
|
false, const_ikey_ptr)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -1460,6 +1583,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options,
|
|||||||
Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||||
GetContext* get_context, bool skip_filters) {
|
GetContext* get_context, bool skip_filters) {
|
||||||
Status s;
|
Status s;
|
||||||
|
const bool no_io = read_options.read_tier == kBlockCacheTier;
|
||||||
CachableEntry<FilterBlockReader> filter_entry;
|
CachableEntry<FilterBlockReader> filter_entry;
|
||||||
if (!skip_filters) {
|
if (!skip_filters) {
|
||||||
filter_entry = GetFilter(read_options.read_tier == kBlockCacheTier);
|
filter_entry = GetFilter(read_options.read_tier == kBlockCacheTier);
|
||||||
@ -1468,14 +1592,14 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|||||||
|
|
||||||
// First check the full filter
|
// First check the full filter
|
||||||
// If full filter not useful, Then go into each block
|
// If full filter not useful, Then go into each block
|
||||||
if (!FullFilterKeyMayMatch(read_options, filter, key)) {
|
if (!FullFilterKeyMayMatch(read_options, filter, key, no_io)) {
|
||||||
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
|
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
|
||||||
} else {
|
} else {
|
||||||
BlockIter iiter_on_stack;
|
BlockIter iiter_on_stack;
|
||||||
auto iiter = NewIndexIterator(read_options, &iiter_on_stack);
|
auto iiter = NewIndexIterator(read_options, &iiter_on_stack);
|
||||||
std::unique_ptr<InternalIterator> iiter_unique_ptr;
|
std::unique_ptr<InternalIterator> iiter_unique_ptr;
|
||||||
if (iiter != &iiter_on_stack) {
|
if (iiter != &iiter_on_stack) {
|
||||||
iiter_unique_ptr = std::unique_ptr<InternalIterator>(iiter);
|
iiter_unique_ptr.reset(iiter);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool done = false;
|
bool done = false;
|
||||||
@ -1486,7 +1610,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|||||||
bool not_exist_in_filter =
|
bool not_exist_in_filter =
|
||||||
filter != nullptr && filter->IsBlockBased() == true &&
|
filter != nullptr && filter->IsBlockBased() == true &&
|
||||||
handle.DecodeFrom(&handle_value).ok() &&
|
handle.DecodeFrom(&handle_value).ok() &&
|
||||||
!filter->KeyMayMatch(ExtractUserKey(key), handle.offset());
|
!filter->KeyMayMatch(ExtractUserKey(key), handle.offset(), no_io);
|
||||||
|
|
||||||
if (not_exist_in_filter) {
|
if (not_exist_in_filter) {
|
||||||
// Not found
|
// Not found
|
||||||
@ -1525,6 +1649,10 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|||||||
}
|
}
|
||||||
s = biter.status();
|
s = biter.status();
|
||||||
}
|
}
|
||||||
|
if (done) {
|
||||||
|
// Avoid the extra Next which is expensive in two-level indexes
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
s = iiter->status();
|
s = iiter->status();
|
||||||
@ -1631,7 +1759,8 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
|
|||||||
// 4. internal_comparator
|
// 4. internal_comparator
|
||||||
// 5. index_type
|
// 5. index_type
|
||||||
Status BlockBasedTable::CreateIndexReader(
|
Status BlockBasedTable::CreateIndexReader(
|
||||||
IndexReader** index_reader, InternalIterator* preloaded_meta_index_iter) {
|
IndexReader** index_reader, InternalIterator* preloaded_meta_index_iter,
|
||||||
|
int level) {
|
||||||
// Some old version of block-based tables don't have index type present in
|
// Some old version of block-based tables don't have index type present in
|
||||||
// table properties. If that's the case we can safely use the kBinarySearch.
|
// table properties. If that's the case we can safely use the kBinarySearch.
|
||||||
auto index_type_on_file = BlockBasedTableOptions::kBinarySearch;
|
auto index_type_on_file = BlockBasedTableOptions::kBinarySearch;
|
||||||
@ -1660,7 +1789,7 @@ Status BlockBasedTable::CreateIndexReader(
|
|||||||
case BlockBasedTableOptions::kTwoLevelIndexSearch: {
|
case BlockBasedTableOptions::kTwoLevelIndexSearch: {
|
||||||
return PartitionIndexReader::Create(
|
return PartitionIndexReader::Create(
|
||||||
this, file, footer, footer.index_handle(), rep_->ioptions, comparator,
|
this, file, footer, footer.index_handle(), rep_->ioptions, comparator,
|
||||||
index_reader, rep_->persistent_cache_options);
|
index_reader, rep_->persistent_cache_options, level);
|
||||||
}
|
}
|
||||||
case BlockBasedTableOptions::kBinarySearch: {
|
case BlockBasedTableOptions::kBinarySearch: {
|
||||||
return BinarySearchIndexReader::Create(
|
return BinarySearchIndexReader::Create(
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -193,17 +194,29 @@ class BlockBasedTable : public TableReader {
|
|||||||
|
|
||||||
class BlockEntryIteratorState;
|
class BlockEntryIteratorState;
|
||||||
|
|
||||||
private:
|
friend class PartitionIndexReader;
|
||||||
|
|
||||||
|
protected:
|
||||||
template <class TValue>
|
template <class TValue>
|
||||||
struct CachableEntry;
|
struct CachableEntry;
|
||||||
struct Rep;
|
struct Rep;
|
||||||
Rep* rep_;
|
Rep* rep_;
|
||||||
|
explicit BlockBasedTable(Rep* rep)
|
||||||
|
: rep_(rep), compaction_optimized_(false) {}
|
||||||
|
|
||||||
|
private:
|
||||||
bool compaction_optimized_;
|
bool compaction_optimized_;
|
||||||
|
|
||||||
// input_iter: if it is not null, update this one and return it as Iterator
|
// input_iter: if it is not null, update this one and return it as Iterator
|
||||||
static InternalIterator* NewDataBlockIterator(
|
static InternalIterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro,
|
||||||
Rep* rep, const ReadOptions& ro, const Slice& index_value,
|
const Slice& index_value,
|
||||||
BlockIter* input_iter = nullptr);
|
BlockIter* input_iter = nullptr,
|
||||||
|
bool is_index = false);
|
||||||
|
static InternalIterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro,
|
||||||
|
const BlockHandle& block_hanlde,
|
||||||
|
BlockIter* input_iter = nullptr,
|
||||||
|
bool is_index = false,
|
||||||
|
Status s = Status());
|
||||||
// If block cache enabled (compressed or uncompressed), looks for the block
|
// If block cache enabled (compressed or uncompressed), looks for the block
|
||||||
// identified by handle in (1) uncompressed cache, (2) compressed cache, and
|
// identified by handle in (1) uncompressed cache, (2) compressed cache, and
|
||||||
// then (3) file. If found, inserts into the cache(s) that were searched
|
// then (3) file. If found, inserts into the cache(s) that were searched
|
||||||
@ -213,14 +226,19 @@ class BlockBasedTable : public TableReader {
|
|||||||
// @param block_entry value is set to the uncompressed block if found. If
|
// @param block_entry value is set to the uncompressed block if found. If
|
||||||
// in uncompressed block cache, also sets cache_handle to reference that
|
// in uncompressed block cache, also sets cache_handle to reference that
|
||||||
// block.
|
// block.
|
||||||
static Status MaybeLoadDataBlockToCache(
|
static Status MaybeLoadDataBlockToCache(Rep* rep, const ReadOptions& ro,
|
||||||
Rep* rep, const ReadOptions& ro, const BlockHandle& handle,
|
const BlockHandle& handle,
|
||||||
Slice compression_dict, CachableEntry<Block>* block_entry);
|
Slice compression_dict,
|
||||||
|
CachableEntry<Block>* block_entry,
|
||||||
|
bool is_index = false);
|
||||||
|
|
||||||
// For the following two functions:
|
// For the following two functions:
|
||||||
// if `no_io == true`, we will not try to read filter/index from sst file
|
// if `no_io == true`, we will not try to read filter/index from sst file
|
||||||
// were they not present in cache yet.
|
// were they not present in cache yet.
|
||||||
CachableEntry<FilterBlockReader> GetFilter(bool no_io = false) const;
|
CachableEntry<FilterBlockReader> GetFilter(bool no_io = false) const;
|
||||||
|
virtual CachableEntry<FilterBlockReader> GetFilter(
|
||||||
|
const BlockHandle& filter_blk_handle, const bool is_a_filter_partition,
|
||||||
|
bool no_io) const;
|
||||||
|
|
||||||
// Get the iterator from the index reader.
|
// Get the iterator from the index reader.
|
||||||
// If input_iter is not set, return new Iterator
|
// If input_iter is not set, return new Iterator
|
||||||
@ -247,7 +265,8 @@ class BlockBasedTable : public TableReader {
|
|||||||
Cache* block_cache, Cache* block_cache_compressed,
|
Cache* block_cache, Cache* block_cache_compressed,
|
||||||
const ImmutableCFOptions& ioptions, const ReadOptions& read_options,
|
const ImmutableCFOptions& ioptions, const ReadOptions& read_options,
|
||||||
BlockBasedTable::CachableEntry<Block>* block, uint32_t format_version,
|
BlockBasedTable::CachableEntry<Block>* block, uint32_t format_version,
|
||||||
const Slice& compression_dict, size_t read_amp_bytes_per_bit);
|
const Slice& compression_dict, size_t read_amp_bytes_per_bit,
|
||||||
|
bool is_index = false);
|
||||||
|
|
||||||
// Put a raw block (maybe compressed) to the corresponding block caches.
|
// Put a raw block (maybe compressed) to the corresponding block caches.
|
||||||
// This method will perform decompression against raw_block if needed and then
|
// This method will perform decompression against raw_block if needed and then
|
||||||
@ -264,7 +283,8 @@ class BlockBasedTable : public TableReader {
|
|||||||
Cache* block_cache, Cache* block_cache_compressed,
|
Cache* block_cache, Cache* block_cache_compressed,
|
||||||
const ReadOptions& read_options, const ImmutableCFOptions& ioptions,
|
const ReadOptions& read_options, const ImmutableCFOptions& ioptions,
|
||||||
CachableEntry<Block>* block, Block* raw_block, uint32_t format_version,
|
CachableEntry<Block>* block, Block* raw_block, uint32_t format_version,
|
||||||
const Slice& compression_dict, size_t read_amp_bytes_per_bit);
|
const Slice& compression_dict, size_t read_amp_bytes_per_bit,
|
||||||
|
bool is_index = false, Cache::Priority pri = Cache::Priority::LOW);
|
||||||
|
|
||||||
// Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found
|
// Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found
|
||||||
// after a call to Seek(key), until handle_result returns false.
|
// after a call to Seek(key), until handle_result returns false.
|
||||||
@ -280,24 +300,23 @@ class BlockBasedTable : public TableReader {
|
|||||||
// helps avoid re-reading meta index block if caller already created one.
|
// helps avoid re-reading meta index block if caller already created one.
|
||||||
Status CreateIndexReader(
|
Status CreateIndexReader(
|
||||||
IndexReader** index_reader,
|
IndexReader** index_reader,
|
||||||
InternalIterator* preloaded_meta_index_iter = nullptr);
|
InternalIterator* preloaded_meta_index_iter = nullptr,
|
||||||
|
const int level = -1);
|
||||||
|
|
||||||
bool FullFilterKeyMayMatch(const ReadOptions& read_options,
|
bool FullFilterKeyMayMatch(const ReadOptions& read_options,
|
||||||
FilterBlockReader* filter,
|
FilterBlockReader* filter, const Slice& user_key,
|
||||||
const Slice& user_key) const;
|
const bool no_io) const;
|
||||||
|
|
||||||
// Read the meta block from sst.
|
// Read the meta block from sst.
|
||||||
static Status ReadMetaBlock(Rep* rep, std::unique_ptr<Block>* meta_block,
|
static Status ReadMetaBlock(Rep* rep, std::unique_ptr<Block>* meta_block,
|
||||||
std::unique_ptr<InternalIterator>* iter);
|
std::unique_ptr<InternalIterator>* iter);
|
||||||
|
|
||||||
// Create the filter from the filter block.
|
// Create the filter from the filter block.
|
||||||
FilterBlockReader* ReadFilter(Rep* rep) const;
|
FilterBlockReader* ReadFilter(const BlockHandle& filter_handle,
|
||||||
|
const bool is_a_filter_partition) const;
|
||||||
|
|
||||||
static void SetupCacheKeyPrefix(Rep* rep, uint64_t file_size);
|
static void SetupCacheKeyPrefix(Rep* rep, uint64_t file_size);
|
||||||
|
|
||||||
explicit BlockBasedTable(Rep* rep)
|
|
||||||
: rep_(rep), compaction_optimized_(false) {}
|
|
||||||
|
|
||||||
// Generate a cache key prefix from the file
|
// Generate a cache key prefix from the file
|
||||||
static void GenerateCachePrefix(Cache* cc,
|
static void GenerateCachePrefix(Cache* cc,
|
||||||
RandomAccessFile* file, char* buffer, size_t* size);
|
RandomAccessFile* file, char* buffer, size_t* size);
|
||||||
@ -313,13 +332,18 @@ class BlockBasedTable : public TableReader {
|
|||||||
// No copying allowed
|
// No copying allowed
|
||||||
explicit BlockBasedTable(const TableReader&) = delete;
|
explicit BlockBasedTable(const TableReader&) = delete;
|
||||||
void operator=(const TableReader&) = delete;
|
void operator=(const TableReader&) = delete;
|
||||||
|
|
||||||
|
friend class PartitionedFilterBlockReader;
|
||||||
|
friend class PartitionedFilterBlockTest;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Maitaning state of a two-level iteration on a partitioned index structure
|
// Maitaning state of a two-level iteration on a partitioned index structure
|
||||||
class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState {
|
class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState {
|
||||||
public:
|
public:
|
||||||
BlockEntryIteratorState(BlockBasedTable* table,
|
BlockEntryIteratorState(BlockBasedTable* table,
|
||||||
const ReadOptions& read_options, bool skip_filters);
|
const ReadOptions& read_options, bool skip_filters,
|
||||||
|
bool is_index = false,
|
||||||
|
Cleanable* block_cache_cleaner = nullptr);
|
||||||
InternalIterator* NewSecondaryIterator(const Slice& index_value) override;
|
InternalIterator* NewSecondaryIterator(const Slice& index_value) override;
|
||||||
bool PrefixMayMatch(const Slice& internal_key) override;
|
bool PrefixMayMatch(const Slice& internal_key) override;
|
||||||
|
|
||||||
@ -328,6 +352,11 @@ class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState {
|
|||||||
BlockBasedTable* table_;
|
BlockBasedTable* table_;
|
||||||
const ReadOptions read_options_;
|
const ReadOptions read_options_;
|
||||||
bool skip_filters_;
|
bool skip_filters_;
|
||||||
|
// true if the 2nd level iterator is on indexes instead of on user data.
|
||||||
|
bool is_index_;
|
||||||
|
Cleanable* block_cache_cleaner_;
|
||||||
|
std::set<uint64_t> cleaner_set;
|
||||||
|
port::RWMutex cleaner_mu;
|
||||||
};
|
};
|
||||||
|
|
||||||
// CachableEntry represents the entries that *may* be fetched from block cache.
|
// CachableEntry represents the entries that *may* be fetched from block cache.
|
||||||
|
@ -82,16 +82,35 @@ class FilterBlockReader {
|
|||||||
virtual ~FilterBlockReader() {}
|
virtual ~FilterBlockReader() {}
|
||||||
|
|
||||||
virtual bool IsBlockBased() = 0; // If is blockbased filter
|
virtual bool IsBlockBased() = 0; // If is blockbased filter
|
||||||
virtual bool KeyMayMatch(const Slice& key,
|
/**
|
||||||
uint64_t block_offset = kNotValid) = 0;
|
* If no_io is set, then it returns true if it cannot answer the query without
|
||||||
|
* reading data from disk. This is used in PartitionedFilterBlockReader to
|
||||||
|
* avoid reading partitions that are not in block cache already
|
||||||
|
*
|
||||||
|
* Normally filters are built on only the user keys and the InternalKey is not
|
||||||
|
* needed for a query. The index in PartitionedFilterBlockReader however is
|
||||||
|
* built upon InternalKey and must be provided via const_ikey_ptr when running
|
||||||
|
* queries.
|
||||||
|
*/
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, uint64_t block_offset = kNotValid,
|
||||||
|
const bool no_io = false,
|
||||||
|
const Slice* const const_ikey_ptr = nullptr) = 0;
|
||||||
|
/**
|
||||||
|
* no_io and const_ikey_ptr here means the same as in KeyMayMatch
|
||||||
|
*/
|
||||||
virtual bool PrefixMayMatch(const Slice& prefix,
|
virtual bool PrefixMayMatch(const Slice& prefix,
|
||||||
uint64_t block_offset = kNotValid) = 0;
|
uint64_t block_offset = kNotValid,
|
||||||
|
const bool no_io = false,
|
||||||
|
const Slice* const const_ikey_ptr = nullptr) = 0;
|
||||||
virtual size_t ApproximateMemoryUsage() const = 0;
|
virtual size_t ApproximateMemoryUsage() const = 0;
|
||||||
virtual size_t size() const { return size_; }
|
virtual size_t size() const { return size_; }
|
||||||
virtual Statistics* statistics() const { return statistics_; }
|
virtual Statistics* statistics() const { return statistics_; }
|
||||||
|
|
||||||
bool whole_key_filtering() const { return whole_key_filtering_; }
|
bool whole_key_filtering() const { return whole_key_filtering_; }
|
||||||
|
|
||||||
|
int GetLevel() const { return level_; }
|
||||||
|
void SetLevel(int level) { level_ = level; }
|
||||||
|
|
||||||
// convert this object to a human readable form
|
// convert this object to a human readable form
|
||||||
virtual std::string ToString() const {
|
virtual std::string ToString() const {
|
||||||
std::string error_msg("Unsupported filter \n");
|
std::string error_msg("Unsupported filter \n");
|
||||||
@ -107,6 +126,7 @@ class FilterBlockReader {
|
|||||||
void operator=(const FilterBlockReader&);
|
void operator=(const FilterBlockReader&);
|
||||||
size_t size_;
|
size_t size_;
|
||||||
Statistics* statistics_;
|
Statistics* statistics_;
|
||||||
|
int level_ = -1;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -73,8 +73,9 @@ FullFilterBlockReader::FullFilterBlockReader(
|
|||||||
block_contents_ = std::move(contents);
|
block_contents_ = std::move(contents);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool FullFilterBlockReader::KeyMayMatch(const Slice& key,
|
bool FullFilterBlockReader::KeyMayMatch(const Slice& key, uint64_t block_offset,
|
||||||
uint64_t block_offset) {
|
const bool no_io,
|
||||||
|
const Slice* const const_ikey_ptr) {
|
||||||
assert(block_offset == kNotValid);
|
assert(block_offset == kNotValid);
|
||||||
if (!whole_key_filtering_) {
|
if (!whole_key_filtering_) {
|
||||||
return true;
|
return true;
|
||||||
@ -83,7 +84,9 @@ bool FullFilterBlockReader::KeyMayMatch(const Slice& key,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool FullFilterBlockReader::PrefixMayMatch(const Slice& prefix,
|
bool FullFilterBlockReader::PrefixMayMatch(const Slice& prefix,
|
||||||
uint64_t block_offset) {
|
uint64_t block_offset,
|
||||||
|
const bool no_io,
|
||||||
|
const Slice* const const_ikey_ptr) {
|
||||||
assert(block_offset == kNotValid);
|
assert(block_offset == kNotValid);
|
||||||
if (!prefix_extractor_) {
|
if (!prefix_extractor_) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -91,10 +91,14 @@ class FullFilterBlockReader : public FilterBlockReader {
|
|||||||
~FullFilterBlockReader() {}
|
~FullFilterBlockReader() {}
|
||||||
|
|
||||||
virtual bool IsBlockBased() override { return false; }
|
virtual bool IsBlockBased() override { return false; }
|
||||||
virtual bool KeyMayMatch(const Slice& key,
|
virtual bool KeyMayMatch(
|
||||||
uint64_t block_offset = kNotValid) override;
|
const Slice& key, uint64_t block_offset = kNotValid,
|
||||||
virtual bool PrefixMayMatch(const Slice& prefix,
|
const bool no_io = false,
|
||||||
uint64_t block_offset = kNotValid) override;
|
const Slice* const const_ikey_ptr = nullptr) override;
|
||||||
|
virtual bool PrefixMayMatch(
|
||||||
|
const Slice& prefix, uint64_t block_offset = kNotValid,
|
||||||
|
const bool no_io = false,
|
||||||
|
const Slice* const const_ikey_ptr = nullptr) override;
|
||||||
virtual size_t ApproximateMemoryUsage() const override;
|
virtual size_t ApproximateMemoryUsage() const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -5,7 +5,12 @@
|
|||||||
|
|
||||||
#include "table/partitioned_filter_block.h"
|
#include "table/partitioned_filter_block.h"
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
|
#include "rocksdb/filter_policy.h"
|
||||||
|
#include "table/block.h"
|
||||||
|
#include "table/block_based_table_reader.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
@ -67,4 +72,149 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PartitionedFilterBlockReader::PartitionedFilterBlockReader(
|
||||||
|
const SliceTransform* prefix_extractor, bool _whole_key_filtering,
|
||||||
|
BlockContents&& contents, FilterBitsReader* filter_bits_reader,
|
||||||
|
Statistics* stats, const Comparator& comparator,
|
||||||
|
const BlockBasedTable* table)
|
||||||
|
: FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
|
||||||
|
prefix_extractor_(prefix_extractor),
|
||||||
|
comparator_(comparator),
|
||||||
|
table_(table) {
|
||||||
|
idx_on_fltr_blk_.reset(new Block(std::move(contents),
|
||||||
|
kDisableGlobalSequenceNumber,
|
||||||
|
0 /* read_amp_bytes_per_bit */, stats));
|
||||||
|
}
|
||||||
|
|
||||||
|
PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
|
||||||
|
ReadLock rl(&mu_);
|
||||||
|
for (auto it = handle_list_.begin(); it != handle_list_.end(); ++it) {
|
||||||
|
table_->rep_->table_options.block_cache.get()->Release(*it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PartitionedFilterBlockReader::KeyMayMatch(
|
||||||
|
const Slice& key, uint64_t block_offset, const bool no_io,
|
||||||
|
const Slice* const const_ikey_ptr) {
|
||||||
|
assert(const_ikey_ptr != nullptr);
|
||||||
|
assert(block_offset == kNotValid);
|
||||||
|
if (!whole_key_filtering_) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (UNLIKELY(idx_on_fltr_blk_->size() == 0)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// This is the user key vs. the full key in the partition index. We assume
|
||||||
|
// that user key <= full key
|
||||||
|
auto filter_handle = GetFilterPartitionHandle(*const_ikey_ptr);
|
||||||
|
if (UNLIKELY(filter_handle.size() == 0)) { // key is out of range
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool cached = false;
|
||||||
|
auto filter_partition = GetFilterPartition(&filter_handle, no_io, &cached);
|
||||||
|
if (UNLIKELY(!filter_partition.value)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto res = filter_partition.value->KeyMayMatch(key, block_offset, no_io);
|
||||||
|
if (cached) {
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
if (LIKELY(filter_partition.IsSet())) {
|
||||||
|
filter_partition.Release(table_->rep_->table_options.block_cache.get());
|
||||||
|
} else {
|
||||||
|
delete filter_partition.value;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PartitionedFilterBlockReader::PrefixMayMatch(
|
||||||
|
const Slice& prefix, uint64_t block_offset, const bool no_io,
|
||||||
|
const Slice* const const_ikey_ptr) {
|
||||||
|
assert(const_ikey_ptr != nullptr);
|
||||||
|
assert(block_offset == kNotValid);
|
||||||
|
if (!prefix_extractor_) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (UNLIKELY(idx_on_fltr_blk_->size() == 0)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto filter_handle = GetFilterPartitionHandle(*const_ikey_ptr);
|
||||||
|
if (UNLIKELY(filter_handle.size() == 0)) { // prefix is out of range
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool cached = false;
|
||||||
|
auto filter_partition = GetFilterPartition(&filter_handle, no_io, &cached);
|
||||||
|
if (UNLIKELY(!filter_partition.value)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto res = filter_partition.value->PrefixMayMatch(prefix, kNotValid, no_io);
|
||||||
|
if (cached) {
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
if (LIKELY(filter_partition.IsSet())) {
|
||||||
|
filter_partition.Release(table_->rep_->table_options.block_cache.get());
|
||||||
|
} else {
|
||||||
|
delete filter_partition.value;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
Slice PartitionedFilterBlockReader::GetFilterPartitionHandle(
|
||||||
|
const Slice& entry) {
|
||||||
|
BlockIter iter;
|
||||||
|
idx_on_fltr_blk_->NewIterator(&comparator_, &iter, true);
|
||||||
|
iter.Seek(entry);
|
||||||
|
if (UNLIKELY(!iter.Valid())) {
|
||||||
|
return Slice();
|
||||||
|
}
|
||||||
|
assert(iter.Valid());
|
||||||
|
Slice handle_value = iter.value();
|
||||||
|
return handle_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
BlockBasedTable::CachableEntry<FilterBlockReader>
|
||||||
|
PartitionedFilterBlockReader::GetFilterPartition(Slice* handle_value,
|
||||||
|
const bool no_io,
|
||||||
|
bool* cached) {
|
||||||
|
BlockHandle fltr_blk_handle;
|
||||||
|
auto s = fltr_blk_handle.DecodeFrom(handle_value);
|
||||||
|
assert(s.ok());
|
||||||
|
const bool is_a_filter_partition = true;
|
||||||
|
auto block_cache = table_->rep_->table_options.block_cache.get();
|
||||||
|
if (LIKELY(block_cache != nullptr)) {
|
||||||
|
bool pin_cached_filters =
|
||||||
|
GetLevel() == 0 &&
|
||||||
|
table_->rep_->table_options.pin_l0_filter_and_index_blocks_in_cache;
|
||||||
|
if (pin_cached_filters) {
|
||||||
|
ReadLock rl(&mu_);
|
||||||
|
auto iter = filter_cache_.find(fltr_blk_handle.offset());
|
||||||
|
if (iter != filter_cache_.end()) {
|
||||||
|
RecordTick(statistics(), BLOCK_CACHE_FILTER_HIT);
|
||||||
|
*cached = true;
|
||||||
|
return {iter->second, nullptr};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto filter =
|
||||||
|
table_->GetFilter(fltr_blk_handle, is_a_filter_partition, no_io);
|
||||||
|
if (pin_cached_filters && filter.IsSet()) {
|
||||||
|
WriteLock wl(&mu_);
|
||||||
|
std::pair<uint64_t, FilterBlockReader*> pair(fltr_blk_handle.offset(),
|
||||||
|
filter.value);
|
||||||
|
auto succ = filter_cache_.insert(pair).second;
|
||||||
|
if (succ) {
|
||||||
|
handle_list_.push_back(filter.cache_handle);
|
||||||
|
} // Otherwise it is already inserted by a concurrent thread
|
||||||
|
*cached = true;
|
||||||
|
}
|
||||||
|
return filter;
|
||||||
|
} else {
|
||||||
|
auto filter = table_->ReadFilter(fltr_blk_handle, is_a_filter_partition);
|
||||||
|
return {filter, nullptr};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const {
|
||||||
|
return idx_on_fltr_blk_->size();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -7,14 +7,17 @@
|
|||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <unordered_map>
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "rocksdb/options.h"
|
#include "rocksdb/options.h"
|
||||||
#include "rocksdb/slice.h"
|
#include "rocksdb/slice.h"
|
||||||
#include "rocksdb/slice_transform.h"
|
#include "rocksdb/slice_transform.h"
|
||||||
|
|
||||||
|
#include "table/block.h"
|
||||||
|
#include "table/block_based_table_reader.h"
|
||||||
#include "table/full_filter_block.h"
|
#include "table/full_filter_block.h"
|
||||||
#include "table/index_builder.h"
|
#include "table/index_builder.h"
|
||||||
|
#include "util/autovector.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
@ -49,4 +52,40 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
|
|||||||
PartitionedIndexBuilder* const p_index_builder_;
|
PartitionedIndexBuilder* const p_index_builder_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class PartitionedFilterBlockReader : public FilterBlockReader {
|
||||||
|
public:
|
||||||
|
explicit PartitionedFilterBlockReader(const SliceTransform* prefix_extractor,
|
||||||
|
bool whole_key_filtering,
|
||||||
|
BlockContents&& contents,
|
||||||
|
FilterBitsReader* filter_bits_reader,
|
||||||
|
Statistics* stats,
|
||||||
|
const Comparator& comparator,
|
||||||
|
const BlockBasedTable* table);
|
||||||
|
virtual ~PartitionedFilterBlockReader();
|
||||||
|
|
||||||
|
virtual bool IsBlockBased() override { return false; }
|
||||||
|
virtual bool KeyMayMatch(
|
||||||
|
const Slice& key, uint64_t block_offset = kNotValid,
|
||||||
|
const bool no_io = false,
|
||||||
|
const Slice* const const_ikey_ptr = nullptr) override;
|
||||||
|
virtual bool PrefixMayMatch(
|
||||||
|
const Slice& prefix, uint64_t block_offset = kNotValid,
|
||||||
|
const bool no_io = false,
|
||||||
|
const Slice* const const_ikey_ptr = nullptr) override;
|
||||||
|
virtual size_t ApproximateMemoryUsage() const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Slice GetFilterPartitionHandle(const Slice& entry);
|
||||||
|
BlockBasedTable::CachableEntry<FilterBlockReader> GetFilterPartition(
|
||||||
|
Slice* handle, const bool no_io, bool* cached);
|
||||||
|
|
||||||
|
const SliceTransform* prefix_extractor_;
|
||||||
|
std::unique_ptr<Block> idx_on_fltr_blk_;
|
||||||
|
const Comparator& comparator_;
|
||||||
|
const BlockBasedTable* table_;
|
||||||
|
std::unordered_map<uint64_t, FilterBlockReader*> filter_cache_;
|
||||||
|
autovector<Cache::Handle*> handle_list_;
|
||||||
|
port::RWMutex mu_;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
242
table/partitioned_filter_block_test.cc
Normal file
242
table/partitioned_filter_block_test.cc
Normal file
@ -0,0 +1,242 @@
|
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#include "rocksdb/filter_policy.h"
|
||||||
|
|
||||||
|
#include "table/partitioned_filter_block.h"
|
||||||
|
#include "util/coding.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
#include "util/logging.h"
|
||||||
|
#include "util/testharness.h"
|
||||||
|
#include "util/testutil.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
std::map<uint64_t, Slice> slices;
|
||||||
|
|
||||||
|
class MockedBlockBasedTable : public BlockBasedTable {
|
||||||
|
public:
|
||||||
|
explicit MockedBlockBasedTable(Rep* rep) : BlockBasedTable(rep) {}
|
||||||
|
|
||||||
|
virtual CachableEntry<FilterBlockReader> GetFilter(
|
||||||
|
const BlockHandle& filter_blk_handle, const bool is_a_filter_partition,
|
||||||
|
bool no_io) const override {
|
||||||
|
Slice slice = slices[filter_blk_handle.offset()];
|
||||||
|
auto obj = new FullFilterBlockReader(
|
||||||
|
nullptr, true, BlockContents(slice, false, kNoCompression),
|
||||||
|
rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr);
|
||||||
|
return {obj, nullptr};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class PartitionedFilterBlockTest : public testing::Test {
|
||||||
|
public:
|
||||||
|
BlockBasedTableOptions table_options_;
|
||||||
|
InternalKeyComparator icomp = InternalKeyComparator(BytewiseComparator());
|
||||||
|
|
||||||
|
PartitionedFilterBlockTest() {
|
||||||
|
table_options_.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||||
|
table_options_.no_block_cache = true; // Otherwise BlockBasedTable::Close
|
||||||
|
// will access variable that are not
|
||||||
|
// initialized in our mocked version
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<Cache> cache_;
|
||||||
|
~PartitionedFilterBlockTest() {}
|
||||||
|
|
||||||
|
const std::string keys[4] = {"afoo", "bar", "box", "hello"};
|
||||||
|
const std::string missing_keys[2] = {"missing", "other"};
|
||||||
|
|
||||||
|
int last_offset = 10;
|
||||||
|
BlockHandle Write(const Slice& slice) {
|
||||||
|
BlockHandle bh(last_offset + 1, slice.size());
|
||||||
|
slices[bh.offset()] = slice;
|
||||||
|
last_offset += bh.size();
|
||||||
|
return bh;
|
||||||
|
}
|
||||||
|
|
||||||
|
PartitionedIndexBuilder* NewIndexBuilder() {
|
||||||
|
return PartitionedIndexBuilder::CreateIndexBuilder(&icomp, table_options_);
|
||||||
|
}
|
||||||
|
|
||||||
|
PartitionedFilterBlockBuilder* NewBuilder(
|
||||||
|
PartitionedIndexBuilder* const p_index_builder) {
|
||||||
|
return new PartitionedFilterBlockBuilder(
|
||||||
|
nullptr, table_options_.whole_key_filtering,
|
||||||
|
table_options_.filter_policy->GetFilterBitsBuilder(),
|
||||||
|
table_options_.index_block_restart_interval, p_index_builder);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<MockedBlockBasedTable> table;
|
||||||
|
|
||||||
|
PartitionedFilterBlockReader* NewReader(
|
||||||
|
PartitionedFilterBlockBuilder* builder) {
|
||||||
|
BlockHandle bh;
|
||||||
|
Status status;
|
||||||
|
Slice slice;
|
||||||
|
do {
|
||||||
|
slice = builder->Finish(bh, &status);
|
||||||
|
bh = Write(slice);
|
||||||
|
} while (status.IsIncomplete());
|
||||||
|
const Options options;
|
||||||
|
const ImmutableCFOptions ioptions(options);
|
||||||
|
const EnvOptions env_options;
|
||||||
|
table.reset(new MockedBlockBasedTable(new BlockBasedTable::Rep(
|
||||||
|
ioptions, env_options, table_options_, icomp, false)));
|
||||||
|
auto reader = new PartitionedFilterBlockReader(
|
||||||
|
nullptr, true, BlockContents(slice, false, kNoCompression), nullptr,
|
||||||
|
nullptr, *icomp.user_comparator(), table.get());
|
||||||
|
return reader;
|
||||||
|
}
|
||||||
|
|
||||||
|
void VerifyReader(PartitionedFilterBlockBuilder* builder,
|
||||||
|
bool empty = false) {
|
||||||
|
std::unique_ptr<PartitionedFilterBlockReader> reader(NewReader(builder));
|
||||||
|
// Querying added keys
|
||||||
|
const bool no_io = true;
|
||||||
|
for (auto key : keys) {
|
||||||
|
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
|
||||||
|
const Slice ikey_slice = Slice(*ikey.rep());
|
||||||
|
ASSERT_TRUE(reader->KeyMayMatch(key, kNotValid, !no_io, &ikey_slice));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
// querying a key twice
|
||||||
|
auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue);
|
||||||
|
const Slice ikey_slice = Slice(*ikey.rep());
|
||||||
|
ASSERT_TRUE(reader->KeyMayMatch(keys[0], kNotValid, !no_io, &ikey_slice));
|
||||||
|
}
|
||||||
|
// querying missing keys
|
||||||
|
for (auto key : missing_keys) {
|
||||||
|
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
|
||||||
|
const Slice ikey_slice = Slice(*ikey.rep());
|
||||||
|
if (empty) {
|
||||||
|
ASSERT_TRUE(reader->KeyMayMatch(key, kNotValid, !no_io, &ikey_slice));
|
||||||
|
} else {
|
||||||
|
// assuming a good hash function
|
||||||
|
ASSERT_FALSE(reader->KeyMayMatch(key, kNotValid, !no_io, &ikey_slice));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TestBlockPerKey() {
|
||||||
|
table_options_.index_per_partition = 1;
|
||||||
|
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||||
|
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
|
||||||
|
NewBuilder(pib.get()));
|
||||||
|
int i = 0;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
CutABlock(pib.get(), keys[i], keys[i + 1]);
|
||||||
|
i++;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
CutABlock(pib.get(), keys[i], keys[i + 1]);
|
||||||
|
i++;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
CutABlock(pib.get(), keys[i], keys[i + 1]);
|
||||||
|
i++;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
CutABlock(pib.get(), keys[i]);
|
||||||
|
|
||||||
|
VerifyReader(builder.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
void TestBlockPerTwoKeys() {
|
||||||
|
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||||
|
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
|
||||||
|
NewBuilder(pib.get()));
|
||||||
|
int i = 0;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
i++;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
CutABlock(pib.get(), keys[i], keys[i + 1]);
|
||||||
|
i++;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
i++;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
CutABlock(pib.get(), keys[i]);
|
||||||
|
|
||||||
|
VerifyReader(builder.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
void TestBlockPerAllKeys() {
|
||||||
|
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||||
|
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
|
||||||
|
NewBuilder(pib.get()));
|
||||||
|
int i = 0;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
i++;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
i++;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
i++;
|
||||||
|
builder->Add(keys[i]);
|
||||||
|
CutABlock(pib.get(), keys[i]);
|
||||||
|
|
||||||
|
VerifyReader(builder.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
void CutABlock(PartitionedIndexBuilder* builder,
|
||||||
|
const std::string& user_key) {
|
||||||
|
// Assuming a block is cut, add an entry to the index
|
||||||
|
std::string key =
|
||||||
|
std::string(*InternalKey(user_key, 0, ValueType::kTypeValue).rep());
|
||||||
|
BlockHandle dont_care_block_handle(1, 1);
|
||||||
|
builder->AddIndexEntry(&key, nullptr, dont_care_block_handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CutABlock(PartitionedIndexBuilder* builder, const std::string& user_key,
|
||||||
|
const std::string& next_user_key) {
|
||||||
|
// Assuming a block is cut, add an entry to the index
|
||||||
|
std::string key =
|
||||||
|
std::string(*InternalKey(user_key, 0, ValueType::kTypeValue).rep());
|
||||||
|
std::string next_key = std::string(
|
||||||
|
*InternalKey(next_user_key, 0, ValueType::kTypeValue).rep());
|
||||||
|
BlockHandle dont_care_block_handle(1, 1);
|
||||||
|
Slice slice = Slice(next_key.data(), next_key.size());
|
||||||
|
builder->AddIndexEntry(&key, &slice, dont_care_block_handle);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_F(PartitionedFilterBlockTest, EmptyBuilder) {
|
||||||
|
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||||
|
std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get()));
|
||||||
|
const bool empty = true;
|
||||||
|
VerifyReader(builder.get(), empty);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(PartitionedFilterBlockTest, OneBlock) {
|
||||||
|
int num_keys = sizeof(keys) / sizeof(*keys);
|
||||||
|
for (int i = 1; i < num_keys + 1; i++) {
|
||||||
|
table_options_.index_per_partition = i;
|
||||||
|
TestBlockPerAllKeys();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
|
||||||
|
int num_keys = sizeof(keys) / sizeof(*keys);
|
||||||
|
for (int i = 1; i < num_keys + 1; i++) {
|
||||||
|
table_options_.index_per_partition = i;
|
||||||
|
TestBlockPerTwoKeys();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
|
||||||
|
int num_keys = sizeof(keys) / sizeof(*keys);
|
||||||
|
for (int i = 1; i < num_keys + 1; i++) {
|
||||||
|
table_options_.index_per_partition = i;
|
||||||
|
TestBlockPerKey();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
::testing::InitGoogleTest(&argc, argv);
|
||||||
|
return RUN_ALL_TESTS();
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user