Summary:
1. Make the filter block declared in filter_block.h a base class, and derive block_based_filter_block and full_filter_block from it. The former is the traditional filter block. The full_filter_block is newly added; it generates a filter block that contains all the keys in the SST file.
2. When querying a key, the table first checks whether a full filter is available. If not, it goes to the exact data block and checks using the block-based filter.
3. Users can choose between full_filter and the traditional block_based_filter. The two are stored in the SST file under different meta index names, "filter.filter_policy" or "full_filter.filter_policy", so the table reader can tell the filter block type.
4. Some optimizations have been made for full_filter_block, so it requires a different interface from the original one in filter_policy.h.
5. Actual implementation of filter bits coding/decoding is placed in util/bloom_impl.cc
Benchmark: base commit 1d23b5c470
Command:
db_bench --db=/dev/shm/rocksdb --num_levels=6 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --write_buffer_size=134217728 --max_write_buffer_number=2 --target_file_size_base=33554432 --max_bytes_for_level_base=1073741824 --verify_checksum=false --max_background_compactions=4 --use_plain_table=0 --memtablerep=prefix_hash --open_files=-1 --mmap_read=1 --mmap_write=0 --bloom_bits=10 --bloom_locality=1 --memtable_bloom_bits=500000 --compression_type=lz4 --num=393216000 --use_hash_search=1 --block_size=1024 --block_restart_interval=16 --use_existing_db=1 --threads=1 --benchmarks=readrandom --disable_auto_compactions=1
Read QPS increased by about 30%, from 2230002 to 2991411.
Test Plan:
make all check
valgrind db_test
db_stress --use_block_based_filter=0
./auto_sanity_test.sh
Reviewers: igor, yhchiang, ljin, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D20979
243 lines
7.3 KiB
C++
243 lines
7.3 KiB
C++
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under the BSD-style license found in the
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
//
|
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include "table/block_based_filter_block.h"

#include <memory>

#include "rocksdb/filter_policy.h"
#include "util/coding.h"
#include "util/hash.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"
|
|
|
|
namespace rocksdb {
|
|
|
|
// For testing: emit an array with one hash value per key
|
|
class TestHashFilter : public FilterPolicy {
|
|
public:
|
|
virtual const char* Name() const {
|
|
return "TestHashFilter";
|
|
}
|
|
|
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
|
for (int i = 0; i < n; i++) {
|
|
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
|
|
PutFixed32(dst, h);
|
|
}
|
|
}
|
|
|
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
|
uint32_t h = Hash(key.data(), key.size(), 1);
|
|
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) {
|
|
if (h == DecodeFixed32(filter.data() + i)) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
};
|
|
|
|
class FilterBlockTest {
|
|
public:
|
|
TestHashFilter policy_;
|
|
BlockBasedTableOptions table_options_;
|
|
|
|
FilterBlockTest() {
|
|
table_options_.filter_policy.reset(new TestHashFilter());
|
|
}
|
|
};
|
|
|
|
// A builder that was never given a key must still produce a block, and a
// reader over that block must report "may match" at any offset.
TEST(FilterBlockTest, EmptyBuilder) {
  BlockBasedFilterBlockBuilder empty_builder(nullptr, table_options_);
  Slice encoded = empty_builder.Finish();
  // Five bytes once unescaped: four zero bytes followed by 0x0b.
  // NOTE(review): presumably a zero offset-array position plus the base-lg
  // byte (11) -- confirm against the builder's encoding.
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(encoded));

  BlockBasedFilterBlockReader empty_reader(nullptr, table_options_, encoded);
  ASSERT_TRUE(empty_reader.KeyMayMatch("foo", 0));
  ASSERT_TRUE(empty_reader.KeyMayMatch("foo", 100000));
}
|
|
|
|
// Keys added under several nearby block offsets (100, 200, 300) must all be
// found when the reader is queried at offset 100, while keys that were never
// added must be rejected.
TEST(FilterBlockTest, SingleChunk) {
  BlockBasedFilterBlockBuilder chunk_builder(nullptr, table_options_);

  chunk_builder.StartBlock(100);
  chunk_builder.Add("foo");
  chunk_builder.Add("bar");
  chunk_builder.Add("box");

  chunk_builder.StartBlock(200);
  chunk_builder.Add("box");

  chunk_builder.StartBlock(300);
  chunk_builder.Add("hello");

  Slice encoded = chunk_builder.Finish();
  BlockBasedFilterBlockReader chunk_reader(nullptr, table_options_, encoded);

  // Every key that was added is visible at offset 100 ("foo" is deliberately
  // probed twice, as in the original test).
  ASSERT_TRUE(chunk_reader.KeyMayMatch("foo", 100));
  ASSERT_TRUE(chunk_reader.KeyMayMatch("bar", 100));
  ASSERT_TRUE(chunk_reader.KeyMayMatch("box", 100));
  ASSERT_TRUE(chunk_reader.KeyMayMatch("hello", 100));
  ASSERT_TRUE(chunk_reader.KeyMayMatch("foo", 100));

  // Keys that were never added must not match.
  ASSERT_TRUE(!chunk_reader.KeyMayMatch("missing", 100));
  ASSERT_TRUE(!chunk_reader.KeyMayMatch("other", 100));
}
|
|
|
|
// Blocks started at widely separated offsets end up in different filters:
// a key must match only at the offsets of the filter it was added to, and an
// offset range that received no keys must reject everything.
TEST(FilterBlockTest, MultiChunk) {
  BlockBasedFilterBlockBuilder multi_builder(nullptr, table_options_);

  // Filter #1: offsets 0 and 2000.
  multi_builder.StartBlock(0);
  multi_builder.Add("foo");
  multi_builder.StartBlock(2000);
  multi_builder.Add("bar");

  // Filter #2: offset 3100.
  multi_builder.StartBlock(3100);
  multi_builder.Add("box");

  // Filter #3 receives no keys at all.

  // Final filter: offset 9000.
  multi_builder.StartBlock(9000);
  multi_builder.Add("box");
  multi_builder.Add("hello");

  Slice encoded = multi_builder.Finish();
  BlockBasedFilterBlockReader multi_reader(nullptr, table_options_, encoded);

  // Filter #1 holds exactly "foo" and "bar".
  ASSERT_TRUE(multi_reader.KeyMayMatch("foo", 0));
  ASSERT_TRUE(multi_reader.KeyMayMatch("bar", 2000));
  ASSERT_TRUE(!multi_reader.KeyMayMatch("box", 0));
  ASSERT_TRUE(!multi_reader.KeyMayMatch("hello", 0));

  // Filter #2 holds exactly "box".
  ASSERT_TRUE(multi_reader.KeyMayMatch("box", 3100));
  ASSERT_TRUE(!multi_reader.KeyMayMatch("foo", 3100));
  ASSERT_TRUE(!multi_reader.KeyMayMatch("bar", 3100));
  ASSERT_TRUE(!multi_reader.KeyMayMatch("hello", 3100));

  // Filter #3 is empty, so nothing may match at offset 4100.
  ASSERT_TRUE(!multi_reader.KeyMayMatch("foo", 4100));
  ASSERT_TRUE(!multi_reader.KeyMayMatch("bar", 4100));
  ASSERT_TRUE(!multi_reader.KeyMayMatch("box", 4100));
  ASSERT_TRUE(!multi_reader.KeyMayMatch("hello", 4100));

  // Final filter holds exactly "box" and "hello".
  ASSERT_TRUE(multi_reader.KeyMayMatch("box", 9000));
  ASSERT_TRUE(multi_reader.KeyMayMatch("hello", 9000));
  ASSERT_TRUE(!multi_reader.KeyMayMatch("foo", 9000));
  ASSERT_TRUE(!multi_reader.KeyMayMatch("bar", 9000));
}
|
|
|
|
// Test for block based filter block
|
|
// use new interface in FilterPolicy to create filter builder/reader
|
|
class BlockBasedFilterBlockTest {
|
|
public:
|
|
BlockBasedTableOptions table_options_;
|
|
|
|
BlockBasedFilterBlockTest() {
|
|
table_options_.filter_policy.reset(NewBloomFilterPolicy(10));
|
|
}
|
|
|
|
~BlockBasedFilterBlockTest() {}
|
|
};
|
|
|
|
// Same scenario as the empty-builder case, but exercised through the
// FilterBlockBuilder / FilterBlockReader base interfaces: a builder that saw
// no keys still yields a block, and a reader over it reports "may match".
TEST(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) {
  // Both objects are owned by unique_ptr, so no manual delete is needed.
  // The builder is declared first and is therefore destroyed last: the
  // reader, which was constructed over the builder's output (presumably
  // backed by builder-owned storage), never outlives it.
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));
  Slice block = builder->Finish();
  // Five bytes once unescaped: four zero bytes followed by 0x0b.
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));

  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 0));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100000));
}
|
|
|
|
// Keys added under nearby block offsets (100, 200, 300) must all be found
// when querying at offset 100 through the FilterBlockReader interface, and
// keys that were never added must be rejected.
TEST(BlockBasedFilterBlockTest, BlockBasedSingleChunk) {
  // Both objects are owned by unique_ptr, so no manual delete is needed.
  // The builder is declared first and is therefore destroyed last: the
  // reader, which was constructed over the builder's output (presumably
  // backed by builder-owned storage), never outlives it.
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));
  builder->StartBlock(100);
  builder->Add("foo");
  builder->Add("bar");
  builder->Add("box");
  builder->StartBlock(200);
  builder->Add("box");
  builder->StartBlock(300);
  builder->Add("hello");
  Slice block = builder->Finish();

  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));

  // Every key that was added is visible at offset 100.
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100));
  ASSERT_TRUE(reader->KeyMayMatch("bar", 100));
  ASSERT_TRUE(reader->KeyMayMatch("box", 100));
  ASSERT_TRUE(reader->KeyMayMatch("hello", 100));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100));

  // Keys that were never added must not match.
  ASSERT_TRUE(!reader->KeyMayMatch("missing", 100));
  ASSERT_TRUE(!reader->KeyMayMatch("other", 100));
}
|
|
|
|
// Blocks started at widely separated offsets end up in different filters:
// through the FilterBlockReader interface, a key must match only at the
// offsets of the filter it was added to, and an offset range that received
// no keys must reject everything.
TEST(BlockBasedFilterBlockTest, BlockBasedMultiChunk) {
  // Both objects are owned by unique_ptr, so no manual delete is needed.
  // The builder is declared first and is therefore destroyed last: the
  // reader, which was constructed over the builder's output (presumably
  // backed by builder-owned storage), never outlives it.
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));

  // First filter
  builder->StartBlock(0);
  builder->Add("foo");
  builder->StartBlock(2000);
  builder->Add("bar");

  // Second filter
  builder->StartBlock(3100);
  builder->Add("box");

  // Third filter is empty

  // Last filter
  builder->StartBlock(9000);
  builder->Add("box");
  builder->Add("hello");

  Slice block = builder->Finish();
  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));

  // Check first filter
  ASSERT_TRUE(reader->KeyMayMatch("foo", 0));
  ASSERT_TRUE(reader->KeyMayMatch("bar", 2000));
  ASSERT_TRUE(!reader->KeyMayMatch("box", 0));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 0));

  // Check second filter
  ASSERT_TRUE(reader->KeyMayMatch("box", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 3100));

  // Check third filter (empty)
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("box", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 4100));

  // Check last filter
  ASSERT_TRUE(reader->KeyMayMatch("box", 9000));
  ASSERT_TRUE(reader->KeyMayMatch("hello", 9000));
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 9000));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 9000));
}
|
|
|
|
} // namespace rocksdb
|
|
|
|
// Test binary entry point: delegates to rocksdb::test::RunAllTests() and
// returns its result as the process exit status. The command-line arguments
// are accepted but not used.
int main(int argc, char** argv) {
  const int status = rocksdb::test::RunAllTests();
  return status;
}
|