Implement full filter for block based table.
Summary:
1. Make filter_block.h a base class. Derive block_based_filter_block and full_filter_block from it. The former is the traditional filter block; the latter, full_filter_block, is newly added. It generates a filter block that contains all the keys in the SST file.
2. When querying a key, table would first check if full_filter is available. If not, it would go to the exact data block and check using block_based filter.
3. Users can choose to use full_filter or the traditional one (block_based_filter). They are stored in the SST file under different meta index names: "filter.filter_policy" or "full_filter.filter_policy". The table reader is then able to determine the filter block type.
4. Some optimizations have been done for full_filter_block, thus it requires a different interface compared to the original one in filter_policy.h.
5. Actual implementation of filter bits coding/decoding is placed in util/bloom_impl.cc
Benchmark: base commit 1d23b5c470
Command:
db_bench --db=/dev/shm/rocksdb --num_levels=6 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --write_buffer_size=134217728 --max_write_buffer_number=2 --target_file_size_base=33554432 --max_bytes_for_level_base=1073741824 --verify_checksum=false --max_background_compactions=4 --use_plain_table=0 --memtablerep=prefix_hash --open_files=-1 --mmap_read=1 --mmap_write=0 --bloom_bits=10 --bloom_locality=1 --memtable_bloom_bits=500000 --compression_type=lz4 --num=393216000 --use_hash_search=1 --block_size=1024 --block_restart_interval=16 --use_existing_db=1 --threads=1 --benchmarks=readrandom --disable_auto_compactions=1
Read QPS increases by about 30%, from 2230002 to 2991411.
Test Plan:
make all check
valgrind db_test
db_stress --use_block_based_filter=0
./auto_sanity_test.sh
Reviewers: igor, yhchiang, ljin, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D20979
This commit is contained in:
parent
9360cc690e
commit
0af157f9bf
10
Makefile
10
Makefile
@ -90,7 +90,8 @@ TESTS = \
|
|||||||
blob_store_test \
|
blob_store_test \
|
||||||
filelock_test \
|
filelock_test \
|
||||||
filename_test \
|
filename_test \
|
||||||
filter_block_test \
|
block_based_filter_block_test \
|
||||||
|
full_filter_block_test \
|
||||||
histogram_test \
|
histogram_test \
|
||||||
log_test \
|
log_test \
|
||||||
manual_compaction_test \
|
manual_compaction_test \
|
||||||
@ -393,8 +394,11 @@ rate_limiter_test: util/rate_limiter_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
|||||||
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
$(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||||
|
|
||||||
filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
block_based_filter_block_test: table/block_based_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CXX) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
$(CXX) table/block_based_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||||
|
|
||||||
|
full_filter_block_test: table/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
|
$(CXX) table/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||||
|
|
||||||
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
$(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||||
|
2
db/c.cc
2
db/c.cc
@ -118,7 +118,7 @@ struct rocksdb_compactionfilter_t : public CompactionFilter {
|
|||||||
const Slice& existing_value,
|
const Slice& existing_value,
|
||||||
std::string* new_value,
|
std::string* new_value,
|
||||||
bool* value_changed) const {
|
bool* value_changed) const {
|
||||||
char* c_new_value = NULL;
|
char* c_new_value = nullptr;
|
||||||
size_t new_value_length = 0;
|
size_t new_value_length = 0;
|
||||||
unsigned char c_value_changed = 0;
|
unsigned char c_value_changed = 0;
|
||||||
unsigned char result = (*filter_)(
|
unsigned char result = (*filter_)(
|
||||||
|
@ -39,8 +39,8 @@ int main() {
|
|||||||
#include "rocksdb/memtablerep.h"
|
#include "rocksdb/memtablerep.h"
|
||||||
#include "rocksdb/write_batch.h"
|
#include "rocksdb/write_batch.h"
|
||||||
#include "rocksdb/slice.h"
|
#include "rocksdb/slice.h"
|
||||||
|
#include "rocksdb/filter_policy.h"
|
||||||
#include "rocksdb/slice_transform.h"
|
#include "rocksdb/slice_transform.h"
|
||||||
#include "rocksdb/statistics.h"
|
|
||||||
#include "rocksdb/perf_context.h"
|
#include "rocksdb/perf_context.h"
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
#include "port/stack_trace.h"
|
#include "port/stack_trace.h"
|
||||||
@ -553,7 +553,9 @@ DEFINE_double(cuckoo_hash_ratio, 0.9, "Hash ratio for Cuckoo SST table.");
|
|||||||
DEFINE_bool(use_hash_search, false, "if use kHashSearch "
|
DEFINE_bool(use_hash_search, false, "if use kHashSearch "
|
||||||
"instead of kBinarySearch. "
|
"instead of kBinarySearch. "
|
||||||
"This is valid if only we use BlockTable");
|
"This is valid if only we use BlockTable");
|
||||||
|
DEFINE_bool(use_block_based_filter, false, "if use kBlockBasedFilter "
|
||||||
|
"instead of kFullFilter for filter block. "
|
||||||
|
"This is valid if only we use BlockTable");
|
||||||
DEFINE_string(merge_operator, "", "The merge operator to use with the database."
|
DEFINE_string(merge_operator, "", "The merge operator to use with the database."
|
||||||
"If a new merge operator is specified, be sure to use fresh"
|
"If a new merge operator is specified, be sure to use fresh"
|
||||||
" database The possible merge operators are defined in"
|
" database The possible merge operators are defined in"
|
||||||
@ -1076,9 +1078,9 @@ class Benchmark {
|
|||||||
(FLAGS_cache_numshardbits >= 1 ?
|
(FLAGS_cache_numshardbits >= 1 ?
|
||||||
NewLRUCache(FLAGS_compressed_cache_size, FLAGS_cache_numshardbits) :
|
NewLRUCache(FLAGS_compressed_cache_size, FLAGS_cache_numshardbits) :
|
||||||
NewLRUCache(FLAGS_compressed_cache_size)) : nullptr),
|
NewLRUCache(FLAGS_compressed_cache_size)) : nullptr),
|
||||||
filter_policy_(FLAGS_bloom_bits >= 0
|
filter_policy_(FLAGS_bloom_bits >= 0 ?
|
||||||
? NewBloomFilterPolicy(FLAGS_bloom_bits)
|
NewBloomFilterPolicy(FLAGS_bloom_bits, FLAGS_use_block_based_filter)
|
||||||
: nullptr),
|
: nullptr),
|
||||||
prefix_extractor_(NewFixedPrefixTransform(FLAGS_prefix_size)),
|
prefix_extractor_(NewFixedPrefixTransform(FLAGS_prefix_size)),
|
||||||
num_(FLAGS_num),
|
num_(FLAGS_num),
|
||||||
value_size_(FLAGS_value_size),
|
value_size_(FLAGS_value_size),
|
||||||
|
224
db/db_test.cc
224
db/db_test.cc
@ -324,21 +324,22 @@ class DBTest {
|
|||||||
kHashCuckoo = 7,
|
kHashCuckoo = 7,
|
||||||
kMergePut = 8,
|
kMergePut = 8,
|
||||||
kFilter = 9,
|
kFilter = 9,
|
||||||
kUncompressed = 10,
|
kFullFilter = 10,
|
||||||
kNumLevel_3 = 11,
|
kUncompressed = 11,
|
||||||
kDBLogDir = 12,
|
kNumLevel_3 = 12,
|
||||||
kWalDir = 13,
|
kDBLogDir = 13,
|
||||||
kManifestFileSize = 14,
|
kWalDir = 14,
|
||||||
kCompactOnFlush = 15,
|
kManifestFileSize = 15,
|
||||||
kPerfOptions = 16,
|
kCompactOnFlush = 16,
|
||||||
kDeletesFilterFirst = 17,
|
kPerfOptions = 17,
|
||||||
kHashSkipList = 18,
|
kDeletesFilterFirst = 18,
|
||||||
kUniversalCompaction = 19,
|
kHashSkipList = 19,
|
||||||
kCompressedBlockCache = 20,
|
kUniversalCompaction = 20,
|
||||||
kInfiniteMaxOpenFiles = 21,
|
kCompressedBlockCache = 21,
|
||||||
kxxHashChecksum = 22,
|
kInfiniteMaxOpenFiles = 22,
|
||||||
kFIFOCompaction = 23,
|
kxxHashChecksum = 23,
|
||||||
kEnd = 24
|
kFIFOCompaction = 24,
|
||||||
|
kEnd = 25
|
||||||
};
|
};
|
||||||
int option_config_;
|
int option_config_;
|
||||||
|
|
||||||
@ -448,6 +449,30 @@ class DBTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Switch between different filter policy
|
||||||
|
// Jump from kDefault to kFilter to kFullFilter
|
||||||
|
bool ChangeFilterOptions(Options* prev_options = nullptr) {
|
||||||
|
if (option_config_ == kDefault) {
|
||||||
|
option_config_ = kFilter;
|
||||||
|
if (prev_options == nullptr) {
|
||||||
|
prev_options = &last_options_;
|
||||||
|
}
|
||||||
|
Destroy(prev_options);
|
||||||
|
TryReopen();
|
||||||
|
return true;
|
||||||
|
} else if (option_config_ == kFilter) {
|
||||||
|
option_config_ = kFullFilter;
|
||||||
|
if (prev_options == nullptr) {
|
||||||
|
prev_options = &last_options_;
|
||||||
|
}
|
||||||
|
Destroy(prev_options);
|
||||||
|
TryReopen();
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Return the current option configuration.
|
// Return the current option configuration.
|
||||||
Options CurrentOptions(
|
Options CurrentOptions(
|
||||||
const anon::OptionsOverride& options_override = anon::OptionsOverride()) {
|
const anon::OptionsOverride& options_override = anon::OptionsOverride()) {
|
||||||
@ -486,7 +511,10 @@ class DBTest {
|
|||||||
options.merge_operator = MergeOperators::CreatePutOperator();
|
options.merge_operator = MergeOperators::CreatePutOperator();
|
||||||
break;
|
break;
|
||||||
case kFilter:
|
case kFilter:
|
||||||
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
|
||||||
|
break;
|
||||||
|
case kFullFilter:
|
||||||
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||||
break;
|
break;
|
||||||
case kUncompressed:
|
case kUncompressed:
|
||||||
options.compression = kNoCompression;
|
options.compression = kNoCompression;
|
||||||
@ -5744,6 +5772,92 @@ TEST(DBTest, BloomFilter) {
|
|||||||
} while (ChangeCompactOptions());
|
} while (ChangeCompactOptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(DBTest, BloomFilterRate) {
|
||||||
|
while (ChangeFilterOptions()) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.statistics = rocksdb::CreateDBStatistics();
|
||||||
|
CreateAndReopenWithCF({"pikachu"}, &options);
|
||||||
|
|
||||||
|
const int maxKey = 10000;
|
||||||
|
for (int i = 0; i < maxKey; i++) {
|
||||||
|
ASSERT_OK(Put(1, Key(i), Key(i)));
|
||||||
|
}
|
||||||
|
// Add a large key to make the file contain wide range
|
||||||
|
ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
|
||||||
|
Flush(1);
|
||||||
|
|
||||||
|
// Check if they can be found
|
||||||
|
for (int i = 0; i < maxKey; i++) {
|
||||||
|
ASSERT_EQ(Key(i), Get(1, Key(i)));
|
||||||
|
}
|
||||||
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
||||||
|
|
||||||
|
// Check if filter is useful
|
||||||
|
for (int i = 0; i < maxKey; i++) {
|
||||||
|
ASSERT_EQ("NOT_FOUND", Get(1, Key(i+33333)));
|
||||||
|
}
|
||||||
|
ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey*0.98);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DBTest, BloomFilterCompatibility) {
|
||||||
|
Options options;
|
||||||
|
options.statistics = rocksdb::CreateDBStatistics();
|
||||||
|
BlockBasedTableOptions table_options;
|
||||||
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
|
||||||
|
// Create with block based filter
|
||||||
|
CreateAndReopenWithCF({"pikachu"}, &options);
|
||||||
|
|
||||||
|
const int maxKey = 10000;
|
||||||
|
for (int i = 0; i < maxKey; i++) {
|
||||||
|
ASSERT_OK(Put(1, Key(i), Key(i)));
|
||||||
|
}
|
||||||
|
ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
|
||||||
|
Flush(1);
|
||||||
|
|
||||||
|
// Check db with full filter
|
||||||
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
ReopenWithColumnFamilies({"default", "pikachu"}, &options);
|
||||||
|
|
||||||
|
// Check if they can be found
|
||||||
|
for (int i = 0; i < maxKey; i++) {
|
||||||
|
ASSERT_EQ(Key(i), Get(1, Key(i)));
|
||||||
|
}
|
||||||
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DBTest, BloomFilterReverseCompatibility) {
|
||||||
|
Options options;
|
||||||
|
options.statistics = rocksdb::CreateDBStatistics();
|
||||||
|
BlockBasedTableOptions table_options;
|
||||||
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
|
||||||
|
// Create with full filter
|
||||||
|
CreateAndReopenWithCF({"pikachu"}, &options);
|
||||||
|
|
||||||
|
const int maxKey = 10000;
|
||||||
|
for (int i = 0; i < maxKey; i++) {
|
||||||
|
ASSERT_OK(Put(1, Key(i), Key(i)));
|
||||||
|
}
|
||||||
|
ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
|
||||||
|
Flush(1);
|
||||||
|
|
||||||
|
// Check db with block_based filter
|
||||||
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
ReopenWithColumnFamilies({"default", "pikachu"}, &options);
|
||||||
|
|
||||||
|
// Check if they can be found
|
||||||
|
for (int i = 0; i < maxKey; i++) {
|
||||||
|
ASSERT_EQ(Key(i), Get(1, Key(i)));
|
||||||
|
}
|
||||||
|
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
|
||||||
|
}
|
||||||
|
|
||||||
TEST(DBTest, SnapshotFiles) {
|
TEST(DBTest, SnapshotFiles) {
|
||||||
do {
|
do {
|
||||||
Options options = CurrentOptions();
|
Options options = CurrentOptions();
|
||||||
@ -7194,47 +7308,49 @@ void PrefixScanInit(DBTest *dbtest) {
|
|||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
TEST(DBTest, PrefixScan) {
|
TEST(DBTest, PrefixScan) {
|
||||||
int count;
|
while (ChangeFilterOptions()) {
|
||||||
Slice prefix;
|
int count;
|
||||||
Slice key;
|
Slice prefix;
|
||||||
char buf[100];
|
Slice key;
|
||||||
Iterator* iter;
|
char buf[100];
|
||||||
snprintf(buf, sizeof(buf), "03______:");
|
Iterator* iter;
|
||||||
prefix = Slice(buf, 8);
|
snprintf(buf, sizeof(buf), "03______:");
|
||||||
key = Slice(buf, 9);
|
prefix = Slice(buf, 8);
|
||||||
// db configs
|
key = Slice(buf, 9);
|
||||||
env_->count_random_reads_ = true;
|
// db configs
|
||||||
Options options = CurrentOptions();
|
env_->count_random_reads_ = true;
|
||||||
options.env = env_;
|
Options options = CurrentOptions();
|
||||||
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
|
options.env = env_;
|
||||||
options.disable_auto_compactions = true;
|
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
|
||||||
options.max_background_compactions = 2;
|
options.disable_auto_compactions = true;
|
||||||
options.create_if_missing = true;
|
options.max_background_compactions = 2;
|
||||||
options.memtable_factory.reset(NewHashSkipListRepFactory(16));
|
options.create_if_missing = true;
|
||||||
|
options.memtable_factory.reset(NewHashSkipListRepFactory(16));
|
||||||
|
|
||||||
BlockBasedTableOptions table_options;
|
BlockBasedTableOptions table_options;
|
||||||
table_options.no_block_cache = true;
|
table_options.no_block_cache = true;
|
||||||
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
|
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
|
||||||
table_options.whole_key_filtering = false;
|
table_options.whole_key_filtering = false;
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
|
||||||
// 11 RAND I/Os
|
// 11 RAND I/Os
|
||||||
DestroyAndReopen(&options);
|
DestroyAndReopen(&options);
|
||||||
PrefixScanInit(this);
|
PrefixScanInit(this);
|
||||||
count = 0;
|
count = 0;
|
||||||
env_->random_read_counter_.Reset();
|
env_->random_read_counter_.Reset();
|
||||||
iter = db_->NewIterator(ReadOptions());
|
iter = db_->NewIterator(ReadOptions());
|
||||||
for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
|
for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
|
||||||
if (! iter->key().starts_with(prefix)) {
|
if (! iter->key().starts_with(prefix)) {
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
}
|
}
|
||||||
count++;
|
ASSERT_OK(iter->status());
|
||||||
}
|
delete iter;
|
||||||
ASSERT_OK(iter->status());
|
ASSERT_EQ(count, 2);
|
||||||
delete iter;
|
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
|
||||||
ASSERT_EQ(count, 2);
|
Close();
|
||||||
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
|
} // end of while
|
||||||
Close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DBTest, TailingIteratorSingle) {
|
TEST(DBTest, TailingIteratorSingle) {
|
||||||
|
@ -21,11 +21,52 @@
|
|||||||
#define STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_
|
#define STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
class Slice;
|
class Slice;
|
||||||
|
|
||||||
|
// A class that takes a bunch of keys, then generates filter
|
||||||
|
class FilterBitsBuilder {
|
||||||
|
public:
|
||||||
|
virtual ~FilterBitsBuilder() {}
|
||||||
|
|
||||||
|
// Add Key to filter, you could use any way to store the key.
|
||||||
|
// Such as: storing hashes or original keys
|
||||||
|
// Keys are in sorted order and duplicated keys are possible.
|
||||||
|
virtual void AddKey(const Slice& key) = 0;
|
||||||
|
|
||||||
|
// Generate the filter using the keys that are added
|
||||||
|
// The return value of this function would be the filter bits,
|
||||||
|
// The ownership of actual data is set to buf
|
||||||
|
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// A class that checks if a key can be in filter
|
||||||
|
// It should be initialized by Slice generated by BitsBuilder
|
||||||
|
class FilterBitsReader {
|
||||||
|
public:
|
||||||
|
virtual ~FilterBitsReader() {}
|
||||||
|
|
||||||
|
// Check if the entry matches the bits in the filter
|
||||||
|
virtual bool MayMatch(const Slice& entry) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// We add a new format of filter block called full filter block
|
||||||
|
// This new interface gives you more space of customization
|
||||||
|
//
|
||||||
|
// For the full filter block, you can plug in your version by implementing
|
||||||
|
// the FilterBitsBuilder and FilterBitsReader
|
||||||
|
//
|
||||||
|
// There are two sets of interface in FilterPolicy
|
||||||
|
// Set 1: CreateFilter, KeyMayMatch: used for blockbased filter
|
||||||
|
// Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for
|
||||||
|
// full filter.
|
||||||
|
// Set 1 MUST be implemented correctly, Set 2 is optional
|
||||||
|
// RocksDB would first try using functions in Set 2. If they return nullptr,
|
||||||
|
// it would use Set 1 instead.
|
||||||
|
// You can choose filter type in NewBloomFilterPolicy
|
||||||
class FilterPolicy {
|
class FilterPolicy {
|
||||||
public:
|
public:
|
||||||
virtual ~FilterPolicy();
|
virtual ~FilterPolicy();
|
||||||
@ -51,11 +92,28 @@ class FilterPolicy {
|
|||||||
// This method may return true or false if the key was not on the
|
// This method may return true or false if the key was not on the
|
||||||
// list, but it should aim to return false with a high probability.
|
// list, but it should aim to return false with a high probability.
|
||||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
|
||||||
|
|
||||||
|
// Get the FilterBitsBuilder, which is ONLY used for full filter block
|
||||||
|
// It contains interface to take individual key, then generate filter
|
||||||
|
virtual FilterBitsBuilder* GetFilterBitsBuilder() const {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the FilterBitsReader, which is ONLY used for full filter block
|
||||||
|
// It contains interface to tell if key can be in filter
|
||||||
|
// The input slice should NOT be deleted by FilterPolicy
|
||||||
|
virtual FilterBitsReader* GetFilterBitsReader(const Slice& contents) const {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Return a new filter policy that uses a bloom filter with approximately
|
// Return a new filter policy that uses a bloom filter with approximately
|
||||||
// the specified number of bits per key. A good value for bits_per_key
|
// the specified number of bits per key.
|
||||||
|
//
|
||||||
|
// bits_per_key: bits per key in bloom filter. A good value for bits_per_key
|
||||||
// is 10, which yields a filter with ~ 1% false positive rate.
|
// is 10, which yields a filter with ~ 1% false positive rate.
|
||||||
|
// use_block_based_builder: use block based filter rather than full filter.
|
||||||
|
// If you want to build a full filter, it needs to be set to false.
|
||||||
//
|
//
|
||||||
// Callers must delete the result after any database that is using the
|
// Callers must delete the result after any database that is using the
|
||||||
// result has been closed.
|
// result has been closed.
|
||||||
@ -67,8 +125,8 @@ class FilterPolicy {
|
|||||||
// ignores trailing spaces, it would be incorrect to use a
|
// ignores trailing spaces, it would be incorrect to use a
|
||||||
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
|
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
|
||||||
// trailing spaces in keys.
|
// trailing spaces in keys.
|
||||||
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
|
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key,
|
||||||
|
bool use_block_based_builder = true);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_
|
#endif // STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_
|
||||||
|
@ -115,7 +115,7 @@ enum Tickers : uint32_t {
|
|||||||
// head of the writers queue.
|
// head of the writers queue.
|
||||||
WRITE_DONE_BY_SELF,
|
WRITE_DONE_BY_SELF,
|
||||||
WRITE_DONE_BY_OTHER,
|
WRITE_DONE_BY_OTHER,
|
||||||
WRITE_TIMEDOUT, // Number of writes ending up with timed-out.
|
WRITE_TIMEDOUT, // Number of writes ending up with timed-out.
|
||||||
WRITE_WITH_WAL, // Number of Write calls that request WAL
|
WRITE_WITH_WAL, // Number of Write calls that request WAL
|
||||||
COMPACT_READ_BYTES, // Bytes read during compaction
|
COMPACT_READ_BYTES, // Bytes read during compaction
|
||||||
COMPACT_WRITE_BYTES, // Bytes written during compaction
|
COMPACT_WRITE_BYTES, // Bytes written during compaction
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
// Use of this source code is governed by a BSD-style license that can be
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
#include "table/filter_block.h"
|
#include "table/block_based_filter_block.h"
|
||||||
|
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "rocksdb/filter_policy.h"
|
#include "rocksdb/filter_policy.h"
|
||||||
@ -15,21 +15,39 @@
|
|||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
bool SamePrefix(const SliceTransform* prefix_extractor,
|
||||||
|
const Slice& key1, const Slice& key2) {
|
||||||
|
if (!prefix_extractor->InDomain(key1) &&
|
||||||
|
!prefix_extractor->InDomain(key2)) {
|
||||||
|
return true;
|
||||||
|
} else if (!prefix_extractor->InDomain(key1) ||
|
||||||
|
!prefix_extractor->InDomain(key2)) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
return (prefix_extractor->Transform(key1) ==
|
||||||
|
prefix_extractor->Transform(key2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
|
||||||
// See doc/table_format.txt for an explanation of the filter block format.
|
// See doc/table_format.txt for an explanation of the filter block format.
|
||||||
|
|
||||||
// Generate new filter every 2KB of data
|
// Generate new filter every 2KB of data
|
||||||
static const size_t kFilterBaseLg = 11;
|
static const size_t kFilterBaseLg = 11;
|
||||||
static const size_t kFilterBase = 1 << kFilterBaseLg;
|
static const size_t kFilterBase = 1 << kFilterBaseLg;
|
||||||
|
|
||||||
FilterBlockBuilder::FilterBlockBuilder(const SliceTransform* prefix_extractor,
|
BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
|
||||||
const BlockBasedTableOptions& table_opt,
|
const SliceTransform* prefix_extractor,
|
||||||
const Comparator* internal_comparator)
|
const BlockBasedTableOptions& table_opt)
|
||||||
: policy_(table_opt.filter_policy.get()),
|
: policy_(table_opt.filter_policy.get()),
|
||||||
prefix_extractor_(prefix_extractor),
|
prefix_extractor_(prefix_extractor),
|
||||||
whole_key_filtering_(table_opt.whole_key_filtering),
|
whole_key_filtering_(table_opt.whole_key_filtering) {
|
||||||
comparator_(internal_comparator) {}
|
assert(policy_);
|
||||||
|
}
|
||||||
|
|
||||||
void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
|
void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) {
|
||||||
uint64_t filter_index = (block_offset / kFilterBase);
|
uint64_t filter_index = (block_offset / kFilterBase);
|
||||||
assert(filter_index >= filter_offsets_.size());
|
assert(filter_index >= filter_offsets_.size());
|
||||||
while (filter_index > filter_offsets_.size()) {
|
while (filter_index > filter_offsets_.size()) {
|
||||||
@ -37,53 +55,45 @@ void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool FilterBlockBuilder::SamePrefix(const Slice &key1,
|
void BlockBasedFilterBlockBuilder::Add(const Slice& key) {
|
||||||
const Slice &key2) const {
|
added_to_start_ = 0;
|
||||||
if (!prefix_extractor_->InDomain(key1) &&
|
if (whole_key_filtering_) {
|
||||||
!prefix_extractor_->InDomain(key2)) {
|
AddKey(key);
|
||||||
return true;
|
added_to_start_ = 1;
|
||||||
} else if (!prefix_extractor_->InDomain(key1) ||
|
}
|
||||||
!prefix_extractor_->InDomain(key2)) {
|
if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
|
||||||
return false;
|
AddPrefix(key);
|
||||||
} else {
|
|
||||||
return (prefix_extractor_->Transform(key1) ==
|
|
||||||
prefix_extractor_->Transform(key2));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void FilterBlockBuilder::AddKey(const Slice& key) {
|
// Add key to filter if needed
|
||||||
|
inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) {
|
||||||
|
start_.push_back(entries_.size());
|
||||||
|
entries_.append(key.data(), key.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add prefix to filter if needed
|
||||||
|
inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
|
||||||
// get slice for most recently added entry
|
// get slice for most recently added entry
|
||||||
Slice prev;
|
Slice prev;
|
||||||
size_t added_to_start = 0;
|
if (start_.size() > added_to_start_) {
|
||||||
|
size_t prev_start = start_[start_.size() - 1 - added_to_start_];
|
||||||
// add key to filter if needed
|
|
||||||
if (whole_key_filtering_) {
|
|
||||||
start_.push_back(entries_.size());
|
|
||||||
++added_to_start;
|
|
||||||
entries_.append(key.data(), key.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (start_.size() > added_to_start) {
|
|
||||||
size_t prev_start = start_[start_.size() - 1 - added_to_start];
|
|
||||||
const char* base = entries_.data() + prev_start;
|
const char* base = entries_.data() + prev_start;
|
||||||
size_t length = entries_.size() - prev_start;
|
size_t length = entries_.size() - prev_start;
|
||||||
prev = Slice(base, length);
|
prev = Slice(base, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
// add prefix to filter if needed
|
// this assumes prefix(prefix(key)) == prefix(key), as the last
|
||||||
if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
|
// entry in entries_ may be either a key or prefix, and we use
|
||||||
// this assumes prefix(prefix(key)) == prefix(key), as the last
|
// prefix(last entry) to get the prefix of the last key.
|
||||||
// entry in entries_ may be either a key or prefix, and we use
|
if (prev.size() == 0 || !SamePrefix(prefix_extractor_, key, prev)) {
|
||||||
// prefix(last entry) to get the prefix of the last key.
|
Slice prefix = prefix_extractor_->Transform(key);
|
||||||
if (prev.size() == 0 || !SamePrefix(key, prev)) {
|
start_.push_back(entries_.size());
|
||||||
Slice prefix = prefix_extractor_->Transform(key);
|
entries_.append(prefix.data(), prefix.size());
|
||||||
start_.push_back(entries_.size());
|
|
||||||
entries_.append(prefix.data(), prefix.size());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Slice FilterBlockBuilder::Finish() {
|
Slice BlockBasedFilterBlockBuilder::Finish() {
|
||||||
if (!start_.empty()) {
|
if (!start_.empty()) {
|
||||||
GenerateFilter();
|
GenerateFilter();
|
||||||
}
|
}
|
||||||
@ -99,7 +109,7 @@ Slice FilterBlockBuilder::Finish() {
|
|||||||
return Slice(result_);
|
return Slice(result_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void FilterBlockBuilder::GenerateFilter() {
|
void BlockBasedFilterBlockBuilder::GenerateFilter() {
|
||||||
const size_t num_entries = start_.size();
|
const size_t num_entries = start_.size();
|
||||||
if (num_entries == 0) {
|
if (num_entries == 0) {
|
||||||
// Fast path if there are no keys for this filter
|
// Fast path if there are no keys for this filter
|
||||||
@ -112,7 +122,7 @@ void FilterBlockBuilder::GenerateFilter() {
|
|||||||
tmp_entries_.resize(num_entries);
|
tmp_entries_.resize(num_entries);
|
||||||
for (size_t i = 0; i < num_entries; i++) {
|
for (size_t i = 0; i < num_entries; i++) {
|
||||||
const char* base = entries_.data() + start_[i];
|
const char* base = entries_.data() + start_[i];
|
||||||
size_t length = start_[i+1] - start_[i];
|
size_t length = start_[i + 1] - start_[i];
|
||||||
tmp_entries_[i] = Slice(base, length);
|
tmp_entries_[i] = Slice(base, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -125,7 +135,7 @@ void FilterBlockBuilder::GenerateFilter() {
|
|||||||
start_.clear();
|
start_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
FilterBlockReader::FilterBlockReader(
|
BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
|
||||||
const SliceTransform* prefix_extractor,
|
const SliceTransform* prefix_extractor,
|
||||||
const BlockBasedTableOptions& table_opt,
|
const BlockBasedTableOptions& table_opt,
|
||||||
const Slice& contents, bool delete_contents_after_use)
|
const Slice& contents, bool delete_contents_after_use)
|
||||||
@ -136,9 +146,10 @@ FilterBlockReader::FilterBlockReader(
|
|||||||
offset_(nullptr),
|
offset_(nullptr),
|
||||||
num_(0),
|
num_(0),
|
||||||
base_lg_(0) {
|
base_lg_(0) {
|
||||||
|
assert(policy_);
|
||||||
size_t n = contents.size();
|
size_t n = contents.size();
|
||||||
if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array
|
if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array
|
||||||
base_lg_ = contents[n-1];
|
base_lg_ = contents[n - 1];
|
||||||
uint32_t last_word = DecodeFixed32(contents.data() + n - 5);
|
uint32_t last_word = DecodeFixed32(contents.data() + n - 5);
|
||||||
if (last_word > n - 5) return;
|
if (last_word > n - 5) return;
|
||||||
data_ = contents.data();
|
data_ = contents.data();
|
||||||
@ -149,27 +160,30 @@ FilterBlockReader::FilterBlockReader(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool FilterBlockReader::KeyMayMatch(uint64_t block_offset,
|
bool BlockBasedFilterBlockReader::KeyMayMatch(const Slice& key,
|
||||||
const Slice& key) {
|
uint64_t block_offset) {
|
||||||
|
assert(block_offset != kNotValid);
|
||||||
if (!whole_key_filtering_) {
|
if (!whole_key_filtering_) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return MayMatch(block_offset, key);
|
return MayMatch(key, block_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool FilterBlockReader::PrefixMayMatch(uint64_t block_offset,
|
bool BlockBasedFilterBlockReader::PrefixMayMatch(const Slice& prefix,
|
||||||
const Slice& prefix) {
|
uint64_t block_offset) {
|
||||||
|
assert(block_offset != kNotValid);
|
||||||
if (!prefix_extractor_) {
|
if (!prefix_extractor_) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return MayMatch(block_offset, prefix);
|
return MayMatch(prefix, block_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool FilterBlockReader::MayMatch(uint64_t block_offset, const Slice& entry) {
|
bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry,
|
||||||
|
uint64_t block_offset) {
|
||||||
uint64_t index = block_offset >> base_lg_;
|
uint64_t index = block_offset >> base_lg_;
|
||||||
if (index < num_) {
|
if (index < num_) {
|
||||||
uint32_t start = DecodeFixed32(offset_ + index*4);
|
uint32_t start = DecodeFixed32(offset_ + index * 4);
|
||||||
uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
|
uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
|
||||||
if (start <= limit && limit <= (uint32_t)(offset_ - data_)) {
|
if (start <= limit && limit <= (uint32_t)(offset_ - data_)) {
|
||||||
Slice filter = Slice(data_ + start, limit - start);
|
Slice filter = Slice(data_ + start, limit - start);
|
||||||
return policy_->KeyMayMatch(entry, filter);
|
return policy_->KeyMayMatch(entry, filter);
|
||||||
@ -181,7 +195,7 @@ bool FilterBlockReader::MayMatch(uint64_t block_offset, const Slice& entry) {
|
|||||||
return true; // Errors are treated as potential matches
|
return true; // Errors are treated as potential matches
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t FilterBlockReader::ApproximateMemoryUsage() const {
|
size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
|
||||||
return num_ * 4 + 5 + (offset_ - data_);
|
return num_ * 4 + 5 + (offset_ - data_);
|
||||||
}
|
}
|
||||||
}
|
}
|
102
table/block_based_filter_block.h
Normal file
102
table/block_based_filter_block.h
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
//
|
||||||
|
// A filter block is stored near the end of a Table file. It contains
|
||||||
|
// filters (e.g., bloom filters) for all data blocks in the table combined
|
||||||
|
// into a single filter block.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "rocksdb/options.h"
|
||||||
|
#include "rocksdb/slice.h"
|
||||||
|
#include "rocksdb/slice_transform.h"
|
||||||
|
#include "table/filter_block.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
|
||||||
|
// A BlockBasedFilterBlockBuilder is used to construct all of the filters for a
|
||||||
|
// particular Table. It generates a single string which is stored as
|
||||||
|
// a special block in the Table.
|
||||||
|
//
|
||||||
|
// The sequence of calls to BlockBasedFilterBlockBuilder must match the regexp:
|
||||||
|
// (StartBlock Add*)* Finish
|
||||||
|
class BlockBasedFilterBlockBuilder : public FilterBlockBuilder {
|
||||||
|
public:
|
||||||
|
BlockBasedFilterBlockBuilder(const SliceTransform* prefix_extractor,
|
||||||
|
const BlockBasedTableOptions& table_opt);
|
||||||
|
|
||||||
|
virtual bool IsBlockBased() override { return true; }
|
||||||
|
virtual void StartBlock(uint64_t block_offset) override;
|
||||||
|
virtual void Add(const Slice& key) override;
|
||||||
|
virtual Slice Finish() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void AddKey(const Slice& key);
|
||||||
|
void AddPrefix(const Slice& key);
|
||||||
|
void GenerateFilter();
|
||||||
|
|
||||||
|
// important: all of these might point to invalid addresses
|
||||||
|
// at the time of destruction of this filter block. destructor
|
||||||
|
// should NOT dereference them.
|
||||||
|
const FilterPolicy* policy_;
|
||||||
|
const SliceTransform* prefix_extractor_;
|
||||||
|
bool whole_key_filtering_;
|
||||||
|
|
||||||
|
std::string entries_; // Flattened entry contents
|
||||||
|
std::vector<size_t> start_; // Starting index in entries_ of each entry
|
||||||
|
uint32_t added_to_start_; // To indicate if key is added
|
||||||
|
std::string result_; // Filter data computed so far
|
||||||
|
std::vector<Slice> tmp_entries_; // policy_->CreateFilter() argument
|
||||||
|
std::vector<uint32_t> filter_offsets_;
|
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
BlockBasedFilterBlockBuilder(const BlockBasedFilterBlockBuilder&);
|
||||||
|
void operator=(const BlockBasedFilterBlockBuilder&);
|
||||||
|
};
|
||||||
|
|
||||||
|
// A FilterBlockReader is used to parse filter from SST table.
|
||||||
|
// KeyMayMatch and PrefixMayMatch would trigger filter checking
|
||||||
|
class BlockBasedFilterBlockReader : public FilterBlockReader {
|
||||||
|
public:
|
||||||
|
// REQUIRES: "contents" and *policy must stay live while *this is live.
|
||||||
|
BlockBasedFilterBlockReader(const SliceTransform* prefix_extractor,
|
||||||
|
const BlockBasedTableOptions& table_opt,
|
||||||
|
const Slice& contents,
|
||||||
|
bool delete_contents_after_use = false);
|
||||||
|
virtual bool IsBlockBased() override { return true; }
|
||||||
|
virtual bool KeyMayMatch(const Slice& key,
|
||||||
|
uint64_t block_offset = kNotValid) override;
|
||||||
|
virtual bool PrefixMayMatch(const Slice& prefix,
|
||||||
|
uint64_t block_offset = kNotValid) override;
|
||||||
|
virtual size_t ApproximateMemoryUsage() const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const FilterPolicy* policy_;
|
||||||
|
const SliceTransform* prefix_extractor_;
|
||||||
|
bool whole_key_filtering_;
|
||||||
|
const char* data_; // Pointer to filter data (at block-start)
|
||||||
|
const char* offset_; // Pointer to beginning of offset array (at block-end)
|
||||||
|
size_t num_; // Number of entries in offset array
|
||||||
|
size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file)
|
||||||
|
std::unique_ptr<const char[]> filter_data;
|
||||||
|
|
||||||
|
bool MayMatch(const Slice& entry, uint64_t block_offset);
|
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
BlockBasedFilterBlockReader(const BlockBasedFilterBlockReader&);
|
||||||
|
void operator=(const BlockBasedFilterBlockReader&);
|
||||||
|
};
|
||||||
|
} // namespace rocksdb
|
242
table/block_based_filter_block_test.cc
Normal file
242
table/block_based_filter_block_test.cc
Normal file
@ -0,0 +1,242 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "table/block_based_filter_block.h"
|
||||||
|
|
||||||
|
#include "rocksdb/filter_policy.h"
|
||||||
|
#include "util/coding.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
#include "util/logging.h"
|
||||||
|
#include "util/testharness.h"
|
||||||
|
#include "util/testutil.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
// For testing: emit an array with one hash value per key
|
||||||
|
class TestHashFilter : public FilterPolicy {
|
||||||
|
public:
|
||||||
|
virtual const char* Name() const {
|
||||||
|
return "TestHashFilter";
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
|
||||||
|
PutFixed32(dst, h);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||||
|
uint32_t h = Hash(key.data(), key.size(), 1);
|
||||||
|
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) {
|
||||||
|
if (h == DecodeFixed32(filter.data() + i)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class FilterBlockTest {
|
||||||
|
public:
|
||||||
|
TestHashFilter policy_;
|
||||||
|
BlockBasedTableOptions table_options_;
|
||||||
|
|
||||||
|
FilterBlockTest() {
|
||||||
|
table_options_.filter_policy.reset(new TestHashFilter());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// A builder that never saw a block or a key must still emit a well-formed
// filter block, and a reader over it must report "may match" for any offset.
TEST(FilterBlockTest, EmptyBuilder) {
  BlockBasedFilterBlockBuilder builder(nullptr, table_options_);
  const Slice block = builder.Finish();
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));

  BlockBasedFilterBlockReader reader(nullptr, table_options_, block);
  const uint64_t probe_offsets[] = {0, 100000};
  for (const uint64_t offset : probe_offsets) {
    ASSERT_TRUE(reader.KeyMayMatch("foo", offset));
  }
}
|
||||||
|
|
||||||
|
// Keys added under block offsets 100, 200 and 300 are all looked up through
// offset 100: every added key must be reported, unknown keys must not.
TEST(FilterBlockTest, SingleChunk) {
  BlockBasedFilterBlockBuilder builder(nullptr, table_options_);

  builder.StartBlock(100);
  const char* const keys_at_100[] = {"foo", "bar", "box"};
  for (const char* key : keys_at_100) {
    builder.Add(key);
  }
  builder.StartBlock(200);
  builder.Add("box");
  builder.StartBlock(300);
  builder.Add("hello");

  const Slice block = builder.Finish();
  BlockBasedFilterBlockReader reader(nullptr, table_options_, block);

  // "foo" is deliberately probed twice, as in the original sequence.
  const char* const present[] = {"foo", "bar", "box", "hello", "foo"};
  for (const char* key : present) {
    ASSERT_TRUE(reader.KeyMayMatch(key, 100));
  }
  const char* const absent[] = {"missing", "other"};
  for (const char* key : absent) {
    ASSERT_TRUE(!reader.KeyMayMatch(key, 100));
  }
}
|
||||||
|
|
||||||
|
// Builds several filters from blocks at spread-out offsets and verifies that
// each lookup only sees the keys of the filter covering its offset.
TEST(FilterBlockTest, MultiChunk) {
  BlockBasedFilterBlockBuilder builder(nullptr, table_options_);

  // First filter
  builder.StartBlock(0);
  builder.Add("foo");
  builder.StartBlock(2000);
  builder.Add("bar");

  // Second filter
  builder.StartBlock(3100);
  builder.Add("box");

  // Third filter is empty

  // Last filter
  builder.StartBlock(9000);
  builder.Add("box");
  builder.Add("hello");

  const Slice block = builder.Finish();
  BlockBasedFilterBlockReader reader(nullptr, table_options_, block);

  // Check first filter
  ASSERT_TRUE(reader.KeyMayMatch("foo", 0));
  ASSERT_TRUE(reader.KeyMayMatch("bar", 2000));
  const char* const not_in_first[] = {"box", "hello"};
  for (const char* key : not_in_first) {
    ASSERT_TRUE(!reader.KeyMayMatch(key, 0));
  }

  // Check second filter
  ASSERT_TRUE(reader.KeyMayMatch("box", 3100));
  const char* const not_in_second[] = {"foo", "bar", "hello"};
  for (const char* key : not_in_second) {
    ASSERT_TRUE(!reader.KeyMayMatch(key, 3100));
  }

  // Check third filter (empty)
  const char* const every_key[] = {"foo", "bar", "box", "hello"};
  for (const char* key : every_key) {
    ASSERT_TRUE(!reader.KeyMayMatch(key, 4100));
  }

  // Check last filter
  const char* const in_last[] = {"box", "hello"};
  for (const char* key : in_last) {
    ASSERT_TRUE(reader.KeyMayMatch(key, 9000));
  }
  const char* const not_in_last[] = {"foo", "bar"};
  for (const char* key : not_in_last) {
    ASSERT_TRUE(!reader.KeyMayMatch(key, 9000));
  }
}
|
||||||
|
|
||||||
|
// Test for block based filter block
|
||||||
|
// use new interface in FilterPolicy to create filter builder/reader
|
||||||
|
class BlockBasedFilterBlockTest {
|
||||||
|
public:
|
||||||
|
BlockBasedTableOptions table_options_;
|
||||||
|
|
||||||
|
BlockBasedFilterBlockTest() {
|
||||||
|
table_options_.filter_policy.reset(NewBloomFilterPolicy(10));
|
||||||
|
}
|
||||||
|
|
||||||
|
~BlockBasedFilterBlockTest() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// A builder that never saw a block or a key must still emit a well-formed
// filter block, and a reader over it must report "may match" for any offset.
TEST(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) {
  // Owned through unique_ptr so the objects are released on every exit path
  // without manual delete calls.
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));
  Slice block = builder->Finish();
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));

  // Declared after builder, hence destroyed before it.
  // NOTE(review): block presumably refers to storage owned by builder --
  // confirm; this ordering is safe either way.
  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 0));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100000));
}
|
||||||
|
|
||||||
|
// Keys added under block offsets 100, 200 and 300 are all looked up through
// offset 100: every added key must be reported, unknown keys must not.
TEST(BlockBasedFilterBlockTest, BlockBasedSingleChunk) {
  // Owned through unique_ptr so the objects are released on every exit path
  // without manual delete calls.
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));
  builder->StartBlock(100);
  builder->Add("foo");
  builder->Add("bar");
  builder->Add("box");
  builder->StartBlock(200);
  builder->Add("box");
  builder->StartBlock(300);
  builder->Add("hello");
  Slice block = builder->Finish();

  // Declared after builder, hence destroyed before it.
  // NOTE(review): block presumably refers to storage owned by builder --
  // confirm; this ordering is safe either way.
  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100));
  ASSERT_TRUE(reader->KeyMayMatch("bar", 100));
  ASSERT_TRUE(reader->KeyMayMatch("box", 100));
  ASSERT_TRUE(reader->KeyMayMatch("hello", 100));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100));
  ASSERT_TRUE(!reader->KeyMayMatch("missing", 100));
  ASSERT_TRUE(!reader->KeyMayMatch("other", 100));
}
|
||||||
|
|
||||||
|
// Builds several filters from blocks at spread-out offsets and verifies that
// each lookup only sees the keys of the filter covering its offset.
TEST(BlockBasedFilterBlockTest, BlockBasedMultiChunk) {
  // Owned through unique_ptr so the objects are released on every exit path
  // without manual delete calls.
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));

  // First filter
  builder->StartBlock(0);
  builder->Add("foo");
  builder->StartBlock(2000);
  builder->Add("bar");

  // Second filter
  builder->StartBlock(3100);
  builder->Add("box");

  // Third filter is empty

  // Last filter
  builder->StartBlock(9000);
  builder->Add("box");
  builder->Add("hello");

  Slice block = builder->Finish();

  // Declared after builder, hence destroyed before it.
  // NOTE(review): block presumably refers to storage owned by builder --
  // confirm; this ordering is safe either way.
  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));

  // Check first filter
  ASSERT_TRUE(reader->KeyMayMatch("foo", 0));
  ASSERT_TRUE(reader->KeyMayMatch("bar", 2000));
  ASSERT_TRUE(!reader->KeyMayMatch("box", 0));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 0));

  // Check second filter
  ASSERT_TRUE(reader->KeyMayMatch("box", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 3100));

  // Check third filter (empty)
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("box", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 4100));

  // Check last filter
  ASSERT_TRUE(reader->KeyMayMatch("box", 9000));
  ASSERT_TRUE(reader->KeyMayMatch("hello", 9000));
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 9000));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 9000));
}
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
// Test binary entry point: hands control to the rocksdb test harness and
// returns its result.  The command line arguments are not consulted.
int main(int argc, char** argv) {
  return rocksdb::test::RunAllTests();
}
|
@ -31,6 +31,8 @@
|
|||||||
#include "table/block_based_table_reader.h"
|
#include "table/block_based_table_reader.h"
|
||||||
#include "table/block_builder.h"
|
#include "table/block_builder.h"
|
||||||
#include "table/filter_block.h"
|
#include "table/filter_block.h"
|
||||||
|
#include "table/block_based_filter_block.h"
|
||||||
|
#include "table/full_filter_block.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
#include "table/meta_blocks.h"
|
#include "table/meta_blocks.h"
|
||||||
#include "table/table_builder.h"
|
#include "table/table_builder.h"
|
||||||
@ -274,6 +276,21 @@ IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create a index builder based on its type.
|
||||||
|
FilterBlockBuilder* CreateFilterBlockBuilder(const ImmutableCFOptions& opt,
|
||||||
|
const BlockBasedTableOptions& table_opt) {
|
||||||
|
if (table_opt.filter_policy == nullptr) return nullptr;
|
||||||
|
|
||||||
|
FilterBitsBuilder* filter_bits_builder =
|
||||||
|
table_opt.filter_policy->GetFilterBitsBuilder();
|
||||||
|
if (filter_bits_builder == nullptr) {
|
||||||
|
return new BlockBasedFilterBlockBuilder(opt.prefix_extractor, table_opt);
|
||||||
|
} else {
|
||||||
|
return new FullFilterBlockBuilder(opt.prefix_extractor, table_opt,
|
||||||
|
filter_bits_builder);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool GoodCompressionRatio(size_t compressed_size, size_t raw_size) {
|
bool GoodCompressionRatio(size_t compressed_size, size_t raw_size) {
|
||||||
// Check to see if compressed less than 12.5%
|
// Check to see if compressed less than 12.5%
|
||||||
return compressed_size < raw_size - (raw_size / 8u);
|
return compressed_size < raw_size - (raw_size / 8u);
|
||||||
@ -365,7 +382,6 @@ class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector
|
|||||||
std::string val;
|
std::string val;
|
||||||
PutFixed32(&val, static_cast<uint32_t>(index_type_));
|
PutFixed32(&val, static_cast<uint32_t>(index_type_));
|
||||||
properties->insert({BlockBasedTablePropertyNames::kIndexType, val});
|
properties->insert({BlockBasedTablePropertyNames::kIndexType, val});
|
||||||
|
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -428,11 +444,7 @@ struct BlockBasedTableBuilder::Rep {
|
|||||||
table_options.index_type, &internal_comparator,
|
table_options.index_type, &internal_comparator,
|
||||||
&this->internal_prefix_transform)),
|
&this->internal_prefix_transform)),
|
||||||
compression_type(compression_type),
|
compression_type(compression_type),
|
||||||
compression_opts(compression_opts),
|
filter_block(CreateFilterBlockBuilder(ioptions, table_options)),
|
||||||
filter_block(table_options.filter_policy == nullptr ?
|
|
||||||
nullptr :
|
|
||||||
new FilterBlockBuilder(ioptions.prefix_extractor,
|
|
||||||
table_options, &internal_comparator)),
|
|
||||||
flush_block_policy(
|
flush_block_policy(
|
||||||
table_options.flush_block_policy_factory->NewFlushBlockPolicy(
|
table_options.flush_block_policy_factory->NewFlushBlockPolicy(
|
||||||
table_options, data_block)) {
|
table_options, data_block)) {
|
||||||
@ -497,7 +509,7 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (r->filter_block != nullptr) {
|
if (r->filter_block != nullptr) {
|
||||||
r->filter_block->AddKey(ExtractUserKey(key));
|
r->filter_block->Add(ExtractUserKey(key));
|
||||||
}
|
}
|
||||||
|
|
||||||
r->last_key.assign(key.data(), key.size());
|
r->last_key.assign(key.data(), key.size());
|
||||||
@ -661,10 +673,7 @@ Status BlockBasedTableBuilder::Finish() {
|
|||||||
assert(!r->closed);
|
assert(!r->closed);
|
||||||
r->closed = true;
|
r->closed = true;
|
||||||
|
|
||||||
BlockHandle filter_block_handle,
|
BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle;
|
||||||
metaindex_block_handle,
|
|
||||||
index_block_handle;
|
|
||||||
|
|
||||||
// Write filter block
|
// Write filter block
|
||||||
if (ok() && r->filter_block != nullptr) {
|
if (ok() && r->filter_block != nullptr) {
|
||||||
auto filter_contents = r->filter_block->Finish();
|
auto filter_contents = r->filter_block->Finish();
|
||||||
@ -703,7 +712,12 @@ Status BlockBasedTableBuilder::Finish() {
|
|||||||
if (r->filter_block != nullptr) {
|
if (r->filter_block != nullptr) {
|
||||||
// Add mapping from "<filter_block_prefix>.Name" to location
|
// Add mapping from "<filter_block_prefix>.Name" to location
|
||||||
// of filter data.
|
// of filter data.
|
||||||
std::string key = BlockBasedTable::kFilterBlockPrefix;
|
std::string key;
|
||||||
|
if (r->filter_block->IsBlockBased()) {
|
||||||
|
key = BlockBasedTable::kFilterBlockPrefix;
|
||||||
|
} else {
|
||||||
|
key = BlockBasedTable::kFullFilterBlockPrefix;
|
||||||
|
}
|
||||||
key.append(r->table_options.filter_policy->Name());
|
key.append(r->table_options.filter_policy->Name());
|
||||||
meta_index_builder.Add(key, filter_block_handle);
|
meta_index_builder.Add(key, filter_block_handle);
|
||||||
}
|
}
|
||||||
@ -807,5 +821,6 @@ uint64_t BlockBasedTableBuilder::FileSize() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const std::string BlockBasedTable::kFilterBlockPrefix = "filter.";
|
const std::string BlockBasedTable::kFilterBlockPrefix = "filter.";
|
||||||
|
const std::string BlockBasedTable::kFullFilterBlockPrefix = "fullfilter.";
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -26,6 +26,8 @@
|
|||||||
|
|
||||||
#include "table/block.h"
|
#include "table/block.h"
|
||||||
#include "table/filter_block.h"
|
#include "table/filter_block.h"
|
||||||
|
#include "table/block_based_filter_block.h"
|
||||||
|
#include "table/full_filter_block.h"
|
||||||
#include "table/block_hash_index.h"
|
#include "table/block_hash_index.h"
|
||||||
#include "table/block_prefix_index.h"
|
#include "table/block_prefix_index.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
@ -46,7 +48,6 @@ using std::unique_ptr;
|
|||||||
typedef BlockBasedTable::IndexReader IndexReader;
|
typedef BlockBasedTable::IndexReader IndexReader;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
// The longest the prefix of the cache key used to identify blocks can be.
|
// The longest the prefix of the cache key used to identify blocks can be.
|
||||||
// We are using the fact that we know for Posix files the unique ID is three
|
// We are using the fact that we know for Posix files the unique ID is three
|
||||||
// varints.
|
// varints.
|
||||||
@ -527,11 +528,18 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
|||||||
|
|
||||||
// Set filter block
|
// Set filter block
|
||||||
if (rep->filter_policy) {
|
if (rep->filter_policy) {
|
||||||
std::string key = kFilterBlockPrefix;
|
// First try reading full_filter, then reading block_based_filter
|
||||||
key.append(rep->filter_policy->Name());
|
for (auto filter_block_prefix : { kFullFilterBlockPrefix,
|
||||||
BlockHandle handle;
|
kFilterBlockPrefix }) {
|
||||||
if (FindMetaBlock(meta_iter.get(), key, &handle).ok()) {
|
std::string key = filter_block_prefix;
|
||||||
rep->filter.reset(ReadFilter(handle, rep));
|
key.append(rep->filter_policy->Name());
|
||||||
|
|
||||||
|
BlockHandle handle;
|
||||||
|
if (FindMetaBlock(meta_iter.get(), key, &handle).ok()) {
|
||||||
|
rep->filter.reset(ReadFilter(handle, rep,
|
||||||
|
filter_block_prefix, nullptr));
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -741,9 +749,9 @@ Status BlockBasedTable::PutDataBlockToCache(
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
FilterBlockReader* BlockBasedTable::ReadFilter(const BlockHandle& filter_handle,
|
FilterBlockReader* BlockBasedTable::ReadFilter(
|
||||||
BlockBasedTable::Rep* rep,
|
const BlockHandle& filter_handle, BlockBasedTable::Rep* rep,
|
||||||
size_t* filter_size) {
|
const std::string& filter_block_prefix, size_t* filter_size) {
|
||||||
// TODO: We might want to unify with ReadBlockFromFile() if we start
|
// TODO: We might want to unify with ReadBlockFromFile() if we start
|
||||||
// requiring checksum verification in Table::Open.
|
// requiring checksum verification in Table::Open.
|
||||||
ReadOptions opt;
|
ReadOptions opt;
|
||||||
@ -757,13 +765,25 @@ FilterBlockReader* BlockBasedTable::ReadFilter(const BlockHandle& filter_handle,
|
|||||||
*filter_size = block.data.size();
|
*filter_size = block.data.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
return new FilterBlockReader(
|
assert(rep->filter_policy);
|
||||||
rep->ioptions.prefix_extractor, rep->table_options,
|
if (kFilterBlockPrefix == filter_block_prefix) {
|
||||||
block.data, block.heap_allocated);
|
return new BlockBasedFilterBlockReader(rep->ioptions.prefix_extractor,
|
||||||
|
rep->table_options, block.data, block.heap_allocated);
|
||||||
|
} else if (kFullFilterBlockPrefix == filter_block_prefix) {
|
||||||
|
auto filter_bits_reader = rep->filter_policy->
|
||||||
|
GetFilterBitsReader(block.data);
|
||||||
|
|
||||||
|
if (filter_bits_reader != nullptr) {
|
||||||
|
return new FullFilterBlockReader(rep->ioptions.prefix_extractor,
|
||||||
|
rep->table_options, block.data, filter_bits_reader,
|
||||||
|
block.heap_allocated);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||||
bool no_io) const {
|
bool no_io) const {
|
||||||
// filter pre-populated
|
// filter pre-populated
|
||||||
if (rep_->filter != nullptr) {
|
if (rep_->filter != nullptr) {
|
||||||
return {rep_->filter.get(), nullptr /* cache handle */};
|
return {rep_->filter.get(), nullptr /* cache handle */};
|
||||||
@ -777,11 +797,9 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
|
|
||||||
// Fetching from the cache
|
// Fetching from the cache
|
||||||
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||||
auto key = GetCacheKey(
|
auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
|
||||||
rep_->cache_key_prefix,
|
rep_->footer.metaindex_handle(),
|
||||||
rep_->cache_key_prefix_size,
|
cache_key
|
||||||
rep_->footer.metaindex_handle(),
|
|
||||||
cache_key
|
|
||||||
);
|
);
|
||||||
|
|
||||||
Statistics* statistics = rep_->ioptions.statistics;
|
Statistics* statistics = rep_->ioptions.statistics;
|
||||||
@ -791,8 +809,8 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
|
|
||||||
FilterBlockReader* filter = nullptr;
|
FilterBlockReader* filter = nullptr;
|
||||||
if (cache_handle != nullptr) {
|
if (cache_handle != nullptr) {
|
||||||
filter = reinterpret_cast<FilterBlockReader*>(
|
filter = reinterpret_cast<FilterBlockReader*>(
|
||||||
block_cache->Value(cache_handle));
|
block_cache->Value(cache_handle));
|
||||||
} else if (no_io) {
|
} else if (no_io) {
|
||||||
// Do not invoke any io.
|
// Do not invoke any io.
|
||||||
return CachableEntry<FilterBlockReader>();
|
return CachableEntry<FilterBlockReader>();
|
||||||
@ -803,17 +821,22 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
auto s = ReadMetaBlock(rep_, &meta, &iter);
|
auto s = ReadMetaBlock(rep_, &meta, &iter);
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
std::string filter_block_key = kFilterBlockPrefix;
|
// First try reading full_filter, then reading block_based_filter
|
||||||
filter_block_key.append(rep_->filter_policy->Name());
|
for (auto filter_block_prefix : {kFullFilterBlockPrefix,
|
||||||
BlockHandle handle;
|
kFilterBlockPrefix}) {
|
||||||
if (FindMetaBlock(iter.get(), filter_block_key, &handle).ok()) {
|
std::string filter_block_key = filter_block_prefix;
|
||||||
filter = ReadFilter(handle, rep_, &filter_size);
|
filter_block_key.append(rep_->filter_policy->Name());
|
||||||
assert(filter);
|
BlockHandle handle;
|
||||||
assert(filter_size > 0);
|
if (FindMetaBlock(iter.get(), filter_block_key, &handle).ok()) {
|
||||||
|
filter = ReadFilter(handle, rep_, filter_block_prefix, &filter_size);
|
||||||
|
|
||||||
cache_handle = block_cache->Insert(
|
if (filter == nullptr) break; // err happen in ReadFilter
|
||||||
key, filter, filter_size, &DeleteCachedEntry<FilterBlockReader>);
|
assert(filter_size > 0);
|
||||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
cache_handle = block_cache->Insert(
|
||||||
|
key, filter, filter_size, &DeleteCachedEntry<FilterBlockReader>);
|
||||||
|
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -918,8 +941,8 @@ Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep,
|
|||||||
|
|
||||||
// create key for block cache
|
// create key for block cache
|
||||||
if (block_cache != nullptr) {
|
if (block_cache != nullptr) {
|
||||||
key = GetCacheKey(rep->cache_key_prefix,
|
key = GetCacheKey(rep->cache_key_prefix, rep->cache_key_prefix_size,
|
||||||
rep->cache_key_prefix_size, handle, cache_key);
|
handle, cache_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (block_cache_compressed != nullptr) {
|
if (block_cache_compressed != nullptr) {
|
||||||
@ -1039,42 +1062,50 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
|
|||||||
// loaded to memory.
|
// loaded to memory.
|
||||||
ReadOptions no_io_read_options;
|
ReadOptions no_io_read_options;
|
||||||
no_io_read_options.read_tier = kBlockCacheTier;
|
no_io_read_options.read_tier = kBlockCacheTier;
|
||||||
unique_ptr<Iterator> iiter(NewIndexIterator(no_io_read_options));
|
|
||||||
iiter->Seek(internal_prefix);
|
|
||||||
|
|
||||||
if (!iiter->Valid()) {
|
// First, try check with full filter
|
||||||
// we're past end of file
|
auto filter_entry = GetFilter(true /* no io */);
|
||||||
// if it's incomplete, it means that we avoided I/O
|
FilterBlockReader* filter = filter_entry.value;
|
||||||
// and we're not really sure that we're past the end
|
if (filter != nullptr && !filter->IsBlockBased()) {
|
||||||
// of the file
|
may_match = filter->PrefixMayMatch(prefix);
|
||||||
may_match = iiter->status().IsIncomplete();
|
}
|
||||||
} else if (ExtractUserKey(iiter->key()).starts_with(
|
|
||||||
ExtractUserKey(internal_prefix))) {
|
// Then, try to find it within each block
|
||||||
// we need to check for this subtle case because our only
|
if (may_match) {
|
||||||
// guarantee is that "the key is a string >= last key in that data
|
unique_ptr<Iterator> iiter(NewIndexIterator(no_io_read_options));
|
||||||
// block" according to the doc/table_format.txt spec.
|
iiter->Seek(internal_prefix);
|
||||||
//
|
|
||||||
// Suppose iiter->key() starts with the desired prefix; it is not
|
if (!iiter->Valid()) {
|
||||||
// necessarily the case that the corresponding data block will
|
// we're past end of file
|
||||||
// contain the prefix, since iiter->key() need not be in the
|
// if it's incomplete, it means that we avoided I/O
|
||||||
// block. However, the next data block may contain the prefix, so
|
// and we're not really sure that we're past the end
|
||||||
// we return true to play it safe.
|
// of the file
|
||||||
may_match = true;
|
may_match = iiter->status().IsIncomplete();
|
||||||
} else {
|
} else if (ExtractUserKey(iiter->key()).starts_with(
|
||||||
// iiter->key() does NOT start with the desired prefix. Because
|
ExtractUserKey(internal_prefix))) {
|
||||||
// Seek() finds the first key that is >= the seek target, this
|
// we need to check for this subtle case because our only
|
||||||
// means that iiter->key() > prefix. Thus, any data blocks coming
|
// guarantee is that "the key is a string >= last key in that data
|
||||||
// after the data block corresponding to iiter->key() cannot
|
// block" according to the doc/table_format.txt spec.
|
||||||
// possibly contain the key. Thus, the corresponding data block
|
//
|
||||||
// is the only one which could potentially contain the prefix.
|
// Suppose iiter->key() starts with the desired prefix; it is not
|
||||||
Slice handle_value = iiter->value();
|
// necessarily the case that the corresponding data block will
|
||||||
BlockHandle handle;
|
// contain the prefix, since iiter->key() need not be in the
|
||||||
s = handle.DecodeFrom(&handle_value);
|
// block. However, the next data block may contain the prefix, so
|
||||||
assert(s.ok());
|
// we return true to play it safe.
|
||||||
auto filter_entry = GetFilter(true /* no io */);
|
may_match = true;
|
||||||
may_match = filter_entry.value == nullptr ||
|
} else if (filter != nullptr && filter->IsBlockBased()) {
|
||||||
filter_entry.value->PrefixMayMatch(handle.offset(), prefix);
|
// iiter->key() does NOT start with the desired prefix. Because
|
||||||
filter_entry.Release(rep_->table_options.block_cache.get());
|
// Seek() finds the first key that is >= the seek target, this
|
||||||
|
// means that iiter->key() > prefix. Thus, any data blocks coming
|
||||||
|
// after the data block corresponding to iiter->key() cannot
|
||||||
|
// possibly contain the key. Thus, the corresponding data block
|
||||||
|
// is the only one which could potentially contain the prefix.
|
||||||
|
Slice handle_value = iiter->value();
|
||||||
|
BlockHandle handle;
|
||||||
|
s = handle.DecodeFrom(&handle_value);
|
||||||
|
assert(s.ok());
|
||||||
|
may_match = filter->PrefixMayMatch(prefix, handle.offset());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Statistics* statistics = rep_->ioptions.statistics;
|
Statistics* statistics = rep_->ioptions.statistics;
|
||||||
@ -1083,6 +1114,7 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
|
|||||||
RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
|
RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
filter_entry.Release(rep_->table_options.block_cache.get());
|
||||||
return may_match;
|
return may_match;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1098,64 +1130,72 @@ Status BlockBasedTable::Get(
|
|||||||
const Slice& v),
|
const Slice& v),
|
||||||
void (*mark_key_may_exist_handler)(void* handle_context)) {
|
void (*mark_key_may_exist_handler)(void* handle_context)) {
|
||||||
Status s;
|
Status s;
|
||||||
BlockIter iiter;
|
|
||||||
NewIndexIterator(read_options, &iiter);
|
|
||||||
|
|
||||||
auto filter_entry = GetFilter(read_options.read_tier == kBlockCacheTier);
|
auto filter_entry = GetFilter(read_options.read_tier == kBlockCacheTier);
|
||||||
FilterBlockReader* filter = filter_entry.value;
|
FilterBlockReader* filter = filter_entry.value;
|
||||||
bool done = false;
|
|
||||||
for (iiter.Seek(key); iiter.Valid() && !done; iiter.Next()) {
|
|
||||||
Slice handle_value = iiter.value();
|
|
||||||
|
|
||||||
BlockHandle handle;
|
// First check the full filter
|
||||||
bool may_not_exist_in_filter =
|
// If the full filter is not useful, then go into each block
|
||||||
filter != nullptr && handle.DecodeFrom(&handle_value).ok() &&
|
if (filter != nullptr && !filter->IsBlockBased()
|
||||||
!filter->KeyMayMatch(handle.offset(), ExtractUserKey(key));
|
&& !filter->KeyMayMatch(ExtractUserKey(key))) {
|
||||||
|
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
|
||||||
|
} else {
|
||||||
|
BlockIter iiter;
|
||||||
|
NewIndexIterator(read_options, &iiter);
|
||||||
|
|
||||||
if (may_not_exist_in_filter) {
|
bool done = false;
|
||||||
// Not found
|
for (iiter.Seek(key); iiter.Valid() && !done; iiter.Next()) {
|
||||||
// TODO: think about interaction with Merge. If a user key cannot
|
Slice handle_value = iiter.value();
|
||||||
// cross one data block, we should be fine.
|
|
||||||
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
BlockIter biter;
|
|
||||||
NewDataBlockIterator(rep_, read_options, iiter.value(), &biter);
|
|
||||||
|
|
||||||
if (read_options.read_tier && biter.status().IsIncomplete()) {
|
BlockHandle handle;
|
||||||
// couldn't get block from block_cache
|
bool not_exist_in_filter =
|
||||||
// Update Saver.state to Found because we are only looking for whether
|
filter != nullptr && filter->IsBlockBased() == true &&
|
||||||
// we can guarantee the key is not there when "no_io" is set
|
handle.DecodeFrom(&handle_value).ok() &&
|
||||||
(*mark_key_may_exist_handler)(handle_context);
|
!filter->KeyMayMatch(ExtractUserKey(key), handle.offset());
|
||||||
|
|
||||||
|
if (not_exist_in_filter) {
|
||||||
|
// Not found
|
||||||
|
// TODO: think about interaction with Merge. If a user key cannot
|
||||||
|
// cross one data block, we should be fine.
|
||||||
|
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
|
||||||
break;
|
break;
|
||||||
}
|
} else {
|
||||||
if (!biter.status().ok()) {
|
BlockIter biter;
|
||||||
s = biter.status();
|
NewDataBlockIterator(rep_, read_options, iiter.value(), &biter);
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Call the *saver function on each entry/block until it returns false
|
if (read_options.read_tier && biter.status().IsIncomplete()) {
|
||||||
for (biter.Seek(key); biter.Valid(); biter.Next()) {
|
// couldn't get block from block_cache
|
||||||
ParsedInternalKey parsed_key;
|
// Update Saver.state to Found because we are only looking for whether
|
||||||
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
// we can guarantee the key is not there when "no_io" is set
|
||||||
s = Status::Corruption(Slice());
|
(*mark_key_may_exist_handler)(handle_context);
|
||||||
}
|
|
||||||
|
|
||||||
if (!(*result_handler)(handle_context, parsed_key,
|
|
||||||
biter.value())) {
|
|
||||||
done = true;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (!biter.status().ok()) {
|
||||||
|
s = biter.status();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call the *saver function on each entry/block until it returns false
|
||||||
|
for (biter.Seek(key); biter.Valid(); biter.Next()) {
|
||||||
|
ParsedInternalKey parsed_key;
|
||||||
|
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
||||||
|
s = Status::Corruption(Slice());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(*result_handler)(handle_context, parsed_key,
|
||||||
|
biter.value())) {
|
||||||
|
done = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s = biter.status();
|
||||||
}
|
}
|
||||||
s = biter.status();
|
}
|
||||||
|
if (s.ok()) {
|
||||||
|
s = iiter.status();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
filter_entry.Release(rep_->table_options.block_cache.get());
|
filter_entry.Release(rep_->table_options.block_cache.get());
|
||||||
if (s.ok()) {
|
|
||||||
s = iiter.status();
|
|
||||||
}
|
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1175,8 +1215,8 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
|
|||||||
|
|
||||||
char cache_key_storage[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
char cache_key_storage[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||||
Slice cache_key =
|
Slice cache_key =
|
||||||
GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, handle,
|
GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
|
||||||
cache_key_storage);
|
handle, cache_key_storage);
|
||||||
Slice ckey;
|
Slice ckey;
|
||||||
|
|
||||||
s = GetDataBlockFromCache(cache_key, ckey, block_cache, nullptr, nullptr,
|
s = GetDataBlockFromCache(cache_key, ckey, block_cache, nullptr, nullptr,
|
||||||
|
@ -28,6 +28,8 @@ class BlockIter;
|
|||||||
class BlockHandle;
|
class BlockHandle;
|
||||||
class Cache;
|
class Cache;
|
||||||
class FilterBlockReader;
|
class FilterBlockReader;
|
||||||
|
class BlockBasedFilterBlockReader;
|
||||||
|
class FullFilterBlockReader;
|
||||||
class Footer;
|
class Footer;
|
||||||
class InternalKeyComparator;
|
class InternalKeyComparator;
|
||||||
class Iterator;
|
class Iterator;
|
||||||
@ -47,6 +49,7 @@ using std::unique_ptr;
|
|||||||
class BlockBasedTable : public TableReader {
|
class BlockBasedTable : public TableReader {
|
||||||
public:
|
public:
|
||||||
static const std::string kFilterBlockPrefix;
|
static const std::string kFilterBlockPrefix;
|
||||||
|
static const std::string kFullFilterBlockPrefix;
|
||||||
|
|
||||||
// Attempt to open the table that is stored in bytes [0..file_size)
|
// Attempt to open the table that is stored in bytes [0..file_size)
|
||||||
// of "file", and read the metadata entries necessary to allow
|
// of "file", and read the metadata entries necessary to allow
|
||||||
@ -184,7 +187,9 @@ class BlockBasedTable : public TableReader {
|
|||||||
|
|
||||||
// Create the filter from the filter block.
|
// Create the filter from the filter block.
|
||||||
static FilterBlockReader* ReadFilter(const BlockHandle& filter_handle,
|
static FilterBlockReader* ReadFilter(const BlockHandle& filter_handle,
|
||||||
Rep* rep, size_t* filter_size = nullptr);
|
Rep* rep,
|
||||||
|
const std::string& filter_block_prefix,
|
||||||
|
size_t* filter_size = nullptr);
|
||||||
|
|
||||||
static void SetupCacheKeyPrefix(Rep* rep);
|
static void SetupCacheKeyPrefix(Rep* rep);
|
||||||
|
|
||||||
|
@ -10,86 +10,70 @@
|
|||||||
// A filter block is stored near the end of a Table file. It contains
|
// A filter block is stored near the end of a Table file. It contains
|
||||||
// filters (e.g., bloom filters) for all data blocks in the table combined
|
// filters (e.g., bloom filters) for all data blocks in the table combined
|
||||||
// into a single filter block.
|
// into a single filter block.
|
||||||
|
//
|
||||||
|
// It is a base class for BlockBasedFilter and FullFilter.
|
||||||
|
// These two are both used in BlockBasedTable. The first one contains filters
|
||||||
|
// for a part of the keys in the sst file; the second contains a filter for all keys
|
||||||
|
// in the sst file.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include "rocksdb/options.h"
|
||||||
#include "rocksdb/slice.h"
|
#include "rocksdb/slice.h"
|
||||||
#include "rocksdb/slice_transform.h"
|
|
||||||
#include "rocksdb/table.h"
|
#include "rocksdb/table.h"
|
||||||
#include "util/hash.h"
|
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
class FilterPolicy;
|
const uint64_t kNotValid = ULLONG_MAX;
|
||||||
|
|
||||||
// A FilterBlockBuilder is used to construct all of the filters for a
|
// A FilterBlockBuilder is used to construct all of the filters for a
|
||||||
// particular Table. It generates a single string which is stored as
|
// particular Table. It generates a single string which is stored as
|
||||||
// a special block in the Table.
|
// a special block in the Table.
|
||||||
//
|
//
|
||||||
// The sequence of calls to FilterBlockBuilder must match the regexp:
|
// The sequence of calls to FilterBlockBuilder must match the regexp:
|
||||||
// (StartBlock AddKey*)* Finish
|
// (StartBlock Add*)* Finish
|
||||||
|
//
|
||||||
|
// BlockBased/Full FilterBlock would be called in the same way.
|
||||||
class FilterBlockBuilder {
|
class FilterBlockBuilder {
|
||||||
public:
|
public:
|
||||||
explicit FilterBlockBuilder(const SliceTransform* prefix_extractor,
|
explicit FilterBlockBuilder() {}
|
||||||
const BlockBasedTableOptions& table_opt,
|
virtual ~FilterBlockBuilder() {}
|
||||||
const Comparator* internal_comparator);
|
|
||||||
|
|
||||||
void StartBlock(uint64_t block_offset);
|
virtual bool IsBlockBased() = 0; // If is blockbased filter
|
||||||
void AddKey(const Slice& key);
|
virtual void StartBlock(uint64_t block_offset) = 0; // Start new block filter
|
||||||
Slice Finish();
|
virtual void Add(const Slice& key) = 0; // Add a key to current filter
|
||||||
|
virtual Slice Finish() = 0; // Generate Filter
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool SamePrefix(const Slice &key1, const Slice &key2) const;
|
|
||||||
void GenerateFilter();
|
|
||||||
|
|
||||||
// important: all of these might point to invalid addresses
|
|
||||||
// at the time of destruction of this filter block. destructor
|
|
||||||
// should NOT dereference them.
|
|
||||||
const FilterPolicy* policy_;
|
|
||||||
const SliceTransform* prefix_extractor_;
|
|
||||||
bool whole_key_filtering_;
|
|
||||||
const Comparator* comparator_;
|
|
||||||
|
|
||||||
std::string entries_; // Flattened entry contents
|
|
||||||
std::vector<size_t> start_; // Starting index in entries_ of each entry
|
|
||||||
std::string result_; // Filter data computed so far
|
|
||||||
std::vector<Slice> tmp_entries_; // policy_->CreateFilter() argument
|
|
||||||
std::vector<uint32_t> filter_offsets_;
|
|
||||||
|
|
||||||
// No copying allowed
|
// No copying allowed
|
||||||
FilterBlockBuilder(const FilterBlockBuilder&);
|
FilterBlockBuilder(const FilterBlockBuilder&);
|
||||||
void operator=(const FilterBlockBuilder&);
|
void operator=(const FilterBlockBuilder&);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A FilterBlockReader is used to parse filter from SST table.
|
||||||
|
// KeyMayMatch and PrefixMayMatch would trigger filter checking
|
||||||
|
//
|
||||||
|
// BlockBased/Full FilterBlock would be called in the same way.
|
||||||
class FilterBlockReader {
|
class FilterBlockReader {
|
||||||
public:
|
public:
|
||||||
// REQUIRES: "contents" and *policy must stay live while *this is live.
|
explicit FilterBlockReader() {}
|
||||||
FilterBlockReader(
|
virtual ~FilterBlockReader() {}
|
||||||
const SliceTransform* prefix_extractor,
|
|
||||||
const BlockBasedTableOptions& table_opt,
|
virtual bool IsBlockBased() = 0; // If is blockbased filter
|
||||||
const Slice& contents,
|
virtual bool KeyMayMatch(const Slice& key,
|
||||||
bool delete_contents_after_use = false);
|
uint64_t block_offset = kNotValid) = 0;
|
||||||
bool KeyMayMatch(uint64_t block_offset, const Slice& key);
|
virtual bool PrefixMayMatch(const Slice& prefix,
|
||||||
bool PrefixMayMatch(uint64_t block_offset, const Slice& prefix);
|
uint64_t block_offset = kNotValid) = 0;
|
||||||
size_t ApproximateMemoryUsage() const;
|
virtual size_t ApproximateMemoryUsage() const = 0;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const FilterPolicy* policy_;
|
// No copying allowed
|
||||||
const SliceTransform* prefix_extractor_;
|
FilterBlockReader(const FilterBlockReader&);
|
||||||
bool whole_key_filtering_;
|
void operator=(const FilterBlockReader&);
|
||||||
const char* data_; // Pointer to filter data (at block-start)
|
|
||||||
const char* offset_; // Pointer to beginning of offset array (at block-end)
|
|
||||||
size_t num_; // Number of entries in offset array
|
|
||||||
size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file)
|
|
||||||
std::unique_ptr<const char[]> filter_data;
|
|
||||||
|
|
||||||
|
|
||||||
bool MayMatch(uint64_t block_offset, const Slice& entry);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
} // namespace rocksdb
|
||||||
|
@ -1,139 +0,0 @@
|
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under the BSD-style license found in the
|
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
|
||||||
//
|
|
||||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style license that can be
|
|
||||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
||||||
|
|
||||||
#include "table/filter_block.h"
|
|
||||||
|
|
||||||
#include "rocksdb/filter_policy.h"
|
|
||||||
#include "util/coding.h"
|
|
||||||
#include "util/hash.h"
|
|
||||||
#include "util/logging.h"
|
|
||||||
#include "util/testharness.h"
|
|
||||||
#include "util/testutil.h"
|
|
||||||
|
|
||||||
namespace rocksdb {
|
|
||||||
|
|
||||||
// For testing: emit an array with one hash value per key
|
|
||||||
class TestHashFilter : public FilterPolicy {
|
|
||||||
public:
|
|
||||||
virtual const char* Name() const {
|
|
||||||
return "TestHashFilter";
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
|
|
||||||
PutFixed32(dst, h);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
|
||||||
uint32_t h = Hash(key.data(), key.size(), 1);
|
|
||||||
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) {
|
|
||||||
if (h == DecodeFixed32(filter.data() + i)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
class FilterBlockTest {
|
|
||||||
public:
|
|
||||||
const Comparator* comparator_;
|
|
||||||
BlockBasedTableOptions table_options_;
|
|
||||||
|
|
||||||
FilterBlockTest()
|
|
||||||
: comparator_(BytewiseComparator()) {
|
|
||||||
table_options_.filter_policy.reset(new TestHashFilter());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// A builder that never saw a key must still produce a well-formed block, and
// a reader over that block must answer "may match" for any offset.
TEST(FilterBlockTest, EmptyBuilder) {
  FilterBlockBuilder empty_builder(nullptr, table_options_, comparator_);
  Slice encoded = empty_builder.Finish();

  // Expected serialized form of an empty filter block.
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(encoded));

  FilterBlockReader reader(nullptr, table_options_, encoded);
  ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
  ASSERT_TRUE(reader.KeyMayMatch(100000, "foo"));
}
|
|
||||||
|
|
||||||
// Keys added under block offsets 100, 200 and 300 are all expected to be
// answered by the filter consulted for offset 100; keys never added must be
// rejected.
TEST(FilterBlockTest, SingleChunk) {
  FilterBlockBuilder builder(nullptr, table_options_, comparator_);

  builder.StartBlock(100);
  builder.AddKey("foo");
  builder.AddKey("bar");
  builder.AddKey("box");

  builder.StartBlock(200);
  builder.AddKey("box");

  builder.StartBlock(300);
  builder.AddKey("hello");

  Slice encoded = builder.Finish();
  FilterBlockReader reader(nullptr, table_options_, encoded);

  // Every key that was added (the repeated "foo" probe is intentional and
  // mirrors the original sequence of checks).
  const char* const present[] = {"foo", "bar", "box", "hello", "foo"};
  for (const char* key : present) {
    ASSERT_TRUE(reader.KeyMayMatch(100, key));
  }

  // Keys that were never added.
  const char* const absent[] = {"missing", "other"};
  for (const char* key : absent) {
    ASSERT_TRUE(! reader.KeyMayMatch(100, key));
  }
}
|
|
||||||
|
|
||||||
// Block offsets far enough apart land in different filters. Each filter must
// accept only the keys added for it, and an offset for which no key was added
// must reject everything.
TEST(FilterBlockTest, MultiChunk) {
  FilterBlockBuilder builder(nullptr, table_options_, comparator_);

  // First filter: offsets 0 and 2000.
  builder.StartBlock(0);
  builder.AddKey("foo");
  builder.StartBlock(2000);
  builder.AddKey("bar");

  // Second filter: offset 3100.
  builder.StartBlock(3100);
  builder.AddKey("box");

  // Third filter is left empty on purpose.

  // Last filter: offset 9000.
  builder.StartBlock(9000);
  builder.AddKey("box");
  builder.AddKey("hello");

  Slice encoded = builder.Finish();
  FilterBlockReader reader(nullptr, table_options_, encoded);

  // First filter.
  ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
  ASSERT_TRUE(reader.KeyMayMatch(2000, "bar"));
  ASSERT_TRUE(! reader.KeyMayMatch(0, "box"));
  ASSERT_TRUE(! reader.KeyMayMatch(0, "hello"));

  // Second filter.
  ASSERT_TRUE(reader.KeyMayMatch(3100, "box"));
  const char* const not_in_second[] = {"foo", "bar", "hello"};
  for (const char* key : not_in_second) {
    ASSERT_TRUE(! reader.KeyMayMatch(3100, key));
  }

  // Third filter (empty): nothing may match.
  const char* const all_keys[] = {"foo", "bar", "box", "hello"};
  for (const char* key : all_keys) {
    ASSERT_TRUE(! reader.KeyMayMatch(4100, key));
  }

  // Last filter.
  ASSERT_TRUE(reader.KeyMayMatch(9000, "box"));
  ASSERT_TRUE(reader.KeyMayMatch(9000, "hello"));
  ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo"));
  ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar"));
}
|
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
return rocksdb::test::RunAllTests();
|
|
||||||
}
|
|
99
table/full_filter_block.cc
Normal file
99
table/full_filter_block.cc
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "table/full_filter_block.h"
|
||||||
|
|
||||||
|
#include "rocksdb/filter_policy.h"
|
||||||
|
#include "port/port.h"
|
||||||
|
#include "util/coding.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
// Builds a full-filter block builder.
//   prefix_extractor    - optional (may be null); when set, key prefixes are
//                         added to the filter as well.
//   table_opt           - only whole_key_filtering is read from it.
//   filter_bits_builder - must be non-null; this object takes ownership of it.
FullFilterBlockBuilder::FullFilterBlockBuilder(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt,
    FilterBitsBuilder* filter_bits_builder)
    : prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      num_added_(0),
      filter_bits_builder_(filter_bits_builder) {
  assert(filter_bits_builder != nullptr);
}
|
||||||
|
|
||||||
|
void FullFilterBlockBuilder::Add(const Slice& key) {
|
||||||
|
if (whole_key_filtering_) {
|
||||||
|
AddKey(key);
|
||||||
|
}
|
||||||
|
if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
|
||||||
|
AddPrefix(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add key to filter if needed
|
||||||
|
inline void FullFilterBlockBuilder::AddKey(const Slice& key) {
|
||||||
|
filter_bits_builder_->AddKey(key);
|
||||||
|
num_added_++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add prefix to filter if needed
|
||||||
|
inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
|
||||||
|
Slice prefix = prefix_extractor_->Transform(key);
|
||||||
|
filter_bits_builder_->AddKey(prefix);
|
||||||
|
num_added_++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Produces the filter block contents. Returns an empty Slice when nothing
// was added since construction or the previous Finish(); otherwise resets the
// added-entry counter and returns what the bits builder generates, handing it
// filter_data as the buffer holder.
Slice FullFilterBlockBuilder::Finish() {
  if (num_added_ == 0) {
    return Slice();
  }
  num_added_ = 0;
  return filter_bits_builder_->Finish(&filter_data);
}
|
||||||
|
|
||||||
|
// Builds a reader over an already-loaded full filter block.
//   prefix_extractor          - optional (may be null).
//   table_opt                 - only whole_key_filtering is read from it.
//   contents                  - the raw filter block bytes.
//   filter_bits_reader        - must be non-null; this object takes ownership.
//   delete_contents_after_use - when true, this object also takes ownership
//                               of the buffer behind contents.data().
FullFilterBlockReader::FullFilterBlockReader(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt,
    const Slice& contents,
    FilterBitsReader* filter_bits_reader, bool delete_contents_after_use)
    : prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      filter_bits_reader_(filter_bits_reader),
      contents_(contents) {
  assert(filter_bits_reader != nullptr);

  if (!delete_contents_after_use) {
    return;  // the caller keeps ownership of the contents buffer
  }
  filter_data.reset(contents.data());
}
|
||||||
|
|
||||||
|
bool FullFilterBlockReader::KeyMayMatch(const Slice& key,
|
||||||
|
uint64_t block_offset) {
|
||||||
|
assert(block_offset == kNotValid);
|
||||||
|
if (!whole_key_filtering_) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return MayMatch(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool FullFilterBlockReader::PrefixMayMatch(const Slice& prefix,
|
||||||
|
uint64_t block_offset) {
|
||||||
|
assert(block_offset == kNotValid);
|
||||||
|
if (!prefix_extractor_) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return MayMatch(prefix);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool FullFilterBlockReader::MayMatch(const Slice& entry) {
|
||||||
|
if (contents_.size() != 0) {
|
||||||
|
return filter_bits_reader_->MayMatch(entry);
|
||||||
|
}
|
||||||
|
return true; // remain the same with block_based filter
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t FullFilterBlockReader::ApproximateMemoryUsage() const {
|
||||||
|
return contents_.size();
|
||||||
|
}
|
||||||
|
} // namespace rocksdb
|
107
table/full_filter_block.h
Normal file
107
table/full_filter_block.h
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include "rocksdb/options.h"
|
||||||
|
#include "rocksdb/slice.h"
|
||||||
|
#include "rocksdb/slice_transform.h"
|
||||||
|
#include "db/dbformat.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
#include "table/filter_block.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
class FilterPolicy;
|
||||||
|
class FilterBitsBuilder;
|
||||||
|
class FilterBitsReader;
|
||||||
|
|
||||||
|
// A FullFilterBlockBuilder is used to construct a full filter for a
|
||||||
|
// particular Table. It generates a single string which is stored as
|
||||||
|
// a special block in the Table.
|
||||||
|
// The format of full filter block is:
|
||||||
|
// +----------------------------------------------------------------+
|
||||||
|
// | full filter for all keys in sst file |
|
||||||
|
// +----------------------------------------------------------------+
|
||||||
|
// The full filter can be very large. At the end of it, we put
|
||||||
|
// num_probes: how many hash functions are used in bloom filter
|
||||||
|
//
|
||||||
|
class FullFilterBlockBuilder : public FilterBlockBuilder {
|
||||||
|
public:
|
||||||
|
explicit FullFilterBlockBuilder(const SliceTransform* prefix_extractor,
|
||||||
|
const BlockBasedTableOptions& table_opt,
|
||||||
|
FilterBitsBuilder* filter_bits_builder);
|
||||||
|
// bits_builder is created in filter_policy, it should be passed in here
|
||||||
|
// directly. and be deleted here
|
||||||
|
~FullFilterBlockBuilder() {}
|
||||||
|
|
||||||
|
virtual bool IsBlockBased() override { return false; }
|
||||||
|
virtual void StartBlock(uint64_t block_offset) override {}
|
||||||
|
virtual void Add(const Slice& key) override;
|
||||||
|
virtual Slice Finish() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// important: all of these might point to invalid addresses
|
||||||
|
// at the time of destruction of this filter block. destructor
|
||||||
|
// should NOT dereference them.
|
||||||
|
const SliceTransform* prefix_extractor_;
|
||||||
|
bool whole_key_filtering_;
|
||||||
|
|
||||||
|
uint32_t num_added_;
|
||||||
|
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
|
||||||
|
std::unique_ptr<const char[]> filter_data;
|
||||||
|
|
||||||
|
void AddKey(const Slice& key);
|
||||||
|
void AddPrefix(const Slice& key);
|
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
FullFilterBlockBuilder(const FullFilterBlockBuilder&);
|
||||||
|
void operator=(const FullFilterBlockBuilder&);
|
||||||
|
};
|
||||||
|
|
||||||
|
// A FilterBlockReader is used to parse filter from SST table.
|
||||||
|
// KeyMayMatch and PrefixMayMatch would trigger filter checking
|
||||||
|
class FullFilterBlockReader : public FilterBlockReader {
|
||||||
|
public:
|
||||||
|
// REQUIRES: "contents" and filter_bits_reader must stay live
|
||||||
|
// while *this is live.
|
||||||
|
explicit FullFilterBlockReader(const SliceTransform* prefix_extractor,
|
||||||
|
const BlockBasedTableOptions& table_opt,
|
||||||
|
const Slice& contents,
|
||||||
|
FilterBitsReader* filter_bits_reader,
|
||||||
|
bool delete_contents_after_use = false);
|
||||||
|
|
||||||
|
// bits_reader is created in filter_policy, it should be passed in here
|
||||||
|
// directly. and be deleted here
|
||||||
|
~FullFilterBlockReader() {}
|
||||||
|
|
||||||
|
virtual bool IsBlockBased() override { return false; }
|
||||||
|
virtual bool KeyMayMatch(const Slice& key,
|
||||||
|
uint64_t block_offset = kNotValid) override;
|
||||||
|
virtual bool PrefixMayMatch(const Slice& prefix,
|
||||||
|
uint64_t block_offset = kNotValid) override;
|
||||||
|
virtual size_t ApproximateMemoryUsage() const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const SliceTransform* prefix_extractor_;
|
||||||
|
bool whole_key_filtering_;
|
||||||
|
|
||||||
|
std::unique_ptr<FilterBitsReader> filter_bits_reader_;
|
||||||
|
Slice contents_;
|
||||||
|
std::unique_ptr<const char[]> filter_data;
|
||||||
|
|
||||||
|
bool MayMatch(const Slice& entry);
|
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
FullFilterBlockReader(const FullFilterBlockReader&);
|
||||||
|
void operator=(const FullFilterBlockReader&);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
181
table/full_filter_block_test.cc
Normal file
181
table/full_filter_block_test.cc
Normal file
@ -0,0 +1,181 @@
|
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "table/full_filter_block.h"
|
||||||
|
|
||||||
|
#include "rocksdb/filter_policy.h"
|
||||||
|
#include "util/coding.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
#include "util/logging.h"
|
||||||
|
#include "util/testharness.h"
|
||||||
|
#include "util/testutil.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
class TestFilterBitsBuilder : public FilterBitsBuilder {
|
||||||
|
public:
|
||||||
|
explicit TestFilterBitsBuilder() {}
|
||||||
|
|
||||||
|
// Add Key to filter
|
||||||
|
virtual void AddKey(const Slice& key) override {
|
||||||
|
hash_entries_.push_back(Hash(key.data(), key.size(), 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate the filter using the keys that are added
|
||||||
|
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override {
|
||||||
|
uint32_t len = hash_entries_.size() * 4;
|
||||||
|
char* data = new char[len];
|
||||||
|
for (size_t i = 0; i < hash_entries_.size(); i++) {
|
||||||
|
EncodeFixed32(data + i * 4, hash_entries_[i]);
|
||||||
|
}
|
||||||
|
buf->reset(data);
|
||||||
|
return Slice(data, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<uint32_t> hash_entries_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class TestFilterBitsReader : public FilterBitsReader {
|
||||||
|
public:
|
||||||
|
explicit TestFilterBitsReader(const Slice& contents)
|
||||||
|
: data_(contents.data()), len_(contents.size()) {}
|
||||||
|
|
||||||
|
virtual bool MayMatch(const Slice& entry) override {
|
||||||
|
uint32_t h = Hash(entry.data(), entry.size(), 1);
|
||||||
|
for (size_t i = 0; i + 4 <= len_; i += 4) {
|
||||||
|
if (h == DecodeFixed32(data_ + i)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const char* data_;
|
||||||
|
uint32_t len_;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class TestHashFilter : public FilterPolicy {
|
||||||
|
public:
|
||||||
|
virtual const char* Name() const {
|
||||||
|
return "TestHashFilter";
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
|
||||||
|
PutFixed32(dst, h);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||||
|
uint32_t h = Hash(key.data(), key.size(), 1);
|
||||||
|
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) {
|
||||||
|
if (h == DecodeFixed32(filter.data() + i)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual FilterBitsBuilder* GetFilterBitsBuilder() const override {
|
||||||
|
return new TestFilterBitsBuilder();
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual FilterBitsReader* GetFilterBitsReader(const Slice& contents)
|
||||||
|
const override {
|
||||||
|
return new TestFilterBitsReader(contents);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class PluginFullFilterBlockTest {
|
||||||
|
public:
|
||||||
|
BlockBasedTableOptions table_options_;
|
||||||
|
|
||||||
|
PluginFullFilterBlockTest() {
|
||||||
|
table_options_.filter_policy.reset(new TestHashFilter());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// A builder that never received a key must produce an empty block, and a
// reader over that empty block must still report "may match".
TEST(PluginFullFilterBlockTest, PluginEmptyBuilder) {
  auto* const bits_builder =
      table_options_.filter_policy->GetFilterBitsBuilder();
  FullFilterBlockBuilder builder(nullptr, table_options_, bits_builder);
  Slice block = builder.Finish();
  ASSERT_EQ("", EscapeString(block));

  auto* const bits_reader =
      table_options_.filter_policy->GetFilterBitsReader(block);
  FullFilterBlockReader reader(nullptr, table_options_, block, bits_reader);
  // Keeps the same semantics as the block-based filter.
  ASSERT_TRUE(reader.KeyMayMatch("foo"));
}
|
||||||
|
|
||||||
|
// Builds one filter from a handful of keys with the TestHashFilter policy and
// checks that every added key matches while two absent keys do not.
TEST(PluginFullFilterBlockTest, PluginSingleChunk) {
  auto* const bits_builder =
      table_options_.filter_policy->GetFilterBitsBuilder();
  FullFilterBlockBuilder builder(nullptr, table_options_, bits_builder);

  // "box" appears twice in the input.
  const char* const added_keys[] = {"foo", "bar", "box", "box", "hello"};
  for (const char* key : added_keys) {
    builder.Add(key);
  }
  Slice block = builder.Finish();

  auto* const bits_reader =
      table_options_.filter_policy->GetFilterBitsReader(block);
  FullFilterBlockReader reader(nullptr, table_options_, block, bits_reader);

  // Keys that were added.
  ASSERT_TRUE(reader.KeyMayMatch("foo"));
  ASSERT_TRUE(reader.KeyMayMatch("bar"));
  ASSERT_TRUE(reader.KeyMayMatch("box"));
  ASSERT_TRUE(reader.KeyMayMatch("hello"));
  ASSERT_TRUE(reader.KeyMayMatch("foo"));
  // Keys that were never added.
  ASSERT_TRUE(!reader.KeyMayMatch("missing"));
  ASSERT_TRUE(!reader.KeyMayMatch("other"));
}
|
||||||
|
|
||||||
|
class FullFilterBlockTest {
|
||||||
|
public:
|
||||||
|
BlockBasedTableOptions table_options_;
|
||||||
|
|
||||||
|
FullFilterBlockTest() {
|
||||||
|
table_options_.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
~FullFilterBlockTest() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// With the built-in Bloom policy, a builder that never received a key must
// produce an empty block, and a reader over it must still report "may match".
TEST(FullFilterBlockTest, EmptyBuilder) {
  auto* const bits_builder =
      table_options_.filter_policy->GetFilterBitsBuilder();
  FullFilterBlockBuilder builder(nullptr, table_options_, bits_builder);
  Slice block = builder.Finish();
  ASSERT_EQ("", EscapeString(block));

  auto* const bits_reader =
      table_options_.filter_policy->GetFilterBitsReader(block);
  FullFilterBlockReader reader(nullptr, table_options_, block, bits_reader);
  // Keeps the same semantics as the block-based filter.
  ASSERT_TRUE(reader.KeyMayMatch("foo"));
}
|
||||||
|
|
||||||
|
// Builds one filter from a handful of keys with the built-in Bloom policy and
// checks that every added key matches while two absent keys do not.
TEST(FullFilterBlockTest, SingleChunk) {
  auto* const bits_builder =
      table_options_.filter_policy->GetFilterBitsBuilder();
  FullFilterBlockBuilder builder(nullptr, table_options_, bits_builder);

  // "box" appears twice in the input.
  const char* const added_keys[] = {"foo", "bar", "box", "box", "hello"};
  for (const char* key : added_keys) {
    builder.Add(key);
  }
  Slice block = builder.Finish();

  auto* const bits_reader =
      table_options_.filter_policy->GetFilterBitsReader(block);
  FullFilterBlockReader reader(nullptr, table_options_, block, bits_reader);

  // Keys that were added.
  ASSERT_TRUE(reader.KeyMayMatch("foo"));
  ASSERT_TRUE(reader.KeyMayMatch("bar"));
  ASSERT_TRUE(reader.KeyMayMatch("box"));
  ASSERT_TRUE(reader.KeyMayMatch("hello"));
  ASSERT_TRUE(reader.KeyMayMatch("foo"));
  // Keys that were never added.
  ASSERT_TRUE(!reader.KeyMayMatch("missing"));
  ASSERT_TRUE(!reader.KeyMayMatch("other"));
}
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
// Test binary entry point: runs every registered test and returns the result
// as the exit status. The command-line arguments are not used.
int main(int argc, char** argv) {
  const int status = rocksdb::test::RunAllTests();
  return status;
}
|
@ -20,7 +20,6 @@
|
|||||||
#include "table/block_builder.h"
|
#include "table/block_builder.h"
|
||||||
#include "table/bloom_block.h"
|
#include "table/bloom_block.h"
|
||||||
#include "table/plain_table_index.h"
|
#include "table/plain_table_index.h"
|
||||||
#include "table/filter_block.h"
|
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
#include "table/meta_blocks.h"
|
#include "table/meta_blocks.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
|
@ -20,7 +20,6 @@
|
|||||||
|
|
||||||
#include "table/block.h"
|
#include "table/block.h"
|
||||||
#include "table/bloom_block.h"
|
#include "table/bloom_block.h"
|
||||||
#include "table/filter_block.h"
|
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
#include "table/meta_blocks.h"
|
#include "table/meta_blocks.h"
|
||||||
#include "table/two_level_iterator.h"
|
#include "table/two_level_iterator.h"
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include "include/rocksdb/comparator.h"
|
#include "include/rocksdb/comparator.h"
|
||||||
#include "include/rocksdb/table.h"
|
#include "include/rocksdb/table.h"
|
||||||
#include "include/rocksdb/slice_transform.h"
|
#include "include/rocksdb/slice_transform.h"
|
||||||
|
#include "include/rocksdb/filter_policy.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
@ -146,13 +147,30 @@ class SanityTestPlainTableFactory : public SanityTest {
|
|||||||
Options options_;
|
Options options_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class SanityTestBloomFilter : public SanityTest {
|
||||||
|
public:
|
||||||
|
explicit SanityTestBloomFilter(const std::string& path)
|
||||||
|
: SanityTest(path) {
|
||||||
|
table_options_.filter_policy.reset(NewBloomFilterPolicy(10));
|
||||||
|
options_.table_factory.reset(NewBlockBasedTableFactory(table_options_));
|
||||||
|
}
|
||||||
|
~SanityTestBloomFilter() {}
|
||||||
|
virtual Options GetOptions() const { return options_; }
|
||||||
|
virtual std::string Name() const { return "BloomFilter"; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
Options options_;
|
||||||
|
BlockBasedTableOptions table_options_;
|
||||||
|
};
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
bool RunSanityTests(const std::string& command, const std::string& path) {
|
bool RunSanityTests(const std::string& command, const std::string& path) {
|
||||||
std::vector<SanityTest*> sanity_tests = {
|
std::vector<SanityTest*> sanity_tests = {
|
||||||
new SanityTestBasic(path),
|
new SanityTestBasic(path),
|
||||||
new SanityTestSpecialComparator(path),
|
new SanityTestSpecialComparator(path),
|
||||||
new SanityTestZlibCompression(path),
|
new SanityTestZlibCompression(path),
|
||||||
new SanityTestPlainTableFactory(path)};
|
new SanityTestPlainTableFactory(path),
|
||||||
|
new SanityTestBloomFilter(path)};
|
||||||
|
|
||||||
if (command == "create") {
|
if (command == "create") {
|
||||||
fprintf(stderr, "Creating...\n");
|
fprintf(stderr, "Creating...\n");
|
||||||
|
@ -209,6 +209,9 @@ static const bool FLAGS_reopen_dummy __attribute__((unused)) =
|
|||||||
DEFINE_int32(bloom_bits, 10, "Bloom filter bits per key. "
|
DEFINE_int32(bloom_bits, 10, "Bloom filter bits per key. "
|
||||||
"Negative means use default settings.");
|
"Negative means use default settings.");
|
||||||
|
|
||||||
|
// When true, the stress test creates its Bloom filter policy with the
// block-based builder; when false (the default) it uses the full filter.
// The trailing space in the first literal keeps the concatenated help text
// from reading "filterinstead".
DEFINE_bool(use_block_based_filter, false,
            "use block based filter "
            "instead of full filter for block based table");
|
||||||
|
|
||||||
DEFINE_string(db, "", "Use the db with the following name.");
|
DEFINE_string(db, "", "Use the db with the following name.");
|
||||||
|
|
||||||
DEFINE_bool(verify_checksum, false,
|
DEFINE_bool(verify_checksum, false,
|
||||||
@ -757,8 +760,10 @@ class StressTest {
|
|||||||
? NewLRUCache(FLAGS_compressed_cache_size)
|
? NewLRUCache(FLAGS_compressed_cache_size)
|
||||||
: nullptr),
|
: nullptr),
|
||||||
filter_policy_(FLAGS_bloom_bits >= 0
|
filter_policy_(FLAGS_bloom_bits >= 0
|
||||||
? NewBloomFilterPolicy(FLAGS_bloom_bits)
|
? FLAGS_use_block_based_filter
|
||||||
: nullptr),
|
? NewBloomFilterPolicy(FLAGS_bloom_bits, true)
|
||||||
|
: NewBloomFilterPolicy(FLAGS_bloom_bits, false)
|
||||||
|
: nullptr),
|
||||||
db_(nullptr),
|
db_(nullptr),
|
||||||
new_column_family_name_(1),
|
new_column_family_name_(1),
|
||||||
num_times_reopened_(0) {
|
num_times_reopened_(0) {
|
||||||
|
309
util/bloom.cc
309
util/bloom.cc
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
// This source code is licensed under the BSD-style license found in the
|
// This source code is licensed under the BSD-style license found in the
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
@ -10,42 +10,266 @@
|
|||||||
#include "rocksdb/filter_policy.h"
|
#include "rocksdb/filter_policy.h"
|
||||||
|
|
||||||
#include "rocksdb/slice.h"
|
#include "rocksdb/slice.h"
|
||||||
|
#include "table/block_based_filter_block.h"
|
||||||
|
#include "table/full_filter_block.h"
|
||||||
#include "util/hash.h"
|
#include "util/hash.h"
|
||||||
|
#include "util/coding.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
namespace {
|
class BlockBasedFilterBlockBuilder;
|
||||||
|
class FullFilterBlockBuilder;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
class FullFilterBitsBuilder : public FilterBitsBuilder {
|
||||||
|
public:
|
||||||
|
explicit FullFilterBitsBuilder(const size_t bits_per_key,
|
||||||
|
const size_t num_probes)
|
||||||
|
: bits_per_key_(bits_per_key),
|
||||||
|
num_probes_(num_probes) {
|
||||||
|
assert(bits_per_key_);
|
||||||
|
}
|
||||||
|
|
||||||
|
~FullFilterBitsBuilder() {}
|
||||||
|
|
||||||
|
virtual void AddKey(const Slice& key) override {
|
||||||
|
uint32_t hash = BloomHash(key);
|
||||||
|
if (hash_entries_.size() == 0 || hash != hash_entries_.back()) {
|
||||||
|
hash_entries_.push_back(hash);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a filter for hashes [0, n-1]; the filter buffer is allocated here.
|
||||||
|
// When creating filter, it is ensured that
|
||||||
|
// total_bits = num_lines * CACHE_LINE_SIZE * 8
|
||||||
|
// dst len is >= 5, 1 for num_probes, 4 for num_lines
|
||||||
|
// Then total_bits = (len - 5) * 8, and cache_line_size can be calculated
|
||||||
|
// +----------------------------------------------------------------+
|
||||||
|
// | filter data with length total_bits/8 |
|
||||||
|
// +----------------------------------------------------------------+
|
||||||
|
// | |
|
||||||
|
// | ... |
|
||||||
|
// | |
|
||||||
|
// +----------------------------------------------------------------+
|
||||||
|
// | ... | num_probes : 1 byte | num_lines : 4 bytes |
|
||||||
|
// +----------------------------------------------------------------+
|
||||||
|
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override {
|
||||||
|
char* data = nullptr;
|
||||||
|
uint32_t total_bits, num_lines;
|
||||||
|
data = ReserveSpace(hash_entries_.size(), &total_bits, &num_lines);
|
||||||
|
assert(data);
|
||||||
|
|
||||||
|
if (total_bits != 0 && num_lines != 0) {
|
||||||
|
for (auto h : hash_entries_) {
|
||||||
|
AddHash(h, data, num_lines, total_bits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
data[total_bits/8] = static_cast<char>(num_probes_);
|
||||||
|
EncodeFixed32(data + total_bits/8 + 1, static_cast<uint32_t>(num_lines));
|
||||||
|
|
||||||
|
buf->reset(data);
|
||||||
|
hash_entries_.clear();
|
||||||
|
|
||||||
|
return Slice(data, total_bits / 8 + 5);
|
||||||
|
}
|
||||||
|
|
||||||
class BloomFilterPolicy : public FilterPolicy {
|
|
||||||
private:
|
private:
|
||||||
size_t bits_per_key_;
|
size_t bits_per_key_;
|
||||||
size_t k_;
|
size_t num_probes_;
|
||||||
uint32_t (*hash_func_)(const Slice& key);
|
std::vector<uint32_t> hash_entries_;
|
||||||
|
|
||||||
void initialize() {
|
// Get the total number of bits, rounded to suit the CPU cache line size
|
||||||
// We intentionally round down to reduce probing cost a little bit
|
uint32_t GetTotalBitsForLocality(uint32_t total_bits);
|
||||||
k_ = static_cast<size_t>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
|
|
||||||
if (k_ < 1) k_ = 1;
|
// Reserve space for new filter
|
||||||
if (k_ > 30) k_ = 30;
|
char* ReserveSpace(const int num_entry, uint32_t* total_bits,
|
||||||
|
uint32_t* num_lines);
|
||||||
|
|
||||||
|
// Assuming single threaded access to this function.
|
||||||
|
void AddHash(uint32_t h, char* data, uint32_t num_lines,
|
||||||
|
uint32_t total_bits);
|
||||||
|
|
||||||
|
// No Copy allowed
|
||||||
|
FullFilterBitsBuilder(const FullFilterBitsBuilder&);
|
||||||
|
void operator=(const FullFilterBitsBuilder&);
|
||||||
|
};
|
||||||
|
|
||||||
|
uint32_t FullFilterBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) {
|
||||||
|
uint32_t num_lines =
|
||||||
|
(total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8);
|
||||||
|
|
||||||
|
// Make num_lines an odd number to make sure more bits are involved
|
||||||
|
// when determining which block.
|
||||||
|
if (num_lines % 2 == 0) {
|
||||||
|
num_lines++;
|
||||||
|
}
|
||||||
|
return num_lines * (CACHE_LINE_SIZE * 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
|
||||||
|
uint32_t* total_bits, uint32_t* num_lines) {
|
||||||
|
assert(bits_per_key_);
|
||||||
|
char* data = nullptr;
|
||||||
|
if (num_entry != 0) {
|
||||||
|
uint32_t total_bits_tmp = num_entry * bits_per_key_;
|
||||||
|
|
||||||
|
*total_bits = GetTotalBitsForLocality(total_bits_tmp);
|
||||||
|
*num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
|
||||||
|
assert(*total_bits > 0 && *total_bits % 8 == 0);
|
||||||
|
} else {
|
||||||
|
// filter is empty, just leave space for metadata
|
||||||
|
*total_bits = 0;
|
||||||
|
*num_lines = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reserve space for Filter
|
||||||
|
uint32_t sz = *total_bits / 8;
|
||||||
|
sz += 5; // 4 bytes for num_lines, 1 byte for num_probes
|
||||||
|
|
||||||
|
data = new char[sz];
|
||||||
|
memset(data, 0, sz);
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
|
||||||
|
uint32_t num_lines, uint32_t total_bits) {
|
||||||
|
assert(num_lines > 0 && total_bits > 0);
|
||||||
|
|
||||||
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
|
uint32_t b = (h % num_lines) * (CACHE_LINE_SIZE * 8);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < num_probes_; ++i) {
|
||||||
|
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
|
||||||
|
// to a simple operation by compiler.
|
||||||
|
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
|
||||||
|
data[bitpos / 8] |= (1 << (bitpos % 8));
|
||||||
|
|
||||||
|
h += delta;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class FullFilterBitsReader : public FilterBitsReader {
|
||||||
public:
|
public:
|
||||||
explicit BloomFilterPolicy(int bits_per_key,
|
explicit FullFilterBitsReader(const Slice& contents)
|
||||||
uint32_t (*hash_func)(const Slice& key))
|
: data_(const_cast<char*>(contents.data())),
|
||||||
: bits_per_key_(bits_per_key), hash_func_(hash_func) {
|
data_len_(contents.size()),
|
||||||
initialize();
|
num_probes_(0), num_lines_(0) {
|
||||||
|
assert(data_);
|
||||||
|
GetFilterMeta(contents, &num_probes_, &num_lines_);
|
||||||
|
// Sanitize broken parameter
|
||||||
|
if (num_lines_ != 0 && (data_len_-5) % num_lines_ != 0) {
|
||||||
|
num_lines_ = 0;
|
||||||
|
num_probes_ = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
explicit BloomFilterPolicy(int bits_per_key)
|
|
||||||
: bits_per_key_(bits_per_key) {
|
~FullFilterBitsReader() {}
|
||||||
hash_func_ = BloomHash;
|
|
||||||
|
virtual bool MayMatch(const Slice& entry) override {
|
||||||
|
if (data_len_ <= 5) { // remain same with original filter
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Any other invalid parameters, including a broken filter, are treated as a match
|
||||||
|
if (num_probes_ == 0 || num_lines_ == 0) return true;
|
||||||
|
uint32_t hash = BloomHash(entry);
|
||||||
|
return HashMayMatch(hash, Slice(data_, data_len_),
|
||||||
|
num_probes_, num_lines_);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Filter meta data
|
||||||
|
char* data_;
|
||||||
|
uint32_t data_len_;
|
||||||
|
size_t num_probes_;
|
||||||
|
uint32_t num_lines_;
|
||||||
|
|
||||||
|
// Get num_probes, and num_lines from filter
|
||||||
|
// If filter format broken, set both to 0.
|
||||||
|
void GetFilterMeta(const Slice& filter, size_t* num_probes,
|
||||||
|
uint32_t* num_lines);
|
||||||
|
|
||||||
|
// "filter" contains the data appended by a preceding call to
|
||||||
|
// CreateFilterFromHash() on this class. This method must return true if
|
||||||
|
// the key was in the list of keys passed to CreateFilter().
|
||||||
|
// This method may return true or false if the key was not on the
|
||||||
|
// list, but it should aim to return false with a high probability.
|
||||||
|
//
|
||||||
|
// hash: target to be checked
|
||||||
|
// filter: the whole filter, including meta data bytes
|
||||||
|
// num_probes: number of probes, read before hand
|
||||||
|
// num_lines: filter metadata, read before hand
|
||||||
|
// Before calling this function, need to ensure the input meta data
|
||||||
|
// is valid.
|
||||||
|
bool HashMayMatch(const uint32_t& hash, const Slice& filter,
|
||||||
|
const size_t& num_probes, const uint32_t& num_lines);
|
||||||
|
|
||||||
|
// No Copy allowed
|
||||||
|
FullFilterBitsReader(const FullFilterBitsReader&);
|
||||||
|
void operator=(const FullFilterBitsReader&);
|
||||||
|
};
|
||||||
|
|
||||||
|
void FullFilterBitsReader::GetFilterMeta(const Slice& filter,
|
||||||
|
size_t* num_probes, uint32_t* num_lines) {
|
||||||
|
uint32_t len = filter.size();
|
||||||
|
if (len <= 5) {
|
||||||
|
// filter is empty or broken
|
||||||
|
*num_probes = 0;
|
||||||
|
*num_lines = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
*num_probes = filter.data()[len - 5];
|
||||||
|
*num_lines = DecodeFixed32(filter.data() + len - 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool FullFilterBitsReader::HashMayMatch(const uint32_t& hash,
|
||||||
|
const Slice& filter, const size_t& num_probes,
|
||||||
|
const uint32_t& num_lines) {
|
||||||
|
uint32_t len = filter.size();
|
||||||
|
if (len <= 5) return false; // remain the same with original filter
|
||||||
|
|
||||||
|
// It is ensured the params are valid before calling it
|
||||||
|
assert(num_probes != 0);
|
||||||
|
assert(num_lines != 0 && (len - 5) % num_lines == 0);
|
||||||
|
uint32_t cache_line_size = (len - 5) / num_lines;
|
||||||
|
const char* data = filter.data();
|
||||||
|
|
||||||
|
uint32_t h = hash;
|
||||||
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
|
uint32_t b = (h % num_lines) * (cache_line_size * 8);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < num_probes; ++i) {
|
||||||
|
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
|
||||||
|
// to a simple and operation by compiler.
|
||||||
|
const uint32_t bitpos = b + (h % (cache_line_size * 8));
|
||||||
|
if (((data[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
h += delta;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// An implementation of filter policy
|
||||||
|
class BloomFilterPolicy : public FilterPolicy {
|
||||||
|
public:
|
||||||
|
explicit BloomFilterPolicy(int bits_per_key, bool use_block_based_builder)
|
||||||
|
: bits_per_key_(bits_per_key), hash_func_(BloomHash),
|
||||||
|
use_block_based_builder_(use_block_based_builder) {
|
||||||
initialize();
|
initialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual const char* Name() const {
|
~BloomFilterPolicy() {
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual const char* Name() const override {
|
||||||
return "rocksdb.BuiltinBloomFilter";
|
return "rocksdb.BuiltinBloomFilter";
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
virtual void CreateFilter(const Slice* keys, int n,
|
||||||
|
std::string* dst) const override {
|
||||||
// Compute bloom filter size (in both bits and bytes)
|
// Compute bloom filter size (in both bits and bytes)
|
||||||
size_t bits = n * bits_per_key_;
|
size_t bits = n * bits_per_key_;
|
||||||
|
|
||||||
@ -58,14 +282,14 @@ class BloomFilterPolicy : public FilterPolicy {
|
|||||||
|
|
||||||
const size_t init_size = dst->size();
|
const size_t init_size = dst->size();
|
||||||
dst->resize(init_size + bytes, 0);
|
dst->resize(init_size + bytes, 0);
|
||||||
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
|
dst->push_back(static_cast<char>(num_probes_)); // Remember # of probes
|
||||||
char* array = &(*dst)[init_size];
|
char* array = &(*dst)[init_size];
|
||||||
for (size_t i = 0; i < (size_t)n; i++) {
|
for (size_t i = 0; i < (size_t)n; i++) {
|
||||||
// Use double-hashing to generate a sequence of hash values.
|
// Use double-hashing to generate a sequence of hash values.
|
||||||
// See analysis in [Kirsch,Mitzenmacher 2006].
|
// See analysis in [Kirsch,Mitzenmacher 2006].
|
||||||
uint32_t h = hash_func_(keys[i]);
|
uint32_t h = hash_func_(keys[i]);
|
||||||
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
for (size_t j = 0; j < k_; j++) {
|
for (size_t j = 0; j < num_probes_; j++) {
|
||||||
const uint32_t bitpos = h % bits;
|
const uint32_t bitpos = h % bits;
|
||||||
array[bitpos/8] |= (1 << (bitpos % 8));
|
array[bitpos/8] |= (1 << (bitpos % 8));
|
||||||
h += delta;
|
h += delta;
|
||||||
@ -73,7 +297,8 @@ class BloomFilterPolicy : public FilterPolicy {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
|
virtual bool KeyMayMatch(const Slice& key,
|
||||||
|
const Slice& bloom_filter) const override {
|
||||||
const size_t len = bloom_filter.size();
|
const size_t len = bloom_filter.size();
|
||||||
if (len < 2) return false;
|
if (len < 2) return false;
|
||||||
|
|
||||||
@ -98,11 +323,43 @@ class BloomFilterPolicy : public FilterPolicy {
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
|
virtual FilterBitsBuilder* GetFilterBitsBuilder() const override {
|
||||||
return new BloomFilterPolicy(bits_per_key);
|
if (use_block_based_builder_) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new FullFilterBitsBuilder(bits_per_key_, num_probes_);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual FilterBitsReader* GetFilterBitsReader(const Slice& contents)
|
||||||
|
const override {
|
||||||
|
return new FullFilterBitsReader(contents);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Whether this policy was configured to use the block-based builder
|
||||||
|
bool UseBlockBasedBuilder() { return use_block_based_builder_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
size_t bits_per_key_;
|
||||||
|
size_t num_probes_;
|
||||||
|
uint32_t (*hash_func_)(const Slice& key);
|
||||||
|
|
||||||
|
const bool use_block_based_builder_;
|
||||||
|
|
||||||
|
void initialize() {
|
||||||
|
// We intentionally round down to reduce probing cost a little bit
|
||||||
|
num_probes_ = static_cast<size_t>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
|
||||||
|
if (num_probes_ < 1) num_probes_ = 1;
|
||||||
|
if (num_probes_ > 30) num_probes_ = 30;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Factory for the built-in Bloom filter policy. `bits_per_key` and
// `use_block_based_builder` are forwarded unchanged to BloomFilterPolicy.
// The policy is heap-allocated and returned as a raw pointer.
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key,
                                         bool use_block_based_builder) {
  const FilterPolicy* const policy =
      new BloomFilterPolicy(bits_per_key, use_block_based_builder);
  return policy;
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -16,12 +16,13 @@ int main() {
|
|||||||
#else
|
#else
|
||||||
|
|
||||||
#include <gflags/gflags.h>
|
#include <gflags/gflags.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "rocksdb/filter_policy.h"
|
#include "rocksdb/filter_policy.h"
|
||||||
|
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/testharness.h"
|
#include "util/testharness.h"
|
||||||
#include "util/testutil.h"
|
#include "util/testutil.h"
|
||||||
|
#include "util/arena.h"
|
||||||
|
|
||||||
using GFLAGS::ParseCommandLineFlags;
|
using GFLAGS::ParseCommandLineFlags;
|
||||||
|
|
||||||
@ -36,6 +37,19 @@ static Slice Key(int i, char* buffer) {
|
|||||||
return Slice(buffer, sizeof(i));
|
return Slice(buffer, sizeof(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the next key count to test: the step grows with the magnitude of
// `length` (+1 below 10, +10 below 100, +100 below 1000, +1000 otherwise).
static int NextLength(int length) {
  const int step = (length < 10)     ? 1
                   : (length < 100)  ? 10
                   : (length < 1000) ? 100
                                     : 1000;
  return length + step;
}
|
||||||
|
|
||||||
class BloomTest {
|
class BloomTest {
|
||||||
private:
|
private:
|
||||||
const FilterPolicy* policy_;
|
const FilterPolicy* policy_;
|
||||||
@ -43,7 +57,8 @@ class BloomTest {
|
|||||||
std::vector<std::string> keys_;
|
std::vector<std::string> keys_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
BloomTest() : policy_(NewBloomFilterPolicy(FLAGS_bits_per_key)) { }
|
BloomTest() : policy_(
|
||||||
|
NewBloomFilterPolicy(FLAGS_bits_per_key)) {}
|
||||||
|
|
||||||
~BloomTest() {
|
~BloomTest() {
|
||||||
delete policy_;
|
delete policy_;
|
||||||
@ -117,19 +132,6 @@ TEST(BloomTest, Small) {
|
|||||||
ASSERT_TRUE(! Matches("foo"));
|
ASSERT_TRUE(! Matches("foo"));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int NextLength(int length) {
|
|
||||||
if (length < 10) {
|
|
||||||
length += 1;
|
|
||||||
} else if (length < 100) {
|
|
||||||
length += 10;
|
|
||||||
} else if (length < 1000) {
|
|
||||||
length += 100;
|
|
||||||
} else {
|
|
||||||
length += 1000;
|
|
||||||
}
|
|
||||||
return length;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(BloomTest, VaryingLengths) {
|
TEST(BloomTest, VaryingLengths) {
|
||||||
char buffer[sizeof(int)];
|
char buffer[sizeof(int)];
|
||||||
|
|
||||||
@ -171,6 +173,121 @@ TEST(BloomTest, VaryingLengths) {
|
|||||||
|
|
||||||
// Different bits-per-byte
|
// Different bits-per-byte
|
||||||
|
|
||||||
|
class FullBloomTest {
|
||||||
|
private:
|
||||||
|
const FilterPolicy* policy_;
|
||||||
|
std::unique_ptr<FilterBitsBuilder> bits_builder_;
|
||||||
|
std::unique_ptr<FilterBitsReader> bits_reader_;
|
||||||
|
std::unique_ptr<const char[]> buf_;
|
||||||
|
size_t filter_size_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
FullBloomTest() :
|
||||||
|
policy_(NewBloomFilterPolicy(FLAGS_bits_per_key, false)),
|
||||||
|
filter_size_(0) {
|
||||||
|
Reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
~FullBloomTest() {
|
||||||
|
delete policy_;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Reset() {
|
||||||
|
bits_builder_.reset(policy_->GetFilterBitsBuilder());
|
||||||
|
bits_reader_.reset(nullptr);
|
||||||
|
buf_.reset(nullptr);
|
||||||
|
filter_size_ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Add(const Slice& s) {
|
||||||
|
bits_builder_->AddKey(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Build() {
|
||||||
|
Slice filter = bits_builder_->Finish(&buf_);
|
||||||
|
bits_reader_.reset(policy_->GetFilterBitsReader(filter));
|
||||||
|
filter_size_ = filter.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t FilterSize() const {
|
||||||
|
return filter_size_;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Matches(const Slice& s) {
|
||||||
|
if (bits_reader_ == nullptr) {
|
||||||
|
Build();
|
||||||
|
}
|
||||||
|
return bits_reader_->MayMatch(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
double FalsePositiveRate() {
|
||||||
|
char buffer[sizeof(int)];
|
||||||
|
int result = 0;
|
||||||
|
for (int i = 0; i < 10000; i++) {
|
||||||
|
if (Matches(Key(i + 1000000000, buffer))) {
|
||||||
|
result++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result / 10000.0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// A filter built from no keys reports "no match" at the bits-reader level.
TEST(FullBloomTest, FullEmptyFilter) {
  // Build explicitly; Matches() would otherwise do it on the first query.
  Build();
  ASSERT_TRUE(!Matches("hello"));
  ASSERT_TRUE(!Matches("world"));
}
|
||||||
|
|
||||||
|
// Two keys in, both must match; two keys that were never added must not.
TEST(FullBloomTest, FullSmall) {
  Add("hello");
  Add("world");
  // Build explicitly; Matches() would otherwise do it on the first query.
  Build();

  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));

  ASSERT_TRUE(!Matches("x"));
  ASSERT_TRUE(!Matches("foo"));
}
|
||||||
|
|
||||||
|
// Builds full filters for key counts from 1 up to 10000 (stepping with
// NextLength) and checks, for each one, the filter size bound, that every
// added key matches, and that the false positive rate stays at or below 2%.
// Filters above 1.25% are counted as "mediocre" and may number at most one
// fifth of the good ones.
TEST(FullBloomTest, FullVaryingLengths) {
  char buffer[sizeof(int)];

  // Tally of filters by false positive rate.
  int mediocre_filters = 0;
  int good_filters = 0;

  int length = 1;
  while (length <= 10000) {
    Reset();
    for (int i = 0; i < length; i++) {
      Add(Key(i, buffer));
    }
    Build();

    // Size bound: 10 bits per key, plus slack and the 5 metadata bytes.
    // NOTE(review): 128 is presumably rounding slack of two cache lines -
    // confirm against CACHE_LINE_SIZE.
    ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 128 + 5)) << length;

    // No false negatives are allowed.
    for (int i = 0; i < length; i++) {
      ASSERT_TRUE(Matches(Key(i, buffer)))
          << "Length " << length << "; key " << i;
    }

    // Check false positive rate
    double rate = FalsePositiveRate();
    if (kVerbose >= 1) {
      fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
              rate*100.0, length, static_cast<int>(FilterSize()));
    }
    ASSERT_LE(rate, 0.02);  // Must not be over 2%
    if (rate > 0.0125) {
      mediocre_filters++;  // Allowed, but not too often
    } else {
      good_filters++;
    }

    length = NextLength(length);
  }

  if (kVerbose >= 1) {
    fprintf(stderr, "Filters: %d good, %d mediocre\n",
            good_filters, mediocre_filters);
  }
  ASSERT_LE(mediocre_filters, good_filters/5);
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
Loading…
Reference in New Issue
Block a user