Add BloomFilter to PlainTableIterator::Seek()
Summary: This patch adds a simple bloom filter in PlainTableIterator::Seek() Test Plan: N/A Reviewers: CC: Task ID: # Blame Rev:
This commit is contained in:
parent
b135d01e7b
commit
718488abc5
@ -59,7 +59,7 @@ public:
|
||||
// Return the current option configuration.
|
||||
Options CurrentOptions() {
|
||||
Options options;
|
||||
options.table_factory.reset(new PlainTableFactory(16, 8));
|
||||
options.table_factory.reset(new PlainTableFactory(16, 8, 2, 0.8));
|
||||
options.allow_mmap_reads = true;
|
||||
return options;
|
||||
}
|
||||
|
@ -47,8 +47,15 @@ class PlainTableFactory: public TableFactory {
|
||||
public:
|
||||
~PlainTableFactory() {
|
||||
}
|
||||
PlainTableFactory(int user_key_size, int key_prefix_len) :
|
||||
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len) {
|
||||
// user_key_size is the length of the user key. key_prefix_len is the
|
||||
// length of the prefix used for in-memory indexes. bloom_num_bits is
|
||||
// the number of bits used for the bloom filter per key. hash_table_ratio is
|
||||
// the desired utilization of the hash table used for prefix hashing.
|
||||
// hash_table_ratio = number of prefixes / #buckets in the hash table
|
||||
PlainTableFactory(int user_key_size, int key_prefix_len,
|
||||
int bloom_num_bits = 0, double hash_table_ratio = 0.75) :
|
||||
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len),
|
||||
bloom_num_bits_(bloom_num_bits), hash_table_ratio_(hash_table_ratio) {
|
||||
}
|
||||
const char* Name() const override {
|
||||
return "PlainTable";
|
||||
@ -64,6 +71,8 @@ public:
|
||||
private:
|
||||
int user_key_size_;
|
||||
int key_prefix_len_;
|
||||
int bloom_num_bits_;
|
||||
double hash_table_ratio_;
|
||||
};
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -19,13 +19,13 @@ Status PlainTableFactory::GetTableReader(const Options& options,
|
||||
unique_ptr<TableReader>* table)
|
||||
const {
|
||||
return PlainTableReader::Open(options, soptions, std::move(file), file_size,
|
||||
table, user_key_size_, key_prefix_len_);
|
||||
table, user_key_size_, key_prefix_len_,
|
||||
bloom_num_bits_, hash_table_ratio_);
|
||||
}
|
||||
|
||||
TableBuilder* PlainTableFactory::GetTableBuilder(
|
||||
const Options& options, WritableFile* file,
|
||||
CompressionType compression_type) const {
|
||||
return new PlainTableBuilder(options, file, user_key_size_,
|
||||
key_prefix_len_);
|
||||
return new PlainTableBuilder(options, file, user_key_size_, key_prefix_len_);
|
||||
}
|
||||
} // namespace rocksdb
|
||||
|
@ -40,9 +40,16 @@ namespace rocksdb {
|
||||
|
||||
PlainTableReader::PlainTableReader(const EnvOptions& storage_options,
|
||||
uint64_t file_size, int user_key_size,
|
||||
int key_prefix_len) :
|
||||
soptions_(storage_options), file_size_(file_size),
|
||||
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len) {
|
||||
int key_prefix_len, int bloom_bits_per_key,
|
||||
double hash_table_ratio) :
|
||||
hash_table_size_(0), soptions_(storage_options), file_size_(file_size),
|
||||
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len),
|
||||
hash_table_ratio_(hash_table_ratio) {
|
||||
if (bloom_bits_per_key > 0) {
|
||||
filter_policy_ = NewBloomFilterPolicy(bloom_bits_per_key);
|
||||
} else {
|
||||
filter_policy_ = nullptr;
|
||||
}
|
||||
hash_table_ = nullptr;
|
||||
}
|
||||
|
||||
@ -50,6 +57,9 @@ PlainTableReader::~PlainTableReader() {
|
||||
if (hash_table_ != nullptr) {
|
||||
delete[] hash_table_;
|
||||
}
|
||||
if (filter_policy_ != nullptr) {
|
||||
delete filter_policy_;
|
||||
}
|
||||
}
|
||||
|
||||
Status PlainTableReader::Open(const Options& options,
|
||||
@ -58,12 +68,16 @@ Status PlainTableReader::Open(const Options& options,
|
||||
uint64_t file_size,
|
||||
unique_ptr<TableReader>* table_reader,
|
||||
const int user_key_size,
|
||||
const int key_prefix_len) {
|
||||
const int key_prefix_len,
|
||||
const int bloom_num_bits,
|
||||
double hash_table_ratio) {
|
||||
assert(options.allow_mmap_reads);
|
||||
|
||||
PlainTableReader* t = new PlainTableReader(soptions, file_size,
|
||||
user_key_size,
|
||||
key_prefix_len);
|
||||
key_prefix_len,
|
||||
bloom_num_bits,
|
||||
hash_table_ratio);
|
||||
t->file_ = std::move(file);
|
||||
t->options_ = options;
|
||||
Status s = t->PopulateIndex(file_size);
|
||||
@ -146,14 +160,25 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
|
||||
delete[] hash_table_;
|
||||
}
|
||||
// Make the hash table 3/5 full
|
||||
hash_table_size_ = tmp_index.size() * 1.66;
|
||||
std::vector<Slice> filter_entries(0); // for creating bloom filter;
|
||||
if (filter_policy_ != nullptr) {
|
||||
filter_entries.resize(tmp_index.size());
|
||||
}
|
||||
double hash_table_size_multipier =
|
||||
(hash_table_ratio_ < 1.0) ? 1.0 : 1.0 / hash_table_ratio_;
|
||||
hash_table_size_ = tmp_index.size() * hash_table_size_multipier + 1;
|
||||
hash_table_ = new char[GetHashTableRecordLen() * hash_table_size_];
|
||||
for (int i = 0; i < hash_table_size_; i++) {
|
||||
memcpy(GetHashTableBucketPtr(i) + key_prefix_len_, &file_size_,
|
||||
kOffsetLen);
|
||||
}
|
||||
|
||||
size_t count = 0;
|
||||
for (auto it = tmp_index.begin(); it != tmp_index.end(); ++it) {
|
||||
if (filter_policy_ != nullptr) {
|
||||
filter_entries[count++] = it->first;
|
||||
}
|
||||
|
||||
int bucket = GetHashTableBucket(it->first);
|
||||
uint64_t* hash_value;
|
||||
while (true) {
|
||||
@ -168,6 +193,10 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
|
||||
memcpy(bucket_ptr, it->first.data(), key_prefix_len_);
|
||||
memcpy(bucket_ptr + key_prefix_len_, &it->second, kOffsetLen);
|
||||
}
|
||||
if (filter_policy_ != nullptr) {
|
||||
filter_policy_->CreateFilter(&filter_entries[0], count, &filter_str_);
|
||||
filter_slice_ = Slice(filter_str_.data(), filter_str_.size());
|
||||
}
|
||||
|
||||
Log(options_.info_log, "Number of prefixes: %d, suffix_map length %ld",
|
||||
hash_table_size_, sub_index_.length());
|
||||
@ -187,7 +216,6 @@ inline void PlainTableReader::GetHashValue(int bucket, uint64_t** ret_value) {
|
||||
|
||||
Status PlainTableReader::GetOffset(const Slice& target, uint64_t* offset) {
|
||||
Status s;
|
||||
|
||||
int bucket = GetHashTableBucket(target);
|
||||
uint64_t* found_value;
|
||||
Slice hash_key;
|
||||
@ -248,6 +276,12 @@ Status PlainTableReader::GetOffset(const Slice& target, uint64_t* offset) {
|
||||
return s;
|
||||
}
|
||||
|
||||
// Reports whether this table may contain keys with the given prefix.
// Returns true unconditionally when no filter policy is configured
// (filter_policy_ is nullptr); otherwise returns whatever
// filter_policy_->KeyMayMatch() reports for target_prefix against
// filter_slice_.
bool PlainTableReader::MayHavePrefix(const Slice& target_prefix) {
|
||||
return filter_policy_ == nullptr
|
||||
|| filter_policy_->KeyMayMatch(target_prefix, filter_slice_);
|
||||
}
|
||||
|
||||
|
||||
uint64_t PlainTableReader::Next(uint64_t offset, Slice* key, Slice* value,
|
||||
Slice* tmp_slice) {
|
||||
if (offset >= file_size_) {
|
||||
@ -321,6 +355,11 @@ void PlainTableIterator::SeekToLast() {
|
||||
}
|
||||
|
||||
void PlainTableIterator::Seek(const Slice& target) {
|
||||
if (!table_->MayHavePrefix(Slice(target.data(), table_->key_prefix_len_))) {
|
||||
offset_ = next_offset_ = table_->file_size_;
|
||||
return;
|
||||
}
|
||||
|
||||
Status s = table_->GetOffset(target, &next_offset_);
|
||||
if (!s.ok()) {
|
||||
status_ = s;
|
||||
|
@ -57,7 +57,8 @@ public:
|
||||
static Status Open(const Options& options, const EnvOptions& soptions,
|
||||
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
|
||||
unique_ptr<TableReader>* table, const int user_key_size,
|
||||
const int key_prefix_len);
|
||||
const int key_prefix_len, const int bloom_num_bits,
|
||||
double hash_table_ratio);
|
||||
|
||||
bool PrefixMayMatch(const Slice& internal_prefix);
|
||||
|
||||
@ -79,7 +80,8 @@ public:
|
||||
}
|
||||
|
||||
PlainTableReader(const EnvOptions& storage_options, uint64_t file_size,
|
||||
int user_key_size, int key_prefix_len);
|
||||
int user_key_size, int key_prefix_len, int bloom_num_bits,
|
||||
double hash_table_ratio);
|
||||
~PlainTableReader();
|
||||
|
||||
private:
|
||||
@ -95,6 +97,10 @@ private:
|
||||
uint64_t file_size_;
|
||||
const size_t user_key_size_;
|
||||
const size_t key_prefix_len_;
|
||||
const double hash_table_ratio_;
|
||||
const FilterPolicy* filter_policy_;
|
||||
std::string filter_str_;
|
||||
Slice filter_slice_;
|
||||
|
||||
TableProperties tbl_props;
|
||||
|
||||
@ -123,6 +129,7 @@ private:
|
||||
Status PopulateIndex(uint64_t file_size);
|
||||
uint64_t Next(uint64_t offset, Slice* key, Slice* value, Slice* tmp_slice);
|
||||
Status GetOffset(const Slice& target, uint64_t* offset);
|
||||
bool MayHavePrefix(const Slice& target_prefix);
|
||||
|
||||
// No copying allowed
|
||||
explicit PlainTableReader(const TableReader&) = delete;
|
||||
|
@ -242,7 +242,7 @@ int main(int argc, char** argv) {
|
||||
if (FLAGS_plain_table) {
|
||||
options.allow_mmap_reads = true;
|
||||
env_options.use_mmap_reads = true;
|
||||
tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len);
|
||||
tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len, FLAGS_prefix_len);
|
||||
} else {
|
||||
tf = new rocksdb::BlockBasedTableFactory();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user