Add BloomFilter to PlainTableIterator::Seek()
Summary: This patch adds a simple bloom filter in PlainTableIterator::Seek() Test Plan: N/A Reviewers: CC: Task ID: # Blame Rev:
This commit is contained in:
parent
b135d01e7b
commit
718488abc5
@ -59,7 +59,7 @@ public:
|
|||||||
// Return the current option configuration.
|
// Return the current option configuration.
|
||||||
Options CurrentOptions() {
|
Options CurrentOptions() {
|
||||||
Options options;
|
Options options;
|
||||||
options.table_factory.reset(new PlainTableFactory(16, 8));
|
options.table_factory.reset(new PlainTableFactory(16, 8, 2, 0.8));
|
||||||
options.allow_mmap_reads = true;
|
options.allow_mmap_reads = true;
|
||||||
return options;
|
return options;
|
||||||
}
|
}
|
||||||
|
@ -47,8 +47,15 @@ class PlainTableFactory: public TableFactory {
|
|||||||
public:
|
public:
|
||||||
~PlainTableFactory() {
|
~PlainTableFactory() {
|
||||||
}
|
}
|
||||||
PlainTableFactory(int user_key_size, int key_prefix_len) :
|
// user_key_size is the length of the user key. key_prefix_len is the
|
||||||
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len) {
|
// length of the prefix used for in-memory indexes. bloom_num_bits is
|
||||||
|
// the number of bits used for the bloom filter per key. hash_table_ratio is
|
||||||
|
// the desired utilization of the hash table used for prefix hashing.
|
||||||
|
// hash_table_ratio = number of prefixes / #buckets in the hash table
|
||||||
|
PlainTableFactory(int user_key_size, int key_prefix_len,
|
||||||
|
int bloom_num_bits = 0, double hash_table_ratio = 0.75) :
|
||||||
|
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len),
|
||||||
|
bloom_num_bits_(bloom_num_bits), hash_table_ratio_(hash_table_ratio) {
|
||||||
}
|
}
|
||||||
const char* Name() const override {
|
const char* Name() const override {
|
||||||
return "PlainTable";
|
return "PlainTable";
|
||||||
@ -64,6 +71,8 @@ public:
|
|||||||
private:
|
private:
|
||||||
int user_key_size_;
|
int user_key_size_;
|
||||||
int key_prefix_len_;
|
int key_prefix_len_;
|
||||||
|
int bloom_num_bits_;
|
||||||
|
double hash_table_ratio_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -19,13 +19,13 @@ Status PlainTableFactory::GetTableReader(const Options& options,
|
|||||||
unique_ptr<TableReader>* table)
|
unique_ptr<TableReader>* table)
|
||||||
const {
|
const {
|
||||||
return PlainTableReader::Open(options, soptions, std::move(file), file_size,
|
return PlainTableReader::Open(options, soptions, std::move(file), file_size,
|
||||||
table, user_key_size_, key_prefix_len_);
|
table, user_key_size_, key_prefix_len_,
|
||||||
|
bloom_num_bits_, hash_table_ratio_);
|
||||||
}
|
}
|
||||||
|
|
||||||
TableBuilder* PlainTableFactory::GetTableBuilder(
|
TableBuilder* PlainTableFactory::GetTableBuilder(
|
||||||
const Options& options, WritableFile* file,
|
const Options& options, WritableFile* file,
|
||||||
CompressionType compression_type) const {
|
CompressionType compression_type) const {
|
||||||
return new PlainTableBuilder(options, file, user_key_size_,
|
return new PlainTableBuilder(options, file, user_key_size_, key_prefix_len_);
|
||||||
key_prefix_len_);
|
|
||||||
}
|
}
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -40,9 +40,16 @@ namespace rocksdb {
|
|||||||
|
|
||||||
PlainTableReader::PlainTableReader(const EnvOptions& storage_options,
|
PlainTableReader::PlainTableReader(const EnvOptions& storage_options,
|
||||||
uint64_t file_size, int user_key_size,
|
uint64_t file_size, int user_key_size,
|
||||||
int key_prefix_len) :
|
int key_prefix_len, int bloom_bits_per_key,
|
||||||
soptions_(storage_options), file_size_(file_size),
|
double hash_table_ratio) :
|
||||||
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len) {
|
hash_table_size_(0), soptions_(storage_options), file_size_(file_size),
|
||||||
|
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len),
|
||||||
|
hash_table_ratio_(hash_table_ratio) {
|
||||||
|
if (bloom_bits_per_key > 0) {
|
||||||
|
filter_policy_ = NewBloomFilterPolicy(bloom_bits_per_key);
|
||||||
|
} else {
|
||||||
|
filter_policy_ = nullptr;
|
||||||
|
}
|
||||||
hash_table_ = nullptr;
|
hash_table_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -50,6 +57,9 @@ PlainTableReader::~PlainTableReader() {
|
|||||||
if (hash_table_ != nullptr) {
|
if (hash_table_ != nullptr) {
|
||||||
delete[] hash_table_;
|
delete[] hash_table_;
|
||||||
}
|
}
|
||||||
|
if (filter_policy_ != nullptr) {
|
||||||
|
delete filter_policy_;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Status PlainTableReader::Open(const Options& options,
|
Status PlainTableReader::Open(const Options& options,
|
||||||
@ -58,12 +68,16 @@ Status PlainTableReader::Open(const Options& options,
|
|||||||
uint64_t file_size,
|
uint64_t file_size,
|
||||||
unique_ptr<TableReader>* table_reader,
|
unique_ptr<TableReader>* table_reader,
|
||||||
const int user_key_size,
|
const int user_key_size,
|
||||||
const int key_prefix_len) {
|
const int key_prefix_len,
|
||||||
|
const int bloom_num_bits,
|
||||||
|
double hash_table_ratio) {
|
||||||
assert(options.allow_mmap_reads);
|
assert(options.allow_mmap_reads);
|
||||||
|
|
||||||
PlainTableReader* t = new PlainTableReader(soptions, file_size,
|
PlainTableReader* t = new PlainTableReader(soptions, file_size,
|
||||||
user_key_size,
|
user_key_size,
|
||||||
key_prefix_len);
|
key_prefix_len,
|
||||||
|
bloom_num_bits,
|
||||||
|
hash_table_ratio);
|
||||||
t->file_ = std::move(file);
|
t->file_ = std::move(file);
|
||||||
t->options_ = options;
|
t->options_ = options;
|
||||||
Status s = t->PopulateIndex(file_size);
|
Status s = t->PopulateIndex(file_size);
|
||||||
@ -146,14 +160,25 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
|
|||||||
delete[] hash_table_;
|
delete[] hash_table_;
|
||||||
}
|
}
|
||||||
// Make the hash table 3/5 full
|
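// Size the hash table from hash_table_ratio_ (no longer a fixed 3/5 load). NOTE(review): the `< 1.0` test below looks inverted; confirm.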
|
||||||
hash_table_size_ = tmp_index.size() * 1.66;
|
std::vector<Slice> filter_entries(0); // for creating bloom filter;
|
||||||
|
if (filter_policy_ != nullptr) {
|
||||||
|
filter_entries.resize(tmp_index.size());
|
||||||
|
}
|
||||||
|
double hash_table_size_multipier =
|
||||||
|
(hash_table_ratio_ < 1.0) ? 1.0 : 1.0 / hash_table_ratio_;
|
||||||
|
hash_table_size_ = tmp_index.size() * hash_table_size_multipier + 1;
|
||||||
hash_table_ = new char[GetHashTableRecordLen() * hash_table_size_];
|
hash_table_ = new char[GetHashTableRecordLen() * hash_table_size_];
|
||||||
for (int i = 0; i < hash_table_size_; i++) {
|
for (int i = 0; i < hash_table_size_; i++) {
|
||||||
memcpy(GetHashTableBucketPtr(i) + key_prefix_len_, &file_size_,
|
memcpy(GetHashTableBucketPtr(i) + key_prefix_len_, &file_size_,
|
||||||
kOffsetLen);
|
kOffsetLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t count = 0;
|
||||||
for (auto it = tmp_index.begin(); it != tmp_index.end(); ++it) {
|
for (auto it = tmp_index.begin(); it != tmp_index.end(); ++it) {
|
||||||
|
if (filter_policy_ != nullptr) {
|
||||||
|
filter_entries[count++] = it->first;
|
||||||
|
}
|
||||||
|
|
||||||
int bucket = GetHashTableBucket(it->first);
|
int bucket = GetHashTableBucket(it->first);
|
||||||
uint64_t* hash_value;
|
uint64_t* hash_value;
|
||||||
while (true) {
|
while (true) {
|
||||||
@ -168,6 +193,10 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
|
|||||||
memcpy(bucket_ptr, it->first.data(), key_prefix_len_);
|
memcpy(bucket_ptr, it->first.data(), key_prefix_len_);
|
||||||
memcpy(bucket_ptr + key_prefix_len_, &it->second, kOffsetLen);
|
memcpy(bucket_ptr + key_prefix_len_, &it->second, kOffsetLen);
|
||||||
}
|
}
|
||||||
|
if (filter_policy_ != nullptr) {
|
||||||
|
filter_policy_->CreateFilter(&filter_entries[0], count, &filter_str_);
|
||||||
|
filter_slice_ = Slice(filter_str_.data(), filter_str_.size());
|
||||||
|
}
|
||||||
|
|
||||||
Log(options_.info_log, "Number of prefixes: %d, suffix_map length %ld",
|
Log(options_.info_log, "Number of prefixes: %d, suffix_map length %ld",
|
||||||
hash_table_size_, sub_index_.length());
|
hash_table_size_, sub_index_.length());
|
||||||
@ -187,7 +216,6 @@ inline void PlainTableReader::GetHashValue(int bucket, uint64_t** ret_value) {
|
|||||||
|
|
||||||
Status PlainTableReader::GetOffset(const Slice& target, uint64_t* offset) {
|
Status PlainTableReader::GetOffset(const Slice& target, uint64_t* offset) {
|
||||||
Status s;
|
Status s;
|
||||||
|
|
||||||
int bucket = GetHashTableBucket(target);
|
int bucket = GetHashTableBucket(target);
|
||||||
uint64_t* found_value;
|
uint64_t* found_value;
|
||||||
Slice hash_key;
|
Slice hash_key;
|
||||||
@ -248,6 +276,12 @@ Status PlainTableReader::GetOffset(const Slice& target, uint64_t* offset) {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool PlainTableReader::MayHavePrefix(const Slice& target_prefix) {
|
||||||
|
return filter_policy_ == nullptr
|
||||||
|
|| filter_policy_->KeyMayMatch(target_prefix, filter_slice_);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uint64_t PlainTableReader::Next(uint64_t offset, Slice* key, Slice* value,
|
uint64_t PlainTableReader::Next(uint64_t offset, Slice* key, Slice* value,
|
||||||
Slice* tmp_slice) {
|
Slice* tmp_slice) {
|
||||||
if (offset >= file_size_) {
|
if (offset >= file_size_) {
|
||||||
@ -321,6 +355,11 @@ void PlainTableIterator::SeekToLast() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void PlainTableIterator::Seek(const Slice& target) {
|
void PlainTableIterator::Seek(const Slice& target) {
|
||||||
|
if (!table_->MayHavePrefix(Slice(target.data(), table_->key_prefix_len_))) {
|
||||||
|
offset_ = next_offset_ = table_->file_size_;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
Status s = table_->GetOffset(target, &next_offset_);
|
Status s = table_->GetOffset(target, &next_offset_);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
status_ = s;
|
status_ = s;
|
||||||
|
@ -57,7 +57,8 @@ public:
|
|||||||
static Status Open(const Options& options, const EnvOptions& soptions,
|
static Status Open(const Options& options, const EnvOptions& soptions,
|
||||||
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
|
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
|
||||||
unique_ptr<TableReader>* table, const int user_key_size,
|
unique_ptr<TableReader>* table, const int user_key_size,
|
||||||
const int key_prefix_len);
|
const int key_prefix_len, const int bloom_num_bits,
|
||||||
|
double hash_table_ratio);
|
||||||
|
|
||||||
bool PrefixMayMatch(const Slice& internal_prefix);
|
bool PrefixMayMatch(const Slice& internal_prefix);
|
||||||
|
|
||||||
@ -79,7 +80,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
PlainTableReader(const EnvOptions& storage_options, uint64_t file_size,
|
PlainTableReader(const EnvOptions& storage_options, uint64_t file_size,
|
||||||
int user_key_size, int key_prefix_len);
|
int user_key_size, int key_prefix_len, int bloom_num_bits,
|
||||||
|
double hash_table_ratio);
|
||||||
~PlainTableReader();
|
~PlainTableReader();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -95,6 +97,10 @@ private:
|
|||||||
uint64_t file_size_;
|
uint64_t file_size_;
|
||||||
const size_t user_key_size_;
|
const size_t user_key_size_;
|
||||||
const size_t key_prefix_len_;
|
const size_t key_prefix_len_;
|
||||||
|
const double hash_table_ratio_;
|
||||||
|
const FilterPolicy* filter_policy_;
|
||||||
|
std::string filter_str_;
|
||||||
|
Slice filter_slice_;
|
||||||
|
|
||||||
TableProperties tbl_props;
|
TableProperties tbl_props;
|
||||||
|
|
||||||
@ -123,6 +129,7 @@ private:
|
|||||||
Status PopulateIndex(uint64_t file_size);
|
Status PopulateIndex(uint64_t file_size);
|
||||||
uint64_t Next(uint64_t offset, Slice* key, Slice* value, Slice* tmp_slice);
|
uint64_t Next(uint64_t offset, Slice* key, Slice* value, Slice* tmp_slice);
|
||||||
Status GetOffset(const Slice& target, uint64_t* offset);
|
Status GetOffset(const Slice& target, uint64_t* offset);
|
||||||
|
bool MayHavePrefix(const Slice& target_prefix);
|
||||||
|
|
||||||
// No copying allowed
|
// No copying allowed
|
||||||
explicit PlainTableReader(const TableReader&) = delete;
|
explicit PlainTableReader(const TableReader&) = delete;
|
||||||
|
@ -242,7 +242,7 @@ int main(int argc, char** argv) {
|
|||||||
if (FLAGS_plain_table) {
|
if (FLAGS_plain_table) {
|
||||||
options.allow_mmap_reads = true;
|
options.allow_mmap_reads = true;
|
||||||
env_options.use_mmap_reads = true;
|
env_options.use_mmap_reads = true;
|
||||||
tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len);
|
tf = new rocksdb::PlainTableFactory(16, FLAGS_prefix_len, FLAGS_prefix_len);
|
||||||
} else {
|
} else {
|
||||||
tf = new rocksdb::BlockBasedTableFactory();
|
tf = new rocksdb::BlockBasedTableFactory();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user