2013-10-16 14:59:46 -07:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
2012-04-17 08:36:46 -07:00
|
|
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "table/filter_block.h"
|
|
|
|
|
2013-08-13 14:04:56 -07:00
|
|
|
#include "db/dbformat.h"
|
2013-08-23 08:38:13 -07:00
|
|
|
#include "rocksdb/filter_policy.h"
|
2012-04-17 08:36:46 -07:00
|
|
|
#include "util/coding.h"
|
|
|
|
|
2013-10-03 21:49:15 -07:00
|
|
|
namespace rocksdb {
|
2012-04-17 08:36:46 -07:00
|
|
|
|
|
|
|
// See doc/table_format.txt for an explanation of the filter block format.
|
|
|
|
|
|
|
|
// A new filter is generated for every 2KB (1 << kFilterBaseLg bytes) of data.
// kFilterBaseLg is the base-2 logarithm of that granularity; it is also the
// value written as the last byte of the finished filter block.
static const size_t kFilterBaseLg = 11;
// Shift in size_t rather than int so the expression stays well-defined even
// if kFilterBaseLg is ever raised beyond the width of int.
static const size_t kFilterBase = static_cast<size_t>(1) << kFilterBaseLg;
|
|
|
|
|
2014-01-27 13:53:22 -08:00
|
|
|
// Sets up a builder from the table/DB options.
//
// opt                  supplies the prefix extractor (may be unset, in which
//                      case prefix_extractor_ is null).
// table_opt            supplies the filter policy and the whole-key-filtering
//                      switch.
// internal_comparator  stored as-is in comparator_.
//
// Only the raw pointers returned by get() are kept; presumably the option
// objects must outlive this builder -- confirm against callers.
FilterBlockBuilder::FilterBlockBuilder(
    const Options& opt,
    const BlockBasedTableOptions& table_opt,
    const Comparator* internal_comparator)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(opt.prefix_extractor.get()),
      whole_key_filtering_(table_opt.whole_key_filtering),
      comparator_(internal_comparator) {
}
|
2012-04-17 08:36:46 -07:00
|
|
|
|
|
|
|
void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
|
|
|
|
uint64_t filter_index = (block_offset / kFilterBase);
|
|
|
|
assert(filter_index >= filter_offsets_.size());
|
|
|
|
while (filter_index > filter_offsets_.size()) {
|
|
|
|
GenerateFilter();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-13 14:04:56 -07:00
|
|
|
bool FilterBlockBuilder::SamePrefix(const Slice &key1,
|
|
|
|
const Slice &key2) const {
|
|
|
|
if (!prefix_extractor_->InDomain(key1) &&
|
|
|
|
!prefix_extractor_->InDomain(key2)) {
|
|
|
|
return true;
|
|
|
|
} else if (!prefix_extractor_->InDomain(key1) ||
|
|
|
|
!prefix_extractor_->InDomain(key2)) {
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
return (prefix_extractor_->Transform(key1) ==
|
|
|
|
prefix_extractor_->Transform(key2));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-04-17 08:36:46 -07:00
|
|
|
void FilterBlockBuilder::AddKey(const Slice& key) {
|
2013-08-23 14:49:57 -07:00
|
|
|
// get slice for most recently added entry
|
2013-08-13 14:04:56 -07:00
|
|
|
Slice prev;
|
Fix two nasty use-after-free-bugs
Summary:
These bugs were caught by ASAN crash test.
1. The first one, in table/filter_block.cc is very nasty. We first reference entries_ and store the reference to Slice prev. Then, we call entries_.append(), which can change the reference. The Slice prev now points to junk.
2. The second one is a bug in a test, so it's not very serious. Once we set read_opts.prefix, we never clear it, so some other function might still reference it.
Test Plan: asan crash test now runs more than 5 mins. Before, it failed immediately. I will run the full one, but the full one takes quite some time (5 hours)
Reviewers: dhruba, haobo, kailiu
Reviewed By: dhruba
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14223
2013-11-19 21:01:48 -08:00
|
|
|
size_t added_to_start = 0;
|
2013-08-13 14:04:56 -07:00
|
|
|
|
2013-08-23 14:49:57 -07:00
|
|
|
// add key to filter if needed
|
2013-08-13 14:04:56 -07:00
|
|
|
if (whole_key_filtering_) {
|
|
|
|
start_.push_back(entries_.size());
|
Fix two nasty use-after-free-bugs
Summary:
These bugs were caught by ASAN crash test.
1. The first one, in table/filter_block.cc is very nasty. We first reference entries_ and store the reference to Slice prev. Then, we call entries_.append(), which can change the reference. The Slice prev now points to junk.
2. The second one is a bug in a test, so it's not very serious. Once we set read_opts.prefix, we never clear it, so some other function might still reference it.
Test Plan: asan crash test now runs more than 5 mins. Before, it failed immediately. I will run the full one, but the full one takes quite some time (5 hours)
Reviewers: dhruba, haobo, kailiu
Reviewed By: dhruba
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14223
2013-11-19 21:01:48 -08:00
|
|
|
++added_to_start;
|
2013-08-13 14:04:56 -07:00
|
|
|
entries_.append(key.data(), key.size());
|
|
|
|
}
|
|
|
|
|
Fix two nasty use-after-free-bugs
Summary:
These bugs were caught by ASAN crash test.
1. The first one, in table/filter_block.cc is very nasty. We first reference entries_ and store the reference to Slice prev. Then, we call entries_.append(), which can change the reference. The Slice prev now points to junk.
2. The second one is a bug in a test, so it's not very serious. Once we set read_opts.prefix, we never clear it, so some other function might still reference it.
Test Plan: asan crash test now runs more than 5 mins. Before, it failed immediately. I will run the full one, but the full one takes quite some time (5 hours)
Reviewers: dhruba, haobo, kailiu
Reviewed By: dhruba
CC: leveldb
Differential Revision: https://reviews.facebook.net/D14223
2013-11-19 21:01:48 -08:00
|
|
|
if (start_.size() > added_to_start) {
|
|
|
|
size_t prev_start = start_[start_.size() - 1 - added_to_start];
|
|
|
|
const char* base = entries_.data() + prev_start;
|
|
|
|
size_t length = entries_.size() - prev_start;
|
|
|
|
prev = Slice(base, length);
|
|
|
|
}
|
|
|
|
|
2013-08-23 14:49:57 -07:00
|
|
|
// add prefix to filter if needed
|
|
|
|
if (prefix_extractor_ && prefix_extractor_->InDomain(ExtractUserKey(key))) {
|
|
|
|
// If prefix_extractor_, this filter_block layer assumes we only
|
|
|
|
// operate on internal keys.
|
|
|
|
Slice user_key = ExtractUserKey(key);
|
2013-08-13 14:04:56 -07:00
|
|
|
// this assumes prefix(prefix(key)) == prefix(key), as the last
|
|
|
|
// entry in entries_ may be either a key or prefix, and we use
|
|
|
|
// prefix(last entry) to get the prefix of the last key.
|
2013-08-23 14:49:57 -07:00
|
|
|
if (prev.size() == 0 ||
|
|
|
|
!SamePrefix(user_key, ExtractUserKey(prev))) {
|
|
|
|
Slice prefix = prefix_extractor_->Transform(user_key);
|
2013-08-13 14:04:56 -07:00
|
|
|
InternalKey internal_prefix_tmp(prefix, 0, kTypeValue);
|
|
|
|
Slice internal_prefix = internal_prefix_tmp.Encode();
|
|
|
|
start_.push_back(entries_.size());
|
|
|
|
entries_.append(internal_prefix.data(), internal_prefix.size());
|
|
|
|
}
|
|
|
|
}
|
2012-04-17 08:36:46 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Completes the filter block and returns a Slice over result_.
//
// Any entries still pending are turned into a final filter. The trailer that
// follows the filter data consists of:
//   1. one fixed32 per generated filter (its start offset in result_),
//   2. one fixed32 holding the offset at which that array begins,
//   3. one byte holding kFilterBaseLg.
// The returned Slice refers to result_, a member of this builder.
Slice FilterBlockBuilder::Finish() {
  const bool has_pending_entries = !start_.empty();
  if (has_pending_entries) {
    GenerateFilter();
  }

  // Append array of per-filter offsets
  const uint32_t array_offset = result_.size();
  for (const auto filter_offset : filter_offsets_) {
    PutFixed32(&result_, filter_offset);
  }

  PutFixed32(&result_, array_offset);
  // Save encoding parameter in result
  result_.push_back(kFilterBaseLg);
  return Slice(result_);
}
|
|
|
|
|
|
|
|
void FilterBlockBuilder::GenerateFilter() {
|
2013-08-13 14:04:56 -07:00
|
|
|
const size_t num_entries = start_.size();
|
|
|
|
if (num_entries == 0) {
|
2012-04-17 08:36:46 -07:00
|
|
|
// Fast path if there are no keys for this filter
|
|
|
|
filter_offsets_.push_back(result_.size());
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make list of keys from flattened key structure
|
2013-08-13 14:04:56 -07:00
|
|
|
start_.push_back(entries_.size()); // Simplify length computation
|
|
|
|
tmp_entries_.resize(num_entries);
|
|
|
|
for (size_t i = 0; i < num_entries; i++) {
|
|
|
|
const char* base = entries_.data() + start_[i];
|
2012-04-17 08:36:46 -07:00
|
|
|
size_t length = start_[i+1] - start_[i];
|
2013-08-13 14:04:56 -07:00
|
|
|
tmp_entries_[i] = Slice(base, length);
|
2012-04-17 08:36:46 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Generate filter for current set of keys and append to result_.
|
|
|
|
filter_offsets_.push_back(result_.size());
|
2013-08-13 14:04:56 -07:00
|
|
|
policy_->CreateFilter(&tmp_entries_[0], num_entries, &result_);
|
2012-04-17 08:36:46 -07:00
|
|
|
|
2013-08-13 14:04:56 -07:00
|
|
|
tmp_entries_.clear();
|
|
|
|
entries_.clear();
|
2012-04-17 08:36:46 -07:00
|
|
|
start_.clear();
|
|
|
|
}
|
|
|
|
|
2013-11-12 22:46:51 -08:00
|
|
|
// Parses a serialized filter block.
//
// opt / table_opt            supply the prefix extractor, the filter policy
//                            and the whole-key-filtering switch.
// contents                   the raw filter block bytes.
// delete_contents_after_use  when true, this reader takes ownership of
//                            contents.data() through filter_data.
//
// Expected layout of `contents` (n bytes): filter data, an array of fixed32
// filter offsets, a fixed32 giving the start of that array (at n - 5), and a
// final byte holding base_lg_. If the block is too short or the array start
// lies beyond n - 5, the reader is left with data_ == offset_ == nullptr and
// num_ == 0.
FilterBlockReader::FilterBlockReader(
    const Options& opt, const BlockBasedTableOptions& table_opt,
    const Slice& contents, bool delete_contents_after_use)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(opt.prefix_extractor.get()),
      whole_key_filtering_(table_opt.whole_key_filtering),
      data_(nullptr),
      offset_(nullptr),
      num_(0),
      base_lg_(0) {
  // Take ownership before validating: the early returns below must not leak
  // the buffer when the contents turn out to be malformed.
  if (delete_contents_after_use) {
    filter_data.reset(contents.data());
  }

  size_t n = contents.size();
  if (n < 5) return;  // 1 byte for base_lg_ and 4 for start of offset array
  base_lg_ = contents[n - 1];
  uint32_t last_word = DecodeFixed32(contents.data() + n - 5);
  if (last_word > n - 5) return;  // offset array would start past the trailer
  data_ = contents.data();
  offset_ = data_ + last_word;
  // Each offset entry is 4 bytes; the array spans [last_word, n - 5).
  num_ = (n - 5 - last_word) / 4;
}
|
|
|
|
|
2013-08-13 14:04:56 -07:00
|
|
|
bool FilterBlockReader::KeyMayMatch(uint64_t block_offset,
|
|
|
|
const Slice& key) {
|
|
|
|
if (!whole_key_filtering_) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return MayMatch(block_offset, key);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool FilterBlockReader::PrefixMayMatch(uint64_t block_offset,
|
|
|
|
const Slice& prefix) {
|
|
|
|
if (!prefix_extractor_) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return MayMatch(block_offset, prefix);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool FilterBlockReader::MayMatch(uint64_t block_offset, const Slice& entry) {
|
2012-04-17 08:36:46 -07:00
|
|
|
uint64_t index = block_offset >> base_lg_;
|
|
|
|
if (index < num_) {
|
|
|
|
uint32_t start = DecodeFixed32(offset_ + index*4);
|
|
|
|
uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
|
2014-02-03 13:48:30 -08:00
|
|
|
if (start <= limit && limit <= (uint32_t)(offset_ - data_)) {
|
2012-04-17 08:36:46 -07:00
|
|
|
Slice filter = Slice(data_ + start, limit - start);
|
2013-08-13 14:04:56 -07:00
|
|
|
return policy_->KeyMayMatch(entry, filter);
|
2012-04-17 08:36:46 -07:00
|
|
|
} else if (start == limit) {
|
2013-08-13 14:04:56 -07:00
|
|
|
// Empty filters do not match any entries
|
2012-04-17 08:36:46 -07:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true; // Errors are treated as potential matches
|
|
|
|
}
|
|
|
|
|
2014-08-05 11:27:34 -07:00
|
|
|
size_t FilterBlockReader::ApproximateMemoryUsage() const {
|
|
|
|
return num_ * 4 + 5 + (offset_ - data_);
|
|
|
|
}
|
2012-04-17 08:36:46 -07:00
|
|
|
}
|