Stats for false positive rate of full filters
Summary: Adds two stats to allow us to measure the false positive rate of full filters: - The total count of positives: rocksdb.bloom.filter.full.positive - The total count of true positives: rocksdb.bloom.filter.full.true.positive Note the term "full" in the stat name to indicate that they are meaningful in full filters. block-based filters are to be deprecated soon and supporting it is not worth the additional cost of if-then-else branches. Closes #3680 Tested by: $ ./db_bench -benchmarks=fillrandom -db /dev/shm/rocksdb-tmpdb --num=1000000 -bloom_bits=10 $ ./db_bench -benchmarks="readwhilewriting" -db /dev/shm/rocksdb-tmpdb --statistics -bloom_bits=10 --duration=60 --num=2000000 --use_existing_db 2>&1 > /tmp/full.log $ grep filter.full /tmp/full.log rocksdb.bloom.filter.full.positive COUNT : 3628593 rocksdb.bloom.filter.full.true.positive COUNT : 3536026 which gives the false positive rate of 2.5% Closes https://github.com/facebook/rocksdb/pull/3681 Differential Revision: D7517570 Pulled By: maysamyabandeh fbshipit-source-id: 630ab1a473afdce404916d297035b6318de4c052
This commit is contained in:
parent
f2827845f9
commit
5b66eee2cb
@ -71,8 +71,13 @@ enum Tickers : uint32_t {
|
|||||||
// # of bytes written into cache.
|
// # of bytes written into cache.
|
||||||
BLOCK_CACHE_BYTES_WRITE,
|
BLOCK_CACHE_BYTES_WRITE,
|
||||||
|
|
||||||
// # of times bloom filter has avoided file reads.
|
// # of times bloom filter has avoided file reads, i.e., negatives.
|
||||||
BLOOM_FILTER_USEFUL,
|
BLOOM_FILTER_USEFUL,
|
||||||
|
// # of times bloom FullFilter has not avoided the reads.
|
||||||
|
BLOOM_FILTER_FULL_POSITIVE,
|
||||||
|
// # of times bloom FullFilter has not avoided the reads and data actually
|
||||||
|
// exist.
|
||||||
|
BLOOM_FILTER_FULL_TRUE_POSITIVE,
|
||||||
|
|
||||||
// # persistent cache hit
|
// # persistent cache hit
|
||||||
PERSISTENT_CACHE_HIT,
|
PERSISTENT_CACHE_HIT,
|
||||||
@ -332,6 +337,9 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
|||||||
{BLOCK_CACHE_BYTES_READ, "rocksdb.block.cache.bytes.read"},
|
{BLOCK_CACHE_BYTES_READ, "rocksdb.block.cache.bytes.read"},
|
||||||
{BLOCK_CACHE_BYTES_WRITE, "rocksdb.block.cache.bytes.write"},
|
{BLOCK_CACHE_BYTES_WRITE, "rocksdb.block.cache.bytes.write"},
|
||||||
{BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"},
|
{BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"},
|
||||||
|
{BLOOM_FILTER_FULL_POSITIVE, "rocksdb.bloom.filter.full.positive"},
|
||||||
|
{BLOOM_FILTER_FULL_TRUE_POSITIVE,
|
||||||
|
"rocksdb.bloom.filter.full.true.positive"},
|
||||||
{PERSISTENT_CACHE_HIT, "rocksdb.persistent.cache.hit"},
|
{PERSISTENT_CACHE_HIT, "rocksdb.persistent.cache.hit"},
|
||||||
{PERSISTENT_CACHE_MISS, "rocksdb.persistent.cache.miss"},
|
{PERSISTENT_CACHE_MISS, "rocksdb.persistent.cache.miss"},
|
||||||
{SIM_BLOCK_CACHE_HIT, "rocksdb.sim.block.cache.hit"},
|
{SIM_BLOCK_CACHE_HIT, "rocksdb.sim.block.cache.hit"},
|
||||||
@ -349,8 +357,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
|||||||
"rocksdb.compaction.range_del.drop.obsolete"},
|
"rocksdb.compaction.range_del.drop.obsolete"},
|
||||||
{COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE,
|
{COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE,
|
||||||
"rocksdb.compaction.optimized.del.drop.obsolete"},
|
"rocksdb.compaction.optimized.del.drop.obsolete"},
|
||||||
{COMPACTION_CANCELLED,
|
{COMPACTION_CANCELLED, "rocksdb.compaction.cancelled"},
|
||||||
"rocksdb.compaction.cancelled"},
|
|
||||||
{NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"},
|
{NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"},
|
||||||
{NUMBER_KEYS_READ, "rocksdb.number.keys.read"},
|
{NUMBER_KEYS_READ, "rocksdb.number.keys.read"},
|
||||||
{NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"},
|
{NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"},
|
||||||
|
@ -2030,19 +2030,23 @@ bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options,
|
|||||||
}
|
}
|
||||||
Slice user_key = ExtractUserKey(internal_key);
|
Slice user_key = ExtractUserKey(internal_key);
|
||||||
const Slice* const const_ikey_ptr = &internal_key;
|
const Slice* const const_ikey_ptr = &internal_key;
|
||||||
|
bool may_match = true;
|
||||||
if (filter->whole_key_filtering()) {
|
if (filter->whole_key_filtering()) {
|
||||||
return filter->KeyMayMatch(user_key, kNotValid, no_io, const_ikey_ptr);
|
may_match = filter->KeyMayMatch(user_key, kNotValid, no_io, const_ikey_ptr);
|
||||||
|
} else if (!read_options.total_order_seek &&
|
||||||
|
rep_->ioptions.prefix_extractor &&
|
||||||
|
rep_->table_properties->prefix_extractor_name.compare(
|
||||||
|
rep_->ioptions.prefix_extractor->Name()) == 0 &&
|
||||||
|
rep_->ioptions.prefix_extractor->InDomain(user_key) &&
|
||||||
|
!filter->PrefixMayMatch(
|
||||||
|
rep_->ioptions.prefix_extractor->Transform(user_key),
|
||||||
|
kNotValid, false, const_ikey_ptr)) {
|
||||||
|
may_match = false;
|
||||||
}
|
}
|
||||||
if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor &&
|
if (may_match) {
|
||||||
rep_->table_properties->prefix_extractor_name.compare(
|
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE);
|
||||||
rep_->ioptions.prefix_extractor->Name()) == 0 &&
|
|
||||||
rep_->ioptions.prefix_extractor->InDomain(user_key) &&
|
|
||||||
!filter->PrefixMayMatch(
|
|
||||||
rep_->ioptions.prefix_extractor->Transform(user_key), kNotValid,
|
|
||||||
false, const_ikey_ptr)) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
return true;
|
return may_match;
|
||||||
}
|
}
|
||||||
|
|
||||||
Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||||
@ -2070,6 +2074,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|||||||
iiter_unique_ptr.reset(iiter);
|
iiter_unique_ptr.reset(iiter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool matched = false;  // if such user key matched a key in SST
|
||||||
bool done = false;
|
bool done = false;
|
||||||
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
|
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
|
||||||
Slice handle_value = iiter->value();
|
Slice handle_value = iiter->value();
|
||||||
@ -2111,7 +2116,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|||||||
s = Status::Corruption(Slice());
|
s = Status::Corruption(Slice());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!get_context->SaveValue(parsed_key, biter.value(), &biter)) {
|
if (!get_context->SaveValue(parsed_key, biter.value(), &matched,
|
||||||
|
&biter)) {
|
||||||
done = true;
|
done = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -2123,6 +2129,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (matched && filter != nullptr && !filter->IsBlockBased()) {
|
||||||
|
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE);
|
||||||
|
}
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
s = iiter->status();
|
s = iiter->status();
|
||||||
}
|
}
|
||||||
|
@ -170,7 +170,8 @@ Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/,
|
|||||||
Slice full_key(bucket, key_length_);
|
Slice full_key(bucket, key_length_);
|
||||||
ParsedInternalKey found_ikey;
|
ParsedInternalKey found_ikey;
|
||||||
ParseInternalKey(full_key, &found_ikey);
|
ParseInternalKey(full_key, &found_ikey);
|
||||||
get_context->SaveValue(found_ikey, value);
|
bool dont_care __attribute__((__unused__));
|
||||||
|
get_context->SaveValue(found_ikey, value, &dont_care);
|
||||||
}
|
}
|
||||||
// We don't support merge operations. So, we return here.
|
// We don't support merge operations. So, we return here.
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
|
@ -95,10 +95,13 @@ void GetContext::RecordCounters(Tickers ticker, size_t val) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
|
bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
|
||||||
const Slice& value, Cleanable* value_pinner) {
|
const Slice& value, bool* matched,
|
||||||
|
Cleanable* value_pinner) {
|
||||||
|
assert(matched);
|
||||||
assert((state_ != kMerge && parsed_key.type != kTypeMerge) ||
|
assert((state_ != kMerge && parsed_key.type != kTypeMerge) ||
|
||||||
merge_context_ != nullptr);
|
merge_context_ != nullptr);
|
||||||
if (ucmp_->Equal(parsed_key.user_key, user_key_)) {
|
if (ucmp_->Equal(parsed_key.user_key, user_key_)) {
|
||||||
|
*matched = true;
|
||||||
// If the value is not in the snapshot, skip it
|
// If the value is not in the snapshot, skip it
|
||||||
if (!CheckCallback(parsed_key.sequence)) {
|
if (!CheckCallback(parsed_key.sequence)) {
|
||||||
return true; // to continue to the next seq
|
return true; // to continue to the next seq
|
||||||
@ -231,11 +234,12 @@ void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
|
|||||||
assert(ret);
|
assert(ret);
|
||||||
(void)ret;
|
(void)ret;
|
||||||
|
|
||||||
|
bool dont_care __attribute__((__unused__));
|
||||||
// Since SequenceNumber is not stored and unknown, we will use
|
// Since SequenceNumber is not stored and unknown, we will use
|
||||||
// kMaxSequenceNumber.
|
// kMaxSequenceNumber.
|
||||||
get_context->SaveValue(
|
get_context->SaveValue(
|
||||||
ParsedInternalKey(user_key, kMaxSequenceNumber, type), value,
|
ParsedInternalKey(user_key, kMaxSequenceNumber, type), value,
|
||||||
value_pinner);
|
&dont_care, value_pinner);
|
||||||
}
|
}
|
||||||
#else // ROCKSDB_LITE
|
#else // ROCKSDB_LITE
|
||||||
assert(false);
|
assert(false);
|
||||||
|
@ -42,10 +42,13 @@ class GetContext {
|
|||||||
// Records this key, value, and any meta-data (such as sequence number and
|
// Records this key, value, and any meta-data (such as sequence number and
|
||||||
// state) into this GetContext.
|
// state) into this GetContext.
|
||||||
//
|
//
|
||||||
|
// If the parsed_key matches the user key that we are looking for, sets
|
||||||
|
// matched to true.
|
||||||
|
//
|
||||||
// Returns True if more keys need to be read (due to merges) or
|
// Returns True if more keys need to be read (due to merges) or
|
||||||
// False if the complete value has been found.
|
// False if the complete value has been found.
|
||||||
bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
|
bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
|
||||||
Cleanable* value_pinner = nullptr);
|
bool* matched, Cleanable* value_pinner = nullptr);
|
||||||
|
|
||||||
// Simplified version of the previous function. Should only be used when we
|
// Simplified version of the previous function. Should only be used when we
|
||||||
// know that the operation is a Put.
|
// know that the operation is a Put.
|
||||||
|
@ -41,7 +41,8 @@ Status MockTableReader::Get(const ReadOptions&, const Slice& key,
|
|||||||
return Status::Corruption(Slice());
|
return Status::Corruption(Slice());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!get_context->SaveValue(parsed_key, iter->value())) {
|
bool dont_care __attribute__((__unused__));
|
||||||
|
if (!get_context->SaveValue(parsed_key, iter->value(), &dont_care)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -594,7 +594,8 @@ Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target,
|
|||||||
// TODO(ljin): since we know the key comparison result here,
|
// TODO(ljin): since we know the key comparison result here,
|
||||||
// can we enable the fast path?
|
// can we enable the fast path?
|
||||||
if (internal_comparator_.Compare(found_key, parsed_target) >= 0) {
|
if (internal_comparator_.Compare(found_key, parsed_target) >= 0) {
|
||||||
if (!get_context->SaveValue(found_key, found_value)) {
|
bool dont_care __attribute__((__unused__));
|
||||||
|
if (!get_context->SaveValue(found_key, found_value, &dont_care)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user