Stats for false positive rate of full filters
Summary: Adds two stats to allow us to measure the false positive rate of full filters: - The total count of positives: rocksdb.bloom.filter.full.positive - The total count of true positives: rocksdb.bloom.filter.full.true.positive Note the term "full" in the stat name to indicate that they are meaningful in full filters. Block-based filters are to be deprecated soon and supporting them is not worth the additional cost of if-then-else branches. Closes #3680 Tested by: $ ./db_bench -benchmarks=fillrandom -db /dev/shm/rocksdb-tmpdb --num=1000000 -bloom_bits=10 $ ./db_bench -benchmarks="readwhilewriting" -db /dev/shm/rocksdb-tmpdb --statistics -bloom_bits=10 --duration=60 --num=2000000 --use_existing_db 2>&1 > /tmp/full.log $ grep filter.full /tmp/full.log rocksdb.bloom.filter.full.positive COUNT : 3628593 rocksdb.bloom.filter.full.true.positive COUNT : 3536026 which gives the false positive rate of 2.5% Closes https://github.com/facebook/rocksdb/pull/3681 Differential Revision: D7517570 Pulled By: maysamyabandeh fbshipit-source-id: 630ab1a473afdce404916d297035b6318de4c052
This commit is contained in:
parent
f2827845f9
commit
5b66eee2cb
@ -71,8 +71,13 @@ enum Tickers : uint32_t {
|
||||
// # of bytes written into cache.
|
||||
BLOCK_CACHE_BYTES_WRITE,
|
||||
|
||||
// # of times bloom filter has avoided file reads.
|
||||
// # of times bloom filter has avoided file reads, i.e., negatives.
|
||||
BLOOM_FILTER_USEFUL,
|
||||
// # of times bloom FullFilter has not avoided the reads.
|
||||
BLOOM_FILTER_FULL_POSITIVE,
|
||||
// # of times bloom FullFilter has not avoided the reads and data actually
|
||||
// exist.
|
||||
BLOOM_FILTER_FULL_TRUE_POSITIVE,
|
||||
|
||||
// # persistent cache hit
|
||||
PERSISTENT_CACHE_HIT,
|
||||
@ -332,6 +337,9 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
||||
{BLOCK_CACHE_BYTES_READ, "rocksdb.block.cache.bytes.read"},
|
||||
{BLOCK_CACHE_BYTES_WRITE, "rocksdb.block.cache.bytes.write"},
|
||||
{BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"},
|
||||
{BLOOM_FILTER_FULL_POSITIVE, "rocksdb.bloom.filter.full.positive"},
|
||||
{BLOOM_FILTER_FULL_TRUE_POSITIVE,
|
||||
"rocksdb.bloom.filter.full.true.positive"},
|
||||
{PERSISTENT_CACHE_HIT, "rocksdb.persistent.cache.hit"},
|
||||
{PERSISTENT_CACHE_MISS, "rocksdb.persistent.cache.miss"},
|
||||
{SIM_BLOCK_CACHE_HIT, "rocksdb.sim.block.cache.hit"},
|
||||
@ -349,8 +357,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
||||
"rocksdb.compaction.range_del.drop.obsolete"},
|
||||
{COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE,
|
||||
"rocksdb.compaction.optimized.del.drop.obsolete"},
|
||||
{COMPACTION_CANCELLED,
|
||||
"rocksdb.compaction.cancelled"},
|
||||
{COMPACTION_CANCELLED, "rocksdb.compaction.cancelled"},
|
||||
{NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"},
|
||||
{NUMBER_KEYS_READ, "rocksdb.number.keys.read"},
|
||||
{NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"},
|
||||
|
@ -2030,19 +2030,23 @@ bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options,
|
||||
}
|
||||
Slice user_key = ExtractUserKey(internal_key);
|
||||
const Slice* const const_ikey_ptr = &internal_key;
|
||||
bool may_match = true;
|
||||
if (filter->whole_key_filtering()) {
|
||||
return filter->KeyMayMatch(user_key, kNotValid, no_io, const_ikey_ptr);
|
||||
may_match = filter->KeyMayMatch(user_key, kNotValid, no_io, const_ikey_ptr);
|
||||
} else if (!read_options.total_order_seek &&
|
||||
rep_->ioptions.prefix_extractor &&
|
||||
rep_->table_properties->prefix_extractor_name.compare(
|
||||
rep_->ioptions.prefix_extractor->Name()) == 0 &&
|
||||
rep_->ioptions.prefix_extractor->InDomain(user_key) &&
|
||||
!filter->PrefixMayMatch(
|
||||
rep_->ioptions.prefix_extractor->Transform(user_key),
|
||||
kNotValid, false, const_ikey_ptr)) {
|
||||
may_match = false;
|
||||
}
|
||||
if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor &&
|
||||
rep_->table_properties->prefix_extractor_name.compare(
|
||||
rep_->ioptions.prefix_extractor->Name()) == 0 &&
|
||||
rep_->ioptions.prefix_extractor->InDomain(user_key) &&
|
||||
!filter->PrefixMayMatch(
|
||||
rep_->ioptions.prefix_extractor->Transform(user_key), kNotValid,
|
||||
false, const_ikey_ptr)) {
|
||||
return false;
|
||||
if (may_match) {
|
||||
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE);
|
||||
}
|
||||
return true;
|
||||
return may_match;
|
||||
}
|
||||
|
||||
Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
@ -2070,6 +2074,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
iiter_unique_ptr.reset(iiter);
|
||||
}
|
||||
|
||||
bool matched = false; // if such user key mathced a key in SST
|
||||
bool done = false;
|
||||
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
|
||||
Slice handle_value = iiter->value();
|
||||
@ -2111,7 +2116,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
s = Status::Corruption(Slice());
|
||||
}
|
||||
|
||||
if (!get_context->SaveValue(parsed_key, biter.value(), &biter)) {
|
||||
if (!get_context->SaveValue(parsed_key, biter.value(), &matched,
|
||||
&biter)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
@ -2123,6 +2129,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (matched && filter != nullptr && !filter->IsBlockBased()) {
|
||||
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE);
|
||||
}
|
||||
if (s.ok()) {
|
||||
s = iiter->status();
|
||||
}
|
||||
|
@ -170,7 +170,8 @@ Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/,
|
||||
Slice full_key(bucket, key_length_);
|
||||
ParsedInternalKey found_ikey;
|
||||
ParseInternalKey(full_key, &found_ikey);
|
||||
get_context->SaveValue(found_ikey, value);
|
||||
bool dont_care __attribute__((__unused__));
|
||||
get_context->SaveValue(found_ikey, value, &dont_care);
|
||||
}
|
||||
// We don't support merge operations. So, we return here.
|
||||
return Status::OK();
|
||||
|
@ -95,10 +95,13 @@ void GetContext::RecordCounters(Tickers ticker, size_t val) {
|
||||
}
|
||||
|
||||
bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
|
||||
const Slice& value, Cleanable* value_pinner) {
|
||||
const Slice& value, bool* matched,
|
||||
Cleanable* value_pinner) {
|
||||
assert(matched);
|
||||
assert((state_ != kMerge && parsed_key.type != kTypeMerge) ||
|
||||
merge_context_ != nullptr);
|
||||
if (ucmp_->Equal(parsed_key.user_key, user_key_)) {
|
||||
*matched = true;
|
||||
// If the value is not in the snapshot, skip it
|
||||
if (!CheckCallback(parsed_key.sequence)) {
|
||||
return true; // to continue to the next seq
|
||||
@ -231,11 +234,12 @@ void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
|
||||
assert(ret);
|
||||
(void)ret;
|
||||
|
||||
bool dont_care __attribute__((__unused__));
|
||||
// Since SequenceNumber is not stored and unknown, we will use
|
||||
// kMaxSequenceNumber.
|
||||
get_context->SaveValue(
|
||||
ParsedInternalKey(user_key, kMaxSequenceNumber, type), value,
|
||||
value_pinner);
|
||||
&dont_care, value_pinner);
|
||||
}
|
||||
#else // ROCKSDB_LITE
|
||||
assert(false);
|
||||
|
@ -42,10 +42,13 @@ class GetContext {
|
||||
// Records this key, value, and any meta-data (such as sequence number and
|
||||
// state) into this GetContext.
|
||||
//
|
||||
// If the parsed_key matches the user key that we are looking for, sets
|
||||
// mathced to true.
|
||||
//
|
||||
// Returns True if more keys need to be read (due to merges) or
|
||||
// False if the complete value has been found.
|
||||
bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
|
||||
Cleanable* value_pinner = nullptr);
|
||||
bool* matched, Cleanable* value_pinner = nullptr);
|
||||
|
||||
// Simplified version of the previous function. Should only be used when we
|
||||
// know that the operation is a Put.
|
||||
|
@ -41,7 +41,8 @@ Status MockTableReader::Get(const ReadOptions&, const Slice& key,
|
||||
return Status::Corruption(Slice());
|
||||
}
|
||||
|
||||
if (!get_context->SaveValue(parsed_key, iter->value())) {
|
||||
bool dont_care __attribute__((__unused__));
|
||||
if (!get_context->SaveValue(parsed_key, iter->value(), &dont_care)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -594,7 +594,8 @@ Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target,
|
||||
// TODO(ljin): since we know the key comparison result here,
|
||||
// can we enable the fast path?
|
||||
if (internal_comparator_.Compare(found_key, parsed_target) >= 0) {
|
||||
if (!get_context->SaveValue(found_key, found_value)) {
|
||||
bool dont_care __attribute__((__unused__));
|
||||
if (!get_context->SaveValue(found_key, found_value, &dont_care)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user