5b66eee2cb
Summary: Adds two stats to allow us to measure the false positive rate of full filters: - The total count of positives: rocksdb.bloom.filter.full.positive - The total count of true positives: rocksdb.bloom.filter.full.true.positive Note the term "full" in the stat name to indicate that they are meaningful in full filters. Block-based filters are to be deprecated soon and supporting them is not worth the additional cost of if-then-else branches. Closes #3680 Tested by: $ ./db_bench -benchmarks=fillrandom -db /dev/shm/rocksdb-tmpdb --num=1000000 -bloom_bits=10 $ ./db_bench -benchmarks="readwhilewriting" -db /dev/shm/rocksdb-tmpdb --statistics -bloom_bits=10 --duration=60 --num=2000000 --use_existing_db 2>&1 > /tmp/full.log $ grep filter.full /tmp/full.log rocksdb.bloom.filter.full.positive COUNT : 3628593 rocksdb.bloom.filter.full.true.positive COUNT : 3536026 which gives the false positive rate of 2.5% Closes https://github.com/facebook/rocksdb/pull/3681 Differential Revision: D7517570 Pulled By: maysamyabandeh fbshipit-source-id: 630ab1a473afdce404916d297035b6318de4c052
112 lines
3.7 KiB
C++
112 lines
3.7 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#pragma once
|
|
#include <string>
|
|
#include "db/merge_context.h"
|
|
#include "db/range_del_aggregator.h"
|
|
#include "db/read_callback.h"
|
|
#include "rocksdb/env.h"
|
|
#include "rocksdb/statistics.h"
|
|
#include "rocksdb/types.h"
|
|
#include "table/block.h"
|
|
|
|
namespace rocksdb {
|
|
// Forward declarations of types that GetContext only refers to through
// pointers.
class MergeContext;
class PinnedIteratorsManager;
|
|
|
|
class GetContext {
|
|
public:
|
|
enum GetState {
|
|
kNotFound,
|
|
kFound,
|
|
kDeleted,
|
|
kCorrupt,
|
|
kMerge, // saver contains the current merge result (the operands)
|
|
kBlobIndex,
|
|
};
|
|
uint64_t tickers_value[Tickers::TICKER_ENUM_MAX] = {0};
|
|
|
|
GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
|
|
Logger* logger, Statistics* statistics, GetState init_state,
|
|
const Slice& user_key, PinnableSlice* value, bool* value_found,
|
|
MergeContext* merge_context, RangeDelAggregator* range_del_agg,
|
|
Env* env, SequenceNumber* seq = nullptr,
|
|
PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
|
|
ReadCallback* callback = nullptr, bool* is_blob_index = nullptr);
|
|
|
|
void MarkKeyMayExist();
|
|
|
|
// Records this key, value, and any meta-data (such as sequence number and
|
|
// state) into this GetContext.
|
|
//
|
|
// If the parsed_key matches the user key that we are looking for, sets
|
|
// mathced to true.
|
|
//
|
|
// Returns True if more keys need to be read (due to merges) or
|
|
// False if the complete value has been found.
|
|
bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
|
|
bool* matched, Cleanable* value_pinner = nullptr);
|
|
|
|
// Simplified version of the previous function. Should only be used when we
|
|
// know that the operation is a Put.
|
|
void SaveValue(const Slice& value, SequenceNumber seq);
|
|
|
|
GetState State() const { return state_; }
|
|
|
|
RangeDelAggregator* range_del_agg() { return range_del_agg_; }
|
|
|
|
PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; }
|
|
|
|
// If a non-null string is passed, all the SaveValue calls will be
|
|
// logged into the string. The operations can then be replayed on
|
|
// another GetContext with replayGetContextLog.
|
|
void SetReplayLog(std::string* replay_log) { replay_log_ = replay_log; }
|
|
|
|
// Do we need to fetch the SequenceNumber for this key?
|
|
bool NeedToReadSequence() const { return (seq_ != nullptr); }
|
|
|
|
bool sample() const { return sample_; }
|
|
|
|
bool CheckCallback(SequenceNumber seq) {
|
|
if (callback_) {
|
|
return callback_->IsCommitted(seq);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void RecordCounters(Tickers ticker, size_t val);
|
|
|
|
private:
|
|
const Comparator* ucmp_;
|
|
const MergeOperator* merge_operator_;
|
|
// the merge operations encountered;
|
|
Logger* logger_;
|
|
Statistics* statistics_;
|
|
|
|
GetState state_;
|
|
Slice user_key_;
|
|
PinnableSlice* pinnable_val_;
|
|
bool* value_found_; // Is value set correctly? Used by KeyMayExist
|
|
MergeContext* merge_context_;
|
|
RangeDelAggregator* range_del_agg_;
|
|
Env* env_;
|
|
// If a key is found, seq_ will be set to the SequenceNumber of most recent
|
|
// write to the key or kMaxSequenceNumber if unknown
|
|
SequenceNumber* seq_;
|
|
std::string* replay_log_;
|
|
// Used to temporarily pin blocks when state_ == GetContext::kMerge
|
|
PinnedIteratorsManager* pinned_iters_mgr_;
|
|
ReadCallback* callback_;
|
|
bool sample_;
|
|
bool* is_blob_index_;
|
|
};
|
|
|
|
void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
|
|
GetContext* get_context,
|
|
Cleanable* value_pinner = nullptr);
|
|
|
|
} // namespace rocksdb
|