Block cache tracing: Associate a unique id with Get and MultiGet (#5514)
Summary: This PR associates a unique id with Get and MultiGet. This enables us to track how many blocks a Get/MultiGet request accesses. We can also measure the impact of row cache vs block cache.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/5514

Test Plan: make clean && COMPILE_WITH_ASAN=1 make check -j32

Differential Revision: D16032681

Pulled By: HaoyuHuang

fbshipit-source-id: 775b05f4440badd58de6667e3ec9f4fc87a0af4c
Commit: 6edc5d0719
Parent: 84c5c9aab1
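Before the diff, a minimal, self-contained sketch of the id-allocation scheme that the new BlockCacheTracer::NextGetId() implements (class and member names below are simplified stand-ins, not the actual RocksDB API): an id is handed out only while a trace is running, the counter restarts at 1 on every StartTrace, and the reserved value 0 is skipped because it marks block accesses that have no associated Get/MultiGet.

#include <atomic>
#include <cassert>
#include <cstdint>

namespace {

constexpr uint64_t kReservedGetId = 0;  // marks accesses with no associated Get/MultiGet

// Simplified stand-in for the id allocation done by BlockCacheTracer::NextGetId().
class GetIdAllocator {
 public:
  void StartTrace() {
    tracing_ = true;
    counter_.store(1);  // ids handed out during a trace start at 1
  }
  void EndTrace() { tracing_ = false; }

  // Returns kReservedGetId while no trace is running; otherwise a unique non-zero id.
  uint64_t NextGetId() {
    if (!tracing_) {
      return kReservedGetId;
    }
    uint64_t prev = counter_.fetch_add(1);
    if (prev == kReservedGetId) {
      // The counter wrapped around onto the reserved value; skip it.
      return counter_.fetch_add(1);
    }
    return prev;
  }

 private:
  std::atomic<uint64_t> counter_{1};
  bool tracing_ = false;
};

}  // namespace

int main() {
  GetIdAllocator alloc;
  assert(alloc.NextGetId() == kReservedGetId);  // tracing not started yet
  alloc.StartTrace();
  assert(alloc.NextGetId() == 1);  // one id per Version::Get / Version::MultiGet call
  assert(alloc.NextGetId() == 2);
  alloc.EndTrace();
  assert(alloc.NextGetId() == kReservedGetId);
  return 0;
}

Reserving 0 this way lets trace analysis tell apart blocks fetched on behalf of a user Get/MultiGet from blocks fetched by other callers, which keep the default kReservedGetId in their trace records.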
@@ -1663,11 +1663,17 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
   }
 
   PinnedIteratorsManager pinned_iters_mgr;
+  uint64_t tracing_get_id = BlockCacheTraceHelper::kReservedGetId;
+  if (vset_ && vset_->block_cache_tracer_ &&
+      vset_->block_cache_tracer_->is_tracing_enabled()) {
+    tracing_get_id = vset_->block_cache_tracer_->NextGetId();
+  }
   GetContext get_context(
       user_comparator(), merge_operator_, info_log_, db_statistics_,
       status->ok() ? GetContext::kNotFound : GetContext::kMerge, user_key,
       value, value_found, merge_context, max_covering_tombstone_seq, this->env_,
-      seq, merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob);
+      seq, merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob,
+      tracing_get_id);
 
   // Pin blocks that we read to hold merge operands
   if (merge_operator_) {
@@ -1785,7 +1791,12 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
   if (merge_operator_) {
     pinned_iters_mgr.StartPinning();
   }
+  uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId;
 
+  if (vset_ && vset_->block_cache_tracer_ &&
+      vset_->block_cache_tracer_->is_tracing_enabled()) {
+    tracing_mget_id = vset_->block_cache_tracer_->NextGetId();
+  }
   // Even though we know the batch size won't be > MAX_BATCH_SIZE,
   // use autovector in order to avoid unnecessary construction of GetContext
   // objects, which is expensive
@@ -1797,7 +1808,8 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
         iter->s->ok() ? GetContext::kNotFound : GetContext::kMerge, iter->ukey,
         iter->value, nullptr, &(iter->merge_context),
         &iter->max_covering_tombstone_seq, this->env_, &iter->seq,
-        merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob);
+        merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob,
+        tracing_mget_id);
   }
   int get_ctx_index = 0;
   for (auto iter = range->begin(); iter != range->end();
@@ -1983,7 +1983,7 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
           /*block_size=*/usage, rep_->cf_id_for_tracing(),
           /*cf_name=*/"", rep_->level_for_tracing(),
           rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
-          /*no_insert=*/no_io);
+          /*no_insert=*/no_io, lookup_context->get_id);
       block_cache_tracer_->WriteBlockAccess(access_record, key,
                                             rep_->cf_name_for_tracing(),
                                             /*referenced_key=*/nullptr);
@@ -2065,7 +2065,7 @@ CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
           /*block_size=*/usage, rep_->cf_id_for_tracing(),
           /*cf_name=*/"", rep_->level_for_tracing(),
           rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
-          /*no_insert=*/no_io);
+          /*no_insert=*/no_io, lookup_context->get_id);
       block_cache_tracer_->WriteBlockAccess(access_record, cache_key,
                                             rep_->cf_name_for_tracing(),
                                             /*referenced_key=*/nullptr);
@@ -2426,7 +2426,7 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
           /*block_size=*/usage, rep_->cf_id_for_tracing(),
           /*cf_name=*/"", rep_->level_for_tracing(),
           rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
-          no_insert);
+          no_insert, lookup_context->get_id);
       block_cache_tracer_->WriteBlockAccess(access_record, key,
                                             rep_->cf_name_for_tracing(),
                                             /*referenced_key=*/nullptr);
@@ -3340,7 +3340,10 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
   CachableEntry<FilterBlockReader> filter_entry;
   bool may_match;
   FilterBlockReader* filter = nullptr;
-  BlockCacheLookupContext lookup_context{TableReaderCaller::kUserGet};
+  uint64_t tracing_get_id = get_context ? get_context->tracing_get_id()
+                                        : BlockCacheTraceHelper::kReservedGetId;
+  BlockCacheLookupContext lookup_context{TableReaderCaller::kUserGet,
+                                         tracing_get_id};
   {
     if (!skip_filters) {
       filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr,
@@ -3406,7 +3409,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
     }
 
     BlockCacheLookupContext lookup_data_block_context{
-        TableReaderCaller::kUserGet};
+        TableReaderCaller::kUserGet, tracing_get_id};
     bool does_referenced_key_exist = false;
     DataBlockIter biter;
     uint64_t referenced_data_size = 0;
@@ -3447,8 +3450,10 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
         if (!get_context->SaveValue(
                 parsed_key, biter.value(), &matched,
                 biter.IsValuePinned() ? &biter : nullptr)) {
-          does_referenced_key_exist = true;
-          referenced_data_size = biter.key().size() + biter.value().size();
+          if (get_context->State() == GetContext::GetState::kFound) {
+            does_referenced_key_exist = true;
+            referenced_data_size = biter.key().size() + biter.value().size();
+          }
           done = true;
           break;
         }
@@ -3459,6 +3464,12 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
     if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
       // Avoid making copy of block_key, cf_name, and referenced_key when
      // constructing the access record.
+      Slice referenced_key;
+      if (does_referenced_key_exist) {
+        referenced_key = biter.key();
+      } else {
+        referenced_key = ExtractUserKey(key);
+      }
       BlockCacheTraceRecord access_record(
           rep_->ioptions.env->NowMicros(),
           /*block_key=*/"", lookup_data_block_context.block_type,
@@ -3467,12 +3478,13 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
           rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
           lookup_data_block_context.is_cache_hit,
           lookup_data_block_context.no_insert,
+          lookup_data_block_context.get_id,
           /*referenced_key=*/"", referenced_data_size,
           lookup_data_block_context.num_keys_in_block,
           does_referenced_key_exist);
       block_cache_tracer_->WriteBlockAccess(
           access_record, lookup_data_block_context.block_key,
-          rep_->cf_name_for_tracing(), key);
+          rep_->cf_name_for_tracing(), referenced_key);
     }
 
     if (done) {
@@ -3498,14 +3510,19 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
                                const MultiGetRange* mget_range,
                                const SliceTransform* prefix_extractor,
                                bool skip_filters) {
-  BlockCacheLookupContext lookup_context{TableReaderCaller::kUserMultiGet};
   const bool no_io = read_options.read_tier == kBlockCacheTier;
   CachableEntry<FilterBlockReader> filter_entry;
   FilterBlockReader* filter = nullptr;
   MultiGetRange sst_file_range(*mget_range, mget_range->begin(),
                                mget_range->end());
-  {
-    if (!skip_filters) {
+  uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId;
+  if (!sst_file_range.empty() && sst_file_range.begin()->get_context) {
+    tracing_mget_id = sst_file_range.begin()->get_context->tracing_get_id();
+  }
+  BlockCacheLookupContext lookup_context{TableReaderCaller::kUserMultiGet,
+                                         tracing_mget_id};
+  if (!skip_filters) {
+    {
      // TODO: Figure out where the stats should go
      filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr,
                               read_options.read_tier == kBlockCacheTier,
@@ -3644,7 +3661,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
       uint64_t referenced_data_size = 0;
       bool does_referenced_key_exist = false;
       BlockCacheLookupContext lookup_data_block_context(
-          TableReaderCaller::kUserMultiGet);
+          TableReaderCaller::kUserMultiGet, tracing_mget_id);
       if (first_block) {
         if (!block_handles[idx_in_batch].IsNull() ||
             !results[idx_in_batch].IsEmpty()) {
@@ -3703,7 +3720,6 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
           ParsedInternalKey parsed_key;
           Cleanable dummy;
           Cleanable* value_pinner = nullptr;
-
           if (!ParseInternalKey(biter->key(), &parsed_key)) {
             s = Status::Corruption(Slice());
           }
@@ -3719,11 +3735,13 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
              value_pinner = biter;
            }
          }
-
-          if (!get_context->SaveValue(
-                  parsed_key, biter->value(), &matched, value_pinner)) {
-            does_referenced_key_exist = true;
-            referenced_data_size = biter->key().size() + biter->value().size();
+          if (!get_context->SaveValue(parsed_key, biter->value(), &matched,
+                                      value_pinner)) {
+            if (get_context->State() == GetContext::GetState::kFound) {
+              does_referenced_key_exist = true;
+              referenced_data_size =
+                  biter->key().size() + biter->value().size();
+            }
             done = true;
             break;
           }
@@ -3733,6 +3751,12 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
       if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
         // Avoid making copy of block_key, cf_name, and referenced_key when
         // constructing the access record.
+        Slice referenced_key;
+        if (does_referenced_key_exist) {
+          referenced_key = biter->key();
+        } else {
+          referenced_key = ExtractUserKey(key);
+        }
         BlockCacheTraceRecord access_record(
             rep_->ioptions.env->NowMicros(),
             /*block_key=*/"", lookup_data_block_context.block_type,
@@ -3741,12 +3765,13 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
             rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
             lookup_data_block_context.is_cache_hit,
             lookup_data_block_context.no_insert,
+            lookup_data_block_context.get_id,
             /*referenced_key=*/"", referenced_data_size,
             lookup_data_block_context.num_keys_in_block,
            does_referenced_key_exist);
         block_cache_tracer_->WriteBlockAccess(
             access_record, lookup_data_block_context.block_key,
-            rep_->cf_name_for_tracing(), key);
+            rep_->cf_name_for_tracing(), referenced_key);
       }
       s = biter->status();
       if (done) {
@@ -38,15 +38,13 @@ void appendToReplayLog(std::string* replay_log, ValueType type, Slice value) {
 
 }  // namespace
 
-GetContext::GetContext(const Comparator* ucmp,
-                       const MergeOperator* merge_operator, Logger* logger,
-                       Statistics* statistics, GetState init_state,
-                       const Slice& user_key, PinnableSlice* pinnable_val,
-                       bool* value_found, MergeContext* merge_context,
-                       SequenceNumber* _max_covering_tombstone_seq, Env* env,
-                       SequenceNumber* seq,
-                       PinnedIteratorsManager* _pinned_iters_mgr,
-                       ReadCallback* callback, bool* is_blob_index)
+GetContext::GetContext(
+    const Comparator* ucmp, const MergeOperator* merge_operator, Logger* logger,
+    Statistics* statistics, GetState init_state, const Slice& user_key,
+    PinnableSlice* pinnable_val, bool* value_found, MergeContext* merge_context,
+    SequenceNumber* _max_covering_tombstone_seq, Env* env, SequenceNumber* seq,
+    PinnedIteratorsManager* _pinned_iters_mgr, ReadCallback* callback,
+    bool* is_blob_index, uint64_t tracing_get_id)
     : ucmp_(ucmp),
       merge_operator_(merge_operator),
       logger_(logger),
@@ -62,7 +60,8 @@ GetContext::GetContext(const Comparator* ucmp,
       replay_log_(nullptr),
       pinned_iters_mgr_(_pinned_iters_mgr),
       callback_(callback),
-      is_blob_index_(is_blob_index) {
+      is_blob_index_(is_blob_index),
+      tracing_get_id_(tracing_get_id) {
   if (seq_) {
     *seq_ = kMaxSequenceNumber;
   }
@@ -85,7 +85,8 @@ class GetContext {
              SequenceNumber* max_covering_tombstone_seq, Env* env,
              SequenceNumber* seq = nullptr,
              PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
-             ReadCallback* callback = nullptr, bool* is_blob_index = nullptr);
+             ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
+             uint64_t tracing_get_id = 0);
 
   GetContext() = default;
 
@@ -135,6 +136,8 @@ class GetContext {
 
   void ReportCounters();
 
+  uint64_t tracing_get_id() const { return tracing_get_id_; }
+
  private:
   const Comparator* ucmp_;
   const MergeOperator* merge_operator_;
@@ -158,6 +161,9 @@ class GetContext {
   ReadCallback* callback_;
   bool sample_;
   bool* is_blob_index_;
+  // Used for block cache tracing only. A tracing get id uniquely identifies a
+  // Get or a MultiGet.
+  const uint64_t tracing_get_id_;
 };
 
 // Call this to replay a log and bring the get_context up to date. The replay
@@ -2563,23 +2563,25 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) {
              GetPlainInternalComparator(options.comparator), &keys, &kvmap);
     auto reader = c.GetTableReader();
     PinnableSlice value;
-    GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
-                           GetContext::kNotFound, user_key, &value, nullptr,
-                           nullptr, nullptr, nullptr);
-    get_perf_context()->Reset();
-    ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
-                          moptions.prefix_extractor.get()));
-    if (index_and_filter_in_cache) {
-      // data, index and filter block
-      ASSERT_EQ(get_perf_context()->block_read_count, 3);
-      ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
-      ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
-    } else {
-      // just the data block
-      ASSERT_EQ(get_perf_context()->block_read_count, 1);
-    }
-    ASSERT_EQ(get_context.State(), GetContext::kFound);
-    ASSERT_STREQ(value.data(), "hello");
+    {
+      GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
+                             GetContext::kNotFound, user_key, &value, nullptr,
+                             nullptr, nullptr, nullptr);
+      get_perf_context()->Reset();
+      ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
+                            moptions.prefix_extractor.get()));
+      if (index_and_filter_in_cache) {
+        // data, index and filter block
+        ASSERT_EQ(get_perf_context()->block_read_count, 3);
+        ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
+        ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
+      } else {
+        // just the data block
+        ASSERT_EQ(get_perf_context()->block_read_count, 1);
+      }
+      ASSERT_EQ(get_context.State(), GetContext::kFound);
+      ASSERT_STREQ(value.data(), "hello");
+    }
 
     // Get non-existing key
     user_key = "does-not-exist";
@@ -2587,13 +2589,15 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) {
     encoded_key = internal_key.Encode().ToString();
 
     value.Reset();
-    get_context = GetContext(options.comparator, nullptr, nullptr, nullptr,
+    {
+      GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                              GetContext::kNotFound, user_key, &value, nullptr,
                              nullptr, nullptr, nullptr);
-    get_perf_context()->Reset();
-    ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
-                          moptions.prefix_extractor.get()));
-    ASSERT_EQ(get_context.State(), GetContext::kNotFound);
+      get_perf_context()->Reset();
+      ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
+                            moptions.prefix_extractor.get()));
+      ASSERT_EQ(get_context.State(), GetContext::kNotFound);
+    }
 
     if (index_and_filter_in_cache) {
       if (bloom_filter_type == 0) {
@@ -31,6 +31,7 @@ bool ShouldTrace(const Slice& block_key, const TraceOptions& trace_options) {
 const uint64_t kMicrosInSecond = 1000 * 1000;
 const std::string BlockCacheTraceHelper::kUnknownColumnFamilyName =
     "UnknownColumnFamily";
+const uint64_t BlockCacheTraceHelper::kReservedGetId = 0;
 
 bool BlockCacheTraceHelper::ShouldTraceReferencedKey(TraceType block_type,
                                                      TableReaderCaller caller) {
@@ -39,6 +40,11 @@ bool BlockCacheTraceHelper::ShouldTraceReferencedKey(TraceType block_type,
           caller == TableReaderCaller::kUserMultiGet);
 }
 
+bool BlockCacheTraceHelper::ShouldTraceGetId(TableReaderCaller caller) {
+  return caller == TableReaderCaller::kUserGet ||
+         caller == TableReaderCaller::kUserMultiGet;
+}
+
 BlockCacheTraceWriter::BlockCacheTraceWriter(
     Env* env, const TraceOptions& trace_options,
     std::unique_ptr<TraceWriter>&& trace_writer)
@@ -65,6 +71,9 @@ Status BlockCacheTraceWriter::WriteBlockAccess(
   trace.payload.push_back(record.caller);
   trace.payload.push_back(record.is_cache_hit);
   trace.payload.push_back(record.no_insert);
+  if (BlockCacheTraceHelper::ShouldTraceGetId(record.caller)) {
+    PutFixed64(&trace.payload, record.get_id);
+  }
   if (BlockCacheTraceHelper::ShouldTraceReferencedKey(record.block_type,
                                                       record.caller)) {
     PutLengthPrefixedSlice(&trace.payload, referenced_key);
@@ -197,7 +206,12 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
   }
   record->no_insert = static_cast<Boolean>(enc_slice[0]);
   enc_slice.remove_prefix(kCharSize);
-
+  if (BlockCacheTraceHelper::ShouldTraceGetId(record->caller)) {
+    if (!GetFixed64(&enc_slice, &record->get_id)) {
+      return Status::Incomplete(
+          "Incomplete access record: Failed to read the get id.");
+    }
+  }
   if (BlockCacheTraceHelper::ShouldTraceReferencedKey(record->block_type,
                                                       record->caller)) {
     Slice referenced_key;
@@ -236,6 +250,7 @@ Status BlockCacheTracer::StartTrace(
   if (writer_.load()) {
     return Status::Busy();
   }
+  get_id_counter_.store(1);
   trace_options_ = trace_options;
   writer_.store(
       new BlockCacheTraceWriter(env, trace_options, std::move(trace_writer)));
@@ -266,4 +281,16 @@ Status BlockCacheTracer::WriteBlockAccess(const BlockCacheTraceRecord& record,
                                           referenced_key);
 }
 
+uint64_t BlockCacheTracer::NextGetId() {
+  if (!writer_.load(std::memory_order_relaxed)) {
+    return BlockCacheTraceHelper::kReservedGetId;
+  }
+  uint64_t prev_value = get_id_counter_.fetch_add(1);
+  if (prev_value == BlockCacheTraceHelper::kReservedGetId) {
+    // fetch and add again.
+    return get_id_counter_.fetch_add(1);
+  }
+  return prev_value;
+}
+
 }  // namespace rocksdb
@@ -18,6 +18,16 @@ namespace rocksdb {
 
 extern const uint64_t kMicrosInSecond;
 
+class BlockCacheTraceHelper {
+ public:
+  static bool ShouldTraceReferencedKey(TraceType block_type,
+                                       TableReaderCaller caller);
+  static bool ShouldTraceGetId(TableReaderCaller caller);
+
+  static const std::string kUnknownColumnFamilyName;
+  static const uint64_t kReservedGetId;
+};
+
 // Lookup context for tracing block cache accesses.
 // We trace block accesses at five places:
 // 1. BlockBasedTable::GetFilter
@@ -38,8 +48,10 @@ extern const uint64_t kMicrosInSecond;
 // 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or
 // kUserApproximateSize).
 struct BlockCacheLookupContext {
-  BlockCacheLookupContext(const TableReaderCaller& _caller) : caller(_caller) {}
-  const TableReaderCaller caller;
+  BlockCacheLookupContext(const TableReaderCaller& _caller) : caller(_caller) {}
+  BlockCacheLookupContext(const TableReaderCaller& _caller, uint64_t _get_id)
+      : caller(_caller), get_id(_get_id) {}
+  const TableReaderCaller caller;
   // These are populated when we perform lookup/insert on block cache. The block
   // cache tracer uses these inforation when logging the block access at
   // BlockBasedTable::GET and BlockBasedTable::MultiGet.
@@ -49,6 +61,10 @@ const TableReaderCaller caller;
   uint64_t block_size = 0;
   std::string block_key;
   uint64_t num_keys_in_block = 0;
+  // The unique id associated with Get and MultiGet. This enables us to track
+  // how many blocks a Get/MultiGet request accesses. We can also measure the
+  // impact of row cache vs block cache.
+  uint64_t get_id = 0;
 
   void FillLookupContext(bool _is_cache_hit, bool _no_insert,
                          TraceType _block_type, uint64_t _block_size,
@@ -78,7 +94,8 @@ struct BlockCacheTraceRecord {
   TableReaderCaller caller = TableReaderCaller::kMaxBlockCacheLookupCaller;
   Boolean is_cache_hit = Boolean::kFalse;
   Boolean no_insert = Boolean::kFalse;
-
+  // Required field for Get and MultiGet
+  uint64_t get_id = BlockCacheTraceHelper::kReservedGetId;
   // Required fields for data block and user Get/Multi-Get only.
   std::string referenced_key;
   uint64_t referenced_data_size = 0;
@@ -91,7 +108,7 @@ struct BlockCacheTraceRecord {
                        TraceType _block_type, uint64_t _block_size,
                        uint64_t _cf_id, std::string _cf_name, uint32_t _level,
                        uint64_t _sst_fd_number, TableReaderCaller _caller,
-                       bool _is_cache_hit, bool _no_insert,
+                       bool _is_cache_hit, bool _no_insert, uint64_t _get_id,
                        std::string _referenced_key = "",
                        uint64_t _referenced_data_size = 0,
                        uint64_t _num_keys_in_block = 0,
@@ -107,6 +124,7 @@ struct BlockCacheTraceRecord {
         caller(_caller),
         is_cache_hit(_is_cache_hit ? Boolean::kTrue : Boolean::kFalse),
         no_insert(_no_insert ? Boolean::kTrue : Boolean::kFalse),
+        get_id(_get_id),
         referenced_key(_referenced_key),
         referenced_data_size(_referenced_data_size),
         num_keys_in_block(_num_keys_in_block),
@@ -121,14 +139,6 @@ struct BlockCacheTraceHeader {
   uint32_t rocksdb_minor_version;
 };
 
-class BlockCacheTraceHelper {
- public:
-  static bool ShouldTraceReferencedKey(TraceType block_type,
-                                       TableReaderCaller caller);
-
-  static const std::string kUnknownColumnFamilyName;
-};
-
 // BlockCacheTraceWriter captures all RocksDB block cache accesses using a
 // user-provided TraceWriter. Every RocksDB operation is written as a single
 // trace. Each trace will have a timestamp and type, followed by the trace
@@ -207,11 +217,15 @@ class BlockCacheTracer {
                            const Slice& block_key, const Slice& cf_name,
                            const Slice& referenced_key);
 
+  // GetId cycles from 1 to port::kMaxUint64.
+  uint64_t NextGetId();
+
  private:
   TraceOptions trace_options_;
   // A mutex protects the writer_.
   InstrumentedMutex trace_writer_mutex_;
   std::atomic<BlockCacheTraceWriter*> writer_;
+  std::atomic<uint64_t> get_id_counter_;
 };
 
 }  // namespace rocksdb
@@ -71,6 +71,9 @@ class BlockCacheTracerTest : public testing::Test {
     record.sst_fd_number = kSSTFDNumber + key_id;
     record.is_cache_hit = Boolean::kFalse;
     record.no_insert = Boolean::kFalse;
+    // Provide get_id for all callers. The writer should only write get_id
+    // when the caller is either GET or MGET.
+    record.get_id = key_id + 1;
     // Provide these fields for all block types.
     // The writer should only write these fields for data blocks and the
     // caller is either GET or MGET.
@@ -120,6 +123,12 @@ class BlockCacheTracerTest : public testing::Test {
       ASSERT_EQ(kSSTFDNumber + key_id, record.sst_fd_number);
       ASSERT_EQ(Boolean::kFalse, record.is_cache_hit);
       ASSERT_EQ(Boolean::kFalse, record.no_insert);
+      if (record.caller == TableReaderCaller::kUserGet ||
+          record.caller == TableReaderCaller::kUserMultiGet) {
+        ASSERT_EQ(key_id + 1, record.get_id);
+      } else {
+        ASSERT_EQ(BlockCacheTraceHelper::kReservedGetId, record.get_id);
+      }
       if (block_type == TraceType::kBlockTraceDataBlock &&
           (record.caller == TableReaderCaller::kUserGet ||
            record.caller == TableReaderCaller::kUserMultiGet)) {
@@ -239,6 +248,35 @@ TEST_F(BlockCacheTracerTest, AtomicNoWriteAfterEndTrace) {
   }
 }
 
+TEST_F(BlockCacheTracerTest, NextGetId) {
+  BlockCacheTracer writer;
+  {
+    TraceOptions trace_opt;
+    std::unique_ptr<TraceWriter> trace_writer;
+    ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
+                                 &trace_writer));
+    // next get id should always return 0 before we call StartTrace.
+    ASSERT_EQ(0, writer.NextGetId());
+    ASSERT_EQ(0, writer.NextGetId());
+    ASSERT_OK(writer.StartTrace(env_, trace_opt, std::move(trace_writer)));
+    ASSERT_EQ(1, writer.NextGetId());
+    ASSERT_EQ(2, writer.NextGetId());
+    writer.EndTrace();
+    // next get id should return 0.
+    ASSERT_EQ(0, writer.NextGetId());
+  }
+
+  // Start trace again and next get id should return 1.
+  {
+    TraceOptions trace_opt;
+    std::unique_ptr<TraceWriter> trace_writer;
+    ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
+                                 &trace_writer));
+    ASSERT_OK(writer.StartTrace(env_, trace_opt, std::move(trace_writer)));
+    ASSERT_EQ(1, writer.NextGetId());
+  }
+}
+
 TEST_F(BlockCacheTracerTest, MixedBlocks) {
   {
     // Generate a trace file containing a mix of blocks.