Integrate block cache tracer in block based table reader. (#5441)
Summary: This PR integrates the block cache tracer into block based table reader. The tracer will write the block cache accesses using the trace_writer. The tracer is null in this PR so that nothing will be logged. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5441 Differential Revision: D15772029 Pulled By: HaoyuHuang fbshipit-source-id: a64adb92642cd23222e0ba8b10d86bf522b42f9b
This commit is contained in:
parent
f1219644ec
commit
7a8d7358bb
@ -1877,9 +1877,8 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_blk_handle,
|
||||
const bool is_a_filter_partition, bool no_io, GetContext* get_context,
|
||||
BlockCacheLookupContext* /*lookup_context*/,
|
||||
BlockCacheLookupContext* lookup_context,
|
||||
const SliceTransform* prefix_extractor) const {
|
||||
// TODO(haoyu): Trace filter block access here.
|
||||
// If cache_index_and_filter_blocks is false, filter should be pre-populated.
|
||||
// We will return rep_->filter anyway. rep_->filter can be nullptr if filter
|
||||
// read fails at Open() time. We don't want to reload again since it will
|
||||
@ -1912,17 +1911,22 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
GetEntryFromCache(block_cache, key, BlockType::kFilter, get_context);
|
||||
|
||||
FilterBlockReader* filter = nullptr;
|
||||
size_t usage = 0;
|
||||
bool is_cache_hit = false;
|
||||
bool return_empty_reader = false;
|
||||
if (cache_handle != nullptr) {
|
||||
filter =
|
||||
reinterpret_cast<FilterBlockReader*>(block_cache->Value(cache_handle));
|
||||
usage = filter->ApproximateMemoryUsage();
|
||||
is_cache_hit = true;
|
||||
} else if (no_io) {
|
||||
// Do not invoke any io.
|
||||
return CachableEntry<FilterBlockReader>();
|
||||
return_empty_reader = true;
|
||||
} else {
|
||||
filter = ReadFilter(prefetch_buffer, filter_blk_handle,
|
||||
is_a_filter_partition, prefix_extractor);
|
||||
if (filter != nullptr) {
|
||||
size_t usage = filter->ApproximateMemoryUsage();
|
||||
usage = filter->ApproximateMemoryUsage();
|
||||
Status s = block_cache->Insert(
|
||||
key, filter, usage, &DeleteCachedFilterEntry, &cache_handle,
|
||||
rep_->table_options.cache_index_and_filter_blocks_with_high_priority
|
||||
@ -1934,19 +1938,36 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
} else {
|
||||
RecordTick(rep_->ioptions.statistics, BLOCK_CACHE_ADD_FAILURES);
|
||||
delete filter;
|
||||
return CachableEntry<FilterBlockReader>();
|
||||
return_empty_reader = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (block_cache_tracer_ && lookup_context) {
|
||||
// Avoid making copy of block_key and cf_name when constructing the access
|
||||
// record.
|
||||
BlockCacheTraceRecord access_record(
|
||||
rep_->ioptions.env->NowMicros(),
|
||||
/*block_key=*/"", TraceType::kBlockTraceFilterBlock,
|
||||
/*block_size=*/usage, rep_->cf_id_for_tracing(),
|
||||
/*cf_name=*/"", rep_->level_for_tracing(),
|
||||
rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
|
||||
/*no_insert=*/no_io);
|
||||
block_cache_tracer_->WriteBlockAccess(access_record, key,
|
||||
rep_->cf_name_for_tracing(),
|
||||
/*referenced_key=*/nullptr);
|
||||
}
|
||||
|
||||
if (return_empty_reader) {
|
||||
return CachableEntry<FilterBlockReader>();
|
||||
}
|
||||
return {filter, cache_handle ? block_cache : nullptr, cache_handle,
|
||||
/*own_value=*/false};
|
||||
}
|
||||
|
||||
CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
|
||||
FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context,
|
||||
BlockCacheLookupContext* /*lookup_context*/) const {
|
||||
// TODO(haoyu): Trace the access on the uncompression dictionary here.
|
||||
BlockCacheLookupContext* lookup_context) const {
|
||||
if (!rep_->table_options.cache_index_and_filter_blocks) {
|
||||
// block cache is either disabled or not used for meta-blocks. In either
|
||||
// case, BlockBasedTableReader is the owner of the uncompression dictionary.
|
||||
@ -1964,9 +1985,13 @@ CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
|
||||
GetEntryFromCache(rep_->table_options.block_cache.get(), cache_key,
|
||||
BlockType::kCompressionDictionary, get_context);
|
||||
UncompressionDict* dict = nullptr;
|
||||
bool is_cache_hit = false;
|
||||
size_t usage = 0;
|
||||
if (cache_handle != nullptr) {
|
||||
dict = reinterpret_cast<UncompressionDict*>(
|
||||
rep_->table_options.block_cache->Value(cache_handle));
|
||||
is_cache_hit = true;
|
||||
usage = dict->ApproximateMemoryUsage();
|
||||
} else if (no_io) {
|
||||
// Do not invoke any io.
|
||||
} else {
|
||||
@ -1980,7 +2005,7 @@ CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
|
||||
new UncompressionDict(compression_dict_block->data.ToString(),
|
||||
rep_->blocks_definitely_zstd_compressed,
|
||||
rep_->ioptions.statistics));
|
||||
const size_t usage = uncompression_dict->ApproximateMemoryUsage();
|
||||
usage = uncompression_dict->ApproximateMemoryUsage();
|
||||
s = rep_->table_options.block_cache->Insert(
|
||||
cache_key, uncompression_dict.get(), usage,
|
||||
&DeleteCachedUncompressionDictEntry, &cache_handle,
|
||||
@ -2000,6 +2025,20 @@ CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
|
||||
}
|
||||
}
|
||||
}
|
||||
if (block_cache_tracer_ && lookup_context) {
|
||||
// Avoid making copy of block_key and cf_name when constructing the access
|
||||
// record.
|
||||
BlockCacheTraceRecord access_record(
|
||||
rep_->ioptions.env->NowMicros(),
|
||||
/*block_key=*/"", TraceType::kBlockTraceUncompressionDictBlock,
|
||||
/*block_size=*/usage, rep_->cf_id_for_tracing(),
|
||||
/*cf_name=*/"", rep_->level_for_tracing(),
|
||||
rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
|
||||
/*no_insert=*/no_io);
|
||||
block_cache_tracer_->WriteBlockAccess(access_record, cache_key,
|
||||
rep_->cf_name_for_tracing(),
|
||||
/*referenced_key=*/nullptr);
|
||||
}
|
||||
return {dict, cache_handle ? rep_->table_options.block_cache.get() : nullptr,
|
||||
cache_handle, false /* own_value */};
|
||||
}
|
||||
@ -2116,13 +2155,10 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
||||
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
|
||||
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
||||
CachableEntry<Block>* block_entry, BlockType block_type,
|
||||
GetContext* get_context,
|
||||
BlockCacheLookupContext* /*lookup_context*/) const {
|
||||
// TODO(haoyu): Trace data/index/range deletion block access here.
|
||||
GetContext* get_context, BlockCacheLookupContext* lookup_context) const {
|
||||
assert(block_entry != nullptr);
|
||||
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
||||
Cache* block_cache = rep_->table_options.block_cache.get();
|
||||
|
||||
// No point in caching compressed blocks if the table never goes away
|
||||
Cache* block_cache_compressed =
|
||||
rep_->immortal_table ? nullptr
|
||||
@ -2136,6 +2172,8 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
||||
char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||
Slice key /* key to the block cache */;
|
||||
Slice ckey /* key to the compressed block cache */;
|
||||
bool is_cache_hit = false;
|
||||
bool no_insert = true;
|
||||
if (block_cache != nullptr || block_cache_compressed != nullptr) {
|
||||
// create key for block cache
|
||||
if (block_cache != nullptr) {
|
||||
@ -2152,10 +2190,15 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
||||
s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed,
|
||||
ro, block_entry, uncompression_dict, block_type,
|
||||
get_context);
|
||||
|
||||
if (block_entry->GetValue()) {
|
||||
// TODO(haoyu): Differentiate cache hit on uncompressed block cache and
|
||||
// compressed block cache.
|
||||
is_cache_hit = true;
|
||||
}
|
||||
// Can't find the block from the cache. If I/O is allowed, read from the
|
||||
// file.
|
||||
if (block_entry->GetValue() == nullptr && !no_io && ro.fill_cache) {
|
||||
no_insert = false;
|
||||
Statistics* statistics = rep_->ioptions.statistics;
|
||||
bool do_decompress =
|
||||
block_cache_compressed == nullptr && rep_->blocks_maybe_compressed;
|
||||
@ -2186,6 +2229,59 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fill lookup_context.
|
||||
if (block_cache_tracer_ && lookup_context) {
|
||||
size_t usage = 0;
|
||||
uint64_t nkeys = 0;
|
||||
if (block_entry->GetValue()) {
|
||||
// Approximate the number of keys in the block using restarts.
|
||||
nkeys = rep_->table_options.block_restart_interval *
|
||||
block_entry->GetValue()->NumRestarts();
|
||||
usage = block_entry->GetValue()->ApproximateMemoryUsage();
|
||||
}
|
||||
TraceType trace_block_type = TraceType::kTraceMax;
|
||||
switch (block_type) {
|
||||
case BlockType::kIndex:
|
||||
trace_block_type = TraceType::kBlockTraceIndexBlock;
|
||||
break;
|
||||
case BlockType::kData:
|
||||
trace_block_type = TraceType::kBlockTraceDataBlock;
|
||||
break;
|
||||
case BlockType::kRangeDeletion:
|
||||
trace_block_type = TraceType::kBlockTraceRangeDeletionBlock;
|
||||
break;
|
||||
default:
|
||||
// This cannot happen.
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
if (BlockCacheTraceHelper::ShouldTraceReferencedKey(
|
||||
trace_block_type, lookup_context->caller)) {
|
||||
// Defer logging the access to Get() and MultiGet() to trace additional
|
||||
// information, e.g., the referenced key,
|
||||
// referenced_key_exist_in_block.
|
||||
|
||||
// Make a copy of the block key here since it will be logged later.
|
||||
lookup_context->FillLookupContext(
|
||||
is_cache_hit, no_insert, trace_block_type,
|
||||
/*block_size=*/usage, /*block_key=*/key.ToString(), nkeys);
|
||||
} else {
|
||||
// Avoid making copy of block_key and cf_name when constructing the access
|
||||
// record.
|
||||
BlockCacheTraceRecord access_record(
|
||||
rep_->ioptions.env->NowMicros(),
|
||||
/*block_key=*/"", trace_block_type,
|
||||
/*block_size=*/usage, rep_->cf_id_for_tracing(),
|
||||
/*cf_name=*/"", rep_->level_for_tracing(),
|
||||
rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
|
||||
no_insert);
|
||||
block_cache_tracer_->WriteBlockAccess(access_record, key,
|
||||
rep_->cf_name_for_tracing(),
|
||||
/*referenced_key=*/nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
assert(s.ok() || block_entry->GetValue() == nullptr);
|
||||
return s;
|
||||
}
|
||||
@ -2874,11 +2970,15 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
|
||||
break;
|
||||
} else {
|
||||
BlockCacheLookupContext lookup_data_block_context{
|
||||
BlockCacheLookupCaller::kUserGet};
|
||||
bool does_referenced_key_exist = false;
|
||||
DataBlockIter biter;
|
||||
uint64_t referenced_data_size = 0;
|
||||
NewDataBlockIterator<DataBlockIter>(
|
||||
read_options, iiter->value(), &biter, BlockType::kData,
|
||||
/*key_includes_seq=*/true,
|
||||
/*index_key_is_full=*/true, get_context, &lookup_context,
|
||||
/*index_key_is_full=*/true, get_context, &lookup_data_block_context,
|
||||
/*s=*/Status(), /*prefetch_buffer*/ nullptr);
|
||||
|
||||
if (read_options.read_tier == kBlockCacheTier &&
|
||||
@ -2902,25 +3002,47 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
// the end of the block, i.e. cannot be in the following blocks
|
||||
// either. In this case, the seek_key cannot be found, so we break
|
||||
// from the top level for-loop.
|
||||
break;
|
||||
}
|
||||
done = true;
|
||||
} else {
|
||||
// Call the *saver function on each entry/block until it returns false
|
||||
for (; biter.Valid(); biter.Next()) {
|
||||
ParsedInternalKey parsed_key;
|
||||
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
||||
s = Status::Corruption(Slice());
|
||||
}
|
||||
|
||||
// Call the *saver function on each entry/block until it returns false
|
||||
for (; biter.Valid(); biter.Next()) {
|
||||
ParsedInternalKey parsed_key;
|
||||
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
||||
s = Status::Corruption(Slice());
|
||||
}
|
||||
|
||||
if (!get_context->SaveValue(
|
||||
parsed_key, biter.value(), &matched,
|
||||
biter.IsValuePinned() ? &biter : nullptr)) {
|
||||
done = true;
|
||||
break;
|
||||
if (!get_context->SaveValue(
|
||||
parsed_key, biter.value(), &matched,
|
||||
biter.IsValuePinned() ? &biter : nullptr)) {
|
||||
does_referenced_key_exist = true;
|
||||
referenced_data_size = biter.key().size() + biter.value().size();
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
s = biter.status();
|
||||
}
|
||||
// Write the block cache access record.
|
||||
if (block_cache_tracer_) {
|
||||
// Avoid making copy of block_key, cf_name, and referenced_key when
|
||||
// constructing the access record.
|
||||
BlockCacheTraceRecord access_record(
|
||||
rep_->ioptions.env->NowMicros(),
|
||||
/*block_key=*/"", lookup_data_block_context.block_type,
|
||||
lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
|
||||
/*cf_name=*/"", rep_->level_for_tracing(),
|
||||
rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
|
||||
lookup_data_block_context.is_cache_hit,
|
||||
lookup_data_block_context.no_insert,
|
||||
/*referenced_key=*/"", referenced_data_size,
|
||||
lookup_data_block_context.num_keys_in_block,
|
||||
does_referenced_key_exist);
|
||||
block_cache_tracer_->WriteBlockAccess(
|
||||
access_record, lookup_data_block_context.block_key,
|
||||
rep_->cf_name_for_tracing(), key);
|
||||
}
|
||||
s = biter.status();
|
||||
}
|
||||
|
||||
if (done) {
|
||||
// Avoid the extra Next which is expensive in two-level indexes
|
||||
break;
|
||||
@ -2992,14 +3114,18 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
||||
bool done = false;
|
||||
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
|
||||
bool reusing_block = true;
|
||||
uint64_t referenced_data_size = 0;
|
||||
bool does_referenced_key_exist = false;
|
||||
BlockCacheLookupContext lookup_data_block_context(
|
||||
BlockCacheLookupCaller::kUserMGet);
|
||||
if (iiter->value().offset() != offset) {
|
||||
offset = iiter->value().offset();
|
||||
biter.Invalidate(Status::OK());
|
||||
NewDataBlockIterator<DataBlockIter>(
|
||||
read_options, iiter->value(), &biter, BlockType::kData,
|
||||
/*key_includes_seq=*/false,
|
||||
/*index_key_is_full=*/true, get_context, &lookup_context,
|
||||
Status(), nullptr);
|
||||
/*index_key_is_full=*/true, get_context,
|
||||
&lookup_data_block_context, Status(), nullptr);
|
||||
reusing_block = false;
|
||||
}
|
||||
if (read_options.read_tier == kBlockCacheTier &&
|
||||
@ -3021,38 +3147,59 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
||||
// the end of the block, i.e. cannot be in the following blocks
|
||||
// either. In this case, the seek_key cannot be found, so we break
|
||||
// from the top level for-loop.
|
||||
break;
|
||||
}
|
||||
done = true;
|
||||
} else {
|
||||
// Call the *saver function on each entry/block until it returns false
|
||||
for (; biter.Valid(); biter.Next()) {
|
||||
ParsedInternalKey parsed_key;
|
||||
Cleanable dummy;
|
||||
Cleanable* value_pinner = nullptr;
|
||||
|
||||
// Call the *saver function on each entry/block until it returns false
|
||||
for (; biter.Valid(); biter.Next()) {
|
||||
ParsedInternalKey parsed_key;
|
||||
Cleanable dummy;
|
||||
Cleanable* value_pinner = nullptr;
|
||||
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
||||
s = Status::Corruption(Slice());
|
||||
}
|
||||
if (biter.IsValuePinned()) {
|
||||
if (reusing_block) {
|
||||
Cache* block_cache = rep_->table_options.block_cache.get();
|
||||
assert(biter.cache_handle() != nullptr);
|
||||
block_cache->Ref(biter.cache_handle());
|
||||
dummy.RegisterCleanup(&ReleaseCachedEntry, block_cache,
|
||||
biter.cache_handle());
|
||||
value_pinner = &dummy;
|
||||
} else {
|
||||
value_pinner = &biter;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
||||
s = Status::Corruption(Slice());
|
||||
}
|
||||
if (biter.IsValuePinned()) {
|
||||
if (reusing_block) {
|
||||
Cache* block_cache = rep_->table_options.block_cache.get();
|
||||
assert(biter.cache_handle() != nullptr);
|
||||
block_cache->Ref(biter.cache_handle());
|
||||
dummy.RegisterCleanup(&ReleaseCachedEntry, block_cache,
|
||||
biter.cache_handle());
|
||||
value_pinner = &dummy;
|
||||
} else {
|
||||
value_pinner = &biter;
|
||||
if (!get_context->SaveValue(parsed_key, biter.value(), &matched,
|
||||
value_pinner)) {
|
||||
does_referenced_key_exist = true;
|
||||
referenced_data_size = biter.key().size() + biter.value().size();
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!get_context->SaveValue(
|
||||
parsed_key, biter.value(), &matched, value_pinner)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
s = biter.status();
|
||||
}
|
||||
// Write the block cache access.
|
||||
if (block_cache_tracer_) {
|
||||
// Avoid making copy of block_key, cf_name, and referenced_key when
|
||||
// constructing the access record.
|
||||
BlockCacheTraceRecord access_record(
|
||||
rep_->ioptions.env->NowMicros(),
|
||||
/*block_key=*/"", lookup_data_block_context.block_type,
|
||||
lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
|
||||
/*cf_name=*/"", rep_->level_for_tracing(),
|
||||
rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
|
||||
lookup_data_block_context.is_cache_hit,
|
||||
lookup_data_block_context.no_insert,
|
||||
/*referenced_key=*/"", referenced_data_size,
|
||||
lookup_data_block_context.num_keys_in_block,
|
||||
does_referenced_key_exist);
|
||||
block_cache_tracer_->WriteBlockAccess(
|
||||
access_record, lookup_data_block_context.block_key,
|
||||
rep_->cf_name_for_tracing(), key);
|
||||
}
|
||||
s = biter.status();
|
||||
if (done) {
|
||||
// Avoid the extra Next which is expensive in two-level indexes
|
||||
break;
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "db/range_tombstone_fragmenter.h"
|
||||
#include "file/filename.h"
|
||||
#include "options/cf_options.h"
|
||||
#include "rocksdb/options.h"
|
||||
#include "rocksdb/persistent_cache.h"
|
||||
@ -571,6 +572,23 @@ struct BlockBasedTable::Rep {
|
||||
? kDisableGlobalSequenceNumber
|
||||
: global_seqno;
|
||||
}
|
||||
|
||||
uint64_t cf_id_for_tracing() const {
|
||||
return table_properties ? table_properties->column_family_id
|
||||
: rocksdb::TablePropertiesCollectorFactory::
|
||||
Context::kUnknownColumnFamily;
|
||||
}
|
||||
|
||||
// Returns the column family name to put into block cache trace records.
// The name is read from the table properties when they are available;
// otherwise BlockCacheTraceHelper::kUnknownColumnFamilyName is used.
// NOTE(review): the Slice is built directly from the selected string, so it
// is presumably only valid while that string is alive -- confirm.
Slice cf_name_for_tracing() const {
  if (table_properties) {
    return table_properties->column_family_name;
  }
  return BlockCacheTraceHelper::kUnknownColumnFamilyName;
}
|
||||
|
||||
// Returns the level to put into block cache trace records. A negative level
// is reported as UINT32_MAX.
uint32_t level_for_tracing() const {
  if (level < 0) {
    return UINT32_MAX;
  }
  return static_cast<uint32_t>(level);
}
|
||||
|
||||
uint64_t sst_number_for_tracing() const {
|
||||
return file ? TableFileNameToNumber(file->file_name()) : UINT64_MAX;
|
||||
}
|
||||
};
|
||||
|
||||
// Iterates over the contents of BlockBasedTable.
|
||||
|
@ -35,10 +35,11 @@ struct BlockAccessInfo {
|
||||
block_size = access.block_size;
|
||||
caller_num_access_map[access.caller]++;
|
||||
num_accesses++;
|
||||
if (ShouldTraceReferencedKey(access)) {
|
||||
if (BlockCacheTraceHelper::ShouldTraceReferencedKey(access.block_type,
|
||||
access.caller)) {
|
||||
num_keys = access.num_keys_in_block;
|
||||
|
||||
if (access.is_referenced_key_exist_in_block == Boolean::kTrue) {
|
||||
if (access.referenced_key_exist_in_block == Boolean::kTrue) {
|
||||
key_num_access_map[access.referenced_key]++;
|
||||
num_referenced_key_exist_in_block++;
|
||||
} else {
|
||||
|
@ -89,9 +89,10 @@ class BlockCacheTracerTest : public testing::Test {
|
||||
// The writer should only write these fields for data blocks and the
|
||||
// caller is either GET or MGET.
|
||||
record.referenced_key = kRefKeyPrefix + std::to_string(key_id);
|
||||
record.is_referenced_key_exist_in_block = Boolean::kTrue;
|
||||
record.referenced_key_exist_in_block = Boolean::kTrue;
|
||||
record.num_keys_in_block = kNumKeysInBlock;
|
||||
ASSERT_OK(writer->WriteBlockAccess(record));
|
||||
ASSERT_OK(writer->WriteBlockAccess(
|
||||
record, record.block_key, record.cf_name, record.referenced_key));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -15,13 +15,6 @@ namespace rocksdb {
|
||||
|
||||
namespace {
|
||||
const unsigned int kCharSize = 1;
|
||||
} // namespace
|
||||
|
||||
bool ShouldTraceReferencedKey(const BlockCacheTraceRecord& record) {
|
||||
return (record.block_type == TraceType::kBlockTraceDataBlock) &&
|
||||
(record.caller == BlockCacheLookupCaller::kUserGet ||
|
||||
record.caller == BlockCacheLookupCaller::kUserMGet);
|
||||
}
|
||||
|
||||
bool ShouldTrace(const BlockCacheTraceRecord& record,
|
||||
const TraceOptions& trace_options) {
|
||||
@ -34,6 +27,17 @@ bool ShouldTrace(const BlockCacheTraceRecord& record,
|
||||
const uint64_t hash = GetSliceNPHash64(Slice(record.block_key));
|
||||
return hash % trace_options.sampling_frequency == 0;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Placeholder column family name for call sites that cannot supply the real
// one.
const std::string BlockCacheTraceHelper::kUnknownColumnFamilyName(
    "UnknownColumnFamily");
|
||||
|
||||
// Returns true only for a data block access whose caller is a user Get or a
// user MultiGet; every other block type / caller combination yields false.
bool BlockCacheTraceHelper::ShouldTraceReferencedKey(
    TraceType block_type, BlockCacheLookupCaller caller) {
  if (block_type != TraceType::kBlockTraceDataBlock) {
    return false;
  }
  const bool from_user_get = (caller == BlockCacheLookupCaller::kUserGet);
  const bool from_user_mget = (caller == BlockCacheLookupCaller::kUserMGet);
  return from_user_get || from_user_mget;
}
|
||||
|
||||
BlockCacheTraceWriter::BlockCacheTraceWriter(
|
||||
Env* env, const TraceOptions& trace_options,
|
||||
@ -43,7 +47,8 @@ BlockCacheTraceWriter::BlockCacheTraceWriter(
|
||||
trace_writer_(std::move(trace_writer)) {}
|
||||
|
||||
Status BlockCacheTraceWriter::WriteBlockAccess(
|
||||
const BlockCacheTraceRecord& record) {
|
||||
const BlockCacheTraceRecord& record, const Slice& block_key,
|
||||
const Slice& cf_name, const Slice& referenced_key) {
|
||||
uint64_t trace_file_size = trace_writer_->GetFileSize();
|
||||
if (trace_file_size > trace_options_.max_trace_file_size) {
|
||||
return Status::OK();
|
||||
@ -51,19 +56,21 @@ Status BlockCacheTraceWriter::WriteBlockAccess(
|
||||
Trace trace;
|
||||
trace.ts = record.access_timestamp;
|
||||
trace.type = record.block_type;
|
||||
PutLengthPrefixedSlice(&trace.payload, record.block_key);
|
||||
PutLengthPrefixedSlice(&trace.payload, block_key);
|
||||
PutFixed64(&trace.payload, record.block_size);
|
||||
PutFixed32(&trace.payload, record.cf_id);
|
||||
PutLengthPrefixedSlice(&trace.payload, record.cf_name);
|
||||
PutFixed64(&trace.payload, record.cf_id);
|
||||
PutLengthPrefixedSlice(&trace.payload, cf_name);
|
||||
PutFixed32(&trace.payload, record.level);
|
||||
PutFixed32(&trace.payload, record.sst_fd_number);
|
||||
PutFixed64(&trace.payload, record.sst_fd_number);
|
||||
trace.payload.push_back(record.caller);
|
||||
trace.payload.push_back(record.is_cache_hit);
|
||||
trace.payload.push_back(record.no_insert);
|
||||
if (ShouldTraceReferencedKey(record)) {
|
||||
PutLengthPrefixedSlice(&trace.payload, record.referenced_key);
|
||||
if (BlockCacheTraceHelper::ShouldTraceReferencedKey(record.block_type,
|
||||
record.caller)) {
|
||||
PutLengthPrefixedSlice(&trace.payload, referenced_key);
|
||||
PutFixed64(&trace.payload, record.referenced_data_size);
|
||||
PutFixed64(&trace.payload, record.num_keys_in_block);
|
||||
trace.payload.push_back(record.is_referenced_key_exist_in_block);
|
||||
trace.payload.push_back(record.referenced_key_exist_in_block);
|
||||
}
|
||||
std::string encoded_trace;
|
||||
TracerHelper::EncodeTrace(trace, &encoded_trace);
|
||||
@ -143,6 +150,7 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
|
||||
record->access_timestamp = trace.ts;
|
||||
record->block_type = trace.type;
|
||||
Slice enc_slice = Slice(trace.payload);
|
||||
|
||||
Slice block_key;
|
||||
if (!GetLengthPrefixedSlice(&enc_slice, &block_key)) {
|
||||
return Status::Incomplete(
|
||||
@ -153,7 +161,7 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
|
||||
return Status::Incomplete(
|
||||
"Incomplete access record: Failed to read block size.");
|
||||
}
|
||||
if (!GetFixed32(&enc_slice, &record->cf_id)) {
|
||||
if (!GetFixed64(&enc_slice, &record->cf_id)) {
|
||||
return Status::Incomplete(
|
||||
"Incomplete access record: Failed to read column family ID.");
|
||||
}
|
||||
@ -167,7 +175,7 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
|
||||
return Status::Incomplete(
|
||||
"Incomplete access record: Failed to read level.");
|
||||
}
|
||||
if (!GetFixed32(&enc_slice, &record->sst_fd_number)) {
|
||||
if (!GetFixed64(&enc_slice, &record->sst_fd_number)) {
|
||||
return Status::Incomplete(
|
||||
"Incomplete access record: Failed to read SST file number.");
|
||||
}
|
||||
@ -190,13 +198,18 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
|
||||
record->no_insert = static_cast<Boolean>(enc_slice[0]);
|
||||
enc_slice.remove_prefix(kCharSize);
|
||||
|
||||
if (ShouldTraceReferencedKey(*record)) {
|
||||
if (BlockCacheTraceHelper::ShouldTraceReferencedKey(record->block_type,
|
||||
record->caller)) {
|
||||
Slice referenced_key;
|
||||
if (!GetLengthPrefixedSlice(&enc_slice, &referenced_key)) {
|
||||
return Status::Incomplete(
|
||||
"Incomplete access record: Failed to read the referenced key.");
|
||||
}
|
||||
record->referenced_key = referenced_key.ToString();
|
||||
if (!GetFixed64(&enc_slice, &record->referenced_data_size)) {
|
||||
return Status::Incomplete(
|
||||
"Incomplete access record: Failed to read the referenced data size.");
|
||||
}
|
||||
if (!GetFixed64(&enc_slice, &record->num_keys_in_block)) {
|
||||
return Status::Incomplete(
|
||||
"Incomplete access record: Failed to read the number of keys in the "
|
||||
@ -205,10 +218,9 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
|
||||
if (enc_slice.empty()) {
|
||||
return Status::Incomplete(
|
||||
"Incomplete access record: Failed to read "
|
||||
"is_referenced_key_exist_in_block.");
|
||||
"referenced_key_exist_in_block.");
|
||||
}
|
||||
record->is_referenced_key_exist_in_block =
|
||||
static_cast<Boolean>(enc_slice[0]);
|
||||
record->referenced_key_exist_in_block = static_cast<Boolean>(enc_slice[0]);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -239,7 +251,10 @@ void BlockCacheTracer::EndTrace() {
|
||||
writer_.store(nullptr);
|
||||
}
|
||||
|
||||
Status BlockCacheTracer::WriteBlockAccess(const BlockCacheTraceRecord& record) {
|
||||
Status BlockCacheTracer::WriteBlockAccess(const BlockCacheTraceRecord& record,
|
||||
const Slice& block_key,
|
||||
const Slice& cf_name,
|
||||
const Slice& referenced_key) {
|
||||
if (!writer_.load() || !ShouldTrace(record, trace_options_)) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -247,7 +262,8 @@ Status BlockCacheTracer::WriteBlockAccess(const BlockCacheTraceRecord& record) {
|
||||
if (!writer_.load()) {
|
||||
return Status::OK();
|
||||
}
|
||||
return writer_.load()->WriteBlockAccess(record);
|
||||
return writer_.load()->WriteBlockAccess(record, block_key, cf_name,
|
||||
referenced_key);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -49,28 +49,80 @@ struct BlockCacheLookupContext {
|
||||
BlockCacheLookupContext(const BlockCacheLookupCaller& _caller)
|
||||
: caller(_caller) {}
|
||||
const BlockCacheLookupCaller caller;
|
||||
// These are populated when we perform lookup/insert on block cache. The block
|
||||
// cache tracer uses this information when logging the block access at
|
||||
// BlockBasedTable::GET and BlockBasedTable::MultiGet.
|
||||
bool is_cache_hit = false;
|
||||
bool no_insert = false;
|
||||
TraceType block_type = TraceType::kTraceMax;
|
||||
uint64_t block_size = 0;
|
||||
std::string block_key;
|
||||
uint64_t num_keys_in_block = 0;
|
||||
|
||||
void FillLookupContext(bool _is_cache_hit, bool _no_insert,
|
||||
TraceType _block_type, uint64_t _block_size,
|
||||
const std::string& _block_key,
|
||||
uint64_t _num_keys_in_block) {
|
||||
is_cache_hit = _is_cache_hit;
|
||||
no_insert = _no_insert;
|
||||
block_type = _block_type;
|
||||
block_size = _block_size;
|
||||
block_key = _block_key;
|
||||
num_keys_in_block = _num_keys_in_block;
|
||||
}
|
||||
};
|
||||
|
||||
enum Boolean : char { kTrue = 1, kFalse = 0 };
|
||||
|
||||
struct BlockCacheTraceRecord {
|
||||
// Required fields for all accesses.
|
||||
uint64_t access_timestamp;
|
||||
uint64_t access_timestamp = 0;
|
||||
std::string block_key;
|
||||
TraceType block_type;
|
||||
uint64_t block_size;
|
||||
uint32_t cf_id;
|
||||
TraceType block_type = TraceType::kTraceMax;
|
||||
uint64_t block_size = 0;
|
||||
uint64_t cf_id = 0;
|
||||
std::string cf_name;
|
||||
uint32_t level;
|
||||
uint32_t sst_fd_number;
|
||||
BlockCacheLookupCaller caller;
|
||||
Boolean is_cache_hit;
|
||||
Boolean no_insert;
|
||||
uint32_t level = 0;
|
||||
uint64_t sst_fd_number = 0;
|
||||
BlockCacheLookupCaller caller =
|
||||
BlockCacheLookupCaller::kMaxBlockCacheLookupCaller;
|
||||
Boolean is_cache_hit = Boolean::kFalse;
|
||||
Boolean no_insert = Boolean::kFalse;
|
||||
|
||||
// Required fields for data block and user Get/Multi-Get only.
|
||||
std::string referenced_key;
|
||||
uint64_t referenced_data_size = 0;
|
||||
uint64_t num_keys_in_block = 0;
|
||||
Boolean is_referenced_key_exist_in_block = Boolean::kFalse;
|
||||
Boolean referenced_key_exist_in_block = Boolean::kFalse;
|
||||
|
||||
BlockCacheTraceRecord() {}
|
||||
|
||||
BlockCacheTraceRecord(uint64_t _access_timestamp, std::string _block_key,
|
||||
TraceType _block_type, uint64_t _block_size,
|
||||
uint64_t _cf_id, std::string _cf_name, uint32_t _level,
|
||||
uint64_t _sst_fd_number, BlockCacheLookupCaller _caller,
|
||||
bool _is_cache_hit, bool _no_insert,
|
||||
std::string _referenced_key = "",
|
||||
uint64_t _referenced_data_size = 0,
|
||||
uint64_t _num_keys_in_block = 0,
|
||||
bool _referenced_key_exist_in_block = false)
|
||||
: access_timestamp(_access_timestamp),
|
||||
block_key(_block_key),
|
||||
block_type(_block_type),
|
||||
block_size(_block_size),
|
||||
cf_id(_cf_id),
|
||||
cf_name(_cf_name),
|
||||
level(_level),
|
||||
sst_fd_number(_sst_fd_number),
|
||||
caller(_caller),
|
||||
is_cache_hit(_is_cache_hit ? Boolean::kTrue : Boolean::kFalse),
|
||||
no_insert(_no_insert ? Boolean::kTrue : Boolean::kFalse),
|
||||
referenced_key(_referenced_key),
|
||||
referenced_data_size(_referenced_data_size),
|
||||
num_keys_in_block(_num_keys_in_block),
|
||||
referenced_key_exist_in_block(
|
||||
_referenced_key_exist_in_block ? Boolean::kTrue : Boolean::kFalse) {
|
||||
}
|
||||
};
|
||||
|
||||
struct BlockCacheTraceHeader {
|
||||
@ -79,7 +131,13 @@ struct BlockCacheTraceHeader {
|
||||
uint32_t rocksdb_minor_version;
|
||||
};
|
||||
|
||||
bool ShouldTraceReferencedKey(const BlockCacheTraceRecord& record);
|
||||
class BlockCacheTraceHelper {
|
||||
public:
|
||||
static bool ShouldTraceReferencedKey(TraceType block_type,
|
||||
BlockCacheLookupCaller caller);
|
||||
|
||||
static const std::string kUnknownColumnFamilyName;
|
||||
};
|
||||
|
||||
// BlockCacheTraceWriter captures all RocksDB block cache accesses using a
|
||||
// user-provided TraceWriter. Every RocksDB operation is written as a single
|
||||
@ -96,7 +154,10 @@ class BlockCacheTraceWriter {
|
||||
BlockCacheTraceWriter(BlockCacheTraceWriter&&) = delete;
|
||||
BlockCacheTraceWriter& operator=(BlockCacheTraceWriter&&) = delete;
|
||||
|
||||
Status WriteBlockAccess(const BlockCacheTraceRecord& record);
|
||||
// Pass Slice references to avoid copy.
|
||||
Status WriteBlockAccess(const BlockCacheTraceRecord& record,
|
||||
const Slice& block_key, const Slice& cf_name,
|
||||
const Slice& referenced_key);
|
||||
|
||||
// Write a trace header at the beginning, typically on initiating a trace,
|
||||
// with some metadata like a magic number and RocksDB version.
|
||||
@ -148,7 +209,9 @@ class BlockCacheTracer {
|
||||
// Stop writing block cache accesses to the trace_writer.
|
||||
void EndTrace();
|
||||
|
||||
Status WriteBlockAccess(const BlockCacheTraceRecord& record);
|
||||
Status WriteBlockAccess(const BlockCacheTraceRecord& record,
|
||||
const Slice& block_key, const Slice& cf_name,
|
||||
const Slice& referenced_key);
|
||||
|
||||
private:
|
||||
TraceOptions trace_options_;
|
||||
|
@ -20,6 +20,7 @@ const uint32_t kLevel = 1;
|
||||
const uint64_t kSSTFDNumber = 100;
|
||||
const std::string kRefKeyPrefix = "test-get-";
|
||||
const uint64_t kNumKeysInBlock = 1024;
|
||||
const uint64_t kReferencedDataSize = 10;
|
||||
} // namespace
|
||||
|
||||
class BlockCacheTracerTest : public testing::Test {
|
||||
@ -61,7 +62,7 @@ class BlockCacheTracerTest : public testing::Test {
|
||||
BlockCacheTraceRecord record;
|
||||
record.block_type = block_type;
|
||||
record.block_size = kBlockSize + key_id;
|
||||
record.block_key = kBlockKeyPrefix + std::to_string(key_id);
|
||||
record.block_key = (kBlockKeyPrefix + std::to_string(key_id));
|
||||
record.access_timestamp = env_->NowMicros();
|
||||
record.cf_id = kCFId;
|
||||
record.cf_name = kDefaultColumnFamilyName;
|
||||
@ -73,10 +74,12 @@ class BlockCacheTracerTest : public testing::Test {
|
||||
// Provide these fields for all block types.
|
||||
// The writer should only write these fields for data blocks when the
|
||||
// caller is either GET or MGET.
|
||||
record.referenced_key = kRefKeyPrefix + std::to_string(key_id);
|
||||
record.is_referenced_key_exist_in_block = Boolean::kTrue;
|
||||
record.referenced_key = (kRefKeyPrefix + std::to_string(key_id));
|
||||
record.referenced_key_exist_in_block = Boolean::kTrue;
|
||||
record.num_keys_in_block = kNumKeysInBlock;
|
||||
ASSERT_OK(writer->WriteBlockAccess(record));
|
||||
record.referenced_data_size = kReferencedDataSize + key_id;
|
||||
ASSERT_OK(writer->WriteBlockAccess(
|
||||
record, record.block_key, record.cf_name, record.referenced_key));
|
||||
}
|
||||
}
|
||||
|
||||
@ -95,7 +98,7 @@ class BlockCacheTracerTest : public testing::Test {
|
||||
record.is_cache_hit = Boolean::kFalse;
|
||||
record.no_insert = Boolean::kFalse;
|
||||
record.referenced_key = kRefKeyPrefix + std::to_string(key_id);
|
||||
record.is_referenced_key_exist_in_block = Boolean::kTrue;
|
||||
record.referenced_key_exist_in_block = Boolean::kTrue;
|
||||
record.num_keys_in_block = kNumKeysInBlock;
|
||||
return record;
|
||||
}
|
||||
@ -122,13 +125,15 @@ class BlockCacheTracerTest : public testing::Test {
|
||||
record.caller == BlockCacheLookupCaller::kUserMGet)) {
|
||||
ASSERT_EQ(kRefKeyPrefix + std::to_string(key_id),
|
||||
record.referenced_key);
|
||||
ASSERT_EQ(Boolean::kTrue, record.is_referenced_key_exist_in_block);
|
||||
ASSERT_EQ(Boolean::kTrue, record.referenced_key_exist_in_block);
|
||||
ASSERT_EQ(kNumKeysInBlock, record.num_keys_in_block);
|
||||
ASSERT_EQ(kReferencedDataSize + key_id, record.referenced_data_size);
|
||||
continue;
|
||||
}
|
||||
ASSERT_EQ("", record.referenced_key);
|
||||
ASSERT_EQ(Boolean::kFalse, record.is_referenced_key_exist_in_block);
|
||||
ASSERT_EQ(Boolean::kFalse, record.referenced_key_exist_in_block);
|
||||
ASSERT_EQ(0, record.num_keys_in_block);
|
||||
ASSERT_EQ(0, record.referenced_data_size);
|
||||
}
|
||||
}
|
||||
|
||||
@ -147,7 +152,8 @@ TEST_F(BlockCacheTracerTest, AtomicWriteBeforeStartTrace) {
|
||||
BlockCacheTracer writer;
|
||||
// The record should not be written to the trace_file since StartTrace is
|
||||
// not called.
|
||||
ASSERT_OK(writer.WriteBlockAccess(record));
|
||||
ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name,
|
||||
record.referenced_key));
|
||||
ASSERT_OK(env_->FileExists(trace_file_path_));
|
||||
}
|
||||
{
|
||||
@ -170,7 +176,8 @@ TEST_F(BlockCacheTracerTest, AtomicWrite) {
|
||||
&trace_writer));
|
||||
BlockCacheTracer writer;
|
||||
ASSERT_OK(writer.StartTrace(env_, trace_opt, std::move(trace_writer)));
|
||||
ASSERT_OK(writer.WriteBlockAccess(record));
|
||||
ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name,
|
||||
record.referenced_key));
|
||||
ASSERT_OK(env_->FileExists(trace_file_path_));
|
||||
}
|
||||
{
|
||||
@ -197,11 +204,13 @@ TEST_F(BlockCacheTracerTest, AtomicNoWriteAfterEndTrace) {
|
||||
&trace_writer));
|
||||
BlockCacheTracer writer;
|
||||
ASSERT_OK(writer.StartTrace(env_, trace_opt, std::move(trace_writer)));
|
||||
ASSERT_OK(writer.WriteBlockAccess(record));
|
||||
ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name,
|
||||
record.referenced_key));
|
||||
writer.EndTrace();
|
||||
// Write the record again. This time the record should not be written since
|
||||
// EndTrace is called.
|
||||
ASSERT_OK(writer.WriteBlockAccess(record));
|
||||
ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name,
|
||||
record.referenced_key));
|
||||
ASSERT_OK(env_->FileExists(trace_file_path_));
|
||||
}
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user