Integrate block cache tracer in block based table reader. (#5441)
Summary: This PR integrates the block cache tracer into the block-based table reader. The tracer writes block cache accesses using the trace_writer. The tracer is null in this PR, so nothing is logged yet.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5441
Differential Revision: D15772029
Pulled By: HaoyuHuang
fbshipit-source-id: a64adb92642cd23222e0ba8b10d86bf522b42f9b
This commit is contained in:
parent
f1219644ec
commit
7a8d7358bb
@ -1877,9 +1877,8 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||||
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_blk_handle,
|
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_blk_handle,
|
||||||
const bool is_a_filter_partition, bool no_io, GetContext* get_context,
|
const bool is_a_filter_partition, bool no_io, GetContext* get_context,
|
||||||
BlockCacheLookupContext* /*lookup_context*/,
|
BlockCacheLookupContext* lookup_context,
|
||||||
const SliceTransform* prefix_extractor) const {
|
const SliceTransform* prefix_extractor) const {
|
||||||
// TODO(haoyu): Trace filter block access here.
|
|
||||||
// If cache_index_and_filter_blocks is false, filter should be pre-populated.
|
// If cache_index_and_filter_blocks is false, filter should be pre-populated.
|
||||||
// We will return rep_->filter anyway. rep_->filter can be nullptr if filter
|
// We will return rep_->filter anyway. rep_->filter can be nullptr if filter
|
||||||
// read fails at Open() time. We don't want to reload again since it will
|
// read fails at Open() time. We don't want to reload again since it will
|
||||||
@ -1912,17 +1911,22 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
GetEntryFromCache(block_cache, key, BlockType::kFilter, get_context);
|
GetEntryFromCache(block_cache, key, BlockType::kFilter, get_context);
|
||||||
|
|
||||||
FilterBlockReader* filter = nullptr;
|
FilterBlockReader* filter = nullptr;
|
||||||
|
size_t usage = 0;
|
||||||
|
bool is_cache_hit = false;
|
||||||
|
bool return_empty_reader = false;
|
||||||
if (cache_handle != nullptr) {
|
if (cache_handle != nullptr) {
|
||||||
filter =
|
filter =
|
||||||
reinterpret_cast<FilterBlockReader*>(block_cache->Value(cache_handle));
|
reinterpret_cast<FilterBlockReader*>(block_cache->Value(cache_handle));
|
||||||
|
usage = filter->ApproximateMemoryUsage();
|
||||||
|
is_cache_hit = true;
|
||||||
} else if (no_io) {
|
} else if (no_io) {
|
||||||
// Do not invoke any io.
|
// Do not invoke any io.
|
||||||
return CachableEntry<FilterBlockReader>();
|
return_empty_reader = true;
|
||||||
} else {
|
} else {
|
||||||
filter = ReadFilter(prefetch_buffer, filter_blk_handle,
|
filter = ReadFilter(prefetch_buffer, filter_blk_handle,
|
||||||
is_a_filter_partition, prefix_extractor);
|
is_a_filter_partition, prefix_extractor);
|
||||||
if (filter != nullptr) {
|
if (filter != nullptr) {
|
||||||
size_t usage = filter->ApproximateMemoryUsage();
|
usage = filter->ApproximateMemoryUsage();
|
||||||
Status s = block_cache->Insert(
|
Status s = block_cache->Insert(
|
||||||
key, filter, usage, &DeleteCachedFilterEntry, &cache_handle,
|
key, filter, usage, &DeleteCachedFilterEntry, &cache_handle,
|
||||||
rep_->table_options.cache_index_and_filter_blocks_with_high_priority
|
rep_->table_options.cache_index_and_filter_blocks_with_high_priority
|
||||||
@ -1934,19 +1938,36 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
} else {
|
} else {
|
||||||
RecordTick(rep_->ioptions.statistics, BLOCK_CACHE_ADD_FAILURES);
|
RecordTick(rep_->ioptions.statistics, BLOCK_CACHE_ADD_FAILURES);
|
||||||
delete filter;
|
delete filter;
|
||||||
return CachableEntry<FilterBlockReader>();
|
return_empty_reader = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (block_cache_tracer_ && lookup_context) {
|
||||||
|
// Avoid making copy of block_key and cf_name when constructing the access
|
||||||
|
// record.
|
||||||
|
BlockCacheTraceRecord access_record(
|
||||||
|
rep_->ioptions.env->NowMicros(),
|
||||||
|
/*block_key=*/"", TraceType::kBlockTraceFilterBlock,
|
||||||
|
/*block_size=*/usage, rep_->cf_id_for_tracing(),
|
||||||
|
/*cf_name=*/"", rep_->level_for_tracing(),
|
||||||
|
rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
|
||||||
|
/*no_insert=*/no_io);
|
||||||
|
block_cache_tracer_->WriteBlockAccess(access_record, key,
|
||||||
|
rep_->cf_name_for_tracing(),
|
||||||
|
/*referenced_key=*/nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (return_empty_reader) {
|
||||||
|
return CachableEntry<FilterBlockReader>();
|
||||||
|
}
|
||||||
return {filter, cache_handle ? block_cache : nullptr, cache_handle,
|
return {filter, cache_handle ? block_cache : nullptr, cache_handle,
|
||||||
/*own_value=*/false};
|
/*own_value=*/false};
|
||||||
}
|
}
|
||||||
|
|
||||||
CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
|
CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
|
||||||
FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context,
|
FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context,
|
||||||
BlockCacheLookupContext* /*lookup_context*/) const {
|
BlockCacheLookupContext* lookup_context) const {
|
||||||
// TODO(haoyu): Trace the access on the uncompression dictionary here.
|
|
||||||
if (!rep_->table_options.cache_index_and_filter_blocks) {
|
if (!rep_->table_options.cache_index_and_filter_blocks) {
|
||||||
// block cache is either disabled or not used for meta-blocks. In either
|
// block cache is either disabled or not used for meta-blocks. In either
|
||||||
// case, BlockBasedTableReader is the owner of the uncompression dictionary.
|
// case, BlockBasedTableReader is the owner of the uncompression dictionary.
|
||||||
@ -1964,9 +1985,13 @@ CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
|
|||||||
GetEntryFromCache(rep_->table_options.block_cache.get(), cache_key,
|
GetEntryFromCache(rep_->table_options.block_cache.get(), cache_key,
|
||||||
BlockType::kCompressionDictionary, get_context);
|
BlockType::kCompressionDictionary, get_context);
|
||||||
UncompressionDict* dict = nullptr;
|
UncompressionDict* dict = nullptr;
|
||||||
|
bool is_cache_hit = false;
|
||||||
|
size_t usage = 0;
|
||||||
if (cache_handle != nullptr) {
|
if (cache_handle != nullptr) {
|
||||||
dict = reinterpret_cast<UncompressionDict*>(
|
dict = reinterpret_cast<UncompressionDict*>(
|
||||||
rep_->table_options.block_cache->Value(cache_handle));
|
rep_->table_options.block_cache->Value(cache_handle));
|
||||||
|
is_cache_hit = true;
|
||||||
|
usage = dict->ApproximateMemoryUsage();
|
||||||
} else if (no_io) {
|
} else if (no_io) {
|
||||||
// Do not invoke any io.
|
// Do not invoke any io.
|
||||||
} else {
|
} else {
|
||||||
@ -1980,7 +2005,7 @@ CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
|
|||||||
new UncompressionDict(compression_dict_block->data.ToString(),
|
new UncompressionDict(compression_dict_block->data.ToString(),
|
||||||
rep_->blocks_definitely_zstd_compressed,
|
rep_->blocks_definitely_zstd_compressed,
|
||||||
rep_->ioptions.statistics));
|
rep_->ioptions.statistics));
|
||||||
const size_t usage = uncompression_dict->ApproximateMemoryUsage();
|
usage = uncompression_dict->ApproximateMemoryUsage();
|
||||||
s = rep_->table_options.block_cache->Insert(
|
s = rep_->table_options.block_cache->Insert(
|
||||||
cache_key, uncompression_dict.get(), usage,
|
cache_key, uncompression_dict.get(), usage,
|
||||||
&DeleteCachedUncompressionDictEntry, &cache_handle,
|
&DeleteCachedUncompressionDictEntry, &cache_handle,
|
||||||
@ -2000,6 +2025,20 @@ CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (block_cache_tracer_ && lookup_context) {
|
||||||
|
// Avoid making copy of block_key and cf_name when constructing the access
|
||||||
|
// record.
|
||||||
|
BlockCacheTraceRecord access_record(
|
||||||
|
rep_->ioptions.env->NowMicros(),
|
||||||
|
/*block_key=*/"", TraceType::kBlockTraceUncompressionDictBlock,
|
||||||
|
/*block_size=*/usage, rep_->cf_id_for_tracing(),
|
||||||
|
/*cf_name=*/"", rep_->level_for_tracing(),
|
||||||
|
rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
|
||||||
|
/*no_insert=*/no_io);
|
||||||
|
block_cache_tracer_->WriteBlockAccess(access_record, cache_key,
|
||||||
|
rep_->cf_name_for_tracing(),
|
||||||
|
/*referenced_key=*/nullptr);
|
||||||
|
}
|
||||||
return {dict, cache_handle ? rep_->table_options.block_cache.get() : nullptr,
|
return {dict, cache_handle ? rep_->table_options.block_cache.get() : nullptr,
|
||||||
cache_handle, false /* own_value */};
|
cache_handle, false /* own_value */};
|
||||||
}
|
}
|
||||||
@ -2116,13 +2155,10 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|||||||
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
|
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
|
||||||
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
||||||
CachableEntry<Block>* block_entry, BlockType block_type,
|
CachableEntry<Block>* block_entry, BlockType block_type,
|
||||||
GetContext* get_context,
|
GetContext* get_context, BlockCacheLookupContext* lookup_context) const {
|
||||||
BlockCacheLookupContext* /*lookup_context*/) const {
|
|
||||||
// TODO(haoyu): Trace data/index/range deletion block access here.
|
|
||||||
assert(block_entry != nullptr);
|
assert(block_entry != nullptr);
|
||||||
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
||||||
Cache* block_cache = rep_->table_options.block_cache.get();
|
Cache* block_cache = rep_->table_options.block_cache.get();
|
||||||
|
|
||||||
// No point to cache compressed blocks if it never goes away
|
// No point to cache compressed blocks if it never goes away
|
||||||
Cache* block_cache_compressed =
|
Cache* block_cache_compressed =
|
||||||
rep_->immortal_table ? nullptr
|
rep_->immortal_table ? nullptr
|
||||||
@ -2136,6 +2172,8 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|||||||
char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||||
Slice key /* key to the block cache */;
|
Slice key /* key to the block cache */;
|
||||||
Slice ckey /* key to the compressed block cache */;
|
Slice ckey /* key to the compressed block cache */;
|
||||||
|
bool is_cache_hit = false;
|
||||||
|
bool no_insert = true;
|
||||||
if (block_cache != nullptr || block_cache_compressed != nullptr) {
|
if (block_cache != nullptr || block_cache_compressed != nullptr) {
|
||||||
// create key for block cache
|
// create key for block cache
|
||||||
if (block_cache != nullptr) {
|
if (block_cache != nullptr) {
|
||||||
@ -2152,10 +2190,15 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|||||||
s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed,
|
s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed,
|
||||||
ro, block_entry, uncompression_dict, block_type,
|
ro, block_entry, uncompression_dict, block_type,
|
||||||
get_context);
|
get_context);
|
||||||
|
if (block_entry->GetValue()) {
|
||||||
|
// TODO(haoyu): Differentiate cache hit on uncompressed block cache and
|
||||||
|
// compressed block cache.
|
||||||
|
is_cache_hit = true;
|
||||||
|
}
|
||||||
// Can't find the block from the cache. If I/O is allowed, read from the
|
// Can't find the block from the cache. If I/O is allowed, read from the
|
||||||
// file.
|
// file.
|
||||||
if (block_entry->GetValue() == nullptr && !no_io && ro.fill_cache) {
|
if (block_entry->GetValue() == nullptr && !no_io && ro.fill_cache) {
|
||||||
|
no_insert = false;
|
||||||
Statistics* statistics = rep_->ioptions.statistics;
|
Statistics* statistics = rep_->ioptions.statistics;
|
||||||
bool do_decompress =
|
bool do_decompress =
|
||||||
block_cache_compressed == nullptr && rep_->blocks_maybe_compressed;
|
block_cache_compressed == nullptr && rep_->blocks_maybe_compressed;
|
||||||
@ -2186,6 +2229,59 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fill lookup_context.
|
||||||
|
if (block_cache_tracer_ && lookup_context) {
|
||||||
|
size_t usage = 0;
|
||||||
|
uint64_t nkeys = 0;
|
||||||
|
if (block_entry->GetValue()) {
|
||||||
|
// Approximate the number of keys in the block using restarts.
|
||||||
|
nkeys = rep_->table_options.block_restart_interval *
|
||||||
|
block_entry->GetValue()->NumRestarts();
|
||||||
|
usage = block_entry->GetValue()->ApproximateMemoryUsage();
|
||||||
|
}
|
||||||
|
TraceType trace_block_type = TraceType::kTraceMax;
|
||||||
|
switch (block_type) {
|
||||||
|
case BlockType::kIndex:
|
||||||
|
trace_block_type = TraceType::kBlockTraceIndexBlock;
|
||||||
|
break;
|
||||||
|
case BlockType::kData:
|
||||||
|
trace_block_type = TraceType::kBlockTraceDataBlock;
|
||||||
|
break;
|
||||||
|
case BlockType::kRangeDeletion:
|
||||||
|
trace_block_type = TraceType::kBlockTraceRangeDeletionBlock;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// This cannot happen.
|
||||||
|
assert(false);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (BlockCacheTraceHelper::ShouldTraceReferencedKey(
|
||||||
|
trace_block_type, lookup_context->caller)) {
|
||||||
|
// Defer logging the access to Get() and MultiGet() to trace additional
|
||||||
|
// information, e.g., the referenced key,
|
||||||
|
// referenced_key_exist_in_block.
|
||||||
|
|
||||||
|
// Make a copy of the block key here since it will be logged later.
|
||||||
|
lookup_context->FillLookupContext(
|
||||||
|
is_cache_hit, no_insert, trace_block_type,
|
||||||
|
/*block_size=*/usage, /*block_key=*/key.ToString(), nkeys);
|
||||||
|
} else {
|
||||||
|
// Avoid making copy of block_key and cf_name when constructing the access
|
||||||
|
// record.
|
||||||
|
BlockCacheTraceRecord access_record(
|
||||||
|
rep_->ioptions.env->NowMicros(),
|
||||||
|
/*block_key=*/"", trace_block_type,
|
||||||
|
/*block_size=*/usage, rep_->cf_id_for_tracing(),
|
||||||
|
/*cf_name=*/"", rep_->level_for_tracing(),
|
||||||
|
rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
|
||||||
|
no_insert);
|
||||||
|
block_cache_tracer_->WriteBlockAccess(access_record, key,
|
||||||
|
rep_->cf_name_for_tracing(),
|
||||||
|
/*referenced_key=*/nullptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
assert(s.ok() || block_entry->GetValue() == nullptr);
|
assert(s.ok() || block_entry->GetValue() == nullptr);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
@ -2874,11 +2970,15 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|||||||
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
|
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
|
BlockCacheLookupContext lookup_data_block_context{
|
||||||
|
BlockCacheLookupCaller::kUserGet};
|
||||||
|
bool does_referenced_key_exist = false;
|
||||||
DataBlockIter biter;
|
DataBlockIter biter;
|
||||||
|
uint64_t referenced_data_size = 0;
|
||||||
NewDataBlockIterator<DataBlockIter>(
|
NewDataBlockIterator<DataBlockIter>(
|
||||||
read_options, iiter->value(), &biter, BlockType::kData,
|
read_options, iiter->value(), &biter, BlockType::kData,
|
||||||
/*key_includes_seq=*/true,
|
/*key_includes_seq=*/true,
|
||||||
/*index_key_is_full=*/true, get_context, &lookup_context,
|
/*index_key_is_full=*/true, get_context, &lookup_data_block_context,
|
||||||
/*s=*/Status(), /*prefetch_buffer*/ nullptr);
|
/*s=*/Status(), /*prefetch_buffer*/ nullptr);
|
||||||
|
|
||||||
if (read_options.read_tier == kBlockCacheTier &&
|
if (read_options.read_tier == kBlockCacheTier &&
|
||||||
@ -2902,25 +3002,47 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|||||||
// the end of the block, i.e. cannot be in the following blocks
|
// the end of the block, i.e. cannot be in the following blocks
|
||||||
// either. In this case, the seek_key cannot be found, so we break
|
// either. In this case, the seek_key cannot be found, so we break
|
||||||
// from the top level for-loop.
|
// from the top level for-loop.
|
||||||
break;
|
done = true;
|
||||||
}
|
} else {
|
||||||
|
// Call the *saver function on each entry/block until it returns false
|
||||||
|
for (; biter.Valid(); biter.Next()) {
|
||||||
|
ParsedInternalKey parsed_key;
|
||||||
|
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
||||||
|
s = Status::Corruption(Slice());
|
||||||
|
}
|
||||||
|
|
||||||
// Call the *saver function on each entry/block until it returns false
|
if (!get_context->SaveValue(
|
||||||
for (; biter.Valid(); biter.Next()) {
|
parsed_key, biter.value(), &matched,
|
||||||
ParsedInternalKey parsed_key;
|
biter.IsValuePinned() ? &biter : nullptr)) {
|
||||||
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
does_referenced_key_exist = true;
|
||||||
s = Status::Corruption(Slice());
|
referenced_data_size = biter.key().size() + biter.value().size();
|
||||||
}
|
done = true;
|
||||||
|
break;
|
||||||
if (!get_context->SaveValue(
|
}
|
||||||
parsed_key, biter.value(), &matched,
|
|
||||||
biter.IsValuePinned() ? &biter : nullptr)) {
|
|
||||||
done = true;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
s = biter.status();
|
||||||
|
}
|
||||||
|
// Write the block cache access record.
|
||||||
|
if (block_cache_tracer_) {
|
||||||
|
// Avoid making copy of block_key, cf_name, and referenced_key when
|
||||||
|
// constructing the access record.
|
||||||
|
BlockCacheTraceRecord access_record(
|
||||||
|
rep_->ioptions.env->NowMicros(),
|
||||||
|
/*block_key=*/"", lookup_data_block_context.block_type,
|
||||||
|
lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
|
||||||
|
/*cf_name=*/"", rep_->level_for_tracing(),
|
||||||
|
rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
|
||||||
|
lookup_data_block_context.is_cache_hit,
|
||||||
|
lookup_data_block_context.no_insert,
|
||||||
|
/*referenced_key=*/"", referenced_data_size,
|
||||||
|
lookup_data_block_context.num_keys_in_block,
|
||||||
|
does_referenced_key_exist);
|
||||||
|
block_cache_tracer_->WriteBlockAccess(
|
||||||
|
access_record, lookup_data_block_context.block_key,
|
||||||
|
rep_->cf_name_for_tracing(), key);
|
||||||
}
|
}
|
||||||
s = biter.status();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (done) {
|
if (done) {
|
||||||
// Avoid the extra Next which is expensive in two-level indexes
|
// Avoid the extra Next which is expensive in two-level indexes
|
||||||
break;
|
break;
|
||||||
@ -2992,14 +3114,18 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|||||||
bool done = false;
|
bool done = false;
|
||||||
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
|
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
|
||||||
bool reusing_block = true;
|
bool reusing_block = true;
|
||||||
|
uint64_t referenced_data_size = 0;
|
||||||
|
bool does_referenced_key_exist = false;
|
||||||
|
BlockCacheLookupContext lookup_data_block_context(
|
||||||
|
BlockCacheLookupCaller::kUserMGet);
|
||||||
if (iiter->value().offset() != offset) {
|
if (iiter->value().offset() != offset) {
|
||||||
offset = iiter->value().offset();
|
offset = iiter->value().offset();
|
||||||
biter.Invalidate(Status::OK());
|
biter.Invalidate(Status::OK());
|
||||||
NewDataBlockIterator<DataBlockIter>(
|
NewDataBlockIterator<DataBlockIter>(
|
||||||
read_options, iiter->value(), &biter, BlockType::kData,
|
read_options, iiter->value(), &biter, BlockType::kData,
|
||||||
/*key_includes_seq=*/false,
|
/*key_includes_seq=*/false,
|
||||||
/*index_key_is_full=*/true, get_context, &lookup_context,
|
/*index_key_is_full=*/true, get_context,
|
||||||
Status(), nullptr);
|
&lookup_data_block_context, Status(), nullptr);
|
||||||
reusing_block = false;
|
reusing_block = false;
|
||||||
}
|
}
|
||||||
if (read_options.read_tier == kBlockCacheTier &&
|
if (read_options.read_tier == kBlockCacheTier &&
|
||||||
@ -3021,38 +3147,59 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
|||||||
// the end of the block, i.e. cannot be in the following blocks
|
// the end of the block, i.e. cannot be in the following blocks
|
||||||
// either. In this case, the seek_key cannot be found, so we break
|
// either. In this case, the seek_key cannot be found, so we break
|
||||||
// from the top level for-loop.
|
// from the top level for-loop.
|
||||||
break;
|
done = true;
|
||||||
}
|
} else {
|
||||||
|
// Call the *saver function on each entry/block until it returns false
|
||||||
|
for (; biter.Valid(); biter.Next()) {
|
||||||
|
ParsedInternalKey parsed_key;
|
||||||
|
Cleanable dummy;
|
||||||
|
Cleanable* value_pinner = nullptr;
|
||||||
|
|
||||||
// Call the *saver function on each entry/block until it returns false
|
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
||||||
for (; biter.Valid(); biter.Next()) {
|
s = Status::Corruption(Slice());
|
||||||
ParsedInternalKey parsed_key;
|
}
|
||||||
Cleanable dummy;
|
if (biter.IsValuePinned()) {
|
||||||
Cleanable* value_pinner = nullptr;
|
if (reusing_block) {
|
||||||
|
Cache* block_cache = rep_->table_options.block_cache.get();
|
||||||
|
assert(biter.cache_handle() != nullptr);
|
||||||
|
block_cache->Ref(biter.cache_handle());
|
||||||
|
dummy.RegisterCleanup(&ReleaseCachedEntry, block_cache,
|
||||||
|
biter.cache_handle());
|
||||||
|
value_pinner = &dummy;
|
||||||
|
} else {
|
||||||
|
value_pinner = &biter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
if (!get_context->SaveValue(parsed_key, biter.value(), &matched,
|
||||||
s = Status::Corruption(Slice());
|
value_pinner)) {
|
||||||
}
|
does_referenced_key_exist = true;
|
||||||
if (biter.IsValuePinned()) {
|
referenced_data_size = biter.key().size() + biter.value().size();
|
||||||
if (reusing_block) {
|
done = true;
|
||||||
Cache* block_cache = rep_->table_options.block_cache.get();
|
break;
|
||||||
assert(biter.cache_handle() != nullptr);
|
|
||||||
block_cache->Ref(biter.cache_handle());
|
|
||||||
dummy.RegisterCleanup(&ReleaseCachedEntry, block_cache,
|
|
||||||
biter.cache_handle());
|
|
||||||
value_pinner = &dummy;
|
|
||||||
} else {
|
|
||||||
value_pinner = &biter;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
s = biter.status();
|
||||||
if (!get_context->SaveValue(
|
}
|
||||||
parsed_key, biter.value(), &matched, value_pinner)) {
|
// Write the block cache access.
|
||||||
done = true;
|
if (block_cache_tracer_) {
|
||||||
break;
|
// Avoid making copy of block_key, cf_name, and referenced_key when
|
||||||
}
|
// constructing the access record.
|
||||||
|
BlockCacheTraceRecord access_record(
|
||||||
|
rep_->ioptions.env->NowMicros(),
|
||||||
|
/*block_key=*/"", lookup_data_block_context.block_type,
|
||||||
|
lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
|
||||||
|
/*cf_name=*/"", rep_->level_for_tracing(),
|
||||||
|
rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
|
||||||
|
lookup_data_block_context.is_cache_hit,
|
||||||
|
lookup_data_block_context.no_insert,
|
||||||
|
/*referenced_key=*/"", referenced_data_size,
|
||||||
|
lookup_data_block_context.num_keys_in_block,
|
||||||
|
does_referenced_key_exist);
|
||||||
|
block_cache_tracer_->WriteBlockAccess(
|
||||||
|
access_record, lookup_data_block_context.block_key,
|
||||||
|
rep_->cf_name_for_tracing(), key);
|
||||||
}
|
}
|
||||||
s = biter.status();
|
|
||||||
if (done) {
|
if (done) {
|
||||||
// Avoid the extra Next which is expensive in two-level indexes
|
// Avoid the extra Next which is expensive in two-level indexes
|
||||||
break;
|
break;
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "db/range_tombstone_fragmenter.h"
|
#include "db/range_tombstone_fragmenter.h"
|
||||||
|
#include "file/filename.h"
|
||||||
#include "options/cf_options.h"
|
#include "options/cf_options.h"
|
||||||
#include "rocksdb/options.h"
|
#include "rocksdb/options.h"
|
||||||
#include "rocksdb/persistent_cache.h"
|
#include "rocksdb/persistent_cache.h"
|
||||||
@ -571,6 +572,23 @@ struct BlockBasedTable::Rep {
|
|||||||
? kDisableGlobalSequenceNumber
|
? kDisableGlobalSequenceNumber
|
||||||
: global_seqno;
|
: global_seqno;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Column family ID to record in block cache trace records.
// Returns table_properties->column_family_id when table_properties is set;
// otherwise falls back to
// TablePropertiesCollectorFactory::Context::kUnknownColumnFamily.
uint64_t cf_id_for_tracing() const {
|
||||||
|
return table_properties ? table_properties->column_family_id
|
||||||
|
: rocksdb::TablePropertiesCollectorFactory::
|
||||||
|
Context::kUnknownColumnFamily;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Column family name to record in block cache trace records.
// Returns a Slice over table_properties->column_family_name when
// table_properties is set; otherwise a Slice over
// BlockCacheTraceHelper::kUnknownColumnFamilyName.
// NOTE(review): the returned Slice presumably does not own its bytes, so it
// should not outlive table_properties — confirm against Slice's definition.
Slice cf_name_for_tracing() const {
|
||||||
|
return table_properties ? table_properties->column_family_name
|
||||||
|
: BlockCacheTraceHelper::kUnknownColumnFamilyName;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Level to record in block cache trace records: `level` when it is
// non-negative, otherwise UINT32_MAX as the "unknown level" sentinel.
uint32_t level_for_tracing() const { return level >= 0 ? level : UINT32_MAX; }
|
||||||
|
|
||||||
|
// SST file number to record in block cache trace records.
// When `file` is set, the number is obtained by passing the file's name to
// TableFileNameToNumber(); when `file` is null, UINT64_MAX is returned as the
// "unknown file" sentinel.
uint64_t sst_number_for_tracing() const {
|
||||||
|
return file ? TableFileNameToNumber(file->file_name()) : UINT64_MAX;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Iterates over the contents of BlockBasedTable.
|
// Iterates over the contents of BlockBasedTable.
|
||||||
|
@ -35,10 +35,11 @@ struct BlockAccessInfo {
|
|||||||
block_size = access.block_size;
|
block_size = access.block_size;
|
||||||
caller_num_access_map[access.caller]++;
|
caller_num_access_map[access.caller]++;
|
||||||
num_accesses++;
|
num_accesses++;
|
||||||
if (ShouldTraceReferencedKey(access)) {
|
if (BlockCacheTraceHelper::ShouldTraceReferencedKey(access.block_type,
|
||||||
|
access.caller)) {
|
||||||
num_keys = access.num_keys_in_block;
|
num_keys = access.num_keys_in_block;
|
||||||
|
|
||||||
if (access.is_referenced_key_exist_in_block == Boolean::kTrue) {
|
if (access.referenced_key_exist_in_block == Boolean::kTrue) {
|
||||||
key_num_access_map[access.referenced_key]++;
|
key_num_access_map[access.referenced_key]++;
|
||||||
num_referenced_key_exist_in_block++;
|
num_referenced_key_exist_in_block++;
|
||||||
} else {
|
} else {
|
||||||
|
@ -89,9 +89,10 @@ class BlockCacheTracerTest : public testing::Test {
|
|||||||
// The writer should only write these fields for data blocks and the
|
// The writer should only write these fields for data blocks and the
|
||||||
// caller is either GET or MGET.
|
// caller is either GET or MGET.
|
||||||
record.referenced_key = kRefKeyPrefix + std::to_string(key_id);
|
record.referenced_key = kRefKeyPrefix + std::to_string(key_id);
|
||||||
record.is_referenced_key_exist_in_block = Boolean::kTrue;
|
record.referenced_key_exist_in_block = Boolean::kTrue;
|
||||||
record.num_keys_in_block = kNumKeysInBlock;
|
record.num_keys_in_block = kNumKeysInBlock;
|
||||||
ASSERT_OK(writer->WriteBlockAccess(record));
|
ASSERT_OK(writer->WriteBlockAccess(
|
||||||
|
record, record.block_key, record.cf_name, record.referenced_key));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,13 +15,6 @@ namespace rocksdb {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
const unsigned int kCharSize = 1;
|
const unsigned int kCharSize = 1;
|
||||||
} // namespace
|
|
||||||
|
|
||||||
bool ShouldTraceReferencedKey(const BlockCacheTraceRecord& record) {
|
|
||||||
return (record.block_type == TraceType::kBlockTraceDataBlock) &&
|
|
||||||
(record.caller == BlockCacheLookupCaller::kUserGet ||
|
|
||||||
record.caller == BlockCacheLookupCaller::kUserMGet);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ShouldTrace(const BlockCacheTraceRecord& record,
|
bool ShouldTrace(const BlockCacheTraceRecord& record,
|
||||||
const TraceOptions& trace_options) {
|
const TraceOptions& trace_options) {
|
||||||
@ -34,6 +27,17 @@ bool ShouldTrace(const BlockCacheTraceRecord& record,
|
|||||||
const uint64_t hash = GetSliceNPHash64(Slice(record.block_key));
|
const uint64_t hash = GetSliceNPHash64(Slice(record.block_key));
|
||||||
return hash % trace_options.sampling_frequency == 0;
|
return hash % trace_options.sampling_frequency == 0;
|
||||||
}
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
const std::string BlockCacheTraceHelper::kUnknownColumnFamilyName =
|
||||||
|
"UnknownColumnFamily";
|
||||||
|
|
||||||
|
// Returns true only when the access is to a data block
// (TraceType::kBlockTraceDataBlock) and the caller is kUserGet or kUserMGet.
// Callers in this change use the result to decide whether the
// referenced-key fields (referenced key, referenced data size, number of keys
// in the block, key-exists flag) are written to the trace.
bool BlockCacheTraceHelper::ShouldTraceReferencedKey(
|
||||||
|
TraceType block_type, BlockCacheLookupCaller caller) {
|
||||||
|
return (block_type == TraceType::kBlockTraceDataBlock) &&
|
||||||
|
(caller == BlockCacheLookupCaller::kUserGet ||
|
||||||
|
caller == BlockCacheLookupCaller::kUserMGet);
|
||||||
|
}
|
||||||
|
|
||||||
BlockCacheTraceWriter::BlockCacheTraceWriter(
|
BlockCacheTraceWriter::BlockCacheTraceWriter(
|
||||||
Env* env, const TraceOptions& trace_options,
|
Env* env, const TraceOptions& trace_options,
|
||||||
@ -43,7 +47,8 @@ BlockCacheTraceWriter::BlockCacheTraceWriter(
|
|||||||
trace_writer_(std::move(trace_writer)) {}
|
trace_writer_(std::move(trace_writer)) {}
|
||||||
|
|
||||||
Status BlockCacheTraceWriter::WriteBlockAccess(
|
Status BlockCacheTraceWriter::WriteBlockAccess(
|
||||||
const BlockCacheTraceRecord& record) {
|
const BlockCacheTraceRecord& record, const Slice& block_key,
|
||||||
|
const Slice& cf_name, const Slice& referenced_key) {
|
||||||
uint64_t trace_file_size = trace_writer_->GetFileSize();
|
uint64_t trace_file_size = trace_writer_->GetFileSize();
|
||||||
if (trace_file_size > trace_options_.max_trace_file_size) {
|
if (trace_file_size > trace_options_.max_trace_file_size) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
@ -51,19 +56,21 @@ Status BlockCacheTraceWriter::WriteBlockAccess(
|
|||||||
Trace trace;
|
Trace trace;
|
||||||
trace.ts = record.access_timestamp;
|
trace.ts = record.access_timestamp;
|
||||||
trace.type = record.block_type;
|
trace.type = record.block_type;
|
||||||
PutLengthPrefixedSlice(&trace.payload, record.block_key);
|
PutLengthPrefixedSlice(&trace.payload, block_key);
|
||||||
PutFixed64(&trace.payload, record.block_size);
|
PutFixed64(&trace.payload, record.block_size);
|
||||||
PutFixed32(&trace.payload, record.cf_id);
|
PutFixed64(&trace.payload, record.cf_id);
|
||||||
PutLengthPrefixedSlice(&trace.payload, record.cf_name);
|
PutLengthPrefixedSlice(&trace.payload, cf_name);
|
||||||
PutFixed32(&trace.payload, record.level);
|
PutFixed32(&trace.payload, record.level);
|
||||||
PutFixed32(&trace.payload, record.sst_fd_number);
|
PutFixed64(&trace.payload, record.sst_fd_number);
|
||||||
trace.payload.push_back(record.caller);
|
trace.payload.push_back(record.caller);
|
||||||
trace.payload.push_back(record.is_cache_hit);
|
trace.payload.push_back(record.is_cache_hit);
|
||||||
trace.payload.push_back(record.no_insert);
|
trace.payload.push_back(record.no_insert);
|
||||||
if (ShouldTraceReferencedKey(record)) {
|
if (BlockCacheTraceHelper::ShouldTraceReferencedKey(record.block_type,
|
||||||
PutLengthPrefixedSlice(&trace.payload, record.referenced_key);
|
record.caller)) {
|
||||||
|
PutLengthPrefixedSlice(&trace.payload, referenced_key);
|
||||||
|
PutFixed64(&trace.payload, record.referenced_data_size);
|
||||||
PutFixed64(&trace.payload, record.num_keys_in_block);
|
PutFixed64(&trace.payload, record.num_keys_in_block);
|
||||||
trace.payload.push_back(record.is_referenced_key_exist_in_block);
|
trace.payload.push_back(record.referenced_key_exist_in_block);
|
||||||
}
|
}
|
||||||
std::string encoded_trace;
|
std::string encoded_trace;
|
||||||
TracerHelper::EncodeTrace(trace, &encoded_trace);
|
TracerHelper::EncodeTrace(trace, &encoded_trace);
|
||||||
@ -143,6 +150,7 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
|
|||||||
record->access_timestamp = trace.ts;
|
record->access_timestamp = trace.ts;
|
||||||
record->block_type = trace.type;
|
record->block_type = trace.type;
|
||||||
Slice enc_slice = Slice(trace.payload);
|
Slice enc_slice = Slice(trace.payload);
|
||||||
|
|
||||||
Slice block_key;
|
Slice block_key;
|
||||||
if (!GetLengthPrefixedSlice(&enc_slice, &block_key)) {
|
if (!GetLengthPrefixedSlice(&enc_slice, &block_key)) {
|
||||||
return Status::Incomplete(
|
return Status::Incomplete(
|
||||||
@ -153,7 +161,7 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
|
|||||||
return Status::Incomplete(
|
return Status::Incomplete(
|
||||||
"Incomplete access record: Failed to read block size.");
|
"Incomplete access record: Failed to read block size.");
|
||||||
}
|
}
|
||||||
if (!GetFixed32(&enc_slice, &record->cf_id)) {
|
if (!GetFixed64(&enc_slice, &record->cf_id)) {
|
||||||
return Status::Incomplete(
|
return Status::Incomplete(
|
||||||
"Incomplete access record: Failed to read column family ID.");
|
"Incomplete access record: Failed to read column family ID.");
|
||||||
}
|
}
|
||||||
@ -167,7 +175,7 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
|
|||||||
return Status::Incomplete(
|
return Status::Incomplete(
|
||||||
"Incomplete access record: Failed to read level.");
|
"Incomplete access record: Failed to read level.");
|
||||||
}
|
}
|
||||||
if (!GetFixed32(&enc_slice, &record->sst_fd_number)) {
|
if (!GetFixed64(&enc_slice, &record->sst_fd_number)) {
|
||||||
return Status::Incomplete(
|
return Status::Incomplete(
|
||||||
"Incomplete access record: Failed to read SST file number.");
|
"Incomplete access record: Failed to read SST file number.");
|
||||||
}
|
}
|
||||||
@ -190,13 +198,18 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
|
|||||||
record->no_insert = static_cast<Boolean>(enc_slice[0]);
|
record->no_insert = static_cast<Boolean>(enc_slice[0]);
|
||||||
enc_slice.remove_prefix(kCharSize);
|
enc_slice.remove_prefix(kCharSize);
|
||||||
|
|
||||||
if (ShouldTraceReferencedKey(*record)) {
|
if (BlockCacheTraceHelper::ShouldTraceReferencedKey(record->block_type,
|
||||||
|
record->caller)) {
|
||||||
Slice referenced_key;
|
Slice referenced_key;
|
||||||
if (!GetLengthPrefixedSlice(&enc_slice, &referenced_key)) {
|
if (!GetLengthPrefixedSlice(&enc_slice, &referenced_key)) {
|
||||||
return Status::Incomplete(
|
return Status::Incomplete(
|
||||||
"Incomplete access record: Failed to read the referenced key.");
|
"Incomplete access record: Failed to read the referenced key.");
|
||||||
}
|
}
|
||||||
record->referenced_key = referenced_key.ToString();
|
record->referenced_key = referenced_key.ToString();
|
||||||
|
if (!GetFixed64(&enc_slice, &record->referenced_data_size)) {
|
||||||
|
return Status::Incomplete(
|
||||||
|
"Incomplete access record: Failed to read the referenced data size.");
|
||||||
|
}
|
||||||
if (!GetFixed64(&enc_slice, &record->num_keys_in_block)) {
|
if (!GetFixed64(&enc_slice, &record->num_keys_in_block)) {
|
||||||
return Status::Incomplete(
|
return Status::Incomplete(
|
||||||
"Incomplete access record: Failed to read the number of keys in the "
|
"Incomplete access record: Failed to read the number of keys in the "
|
||||||
@ -205,10 +218,9 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
|
|||||||
if (enc_slice.empty()) {
|
if (enc_slice.empty()) {
|
||||||
return Status::Incomplete(
|
return Status::Incomplete(
|
||||||
"Incomplete access record: Failed to read "
|
"Incomplete access record: Failed to read "
|
||||||
"is_referenced_key_exist_in_block.");
|
"referenced_key_exist_in_block.");
|
||||||
}
|
}
|
||||||
record->is_referenced_key_exist_in_block =
|
record->referenced_key_exist_in_block = static_cast<Boolean>(enc_slice[0]);
|
||||||
static_cast<Boolean>(enc_slice[0]);
|
|
||||||
}
|
}
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
@ -239,7 +251,10 @@ void BlockCacheTracer::EndTrace() {
|
|||||||
writer_.store(nullptr);
|
writer_.store(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
Status BlockCacheTracer::WriteBlockAccess(const BlockCacheTraceRecord& record) {
|
Status BlockCacheTracer::WriteBlockAccess(const BlockCacheTraceRecord& record,
|
||||||
|
const Slice& block_key,
|
||||||
|
const Slice& cf_name,
|
||||||
|
const Slice& referenced_key) {
|
||||||
if (!writer_.load() || !ShouldTrace(record, trace_options_)) {
|
if (!writer_.load() || !ShouldTrace(record, trace_options_)) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
@ -247,7 +262,8 @@ Status BlockCacheTracer::WriteBlockAccess(const BlockCacheTraceRecord& record) {
|
|||||||
if (!writer_.load()) {
|
if (!writer_.load()) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
return writer_.load()->WriteBlockAccess(record);
|
return writer_.load()->WriteBlockAccess(record, block_key, cf_name,
|
||||||
|
referenced_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -49,28 +49,80 @@ struct BlockCacheLookupContext {
|
|||||||
BlockCacheLookupContext(const BlockCacheLookupCaller& _caller)
|
BlockCacheLookupContext(const BlockCacheLookupCaller& _caller)
|
||||||
: caller(_caller) {}
|
: caller(_caller) {}
|
||||||
const BlockCacheLookupCaller caller;
|
const BlockCacheLookupCaller caller;
|
||||||
|
// These are populated when we perform lookup/insert on block cache. The block
|
||||||
|
// cache tracer uses this information when logging the block access at
|
||||||
|
// BlockBasedTable::GET and BlockBasedTable::MultiGet.
|
||||||
|
bool is_cache_hit = false;
|
||||||
|
bool no_insert = false;
|
||||||
|
TraceType block_type = TraceType::kTraceMax;
|
||||||
|
uint64_t block_size = 0;
|
||||||
|
std::string block_key;
|
||||||
|
uint64_t num_keys_in_block = 0;
|
||||||
|
|
||||||
|
void FillLookupContext(bool _is_cache_hit, bool _no_insert,
|
||||||
|
TraceType _block_type, uint64_t _block_size,
|
||||||
|
const std::string& _block_key,
|
||||||
|
uint64_t _num_keys_in_block) {
|
||||||
|
is_cache_hit = _is_cache_hit;
|
||||||
|
no_insert = _no_insert;
|
||||||
|
block_type = _block_type;
|
||||||
|
block_size = _block_size;
|
||||||
|
block_key = _block_key;
|
||||||
|
num_keys_in_block = _num_keys_in_block;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
enum Boolean : char { kTrue = 1, kFalse = 0 };
|
enum Boolean : char { kTrue = 1, kFalse = 0 };
|
||||||
|
|
||||||
struct BlockCacheTraceRecord {
|
struct BlockCacheTraceRecord {
|
||||||
// Required fields for all accesses.
|
// Required fields for all accesses.
|
||||||
uint64_t access_timestamp;
|
uint64_t access_timestamp = 0;
|
||||||
std::string block_key;
|
std::string block_key;
|
||||||
TraceType block_type;
|
TraceType block_type = TraceType::kTraceMax;
|
||||||
uint64_t block_size;
|
uint64_t block_size = 0;
|
||||||
uint32_t cf_id;
|
uint64_t cf_id = 0;
|
||||||
std::string cf_name;
|
std::string cf_name;
|
||||||
uint32_t level;
|
uint32_t level = 0;
|
||||||
uint32_t sst_fd_number;
|
uint64_t sst_fd_number = 0;
|
||||||
BlockCacheLookupCaller caller;
|
BlockCacheLookupCaller caller =
|
||||||
Boolean is_cache_hit;
|
BlockCacheLookupCaller::kMaxBlockCacheLookupCaller;
|
||||||
Boolean no_insert;
|
Boolean is_cache_hit = Boolean::kFalse;
|
||||||
|
Boolean no_insert = Boolean::kFalse;
|
||||||
|
|
||||||
// Required fields for data block and user Get/Multi-Get only.
|
// Required fields for data block and user Get/Multi-Get only.
|
||||||
std::string referenced_key;
|
std::string referenced_key;
|
||||||
|
uint64_t referenced_data_size = 0;
|
||||||
uint64_t num_keys_in_block = 0;
|
uint64_t num_keys_in_block = 0;
|
||||||
Boolean is_referenced_key_exist_in_block = Boolean::kFalse;
|
Boolean referenced_key_exist_in_block = Boolean::kFalse;
|
||||||
|
|
||||||
|
BlockCacheTraceRecord() {}
|
||||||
|
|
||||||
|
BlockCacheTraceRecord(uint64_t _access_timestamp, std::string _block_key,
|
||||||
|
TraceType _block_type, uint64_t _block_size,
|
||||||
|
uint64_t _cf_id, std::string _cf_name, uint32_t _level,
|
||||||
|
uint64_t _sst_fd_number, BlockCacheLookupCaller _caller,
|
||||||
|
bool _is_cache_hit, bool _no_insert,
|
||||||
|
std::string _referenced_key = "",
|
||||||
|
uint64_t _referenced_data_size = 0,
|
||||||
|
uint64_t _num_keys_in_block = 0,
|
||||||
|
bool _referenced_key_exist_in_block = false)
|
||||||
|
: access_timestamp(_access_timestamp),
|
||||||
|
block_key(_block_key),
|
||||||
|
block_type(_block_type),
|
||||||
|
block_size(_block_size),
|
||||||
|
cf_id(_cf_id),
|
||||||
|
cf_name(_cf_name),
|
||||||
|
level(_level),
|
||||||
|
sst_fd_number(_sst_fd_number),
|
||||||
|
caller(_caller),
|
||||||
|
is_cache_hit(_is_cache_hit ? Boolean::kTrue : Boolean::kFalse),
|
||||||
|
no_insert(_no_insert ? Boolean::kTrue : Boolean::kFalse),
|
||||||
|
referenced_key(_referenced_key),
|
||||||
|
referenced_data_size(_referenced_data_size),
|
||||||
|
num_keys_in_block(_num_keys_in_block),
|
||||||
|
referenced_key_exist_in_block(
|
||||||
|
_referenced_key_exist_in_block ? Boolean::kTrue : Boolean::kFalse) {
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BlockCacheTraceHeader {
|
struct BlockCacheTraceHeader {
|
||||||
@ -79,7 +131,13 @@ struct BlockCacheTraceHeader {
|
|||||||
uint32_t rocksdb_minor_version;
|
uint32_t rocksdb_minor_version;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool ShouldTraceReferencedKey(const BlockCacheTraceRecord& record);
|
class BlockCacheTraceHelper {
|
||||||
|
public:
|
||||||
|
static bool ShouldTraceReferencedKey(TraceType block_type,
|
||||||
|
BlockCacheLookupCaller caller);
|
||||||
|
|
||||||
|
static const std::string kUnknownColumnFamilyName;
|
||||||
|
};
|
||||||
|
|
||||||
// BlockCacheTraceWriter captures all RocksDB block cache accesses using a
|
// BlockCacheTraceWriter captures all RocksDB block cache accesses using a
|
||||||
// user-provided TraceWriter. Every RocksDB operation is written as a single
|
// user-provided TraceWriter. Every RocksDB operation is written as a single
|
||||||
@ -96,7 +154,10 @@ class BlockCacheTraceWriter {
|
|||||||
BlockCacheTraceWriter(BlockCacheTraceWriter&&) = delete;
|
BlockCacheTraceWriter(BlockCacheTraceWriter&&) = delete;
|
||||||
BlockCacheTraceWriter& operator=(BlockCacheTraceWriter&&) = delete;
|
BlockCacheTraceWriter& operator=(BlockCacheTraceWriter&&) = delete;
|
||||||
|
|
||||||
Status WriteBlockAccess(const BlockCacheTraceRecord& record);
|
// Pass Slice references to avoid copy.
|
||||||
|
Status WriteBlockAccess(const BlockCacheTraceRecord& record,
|
||||||
|
const Slice& block_key, const Slice& cf_name,
|
||||||
|
const Slice& referenced_key);
|
||||||
|
|
||||||
// Write a trace header at the beginning, typically on initiating a trace,
|
// Write a trace header at the beginning, typically on initiating a trace,
|
||||||
// with some metadata like a magic number and RocksDB version.
|
// with some metadata like a magic number and RocksDB version.
|
||||||
@ -148,7 +209,9 @@ class BlockCacheTracer {
|
|||||||
// Stop writing block cache accesses to the trace_writer.
|
// Stop writing block cache accesses to the trace_writer.
|
||||||
void EndTrace();
|
void EndTrace();
|
||||||
|
|
||||||
Status WriteBlockAccess(const BlockCacheTraceRecord& record);
|
Status WriteBlockAccess(const BlockCacheTraceRecord& record,
|
||||||
|
const Slice& block_key, const Slice& cf_name,
|
||||||
|
const Slice& referenced_key);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TraceOptions trace_options_;
|
TraceOptions trace_options_;
|
||||||
|
@ -20,6 +20,7 @@ const uint32_t kLevel = 1;
|
|||||||
const uint64_t kSSTFDNumber = 100;
|
const uint64_t kSSTFDNumber = 100;
|
||||||
const std::string kRefKeyPrefix = "test-get-";
|
const std::string kRefKeyPrefix = "test-get-";
|
||||||
const uint64_t kNumKeysInBlock = 1024;
|
const uint64_t kNumKeysInBlock = 1024;
|
||||||
|
const uint64_t kReferencedDataSize = 10;
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
class BlockCacheTracerTest : public testing::Test {
|
class BlockCacheTracerTest : public testing::Test {
|
||||||
@ -61,7 +62,7 @@ class BlockCacheTracerTest : public testing::Test {
|
|||||||
BlockCacheTraceRecord record;
|
BlockCacheTraceRecord record;
|
||||||
record.block_type = block_type;
|
record.block_type = block_type;
|
||||||
record.block_size = kBlockSize + key_id;
|
record.block_size = kBlockSize + key_id;
|
||||||
record.block_key = kBlockKeyPrefix + std::to_string(key_id);
|
record.block_key = (kBlockKeyPrefix + std::to_string(key_id));
|
||||||
record.access_timestamp = env_->NowMicros();
|
record.access_timestamp = env_->NowMicros();
|
||||||
record.cf_id = kCFId;
|
record.cf_id = kCFId;
|
||||||
record.cf_name = kDefaultColumnFamilyName;
|
record.cf_name = kDefaultColumnFamilyName;
|
||||||
@ -73,10 +74,12 @@ class BlockCacheTracerTest : public testing::Test {
|
|||||||
// Provide these fields for all block types.
|
// Provide these fields for all block types.
|
||||||
// The writer should only write these fields for data blocks and the
|
// The writer should only write these fields for data blocks and the
|
||||||
// caller is either GET or MGET.
|
// caller is either GET or MGET.
|
||||||
record.referenced_key = kRefKeyPrefix + std::to_string(key_id);
|
record.referenced_key = (kRefKeyPrefix + std::to_string(key_id));
|
||||||
record.is_referenced_key_exist_in_block = Boolean::kTrue;
|
record.referenced_key_exist_in_block = Boolean::kTrue;
|
||||||
record.num_keys_in_block = kNumKeysInBlock;
|
record.num_keys_in_block = kNumKeysInBlock;
|
||||||
ASSERT_OK(writer->WriteBlockAccess(record));
|
record.referenced_data_size = kReferencedDataSize + key_id;
|
||||||
|
ASSERT_OK(writer->WriteBlockAccess(
|
||||||
|
record, record.block_key, record.cf_name, record.referenced_key));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -95,7 +98,7 @@ class BlockCacheTracerTest : public testing::Test {
|
|||||||
record.is_cache_hit = Boolean::kFalse;
|
record.is_cache_hit = Boolean::kFalse;
|
||||||
record.no_insert = Boolean::kFalse;
|
record.no_insert = Boolean::kFalse;
|
||||||
record.referenced_key = kRefKeyPrefix + std::to_string(key_id);
|
record.referenced_key = kRefKeyPrefix + std::to_string(key_id);
|
||||||
record.is_referenced_key_exist_in_block = Boolean::kTrue;
|
record.referenced_key_exist_in_block = Boolean::kTrue;
|
||||||
record.num_keys_in_block = kNumKeysInBlock;
|
record.num_keys_in_block = kNumKeysInBlock;
|
||||||
return record;
|
return record;
|
||||||
}
|
}
|
||||||
@ -122,13 +125,15 @@ class BlockCacheTracerTest : public testing::Test {
|
|||||||
record.caller == BlockCacheLookupCaller::kUserMGet)) {
|
record.caller == BlockCacheLookupCaller::kUserMGet)) {
|
||||||
ASSERT_EQ(kRefKeyPrefix + std::to_string(key_id),
|
ASSERT_EQ(kRefKeyPrefix + std::to_string(key_id),
|
||||||
record.referenced_key);
|
record.referenced_key);
|
||||||
ASSERT_EQ(Boolean::kTrue, record.is_referenced_key_exist_in_block);
|
ASSERT_EQ(Boolean::kTrue, record.referenced_key_exist_in_block);
|
||||||
ASSERT_EQ(kNumKeysInBlock, record.num_keys_in_block);
|
ASSERT_EQ(kNumKeysInBlock, record.num_keys_in_block);
|
||||||
|
ASSERT_EQ(kReferencedDataSize + key_id, record.referenced_data_size);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
ASSERT_EQ("", record.referenced_key);
|
ASSERT_EQ("", record.referenced_key);
|
||||||
ASSERT_EQ(Boolean::kFalse, record.is_referenced_key_exist_in_block);
|
ASSERT_EQ(Boolean::kFalse, record.referenced_key_exist_in_block);
|
||||||
ASSERT_EQ(0, record.num_keys_in_block);
|
ASSERT_EQ(0, record.num_keys_in_block);
|
||||||
|
ASSERT_EQ(0, record.referenced_data_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -147,7 +152,8 @@ TEST_F(BlockCacheTracerTest, AtomicWriteBeforeStartTrace) {
|
|||||||
BlockCacheTracer writer;
|
BlockCacheTracer writer;
|
||||||
// The record should not be written to the trace_file since StartTrace is not
|
// The record should not be written to the trace_file since StartTrace is not
|
||||||
// called.
|
// called.
|
||||||
ASSERT_OK(writer.WriteBlockAccess(record));
|
ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name,
|
||||||
|
record.referenced_key));
|
||||||
ASSERT_OK(env_->FileExists(trace_file_path_));
|
ASSERT_OK(env_->FileExists(trace_file_path_));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
@ -170,7 +176,8 @@ TEST_F(BlockCacheTracerTest, AtomicWrite) {
|
|||||||
&trace_writer));
|
&trace_writer));
|
||||||
BlockCacheTracer writer;
|
BlockCacheTracer writer;
|
||||||
ASSERT_OK(writer.StartTrace(env_, trace_opt, std::move(trace_writer)));
|
ASSERT_OK(writer.StartTrace(env_, trace_opt, std::move(trace_writer)));
|
||||||
ASSERT_OK(writer.WriteBlockAccess(record));
|
ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name,
|
||||||
|
record.referenced_key));
|
||||||
ASSERT_OK(env_->FileExists(trace_file_path_));
|
ASSERT_OK(env_->FileExists(trace_file_path_));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
@ -197,11 +204,13 @@ TEST_F(BlockCacheTracerTest, AtomicNoWriteAfterEndTrace) {
|
|||||||
&trace_writer));
|
&trace_writer));
|
||||||
BlockCacheTracer writer;
|
BlockCacheTracer writer;
|
||||||
ASSERT_OK(writer.StartTrace(env_, trace_opt, std::move(trace_writer)));
|
ASSERT_OK(writer.StartTrace(env_, trace_opt, std::move(trace_writer)));
|
||||||
ASSERT_OK(writer.WriteBlockAccess(record));
|
ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name,
|
||||||
|
record.referenced_key));
|
||||||
writer.EndTrace();
|
writer.EndTrace();
|
||||||
// Write the record again. This time the record should not be written since
|
// Write the record again. This time the record should not be written since
|
||||||
// EndTrace is called.
|
// EndTrace is called.
|
||||||
ASSERT_OK(writer.WriteBlockAccess(record));
|
ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name,
|
||||||
|
record.referenced_key));
|
||||||
ASSERT_OK(env_->FileExists(trace_file_path_));
|
ASSERT_OK(env_->FileExists(trace_file_path_));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user