Reuse data block iterator in BlockBasedTableReader::MultiGet() (#5314)

Summary:
Instead of creating a new DataBlockIterator for every key in a MultiGet batch, reuse it if the next key is in the same block. This results in a small 1-2% CPU improvement (the benchmark below shows ~2% higher multireadrandom throughput).
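In outline, the batch loop hoists the iterator out of the per-key loop and rebuilds it only when the index points at a different block offset. A self-contained toy sketch of the pattern (illustrative names like BlockIterToy; not the actual RocksDB code):

#include <cstdint>
#include <limits>
#include <vector>

// Toy model of the reuse pattern (not RocksDB code): constructing the
// per-block iterator is the expensive step, so it is hoisted out of the
// per-key loop and redone only when the key falls in a different block.
struct BlockIterToy {
  uint64_t offset = std::numeric_limits<uint64_t>::max();
  int constructions = 0;
  void Reposition(uint64_t new_offset) {  // stands in for NewDataBlockIterator
    offset = new_offset;
    ++constructions;
  }
};

int main() {
  // Block offset of the data block holding each key of a sorted batch.
  std::vector<uint64_t> key_block_offsets = {0, 0, 0, 4096, 4096, 8192};
  BlockIterToy biter;  // one iterator reused across the whole batch
  uint64_t offset = std::numeric_limits<uint64_t>::max();
  for (uint64_t off : key_block_offsets) {
    if (off != offset) {  // next key is in a new block: rebuild the iterator
      offset = off;
      biter.Reposition(off);
    }
    // else: reuse biter and simply Seek() within the already-loaded block
  }
  return biter.constructions == 3 ? 0 : 1;  // 3 builds instead of 6
}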

TEST_TMPDIR=/dev/shm/multiget numactl -C 10  ./db_bench.tmp -use_existing_db=true -benchmarks="readseq,multireadrandom" -write_buffer_size=4194304 -target_file_size_base=4194304 -max_bytes_for_level_base=16777216 -num=12000000 -reads=12000000 -duration=90 -threads=1 -compression_type=none -cache_size=4194304000 -batch_size=32 -disable_auto_compactions=true -bloom_bits=10 -cache_index_and_filter_blocks=true -pin_l0_filter_and_index_blocks_in_cache=true -multiread_batched=true -multiread_stride=4

Without the change -
multireadrandom :       3.066 micros/op 326122 ops/sec; (29375968 of 29375968 found)

With the change -
multireadrandom :       3.003 micros/op 332945 ops/sec; (29983968 of 29983968 found)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5314

Differential Revision: D15742108

Pulled By: anand1976

fbshipit-source-id: 220fb0b8eea9a0d602ddeb371528f7af7936d771
Authored by anand76 on 2019-06-10 13:28:18 -07:00; committed by Facebook GitHub Bot
parent 6ce5580882
commit 63ace8ef0e
2 changed files with 50 additions and 6 deletions

table/block_based/block.h

@@ -236,6 +236,7 @@ class BlockIter : public InternalIteratorBase<TValue> {
     restart_index_ = num_restarts_;
     global_seqno_ = global_seqno;
     block_contents_pinned_ = block_contents_pinned;
+    cache_handle_ = nullptr;
   }
 
   // Makes Valid() return false, status() return `s`, and Seek()/Prev()/etc do
@@ -285,6 +286,10 @@ class BlockIter : public InternalIteratorBase<TValue> {
     return static_cast<uint32_t>(value_.data() - data_);
   }
 
+  void SetCacheHandle(Cache::Handle* handle) { cache_handle_ = handle; }
+
+  Cache::Handle* cache_handle() { return cache_handle_; }
+
  protected:
   // Note: The type could be changed to InternalKeyComparator but we see a weird
   // performance drop by that.
@@ -307,6 +312,14 @@ class BlockIter : public InternalIteratorBase<TValue> {
   bool block_contents_pinned_;
   SequenceNumber global_seqno_;
 
+ private:
+  // Store the cache handle, if the block is cached. We need this since the
+  // only other place the handle is stored is as an argument to the Cleanable
+  // function callback, which is hard to retrieve. When multiple value
+  // PinnableSlices reference the block, they need the cache handle in order
+  // to bump up the ref count
+  Cache::Handle* cache_handle_;
+
  public:
   // Return the offset in data_ just past the end of the current entry.
   inline uint32_t NextEntryOffset() const {
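The cache_handle_ comment above captures the key constraint: a value handed to the caller may outlive the iterator's current position, so a reused cached block has to be pinned by taking an extra reference on its cache handle and dropping it through a cleanup callback. A toy sketch of that pin/unpin protocol (ToyCache, ToyHandle, ToyCleanable are stand-ins, not the RocksDB Cache/Cleanable APIs):

#include <cassert>
#include <functional>
#include <vector>

// Toy stand-ins for the pinning protocol: a consumer of a value that points
// into a cached block takes an extra reference on the block's cache handle
// and drops it via a registered cleanup once it is done with the value.
struct ToyHandle { int refs = 1; };

struct ToyCache {
  void Ref(ToyHandle* h) { ++h->refs; }
  void Release(ToyHandle* h, bool /*force_erase*/) { --h->refs; }
};

struct ToyCleanable {
  std::vector<std::function<void()>> cleanups;
  void RegisterCleanup(std::function<void()> fn) { cleanups.push_back(std::move(fn)); }
  ~ToyCleanable() { for (auto& fn : cleanups) fn(); }
};

int main() {
  ToyCache cache;
  ToyHandle handle;            // block sits in cache with one reference
  {
    ToyCleanable dummy;        // plays the role of the per-key "dummy" pinner
    cache.Ref(&handle);        // pin: bump the ref count for the value consumer
    dummy.RegisterCleanup([&] { cache.Release(&handle, false /* force_erase */); });
    assert(handle.refs == 2);  // block cannot be evicted while pinned
  }                            // dummy destroyed: registered release runs
  assert(handle.refs == 1);    // back to the cache's own reference
  return 0;
}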

table/block_based/block_based_table_reader.cc

@@ -129,6 +129,14 @@ void ForceReleaseCachedEntry(void* arg, void* h) {
   cache->Release(handle, true /* force_erase */);
 }
 
+// Release the cached entry and decrement its ref count.
+// Do not force erase
+void ReleaseCachedEntry(void* arg, void* h) {
+  Cache* cache = reinterpret_cast<Cache*>(arg);
+  Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
+  cache->Release(handle, false /* force_erase */);
+}
+
 // For hash based index, return true if prefix_extractor and
 // prefix_extractor_block mismatch, false otherwise. This flag will be used
 // as total_order_seek via NewIndexIterator
@@ -2073,6 +2081,8 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
                                 cache_handle);
       }
     }
+  } else {
+    iter->SetCacheHandle(block.GetCacheHandle());
   }
 
   block.TransferTo(iter);
@@ -2933,6 +2943,8 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
     iiter_unique_ptr.reset(iiter);
   }
 
+  DataBlockIter biter;
+  uint64_t offset = std::numeric_limits<uint64_t>::max();
   for (auto miter = sst_file_range.begin(); miter != sst_file_range.end();
        ++miter) {
     Status s;
@@ -2941,10 +2953,15 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
       bool matched = false;  // if such user key matched a key in SST
       bool done = false;
       for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
-        DataBlockIter biter;
-        NewDataBlockIterator<DataBlockIter>(
-            read_options, iiter->value(), &biter, BlockType::kData,
-            true /* key_includes_seq */, get_context);
+        bool reusing_block = true;
+        if (iiter->value().offset() != offset) {
+          offset = iiter->value().offset();
+          biter.Invalidate(Status::OK());
+          NewDataBlockIterator<DataBlockIter>(
+              read_options, iiter->value(), &biter, BlockType::kData, false,
+              true /* key_includes_seq */, get_context);
+          reusing_block = false;
+        }
 
         if (read_options.read_tier == kBlockCacheTier &&
             biter.status().IsIncomplete()) {
@@ -2971,13 +2988,27 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
         // Call the *saver function on each entry/block until it returns false
         for (; biter.Valid(); biter.Next()) {
           ParsedInternalKey parsed_key;
+          Cleanable dummy;
+          Cleanable* value_pinner = nullptr;
+
           if (!ParseInternalKey(biter.key(), &parsed_key)) {
             s = Status::Corruption(Slice());
           }
+          if (biter.IsValuePinned()) {
+            if (reusing_block) {
+              Cache* block_cache = rep_->table_options.block_cache.get();
+              assert(biter.cache_handle() != nullptr);
+              block_cache->Ref(biter.cache_handle());
+              dummy.RegisterCleanup(&ReleaseCachedEntry, block_cache,
+                                    biter.cache_handle());
+              value_pinner = &dummy;
+            } else {
+              value_pinner = &biter;
+            }
+          }
 
           if (!get_context->SaveValue(
-                  parsed_key, biter.value(), &matched,
-                  biter.IsValuePinned() ? &biter : nullptr)) {
+                  parsed_key, biter.value(), &matched, value_pinner)) {
             done = true;
             break;
           }
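Note the ordering in the final hunk: the handle is Ref()'d before the release is registered on the per-key dummy Cleanable, so every extra reference taken for a reused block is balanced exactly once when the saver is done with the value. The cleanup uses ReleaseCachedEntry rather than ForceReleaseCachedEntry, i.e. Release() without force_erase, so dropping the pin leaves the block in cache for the remaining keys in the batch.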