Reuse data block iterator in BlockBasedTableReader::MultiGet() (#5314)
Summary: Instead of creating a new DataBlockIterator for every key in a MultiGet batch, reuse it if the next key is in the same block. This results in a small 1-2% CPU improvement. Benchmark command: TEST_TMPDIR=/dev/shm/multiget numactl -C 10 ./db_bench.tmp -use_existing_db=true -benchmarks="readseq,multireadrandom" -write_buffer_size=4194304 -target_file_size_base=4194304 -max_bytes_for_level_base=16777216 -num=12000000 -reads=12000000 -duration=90 -threads=1 -compression_type=none -cache_size=4194304000 -batch_size=32 -disable_auto_compactions=true -bloom_bits=10 -cache_index_and_filter_blocks=true -pin_l0_filter_and_index_blocks_in_cache=true -multiread_batched=true -multiread_stride=4 Results: Without the change - multireadrandom : 3.066 micros/op 326122 ops/sec; (29375968 of 29375968 found) With the change - multireadrandom : 3.003 micros/op 332945 ops/sec; (29983968 of 29983968 found) Pull Request resolved: https://github.com/facebook/rocksdb/pull/5314 Differential Revision: D15742108 Pulled By: anand1976 fbshipit-source-id: 220fb0b8eea9a0d602ddeb371528f7af7936d771
This commit is contained in:
parent
6ce5580882
commit
63ace8ef0e
@ -236,6 +236,7 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
||||
restart_index_ = num_restarts_;
|
||||
global_seqno_ = global_seqno;
|
||||
block_contents_pinned_ = block_contents_pinned;
|
||||
cache_handle_ = nullptr;
|
||||
}
|
||||
|
||||
// Makes Valid() return false, status() return `s`, and Seek()/Prev()/etc do
|
||||
@ -285,6 +286,10 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
||||
return static_cast<uint32_t>(value_.data() - data_);
|
||||
}
|
||||
|
||||
// Remembers `handle` as the cache handle associated with this iterator,
// overwriting whatever handle was stored before. Only the pointer is
// stored here; nothing in this setter references or releases the handle.
void SetCacheHandle(Cache::Handle* handle) {
  this->cache_handle_ = handle;
}
|
||||
|
||||
// Returns the cache handle currently stored in this iterator (the value most
// recently passed to SetCacheHandle(), or the value the iterator was
// initialized with). Declared const because it only reads the member, so it
// can also be called through a const iterator.
// NOTE(review): presumably nullptr when the block did not come from the
// block cache -- confirm against the initialization path.
Cache::Handle* cache_handle() const { return cache_handle_; }
|
||||
|
||||
protected:
|
||||
// Note: The type could be changed to InternalKeyComparator but we see a weird
|
||||
// performance drop by that.
|
||||
@ -307,6 +312,14 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
||||
bool block_contents_pinned_;
|
||||
SequenceNumber global_seqno_;
|
||||
|
||||
private:
|
||||
// Store the cache handle, if the block is cached. We need this since the
|
||||
// only other place the handle is stored is as an argument to the Cleanable
|
||||
// function callback, which is hard to retrieve. When multiple value
|
||||
// PinnableSlices reference the block, they need the cache handle in order
|
||||
// to bump up the ref count
|
||||
Cache::Handle* cache_handle_;
|
||||
|
||||
public:
|
||||
// Return the offset in data_ just past the end of the current entry.
|
||||
inline uint32_t NextEntryOffset() const {
|
||||
|
@ -129,6 +129,14 @@ void ForceReleaseCachedEntry(void* arg, void* h) {
|
||||
cache->Release(handle, true /* force_erase */);
|
||||
}
|
||||
|
||||
// Release the cached entry and decrement its ref count.
|
||||
// Do not force erase
|
||||
void ReleaseCachedEntry(void* arg, void* h) {
|
||||
Cache* cache = reinterpret_cast<Cache*>(arg);
|
||||
Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
|
||||
cache->Release(handle, false /* force_erase */);
|
||||
}
|
||||
|
||||
// For hash based index, return true if prefix_extractor and
|
||||
// prefix_extractor_block mismatch, false otherwise. This flag will be used
|
||||
// as total_order_seek via NewIndexIterator
|
||||
@ -2073,6 +2081,8 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
|
||||
cache_handle);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
iter->SetCacheHandle(block.GetCacheHandle());
|
||||
}
|
||||
|
||||
block.TransferTo(iter);
|
||||
@ -2933,6 +2943,8 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
||||
iiter_unique_ptr.reset(iiter);
|
||||
}
|
||||
|
||||
DataBlockIter biter;
|
||||
uint64_t offset = std::numeric_limits<uint64_t>::max();
|
||||
for (auto miter = sst_file_range.begin(); miter != sst_file_range.end();
|
||||
++miter) {
|
||||
Status s;
|
||||
@ -2941,10 +2953,15 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
||||
bool matched = false; // if such user key matched a key in SST
|
||||
bool done = false;
|
||||
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
|
||||
DataBlockIter biter;
|
||||
NewDataBlockIterator<DataBlockIter>(
|
||||
read_options, iiter->value(), &biter, BlockType::kData,
|
||||
true /* key_includes_seq */, get_context);
|
||||
bool reusing_block = true;
|
||||
if (iiter->value().offset() != offset) {
|
||||
offset = iiter->value().offset();
|
||||
biter.Invalidate(Status::OK());
|
||||
NewDataBlockIterator<DataBlockIter>(
|
||||
read_options, iiter->value(), &biter, BlockType::kData, false,
|
||||
true /* key_includes_seq */, get_context);
|
||||
reusing_block = false;
|
||||
}
|
||||
|
||||
if (read_options.read_tier == kBlockCacheTier &&
|
||||
biter.status().IsIncomplete()) {
|
||||
@ -2971,13 +2988,27 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
|
||||
// Call the *saver function on each entry/block until it returns false
|
||||
for (; biter.Valid(); biter.Next()) {
|
||||
ParsedInternalKey parsed_key;
|
||||
Cleanable dummy;
|
||||
Cleanable* value_pinner = nullptr;
|
||||
|
||||
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
||||
s = Status::Corruption(Slice());
|
||||
}
|
||||
if (biter.IsValuePinned()) {
|
||||
if (reusing_block) {
|
||||
Cache* block_cache = rep_->table_options.block_cache.get();
|
||||
assert(biter.cache_handle() != nullptr);
|
||||
block_cache->Ref(biter.cache_handle());
|
||||
dummy.RegisterCleanup(&ReleaseCachedEntry, block_cache,
|
||||
biter.cache_handle());
|
||||
value_pinner = &dummy;
|
||||
} else {
|
||||
value_pinner = &biter;
|
||||
}
|
||||
}
|
||||
|
||||
if (!get_context->SaveValue(
|
||||
parsed_key, biter.value(), &matched,
|
||||
biter.IsValuePinned() ? &biter : nullptr)) {
|
||||
parsed_key, biter.value(), &matched, value_pinner)) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user