Optimize BlockIter::Prev() by caching decoded entries
Summary:
Right now BlockIter::Prev() works like this:
- Go to the beginning of the restart interval
- Keep moving forward (and decoding keys using ParseNextKey()) until we reach the desired key

This can be optimized by caching the decoded entries in the first pass and reusing them in consecutive BlockIter::Prev() calls.

Before caching

```
DEBUG_LEVEL=0 make db_bench -j64 && ./db_bench --benchmarks="readreverse" --db="/dev/shm/bench_prev_opt/" --use_existing_db --disable_auto_compactions
DB path: [/dev/shm/bench_prev_opt/]
readreverse : 0.413 micros/op 2423972 ops/sec; 268.2 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse : 0.414 micros/op 2413867 ops/sec; 267.0 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse : 0.410 micros/op 2440881 ops/sec; 270.0 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse : 0.414 micros/op 2417298 ops/sec; 267.4 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse : 0.413 micros/op 2421682 ops/sec; 267.9 MB/s
```

After caching

```
DEBUG_LEVEL=0 make db_bench -j64 && ./db_bench --benchmarks="readreverse" --db="/dev/shm/bench_prev_opt/" --use_existing_db --disable_auto_compactions
DB path: [/dev/shm/bench_prev_opt/]
readreverse : 0.324 micros/op 3088955 ops/sec; 341.7 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse : 0.335 micros/op 2980999 ops/sec; 329.8 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse : 0.341 micros/op 2929681 ops/sec; 324.1 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse : 0.344 micros/op 2908490 ops/sec; 321.8 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse : 0.338 micros/op 2958404 ops/sec; 327.3 MB/s
```

Test Plan: COMPILE_WITH_ASAN=1 make check -j64

Reviewers: andrewkr, yiwu, sdong

Reviewed By: sdong

Subscribers: andrewkr, dhruba, yoshinorim

Differential Revision: https://reviews.facebook.net/D59463
This commit is contained in:
parent 550bf895e9
commit 812dbfb483
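
The shape of the optimization, before reading the diff: the first backwards step over a restart interval still does the usual forward scan, but now records every decoded entry (offset, key, value) on the way, so consecutive Prev() calls that stay inside that interval just step back through the cached vector instead of re-decoding from the restart point. Below is a minimal standalone sketch of that pattern, not the RocksDB code itself; all names in it (ToyBlock, Record, ReverseIter) are invented for illustration, and it skips details the real patch handles, such as distinguishing keys pinned in the block from keys copied into prev_entries_keys_buff_.

```
// Minimal sketch (not RocksDB code): a toy delta-encoded block that can only
// be decoded front-to-back, plus a reverse iterator that caches the entries
// decoded on its first backwards pass and reuses them on later Prev() calls.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct Record {
  size_t shared;       // bytes shared with the previous key
  std::string suffix;  // non-shared key suffix
  std::string value;
};

struct ToyBlock {
  std::vector<Record> records;  // one restart interval, delta-encoded
};

class ReverseIter {
 public:
  explicit ReverseIter(const ToyBlock& block)
      : block_(block), current_(block.records.size()) {}

  bool Prev() {
    if (current_ == 0) return false;
    --current_;
    if (current_ < cache_.size()) return true;  // fast path: already decoded
    // Slow path (first pass): decode forward from the start of the interval,
    // caching every entry up to and including the one we want.
    std::string key;
    cache_.clear();
    for (size_t i = 0; i <= current_; ++i) {
      const Record& r = block_.records[i];
      key.resize(r.shared);  // keep the shared prefix of the previous key
      key += r.suffix;
      cache_.emplace_back(key, r.value);
    }
    return true;
  }

  const std::pair<std::string, std::string>& entry() const {
    assert(current_ < cache_.size());
    return cache_[current_];
  }

 private:
  const ToyBlock& block_;
  size_t current_;  // index of the current entry; records.size() means "past the end"
  std::vector<std::pair<std::string, std::string>> cache_;  // decoded entries
};

int main() {
  ToyBlock block{{{0, "apple", "1"}, {3, "ricot", "2"}, {1, "vocado", "3"}}};
  ReverseIter it(block);
  // Only the first Prev() decodes the interval; the rest hit the cache.
  while (it.Prev()) {
    std::cout << it.entry().first << " -> " << it.entry().second << "\n";
  }
  return 0;
}
```

Only the first Prev() after positioning pays the forward-decode cost; every later Prev() in the interval is an array lookup, which is the effect the readreverse numbers above are measuring.
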
```
@@ -63,6 +63,40 @@ void BlockIter::Next() {
 void BlockIter::Prev() {
   assert(Valid());
 
+  assert(prev_entries_idx_ == -1 ||
+         static_cast<size_t>(prev_entries_idx_) < prev_entries_.size());
+  // Check if we can use cached prev_entries_
+  if (prev_entries_idx_ > 0 &&
+      prev_entries_[prev_entries_idx_].offset == current_) {
+    // Read cached CachedPrevEntry
+    prev_entries_idx_--;
+    const CachedPrevEntry& current_prev_entry =
+        prev_entries_[prev_entries_idx_];
+
+    const char* key_ptr = current_prev_entry.key_ptr;
+    if (current_prev_entry.key_ptr != nullptr) {
+      // The key is not delta encoded and stored in the data block
+      key_ptr = current_prev_entry.key_ptr;
+      key_pinned_ = true;
+    } else {
+      // The key is delta encoded and stored in prev_entries_keys_buff_
+      key_ptr = prev_entries_keys_buff_.data() + current_prev_entry.key_offset;
+      key_pinned_ = false;
+    }
+    const Slice current_key(key_ptr, current_prev_entry.key_size);
+
+    current_ = current_prev_entry.offset;
+    key_.SetKey(current_key, false /* copy */);
+    value_ = current_prev_entry.value;
+
+    return;
+  }
+
+  // Clear prev entries cache
+  prev_entries_idx_ = -1;
+  prev_entries_.clear();
+  prev_entries_keys_buff_.clear();
+
   // Scan backwards to a restart point before current_
   const uint32_t original = current_;
   while (GetRestartPoint(restart_index_) >= original) {
@@ -76,9 +110,28 @@ void BlockIter::Prev() {
   }
 
   SeekToRestartPoint(restart_index_);
+
   do {
+    if (!ParseNextKey()) {
+      break;
+    }
+    Slice current_key = key();
+
+    if (key_.IsKeyPinned()) {
+      // The key is not delta encoded
+      prev_entries_.emplace_back(current_, current_key.data(), 0,
+                                 current_key.size(), value());
+    } else {
+      // The key is delta encoded, cache decoded key in buffer
+      size_t new_key_offset = prev_entries_keys_buff_.size();
+      prev_entries_keys_buff_.append(current_key.data(), current_key.size());
+
+      prev_entries_.emplace_back(current_, nullptr, new_key_offset,
+                                 current_key.size(), value());
+    }
     // Loop until end of current entry hits the start of original entry
-  } while (ParseNextKey() && NextEntryOffset() < original);
+  } while (NextEntryOffset() < original);
+  prev_entries_idx_ = prev_entries_.size() - 1;
 }
 
 void BlockIter::Seek(const Slice& target) {
@@ -155,9 +208,11 @@ bool BlockIter::ParseNextKey() {
       // If this key dont share any bytes with prev key then we dont need
       // to decode it and can use it's address in the block directly.
       key_.SetKey(Slice(p, non_shared), false /* copy */);
+      key_pinned_ = true;
     } else {
       // This key share `shared` bytes with prev key, we need to decode it
      key_.TrimAppend(shared, p, non_shared);
+      key_pinned_ = false;
     }
    value_ = Slice(p + non_shared, value_length);
    while (restart_index_ + 1 < num_restarts_ &&
```

```
@@ -10,6 +10,8 @@
 #pragma once
 #include <stddef.h>
 #include <stdint.h>
+#include <string>
+#include <vector>
 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
 #include <malloc.h>
 #endif
@@ -96,7 +98,8 @@ class BlockIter : public InternalIterator {
         current_(0),
         restart_index_(0),
         status_(Status::OK()),
-        prefix_index_(nullptr) {}
+        prefix_index_(nullptr),
+        key_pinned_(false) {}
 
   BlockIter(const Comparator* comparator, const char* data, uint32_t restarts,
             uint32_t num_restarts, BlockPrefixIndex* prefix_index)
@@ -157,7 +160,7 @@ class BlockIter : public InternalIterator {
   PinnedIteratorsManager* pinned_iters_mgr_ = nullptr;
 #endif
 
-  virtual bool IsKeyPinned() const override { return key_.IsKeyPinned(); }
+  virtual bool IsKeyPinned() const override { return key_pinned_; }
 
  private:
   const Comparator* comparator_;
@@ -172,6 +175,31 @@ class BlockIter : public InternalIterator {
   Slice value_;
   Status status_;
   BlockPrefixIndex* prefix_index_;
+  bool key_pinned_;
+
+  struct CachedPrevEntry {
+    explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr,
+                             size_t _key_offset, size_t _key_size, Slice _value)
+        : offset(_offset),
+          key_ptr(_key_ptr),
+          key_offset(_key_offset),
+          key_size(_key_size),
+          value(_value) {}
+
+    // offset of entry in block
+    uint32_t offset;
+    // Pointer to key data in block (nullptr if key is delta-encoded)
+    const char* key_ptr;
+    // offset of key in prev_entries_keys_buff_ (0 if key_ptr is not nullptr)
+    size_t key_offset;
+    // size of key
+    size_t key_size;
+    // value slice pointing to data in block
+    Slice value;
+  };
+  std::string prev_entries_keys_buff_;
+  std::vector<CachedPrevEntry> prev_entries_;
+  int32_t prev_entries_idx_ = -1;
 
   inline int Compare(const Slice& a, const Slice& b) const {
     return comparator_->Compare(a, b);
```
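
A note on the header changes above: a cached key can live in one of two places, which is why CachedPrevEntry carries both key_ptr and key_offset, and why IsKeyPinned() now reports the explicit key_pinned_ flag instead of key_.IsKeyPinned(). The fragment below is only an illustrative restatement of the branch in the Prev() fast path; MiniSlice, Entry, and ResolveCachedKey are made-up stand-ins, not code from the commit.

```
// Illustration only (made-up types): how a cached entry's key is materialized,
// mirroring the branch in the BlockIter::Prev() fast path above.
#include <cstddef>
#include <iostream>
#include <string>

struct MiniSlice {        // stand-in for rocksdb::Slice
  const char* data;
  size_t size;
};

struct Entry {            // stand-in for CachedPrevEntry
  const char* key_ptr;    // non-null: key bytes still live in the data block
  size_t key_offset;      // offset into the shared key buffer when key_ptr == nullptr
  size_t key_size;
};

MiniSlice ResolveCachedKey(const Entry& e, const std::string& keys_buff,
                           bool* key_pinned) {
  if (e.key_ptr != nullptr) {
    *key_pinned = true;   // key points into the block itself, so it stays valid
    return {e.key_ptr, e.key_size};
  }
  *key_pinned = false;    // key was decoded into keys_buff during the forward scan
  return {keys_buff.data() + e.key_offset, e.key_size};
}

int main() {
  const std::string keys_buff = "decodedkey";  // pretend delta-decoded key bytes
  const Entry delta_encoded{nullptr, 0, keys_buff.size()};
  bool pinned = true;
  const MiniSlice k = ResolveCachedKey(delta_encoded, keys_buff, &pinned);
  std::cout << std::string(k.data, k.size) << " pinned=" << pinned << "\n";
  return 0;
}
```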