Optimize BlockIter::Prev() by caching decoded entries

Summary:
Currently, BlockIter::Prev() works as follows:

- Seek back to the restart point at the beginning of the current restart interval
- Move forward from there, decoding each key with ParseNextKey(), until we reach the entry just before the current one

This can be optimized by caching the entries decoded during that first pass and reusing them across consecutive BlockIter::Prev() calls; the sketch below illustrates the idea.
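
To make the idea concrete, here is a minimal standalone sketch (hypothetical ToyBlockIter with a simplified shared-prefix encoding — not the RocksDB types; the actual change is in the diff below). Because each key is delta-encoded against its predecessor, keys can only be decoded front-to-back, so an un-cached Prev() must replay the forward scan on every call, while the cached version pays for that scan once per run of consecutive Prev() calls:

```
#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct EncodedEntry {
  size_t shared;       // bytes shared with the previous key
  std::string suffix;  // remaining (non-shared) key bytes
};

class ToyBlockIter {
 public:
  explicit ToyBlockIter(std::vector<EncodedEntry> block)
      : block_(std::move(block)) {}

  // Un-cached Prev(): decode keys front-to-back from the interval start on
  // every call -- O(pos) work each time, like the current implementation.
  std::string PrevNaive(size_t pos) const {
    std::string key;
    for (size_t i = 0; i < pos; ++i) {
      key.resize(block_[i].shared);  // keep the shared prefix
      key += block_[i].suffix;       // append the non-shared tail
    }
    return key;
  }

  // Cached Prev(): the first call in a run decodes and stores every key
  // once; later calls in the run are a single vector lookup.
  std::string PrevCached(size_t pos) {
    if (cache_.size() < pos) {  // cache miss: rebuild for this run
      cache_.clear();
      std::string key;
      for (const EncodedEntry& e : block_) {
        key.resize(e.shared);
        key += e.suffix;
        cache_.push_back(key);
      }
    }
    return cache_[pos - 1];
  }

 private:
  std::vector<EncodedEntry> block_;
  std::vector<std::string> cache_;
};

int main() {
  // "apple", "apricot" (shares "ap"), "avocado" (shares "a").
  ToyBlockIter it({{0, "apple"}, {2, "ricot"}, {1, "vocado"}});
  for (size_t pos = 3; pos >= 1; --pos) {  // reverse scan: positions 3, 2, 1
    std::printf("%s\n", it.PrevCached(pos).c_str());
  }
  return 0;
}
```

The real implementation additionally avoids copying keys that are not delta-encoded at all, pointing straight into the block instead (see key_pinned_ in the diff below).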

Before caching

```
DEBUG_LEVEL=0 make db_bench -j64 && ./db_bench --benchmarks="readreverse" --db="/dev/shm/bench_prev_opt/" --use_existing_db --disable_auto_compactions
DB path: [/dev/shm/bench_prev_opt/]
readreverse  :       0.413 micros/op 2423972 ops/sec;  268.2 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse  :       0.414 micros/op 2413867 ops/sec;  267.0 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse  :       0.410 micros/op 2440881 ops/sec;  270.0 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse  :       0.414 micros/op 2417298 ops/sec;  267.4 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse  :       0.413 micros/op 2421682 ops/sec;  267.9 MB/s
```

After caching

```
DEBUG_LEVEL=0 make db_bench -j64 && ./db_bench --benchmarks="readreverse" --db="/dev/shm/bench_prev_opt/" --use_existing_db --disable_auto_compactions
DB path: [/dev/shm/bench_prev_opt/]
readreverse  :       0.324 micros/op 3088955 ops/sec;  341.7 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse  :       0.335 micros/op 2980999 ops/sec;  329.8 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse  :       0.341 micros/op 2929681 ops/sec;  324.1 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse  :       0.344 micros/op 2908490 ops/sec;  321.8 MB/s
DB path: [/dev/shm/bench_prev_opt/]
readreverse  :       0.338 micros/op 2958404 ops/sec;  327.3 MB/s
```
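
Averaged over the five runs, throughput goes from roughly 2.42M to 2.97M ops/sec (0.413 to 0.336 micros/op), about a 23% improvement. The access pattern readreverse exercises — and the one the cache targets — is a plain reverse scan through consecutive Prev() calls. A minimal sketch of that pattern against the public iterator API (assuming the benchmark DB above already exists at that path):

```
#include <cassert>
#include "rocksdb/db.h"

int main() {
  rocksdb::DB* db = nullptr;
  rocksdb::Options options;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/dev/shm/bench_prev_opt", &db);
  assert(s.ok());

  rocksdb::Iterator* it = db->NewIterator(rocksdb::ReadOptions());
  // Every Prev() after the first one inside a restart interval is now
  // served from the decoded-entry cache instead of a forward re-scan.
  for (it->SeekToLast(); it->Valid(); it->Prev()) {
    // consume it->key() / it->value()
  }
  assert(it->status().ok());
  delete it;
  delete db;
  return 0;
}
```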

Test Plan: COMPILE_WITH_ASAN=1 make check -j64

Reviewers: andrewkr, yiwu, sdong

Reviewed By: sdong

Subscribers: andrewkr, dhruba, yoshinorim

Differential Revision: https://reviews.facebook.net/D59463
Author: Islam AbdelRahman, 2016-06-14 12:27:46 -07:00
Commit: 812dbfb483 (parent: 550bf895e9)
2 changed files with 86 additions and 3 deletions

table/block.cc:

```
@@ -63,6 +63,40 @@ void BlockIter::Next() {
 void BlockIter::Prev() {
   assert(Valid());
 
+  assert(prev_entries_idx_ == -1 ||
+         static_cast<size_t>(prev_entries_idx_) < prev_entries_.size());
+  // Check if we can use cached prev_entries_
+  if (prev_entries_idx_ > 0 &&
+      prev_entries_[prev_entries_idx_].offset == current_) {
+    // Read cached CachedPrevEntry
+    prev_entries_idx_--;
+    const CachedPrevEntry& current_prev_entry =
+        prev_entries_[prev_entries_idx_];
+
+    const char* key_ptr = current_prev_entry.key_ptr;
+    if (current_prev_entry.key_ptr != nullptr) {
+      // The key is not delta encoded and stored in the data block
+      key_ptr = current_prev_entry.key_ptr;
+      key_pinned_ = true;
+    } else {
+      // The key is delta encoded and stored in prev_entries_keys_buff_
+      key_ptr = prev_entries_keys_buff_.data() + current_prev_entry.key_offset;
+      key_pinned_ = false;
+    }
+    const Slice current_key(key_ptr, current_prev_entry.key_size);
+
+    current_ = current_prev_entry.offset;
+    key_.SetKey(current_key, false /* copy */);
+    value_ = current_prev_entry.value;
+
+    return;
+  }
+
+  // Clear prev entries cache
+  prev_entries_idx_ = -1;
+  prev_entries_.clear();
+  prev_entries_keys_buff_.clear();
+
   // Scan backwards to a restart point before current_
   const uint32_t original = current_;
   while (GetRestartPoint(restart_index_) >= original) {
@@ -76,9 +110,28 @@ void BlockIter::Prev() {
   }
 
   SeekToRestartPoint(restart_index_);
+
   do {
+    if (!ParseNextKey()) {
+      break;
+    }
+    Slice current_key = key();
+
+    if (key_.IsKeyPinned()) {
+      // The key is not delta encoded
+      prev_entries_.emplace_back(current_, current_key.data(), 0,
+                                 current_key.size(), value());
+    } else {
+      // The key is delta encoded, cache decoded key in buffer
+      size_t new_key_offset = prev_entries_keys_buff_.size();
+      prev_entries_keys_buff_.append(current_key.data(), current_key.size());
+
+      prev_entries_.emplace_back(current_, nullptr, new_key_offset,
+                                 current_key.size(), value());
+    }
     // Loop until end of current entry hits the start of original entry
-  } while (ParseNextKey() && NextEntryOffset() < original);
+  } while (NextEntryOffset() < original);
+  prev_entries_idx_ = prev_entries_.size() - 1;
 }
 
 void BlockIter::Seek(const Slice& target) {
@@ -155,9 +208,11 @@ bool BlockIter::ParseNextKey() {
     // If this key dont share any bytes with prev key then we dont need
     // to decode it and can use it's address in the block directly.
     key_.SetKey(Slice(p, non_shared), false /* copy */);
+    key_pinned_ = true;
   } else {
     // This key share `shared` bytes with prev key, we need to decode it
     key_.TrimAppend(shared, p, non_shared);
+    key_pinned_ = false;
   }
   value_ = Slice(p + non_shared, value_length);
 
   while (restart_index_ + 1 < num_restarts_ &&
```
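
Two details worth noting in the caching loop above: when an entry's key is not delta-encoded, key_ already points directly into the block (key_pinned_ is true), so the cache stores only that pointer; when it is delta-encoded, the decoded bytes live in the iterator's own buffer, so they are appended to the shared prev_entries_keys_buff_ string and referenced by offset, avoiding a separate heap allocation per cached entry.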

table/block.h:

```
@@ -10,6 +10,8 @@
 #pragma once
 #include <stddef.h>
 #include <stdint.h>
+#include <string>
+#include <vector>
 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
 #include <malloc.h>
 #endif
@@ -96,7 +98,8 @@ class BlockIter : public InternalIterator {
         current_(0),
         restart_index_(0),
         status_(Status::OK()),
-        prefix_index_(nullptr) {}
+        prefix_index_(nullptr),
+        key_pinned_(false) {}
 
   BlockIter(const Comparator* comparator, const char* data, uint32_t restarts,
             uint32_t num_restarts, BlockPrefixIndex* prefix_index)
@@ -157,7 +160,7 @@ class BlockIter : public InternalIterator {
   PinnedIteratorsManager* pinned_iters_mgr_ = nullptr;
 #endif
 
-  virtual bool IsKeyPinned() const override { return key_.IsKeyPinned(); }
+  virtual bool IsKeyPinned() const override { return key_pinned_; }
 
  private:
   const Comparator* comparator_;
@@ -172,6 +175,31 @@ class BlockIter : public InternalIterator {
   Slice value_;
   Status status_;
   BlockPrefixIndex* prefix_index_;
+  bool key_pinned_;
+
+  struct CachedPrevEntry {
+    explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr,
+                             size_t _key_offset, size_t _key_size, Slice _value)
+        : offset(_offset),
+          key_ptr(_key_ptr),
+          key_offset(_key_offset),
+          key_size(_key_size),
+          value(_value) {}
+    // offset of entry in block
+    uint32_t offset;
+    // Pointer to key data in block (nullptr if key is delta-encoded)
+    const char* key_ptr;
+    // offset of key in prev_entries_keys_buff_ (0 if key_ptr is not nullptr)
+    size_t key_offset;
+    // size of key
+    size_t key_size;
+    // value slice pointing to data in block
+    Slice value;
+  };
+
+  std::string prev_entries_keys_buff_;
+  std::vector<CachedPrevEntry> prev_entries_;
+  int32_t prev_entries_idx_ = -1;
 
   inline int Compare(const Slice& a, const Slice& b) const {
     return comparator_->Compare(a, b);
```
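
A note on cache lifetime: the cache is consulted only while prev_entries_[prev_entries_idx_].offset == current_, i.e. during an unbroken run of Prev() calls. Any Seek() or Next() that moves current_ off the cached offsets causes the next Prev() to clear and rebuild the cache, so it never holds more than one restart interval's worth of decoded entries.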