Improve data block construction performance (#9040)
Summary: ... by bypassing tracking of last_key in BlockBuilder when last_key is already known (for BlockBasedTableBuilder::data_block). I tried extracting a base class of BlockBuilder without the last_key tracking at all, but that became complicated by NewFlushBlockPolicy() in the public API referencing BlockBuilder, which would need to be the base class, and I don't want to replace nearly all the internal references to BlockBuilder. Possible follow-up: * Investigate / consider using AddWithLastKey in more places This improvement should stack with https://github.com/facebook/rocksdb/issues/9039 Pull Request resolved: https://github.com/facebook/rocksdb/pull/9040 Test Plan: TEST_TMPDIR=/dev/shm/rocksdb1 ./db_bench -benchmarks=fillseq -memtablerep=vector -allow_concurrent_memtable_write=false -num=50000000 Compiled with DEBUG_LEVEL=0 Test vs. control runs simulaneous for better accuracy, units = ops/sec Run 1: 278929 vs. 267799 (+4.2%) Run 2: 281836 vs. 267432 (+5.4%) Run 3: 278279 vs. 270454 (+2.9%) (This benchmark is chosen to have detectable signal-to-noise, not to represent expected improvement percent on real workloads.) Reviewed By: mrambacher Differential Revision: D31706033 Pulled By: pdillinger fbshipit-source-id: 8a50fe6fefdd67b6d7665ffa687bbdcf5ad0d5ec
This commit is contained in:
parent
0534393fc8
commit
b234a3f569
@ -29,6 +29,9 @@
|
||||
* Add `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of the this batch of files.
|
||||
* If `DB::Close()` failed with a non aborted status, calling `DB::Close()` again will return the original status instead of Status::OK.
|
||||
|
||||
### Performance Improvements
|
||||
* Improved CPU efficiency of building block-based table (SST) files (#9039 and #9040).
|
||||
|
||||
## 6.25.0 (2021-09-20)
|
||||
### Bug Fixes
|
||||
* Allow secondary instance to refresh iterator. Assign read seq after referencing SuperVersion.
|
||||
|
@ -961,8 +961,8 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
}
|
||||
}
|
||||
|
||||
r->data_block.AddWithLastKey(key, value, r->last_key);
|
||||
r->last_key.assign(key.data(), key.size());
|
||||
r->data_block.Add(key, value);
|
||||
if (r->state == Rep::State::kBuffered) {
|
||||
// Buffered keys will be replayed from data_block_buffers during
|
||||
// `Finish()` once compression dictionary has been finalized.
|
||||
|
@ -78,6 +78,9 @@ void BlockBuilder::Reset() {
|
||||
if (data_block_hash_index_builder_.Valid()) {
|
||||
data_block_hash_index_builder_.Reset();
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
add_with_last_key_called_ = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void BlockBuilder::SwapAndReset(std::string& buffer) {
|
||||
@ -138,33 +141,62 @@ Slice BlockBuilder::Finish() {
|
||||
|
||||
void BlockBuilder::Add(const Slice& key, const Slice& value,
|
||||
const Slice* const delta_value) {
|
||||
// Ensure no unsafe mixing of Add and AddWithLastKey
|
||||
assert(!add_with_last_key_called_);
|
||||
|
||||
AddWithLastKeyImpl(key, value, last_key_, delta_value, buffer_.size());
|
||||
if (use_delta_encoding_) {
|
||||
// Update state
|
||||
// We used to just copy the changed data, but it appears to be
|
||||
// faster to just copy the whole thing.
|
||||
last_key_.assign(key.data(), key.size());
|
||||
}
|
||||
}
|
||||
|
||||
void BlockBuilder::AddWithLastKey(const Slice& key, const Slice& value,
|
||||
const Slice& last_key_param,
|
||||
const Slice* const delta_value) {
|
||||
// Ensure no unsafe mixing of Add and AddWithLastKey
|
||||
assert(last_key_.empty());
|
||||
#ifndef NDEBUG
|
||||
add_with_last_key_called_ = false;
|
||||
#endif
|
||||
|
||||
// Here we make sure to use an empty `last_key` on first call after creation
|
||||
// or Reset. This is more convenient for the caller and we can be more
|
||||
// clever inside BlockBuilder. On this hot code path, we want to avoid
|
||||
// conditional jumps like `buffer_.empty() ? ... : ...` so we can use a
|
||||
// fast min operation instead, with an assertion to be sure our logic is
|
||||
// sound.
|
||||
size_t buffer_size = buffer_.size();
|
||||
size_t last_key_size = last_key_param.size();
|
||||
assert(buffer_size == 0 || buffer_size >= last_key_size);
|
||||
|
||||
Slice last_key(last_key_param.data(), std::min(buffer_size, last_key_size));
|
||||
|
||||
AddWithLastKeyImpl(key, value, last_key, delta_value, buffer_size);
|
||||
}
|
||||
|
||||
inline void BlockBuilder::AddWithLastKeyImpl(const Slice& key,
|
||||
const Slice& value,
|
||||
const Slice& last_key,
|
||||
const Slice* const delta_value,
|
||||
size_t buffer_size) {
|
||||
assert(!finished_);
|
||||
assert(counter_ <= block_restart_interval_);
|
||||
assert(!use_value_delta_encoding_ || delta_value);
|
||||
size_t shared = 0; // number of bytes shared with prev key
|
||||
if (counter_ >= block_restart_interval_) {
|
||||
// Restart compression
|
||||
restarts_.push_back(static_cast<uint32_t>(buffer_.size()));
|
||||
restarts_.push_back(static_cast<uint32_t>(buffer_size));
|
||||
estimate_ += sizeof(uint32_t);
|
||||
counter_ = 0;
|
||||
|
||||
if (use_delta_encoding_) {
|
||||
// Update state
|
||||
last_key_.assign(key.data(), key.size());
|
||||
}
|
||||
} else if (use_delta_encoding_) {
|
||||
Slice last_key_piece(last_key_);
|
||||
// See how much sharing to do with previous string
|
||||
shared = key.difference_offset(last_key_piece);
|
||||
|
||||
// Update state
|
||||
// We used to just copy the changed data here, but it appears to be
|
||||
// faster to just copy the whole thing.
|
||||
last_key_.assign(key.data(), key.size());
|
||||
shared = key.difference_offset(last_key);
|
||||
}
|
||||
|
||||
const size_t non_shared = key.size() - shared;
|
||||
const size_t curr_size = buffer_.size();
|
||||
|
||||
if (use_value_delta_encoding_) {
|
||||
// Add "<shared><non_shared>" to buffer_
|
||||
@ -194,7 +226,7 @@ void BlockBuilder::Add(const Slice& key, const Slice& value,
|
||||
}
|
||||
|
||||
counter_++;
|
||||
estimate_ += buffer_.size() - curr_size;
|
||||
estimate_ += buffer_.size() - buffer_size;
|
||||
}
|
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
@ -37,9 +37,24 @@ class BlockBuilder {
|
||||
|
||||
// REQUIRES: Finish() has not been called since the last call to Reset().
|
||||
// REQUIRES: key is larger than any previously added key
|
||||
// DO NOT mix with AddWithLastKey() between Resets. For efficiency, use
|
||||
// AddWithLastKey() in contexts where previous added key is already known
|
||||
// and delta encoding might be used.
|
||||
void Add(const Slice& key, const Slice& value,
|
||||
const Slice* const delta_value = nullptr);
|
||||
|
||||
// A faster version of Add() if the previous key is already known for all
|
||||
// Add()s.
|
||||
// REQUIRES: Finish() has not been called since the last call to Reset().
|
||||
// REQUIRES: key is larger than any previously added key
|
||||
// REQUIRES: if AddWithLastKey has been called since last Reset(), last_key
|
||||
// is the key from most recent AddWithLastKey. (For convenience, last_key
|
||||
// is ignored on first call after creation or Reset().)
|
||||
// DO NOT mix with Add() between Resets.
|
||||
void AddWithLastKey(const Slice& key, const Slice& value,
|
||||
const Slice& last_key,
|
||||
const Slice* const delta_value = nullptr);
|
||||
|
||||
// Finish building the block and return a slice that refers to the
|
||||
// block contents. The returned slice will remain valid for the
|
||||
// lifetime of this builder or until Reset() is called.
|
||||
@ -60,6 +75,11 @@ class BlockBuilder {
|
||||
bool empty() const { return buffer_.empty(); }
|
||||
|
||||
private:
|
||||
inline void AddWithLastKeyImpl(const Slice& key, const Slice& value,
|
||||
const Slice& last_key,
|
||||
const Slice* const delta_value,
|
||||
size_t buffer_size);
|
||||
|
||||
const int block_restart_interval_;
|
||||
// TODO(myabandeh): put it into a separate IndexBlockBuilder
|
||||
const bool use_delta_encoding_;
|
||||
@ -73,6 +93,9 @@ class BlockBuilder {
|
||||
bool finished_; // Has Finish() been called?
|
||||
std::string last_key_;
|
||||
DataBlockHashIndexBuilder data_block_hash_index_builder_;
|
||||
#ifndef NDEBUG
|
||||
bool add_with_last_key_called_ = false;
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
Loading…
Reference in New Issue
Block a user