From b234a3f5698f2457e4e0b2d9646b1893b82c60ab Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 19 Oct 2021 12:35:17 -0700 Subject: [PATCH] Improve data block construction performance (#9040) Summary: ... by bypassing tracking of last_key in BlockBuilder when last_key is already known (for BlockBasedTableBuilder::data_block). I tried extracting a base class of BlockBuilder without the last_key tracking at all, but that became complicated by NewFlushBlockPolicy() in the public API referencing BlockBuilder, which would need to be the base class, and I don't want to replace nearly all the internal references to BlockBuilder. Possible follow-up: * Investigate / consider using AddWithLastKey in more places This improvement should stack with https://github.com/facebook/rocksdb/issues/9039 Pull Request resolved: https://github.com/facebook/rocksdb/pull/9040 Test Plan: TEST_TMPDIR=/dev/shm/rocksdb1 ./db_bench -benchmarks=fillseq -memtablerep=vector -allow_concurrent_memtable_write=false -num=50000000 Compiled with DEBUG_LEVEL=0 Test vs. control runs simulaneous for better accuracy, units = ops/sec Run 1: 278929 vs. 267799 (+4.2%) Run 2: 281836 vs. 267432 (+5.4%) Run 3: 278279 vs. 270454 (+2.9%) (This benchmark is chosen to have detectable signal-to-noise, not to represent expected improvement percent on real workloads.) Reviewed By: mrambacher Differential Revision: D31706033 Pulled By: pdillinger fbshipit-source-id: 8a50fe6fefdd67b6d7665ffa687bbdcf5ad0d5ec --- HISTORY.md | 3 + .../block_based/block_based_table_builder.cc | 2 +- table/block_based/block_builder.cc | 62 ++++++++++++++----- table/block_based/block_builder.h | 23 +++++++ 4 files changed, 74 insertions(+), 16 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 96e920909..24ce4dc1e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -29,6 +29,9 @@ * Add `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of the this batch of files. * If `DB::Close()` failed with a non aborted status, calling `DB::Close()` again will return the original status instead of Status::OK. +### Performance Improvements +* Improved CPU efficiency of building block-based table (SST) files (#9039 and #9040). + ## 6.25.0 (2021-09-20) ### Bug Fixes * Allow secondary instance to refresh iterator. Assign read seq after referencing SuperVersion. diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index c94004815..8fb51e17c 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -961,8 +961,8 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { } } + r->data_block.AddWithLastKey(key, value, r->last_key); r->last_key.assign(key.data(), key.size()); - r->data_block.Add(key, value); if (r->state == Rep::State::kBuffered) { // Buffered keys will be replayed from data_block_buffers during // `Finish()` once compression dictionary has been finalized. diff --git a/table/block_based/block_builder.cc b/table/block_based/block_builder.cc index 219504d50..850693454 100644 --- a/table/block_based/block_builder.cc +++ b/table/block_based/block_builder.cc @@ -78,6 +78,9 @@ void BlockBuilder::Reset() { if (data_block_hash_index_builder_.Valid()) { data_block_hash_index_builder_.Reset(); } +#ifndef NDEBUG + add_with_last_key_called_ = false; +#endif } void BlockBuilder::SwapAndReset(std::string& buffer) { @@ -138,33 +141,62 @@ Slice BlockBuilder::Finish() { void BlockBuilder::Add(const Slice& key, const Slice& value, const Slice* const delta_value) { + // Ensure no unsafe mixing of Add and AddWithLastKey + assert(!add_with_last_key_called_); + + AddWithLastKeyImpl(key, value, last_key_, delta_value, buffer_.size()); + if (use_delta_encoding_) { + // Update state + // We used to just copy the changed data, but it appears to be + // faster to just copy the whole thing. + last_key_.assign(key.data(), key.size()); + } +} + +void BlockBuilder::AddWithLastKey(const Slice& key, const Slice& value, + const Slice& last_key_param, + const Slice* const delta_value) { + // Ensure no unsafe mixing of Add and AddWithLastKey + assert(last_key_.empty()); +#ifndef NDEBUG + add_with_last_key_called_ = false; +#endif + + // Here we make sure to use an empty `last_key` on first call after creation + // or Reset. This is more convenient for the caller and we can be more + // clever inside BlockBuilder. On this hot code path, we want to avoid + // conditional jumps like `buffer_.empty() ? ... : ...` so we can use a + // fast min operation instead, with an assertion to be sure our logic is + // sound. + size_t buffer_size = buffer_.size(); + size_t last_key_size = last_key_param.size(); + assert(buffer_size == 0 || buffer_size >= last_key_size); + + Slice last_key(last_key_param.data(), std::min(buffer_size, last_key_size)); + + AddWithLastKeyImpl(key, value, last_key, delta_value, buffer_size); +} + +inline void BlockBuilder::AddWithLastKeyImpl(const Slice& key, + const Slice& value, + const Slice& last_key, + const Slice* const delta_value, + size_t buffer_size) { assert(!finished_); assert(counter_ <= block_restart_interval_); assert(!use_value_delta_encoding_ || delta_value); size_t shared = 0; // number of bytes shared with prev key if (counter_ >= block_restart_interval_) { // Restart compression - restarts_.push_back(static_cast(buffer_.size())); + restarts_.push_back(static_cast(buffer_size)); estimate_ += sizeof(uint32_t); counter_ = 0; - - if (use_delta_encoding_) { - // Update state - last_key_.assign(key.data(), key.size()); - } } else if (use_delta_encoding_) { - Slice last_key_piece(last_key_); // See how much sharing to do with previous string - shared = key.difference_offset(last_key_piece); - - // Update state - // We used to just copy the changed data here, but it appears to be - // faster to just copy the whole thing. - last_key_.assign(key.data(), key.size()); + shared = key.difference_offset(last_key); } const size_t non_shared = key.size() - shared; - const size_t curr_size = buffer_.size(); if (use_value_delta_encoding_) { // Add "" to buffer_ @@ -194,7 +226,7 @@ void BlockBuilder::Add(const Slice& key, const Slice& value, } counter_++; - estimate_ += buffer_.size() - curr_size; + estimate_ += buffer_.size() - buffer_size; } } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/block_builder.h b/table/block_based/block_builder.h index e3fcfc2ec..7702ef117 100644 --- a/table/block_based/block_builder.h +++ b/table/block_based/block_builder.h @@ -37,9 +37,24 @@ class BlockBuilder { // REQUIRES: Finish() has not been called since the last call to Reset(). // REQUIRES: key is larger than any previously added key + // DO NOT mix with AddWithLastKey() between Resets. For efficiency, use + // AddWithLastKey() in contexts where previous added key is already known + // and delta encoding might be used. void Add(const Slice& key, const Slice& value, const Slice* const delta_value = nullptr); + // A faster version of Add() if the previous key is already known for all + // Add()s. + // REQUIRES: Finish() has not been called since the last call to Reset(). + // REQUIRES: key is larger than any previously added key + // REQUIRES: if AddWithLastKey has been called since last Reset(), last_key + // is the key from most recent AddWithLastKey. (For convenience, last_key + // is ignored on first call after creation or Reset().) + // DO NOT mix with Add() between Resets. + void AddWithLastKey(const Slice& key, const Slice& value, + const Slice& last_key, + const Slice* const delta_value = nullptr); + // Finish building the block and return a slice that refers to the // block contents. The returned slice will remain valid for the // lifetime of this builder or until Reset() is called. @@ -60,6 +75,11 @@ class BlockBuilder { bool empty() const { return buffer_.empty(); } private: + inline void AddWithLastKeyImpl(const Slice& key, const Slice& value, + const Slice& last_key, + const Slice* const delta_value, + size_t buffer_size); + const int block_restart_interval_; // TODO(myabandeh): put it into a separate IndexBlockBuilder const bool use_delta_encoding_; @@ -73,6 +93,9 @@ class BlockBuilder { bool finished_; // Has Finish() been called? std::string last_key_; DataBlockHashIndexBuilder data_block_hash_index_builder_; +#ifndef NDEBUG + bool add_with_last_key_called_ = false; +#endif }; } // namespace ROCKSDB_NAMESPACE