Reduce comparator objects init cost in BlockIter (#9611)

Summary:
This PR solves the problem discussed in https://github.com/facebook/rocksdb/issues/7149. By storing the pointer of InternalKeyComparator as icmp_ in BlockIter, the object size remains the same. And for each call to CompareCurrentKey, there is no need to create Comparator objects. One can use icmp_ directly or use the "user_comparator" from the icmp_.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9611

Test Plan:
with https://github.com/facebook/rocksdb/issues/9903,

```
$ TEST_TMPDIR=/dev/shm python3.6 ../benchmark/tools/compare.py benchmarks ./db_basic_bench ../rocksdb-pr9611/db_basic_bench --benchmark_filter=DBGet/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:0/negative_query:0/enable_filter:0/mmap:1/iterations:262144/threads:1 --benchmark_repetitions=50
...
Comparing ./db_basic_bench to ../rocksdb-pr9611/db_basic_bench
Benchmark                                                                                                                                                               Time             CPU      Time Old      Time New       CPU Old       CPU New
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
...
DBGet/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:0/negative_query:0/enable_filter:0/mmap:1/iterations:262144/threads:1_pvalue                 0.0001          0.0001      U Test, Repetitions: 50 vs 50
DBGet/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:0/negative_query:0/enable_filter:0/mmap:1/iterations:262144/threads:1_mean                  -0.0483         -0.0483          3924          3734          3924          3734
DBGet/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:0/negative_query:0/enable_filter:0/mmap:1/iterations:262144/threads:1_median                -0.0713         -0.0713          3971          3687          3970          3687
DBGet/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:0/negative_query:0/enable_filter:0/mmap:1/iterations:262144/threads:1_stddev                -0.0342         -0.0344           225           217           225           217
DBGet/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:0/negative_query:0/enable_filter:0/mmap:1/iterations:262144/threads:1_cv                    +0.0148         +0.0146             0             0             0             0
OVERALL_GEOMEAN                                                                                                                                                      -0.0483         -0.0483             0             0             0             0
```

Reviewed By: akankshamahajan15

Differential Revision: D35882037

Pulled By: ajkr

fbshipit-source-id: 9e5337bbad8f1239dff7aa9f6549020d599bfcdf
This commit is contained in:
Xinyu Zeng 2022-05-03 17:37:19 -07:00 committed by Facebook GitHub Bot
parent b82edffc7b
commit 8b74cea7fe
2 changed files with 24 additions and 26 deletions

View File

@ -10,6 +10,7 @@
// Decodes the blocks generated by block_builder.cc. // Decodes the blocks generated by block_builder.cc.
#include "table/block_based/block.h" #include "table/block_based/block.h"
#include <algorithm> #include <algorithm>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
@ -400,7 +401,8 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
return true; return true;
} }
if (ucmp().Compare(raw_key_.GetUserKey(), target_user_key) != 0) { if (icmp_->user_comparator()->Compare(raw_key_.GetUserKey(),
target_user_key) != 0) {
// the key is not in this block and cannot be at the next block either. // the key is not in this block and cannot be at the next block either.
return false; return false;
} }

View File

@ -10,6 +10,7 @@
#pragma once #pragma once
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
#include <vector> #include <vector>
@ -266,23 +267,6 @@ class Block {
template <class TValue> template <class TValue>
class BlockIter : public InternalIteratorBase<TValue> { class BlockIter : public InternalIteratorBase<TValue> {
public: public:
void InitializeBase(const Comparator* raw_ucmp, const char* data,
uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno, bool block_contents_pinned) {
assert(data_ == nullptr); // Ensure it is called only once
assert(num_restarts > 0); // Ensure the param is valid
raw_ucmp_ = raw_ucmp;
data_ = data;
restarts_ = restarts;
num_restarts_ = num_restarts;
current_ = restarts_;
restart_index_ = num_restarts_;
global_seqno_ = global_seqno;
block_contents_pinned_ = block_contents_pinned;
cache_handle_ = nullptr;
}
// Makes Valid() return false, status() return `s`, and Seek()/Prev()/etc do // Makes Valid() return false, status() return `s`, and Seek()/Prev()/etc do
// nothing. Calls cleanup functions. // nothing. Calls cleanup functions.
virtual void Invalidate(const Status& s) { virtual void Invalidate(const Status& s) {
@ -371,6 +355,7 @@ class BlockIter : public InternalIteratorBase<TValue> {
Cache::Handle* cache_handle() { return cache_handle_; } Cache::Handle* cache_handle() { return cache_handle_; }
protected: protected:
std::unique_ptr<InternalKeyComparator> icmp_;
const char* data_; // underlying block contents const char* data_; // underlying block contents
uint32_t num_restarts_; // Number of uint32_t entries in restart array uint32_t num_restarts_; // Number of uint32_t entries in restart array
@ -405,11 +390,23 @@ class BlockIter : public InternalIteratorBase<TValue> {
template <typename DecodeEntryFunc> template <typename DecodeEntryFunc>
inline bool ParseNextKey(bool* is_shared); inline bool ParseNextKey(bool* is_shared);
InternalKeyComparator icmp() { void InitializeBase(const Comparator* raw_ucmp, const char* data,
return InternalKeyComparator(raw_ucmp_, false /* named */); uint32_t restarts, uint32_t num_restarts,
} SequenceNumber global_seqno, bool block_contents_pinned) {
assert(data_ == nullptr); // Ensure it is called only once
assert(num_restarts > 0); // Ensure the param is valid
UserComparatorWrapper ucmp() { return UserComparatorWrapper(raw_ucmp_); } icmp_ =
std::make_unique<InternalKeyComparator>(raw_ucmp, false /* named */);
data_ = data;
restarts_ = restarts;
num_restarts_ = num_restarts;
current_ = restarts_;
restart_index_ = num_restarts_;
global_seqno_ = global_seqno;
block_contents_pinned_ = block_contents_pinned;
cache_handle_ = nullptr;
}
// Must be called every time a key is found that needs to be returned to user, // Must be called every time a key is found that needs to be returned to user,
// and may be called when no key is found (as a no-op). Updates `key_`, // and may be called when no key is found (as a no-op). Updates `key_`,
@ -440,16 +437,15 @@ class BlockIter : public InternalIteratorBase<TValue> {
int CompareCurrentKey(const Slice& other) { int CompareCurrentKey(const Slice& other) {
if (raw_key_.IsUserKey()) { if (raw_key_.IsUserKey()) {
assert(global_seqno_ == kDisableGlobalSequenceNumber); assert(global_seqno_ == kDisableGlobalSequenceNumber);
return ucmp().Compare(raw_key_.GetUserKey(), other); return icmp_->user_comparator()->Compare(raw_key_.GetUserKey(), other);
} else if (global_seqno_ == kDisableGlobalSequenceNumber) { } else if (global_seqno_ == kDisableGlobalSequenceNumber) {
return icmp().Compare(raw_key_.GetInternalKey(), other); return icmp_->Compare(raw_key_.GetInternalKey(), other);
} }
return icmp().Compare(raw_key_.GetInternalKey(), global_seqno_, other, return icmp_->Compare(raw_key_.GetInternalKey(), global_seqno_, other,
kDisableGlobalSequenceNumber); kDisableGlobalSequenceNumber);
} }
private: private:
const Comparator* raw_ucmp_;
// Store the cache handle, if the block is cached. We need this since the // Store the cache handle, if the block is cached. We need this since the
// only other place the handle is stored is as an argument to the Cleanable // only other place the handle is stored is as an argument to the Cleanable
// function callback, which is hard to retrieve. When multiple value // function callback, which is hard to retrieve. When multiple value