Introduce FullMergeV2 (eliminate memcpy from merge operators)
Summary:
This diff updates the code to pin the merge operator operands while the merge
operation is running, so that we can eliminate the memcpy cost. To do that we
need a new public API for full merge, FullMergeV2, that replaces the
std::deque<std::string> operand list with std::vector<Slice>.

This diff is stacked on top of D56493 and D56511.

In this diff we
- Update FullMergeV2 arguments to be encapsulated in MergeOperationInput and
  MergeOperationOutput, which will make it easier to add new arguments in the
  future
- Replace std::deque<std::string> with std::vector<Slice> to pass operands
- Replace the MergeContext std::deque with std::vector (based on a simple
  benchmark I ran: https://gist.github.com/IslamAbdelRahman/78fc86c9ab9f52b1df791e58943fb187)
- Allow the FullMergeV2 output to be an existing operand

```
[Everything in Memtable | 10K operands | 10 KB each | 1 operand per key]
DEBUG_LEVEL=0 make db_bench -j64 && ./db_bench --benchmarks="mergerandom,readseq,readseq,readseq,readseq,readseq" --merge_operator="max" --merge_keys=10000 --num=10000 --disable_auto_compactions --value_size=10240 --write_buffer_size=1000000000

[FullMergeV2]
readseq : 0.607 micros/op 1648235 ops/sec; 16121.2 MB/s
readseq : 0.478 micros/op 2091546 ops/sec; 20457.2 MB/s
readseq : 0.252 micros/op 3972081 ops/sec; 38850.5 MB/s
readseq : 0.237 micros/op 4218328 ops/sec; 41259.0 MB/s
readseq : 0.247 micros/op 4043927 ops/sec; 39553.2 MB/s

[master]
readseq : 3.935 micros/op 254140 ops/sec; 2485.7 MB/s
readseq : 3.722 micros/op 268657 ops/sec; 2627.7 MB/s
readseq : 3.149 micros/op 317605 ops/sec; 3106.5 MB/s
readseq : 3.125 micros/op 320024 ops/sec; 3130.1 MB/s
readseq : 4.075 micros/op 245374 ops/sec; 2400.0 MB/s
```

```
[Everything in Memtable | 10K operands | 10 KB each | 10 operands per key]
DEBUG_LEVEL=0 make db_bench -j64 && ./db_bench --benchmarks="mergerandom,readseq,readseq,readseq,readseq,readseq" --merge_operator="max" --merge_keys=1000 --num=10000 --disable_auto_compactions --value_size=10240 --write_buffer_size=1000000000

[FullMergeV2]
readseq : 3.472 micros/op 288018 ops/sec; 2817.1 MB/s
readseq : 2.304 micros/op 434027 ops/sec; 4245.2 MB/s
readseq : 1.163 micros/op 859845 ops/sec; 8410.0 MB/s
readseq : 1.192 micros/op 838926 ops/sec; 8205.4 MB/s
readseq : 1.250 micros/op 800000 ops/sec; 7824.7 MB/s

[master]
readseq : 24.025 micros/op 41623 ops/sec; 407.1 MB/s
readseq : 18.489 micros/op 54086 ops/sec; 529.0 MB/s
readseq : 18.693 micros/op 53495 ops/sec; 523.2 MB/s
readseq : 23.621 micros/op 42335 ops/sec; 414.1 MB/s
readseq : 18.775 micros/op 53262 ops/sec; 521.0 MB/s
```

```
[Everything in Block cache | 10K operands | 10 KB each | 1 operand per key]
DEBUG_LEVEL=0 make db_bench -j64 && ./db_bench --benchmarks="readseq,readseq,readseq,readseq,readseq" --merge_operator="max" --num=100000 --db="/dev/shm/merge-random-10K-10KB" --cache_size=1000000000 --use_existing_db --disable_auto_compactions

[FullMergeV2]
readseq : 14.741 micros/op 67837 ops/sec; 663.5 MB/s
readseq : 1.029 micros/op 971446 ops/sec; 9501.6 MB/s
readseq : 0.974 micros/op 1026229 ops/sec; 10037.4 MB/s
readseq : 0.965 micros/op 1036080 ops/sec; 10133.8 MB/s
readseq : 0.943 micros/op 1060657 ops/sec; 10374.2 MB/s

[master]
readseq : 16.735 micros/op 59755 ops/sec; 584.5 MB/s
readseq : 3.029 micros/op 330151 ops/sec; 3229.2 MB/s
readseq : 3.136 micros/op 318883 ops/sec; 3119.0 MB/s
readseq : 3.065 micros/op 326245 ops/sec; 3191.0 MB/s
readseq : 3.014 micros/op 331813 ops/sec; 3245.4 MB/s
```

```
[Everything in Block cache | 10K operands | 10 KB each | 10 operands per key]
DEBUG_LEVEL=0 make db_bench -j64 && ./db_bench --benchmarks="readseq,readseq,readseq,readseq,readseq" --merge_operator="max" --num=100000 --db="/dev/shm/merge-random-10-operands-10K-10KB" --cache_size=1000000000 --use_existing_db --disable_auto_compactions

[FullMergeV2]
readseq : 24.325 micros/op 41109 ops/sec; 402.1 MB/s
readseq : 1.470 micros/op 680272 ops/sec; 6653.7 MB/s
readseq : 1.231 micros/op 812347 ops/sec; 7945.5 MB/s
readseq : 1.091 micros/op 916590 ops/sec; 8965.1 MB/s
readseq : 1.109 micros/op 901713 ops/sec; 8819.6 MB/s

[master]
readseq : 27.257 micros/op 36687 ops/sec; 358.8 MB/s
readseq : 4.443 micros/op 225073 ops/sec; 2201.4 MB/s
readseq : 5.830 micros/op 171526 ops/sec; 1677.7 MB/s
readseq : 4.173 micros/op 239635 ops/sec; 2343.8 MB/s
readseq : 4.150 micros/op 240963 ops/sec; 2356.8 MB/s
```

Test Plan: COMPILE_WITH_ASAN=1 make check -j64

Reviewers: yhchiang, andrewkr, sdong

Reviewed By: sdong

Subscribers: lovro, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D57075
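For illustration, here is roughly what a merge operator looks like on the new
API. This is a minimal sketch modeled on the "max" operator used in the
benchmarks above, not part of the change itself. The input fields (key,
existing_value, operand_list, logger) are the ones introduced by this diff; the
sketch assumes the second output field, which lets the result alias an operand
instead of being copied into new_value, is named existing_operand as in the
released header.

```cpp
#include "rocksdb/merge_operator.h"
#include "rocksdb/slice.h"

namespace rocksdb {

// Byte-wise "max" merge operator written against FullMergeV2 (sketch).
class MaxOperator : public MergeOperator {
 public:
  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
                           MergeOperationOutput* merge_out) const override {
    // Start from the existing value (if any), then keep the largest operand.
    Slice& max = merge_out->existing_operand;
    if (merge_in.existing_value != nullptr) {
      max = *merge_in.existing_value;
    }
    for (const Slice& op : merge_in.operand_list) {
      if (max.compare(op) < 0) {
        // The result aliases a pinned operand; no memcpy into new_value.
        max = op;
      }
    }
    return true;
  }

  virtual const char* Name() const override { return "MaxOperator"; }
};

}  // namespace rocksdb
```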
db/c.cc
@@ -269,33 +269,31 @@ struct rocksdb_mergeoperator_t : public MergeOperator {
 
   virtual const char* Name() const override { return (*name_)(state_); }
 
-  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
-                         const std::deque<std::string>& operand_list,
-                         std::string* new_value,
-                         Logger* logger) const override {
-    size_t n = operand_list.size();
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override {
+    size_t n = merge_in.operand_list.size();
     std::vector<const char*> operand_pointers(n);
     std::vector<size_t> operand_sizes(n);
     for (size_t i = 0; i < n; i++) {
-      Slice operand(operand_list[i]);
+      Slice operand(merge_in.operand_list[i]);
       operand_pointers[i] = operand.data();
       operand_sizes[i] = operand.size();
     }
 
     const char* existing_value_data = nullptr;
     size_t existing_value_len = 0;
-    if (existing_value != nullptr) {
-      existing_value_data = existing_value->data();
-      existing_value_len = existing_value->size();
+    if (merge_in.existing_value != nullptr) {
+      existing_value_data = merge_in.existing_value->data();
+      existing_value_len = merge_in.existing_value->size();
     }
 
     unsigned char success;
     size_t new_value_len;
     char* tmp_new_value = (*full_merge_)(
-        state_, key.data(), key.size(), existing_value_data, existing_value_len,
-        &operand_pointers[0], &operand_sizes[0], static_cast<int>(n), &success,
-        &new_value_len);
-    new_value->assign(tmp_new_value, new_value_len);
+        state_, merge_in.key.data(), merge_in.key.size(), existing_value_data,
+        existing_value_len, &operand_pointers[0], &operand_sizes[0],
+        static_cast<int>(n), &success, &new_value_len);
+    merge_out->new_value.assign(tmp_new_value, new_value_len);
 
     if (delete_value_ != nullptr) {
       (*delete_value_)(state_, tmp_new_value, new_value_len);
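Note that the C API path above still materializes the result: the user callback
returns a heap buffer that the wrapper copies into merge_out->new_value and then
releases through delete_value_. As a hedged illustration only (hypothetical
names; the argument layout mirrors the (*full_merge_) call shown in the wrapper),
a callback on that path might look like:

```cpp
#include <cstdlib>
#include <cstring>

// Hypothetical C-style full-merge callback that concatenates the existing
// value and all operands. It returns a buffer that the paired delete_value
// callback is expected to free().
static char* concat_full_merge(void* /*state*/, const char* /*key*/,
                               size_t /*key_length*/, const char* existing_value,
                               size_t existing_value_length,
                               const char* const* operands_list,
                               const size_t* operands_list_length,
                               int num_operands, unsigned char* success,
                               size_t* new_value_length) {
  size_t total = existing_value != nullptr ? existing_value_length : 0;
  for (int i = 0; i < num_operands; i++) {
    total += operands_list_length[i];
  }
  char* result = static_cast<char*>(malloc(total));
  size_t offset = 0;
  if (existing_value != nullptr) {
    memcpy(result, existing_value, existing_value_length);
    offset += existing_value_length;
  }
  for (int i = 0; i < num_operands; i++) {
    memcpy(result + offset, operands_list[i], operands_list_length[i]);
    offset += operands_list_length[i];
  }
  *success = 1;
  *new_value_length = total;
  return result;
}
```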
db/compaction_iterator.cc
@@ -49,6 +49,12 @@ CompactionIterator::CompactionIterator(
   } else {
     ignore_snapshots_ = false;
   }
+  input_->SetPinnedItersMgr(&pinned_iters_mgr_);
+}
+
+CompactionIterator::~CompactionIterator() {
+  // input_ Iteartor lifetime is longer than pinned_iters_mgr_ lifetime
+  input_->SetPinnedItersMgr(nullptr);
 }
 
 void CompactionIterator::ResetRecordCounts() {
@@ -83,6 +89,8 @@ void CompactionIterator::Next() {
       ikey_.user_key = current_key_.GetUserKey();
       valid_ = true;
     } else {
+      // We consumed all pinned merge operands, release pinned iterators
+      pinned_iters_mgr_.ReleasePinnedIterators();
       // MergeHelper moves the iterator to the first record after the merged
       // records, so even though we reached the end of the merge output, we do
       // not want to advance the iterator.
@@ -368,6 +376,7 @@ void CompactionIterator::NextFromInput() {
         return;
       }
 
+      pinned_iters_mgr_.StartPinning();
       // We know the merge type entry is not hidden, otherwise we would
       // have hit (A)
       // We encapsulate the merge related state machine in a different
@@ -395,6 +404,7 @@ void CompactionIterator::NextFromInput() {
       // batch consumed by the merge operator should not shadow any keys
       // coming after the merges
       has_current_user_key_ = false;
+      pinned_iters_mgr_.ReleasePinnedIterators();
     }
   } else {
     valid_ = true;
db/compaction_iterator.h
@@ -13,6 +13,7 @@
 
 #include "db/compaction.h"
 #include "db/merge_helper.h"
+#include "db/pinned_iterators_manager.h"
 #include "rocksdb/compaction_filter.h"
 #include "util/log_buffer.h"
 
@@ -46,6 +47,8 @@ class CompactionIterator {
                      const CompactionFilter* compaction_filter = nullptr,
                      LogBuffer* log_buffer = nullptr);
 
+  ~CompactionIterator();
+
   void ResetRecordCounts();
 
   // Seek to the beginning of the compaction iterator output.
@@ -136,6 +139,9 @@ class CompactionIterator {
   bool clear_and_output_next_key_ = false;
 
   MergeOutputIterator merge_out_iter_;
+  // PinnedIteratorsManager used to pin input_ Iterator blocks while reading
+  // merge operands and then releasing them after consuming them.
+  PinnedIteratorsManager pinned_iters_mgr_;
   std::string compaction_filter_value_;
   // "level_ptrs" holds indices that remember which file of an associated
   // level we were last checking during the last call to compaction->
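Taken together, the compaction-iterator changes above follow a simple pinning
protocol. The outline below is a simplified restatement using the method names
that appear in this diff; it is commentary, not additional code in the change:

```cpp
// Simplified lifecycle of operand pinning inside CompactionIterator:
//
//   CompactionIterator()        -> input_->SetPinnedItersMgr(&pinned_iters_mgr_);
//   before collecting operands  -> pinned_iters_mgr_.StartPinning();
//   while merge output is read  -> operands stay valid as Slices (no copies)
//   after output is consumed    -> pinned_iters_mgr_.ReleasePinnedIterators();
//   ~CompactionIterator()       -> input_->SetPinnedItersMgr(nullptr);
```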
@ -155,7 +155,9 @@ class DBIter: public Iterator {
|
|||||||
virtual Slice value() const override {
|
virtual Slice value() const override {
|
||||||
assert(valid_);
|
assert(valid_);
|
||||||
if (current_entry_is_merged_) {
|
if (current_entry_is_merged_) {
|
||||||
return saved_value_;
|
// If pinned_value_ is set then the result of merge operator is one of
|
||||||
|
// the merge operands and we should return it.
|
||||||
|
return pinned_value_.data() ? pinned_value_ : saved_value_;
|
||||||
} else if (direction_ == kReverse) {
|
} else if (direction_ == kReverse) {
|
||||||
return pinned_value_;
|
return pinned_value_;
|
||||||
} else {
|
} else {
|
||||||
@ -286,9 +288,9 @@ inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
|
|||||||
void DBIter::Next() {
|
void DBIter::Next() {
|
||||||
assert(valid_);
|
assert(valid_);
|
||||||
|
|
||||||
|
// Release temporarily pinned blocks from last operation
|
||||||
|
ReleaseTempPinnedData();
|
||||||
if (direction_ == kReverse) {
|
if (direction_ == kReverse) {
|
||||||
// We only pin blocks when doing kReverse
|
|
||||||
ReleaseTempPinnedData();
|
|
||||||
FindNextUserKey();
|
FindNextUserKey();
|
||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
if (!iter_->Valid()) {
|
if (!iter_->Valid()) {
|
||||||
@ -433,9 +435,12 @@ void DBIter::MergeValuesNewToOld() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Temporarily pin the blocks that hold merge operands
|
||||||
|
TempPinData();
|
||||||
merge_context_.Clear();
|
merge_context_.Clear();
|
||||||
// Start the merge process by pushing the first operand
|
// Start the merge process by pushing the first operand
|
||||||
merge_context_.PushOperand(iter_->value());
|
merge_context_.PushOperand(iter_->value(),
|
||||||
|
iter_->IsValuePinned() /* operand_pinned */);
|
||||||
|
|
||||||
ParsedInternalKey ikey;
|
ParsedInternalKey ikey;
|
||||||
for (iter_->Next(); iter_->Valid(); iter_->Next()) {
|
for (iter_->Next(); iter_->Valid(); iter_->Next()) {
|
||||||
@ -459,15 +464,15 @@ void DBIter::MergeValuesNewToOld() {
|
|||||||
const Slice val = iter_->value();
|
const Slice val = iter_->value();
|
||||||
MergeHelper::TimedFullMerge(merge_operator_, ikey.user_key, &val,
|
MergeHelper::TimedFullMerge(merge_operator_, ikey.user_key, &val,
|
||||||
merge_context_.GetOperands(), &saved_value_,
|
merge_context_.GetOperands(), &saved_value_,
|
||||||
logger_, statistics_, env_);
|
logger_, statistics_, env_, &pinned_value_);
|
||||||
// iter_ is positioned after put
|
// iter_ is positioned after put
|
||||||
iter_->Next();
|
iter_->Next();
|
||||||
return;
|
return;
|
||||||
} else if (kTypeMerge == ikey.type) {
|
} else if (kTypeMerge == ikey.type) {
|
||||||
// hit a merge, add the value as an operand and run associative merge.
|
// hit a merge, add the value as an operand and run associative merge.
|
||||||
// when complete, add result to operands and continue.
|
// when complete, add result to operands and continue.
|
||||||
const Slice& val = iter_->value();
|
merge_context_.PushOperand(iter_->value(),
|
||||||
merge_context_.PushOperand(val);
|
iter_->IsValuePinned() /* operand_pinned */);
|
||||||
} else {
|
} else {
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
@ -479,15 +484,15 @@ void DBIter::MergeValuesNewToOld() {
|
|||||||
// client can differentiate this scenario and do things accordingly.
|
// client can differentiate this scenario and do things accordingly.
|
||||||
MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetKey(), nullptr,
|
MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetKey(), nullptr,
|
||||||
merge_context_.GetOperands(), &saved_value_,
|
merge_context_.GetOperands(), &saved_value_,
|
||||||
logger_, statistics_, env_);
|
logger_, statistics_, env_, &pinned_value_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DBIter::Prev() {
|
void DBIter::Prev() {
|
||||||
assert(valid_);
|
assert(valid_);
|
||||||
|
ReleaseTempPinnedData();
|
||||||
if (direction_ == kForward) {
|
if (direction_ == kForward) {
|
||||||
ReverseToBackward();
|
ReverseToBackward();
|
||||||
}
|
}
|
||||||
ReleaseTempPinnedData();
|
|
||||||
PrevInternal();
|
PrevInternal();
|
||||||
if (statistics_ != nullptr) {
|
if (statistics_ != nullptr) {
|
||||||
local_stats_.prev_count_++;
|
local_stats_.prev_count_++;
|
||||||
@ -580,6 +585,9 @@ bool DBIter::FindValueForCurrentKey() {
|
|||||||
ParsedInternalKey ikey;
|
ParsedInternalKey ikey;
|
||||||
FindParseableKey(&ikey, kReverse);
|
FindParseableKey(&ikey, kReverse);
|
||||||
|
|
||||||
|
// Temporarily pin blocks that hold (merge operands / the value)
|
||||||
|
ReleaseTempPinnedData();
|
||||||
|
TempPinData();
|
||||||
size_t num_skipped = 0;
|
size_t num_skipped = 0;
|
||||||
while (iter_->Valid() && ikey.sequence <= sequence_ &&
|
while (iter_->Valid() && ikey.sequence <= sequence_ &&
|
||||||
user_comparator_->Equal(ikey.user_key, saved_key_.GetKey())) {
|
user_comparator_->Equal(ikey.user_key, saved_key_.GetKey())) {
|
||||||
@ -592,8 +600,7 @@ bool DBIter::FindValueForCurrentKey() {
|
|||||||
switch (last_key_entry_type) {
|
switch (last_key_entry_type) {
|
||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
merge_context_.Clear();
|
merge_context_.Clear();
|
||||||
ReleaseTempPinnedData();
|
assert(iter_->IsValuePinned());
|
||||||
TempPinData();
|
|
||||||
pinned_value_ = iter_->value();
|
pinned_value_ = iter_->value();
|
||||||
last_not_merge_type = kTypeValue;
|
last_not_merge_type = kTypeValue;
|
||||||
break;
|
break;
|
||||||
@ -605,7 +612,8 @@ bool DBIter::FindValueForCurrentKey() {
|
|||||||
break;
|
break;
|
||||||
case kTypeMerge:
|
case kTypeMerge:
|
||||||
assert(merge_operator_ != nullptr);
|
assert(merge_operator_ != nullptr);
|
||||||
merge_context_.PushOperandBack(iter_->value());
|
merge_context_.PushOperandBack(
|
||||||
|
iter_->value(), iter_->IsValuePinned() /* operand_pinned */);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
@ -628,13 +636,14 @@ bool DBIter::FindValueForCurrentKey() {
|
|||||||
if (last_not_merge_type == kTypeDeletion) {
|
if (last_not_merge_type == kTypeDeletion) {
|
||||||
MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetKey(),
|
MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetKey(),
|
||||||
nullptr, merge_context_.GetOperands(),
|
nullptr, merge_context_.GetOperands(),
|
||||||
&saved_value_, logger_, statistics_, env_);
|
&saved_value_, logger_, statistics_, env_,
|
||||||
|
&pinned_value_);
|
||||||
} else {
|
} else {
|
||||||
assert(last_not_merge_type == kTypeValue);
|
assert(last_not_merge_type == kTypeValue);
|
||||||
MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetKey(),
|
MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetKey(),
|
||||||
&pinned_value_,
|
&pinned_value_,
|
||||||
merge_context_.GetOperands(), &saved_value_,
|
merge_context_.GetOperands(), &saved_value_,
|
||||||
logger_, statistics_, env_);
|
logger_, statistics_, env_, &pinned_value_);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
@ -651,6 +660,9 @@ bool DBIter::FindValueForCurrentKey() {
|
|||||||
// This function is used in FindValueForCurrentKey.
|
// This function is used in FindValueForCurrentKey.
|
||||||
// We use Seek() function instead of Prev() to find necessary value
|
// We use Seek() function instead of Prev() to find necessary value
|
||||||
bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
||||||
|
// FindValueForCurrentKey will enable pinning before calling
|
||||||
|
// FindValueForCurrentKeyUsingSeek()
|
||||||
|
assert(pinned_iters_mgr_.PinningEnabled());
|
||||||
std::string last_key;
|
std::string last_key;
|
||||||
AppendInternalKey(&last_key, ParsedInternalKey(saved_key_.GetKey(), sequence_,
|
AppendInternalKey(&last_key, ParsedInternalKey(saved_key_.GetKey(), sequence_,
|
||||||
kValueTypeForSeek));
|
kValueTypeForSeek));
|
||||||
@ -664,8 +676,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
|||||||
if (ikey.type == kTypeValue || ikey.type == kTypeDeletion ||
|
if (ikey.type == kTypeValue || ikey.type == kTypeDeletion ||
|
||||||
ikey.type == kTypeSingleDeletion) {
|
ikey.type == kTypeSingleDeletion) {
|
||||||
if (ikey.type == kTypeValue) {
|
if (ikey.type == kTypeValue) {
|
||||||
ReleaseTempPinnedData();
|
assert(iter_->IsValuePinned());
|
||||||
TempPinData();
|
|
||||||
pinned_value_ = iter_->value();
|
pinned_value_ = iter_->value();
|
||||||
valid_ = true;
|
valid_ = true;
|
||||||
return true;
|
return true;
|
||||||
@ -681,7 +692,8 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
|||||||
while (iter_->Valid() &&
|
while (iter_->Valid() &&
|
||||||
user_comparator_->Equal(ikey.user_key, saved_key_.GetKey()) &&
|
user_comparator_->Equal(ikey.user_key, saved_key_.GetKey()) &&
|
||||||
ikey.type == kTypeMerge) {
|
ikey.type == kTypeMerge) {
|
||||||
merge_context_.PushOperand(iter_->value());
|
merge_context_.PushOperand(iter_->value(),
|
||||||
|
iter_->IsValuePinned() /* operand_pinned */);
|
||||||
iter_->Next();
|
iter_->Next();
|
||||||
FindParseableKey(&ikey, kForward);
|
FindParseableKey(&ikey, kForward);
|
||||||
}
|
}
|
||||||
@ -691,7 +703,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
|||||||
ikey.type == kTypeDeletion || ikey.type == kTypeSingleDeletion) {
|
ikey.type == kTypeDeletion || ikey.type == kTypeSingleDeletion) {
|
||||||
MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetKey(), nullptr,
|
MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetKey(), nullptr,
|
||||||
merge_context_.GetOperands(), &saved_value_,
|
merge_context_.GetOperands(), &saved_value_,
|
||||||
logger_, statistics_, env_);
|
logger_, statistics_, env_, &pinned_value_);
|
||||||
// Make iter_ valid and point to saved_key_
|
// Make iter_ valid and point to saved_key_
|
||||||
if (!iter_->Valid() ||
|
if (!iter_->Valid() ||
|
||||||
!user_comparator_->Equal(ikey.user_key, saved_key_.GetKey())) {
|
!user_comparator_->Equal(ikey.user_key, saved_key_.GetKey())) {
|
||||||
@ -705,7 +717,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
|||||||
const Slice& val = iter_->value();
|
const Slice& val = iter_->value();
|
||||||
MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetKey(), &val,
|
MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetKey(), &val,
|
||||||
merge_context_.GetOperands(), &saved_value_,
|
merge_context_.GetOperands(), &saved_value_,
|
||||||
logger_, statistics_, env_);
|
logger_, statistics_, env_, &pinned_value_);
|
||||||
valid_ = true;
|
valid_ = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
db/db_iter_test.cc
@@ -150,6 +150,9 @@ class TestIterator : public InternalIterator {
     return Status::OK();
   }
 
+  virtual bool IsKeyPinned() const override { return true; }
+  virtual bool IsValuePinned() const override { return true; }
+
  private:
   bool initialized_;
   bool valid_;
db/db_test.cc
@@ -4848,12 +4848,11 @@ class DelayedMergeOperator : public MergeOperator {
 
  public:
   explicit DelayedMergeOperator(DBTest* d) : db_test_(d) {}
-  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
-                         const std::deque<std::string>& operand_list,
-                         std::string* new_value,
-                         Logger* logger) const override {
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override {
     db_test_->env_->addon_time_.fetch_add(1000);
-    *new_value = "";
+    merge_out->new_value = "";
     return true;
   }
 
db/db_test2.cc
@ -1487,6 +1487,267 @@ TEST_F(DBTest2, SyncPointMarker) {
|
|||||||
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
class MergeOperatorPinningTest : public DBTest2,
|
||||||
|
public testing::WithParamInterface<bool> {
|
||||||
|
public:
|
||||||
|
MergeOperatorPinningTest() { disable_block_cache_ = GetParam(); }
|
||||||
|
|
||||||
|
bool disable_block_cache_;
|
||||||
|
};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(MergeOperatorPinningTest, MergeOperatorPinningTest,
|
||||||
|
::testing::Bool());
|
||||||
|
|
||||||
|
#ifndef ROCKSDB_LITE
|
||||||
|
TEST_P(MergeOperatorPinningTest, OperandsMultiBlocks) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
BlockBasedTableOptions table_options;
|
||||||
|
table_options.block_size = 1; // every block will contain one entry
|
||||||
|
table_options.no_block_cache = disable_block_cache_;
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
|
||||||
|
options.level0_slowdown_writes_trigger = (1 << 30);
|
||||||
|
options.level0_stop_writes_trigger = (1 << 30);
|
||||||
|
options.disable_auto_compactions = true;
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
|
const int kKeysPerFile = 10;
|
||||||
|
const int kOperandsPerKeyPerFile = 7;
|
||||||
|
const int kOperandSize = 100;
|
||||||
|
// Filse to write in L0 before compacting to lower level
|
||||||
|
const int kFilesPerLevel = 3;
|
||||||
|
|
||||||
|
Random rnd(301);
|
||||||
|
std::map<std::string, std::string> true_data;
|
||||||
|
int batch_num = 1;
|
||||||
|
int lvl_to_fill = 4;
|
||||||
|
int key_id = 0;
|
||||||
|
while (true) {
|
||||||
|
for (int j = 0; j < kKeysPerFile; j++) {
|
||||||
|
std::string key = Key(key_id % 35);
|
||||||
|
key_id++;
|
||||||
|
for (int k = 0; k < kOperandsPerKeyPerFile; k++) {
|
||||||
|
std::string val = RandomString(&rnd, kOperandSize);
|
||||||
|
ASSERT_OK(db_->Merge(WriteOptions(), key, val));
|
||||||
|
if (true_data[key].size() == 0) {
|
||||||
|
true_data[key] = val;
|
||||||
|
} else {
|
||||||
|
true_data[key] += "," + val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lvl_to_fill == -1) {
|
||||||
|
// Keep last batch in memtable and stop
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
if (batch_num % kFilesPerLevel == 0) {
|
||||||
|
if (lvl_to_fill != 0) {
|
||||||
|
MoveFilesToLevel(lvl_to_fill);
|
||||||
|
}
|
||||||
|
lvl_to_fill--;
|
||||||
|
}
|
||||||
|
batch_num++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3 L0 files
|
||||||
|
// 1 L1 file
|
||||||
|
// 3 L2 files
|
||||||
|
// 1 L3 file
|
||||||
|
// 3 L4 Files
|
||||||
|
ASSERT_EQ(FilesPerLevel(), "3,1,3,1,3");
|
||||||
|
|
||||||
|
// Verify Get()
|
||||||
|
for (auto kv : true_data) {
|
||||||
|
ASSERT_EQ(Get(kv.first), kv.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterator* iter = db_->NewIterator(ReadOptions());
|
||||||
|
|
||||||
|
// Verify Iterator::Next()
|
||||||
|
auto data_iter = true_data.begin();
|
||||||
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next(), data_iter++) {
|
||||||
|
ASSERT_EQ(iter->key().ToString(), data_iter->first);
|
||||||
|
ASSERT_EQ(iter->value().ToString(), data_iter->second);
|
||||||
|
}
|
||||||
|
ASSERT_EQ(data_iter, true_data.end());
|
||||||
|
|
||||||
|
// Verify Iterator::Prev()
|
||||||
|
auto data_rev = true_data.rbegin();
|
||||||
|
for (iter->SeekToLast(); iter->Valid(); iter->Prev(), data_rev++) {
|
||||||
|
ASSERT_EQ(iter->key().ToString(), data_rev->first);
|
||||||
|
ASSERT_EQ(iter->value().ToString(), data_rev->second);
|
||||||
|
}
|
||||||
|
ASSERT_EQ(data_rev, true_data.rend());
|
||||||
|
|
||||||
|
// Verify Iterator::Seek()
|
||||||
|
for (auto kv : true_data) {
|
||||||
|
iter->Seek(kv.first);
|
||||||
|
ASSERT_EQ(kv.first, iter->key().ToString());
|
||||||
|
ASSERT_EQ(kv.second, iter->value().ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
delete iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(MergeOperatorPinningTest, Randomized) {
|
||||||
|
do {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.merge_operator = MergeOperators::CreateMaxOperator();
|
||||||
|
BlockBasedTableOptions table_options;
|
||||||
|
table_options.no_block_cache = disable_block_cache_;
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
|
Random rnd(301);
|
||||||
|
std::map<std::string, std::string> true_data;
|
||||||
|
|
||||||
|
const int kTotalMerges = 10000;
|
||||||
|
// Every key gets ~10 operands
|
||||||
|
const int kKeyRange = kTotalMerges / 10;
|
||||||
|
const int kOperandSize = 20;
|
||||||
|
const int kNumPutBefore = kKeyRange / 10; // 10% value
|
||||||
|
const int kNumPutAfter = kKeyRange / 10; // 10% overwrite
|
||||||
|
const int kNumDelete = kKeyRange / 10; // 10% delete
|
||||||
|
|
||||||
|
// kNumPutBefore keys will have base values
|
||||||
|
for (int i = 0; i < kNumPutBefore; i++) {
|
||||||
|
std::string key = Key(rnd.Next() % kKeyRange);
|
||||||
|
std::string value = RandomString(&rnd, kOperandSize);
|
||||||
|
ASSERT_OK(db_->Put(WriteOptions(), key, value));
|
||||||
|
|
||||||
|
true_data[key] = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do kTotalMerges merges
|
||||||
|
for (int i = 0; i < kTotalMerges; i++) {
|
||||||
|
std::string key = Key(rnd.Next() % kKeyRange);
|
||||||
|
std::string value = RandomString(&rnd, kOperandSize);
|
||||||
|
ASSERT_OK(db_->Merge(WriteOptions(), key, value));
|
||||||
|
|
||||||
|
if (true_data[key] < value) {
|
||||||
|
true_data[key] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Overwrite random kNumPutAfter keys
|
||||||
|
for (int i = 0; i < kNumPutAfter; i++) {
|
||||||
|
std::string key = Key(rnd.Next() % kKeyRange);
|
||||||
|
std::string value = RandomString(&rnd, kOperandSize);
|
||||||
|
ASSERT_OK(db_->Put(WriteOptions(), key, value));
|
||||||
|
|
||||||
|
true_data[key] = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete random kNumDelete keys
|
||||||
|
for (int i = 0; i < kNumDelete; i++) {
|
||||||
|
std::string key = Key(rnd.Next() % kKeyRange);
|
||||||
|
ASSERT_OK(db_->Delete(WriteOptions(), key));
|
||||||
|
|
||||||
|
true_data.erase(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
VerifyDBFromMap(true_data);
|
||||||
|
|
||||||
|
// Skip HashCuckoo since it does not support merge operators
|
||||||
|
} while (ChangeOptions(kSkipMergePut | kSkipHashCuckoo));
|
||||||
|
}
|
||||||
|
|
||||||
|
class MergeOperatorHook : public MergeOperator {
|
||||||
|
public:
|
||||||
|
explicit MergeOperatorHook(std::shared_ptr<MergeOperator> _merge_op)
|
||||||
|
: merge_op_(_merge_op) {}
|
||||||
|
|
||||||
|
virtual bool FullMergeV2(const MergeOperationInput& merge_in,
|
||||||
|
MergeOperationOutput* merge_out) const override {
|
||||||
|
before_merge_();
|
||||||
|
bool res = merge_op_->FullMergeV2(merge_in, merge_out);
|
||||||
|
after_merge_();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual const char* Name() const override { return merge_op_->Name(); }
|
||||||
|
|
||||||
|
std::shared_ptr<MergeOperator> merge_op_;
|
||||||
|
std::function<void()> before_merge_ = []() {};
|
||||||
|
std::function<void()> after_merge_ = []() {};
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(MergeOperatorPinningTest, EvictCacheBeforeMerge) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
|
||||||
|
auto merge_hook =
|
||||||
|
std::make_shared<MergeOperatorHook>(MergeOperators::CreateMaxOperator());
|
||||||
|
options.merge_operator = merge_hook;
|
||||||
|
options.disable_auto_compactions = true;
|
||||||
|
options.level0_slowdown_writes_trigger = (1 << 30);
|
||||||
|
options.level0_stop_writes_trigger = (1 << 30);
|
||||||
|
options.max_open_files = 20;
|
||||||
|
BlockBasedTableOptions bbto;
|
||||||
|
bbto.no_block_cache = disable_block_cache_;
|
||||||
|
if (bbto.no_block_cache == false) {
|
||||||
|
bbto.block_cache = NewLRUCache(64 * 1024 * 1024);
|
||||||
|
} else {
|
||||||
|
bbto.block_cache = nullptr;
|
||||||
|
}
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
|
const int kNumOperands = 30;
|
||||||
|
const int kNumKeys = 1000;
|
||||||
|
const int kOperandSize = 100;
|
||||||
|
Random rnd(301);
|
||||||
|
|
||||||
|
// 1000 keys every key have 30 operands, every operand is in a different file
|
||||||
|
std::map<std::string, std::string> true_data;
|
||||||
|
for (int i = 0; i < kNumOperands; i++) {
|
||||||
|
for (int j = 0; j < kNumKeys; j++) {
|
||||||
|
std::string k = Key(j);
|
||||||
|
std::string v = RandomString(&rnd, kOperandSize);
|
||||||
|
ASSERT_OK(db_->Merge(WriteOptions(), k, v));
|
||||||
|
|
||||||
|
true_data[k] = std::max(true_data[k], v);
|
||||||
|
}
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<uint64_t> file_numbers = ListTableFiles(env_, dbname_);
|
||||||
|
ASSERT_EQ(file_numbers.size(), kNumOperands);
|
||||||
|
int merge_cnt = 0;
|
||||||
|
|
||||||
|
// Code executed before merge operation
|
||||||
|
merge_hook->before_merge_ = [&]() {
|
||||||
|
// Evict all tables from cache before every merge operation
|
||||||
|
for (uint64_t num : file_numbers) {
|
||||||
|
TableCache::Evict(dbfull()->TEST_table_cache(), num);
|
||||||
|
}
|
||||||
|
// Decrease cache capacity to force all unrefed blocks to be evicted
|
||||||
|
if (bbto.block_cache) {
|
||||||
|
bbto.block_cache->SetCapacity(1);
|
||||||
|
}
|
||||||
|
merge_cnt++;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Code executed after merge operation
|
||||||
|
merge_hook->after_merge_ = [&]() {
|
||||||
|
// Increase capacity again after doing the merge
|
||||||
|
if (bbto.block_cache) {
|
||||||
|
bbto.block_cache->SetCapacity(64 * 1024 * 1024);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
VerifyDBFromMap(true_data);
|
||||||
|
ASSERT_EQ(merge_cnt, kNumKeys * 4 /* get + next + prev + seek */);
|
||||||
|
|
||||||
|
db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
|
||||||
|
|
||||||
|
VerifyDBFromMap(true_data);
|
||||||
|
}
|
||||||
|
#endif // ROCKSDB_LITE
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
db/db_test_util.cc
@@ -1074,6 +1074,40 @@ std::vector<std::uint64_t> DBTestBase::ListTableFiles(Env* env,
   return file_numbers;
 }
 
+void DBTestBase::VerifyDBFromMap(std::map<std::string, std::string> true_data) {
+  for (auto& kv : true_data) {
+    ASSERT_EQ(Get(kv.first), kv.second);
+  }
+
+  ReadOptions ro;
+  ro.total_order_seek = true;
+  Iterator* iter = db_->NewIterator(ro);
+  // Verify Iterator::Next()
+  auto data_iter = true_data.begin();
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next(), data_iter++) {
+    ASSERT_EQ(iter->key().ToString(), data_iter->first);
+    ASSERT_EQ(iter->value().ToString(), data_iter->second);
+  }
+  ASSERT_EQ(data_iter, true_data.end());
+
+  // Verify Iterator::Prev()
+  auto data_rev = true_data.rbegin();
+  for (iter->SeekToLast(); iter->Valid(); iter->Prev(), data_rev++) {
+    ASSERT_EQ(iter->key().ToString(), data_rev->first);
+    ASSERT_EQ(iter->value().ToString(), data_rev->second);
+  }
+  ASSERT_EQ(data_rev, true_data.rend());
+
+  // Verify Iterator::Seek()
+  for (auto kv : true_data) {
+    iter->Seek(kv.first);
+    ASSERT_EQ(kv.first, iter->key().ToString());
+    ASSERT_EQ(kv.second, iter->value().ToString());
+  }
+
+  delete iter;
+}
+
 #ifndef ROCKSDB_LITE
 
 Status DBTestBase::GenerateAndAddExternalFile(const Options options,
db/db_test_util.h
@@ -797,6 +797,8 @@ class DBTestBase : public testing::Test {
 
   std::vector<std::uint64_t> ListTableFiles(Env* env, const std::string& path);
 
+  void VerifyDBFromMap(std::map<std::string, std::string> true_data);
+
 #ifndef ROCKSDB_LITE
   Status GenerateAndAddExternalFile(const Options options,
                                     std::vector<int> keys, size_t file_id);
@ -218,12 +218,13 @@ const char* EncodeKey(std::string* scratch, const Slice& target) {
|
|||||||
|
|
||||||
class MemTableIterator : public InternalIterator {
|
class MemTableIterator : public InternalIterator {
|
||||||
public:
|
public:
|
||||||
MemTableIterator(
|
MemTableIterator(const MemTable& mem, const ReadOptions& read_options,
|
||||||
const MemTable& mem, const ReadOptions& read_options, Arena* arena)
|
Arena* arena)
|
||||||
: bloom_(nullptr),
|
: bloom_(nullptr),
|
||||||
prefix_extractor_(mem.prefix_extractor_),
|
prefix_extractor_(mem.prefix_extractor_),
|
||||||
valid_(false),
|
valid_(false),
|
||||||
arena_mode_(arena != nullptr) {
|
arena_mode_(arena != nullptr),
|
||||||
|
value_pinned_(!mem.GetMemTableOptions()->inplace_update_support) {
|
||||||
if (prefix_extractor_ != nullptr && !read_options.total_order_seek) {
|
if (prefix_extractor_ != nullptr && !read_options.total_order_seek) {
|
||||||
bloom_ = mem.prefix_bloom_.get();
|
bloom_ = mem.prefix_bloom_.get();
|
||||||
iter_ = mem.table_->GetDynamicPrefixIterator(arena);
|
iter_ = mem.table_->GetDynamicPrefixIterator(arena);
|
||||||
@ -306,12 +307,18 @@ class MemTableIterator : public InternalIterator {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual bool IsValuePinned() const override {
|
||||||
|
// memtable value is always pinned, except if we allow inplace update.
|
||||||
|
return value_pinned_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DynamicBloom* bloom_;
|
DynamicBloom* bloom_;
|
||||||
const SliceTransform* const prefix_extractor_;
|
const SliceTransform* const prefix_extractor_;
|
||||||
MemTableRep::Iterator* iter_;
|
MemTableRep::Iterator* iter_;
|
||||||
bool valid_;
|
bool valid_;
|
||||||
bool arena_mode_;
|
bool arena_mode_;
|
||||||
|
bool value_pinned_;
|
||||||
|
|
||||||
// No copying allowed
|
// No copying allowed
|
||||||
MemTableIterator(const MemTableIterator&);
|
MemTableIterator(const MemTableIterator&);
|
||||||
@ -508,7 +515,6 @@ static bool SaveValue(void* arg, const char* entry) {
|
|||||||
case kTypeDeletion:
|
case kTypeDeletion:
|
||||||
case kTypeSingleDeletion: {
|
case kTypeSingleDeletion: {
|
||||||
if (*(s->merge_in_progress)) {
|
if (*(s->merge_in_progress)) {
|
||||||
*(s->status) = Status::OK();
|
|
||||||
*(s->status) = MergeHelper::TimedFullMerge(
|
*(s->status) = MergeHelper::TimedFullMerge(
|
||||||
merge_operator, s->key->user_key(), nullptr,
|
merge_operator, s->key->user_key(), nullptr,
|
||||||
merge_context->GetOperands(), s->value, s->logger, s->statistics,
|
merge_context->GetOperands(), s->value, s->logger, s->statistics,
|
||||||
@ -532,7 +538,8 @@ static bool SaveValue(void* arg, const char* entry) {
|
|||||||
}
|
}
|
||||||
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
|
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
|
||||||
*(s->merge_in_progress) = true;
|
*(s->merge_in_progress) = true;
|
||||||
merge_context->PushOperand(v);
|
merge_context->PushOperand(
|
||||||
|
v, s->inplace_update_support == false /* operand_pinned */);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
db/merge_context.h
@@ -4,70 +4,113 @@
 // of patent rights can be found in the PATENTS file in the same directory.
 //
 #pragma once
+#include <string>
+#include <vector>
 #include "db/dbformat.h"
 #include "rocksdb/slice.h"
-#include <string>
-#include <deque>
 
 namespace rocksdb {
 
-const std::deque<std::string> empty_operand_list;
+const std::vector<Slice> empty_operand_list;
 
 // The merge context for merging a user key.
 // When doing a Get(), DB will create such a class and pass it when
 // issuing Get() operation to memtables and version_set. The operands
 // will be fetched from the context when issuing partial of full merge.
 class MergeContext {
  public:
   // Clear all the operands
   void Clear() {
-    if (operand_list) {
-      operand_list->clear();
+    if (operand_list_) {
+      operand_list_->clear();
+      copied_operands_->clear();
     }
   }
-  // Replace all operands with merge_result, which are expected to be the
-  // merge result of them.
-  void PushPartialMergeResult(std::string& merge_result) {
-    assert (operand_list);
-    operand_list->clear();
-    operand_list->push_front(std::move(merge_result));
-  }
+
   // Push a merge operand
-  void PushOperand(const Slice& operand_slice) {
+  void PushOperand(const Slice& operand_slice, bool operand_pinned = false) {
     Initialize();
-    operand_list->push_front(operand_slice.ToString());
+    SetDirectionBackward();
+
+    if (operand_pinned) {
+      operand_list_->push_back(operand_slice);
+    } else {
+      // We need to have our own copy of the operand since it's not pinned
+      copied_operands_->emplace_back(operand_slice.data(),
+                                     operand_slice.size());
+      operand_list_->push_back(copied_operands_->back());
+    }
   }
 
   // Push back a merge operand
-  void PushOperandBack(const Slice& operand_slice) {
+  void PushOperandBack(const Slice& operand_slice,
+                       bool operand_pinned = false) {
     Initialize();
-    operand_list->push_back(operand_slice.ToString());
+    SetDirectionForward();
+
+    if (operand_pinned) {
+      operand_list_->push_back(operand_slice);
+    } else {
+      // We need to have our own copy of the operand since it's not pinned
+      copied_operands_->emplace_back(operand_slice.data(),
+                                     operand_slice.size());
+      operand_list_->push_back(copied_operands_->back());
+    }
   }
 
   // return total number of operands in the list
   size_t GetNumOperands() const {
-    if (!operand_list) {
+    if (!operand_list_) {
       return 0;
     }
-    return operand_list->size();
+    return operand_list_->size();
   }
 
   // Get the operand at the index.
-  Slice GetOperand(int index) const {
-    assert (operand_list);
-    return (*operand_list)[index];
+  Slice GetOperand(int index) {
+    assert(operand_list_);
+
+    SetDirectionForward();
+    return (*operand_list_)[index];
   }
 
   // Return all the operands.
-  const std::deque<std::string>& GetOperands() const {
-    if (!operand_list) {
+  const std::vector<Slice>& GetOperands() {
+    if (!operand_list_) {
       return empty_operand_list;
     }
-    return *operand_list;
+
+    SetDirectionForward();
+    return *operand_list_;
   }
- private:
+
+ private:
   void Initialize() {
-    if (!operand_list) {
-      operand_list.reset(new std::deque<std::string>());
+    if (!operand_list_) {
+      operand_list_.reset(new std::vector<Slice>());
+      copied_operands_.reset(new std::vector<std::string>());
     }
   }
-  std::unique_ptr<std::deque<std::string>> operand_list;
+
+  void SetDirectionForward() {
+    if (operands_reversed_ == true) {
+      std::reverse(operand_list_->begin(), operand_list_->end());
+      operands_reversed_ = false;
+    }
+  }
+
+  void SetDirectionBackward() {
+    if (operands_reversed_ == false) {
+      std::reverse(operand_list_->begin(), operand_list_->end());
+      operands_reversed_ = true;
+    }
+  }
+
+  // List of operands
+  std::unique_ptr<std::vector<Slice>> operand_list_;
+  // Copy of operands that are not pinned.
+  std::unique_ptr<std::vector<std::string>> copied_operands_;
+  bool operands_reversed_ = true;
 };
 
 }  // namespace rocksdb
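MergeContext is an internal class, but a small sketch makes the new pinning
contract concrete: a pinned operand is referenced in place, while an unpinned
one is copied into copied_operands_ so the stored Slice stays valid. The snippet
below is illustrative only and is not part of the change:

```cpp
#include <iostream>
#include <string>
#include "db/merge_context.h"
#include "rocksdb/slice.h"

int main() {
  rocksdb::MergeContext ctx;

  std::string pinned = "operand backed by a pinned block";
  // Caller guarantees `pinned` outlives the merge; no copy is made.
  ctx.PushOperand(rocksdb::Slice(pinned), true /* operand_pinned */);

  {
    std::string temporary = "operand backed by a temporary buffer";
    // Not pinned: MergeContext copies the bytes, so this stays valid below.
    ctx.PushOperand(rocksdb::Slice(temporary), false /* operand_pinned */);
  }

  for (const rocksdb::Slice& op : ctx.GetOperands()) {
    std::cout << op.ToString() << "\n";
  }
  return 0;
}
```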
|
@ -20,9 +20,10 @@ namespace rocksdb {
|
|||||||
|
|
||||||
Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
|
Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
|
||||||
const Slice& key, const Slice* value,
|
const Slice& key, const Slice* value,
|
||||||
const std::deque<std::string>& operands,
|
const std::vector<Slice>& operands,
|
||||||
std::string* result, Logger* logger,
|
std::string* result, Logger* logger,
|
||||||
Statistics* statistics, Env* env) {
|
Statistics* statistics, Env* env,
|
||||||
|
Slice* result_operand) {
|
||||||
assert(merge_operator != nullptr);
|
assert(merge_operator != nullptr);
|
||||||
|
|
||||||
if (operands.size() == 0) {
|
if (operands.size() == 0) {
|
||||||
@ -32,13 +33,28 @@ Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool success;
|
bool success;
|
||||||
|
Slice tmp_result_operand(nullptr, 0);
|
||||||
|
const MergeOperator::MergeOperationInput merge_in(key, value, operands,
|
||||||
|
logger);
|
||||||
|
MergeOperator::MergeOperationOutput merge_out(*result, tmp_result_operand);
|
||||||
{
|
{
|
||||||
// Setup to time the merge
|
// Setup to time the merge
|
||||||
StopWatchNano timer(env, statistics != nullptr);
|
StopWatchNano timer(env, statistics != nullptr);
|
||||||
PERF_TIMER_GUARD(merge_operator_time_nanos);
|
PERF_TIMER_GUARD(merge_operator_time_nanos);
|
||||||
|
|
||||||
// Do the merge
|
// Do the merge
|
||||||
success = merge_operator->FullMerge(key, value, operands, result, logger);
|
success = merge_operator->FullMergeV2(merge_in, &merge_out);
|
||||||
|
|
||||||
|
if (tmp_result_operand.data()) {
|
||||||
|
// FullMergeV2 result is an existing operand
|
||||||
|
if (result_operand != nullptr) {
|
||||||
|
*result_operand = tmp_result_operand;
|
||||||
|
} else {
|
||||||
|
result->assign(tmp_result_operand.data(), tmp_result_operand.size());
|
||||||
|
}
|
||||||
|
} else if (result_operand) {
|
||||||
|
*result_operand = Slice(nullptr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
RecordTick(statistics, MERGE_OPERATION_TOTAL_TIME,
|
RecordTick(statistics, MERGE_OPERATION_TOTAL_TIME,
|
||||||
statistics ? timer.ElapsedNanos() : 0);
|
statistics ? timer.ElapsedNanos() : 0);
|
||||||
@ -65,7 +81,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|||||||
// Also maintain the list of merge operands seen.
|
// Also maintain the list of merge operands seen.
|
||||||
assert(HasOperator());
|
assert(HasOperator());
|
||||||
keys_.clear();
|
keys_.clear();
|
||||||
operands_.clear();
|
merge_context_.Clear();
|
||||||
assert(user_merge_operator_);
|
assert(user_merge_operator_);
|
||||||
bool first_key = true;
|
bool first_key = true;
|
||||||
|
|
||||||
@ -87,7 +103,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|||||||
bool hit_the_next_user_key = false;
|
bool hit_the_next_user_key = false;
|
||||||
for (; iter->Valid(); iter->Next(), original_key_is_iter = false) {
|
for (; iter->Valid(); iter->Next(), original_key_is_iter = false) {
|
||||||
ParsedInternalKey ikey;
|
ParsedInternalKey ikey;
|
||||||
assert(keys_.size() == operands_.size());
|
assert(keys_.size() == merge_context_.GetNumOperands());
|
||||||
|
|
||||||
if (!ParseInternalKey(iter->key(), &ikey)) {
|
if (!ParseInternalKey(iter->key(), &ikey)) {
|
||||||
// stop at corrupted key
|
// stop at corrupted key
|
||||||
@ -142,7 +158,8 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|||||||
const Slice* val_ptr = (kTypeValue == ikey.type) ? &val : nullptr;
|
const Slice* val_ptr = (kTypeValue == ikey.type) ? &val : nullptr;
|
||||||
std::string merge_result;
|
std::string merge_result;
|
||||||
s = TimedFullMerge(user_merge_operator_, ikey.user_key, val_ptr,
|
s = TimedFullMerge(user_merge_operator_, ikey.user_key, val_ptr,
|
||||||
operands_, &merge_result, logger_, stats_, env_);
|
merge_context_.GetOperands(), &merge_result, logger_,
|
||||||
|
stats_, env_);
|
||||||
|
|
||||||
// We store the result in keys_.back() and operands_.back()
|
// We store the result in keys_.back() and operands_.back()
|
||||||
// if nothing went wrong (i.e.: no operand corruption on disk)
|
// if nothing went wrong (i.e.: no operand corruption on disk)
|
||||||
@ -152,9 +169,9 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|||||||
orig_ikey.type = kTypeValue;
|
orig_ikey.type = kTypeValue;
|
||||||
UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type);
|
UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type);
|
||||||
keys_.clear();
|
keys_.clear();
|
||||||
operands_.clear();
|
merge_context_.Clear();
|
||||||
keys_.emplace_front(std::move(original_key));
|
keys_.emplace_front(std::move(original_key));
|
||||||
operands_.emplace_front(std::move(merge_result));
|
merge_context_.PushOperand(merge_result);
|
||||||
}
|
}
|
||||||
|
|
||||||
// move iter to the next entry
|
// move iter to the next entry
|
||||||
@ -188,12 +205,13 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|||||||
// original_key before
|
// original_key before
|
||||||
ParseInternalKey(keys_.back(), &orig_ikey);
|
ParseInternalKey(keys_.back(), &orig_ikey);
|
||||||
}
|
}
|
||||||
operands_.push_front(value_slice.ToString());
|
merge_context_.PushOperand(value_slice,
|
||||||
|
iter->IsValuePinned() /* operand_pinned */);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (operands_.size() == 0) {
|
if (merge_context_.GetNumOperands() == 0) {
|
||||||
// we filtered out all the merge operands
|
// we filtered out all the merge operands
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
@ -218,11 +236,12 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|||||||
// do a final merge with nullptr as the existing value and say
|
// do a final merge with nullptr as the existing value and say
|
||||||
// bye to the merge type (it's now converted to a Put)
|
// bye to the merge type (it's now converted to a Put)
|
||||||
assert(kTypeMerge == orig_ikey.type);
|
assert(kTypeMerge == orig_ikey.type);
|
||||||
assert(operands_.size() >= 1);
|
assert(merge_context_.GetNumOperands() >= 1);
|
||||||
assert(operands_.size() == keys_.size());
|
assert(merge_context_.GetNumOperands() == keys_.size());
|
||||||
std::string merge_result;
|
std::string merge_result;
|
||||||
s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, nullptr,
|
s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, nullptr,
|
||||||
operands_, &merge_result, logger_, stats_, env_);
|
merge_context_.GetOperands(), &merge_result, logger_,
|
||||||
|
stats_, env_);
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
// The original key encountered
|
// The original key encountered
|
||||||
// We are certain that keys_ is not empty here (see assertions couple of
|
// We are certain that keys_ is not empty here (see assertions couple of
|
||||||
@ -231,9 +250,9 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|||||||
orig_ikey.type = kTypeValue;
|
orig_ikey.type = kTypeValue;
|
||||||
UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type);
|
UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type);
|
||||||
keys_.clear();
|
keys_.clear();
|
||||||
operands_.clear();
|
merge_context_.Clear();
|
||||||
keys_.emplace_front(std::move(original_key));
|
keys_.emplace_front(std::move(original_key));
|
||||||
operands_.emplace_front(std::move(merge_result));
|
merge_context_.PushOperand(merge_result);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// We haven't seen the beginning of the key nor a Put/Delete.
|
// We haven't seen the beginning of the key nor a Put/Delete.
|
||||||
@ -244,8 +263,8 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|||||||
// partial merge returns Status::OK(). Should we change the status code
|
// partial merge returns Status::OK(). Should we change the status code
|
||||||
// after a successful partial merge?
|
// after a successful partial merge?
|
||||||
s = Status::MergeInProgress();
|
s = Status::MergeInProgress();
|
||||||
if (operands_.size() >= 2 &&
|
if (merge_context_.GetNumOperands() >= 2 &&
|
||||||
operands_.size() >= min_partial_merge_operands_) {
|
merge_context_.GetNumOperands() >= min_partial_merge_operands_) {
|
||||||
bool merge_success = false;
|
bool merge_success = false;
|
||||||
std::string merge_result;
|
std::string merge_result;
|
||||||
{
|
{
|
||||||
@ -253,7 +272,8 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|||||||
PERF_TIMER_GUARD(merge_operator_time_nanos);
|
PERF_TIMER_GUARD(merge_operator_time_nanos);
|
||||||
merge_success = user_merge_operator_->PartialMergeMulti(
|
merge_success = user_merge_operator_->PartialMergeMulti(
|
||||||
orig_ikey.user_key,
|
orig_ikey.user_key,
|
||||||
std::deque<Slice>(operands_.begin(), operands_.end()),
|
std::deque<Slice>(merge_context_.GetOperands().begin(),
|
||||||
|
merge_context_.GetOperands().end()),
|
||||||
&merge_result, logger_);
|
&merge_result, logger_);
|
||||||
RecordTick(stats_, MERGE_OPERATION_TOTAL_TIME,
|
RecordTick(stats_, MERGE_OPERATION_TOTAL_TIME,
|
||||||
stats_ ? timer.ElapsedNanosSafe() : 0);
|
stats_ ? timer.ElapsedNanosSafe() : 0);
|
||||||
@ -261,8 +281,8 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|||||||
if (merge_success) {
|
if (merge_success) {
|
||||||
// Merging of operands (associative merge) was successful.
|
// Merging of operands (associative merge) was successful.
|
||||||
// Replace operands with the merge result
|
// Replace operands with the merge result
|
||||||
operands_.clear();
|
merge_context_.Clear();
|
||||||
operands_.emplace_front(std::move(merge_result));
|
merge_context_.PushOperand(merge_result);
|
||||||
keys_.erase(keys_.begin(), keys_.end() - 1);
|
keys_.erase(keys_.begin(), keys_.end() - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -8,8 +8,10 @@

 #include <deque>
 #include <string>
+#include <vector>

 #include "db/dbformat.h"
+#include "db/merge_context.h"
 #include "rocksdb/compaction_filter.h"
 #include "rocksdb/env.h"
 #include "rocksdb/slice.h"
@@ -42,14 +44,13 @@ class MergeHelper {
         latest_snapshot_(latest_snapshot),
         level_(level),
         keys_(),
-        operands_(),
         filter_timer_(env_),
         total_filter_time_(0U),
         stats_(stats) {
     assert(user_comparator_ != nullptr);
   }

-  // Wrapper around MergeOperator::FullMerge() that records perf statistics.
+  // Wrapper around MergeOperator::FullMergeV2() that records perf statistics.
   // Result of merge will be written to result if status returned is OK.
   // If operands is empty, the value will simply be copied to result.
   // Returns one of the following statuses:
@@ -57,9 +58,10 @@ class MergeHelper {
   // - Corruption: Merge operator reported unsuccessful merge.
   static Status TimedFullMerge(const MergeOperator* merge_operator,
                                const Slice& key, const Slice* value,
-                               const std::deque<std::string>& operands,
+                               const std::vector<Slice>& operands,
                                std::string* result, Logger* logger,
-                               Statistics* statistics, Env* env);
+                               Statistics* statistics, Env* env,
+                               Slice* result_operand = nullptr);

   // Merge entries until we hit
   //     - a corrupted key
@@ -116,7 +118,9 @@ class MergeHelper {
   // So keys().back() was the first key seen by iterator.
   // TODO: Re-style this comment to be like the first one
   const std::deque<std::string>& keys() const { return keys_; }
-  const std::deque<std::string>& values() const { return operands_; }
+  const std::vector<Slice>& values() const {
+    return merge_context_.GetOperands();
+  }
   uint64_t TotalFilterTime() const { return total_filter_time_; }
   bool HasOperator() const { return user_merge_operator_ != nullptr; }

@@ -133,8 +137,11 @@ class MergeHelper {

   // the scratch area that holds the result of MergeUntil
   // valid up to the next MergeUntil call
-  std::deque<std::string> keys_;      // Keeps track of the sequence of keys seen
-  std::deque<std::string> operands_;  // Parallel with keys_; stores the values
+  // Keeps track of the sequence of keys seen
+  std::deque<std::string> keys_;
+  // Parallel with keys_; stores the operands
+  mutable MergeContext merge_context_;

   StopWatchNano filter_timer_;
   uint64_t total_filter_time_;
@@ -159,7 +166,7 @@ class MergeOutputIterator {
  private:
  const MergeHelper* merge_helper_;
  std::deque<std::string>::const_reverse_iterator it_keys_;
-  std::deque<std::string>::const_reverse_iterator it_values_;
+  std::vector<Slice>::const_reverse_iterator it_values_;
 };

 } // namespace rocksdb
@@ -11,6 +11,18 @@

 namespace rocksdb {

+bool MergeOperator::FullMergeV2(const MergeOperationInput& merge_in,
+                                MergeOperationOutput* merge_out) const {
+  // If FullMergeV2 is not implemented, we convert the operand_list to
+  // std::deque<std::string> and pass it to FullMerge
+  std::deque<std::string> operand_list_str;
+  for (auto& op : merge_in.operand_list) {
+    operand_list_str.emplace_back(op.data(), op.size());
+  }
+  return FullMerge(merge_in.key, merge_in.existing_value, operand_list_str,
+                   &merge_out->new_value, merge_in.logger);
+}
+
 // The default implementation of PartialMergeMulti, which invokes
 // PartialMerge multiple times internally and merges two operands at
 // a time.
@@ -39,23 +51,20 @@ bool MergeOperator::PartialMergeMulti(const Slice& key,
 // Given a "real" merge from the library, call the user's
 // associative merge function one-by-one on each of the operands.
 // NOTE: It is assumed that the client's merge-operator will handle any errors.
-bool AssociativeMergeOperator::FullMerge(
-    const Slice& key,
-    const Slice* existing_value,
-    const std::deque<std::string>& operand_list,
-    std::string* new_value,
-    Logger* logger) const {
+bool AssociativeMergeOperator::FullMergeV2(
+    const MergeOperationInput& merge_in,
+    MergeOperationOutput* merge_out) const {

   // Simply loop through the operands
   Slice temp_existing;
-  for (const auto& operand : operand_list) {
-    Slice value(operand);
+  const Slice* existing_value = merge_in.existing_value;
+  for (const auto& operand : merge_in.operand_list) {
     std::string temp_value;
-    if (!Merge(key, existing_value, value, &temp_value, logger)) {
+    if (!Merge(merge_in.key, existing_value, operand, &temp_value,
+               merge_in.logger)) {
       return false;
     }
-    swap(temp_value, *new_value);
-    temp_existing = Slice(*new_value);
+    swap(temp_value, merge_out->new_value);
+    temp_existing = Slice(merge_out->new_value);
     existing_value = &temp_existing;
   }

@@ -18,7 +18,7 @@ namespace rocksdb {
 class PinnedIteratorsManager {
  public:
  PinnedIteratorsManager() : pinning_enabled(false), pinned_iters_(nullptr) {}
-  ~PinnedIteratorsManager() { assert(!pinning_enabled); }
+  ~PinnedIteratorsManager() { ReleasePinnedIterators(); }

   // Enable Iterators pinning
   void StartPinning() {
@@ -43,7 +43,7 @@ class PinnedIteratorsManager {
   }

   // Release pinned Iterators
-  void ReleasePinnedIterators() {
+  inline void ReleasePinnedIterators() {
     if (pinning_enabled) {
       pinning_enabled = false;

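The read path below (Version::Get and BlockBasedTable::Get) drives this manager. A rough sketch of the lifecycle, restricted to the methods that appear in this diff; the include paths, the stand-in `data_block_iter`, and the function itself are assumptions for illustration:

```
#include "db/pinned_iterators_manager.h"
#include "table/internal_iterator.h"

// Sketch only: pin a data-block iterator while merge operands that point into
// its block are still in flight, then release everything in one shot.
void PinWhileCollectingOperands(rocksdb::InternalIterator* data_block_iter) {
  rocksdb::PinnedIteratorsManager pinned_iters_mgr;
  pinned_iters_mgr.StartPinning();  // operand Slices may now point into blocks

  if (pinned_iters_mgr.PinningEnabled()) {
    // Hand the (heap-allocated) iterator to the manager instead of copying the
    // operand out of its block, as BlockBasedTable::Get does below.
    pinned_iters_mgr.PinIteratorIfNeeded(data_block_iter);
  }

  // ... run FullMergeV2 over the pinned operands here ...

  // Frees the pinned iterators; after this change the destructor does the
  // same, so an early return is also safe.
  pinned_iters_mgr.ReleasePinnedIterators();
}
```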
@@ -31,6 +31,7 @@
 #include "db/memtable.h"
 #include "db/merge_context.h"
 #include "db/merge_helper.h"
+#include "db/pinned_iterators_manager.h"
 #include "db/table_cache.h"
 #include "db/version_builder.h"
 #include "rocksdb/env.h"
@@ -917,10 +918,17 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
     *key_exists = true;
   }

+  PinnedIteratorsManager pinned_iters_mgr;
   GetContext get_context(
       user_comparator(), merge_operator_, info_log_, db_statistics_,
       status->ok() ? GetContext::kNotFound : GetContext::kMerge, user_key,
-      value, value_found, merge_context, this->env_, seq);
+      value, value_found, merge_context, this->env_, seq,
+      merge_operator_ ? &pinned_iters_mgr : nullptr);
+
+  // Pin blocks that we read to hold merge operands
+  if (merge_operator_) {
+    pinned_iters_mgr.StartPinning();
+  }

   FilePicker fp(
       storage_info_.files_, user_key, ikey, &storage_info_.level_files_brief_,
@@ -922,12 +922,10 @@ class MemTableInserter : public WriteBatch::Handler {
       auto merge_operator = moptions->merge_operator;
       assert(merge_operator);

-      std::deque<std::string> operands;
-      operands.push_front(value.ToString());
       std::string new_value;

       Status merge_status = MergeHelper::TimedFullMerge(
-          merge_operator, key, &get_value_slice, operands, &new_value,
+          merge_operator, key, &get_value_slice, {value}, &new_value,
           moptions->info_log, moptions->statistics, Env::Default());

       if (!merge_status.ok()) {
@@ -10,19 +10,18 @@

 class MyMerge : public rocksdb::MergeOperator {
  public:
-  bool FullMerge(const rocksdb::Slice& key,
-                 const rocksdb::Slice* existing_value,
-                 const std::deque<std::string>& operand_list,
-                 std::string* new_value,
-                 rocksdb::Logger* logger) const override {
-    new_value->clear();
-    if (existing_value != nullptr) {
-      new_value->assign(existing_value->data(), existing_value->size());
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override {
+    merge_out->new_value.clear();
+    if (merge_in.existing_value != nullptr) {
+      merge_out->new_value.assign(merge_in.existing_value->data(),
+                                  merge_in.existing_value->size());
     }
-    for (const std::string& m : operand_list) {
-      fprintf(stderr, "Merge(%s)\n", m.c_str());
-      assert(m != "bad"); // the compaction filter filters out bad values
-      new_value->assign(m);
+    for (const rocksdb::Slice& m : merge_in.operand_list) {
+      fprintf(stderr, "Merge(%s)\n", m.ToString().c_str());
+      // the compaction filter filters out bad values
+      assert(m.ToString() != "bad");
+      merge_out->new_value.assign(m.data(), m.size());
     }
     return true;
   }
@@ -9,6 +9,7 @@
 #include <deque>
 #include <memory>
 #include <string>
+#include <vector>

 #include "rocksdb/slice.h"

@@ -32,7 +33,7 @@ class Logger;
 //    into rocksdb); numeric addition and string concatenation are examples;
 //
 // b) MergeOperator - the generic class for all the more abstract / complex
-//    operations; one method (FullMerge) to merge a Put/Delete value with a
+//    operations; one method (FullMergeV2) to merge a Put/Delete value with a
 //    merge operand; and another method (PartialMerge) that merges multiple
 //    operands together. this is especially useful if your key values have
 //    complex structures but you would still like to support client-specific
@@ -69,7 +70,49 @@ class MergeOperator {
                          const Slice* existing_value,
                          const std::deque<std::string>& operand_list,
                          std::string* new_value,
-                         Logger* logger) const = 0;
+                         Logger* logger) const {
+    // deprecated, please use FullMergeV2()
+    assert(false);
+    return false;
+  }

+  struct MergeOperationInput {
+    explicit MergeOperationInput(const Slice& _key,
+                                 const Slice* _existing_value,
+                                 const std::vector<Slice>& _operand_list,
+                                 Logger* _logger)
+        : key(_key),
+          existing_value(_existing_value),
+          operand_list(_operand_list),
+          logger(_logger) {}
+
+    // The key associated with the merge operation.
+    const Slice& key;
+    // The existing value of the current key, nullptr means that the
+    // value dont exist.
+    const Slice* existing_value;
+    // A list of operands to apply.
+    const std::vector<Slice>& operand_list;
+    // Logger could be used by client to log any errors that happen during
+    // the merge operation.
+    Logger* logger;
+  };
+
+  struct MergeOperationOutput {
+    explicit MergeOperationOutput(std::string& _new_value,
+                                  Slice& _existing_operand)
+        : new_value(_new_value), existing_operand(_existing_operand) {}
+
+    // Client is responsible for filling the merge result here.
+    std::string& new_value;
+    // If the merge result is one of the existing operands (or existing_value),
+    // client can set this field to the operand (or existing_value) instead of
+    // using new_value.
+    Slice& existing_operand;
+  };
+
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const;

   // This function performs merge(left_op, right_op)
   // when both the operands are themselves merge operation types
@@ -99,7 +142,7 @@ class MergeOperator {
   // TODO: Presently there is no way to differentiate between error/corruption
   // and simply "return false". For now, the client should simply return
   // false in any case it cannot perform partial-merge, regardless of reason.
-  // If there is corruption in the data, handle it in the FullMerge() function,
+  // If there is corruption in the data, handle it in the FullMergeV2() function
   // and return false there. The default implementation of PartialMerge will
   // always return false.
   virtual bool PartialMerge(const Slice& key, const Slice& left_operand,
@@ -171,11 +214,8 @@ class AssociativeMergeOperator : public MergeOperator {

  private:
  // Default implementations of the MergeOperator functions
-  virtual bool FullMerge(const Slice& key,
-                         const Slice* existing_value,
-                         const std::deque<std::string>& operand_list,
-                         std::string* new_value,
-                         Logger* logger) const override;
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override;

   virtual bool PartialMerge(const Slice& key,
                             const Slice& left_operand,
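Taken together, the new public surface is FullMergeV2 plus the two structs above. A minimal user-side sketch, modelled on the MyMerge and MaxOperator conversions elsewhere in this diff; the class name and its "keep the largest value" semantics are illustrative, not part of the patch:

```
#include <string>

#include "rocksdb/merge_operator.h"
#include "rocksdb/slice.h"

// Keeps the lexicographically largest of existing_value and all operands.
class MaxMerge : public rocksdb::MergeOperator {
 public:
  bool FullMergeV2(const MergeOperationInput& merge_in,
                   MergeOperationOutput* merge_out) const override {
    // Start from the existing value if there is one; the operands are Slices
    // that stay valid for the duration of the merge, so nothing is copied here.
    rocksdb::Slice max;
    if (merge_in.existing_value != nullptr) {
      max = *merge_in.existing_value;
    }
    for (const rocksdb::Slice& op : merge_in.operand_list) {
      if (max.compare(op) < 0) {
        max = op;
      }
    }
    // Point the output at an existing operand/value instead of copying it
    // into new_value.
    merge_out->existing_operand = max;
    return true;
  }

  const char* Name() const override { return "MaxMerge"; }
};
```

Because `operand_list` holds pinned Slices, an operator like this can answer entirely through `existing_operand` and never touch `new_value`, which is where the memcpy savings in the benchmarks come from.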
@@ -162,6 +162,8 @@ class BlockIter : public InternalIterator {

   virtual bool IsKeyPinned() const override { return key_pinned_; }

+  virtual bool IsValuePinned() const override { return true; }
+
  private:
   const Comparator* comparator_;
   const char* data_;       // underlying block contents
@@ -12,6 +12,7 @@
 #include <utility>

 #include "db/dbformat.h"
+#include "db/pinned_iterators_manager.h"

 #include "rocksdb/cache.h"
 #include "rocksdb/comparator.h"
@@ -1381,6 +1382,10 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
   BlockIter iiter;
   NewIndexIterator(read_options, &iiter);

+  PinnedIteratorsManager* pinned_iters_mgr = get_context->pinned_iters_mgr();
+  bool pin_blocks = pinned_iters_mgr && pinned_iters_mgr->PinningEnabled();
+  BlockIter* biter = nullptr;
+
   bool done = false;
   for (iiter.Seek(key); iiter.Valid() && !done; iiter.Next()) {
     Slice handle_value = iiter.value();
@@ -1398,37 +1403,60 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
       RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
       break;
     } else {
-      BlockIter biter;
-      NewDataBlockIterator(rep_, read_options, iiter.value(), &biter);
+      BlockIter stack_biter;
+      if (pin_blocks) {
+        // We need to create the BlockIter on heap because we may need to
+        // pin it if we encounterd merge operands
+        biter = static_cast<BlockIter*>(
+            NewDataBlockIterator(rep_, read_options, iiter.value()));
+      } else {
+        biter = &stack_biter;
+        NewDataBlockIterator(rep_, read_options, iiter.value(), biter);
+      }

       if (read_options.read_tier == kBlockCacheTier &&
-          biter.status().IsIncomplete()) {
+          biter->status().IsIncomplete()) {
         // couldn't get block from block_cache
         // Update Saver.state to Found because we are only looking for whether
         // we can guarantee the key is not there when "no_io" is set
         get_context->MarkKeyMayExist();
         break;
       }
-      if (!biter.status().ok()) {
-        s = biter.status();
+      if (!biter->status().ok()) {
+        s = biter->status();
         break;
       }

       // Call the *saver function on each entry/block until it returns false
-      for (biter.Seek(key); biter.Valid(); biter.Next()) {
+      for (biter->Seek(key); biter->Valid(); biter->Next()) {
         ParsedInternalKey parsed_key;
-        if (!ParseInternalKey(biter.key(), &parsed_key)) {
+        if (!ParseInternalKey(biter->key(), &parsed_key)) {
           s = Status::Corruption(Slice());
         }

-        if (!get_context->SaveValue(parsed_key, biter.value())) {
+        if (!get_context->SaveValue(parsed_key, biter->value(), pin_blocks)) {
           done = true;
           break;
         }
       }
-      s = biter.status();
+      s = biter->status();
+
+      if (pin_blocks) {
+        if (get_context->State() == GetContext::kMerge) {
+          // Pin blocks as long as we are merging
+          pinned_iters_mgr->PinIteratorIfNeeded(biter);
+        } else {
+          delete biter;
+        }
+        biter = nullptr;
+      } else {
+        // biter is on stack, Nothing to clean
+      }
     }
   }
+  if (pin_blocks && biter != nullptr) {
+    delete biter;
+  }
   if (s.ok()) {
     s = iiter.status();
   }
@@ -5,6 +5,7 @@

 #include "table/get_context.h"
 #include "db/merge_helper.h"
+#include "db/pinned_iterators_manager.h"
 #include "rocksdb/env.h"
 #include "rocksdb/merge_operator.h"
 #include "rocksdb/statistics.h"
@@ -36,7 +37,8 @@ GetContext::GetContext(const Comparator* ucmp,
                        Statistics* statistics, GetState init_state,
                        const Slice& user_key, std::string* ret_value,
                        bool* value_found, MergeContext* merge_context, Env* env,
-                       SequenceNumber* seq)
+                       SequenceNumber* seq,
+                       PinnedIteratorsManager* _pinned_iters_mgr)
     : ucmp_(ucmp),
       merge_operator_(merge_operator),
       logger_(logger),
@@ -48,7 +50,8 @@ GetContext::GetContext(const Comparator* ucmp,
       merge_context_(merge_context),
       env_(env),
       seq_(seq),
-      replay_log_(nullptr) {
+      replay_log_(nullptr),
+      pinned_iters_mgr_(_pinned_iters_mgr) {
   if (seq_) {
     *seq_ = kMaxSequenceNumber;
   }
@@ -77,7 +80,7 @@ void GetContext::SaveValue(const Slice& value, SequenceNumber seq) {
 }

 bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
-                           const Slice& value) {
+                           const Slice& value, bool value_pinned) {
   assert((state_ != kMerge && parsed_key.type != kTypeMerge) ||
          merge_context_ != nullptr);
   if (ucmp_->Equal(parsed_key.user_key, user_key_)) {
@@ -139,7 +142,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
       case kTypeMerge:
         assert(state_ == kNotFound || state_ == kMerge);
         state_ = kMerge;
-        merge_context_->PushOperand(value);
+        merge_context_->PushOperand(value, value_pinned);
         return true;

       default:
@@ -167,7 +170,7 @@ void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
     // Since SequenceNumber is not stored and unknown, we will use
     // kMaxSequenceNumber.
     get_context->SaveValue(
-        ParsedInternalKey(user_key, kMaxSequenceNumber, type), value);
+        ParsedInternalKey(user_key, kMaxSequenceNumber, type), value, true);
   }
 #else  // ROCKSDB_LITE
   assert(false);
@@ -11,6 +11,7 @@

 namespace rocksdb {
 class MergeContext;
+class PinnedIteratorsManager;

 class GetContext {
  public:
@@ -26,7 +27,8 @@ class GetContext {
              Logger* logger, Statistics* statistics, GetState init_state,
              const Slice& user_key, std::string* ret_value, bool* value_found,
              MergeContext* merge_context, Env* env,
-             SequenceNumber* seq = nullptr);
+             SequenceNumber* seq = nullptr,
+             PinnedIteratorsManager* _pinned_iters_mgr = nullptr);

   void MarkKeyMayExist();

@@ -35,7 +37,8 @@ class GetContext {
   //
   // Returns True if more keys need to be read (due to merges) or
   // False if the complete value has been found.
-  bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value);
+  bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
+                 bool value_pinned = false);

   // Simplified version of the previous function. Should only be used when we
   // know that the operation is a Put.
@@ -43,6 +46,8 @@ class GetContext {

   GetState State() const { return state_; }

+  PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; }
+
   // If a non-null string is passed, all the SaveValue calls will be
   // logged into the string. The operations can then be replayed on
   // another GetContext with replayGetContextLog.
@@ -68,6 +73,8 @@ class GetContext {
   // write to the key or kMaxSequenceNumber if unknown
   SequenceNumber* seq_;
   std::string* replay_log_;
+  // Used to temporarily pin blocks when state_ == GetContext::kMerge
+  PinnedIteratorsManager* pinned_iters_mgr_;
 };

 void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
@@ -80,6 +80,11 @@ class InternalIterator : public Cleanable {
   // set to false.
   virtual bool IsKeyPinned() const { return false; }

+  // If true, this means that the Slice returned by value() is valid as long as
+  // PinnedIteratorsManager::ReleasePinnedIterators is not called and the
+  // Iterator is not deleted.
+  virtual bool IsValuePinned() const { return false; }
+
   virtual Status GetProperty(std::string prop_name, std::string* prop) {
     return Status::NotSupported("");
   }
@@ -16,7 +16,7 @@
 namespace rocksdb {

 // A internal wrapper class with an interface similar to Iterator that caches
-// the valid(), key() and IsKeyPinned() results for an underlying iterator.
+// the valid() and key() results for an underlying iterator.
 // This can help avoid virtual function calls and also gives better
 // cache locality.
 class IteratorWrapper {
@@ -55,7 +55,6 @@ class IteratorWrapper {
   // Iterator interface methods
   bool Valid() const { return valid_; }
   Slice key() const { assert(Valid()); return key_; }
-  bool IsKeyPinned() const { assert(Valid()); return is_key_pinned_; }
   Slice value() const { assert(Valid()); return iter_->value(); }
   // Methods below require iter() != nullptr
   Status status() const { assert(iter_); return iter_->status(); }
@@ -64,10 +63,18 @@ class IteratorWrapper {
   void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); }
   void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); }
   void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); }

   void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) {
     assert(iter_);
     iter_->SetPinnedItersMgr(pinned_iters_mgr);
-    Update();
+  }
+  bool IsKeyPinned() const {
+    assert(Valid());
+    return iter_->IsKeyPinned();
+  }
+  bool IsValuePinned() const {
+    assert(Valid());
+    return iter_->IsValuePinned();
   }

  private:
@@ -75,14 +82,12 @@ class IteratorWrapper {
     valid_ = iter_->Valid();
     if (valid_) {
       key_ = iter_->key();
-      is_key_pinned_ = iter_->IsKeyPinned();
     }
   }

   InternalIterator* iter_;
   bool valid_;
   Slice key_;
-  bool is_key_pinned_;
 };

 class Arena;
@@ -257,6 +257,12 @@ class MergingIterator : public InternalIterator {
            current_->IsKeyPinned();
   }

+  virtual bool IsValuePinned() const override {
+    assert(Valid());
+    return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
+           current_->IsValuePinned();
+  }
+
  private:
   // Clears heaps for both directions, used when changing direction or seeking
   void ClearHeaps();
@@ -78,6 +78,10 @@ class TwoLevelIterator : public InternalIterator {
     return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
            second_level_iter_.iter() && second_level_iter_.IsKeyPinned();
   }
+  virtual bool IsValuePinned() const override {
+    return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
+           second_level_iter_.iter() && second_level_iter_.IsValuePinned();
+  }

  private:
  void SaveError(const Status& s) {
@@ -45,6 +45,8 @@ default_params = {
     "write_buffer_size": 4 * 1024 * 1024,
     "writepercent": 35,
     "subcompactions": lambda: random.randint(1, 4),
+    "use_merge": lambda: random.randint(0, 1),
+    "use_full_merge_v1": lambda: random.randint(0, 1),
 }

@@ -453,6 +453,9 @@ static const bool FLAGS_prefix_size_dummy __attribute__((unused)) =
 DEFINE_bool(use_merge, false, "On true, replaces all writes with a Merge "
             "that behaves like a Put");

+DEFINE_bool(use_full_merge_v1, false,
+            "On true, use a merge operator that implement the deprecated "
+            "version of FullMerge");

 namespace rocksdb {

@@ -2106,7 +2109,11 @@ class StressTest {
     }

     if (FLAGS_use_merge) {
-      options_.merge_operator = MergeOperators::CreatePutOperator();
+      if (FLAGS_use_full_merge_v1) {
+        options_.merge_operator = MergeOperators::CreateDeprecatedPutOperator();
+      } else {
+        options_.merge_operator = MergeOperators::CreatePutOperator();
+      }
     }

     // set universal style compaction configurations, if applicable
@@ -614,10 +614,8 @@ class ChanglingMergeOperator : public MergeOperator {

   void SetName(const std::string& name) { name_ = name; }

-  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
-                         const std::deque<std::string>& operand_list,
-                         std::string* new_value,
-                         Logger* logger) const override {
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override {
     return false;
   }
   virtual bool PartialMergeMulti(const Slice& key,
@@ -16,6 +16,7 @@ namespace rocksdb {
 class MergeOperators {
  public:
   static std::shared_ptr<MergeOperator> CreatePutOperator();
+  static std::shared_ptr<MergeOperator> CreateDeprecatedPutOperator();
   static std::shared_ptr<MergeOperator> CreateUInt64AddOperator();
   static std::shared_ptr<MergeOperator> CreateStringAppendOperator();
   static std::shared_ptr<MergeOperator> CreateStringAppendTESTOperator();
@@ -27,6 +28,8 @@ class MergeOperators {
       const std::string& name) {
     if (name == "put") {
       return CreatePutOperator();
+    } else if (name == "put_v1") {
+      return CreateDeprecatedPutOperator();
     } else if ( name == "uint64add") {
       return CreateUInt64AddOperator();
     } else if (name == "stringappend") {
@@ -19,22 +19,20 @@ namespace { // anonymous namespace
 //  Slice::compare
 class MaxOperator : public MergeOperator {
  public:
-  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
-                         const std::deque<std::string>& operand_list,
-                         std::string* new_value,
-                         Logger* logger) const override {
-    Slice max;
-    if (existing_value) {
-      max = Slice(existing_value->data(), existing_value->size());
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override {
+    Slice& max = merge_out->existing_operand;
+    if (merge_in.existing_value) {
+      max = Slice(merge_in.existing_value->data(),
+                  merge_in.existing_value->size());
     }

-    for (const auto& op : operand_list) {
+    for (const auto& op : merge_in.operand_list) {
       if (max.compare(op) < 0) {
-        max = Slice(op.data(), op.size());
+        max = op;
       }
     }

-    new_value->assign(max.data(), max.size());
     return true;
   }

@@ -57,12 +57,33 @@ class PutOperator : public MergeOperator {
   }
 };

+class PutOperatorV2 : public PutOperator {
+  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
+                         const std::deque<std::string>& operand_sequence,
+                         std::string* new_value,
+                         Logger* logger) const override {
+    assert(false);
+    return false;
+  }
+
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override {
+    // Put basically only looks at the current/latest value
+    assert(!merge_in.operand_list.empty());
+    merge_out->existing_operand = merge_in.operand_list.back();
+    return true;
+  }
+};
+
 } // end of anonymous namespace

 namespace rocksdb {

-std::shared_ptr<MergeOperator> MergeOperators::CreatePutOperator() {
+std::shared_ptr<MergeOperator> MergeOperators::CreateDeprecatedPutOperator() {
   return std::make_shared<PutOperator>();
 }

+std::shared_ptr<MergeOperator> MergeOperators::CreatePutOperator() {
+  return std::make_shared<PutOperatorV2>();
+}
 }
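One consequence of `existing_operand` (used by PutOperatorV2 above) is that callers of FullMergeV2 have to check it before falling back to `new_value`; the TtlMergeOperator and the test helper later in this diff follow this pattern. A reduced caller-side sketch; the function name and its parameters are illustrative, not an API from the patch:

```
#include <string>
#include <vector>

#include "rocksdb/merge_operator.h"
#include "rocksdb/slice.h"

// Sketch only: invoke FullMergeV2 and materialize the result as a std::string.
bool RunFullMergeV2(const rocksdb::MergeOperator& op, const rocksdb::Slice& key,
                    const rocksdb::Slice* existing_value,
                    const std::vector<rocksdb::Slice>& operands,
                    rocksdb::Logger* logger, std::string* result) {
  rocksdb::Slice existing_operand(nullptr, 0);
  rocksdb::MergeOperator::MergeOperationInput merge_in(key, existing_value,
                                                       operands, logger);
  rocksdb::MergeOperator::MergeOperationOutput merge_out(*result,
                                                         existing_operand);
  if (!op.FullMergeV2(merge_in, &merge_out)) {
    return false;
  }
  if (existing_operand.data() != nullptr) {
    // The operator pointed at an existing operand/value; copy it out only
    // here, at the very end, which is what removes the per-operand memcpy.
    result->assign(existing_operand.data(), existing_operand.size());
  }
  return true;
}
```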
@@ -21,20 +21,22 @@ StringAppendTESTOperator::StringAppendTESTOperator(char delim_char)
 }

 // Implementation for the merge operation (concatenates two strings)
-bool StringAppendTESTOperator::FullMerge(
-    const Slice& key,
-    const Slice* existing_value,
-    const std::deque<std::string>& operands,
-    std::string* new_value,
-    Logger* logger) const {
+bool StringAppendTESTOperator::FullMergeV2(
+    const MergeOperationInput& merge_in,
+    MergeOperationOutput* merge_out) const {

   // Clear the *new_value for writing.
-  assert(new_value);
-  new_value->clear();
+  merge_out->new_value.clear();
+
+  if (merge_in.existing_value == nullptr && merge_in.operand_list.size() == 1) {
+    // Only one operand
+    merge_out->existing_operand = merge_in.operand_list.back();
+    return true;
+  }

   // Compute the space needed for the final result.
   size_t numBytes = 0;
-  for(auto it = operands.begin(); it != operands.end(); ++it) {
+  for (auto it = merge_in.operand_list.begin();
+       it != merge_in.operand_list.end(); ++it) {
     numBytes += it->size() + 1;   // Plus 1 for the delimiter
   }

@@ -42,20 +44,23 @@ bool StringAppendTESTOperator::FullMerge(
   bool printDelim = false;

   // Prepend the *existing_value if one exists.
-  if (existing_value) {
-    new_value->reserve(numBytes + existing_value->size());
-    new_value->append(existing_value->data(), existing_value->size());
+  if (merge_in.existing_value) {
+    merge_out->new_value.reserve(numBytes + merge_in.existing_value->size());
+    merge_out->new_value.append(merge_in.existing_value->data(),
+                                merge_in.existing_value->size());
     printDelim = true;
   } else if (numBytes) {
-    new_value->reserve(numBytes-1); // Minus 1 since we have one less delimiter
+    merge_out->new_value.reserve(
+        numBytes - 1);  // Minus 1 since we have one less delimiter
   }

   // Concatenate the sequence of strings (and add a delimiter between each)
-  for(auto it = operands.begin(); it != operands.end(); ++it) {
+  for (auto it = merge_in.operand_list.begin();
+       it != merge_in.operand_list.end(); ++it) {
     if (printDelim) {
-      new_value->append(1,delim_);
+      merge_out->new_value.append(1, delim_);
     }
-    new_value->append(*it);
+    merge_out->new_value.append(it->data(), it->size());
     printDelim = true;
   }

@@ -24,11 +24,8 @@ class StringAppendTESTOperator : public MergeOperator {
   // Constructor with delimiter
   explicit StringAppendTESTOperator(char delim_char);

-  virtual bool FullMerge(const Slice& key,
-                         const Slice* existing_value,
-                         const std::deque<std::string>& operand_sequence,
-                         std::string* new_value,
-                         Logger* logger) const override;
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override;

   virtual bool PartialMergeMulti(const Slice& key,
                                  const std::deque<Slice>& operand_list,
@@ -128,19 +128,19 @@ class DummyMergeOperator : public MergeOperator {
   DummyMergeOperator() {}
   virtual ~DummyMergeOperator() {}

-  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
-                         const std::deque<std::string>& operand_list,
-                         std::string* new_value, Logger* logger) const {
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override {
     return false;
   }

   virtual bool PartialMergeMulti(const Slice& key,
                                  const std::deque<Slice>& operand_list,
-                                 std::string* new_value, Logger* logger) const {
+                                 std::string* new_value,
+                                 Logger* logger) const override {
     return false;
   }

-  virtual const char* Name() const { return "DummyMergeOperator"; }
+  virtual const char* Name() const override { return "DummyMergeOperator"; }
 };

 class DummySliceTransform : public SliceTransform {
@@ -225,38 +225,43 @@ class TtlMergeOperator : public MergeOperator {
     assert(env);
   }

-  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
-                         const std::deque<std::string>& operands,
-                         std::string* new_value, Logger* logger) const
-      override {
+  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
+                           MergeOperationOutput* merge_out) const override {
     const uint32_t ts_len = DBWithTTLImpl::kTSLength;
-    if (existing_value && existing_value->size() < ts_len) {
-      Log(InfoLogLevel::ERROR_LEVEL, logger,
+    if (merge_in.existing_value && merge_in.existing_value->size() < ts_len) {
+      Log(InfoLogLevel::ERROR_LEVEL, merge_in.logger,
           "Error: Could not remove timestamp from existing value.");
       return false;
     }

     // Extract time-stamp from each operand to be passed to user_merge_op_
-    std::deque<std::string> operands_without_ts;
-    for (const auto& operand : operands) {
+    std::vector<Slice> operands_without_ts;
+    for (const auto& operand : merge_in.operand_list) {
       if (operand.size() < ts_len) {
-        Log(InfoLogLevel::ERROR_LEVEL, logger,
+        Log(InfoLogLevel::ERROR_LEVEL, merge_in.logger,
             "Error: Could not remove timestamp from operand value.");
         return false;
       }
-      operands_without_ts.push_back(operand.substr(0, operand.size() - ts_len));
+      operands_without_ts.push_back(operand);
+      operands_without_ts.back().remove_suffix(ts_len);
     }

     // Apply the user merge operator (store result in *new_value)
     bool good = true;
-    if (existing_value) {
-      Slice existing_value_without_ts(existing_value->data(),
-                                      existing_value->size() - ts_len);
-      good = user_merge_op_->FullMerge(key, &existing_value_without_ts,
-                                       operands_without_ts, new_value, logger);
+    MergeOperationOutput user_merge_out(merge_out->new_value,
+                                        merge_out->existing_operand);
+    if (merge_in.existing_value) {
+      Slice existing_value_without_ts(merge_in.existing_value->data(),
+                                      merge_in.existing_value->size() - ts_len);
+      good = user_merge_op_->FullMergeV2(
+          MergeOperationInput(merge_in.key, &existing_value_without_ts,
+                              operands_without_ts, merge_in.logger),
+          &user_merge_out);
     } else {
-      good = user_merge_op_->FullMerge(key, nullptr, operands_without_ts,
-                                       new_value, logger);
+      good = user_merge_op_->FullMergeV2(
+          MergeOperationInput(merge_in.key, nullptr, operands_without_ts,
+                              merge_in.logger),
+          &user_merge_out);
     }

     // Return false if the user merge operator returned false
@@ -264,17 +269,23 @@ class TtlMergeOperator : public MergeOperator {
       return false;
     }

+    if (merge_out->existing_operand.data()) {
+      merge_out->new_value.assign(merge_out->existing_operand.data(),
+                                  merge_out->existing_operand.size());
+      merge_out->existing_operand = Slice(nullptr, 0);
+    }
+
     // Augment the *new_value with the ttl time-stamp
     int64_t curtime;
     if (!env_->GetCurrentTime(&curtime).ok()) {
-      Log(InfoLogLevel::ERROR_LEVEL, logger,
+      Log(InfoLogLevel::ERROR_LEVEL, merge_in.logger,
           "Error: Could not get current time to be attached internally "
           "to the new value.");
       return false;
     } else {
       char ts_string[ts_len];
       EncodeFixed32(ts_string, (int32_t)curtime);
-      new_value->append(ts_string, ts_len);
+      merge_out->new_value.append(ts_string, ts_len);
       return true;
     }
   }
@@ -13,22 +13,41 @@ class UtilMergeOperatorTest : public testing::Test {
  public:
  UtilMergeOperatorTest() {}

-  std::string FullMerge(std::string existing_value,
-                        std::deque<std::string> operands,
-                        std::string key = "") {
-    Slice existing_value_slice(existing_value);
+  std::string FullMergeV2(std::string existing_value,
+                          std::vector<std::string> operands,
+                          std::string key = "") {
     std::string result;
+    Slice result_operand(nullptr, 0);

-    merge_operator_->FullMerge(key, &existing_value_slice, operands, &result,
-                               nullptr);
+    Slice existing_value_slice(existing_value);
+    std::vector<Slice> operands_slice(operands.begin(), operands.end());
+
+    const MergeOperator::MergeOperationInput merge_in(
+        key, &existing_value_slice, operands_slice, nullptr);
+    MergeOperator::MergeOperationOutput merge_out(result, result_operand);
+    merge_operator_->FullMergeV2(merge_in, &merge_out);
+
+    if (result_operand.data()) {
+      result.assign(result_operand.data(), result_operand.size());
+    }
     return result;
   }

-  std::string FullMerge(std::deque<std::string> operands,
-                        std::string key = "") {
+  std::string FullMergeV2(std::vector<std::string> operands,
+                          std::string key = "") {
     std::string result;
+    Slice result_operand(nullptr, 0);

-    merge_operator_->FullMerge(key, nullptr, operands, &result, nullptr);
+    std::vector<Slice> operands_slice(operands.begin(), operands.end());
+
+    const MergeOperator::MergeOperationInput merge_in(key, nullptr,
+                                                      operands_slice, nullptr);
+    MergeOperator::MergeOperationOutput merge_out(result, result_operand);
+    merge_operator_->FullMergeV2(merge_in, &merge_out);
+
+    if (result_operand.data()) {
+      result.assign(result_operand.data(), result_operand.size());
+    }
     return result;
   }

@@ -56,14 +75,14 @@ class UtilMergeOperatorTest : public testing::Test {
 TEST_F(UtilMergeOperatorTest, MaxMergeOperator) {
   merge_operator_ = MergeOperators::CreateMaxOperator();

-  EXPECT_EQ("B", FullMerge("B", {"A"}));
-  EXPECT_EQ("B", FullMerge("A", {"B"}));
-  EXPECT_EQ("", FullMerge({"", "", ""}));
-  EXPECT_EQ("A", FullMerge({"A"}));
-  EXPECT_EQ("ABC", FullMerge({"ABC"}));
-  EXPECT_EQ("Z", FullMerge({"ABC", "Z", "C", "AXX"}));
-  EXPECT_EQ("ZZZ", FullMerge({"ABC", "CC", "Z", "ZZZ"}));
-  EXPECT_EQ("a", FullMerge("a", {"ABC", "CC", "Z", "ZZZ"}));
+  EXPECT_EQ("B", FullMergeV2("B", {"A"}));
+  EXPECT_EQ("B", FullMergeV2("A", {"B"}));
+  EXPECT_EQ("", FullMergeV2({"", "", ""}));
+  EXPECT_EQ("A", FullMergeV2({"A"}));
+  EXPECT_EQ("ABC", FullMergeV2({"ABC"}));
+  EXPECT_EQ("Z", FullMergeV2({"ABC", "Z", "C", "AXX"}));
+  EXPECT_EQ("ZZZ", FullMergeV2({"ABC", "CC", "Z", "ZZZ"}));
+  EXPECT_EQ("a", FullMergeV2("a", {"ABC", "CC", "Z", "ZZZ"}));

   EXPECT_EQ("z", PartialMergeMulti({"a", "z", "efqfqwgwew", "aaz", "hhhhh"}));
