Add a limited support for iteration bounds into BaseDeltaIterator (#5403)
Summary: For MDEV-19670: MyRocks: key lookups into deleted data are very slow BaseDeltaIterator remembers iterate_upper_bound and will not let delta_iterator_ walk above the iterate_upper_bound if base_iterator_ is not valid anymore. == Rationale == The most straightforward way would be to make the delta_iterator (which is a rocksdb::WBWIIterator) to support iterator bounds. But checking for bounds has an extra CPU overhead. So we put the check into BaseDeltaIterator, and only make it when base_iterator_ is not valid. (note: We could take it even further, and move the check a few lines down, and only check iterator bounds ourselves if base_iterator_ is not valid AND delta_iterator_ hit a tombstone). Pull Request resolved: https://github.com/facebook/rocksdb/pull/5403 Differential Revision: D15863092 Pulled By: maysamyabandeh fbshipit-source-id: 8da458e7b9af95ff49356666f69664b4a6ccf49b
This commit is contained in:
parent
52733b4498
commit
230bcae7b6
@ -1467,6 +1467,7 @@ int main(int argc, char** argv) {
|
|||||||
CheckCondition(!rocksdb_iter_valid(iter));
|
CheckCondition(!rocksdb_iter_valid(iter));
|
||||||
|
|
||||||
rocksdb_iter_destroy(iter);
|
rocksdb_iter_destroy(iter);
|
||||||
|
rocksdb_readoptions_set_iterate_upper_bound(roptions, NULL, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -165,7 +165,8 @@ class WriteBatchWithIndex : public WriteBatchBase {
|
|||||||
// the write batch update finishes. The state may recover after Next() is
|
// the write batch update finishes. The state may recover after Next() is
|
||||||
// called.
|
// called.
|
||||||
Iterator* NewIteratorWithBase(ColumnFamilyHandle* column_family,
|
Iterator* NewIteratorWithBase(ColumnFamilyHandle* column_family,
|
||||||
Iterator* base_iterator);
|
Iterator* base_iterator,
|
||||||
|
const ReadOptions *opts = nullptr);
|
||||||
// default column family
|
// default column family
|
||||||
Iterator* NewIteratorWithBase(Iterator* base_iterator);
|
Iterator* NewIteratorWithBase(Iterator* base_iterator);
|
||||||
|
|
||||||
|
@ -369,7 +369,8 @@ Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options,
|
|||||||
Iterator* db_iter = db_->NewIterator(read_options, column_family);
|
Iterator* db_iter = db_->NewIterator(read_options, column_family);
|
||||||
assert(db_iter);
|
assert(db_iter);
|
||||||
|
|
||||||
return write_batch_.NewIteratorWithBase(column_family, db_iter);
|
return write_batch_.NewIteratorWithBase(column_family, db_iter,
|
||||||
|
&read_options);
|
||||||
}
|
}
|
||||||
|
|
||||||
Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family,
|
Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family,
|
||||||
|
@ -33,14 +33,18 @@ namespace rocksdb {
|
|||||||
class BaseDeltaIterator : public Iterator {
|
class BaseDeltaIterator : public Iterator {
|
||||||
public:
|
public:
|
||||||
BaseDeltaIterator(Iterator* base_iterator, WBWIIterator* delta_iterator,
|
BaseDeltaIterator(Iterator* base_iterator, WBWIIterator* delta_iterator,
|
||||||
const Comparator* comparator)
|
const Comparator* comparator,
|
||||||
|
const ReadOptions* read_options = nullptr)
|
||||||
: forward_(true),
|
: forward_(true),
|
||||||
current_at_base_(true),
|
current_at_base_(true),
|
||||||
equal_keys_(false),
|
equal_keys_(false),
|
||||||
status_(Status::OK()),
|
status_(Status::OK()),
|
||||||
base_iterator_(base_iterator),
|
base_iterator_(base_iterator),
|
||||||
delta_iterator_(delta_iterator),
|
delta_iterator_(delta_iterator),
|
||||||
comparator_(comparator) {}
|
comparator_(comparator),
|
||||||
|
iterate_upper_bound_(read_options? read_options->iterate_upper_bound :
|
||||||
|
nullptr)
|
||||||
|
{}
|
||||||
|
|
||||||
~BaseDeltaIterator() override {}
|
~BaseDeltaIterator() override {}
|
||||||
|
|
||||||
@ -279,6 +283,13 @@ class BaseDeltaIterator : public Iterator {
|
|||||||
// Finished
|
// Finished
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (iterate_upper_bound_) {
|
||||||
|
if (comparator_->Compare(delta_entry.key,
|
||||||
|
*iterate_upper_bound_) >= 0) {
|
||||||
|
// out of upper bound -> finished.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (delta_entry.type == kDeleteRecord ||
|
if (delta_entry.type == kDeleteRecord ||
|
||||||
delta_entry.type == kSingleDeleteRecord) {
|
delta_entry.type == kSingleDeleteRecord) {
|
||||||
AdvanceDelta();
|
AdvanceDelta();
|
||||||
@ -326,6 +337,7 @@ class BaseDeltaIterator : public Iterator {
|
|||||||
std::unique_ptr<Iterator> base_iterator_;
|
std::unique_ptr<Iterator> base_iterator_;
|
||||||
std::unique_ptr<WBWIIterator> delta_iterator_;
|
std::unique_ptr<WBWIIterator> delta_iterator_;
|
||||||
const Comparator* comparator_; // not owned
|
const Comparator* comparator_; // not owned
|
||||||
|
const Slice* iterate_upper_bound_;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef SkipList<WriteBatchIndexEntry*, const WriteBatchEntryComparator&>
|
typedef SkipList<WriteBatchIndexEntry*, const WriteBatchEntryComparator&>
|
||||||
@ -647,13 +659,15 @@ WBWIIterator* WriteBatchWithIndex::NewIterator(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Iterator* WriteBatchWithIndex::NewIteratorWithBase(
|
Iterator* WriteBatchWithIndex::NewIteratorWithBase(
|
||||||
ColumnFamilyHandle* column_family, Iterator* base_iterator) {
|
ColumnFamilyHandle* column_family, Iterator* base_iterator,
|
||||||
|
const ReadOptions *read_options) {
|
||||||
if (rep->overwrite_key == false) {
|
if (rep->overwrite_key == false) {
|
||||||
assert(false);
|
assert(false);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
return new BaseDeltaIterator(base_iterator, NewIterator(column_family),
|
return new BaseDeltaIterator(base_iterator, NewIterator(column_family),
|
||||||
GetColumnFamilyUserComparator(column_family));
|
GetColumnFamilyUserComparator(column_family),
|
||||||
|
read_options);
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator* WriteBatchWithIndex::NewIteratorWithBase(Iterator* base_iterator) {
|
Iterator* WriteBatchWithIndex::NewIteratorWithBase(Iterator* base_iterator) {
|
||||||
|
Loading…
Reference in New Issue
Block a user