Add a limited support for iteration bounds into BaseDeltaIterator (#5403)

Summary:
For MDEV-19670: MyRocks: key lookups into deleted data are very slow

BaseDeltaIterator remembers iterate_upper_bound and will not let delta_iterator_
walk above the iterate_upper_bound if base_iterator_ is not valid
anymore.

== Rationale ==
The most straightforward way would be to make the delta_iterator
(which is a rocksdb::WBWIIterator) to support iterator bounds. But
checking for bounds has an extra CPU overhead.

So we put the check into BaseDeltaIterator, and only make it when
base_iterator_ is not valid.

(note: We could take it even further, and move the check a few lines
down, and only check iterator bounds ourselves if base_iterator_ is
not valid AND delta_iterator_ hit a tombstone).
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5403

Differential Revision: D15863092

Pulled By: maysamyabandeh

fbshipit-source-id: 8da458e7b9af95ff49356666f69664b4a6ccf49b
This commit is contained in:
Sergei Petrunia 2019-11-05 11:29:31 -08:00 committed by Facebook Github Bot
parent 52733b4498
commit 230bcae7b6
4 changed files with 23 additions and 6 deletions

View File

@ -1467,6 +1467,7 @@ int main(int argc, char** argv) {
CheckCondition(!rocksdb_iter_valid(iter));
rocksdb_iter_destroy(iter);
rocksdb_readoptions_set_iterate_upper_bound(roptions, NULL, 0);
}
}

View File

@ -165,7 +165,8 @@ class WriteBatchWithIndex : public WriteBatchBase {
// the write batch update finishes. The state may recover after Next() is
// called.
Iterator* NewIteratorWithBase(ColumnFamilyHandle* column_family,
Iterator* base_iterator);
Iterator* base_iterator,
const ReadOptions *opts = nullptr);
// default column family
Iterator* NewIteratorWithBase(Iterator* base_iterator);

View File

@ -369,7 +369,8 @@ Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options,
Iterator* db_iter = db_->NewIterator(read_options, column_family);
assert(db_iter);
return write_batch_.NewIteratorWithBase(column_family, db_iter);
return write_batch_.NewIteratorWithBase(column_family, db_iter,
&read_options);
}
Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family,

View File

@ -33,14 +33,18 @@ namespace rocksdb {
class BaseDeltaIterator : public Iterator {
public:
BaseDeltaIterator(Iterator* base_iterator, WBWIIterator* delta_iterator,
const Comparator* comparator)
const Comparator* comparator,
const ReadOptions* read_options = nullptr)
: forward_(true),
current_at_base_(true),
equal_keys_(false),
status_(Status::OK()),
base_iterator_(base_iterator),
delta_iterator_(delta_iterator),
comparator_(comparator) {}
comparator_(comparator),
iterate_upper_bound_(read_options? read_options->iterate_upper_bound :
nullptr)
{}
~BaseDeltaIterator() override {}
@ -279,6 +283,13 @@ class BaseDeltaIterator : public Iterator {
// Finished
return;
}
if (iterate_upper_bound_) {
if (comparator_->Compare(delta_entry.key,
*iterate_upper_bound_) >= 0) {
// out of upper bound -> finished.
return;
}
}
if (delta_entry.type == kDeleteRecord ||
delta_entry.type == kSingleDeleteRecord) {
AdvanceDelta();
@ -326,6 +337,7 @@ class BaseDeltaIterator : public Iterator {
std::unique_ptr<Iterator> base_iterator_;
std::unique_ptr<WBWIIterator> delta_iterator_;
const Comparator* comparator_; // not owned
const Slice* iterate_upper_bound_;
};
typedef SkipList<WriteBatchIndexEntry*, const WriteBatchEntryComparator&>
@ -647,13 +659,15 @@ WBWIIterator* WriteBatchWithIndex::NewIterator(
}
Iterator* WriteBatchWithIndex::NewIteratorWithBase(
ColumnFamilyHandle* column_family, Iterator* base_iterator) {
ColumnFamilyHandle* column_family, Iterator* base_iterator,
const ReadOptions *read_options) {
if (rep->overwrite_key == false) {
assert(false);
return nullptr;
}
return new BaseDeltaIterator(base_iterator, NewIterator(column_family),
GetColumnFamilyUserComparator(column_family));
GetColumnFamilyUserComparator(column_family),
read_options);
}
Iterator* WriteBatchWithIndex::NewIteratorWithBase(Iterator* base_iterator) {