From d31d02248ab4e74e4156544609f0052fbe3386fb Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 26 Oct 2017 17:14:04 -0700 Subject: [PATCH] implement lower bound for iterators Summary: - for `SeekToFirst()`, just convert it to a regular `Seek()` if lower bound is specified - for operations that iterate backwards over user keys (`SeekForPrev`, `SeekToLast`, `Prev`), change `PrevInternal` to check whether user key went below lower bound every time the user key changes -- same approach we use to ensure we stay within a prefix when `prefix_same_as_start=true`. Closes https://github.com/facebook/rocksdb/pull/3074 Differential Revision: D6158654 Pulled By: ajkr fbshipit-source-id: cb0e3a922e2650d2cd4d1c6e1c0f1e8b729ff518 --- HISTORY.md | 1 + db/db_iter.cc | 14 ++++++++ db/db_iter_test.cc | 68 +++++++++++++++++++++++++++++++++++++++ include/rocksdb/options.h | 14 ++++++-- options/options.cc | 2 ++ 5 files changed, 97 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 9c8026faf..d92d40e73 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,7 @@ # Rocksdb Change Log ### New Features * Upon snapshot release, recompact bottommost files containing deleted/overwritten keys that previously could not be dropped due to the snapshot. This alleviates space-amp caused by long-held snapshots. +* Support lower bound on iterators specified via `ReadOptions::iterate_lower_bound`. ## 5.7.4 (08/31/2017) No significant changes. diff --git a/db/db_iter.cc b/db/db_iter.cc index 7d9d17a14..c821476c4 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -117,6 +117,7 @@ class DBIter: public Iterator { current_entry_is_merged_(false), statistics_(cf_options.statistics), version_number_(version_number), + iterate_lower_bound_(read_options.iterate_lower_bound), iterate_upper_bound_(read_options.iterate_upper_bound), prefix_same_as_start_(read_options.prefix_same_as_start), pin_thru_lifetime_(read_options.pin_data), @@ -275,6 +276,7 @@ class DBIter: public Iterator { uint64_t max_skippable_internal_keys_; uint64_t num_internal_keys_skipped_; uint64_t version_number_; + const Slice* iterate_lower_bound_; const Slice* iterate_upper_bound_; IterKey prefix_start_buf_; Slice prefix_start_key_; @@ -686,6 +688,14 @@ void DBIter::PrevInternal() { return; } + if (iterate_lower_bound_ != nullptr && + user_comparator_->Compare(saved_key_.GetUserKey(), + *iterate_lower_bound_) < 0) { + // We've iterated earlier than the user-specified lower bound. + valid_ = false; + return; + } + if (FindValueForCurrentKey()) { valid_ = true; if (!iter_->Valid()) { @@ -1078,6 +1088,10 @@ void DBIter::SeekToFirst() { if (prefix_extractor_ != nullptr) { max_skip_ = std::numeric_limits::max(); } + if (iterate_lower_bound_ != nullptr) { + Seek(*iterate_lower_bound_); + return; + } direction_ = kForward; ReleaseTempPinnedData(); ResetInternalKeysSkippedCounter(); diff --git a/db/db_iter_test.cc b/db/db_iter_test.cc index a6dd06ffb..5a8888f5e 100644 --- a/db/db_iter_test.cc +++ b/db/db_iter_test.cc @@ -2832,6 +2832,74 @@ TEST_F(DBIteratorTest, SeekPrefixTombstones) { ASSERT_EQ(skipped_keys, 0); } +TEST_F(DBIteratorTest, SeekToFirstLowerBound) { + const int kNumKeys = 3; + for (int i = 0; i < kNumKeys + 2; ++i) { + // + 2 for two special cases: lower bound before and lower bound after the + // internal iterator's keys + TestIterator* internal_iter = new TestIterator(BytewiseComparator()); + for (int j = 1; j <= kNumKeys; ++j) { + internal_iter->AddPut(std::to_string(j), "val"); + } + internal_iter->Finish(); + + ReadOptions ro; + auto lower_bound_str = std::to_string(i); + Slice lower_bound(lower_bound_str); + ro.iterate_lower_bound = &lower_bound; + Options options; + std::unique_ptr db_iter(NewDBIterator( + env_, ro, ImmutableCFOptions(options), BytewiseComparator(), + internal_iter, 10 /* sequence */, + options.max_sequential_skip_in_iterations, 0)); + + db_iter->SeekToFirst(); + if (i == kNumKeys + 1) { + // lower bound was beyond the last key + ASSERT_FALSE(db_iter->Valid()); + } else { + ASSERT_TRUE(db_iter->Valid()); + int expected; + if (i == 0) { + // lower bound was before the first key + expected = 1; + } else { + // lower bound was at the ith key + expected = i; + } + ASSERT_EQ(std::to_string(expected), db_iter->key().ToString()); + } + } +} + +TEST_F(DBIteratorTest, PrevLowerBound) { + const int kNumKeys = 3; + const int kLowerBound = 2; + TestIterator* internal_iter = new TestIterator(BytewiseComparator()); + for (int j = 1; j <= kNumKeys; ++j) { + internal_iter->AddPut(std::to_string(j), "val"); + } + internal_iter->Finish(); + + ReadOptions ro; + auto lower_bound_str = std::to_string(kLowerBound); + Slice lower_bound(lower_bound_str); + ro.iterate_lower_bound = &lower_bound; + Options options; + std::unique_ptr db_iter(NewDBIterator( + env_, ro, ImmutableCFOptions(options), BytewiseComparator(), + internal_iter, 10 /* sequence */, + options.max_sequential_skip_in_iterations, 0)); + + db_iter->SeekToLast(); + for (int i = kNumKeys; i >= kLowerBound; --i) { + ASSERT_TRUE(db_iter->Valid()); + ASSERT_EQ(std::to_string(i), db_iter->key().ToString()); + db_iter->Prev(); + } + ASSERT_FALSE(db_iter->Valid()); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 4d2f143a0..57fba6ad3 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -961,14 +961,24 @@ struct ReadOptions { // Default: nullptr const Snapshot* snapshot; + // `iterate_lower_bound` defines the smallest key at which the backward + // iterator can return an entry. Once the bound is passed, Valid() will be + // false. `iterate_lower_bound` is inclusive ie the bound value is a valid + // entry. + // + // If prefix_extractor is not null, the Seek target and `iterate_lower_bound` + // need to have the same prefix. This is because ordering is not guaranteed + // outside of prefix domain. + // + // Default: nullptr + const Slice* iterate_lower_bound; + // "iterate_upper_bound" defines the extent upto which the forward iterator // can returns entries. Once the bound is reached, Valid() will be false. // "iterate_upper_bound" is exclusive ie the bound value is // not a valid entry. If iterator_extractor is not null, the Seek target // and iterator_upper_bound need to have the same prefix. // This is because ordering is not guaranteed outside of prefix domain. - // There is no lower bound on the iterator. If needed, that can be easily - // implemented. // // Default: nullptr const Slice* iterate_upper_bound; diff --git a/options/options.cc b/options/options.cc index 4d8ba487a..8a2adfbfa 100644 --- a/options/options.cc +++ b/options/options.cc @@ -605,6 +605,7 @@ DBOptions* DBOptions::IncreaseParallelism(int total_threads) { ReadOptions::ReadOptions() : snapshot(nullptr), + iterate_lower_bound(nullptr), iterate_upper_bound(nullptr), readahead_size(0), max_skippable_internal_keys(0), @@ -621,6 +622,7 @@ ReadOptions::ReadOptions() ReadOptions::ReadOptions(bool cksum, bool cache) : snapshot(nullptr), + iterate_lower_bound(nullptr), iterate_upper_bound(nullptr), readahead_size(0), max_skippable_internal_keys(0),