Improve readability of DBIter's two seek functions (#5794)

Summary:
Doing some code reordering in DBIter::Seek() and DBIter::SeekForPrev().
The logic largely remains the same, except slight difference when handling some stats when valid_ = false, where they are not supposed to be used anyway.
Also remove prefix_start_key_, which sometimes point a part of seek target, some times prefix_start_buf_, which is confusing.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5794

Test Plan: Run all tests.

Differential Revision: D17375257

fbshipit-source-id: 7339a23898cecd3a8475bf72340fcd6f82b933c5
This commit is contained in:
sdong 2019-09-16 21:02:27 -07:00 committed by Facebook Github Bot
parent 622683000c
commit 6287f0d73b
2 changed files with 196 additions and 130 deletions

View File

@ -47,13 +47,14 @@ static void DumpInternalIter(Iterator* iter) {
#endif
DBIter::DBIter(Env* _env, const ReadOptions& read_options,
const ImmutableCFOptions& cf_options,
const MutableCFOptions& mutable_cf_options, const Comparator* cmp,
InternalIterator* iter, SequenceNumber s, bool arena_mode,
uint64_t max_sequential_skip_in_iterations,
ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd,
bool allow_blob)
: env_(_env),
const ImmutableCFOptions& cf_options,
const MutableCFOptions& mutable_cf_options,
const Comparator* cmp, InternalIterator* iter, SequenceNumber s,
bool arena_mode, uint64_t max_sequential_skip_in_iterations,
ReadCallback* read_callback, DBImpl* db_impl,
ColumnFamilyData* cfd, bool allow_blob)
: prefix_extractor_(mutable_cf_options.prefix_extractor.get()),
env_(_env),
logger_(cf_options.info_log),
user_comparator_(cmp),
merge_operator_(cf_options.merge_operator),
@ -68,7 +69,9 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
valid_(false),
current_entry_is_merged_(false),
is_key_seqnum_zero_(false),
prefix_same_as_start_(read_options.prefix_same_as_start),
prefix_same_as_start_(mutable_cf_options.prefix_extractor
? read_options.prefix_same_as_start
: false),
pin_thru_lifetime_(read_options.pin_data),
total_order_seek_(read_options.total_order_seek),
allow_blob_(allow_blob),
@ -79,7 +82,6 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
cfd_(cfd),
start_seqnum_(read_options.iter_start_seqnum) {
RecordTick(statistics_, NO_ITERATOR_CREATED);
prefix_extractor_ = mutable_cf_options.prefix_extractor.get();
max_skip_ = max_sequential_skip_in_iterations;
max_skippable_internal_keys_ = read_options.max_skippable_internal_keys;
if (pin_thru_lifetime_) {
@ -152,8 +154,13 @@ void DBIter::Next() {
local_stats_.next_count_++;
if (ok && iter_.Valid()) {
Slice prefix;
if (prefix_same_as_start_) {
assert(prefix_extractor_ != nullptr);
prefix = prefix_.GetUserKey();
}
FindNextUserEntry(true /* skipping the current user key */,
prefix_same_as_start_);
prefix_same_as_start_ ? &prefix : nullptr);
} else {
is_key_seqnum_zero_ = false;
valid_ = false;
@ -164,7 +171,7 @@ void DBIter::Next() {
}
}
// PRE: saved_key_ has the current user key if skipping
// PRE: saved_key_ has the current user key if skipping_saved_key
// POST: saved_key_ should have the next user key if valid_,
// if the current entry is a result of merge
// current_entry_is_merged_ => true
@ -174,17 +181,17 @@ void DBIter::Next() {
// a delete marker or a sequence number higher than sequence_
// saved_key_ MUST have a proper user_key before calling this function
//
// The prefix_check parameter controls whether we check the iterated
// keys against the prefix of the seeked key. Set to false when
// performing a seek without a key (e.g. SeekToFirst). Set to
// prefix_same_as_start_ for other iterations.
bool DBIter::FindNextUserEntry(bool skipping, bool prefix_check) {
// The prefix parameter, if not null, indicates that we need to iterator
// within the prefix, and the iterator needs to be made invalid, if no
// more entry for the prefix can be found.
bool DBIter::FindNextUserEntry(bool skipping_saved_key, const Slice* prefix) {
PERF_TIMER_GUARD(find_next_user_entry_time);
return FindNextUserEntryInternal(skipping, prefix_check);
return FindNextUserEntryInternal(skipping_saved_key, prefix);
}
// Actual implementation of DBIter::FindNextUserEntry()
bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
const Slice* prefix) {
// Loop until we hit an acceptable entry to yield
assert(iter_.Valid());
assert(status_.ok());
@ -193,9 +200,10 @@ bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
// How many times in a row we have skipped an entry with user key less than
// or equal to saved_key_. We could skip these entries either because
// sequence numbers were too high or because skipping = true.
// sequence numbers were too high or because skipping_saved_key = true.
// What saved_key_ contains throughout this method:
// - if skipping : saved_key_ contains the key that we need to skip,
// - if skipping_saved_key : saved_key_ contains the key that we need
// to skip,
// and we haven't seen any keys greater than that,
// - if num_skipped > 0 : saved_key_ contains the key that we have skipped
// num_skipped times, and we haven't seen any keys
@ -228,9 +236,10 @@ bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
break;
}
if (prefix_extractor_ && prefix_check &&
prefix_extractor_->Transform(ikey_.user_key)
.compare(prefix_start_key_) != 0) {
assert(prefix == nullptr || prefix_extractor_ != nullptr);
if (prefix != nullptr &&
prefix_extractor_->Transform(ikey_.user_key).compare(*prefix) != 0) {
assert(prefix_same_as_start_);
break;
}
@ -243,14 +252,15 @@ bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
// possibly be skipped. This condition can potentially be relaxed to
// prev_key.seq <= ikey_.sequence. We are cautious because it will be more
// prone to bugs causing the same user key with the same sequence number.
if (!is_prev_key_seqnum_zero && skipping &&
if (!is_prev_key_seqnum_zero && skipping_saved_key &&
user_comparator_.Compare(ikey_.user_key, saved_key_.GetUserKey()) <=
0) {
num_skipped++; // skip this entry
PERF_COUNTER_ADD(internal_key_skipped_count, 1);
} else {
assert(!skipping || user_comparator_.Compare(
ikey_.user_key, saved_key_.GetUserKey()) > 0);
assert(!skipping_saved_key ||
user_comparator_.Compare(ikey_.user_key,
saved_key_.GetUserKey()) > 0);
num_skipped = 0;
reseek_done = false;
switch (ikey_.type) {
@ -271,7 +281,7 @@ bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
saved_key_.SetUserKey(
ikey_.user_key, !pin_thru_lifetime_ ||
!iter_.iter()->IsKeyPinned() /* copy */);
skipping = true;
skipping_saved_key = true;
PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
}
break;
@ -289,12 +299,12 @@ bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
return true;
} else {
// this key and all previous versions shouldn't be included,
// skipping
// skipping_saved_key
saved_key_.SetUserKey(
ikey_.user_key,
!pin_thru_lifetime_ ||
!iter_.iter()->IsKeyPinned() /* copy */);
skipping = true;
skipping_saved_key = true;
}
} else {
saved_key_.SetUserKey(
@ -304,7 +314,7 @@ bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
ikey_, RangeDelPositioningMode::kForwardTraversal)) {
// Arrange to skip all upcoming entries for this key since
// they are hidden by this deletion.
skipping = true;
skipping_saved_key = true;
num_skipped = 0;
reseek_done = false;
PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
@ -335,7 +345,7 @@ bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
ikey_, RangeDelPositioningMode::kForwardTraversal)) {
// Arrange to skip all upcoming entries for this key since
// they are hidden by this deletion.
skipping = true;
skipping_saved_key = true;
num_skipped = 0;
reseek_done = false;
PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
@ -360,13 +370,13 @@ bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
// to seek to the target sequence number.
int cmp =
user_comparator_.Compare(ikey_.user_key, saved_key_.GetUserKey());
if (cmp == 0 || (skipping && cmp <= 0)) {
if (cmp == 0 || (skipping_saved_key && cmp <= 0)) {
num_skipped++;
} else {
saved_key_.SetUserKey(
ikey_.user_key,
!iter_.iter()->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
skipping = false;
skipping_saved_key = false;
num_skipped = 0;
reseek_done = false;
}
@ -386,14 +396,14 @@ bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
num_skipped = 0;
reseek_done = true;
std::string last_key;
if (skipping) {
if (skipping_saved_key) {
// We're looking for the next user-key but all we see are the same
// user-key with decreasing sequence numbers. Fast forward to
// sequence number 0 and type deletion (the smallest type).
AppendInternalKey(&last_key, ParsedInternalKey(saved_key_.GetUserKey(),
0, kTypeDeletion));
// Don't set skipping = false because we may still see more user-keys
// equal to saved_key_.
// Don't set skipping_saved_key = false because we may still see more
// user-keys equal to saved_key_.
} else {
// We saw multiple entries with this user key and sequence numbers
// higher than sequence_. Fast forward to sequence_.
@ -534,8 +544,14 @@ void DBIter::Prev() {
}
}
if (ok) {
PrevInternal();
Slice prefix;
if (prefix_same_as_start_) {
assert(prefix_extractor_ != nullptr);
prefix = prefix_.GetUserKey();
}
PrevInternal(prefix_same_as_start_ ? &prefix : nullptr);
}
if (statistics_ != nullptr) {
local_stats_.prev_count_++;
if (valid_) {
@ -613,15 +629,17 @@ bool DBIter::ReverseToBackward() {
return FindUserKeyBeforeSavedKey();
}
void DBIter::PrevInternal() {
void DBIter::PrevInternal(const Slice* prefix) {
while (iter_.Valid()) {
saved_key_.SetUserKey(
ExtractUserKey(iter_.key()),
!iter_.iter()->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
if (prefix_extractor_ && prefix_same_as_start_ &&
assert(prefix == nullptr || prefix_extractor_ != nullptr);
if (prefix != nullptr &&
prefix_extractor_->Transform(saved_key_.GetUserKey())
.compare(prefix_start_key_) != 0) {
.compare(*prefix) != 0) {
assert(prefix_same_as_start_);
// Current key does not have the same prefix as start
valid_ = false;
return;
@ -1047,71 +1065,22 @@ bool DBIter::IsVisible(SequenceNumber sequence) {
}
}
void DBIter::Seek(const Slice& target) {
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
StopWatch sw(env_, statistics_, DB_SEEK);
status_ = Status::OK();
ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
void DBIter::SetSavedKeyToSeekTarget(const Slice& target) {
is_key_seqnum_zero_ = false;
SequenceNumber seq = sequence_;
saved_key_.Clear();
saved_key_.SetInternalKey(target, seq);
#ifndef ROCKSDB_LITE
if (db_impl_ != nullptr && cfd_ != nullptr) {
db_impl_->TraceIteratorSeek(cfd_->GetID(), target);
}
#endif // ROCKSDB_LITE
if (iterate_lower_bound_ != nullptr &&
user_comparator_.Compare(saved_key_.GetUserKey(), *iterate_lower_bound_) <
0) {
// Seek key is smaller than the lower bound.
saved_key_.Clear();
saved_key_.SetInternalKey(*iterate_lower_bound_, seq);
}
{
PERF_TIMER_GUARD(seek_internal_seek_time);
iter_.Seek(saved_key_.GetInternalKey());
range_del_agg_.InvalidateRangeDelMapPositions();
}
RecordTick(statistics_, NUMBER_DB_SEEK);
if (iter_.Valid()) {
if (prefix_extractor_ && prefix_same_as_start_) {
prefix_start_key_ = prefix_extractor_->Transform(target);
}
direction_ = kForward;
ClearSavedValue();
FindNextUserEntry(false /* not skipping */, prefix_same_as_start_);
if (!valid_) {
prefix_start_key_.clear();
}
if (statistics_ != nullptr) {
if (valid_) {
// Decrement since we don't want to count this key as skipped
RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
}
}
} else {
valid_ = false;
}
if (valid_ && prefix_extractor_ && prefix_same_as_start_) {
prefix_start_buf_.SetUserKey(prefix_start_key_);
prefix_start_key_ = prefix_start_buf_.GetUserKey();
}
}
void DBIter::SeekForPrev(const Slice& target) {
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
StopWatch sw(env_, statistics_, DB_SEEK);
status_ = Status::OK();
ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
void DBIter::SetSavedKeyToSeekForPrevTarget(const Slice& target) {
is_key_seqnum_zero_ = false;
saved_key_.Clear();
// now saved_key is used to store internal key.
@ -1124,12 +1093,74 @@ void DBIter::SeekForPrev(const Slice& target) {
saved_key_.Clear();
saved_key_.SetInternalKey(*iterate_upper_bound_, kMaxSequenceNumber);
}
}
void DBIter::Seek(const Slice& target) {
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
StopWatch sw(env_, statistics_, DB_SEEK);
#ifndef ROCKSDB_LITE
if (db_impl_ != nullptr && cfd_ != nullptr) {
db_impl_->TraceIteratorSeek(cfd_->GetID(), target);
}
#endif // ROCKSDB_LITE
status_ = Status::OK();
ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
// Seek the inner iterator based on the target key.
{
PERF_TIMER_GUARD(seek_internal_seek_time);
iter_.SeekForPrev(saved_key_.GetInternalKey());
SetSavedKeyToSeekTarget(target);
iter_.Seek(saved_key_.GetInternalKey());
range_del_agg_.InvalidateRangeDelMapPositions();
RecordTick(statistics_, NUMBER_DB_SEEK);
}
if (!iter_.Valid()) {
valid_ = false;
return;
}
direction_ = kForward;
// Now the inner iterator is placed to the target position. From there,
// we need to find out the next key that is visible to the user.
//
ClearSavedValue();
if (prefix_same_as_start_) {
// The case where the iterator needs to be invalidated if it has exausted
// keys within the same prefix of the seek key.
assert(prefix_extractor_ != nullptr);
Slice target_prefix;
target_prefix = prefix_extractor_->Transform(target);
FindNextUserEntry(false /* not skipping saved_key */,
&target_prefix /* prefix */);
if (valid_) {
// Remember the prefix of the seek key for the future Prev() call to
// check.
prefix_.SetUserKey(target_prefix);
}
} else {
FindNextUserEntry(false /* not skipping saved_key */, nullptr);
}
if (!valid_) {
return;
}
// Updating stats and perf context counters.
if (statistics_ != nullptr) {
// Decrement since we don't want to count this key as skipped
RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
}
PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
}
void DBIter::SeekForPrev(const Slice& target) {
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
StopWatch sw(env_, statistics_, DB_SEEK);
#ifndef ROCKSDB_LITE
if (db_impl_ != nullptr && cfd_ != nullptr) {
@ -1137,30 +1168,49 @@ void DBIter::SeekForPrev(const Slice& target) {
}
#endif // ROCKSDB_LITE
RecordTick(statistics_, NUMBER_DB_SEEK);
if (iter_.Valid()) {
if (prefix_extractor_ && prefix_same_as_start_) {
prefix_start_key_ = prefix_extractor_->Transform(target);
}
direction_ = kReverse;
ClearSavedValue();
PrevInternal();
if (!valid_) {
prefix_start_key_.clear();
}
if (statistics_ != nullptr) {
if (valid_) {
RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
}
status_ = Status::OK();
ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
// Seek the inner iterator based on the target key.
{
PERF_TIMER_GUARD(seek_internal_seek_time);
SetSavedKeyToSeekForPrevTarget(target);
iter_.SeekForPrev(saved_key_.GetInternalKey());
range_del_agg_.InvalidateRangeDelMapPositions();
RecordTick(statistics_, NUMBER_DB_SEEK);
}
if (!iter_.Valid()) {
valid_ = false;
return;
}
direction_ = kReverse;
// Now the inner iterator is placed to the target position. From there,
// we need to find out the first key that is visible to the user in the
// backward direction.
ClearSavedValue();
if (prefix_same_as_start_) {
// The case where the iterator needs to be invalidated if it has exausted
// keys within the same prefix of the seek key.
assert(prefix_extractor_ != nullptr);
Slice target_prefix;
target_prefix = prefix_extractor_->Transform(target);
PrevInternal(&target_prefix);
if (valid_) {
// Remember the prefix of the seek key for the future Prev() call to
// check.
prefix_.SetUserKey(target_prefix);
}
} else {
valid_ = false;
PrevInternal(nullptr);
}
if (valid_ && prefix_extractor_ && prefix_same_as_start_) {
prefix_start_buf_.SetUserKey(prefix_start_key_);
prefix_start_key_ = prefix_start_buf_.GetUserKey();
// Report stats and perf context.
if (statistics_ != nullptr && valid_) {
RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
}
}
@ -1193,7 +1243,8 @@ void DBIter::SeekToFirst() {
saved_key_.SetUserKey(
ExtractUserKey(iter_.key()),
!iter_.iter()->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
FindNextUserEntry(false /* not skipping */, false /* no prefix check */);
FindNextUserEntry(false /* not skipping saved_key */,
nullptr /* no prefix check */);
if (statistics_ != nullptr) {
if (valid_) {
RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
@ -1204,10 +1255,9 @@ void DBIter::SeekToFirst() {
} else {
valid_ = false;
}
if (valid_ && prefix_extractor_ && prefix_same_as_start_) {
prefix_start_buf_.SetUserKey(
prefix_extractor_->Transform(saved_key_.GetUserKey()));
prefix_start_key_ = prefix_start_buf_.GetUserKey();
if (valid_ && prefix_same_as_start_) {
assert(prefix_extractor_ != nullptr);
prefix_.SetUserKey(prefix_extractor_->Transform(saved_key_.GetUserKey()));
}
}
@ -1217,7 +1267,7 @@ void DBIter::SeekToLast() {
SeekForPrev(*iterate_upper_bound_);
if (Valid() && user_comparator_.Equal(*iterate_upper_bound_, key())) {
ReleaseTempPinnedData();
PrevInternal();
PrevInternal(nullptr);
}
return;
}
@ -1240,7 +1290,7 @@ void DBIter::SeekToLast() {
iter_.SeekToLast();
range_del_agg_.InvalidateRangeDelMapPositions();
}
PrevInternal();
PrevInternal(nullptr);
if (statistics_ != nullptr) {
RecordTick(statistics_, NUMBER_DB_SEEK);
if (valid_) {
@ -1249,10 +1299,9 @@ void DBIter::SeekToLast() {
PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
}
}
if (valid_ && prefix_extractor_ && prefix_same_as_start_) {
prefix_start_buf_.SetUserKey(
prefix_extractor_->Transform(saved_key_.GetUserKey()));
prefix_start_key_ = prefix_start_buf_.GetUserKey();
if (valid_ && prefix_same_as_start_) {
assert(prefix_extractor_ != nullptr);
prefix_.SetUserKey(prefix_extractor_->Transform(saved_key_.GetUserKey()));
}
}

View File

@ -203,15 +203,28 @@ class DBIter final: public Iterator {
// in this case callers would usually stop what they were doing and return.
bool ReverseToForward();
bool ReverseToBackward();
// Set saved_key_ to the seek key to target, with proper sequence number set.
// It might get adjusted if the seek key is smaller than iterator lower bound.
void SetSavedKeyToSeekTarget(const Slice& /*target*/);
// Set saved_key_ to the seek key to target, with proper sequence number set.
// It might get adjusted if the seek key is larger than iterator upper bound.
void SetSavedKeyToSeekForPrevTarget(const Slice& /*target*/);
bool FindValueForCurrentKey();
bool FindValueForCurrentKeyUsingSeek();
bool FindUserKeyBeforeSavedKey();
bool FindNextUserEntry(bool skipping, bool prefix_check);
bool FindNextUserEntryInternal(bool skipping, bool prefix_check);
// If `skipping_saved_key` is true, the function will keep iterating until it
// finds a user key that is larger than `saved_key_`.
// If `prefix` is not null, the iterator needs to stop when all keys for the
// prefix are exhausted and the interator is set to invalid.
bool FindNextUserEntry(bool skipping_saved_key, const Slice* prefix);
// Internal implementation of FindNextUserEntry().
bool FindNextUserEntryInternal(bool skipping_saved_key, const Slice* prefix);
bool ParseKey(ParsedInternalKey* key);
bool MergeValuesNewToOld();
void PrevInternal();
// If prefix is not null, we need to set the iterator to invalid if no more
// entry can be found within the prefix.
void PrevInternal(const Slice* /*prefix*/);
bool TooManyInternalKeysSkipped(bool increment = true);
bool IsVisible(SequenceNumber sequence);
@ -273,10 +286,14 @@ class DBIter final: public Iterator {
const Slice* iterate_lower_bound_;
const Slice* iterate_upper_bound_;
IterKey prefix_start_buf_;
// The prefix of the seek key. It is only used when prefix_same_as_start_
// is true and prefix extractor is not null. In Next() or Prev(), current keys
// will be checked against this prefix, so that the iterator can be
// invalidated if the keys in this prefix has been exhausted. Set it using
// SetUserKey() and use it using GetUserKey().
IterKey prefix_;
Status status_;
Slice prefix_start_key_;
Direction direction_;
bool valid_;
bool current_entry_is_merged_;