rocksdb/db/read_callback.h
Yanqin Jin d758273ceb Get() with timestamp should respect snapshot (#7227)
Summary:
If user-defined timestamp is enabled, current implementation can expose
newer data to queries even if an older sequence number is specified via
read_options.snapshot. This PR makes Get() respect sequence-number-based
snapshot.

Solution is simple. Besides using <ukey, ts, seq> to search the index for the key,
we also verify that the candidate result's seq is smaller than or equal to seq. This
requires passing a seq via `GetContext`, which results in the majority of code
change caused by this PR.

Also added a few unit tests to demonstrate standard visibility during point lookup
and range scan when timestamp and snapshot are both present.

Test plan (devserver):
```
make check
$./db_bench --benchmarks=fillseq,readrandom -cache_size=$[64*1024*1024]
```
Result
this PR: readrandom   :       4.827 micros/op 207180 ops/sec;   22.9 MB/s (1000000 of 1000000 found)
master:  readrandom   :       4.936 micros/op 202610 ops/sec;   22.4 MB/s (1000000 of 1000000 found)

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7227

Reviewed By: ltamasi

Differential Revision: D23015242

Pulled By: riversand963

fbshipit-source-id: ea7b85a728654553ba357d2e6a207b5e40f7376a
2020-08-14 19:20:58 -07:00

54 lines
1.8 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include "rocksdb/types.h"
namespace ROCKSDB_NAMESPACE {
class ReadCallback {
public:
explicit ReadCallback(SequenceNumber last_visible_seq)
: max_visible_seq_(last_visible_seq) {}
ReadCallback(SequenceNumber last_visible_seq, SequenceNumber min_uncommitted)
: max_visible_seq_(last_visible_seq), min_uncommitted_(min_uncommitted) {}
virtual ~ReadCallback() {}
// Will be called to see if the seq number visible; if not it moves on to
// the next seq number.
virtual bool IsVisibleFullCheck(SequenceNumber seq) = 0;
inline bool IsVisible(SequenceNumber seq) {
assert(min_uncommitted_ > 0);
assert(min_uncommitted_ >= kMinUnCommittedSeq);
if (seq < min_uncommitted_) { // handles seq == 0 as well
assert(seq <= max_visible_seq_);
return true;
} else if (max_visible_seq_ < seq) {
assert(seq != 0);
return false;
} else {
assert(seq != 0); // already handled in the first if-then clause
return IsVisibleFullCheck(seq);
}
}
inline SequenceNumber max_visible_seq() { return max_visible_seq_; }
// Refresh to a more recent visible seq
virtual void Refresh(SequenceNumber seq) { max_visible_seq_ = seq; }
protected:
// The max visible seq, it is usually the snapshot but could be larger if
// transaction has its own writes written to db.
SequenceNumber max_visible_seq_ = kMaxSequenceNumber;
// Any seq less than min_uncommitted_ is committed.
const SequenceNumber min_uncommitted_ = kMinUnCommittedSeq;
};
} // namespace ROCKSDB_NAMESPACE