Timestamp-based validation for pessimistic txn (#9562)

Summary:
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9562

With per-transaction `read_timestamp_`, it is possible to perform transaction validation after
locking a key in addition to sequence-based validation. Specifically, if a transaction has a
read_timestamp, then we perform timestamp-based validation as well after the key is locked
via `GetForUpdate()`. This is to make sure that no other transaction has modified the key and
committed successfully since the read timestamp (but before the locking operation) which
 represents a consistent view of the database.

Reviewed By: ltamasi

Differential Revision: D31822034

fbshipit-source-id: c6f1828b7fc23e4f85e2d1ed73ff51464a058d91
This commit is contained in:
Yanqin Jin 2022-02-14 17:31:41 -08:00 committed by Facebook GitHub Bot
parent ec0b1ff2bd
commit 241b5aa15a

View File

@ -603,6 +603,13 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
s = txn_db_impl_->TryLock(this, cfh_id, key_str, exclusive);
}
const ColumnFamilyHandle* const cfh =
column_family ? column_family : db_impl_->DefaultColumnFamily();
assert(cfh);
const Comparator* const ucmp = cfh->GetComparator();
assert(ucmp);
size_t ts_sz = ucmp->timestamp_size();
SetSnapshotIfNeeded();
// Even though we do not care about doing conflict checking for this write,
@ -610,10 +617,11 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
// some other write. However, we do not need to check if there have been
// any writes since this transaction's snapshot.
// TODO(agiardullo): could optimize by supporting shared txn locks in the
// future
// future.
SequenceNumber tracked_at_seq =
status.locked ? status.seq : kMaxSequenceNumber;
if (!do_validate || snapshot_ == nullptr) {
if (!do_validate || (snapshot_ == nullptr &&
(0 == ts_sz || kMaxTxnTimestamp == read_timestamp_))) {
if (assume_tracked && !previously_locked &&
tracked_locks_->IsPointLockSupported()) {
s = Status::InvalidArgument(
@ -621,8 +629,7 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
}
// Need to remember the earliest sequence number that we know that this
// key has not been modified after. This is useful if this same
// transaction
// later tries to lock this key again.
// transaction later tries to lock this key again.
if (tracked_at_seq == kMaxSequenceNumber) {
// Since we haven't checked a snapshot, we only know this key has not
// been modified since after we locked it.
@ -633,27 +640,24 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
// lock, which would be an unusual sequence.
tracked_at_seq = db_->GetLatestSequenceNumber();
}
} else {
} else if (s.ok()) {
// If a snapshot is set, we need to make sure the key hasn't been modified
// since the snapshot. This must be done after we locked the key.
// If we already have validated an earilier snapshot it must has been
// reflected in tracked_at_seq and ValidateSnapshot will return OK.
if (s.ok()) {
s = ValidateSnapshot(column_family, key, &tracked_at_seq);
if (!s.ok()) {
// Failed to validate key
// Unlock key we just locked
if (lock_upgrade) {
s = txn_db_impl_->TryLock(this, cfh_id, key_str,
false /* exclusive */);
s = txn_db_impl_->TryLock(this, cfh_id, key_str, false /* exclusive */);
assert(s.ok());
} else if (!previously_locked) {
txn_db_impl_->UnLock(this, cfh_id, key.ToString());
}
}
}
}
if (s.ok()) {
// We must track all the locked keys so that we can unlock them later. If
@ -709,15 +713,21 @@ Status PessimisticTransaction::GetRangeLock(ColumnFamilyHandle* column_family,
Status PessimisticTransaction::ValidateSnapshot(
ColumnFamilyHandle* column_family, const Slice& key,
SequenceNumber* tracked_at_seq) {
assert(snapshot_);
assert(snapshot_ || read_timestamp_ < kMaxTxnTimestamp);
SequenceNumber snap_seq = snapshot_->GetSequenceNumber();
SequenceNumber snap_seq = 0;
if (snapshot_) {
snap_seq = snapshot_->GetSequenceNumber();
if (*tracked_at_seq <= snap_seq) {
// If the key has been previous validated (or locked) at a sequence number
// earlier than the current snapshot's sequence number, we already know it
// has not been modified aftter snap_seq either.
return Status::OK();
}
} else {
snap_seq = db_impl_->GetLatestSequenceNumber();
}
// Otherwise we have either
// 1: tracked_at_seq == kMaxSequenceNumber, i.e., first time tracking the key
// 2: snap_seq < tracked_at_seq: last time we lock the key was via