Timestamp-based validation for pessimistic txn (#9562)
Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/9562. With a per-transaction `read_timestamp_`, it is possible to perform timestamp-based validation after locking a key, in addition to sequence-based validation. Specifically, if a transaction has a read timestamp, then we also perform timestamp-based validation after the key is locked via `GetForUpdate()`. This ensures that no other transaction has modified the key and committed successfully after the read timestamp (which represents a consistent view of the database) but before the locking operation. Reviewed By: ltamasi Differential Revision: D31822034 fbshipit-source-id: c6f1828b7fc23e4f85e2d1ed73ff51464a058d91
This commit is contained in:
parent
ec0b1ff2bd
commit
241b5aa15a
@ -603,6 +603,13 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
|
|||||||
s = txn_db_impl_->TryLock(this, cfh_id, key_str, exclusive);
|
s = txn_db_impl_->TryLock(this, cfh_id, key_str, exclusive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const ColumnFamilyHandle* const cfh =
|
||||||
|
column_family ? column_family : db_impl_->DefaultColumnFamily();
|
||||||
|
assert(cfh);
|
||||||
|
const Comparator* const ucmp = cfh->GetComparator();
|
||||||
|
assert(ucmp);
|
||||||
|
size_t ts_sz = ucmp->timestamp_size();
|
||||||
|
|
||||||
SetSnapshotIfNeeded();
|
SetSnapshotIfNeeded();
|
||||||
|
|
||||||
// Even though we do not care about doing conflict checking for this write,
|
// Even though we do not care about doing conflict checking for this write,
|
||||||
@ -610,10 +617,11 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
|
|||||||
// some other write. However, we do not need to check if there have been
|
// some other write. However, we do not need to check if there have been
|
||||||
// any writes since this transaction's snapshot.
|
// any writes since this transaction's snapshot.
|
||||||
// TODO(agiardullo): could optimize by supporting shared txn locks in the
|
// TODO(agiardullo): could optimize by supporting shared txn locks in the
|
||||||
// future
|
// future.
|
||||||
SequenceNumber tracked_at_seq =
|
SequenceNumber tracked_at_seq =
|
||||||
status.locked ? status.seq : kMaxSequenceNumber;
|
status.locked ? status.seq : kMaxSequenceNumber;
|
||||||
if (!do_validate || snapshot_ == nullptr) {
|
if (!do_validate || (snapshot_ == nullptr &&
|
||||||
|
(0 == ts_sz || kMaxTxnTimestamp == read_timestamp_))) {
|
||||||
if (assume_tracked && !previously_locked &&
|
if (assume_tracked && !previously_locked &&
|
||||||
tracked_locks_->IsPointLockSupported()) {
|
tracked_locks_->IsPointLockSupported()) {
|
||||||
s = Status::InvalidArgument(
|
s = Status::InvalidArgument(
|
||||||
@ -621,8 +629,7 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
|
|||||||
}
|
}
|
||||||
// Need to remember the earliest sequence number that we know that this
|
// Need to remember the earliest sequence number that we know that this
|
||||||
// key has not been modified after. This is useful if this same
|
// key has not been modified after. This is useful if this same
|
||||||
// transaction
|
// transaction later tries to lock this key again.
|
||||||
// later tries to lock this key again.
|
|
||||||
if (tracked_at_seq == kMaxSequenceNumber) {
|
if (tracked_at_seq == kMaxSequenceNumber) {
|
||||||
// Since we haven't checked a snapshot, we only know this key has not
|
// Since we haven't checked a snapshot, we only know this key has not
|
||||||
// been modified since after we locked it.
|
// been modified since after we locked it.
|
||||||
@ -633,27 +640,24 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
|
|||||||
// lock, which would be an unusual sequence.
|
// lock, which would be an unusual sequence.
|
||||||
tracked_at_seq = db_->GetLatestSequenceNumber();
|
tracked_at_seq = db_->GetLatestSequenceNumber();
|
||||||
}
|
}
|
||||||
} else {
|
} else if (s.ok()) {
|
||||||
// If a snapshot is set, we need to make sure the key hasn't been modified
|
// If a snapshot is set, we need to make sure the key hasn't been modified
|
||||||
// since the snapshot. This must be done after we locked the key.
|
// since the snapshot. This must be done after we locked the key.
|
||||||
// If we already have validated an earlier snapshot it must have been
|
// If we already have validated an earlier snapshot it must have been
|
||||||
// reflected in tracked_at_seq and ValidateSnapshot will return OK.
|
// reflected in tracked_at_seq and ValidateSnapshot will return OK.
|
||||||
if (s.ok()) {
|
|
||||||
s = ValidateSnapshot(column_family, key, &tracked_at_seq);
|
s = ValidateSnapshot(column_family, key, &tracked_at_seq);
|
||||||
|
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
// Failed to validate key
|
// Failed to validate key
|
||||||
// Unlock key we just locked
|
// Unlock key we just locked
|
||||||
if (lock_upgrade) {
|
if (lock_upgrade) {
|
||||||
s = txn_db_impl_->TryLock(this, cfh_id, key_str,
|
s = txn_db_impl_->TryLock(this, cfh_id, key_str, false /* exclusive */);
|
||||||
false /* exclusive */);
|
|
||||||
assert(s.ok());
|
assert(s.ok());
|
||||||
} else if (!previously_locked) {
|
} else if (!previously_locked) {
|
||||||
txn_db_impl_->UnLock(this, cfh_id, key.ToString());
|
txn_db_impl_->UnLock(this, cfh_id, key.ToString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
// We must track all the locked keys so that we can unlock them later. If
|
// We must track all the locked keys so that we can unlock them later. If
|
||||||
@ -709,15 +713,21 @@ Status PessimisticTransaction::GetRangeLock(ColumnFamilyHandle* column_family,
|
|||||||
Status PessimisticTransaction::ValidateSnapshot(
|
Status PessimisticTransaction::ValidateSnapshot(
|
||||||
ColumnFamilyHandle* column_family, const Slice& key,
|
ColumnFamilyHandle* column_family, const Slice& key,
|
||||||
SequenceNumber* tracked_at_seq) {
|
SequenceNumber* tracked_at_seq) {
|
||||||
assert(snapshot_);
|
assert(snapshot_ || read_timestamp_ < kMaxTxnTimestamp);
|
||||||
|
|
||||||
SequenceNumber snap_seq = snapshot_->GetSequenceNumber();
|
SequenceNumber snap_seq = 0;
|
||||||
|
if (snapshot_) {
|
||||||
|
snap_seq = snapshot_->GetSequenceNumber();
|
||||||
if (*tracked_at_seq <= snap_seq) {
|
if (*tracked_at_seq <= snap_seq) {
|
||||||
// If the key has been previously validated (or locked) at a sequence number
|
// If the key has been previously validated (or locked) at a sequence number
|
||||||
// earlier than the current snapshot's sequence number, we already know it
|
// earlier than the current snapshot's sequence number, we already know it
|
||||||
// has not been modified after snap_seq either.
|
// has not been modified after snap_seq either.
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
snap_seq = db_impl_->GetLatestSequenceNumber();
|
||||||
|
}
|
||||||
|
|
||||||
// Otherwise we have either
|
// Otherwise we have either
|
||||||
// 1: tracked_at_seq == kMaxSequenceNumber, i.e., first time tracking the key
|
// 1: tracked_at_seq == kMaxSequenceNumber, i.e., first time tracking the key
|
||||||
// 2: snap_seq < tracked_at_seq: last time we lock the key was via
|
// 2: snap_seq < tracked_at_seq: last time we lock the key was via
|
||||||
|
Loading…
Reference in New Issue
Block a user