WritePrepared Txn: rollback via commit

Summary:
Currently WritePrepared rolls back a transaction with prepare sequence number prepare_seq by i) write a single rollback batch with rollback_seq, ii) add <rollback_seq, rollback_seq> to commit cache, iii) remove prepare_seq from PrepareHeap.
This is correct assuming that there is no snapshot taken when a transaction is rolled back. This is the case the way MySQL does rollback which is after recovery. Otherwise if max_evicted_seq advances the prepare_seq, the live snapshot might assume data as committed since it does not find them in CommitCache.
The change is to simply add <prepare_seq. rollback_seq> to commit cache before removing prepare_seq from PrepareHeap. In this way if max_evicted_seq advances prpeare_seq, the existing mechanism that we have to check evicted entries against live snapshots will make sure that the live snapshot will not see the data of rolled back transaction.
Closes https://github.com/facebook/rocksdb/pull/3745

Differential Revision: D7696193

Pulled By: maysamyabandeh

fbshipit-source-id: c9a2d46341ddc03554dded1303520a1cab74ef9c
This commit is contained in:
Maysam Yabandeh 2018-04-20 15:25:12 -07:00 committed by Facebook Github Bot
parent dbdaa4662e
commit bb2a2ec731
4 changed files with 18 additions and 34 deletions

View File

@ -184,9 +184,7 @@ bool RandomTransactionInserter::DoInsert(DB* db, Transaction* txn,
s = txn->Prepare();
assert(s.ok());
}
// TODO(myabandeh): enable this when WritePreparedTxnDB::RollbackPrepared
// is updated to handle in-the-middle rollbacks.
if (!rand_->OneIn(0)) {
if (!rand_->OneIn(20)) {
s = txn->Commit();
} else {
// Also try 5% rollback

View File

@ -312,10 +312,16 @@ Status WritePreparedTxn::RollbackInternal() {
const bool DISABLE_MEMTABLE = true;
const uint64_t NO_REF_LOG = 0;
uint64_t seq_used = kMaxSequenceNumber;
const size_t ZERO_PREPARES = 0;
const size_t ONE_BATCH = 1;
// We commit the rolled back prepared batches. ALthough this is
// counter-intuitive, i) it is safe to do so, since the prepared batches are
// already canceled out by the rollback batch, ii) adding the commit entry to
// CommitCache will allow us to benefit from the existing mechanism in
// CommitCache that keeps an entry evicted due to max advance and yet overlaps
// with a live snapshot around so that the live snapshot properly skips the
// entry even if its prepare seq is lower than max_evicted_seq_.
WritePreparedCommitEntryPreReleaseCallback update_commit_map(
wpt_db_, db_impl_, kMaxSequenceNumber, ZERO_PREPARES, ONE_BATCH);
wpt_db_, db_impl_, GetId(), prepare_batch_cnt_, ONE_BATCH);
// Note: the rollback batch does not need AddPrepared since it is written to
// DB in one shot. min_uncommitted still works since it requires capturing
// data that is written to DB but not yet committed, while
@ -328,11 +334,7 @@ Status WritePreparedTxn::RollbackInternal() {
return s;
}
if (do_one_write) {
// Mark the txn as rolled back
uint64_t& rollback_seq = seq_used;
for (size_t i = 0; i < prepare_batch_cnt_; i++) {
wpt_db_->RollbackPrepared(GetId() + i, rollback_seq);
}
wpt_db_->RemovePrepared(GetId(), prepare_batch_cnt_);
return s;
} // else do the 2nd write for commit
uint64_t& prepare_seq = seq_used;
@ -355,9 +357,16 @@ Status WritePreparedTxn::RollbackInternal() {
// Mark the txn as rolled back
uint64_t& rollback_seq = seq_used;
if (s.ok()) {
// Note: it is safe to do it after PreReleaseCallback via WriteImpl since
// all the writes by the prpared batch are already blinded by the rollback
// batch. The only reason we commit the prepared batch here is to benefit
// from the existing mechanism in CommitCache that takes care of the rare
// cases that the prepare seq is visible to a snsapshot but max evicted seq
// advances that prepare seq.
for (size_t i = 0; i < prepare_batch_cnt_; i++) {
wpt_db_->RollbackPrepared(GetId() + i, rollback_seq);
wpt_db_->AddCommitted(GetId() + i, rollback_seq);
}
wpt_db_->RemovePrepared(GetId(), prepare_batch_cnt_);
}
return s;

View File

@ -380,25 +380,6 @@ void WritePreparedTxnDB::AddPrepared(uint64_t seq) {
prepared_txns_.push(seq);
}
void WritePreparedTxnDB::RollbackPrepared(uint64_t prep_seq,
uint64_t /*rollback_seq*/) {
ROCKS_LOG_DETAILS(
info_log_, "Txn %" PRIu64 " rolling back with rollback seq of " PRIu64 "",
prep_seq, rollback_seq);
std::vector<SequenceNumber> snapshots =
GetSnapshotListFromDB(kMaxSequenceNumber);
// TODO(myabandeh): currently we are assuming that there is no snapshot taken
// when a transaciton is rolled back. This is the case the way MySQL does
// rollback which is after recovery. We should extend it to be able to
// rollback txns that overlap with exsiting snapshots.
assert(snapshots.size() == 0);
if (snapshots.size()) {
throw std::runtime_error(
"Rollback reqeust while there are live snapshots.");
}
RemovePrepared(prep_seq);
}
void WritePreparedTxnDB::AddCommitted(uint64_t prepare_seq, uint64_t commit_seq,
uint8_t loop_cnt) {
ROCKS_LOG_DETAILS(info_log_, "Txn %" PRIu64 " Committing with %" PRIu64,

View File

@ -249,10 +249,6 @@ class WritePreparedTxnDB : public PessimisticTransactionDB {
void AddPrepared(uint64_t seq);
// Remove the transaction with prepare sequence seq from the prepared list
void RemovePrepared(const uint64_t seq, const size_t batch_cnt = 1);
// Rollback a prepared txn identified with prep_seq. rollback_seq is the seq
// with which the additional data is written to cancel the txn effect. It can
// be used to identify the snapshots that overlap with the rolled back txn.
void RollbackPrepared(uint64_t prep_seq, uint64_t rollback_seq);
// Add the transaction with prepare sequence prepare_seq and commit sequence
// commit_seq to the commit map. loop_cnt is to detect infinite loops.
void AddCommitted(uint64_t prepare_seq, uint64_t commit_seq,