From bb2a2ec7313e9af648fc9ac613289e18ed019eb0 Mon Sep 17 00:00:00 2001 From: Maysam Yabandeh Date: Fri, 20 Apr 2018 15:25:12 -0700 Subject: [PATCH] WritePrepared Txn: rollback via commit Summary: Currently WritePrepared rolls back a transaction with prepare sequence number prepare_seq by i) write a single rollback batch with rollback_seq, ii) add to commit cache, iii) remove prepare_seq from PrepareHeap. This is correct assuming that there is no snapshot taken when a transaction is rolled back. This is the case the way MySQL does rollback which is after recovery. Otherwise if max_evicted_seq advances the prepare_seq, the live snapshot might assume data as committed since it does not find them in CommitCache. The change is to simply add to commit cache before removing prepare_seq from PrepareHeap. In this way if max_evicted_seq advances prpeare_seq, the existing mechanism that we have to check evicted entries against live snapshots will make sure that the live snapshot will not see the data of rolled back transaction. Closes https://github.com/facebook/rocksdb/pull/3745 Differential Revision: D7696193 Pulled By: maysamyabandeh fbshipit-source-id: c9a2d46341ddc03554dded1303520a1cab74ef9c --- util/transaction_test_util.cc | 4 +-- utilities/transactions/write_prepared_txn.cc | 25 +++++++++++++------ .../transactions/write_prepared_txn_db.cc | 19 -------------- .../transactions/write_prepared_txn_db.h | 4 --- 4 files changed, 18 insertions(+), 34 deletions(-) diff --git a/util/transaction_test_util.cc b/util/transaction_test_util.cc index bc6f512a6..3f8eea6de 100644 --- a/util/transaction_test_util.cc +++ b/util/transaction_test_util.cc @@ -184,9 +184,7 @@ bool RandomTransactionInserter::DoInsert(DB* db, Transaction* txn, s = txn->Prepare(); assert(s.ok()); } - // TODO(myabandeh): enable this when WritePreparedTxnDB::RollbackPrepared - // is updated to handle in-the-middle rollbacks. - if (!rand_->OneIn(0)) { + if (!rand_->OneIn(20)) { s = txn->Commit(); } else { // Also try 5% rollback diff --git a/utilities/transactions/write_prepared_txn.cc b/utilities/transactions/write_prepared_txn.cc index 5ae320133..155bc2677 100644 --- a/utilities/transactions/write_prepared_txn.cc +++ b/utilities/transactions/write_prepared_txn.cc @@ -312,10 +312,16 @@ Status WritePreparedTxn::RollbackInternal() { const bool DISABLE_MEMTABLE = true; const uint64_t NO_REF_LOG = 0; uint64_t seq_used = kMaxSequenceNumber; - const size_t ZERO_PREPARES = 0; const size_t ONE_BATCH = 1; + // We commit the rolled back prepared batches. ALthough this is + // counter-intuitive, i) it is safe to do so, since the prepared batches are + // already canceled out by the rollback batch, ii) adding the commit entry to + // CommitCache will allow us to benefit from the existing mechanism in + // CommitCache that keeps an entry evicted due to max advance and yet overlaps + // with a live snapshot around so that the live snapshot properly skips the + // entry even if its prepare seq is lower than max_evicted_seq_. WritePreparedCommitEntryPreReleaseCallback update_commit_map( - wpt_db_, db_impl_, kMaxSequenceNumber, ZERO_PREPARES, ONE_BATCH); + wpt_db_, db_impl_, GetId(), prepare_batch_cnt_, ONE_BATCH); // Note: the rollback batch does not need AddPrepared since it is written to // DB in one shot. min_uncommitted still works since it requires capturing // data that is written to DB but not yet committed, while @@ -328,11 +334,7 @@ Status WritePreparedTxn::RollbackInternal() { return s; } if (do_one_write) { - // Mark the txn as rolled back - uint64_t& rollback_seq = seq_used; - for (size_t i = 0; i < prepare_batch_cnt_; i++) { - wpt_db_->RollbackPrepared(GetId() + i, rollback_seq); - } + wpt_db_->RemovePrepared(GetId(), prepare_batch_cnt_); return s; } // else do the 2nd write for commit uint64_t& prepare_seq = seq_used; @@ -355,9 +357,16 @@ Status WritePreparedTxn::RollbackInternal() { // Mark the txn as rolled back uint64_t& rollback_seq = seq_used; if (s.ok()) { + // Note: it is safe to do it after PreReleaseCallback via WriteImpl since + // all the writes by the prpared batch are already blinded by the rollback + // batch. The only reason we commit the prepared batch here is to benefit + // from the existing mechanism in CommitCache that takes care of the rare + // cases that the prepare seq is visible to a snsapshot but max evicted seq + // advances that prepare seq. for (size_t i = 0; i < prepare_batch_cnt_; i++) { - wpt_db_->RollbackPrepared(GetId() + i, rollback_seq); + wpt_db_->AddCommitted(GetId() + i, rollback_seq); } + wpt_db_->RemovePrepared(GetId(), prepare_batch_cnt_); } return s; diff --git a/utilities/transactions/write_prepared_txn_db.cc b/utilities/transactions/write_prepared_txn_db.cc index a32903389..a0b22b0d1 100644 --- a/utilities/transactions/write_prepared_txn_db.cc +++ b/utilities/transactions/write_prepared_txn_db.cc @@ -380,25 +380,6 @@ void WritePreparedTxnDB::AddPrepared(uint64_t seq) { prepared_txns_.push(seq); } -void WritePreparedTxnDB::RollbackPrepared(uint64_t prep_seq, - uint64_t /*rollback_seq*/) { - ROCKS_LOG_DETAILS( - info_log_, "Txn %" PRIu64 " rolling back with rollback seq of " PRIu64 "", - prep_seq, rollback_seq); - std::vector snapshots = - GetSnapshotListFromDB(kMaxSequenceNumber); - // TODO(myabandeh): currently we are assuming that there is no snapshot taken - // when a transaciton is rolled back. This is the case the way MySQL does - // rollback which is after recovery. We should extend it to be able to - // rollback txns that overlap with exsiting snapshots. - assert(snapshots.size() == 0); - if (snapshots.size()) { - throw std::runtime_error( - "Rollback reqeust while there are live snapshots."); - } - RemovePrepared(prep_seq); -} - void WritePreparedTxnDB::AddCommitted(uint64_t prepare_seq, uint64_t commit_seq, uint8_t loop_cnt) { ROCKS_LOG_DETAILS(info_log_, "Txn %" PRIu64 " Committing with %" PRIu64, diff --git a/utilities/transactions/write_prepared_txn_db.h b/utilities/transactions/write_prepared_txn_db.h index c2bca70c3..46435049f 100644 --- a/utilities/transactions/write_prepared_txn_db.h +++ b/utilities/transactions/write_prepared_txn_db.h @@ -249,10 +249,6 @@ class WritePreparedTxnDB : public PessimisticTransactionDB { void AddPrepared(uint64_t seq); // Remove the transaction with prepare sequence seq from the prepared list void RemovePrepared(const uint64_t seq, const size_t batch_cnt = 1); - // Rollback a prepared txn identified with prep_seq. rollback_seq is the seq - // with which the additional data is written to cancel the txn effect. It can - // be used to identify the snapshots that overlap with the rolled back txn. - void RollbackPrepared(uint64_t prep_seq, uint64_t rollback_seq); // Add the transaction with prepare sequence prepare_seq and commit sequence // commit_seq to the commit map. loop_cnt is to detect infinite loops. void AddCommitted(uint64_t prepare_seq, uint64_t commit_seq,