From 74fc31ec926ed17279d44a39957c4d0d7533005c Mon Sep 17 00:00:00 2001 From: Maysam Yabandeh Date: Thu, 12 Apr 2018 11:52:15 -0700 Subject: [PATCH] WritePrepared Txn: rollback_merge_operands hack Summary: This is a hack as temporary fix of MyRocks with rollbacking the merge operands. The way MyRocks uses merge operands is without protection of locks, which violates the assumption behind the rollback algorithm. They are ok with not being rolled back as it would just create a gap in the autoincrement column. The hack add an option to disable the rollback of merge operands by default and only enables it to let the unit test pass. Closes https://github.com/facebook/rocksdb/pull/3711 Differential Revision: D7597177 Pulled By: maysamyabandeh fbshipit-source-id: 544be0f666c7e7abb7f651ec8b23124e05056728 --- HISTORY.md | 2 ++ include/rocksdb/utilities/transaction_db.h | 8 ++++++++ utilities/transactions/transaction_test.h | 1 + utilities/transactions/write_prepared_txn.cc | 16 ++++++++++++---- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 83ee673bf..abf0f24dd 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,11 +4,13 @@ * Add a BlockBasedTableOption to align uncompressed data blocks on the smaller of block size or page size boundary, to reduce flash reads by avoiding reads spanning 4K pages. ### New Features +* TransactionDBOptions::write_policy can be configured to enable WritePrepared 2PC transactions. Read more about them in the wiki. ### Bug Fixes * Fsync after writing global seq number to the ingestion file in ExternalSstFileIngestionJob. * Fix WAL corruption caused by race condition between user write thread and FlushWAL when two_write_queue is not set. * Fix memory leak when pin_l0_filter_and_index_blocks_in_cache is used with partitioned filters +* Disable rollback of merge operands in WritePrepared transactions to work around an issue in MyRocks. It can be enabled back by setting TransactionDBOptions::rollback_merge_operands to true. ### Java API Changes * Add `BlockBasedTableConfig.setBlockCache` to allow sharing a block cache across DB instances. diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h index 856f48996..e55f65006 100644 --- a/include/rocksdb/utilities/transaction_db.h +++ b/include/rocksdb/utilities/transaction_db.h @@ -85,6 +85,14 @@ struct TransactionDBOptions { // before the commit phase. The DB then needs to provide the mechanisms to // tell apart committed from uncommitted data. TxnDBWritePolicy write_policy = TxnDBWritePolicy::WRITE_COMMITTED; + + // TODO(myabandeh): remove this option + // Note: this is a temporary option as a hot fix in rollback of writeprepared + // txns in myrocks. MyRocks uses merge operands for autoinc column id without + // however obtaining locks. This breaks the assumption behind the rollback + // logic in myrocks. This hack of simply not rolling back merge operands works + // for the special way that myrocks uses this operands. + bool rollback_merge_operands = false; }; struct TransactionOptions { diff --git a/utilities/transactions/transaction_test.h b/utilities/transactions/transaction_test.h index beec0df40..cf7e08a50 100644 --- a/utilities/transactions/transaction_test.h +++ b/utilities/transactions/transaction_test.h @@ -66,6 +66,7 @@ class TransactionTestBase : public ::testing::Test { txn_db_options.transaction_lock_timeout = 0; txn_db_options.default_lock_timeout = 0; txn_db_options.write_policy = write_policy; + txn_db_options.rollback_merge_operands = true; Status s; if (use_stackable_db == false) { s = TransactionDB::Open(options, txn_db_options, dbname, &db); diff --git a/utilities/transactions/write_prepared_txn.cc b/utilities/transactions/write_prepared_txn.cc index 77e5b6126..0c3669595 100644 --- a/utilities/transactions/write_prepared_txn.cc +++ b/utilities/transactions/write_prepared_txn.cc @@ -218,15 +218,18 @@ Status WritePreparedTxn::RollbackInternal() { std::map& comparators_; using CFKeys = std::set; std::map keys_; + bool rollback_merge_operands_; RollbackWriteBatchBuilder( DBImpl* db, WritePreparedTxnDB* wpt_db, SequenceNumber snap_seq, WriteBatch* dst_batch, - std::map& comparators) + std::map& comparators, + bool rollback_merge_operands) : db_(db), callback(wpt_db, snap_seq, 0), // 0 disables min_uncommitted optimization rollback_batch_(dst_batch), - comparators_(comparators) {} + comparators_(comparators), + rollback_merge_operands_(rollback_merge_operands) {} Status Rollback(uint32_t cf, const Slice& key) { Status s; @@ -275,7 +278,11 @@ Status WritePreparedTxn::RollbackInternal() { Status MergeCF(uint32_t cf, const Slice& key, const Slice& /*val*/) override { - return Rollback(cf, key); + if (rollback_merge_operands_) { + return Rollback(cf, key); + } else { + return Status::OK(); + } } Status MarkNoop(bool) override { return Status::OK(); } @@ -289,7 +296,8 @@ Status WritePreparedTxn::RollbackInternal() { protected: virtual bool WriteAfterCommit() const override { return false; } } rollback_handler(db_impl_, wpt_db_, last_visible_txn, &rollback_batch, - *wpt_db_->GetCFComparatorMap()); + *wpt_db_->GetCFComparatorMap(), + wpt_db_->txn_db_options_.rollback_merge_operands); auto s = GetWriteBatch()->GetWriteBatch()->Iterate(&rollback_handler); assert(s.ok()); if (!s.ok()) {