From 1315375542ff170144a296c4868a2a727ad35d44 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 5 Feb 2021 15:55:34 -0800 Subject: [PATCH] Allow range deletions in `*TransactionDB` only when safe (#7929) Summary: Explicitly reject all range deletions on `TransactionDB` or `OptimisticTransactionDB`, except when the user provides sufficient promises that allow us to proceed safely. The necessary promises are described in the API doc for `TransactionDB::DeleteRange()`. There is currently no way to provide enough promises to make it safe in `OptimisticTransactionDB`. Fixes https://github.com/facebook/rocksdb/issues/7913. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7929 Test Plan: unit tests covering the cases it's permitted/rejected Reviewed By: ltamasi Differential Revision: D26240254 Pulled By: ajkr fbshipit-source-id: 2834a0ce64cc3e4c3799e35b885a5e79c2f4f6d9 --- HISTORY.md | 5 ++ .../utilities/optimistic_transaction_db.h | 2 + include/rocksdb/utilities/transaction_db.h | 11 ++++ .../optimistic_transaction_db_impl.h | 16 ++++++ .../optimistic_transaction_test.cc | 11 ++++ utilities/transactions/transaction_test.cc | 50 +++++++++++++++++++ .../transactions/write_prepared_txn_db.cc | 4 +- 7 files changed, 98 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 8a2f29d55..f25f63825 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,9 @@ # Rocksdb Change Log +## Unreleased +### Bug Fixes +* Since 6.15.0, `TransactionDB` returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. There are certain cases where range deletion can still be used on such DBs; see the API doc on `TransactionDB::DeleteRange()` for details. +* `OptimisticTransactionDB` now returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. + ## 6.17.1 (01/28/2021) ### Behavior Changes * When retryable IO error occurs during compaction, it is mapped to soft error and set the BG error. However, auto resume is not called to clean the soft error since compaction will reschedule by itself. In this change, When retryable IO error occurs during compaction, BG error is not set. User will be informed the error via EventHelper. diff --git a/include/rocksdb/utilities/optimistic_transaction_db.h b/include/rocksdb/utilities/optimistic_transaction_db.h index 5356df71f..c070e49a3 100644 --- a/include/rocksdb/utilities/optimistic_transaction_db.h +++ b/include/rocksdb/utilities/optimistic_transaction_db.h @@ -51,6 +51,8 @@ struct OptimisticTransactionDBOptions { uint32_t occ_lock_buckets = (1 << 20); }; +// Range deletions (including those in `WriteBatch`es passed to `Write()`) are +// incompatible with `OptimisticTransactionDB` and will return a non-OK `Status` class OptimisticTransactionDB : public StackableDB { public: // Open an OptimisticTransactionDB similar to DB::Open(). diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h index 580c6f6bb..545e5f17c 100644 --- a/include/rocksdb/utilities/transaction_db.h +++ b/include/rocksdb/utilities/transaction_db.h @@ -344,6 +344,17 @@ class TransactionDB : public StackableDB { // falls back to the un-optimized version of ::Write return Write(opts, updates); } + // Transactional `DeleteRange()` is not yet supported. + // However, users who know their deleted range does not conflict with + // anything can still use it via the `Write()` API. In all cases, the + // `Write()` overload specifying `TransactionDBWriteOptimizations` must be + // used and `skip_concurrency_control` must be set. When using either + // WRITE_PREPARED or WRITE_UNPREPARED , `skip_duplicate_key_check` must + // additionally be set. + virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*, + const Slice&, const Slice&) override { + return Status::NotSupported(); + } // Open a TransactionDB similar to DB::Open(). // Internally call PrepareWrap() and WrapDB() // If the return status is not ok, then dbptr is set to nullptr. diff --git a/utilities/transactions/optimistic_transaction_db_impl.h b/utilities/transactions/optimistic_transaction_db_impl.h index d895d49b8..a23d9a06d 100644 --- a/utilities/transactions/optimistic_transaction_db_impl.h +++ b/utilities/transactions/optimistic_transaction_db_impl.h @@ -46,6 +46,22 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB { const OptimisticTransactionOptions& txn_options, Transaction* old_txn) override; + // Transactional `DeleteRange()` is not yet supported. + virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*, + const Slice&, const Slice&) override { + return Status::NotSupported(); + } + + // Range deletions also must not be snuck into `WriteBatch`es as they are + // incompatible with `OptimisticTransactionDB`. + virtual Status Write(const WriteOptions& write_opts, + WriteBatch* batch) override { + if (batch->HasDeleteRange()) { + return Status::NotSupported(); + } + return OptimisticTransactionDB::Write(write_opts, batch); + } + size_t GetLockBucketsSize() const { return bucketed_locks_.size(); } OccValidationPolicy GetValidatePolicy() const { return validate_policy_; } diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index ad27bd964..138823b65 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -1033,6 +1033,17 @@ TEST_P(OptimisticTransactionTest, IteratorTest) { delete txn; } +TEST_P(OptimisticTransactionTest, DeleteRangeSupportTest) { + // `OptimisticTransactionDB` does not allow range deletion in any API. + ASSERT_TRUE( + txn_db + ->DeleteRange(WriteOptions(), txn_db->DefaultColumnFamily(), "a", "b") + .IsNotSupported()); + WriteBatch wb; + ASSERT_OK(wb.DeleteRange("a", "b")); + ASSERT_NOK(txn_db->Write(WriteOptions(), &wb)); +} + TEST_P(OptimisticTransactionTest, SavepointTest) { WriteOptions write_options; ReadOptions read_options, snapshot_read_options; diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 9c4ce5604..27f9504e0 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -4835,6 +4835,56 @@ TEST_P(TransactionTest, MergeTest) { ASSERT_EQ("a,3", value); } +TEST_P(TransactionTest, DeleteRangeSupportTest) { + // The `DeleteRange()` API is banned everywhere. + ASSERT_TRUE( + db->DeleteRange(WriteOptions(), db->DefaultColumnFamily(), "a", "b") + .IsNotSupported()); + + // But range deletions can be added via the `Write()` API by specifying the + // proper flags to promise there are no conflicts according to the DB type + // (see `TransactionDB::DeleteRange()` API doc for details). + for (bool skip_concurrency_control : {false, true}) { + for (bool skip_duplicate_key_check : {false, true}) { + ASSERT_OK(db->Put(WriteOptions(), "a", "val")); + WriteBatch wb; + ASSERT_OK(wb.DeleteRange("a", "b")); + TransactionDBWriteOptimizations flags; + flags.skip_concurrency_control = skip_concurrency_control; + flags.skip_duplicate_key_check = skip_duplicate_key_check; + Status s = db->Write(WriteOptions(), flags, &wb); + std::string value; + switch (txn_db_options.write_policy) { + case WRITE_COMMITTED: + if (skip_concurrency_control) { + ASSERT_OK(s); + ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound()); + } else { + ASSERT_NOK(s); + ASSERT_OK(db->Get(ReadOptions(), "a", &value)); + } + break; + case WRITE_PREPARED: + // Intentional fall-through + case WRITE_UNPREPARED: + if (skip_concurrency_control && skip_duplicate_key_check) { + ASSERT_OK(s); + ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound()); + } else { + ASSERT_NOK(s); + ASSERT_OK(db->Get(ReadOptions(), "a", &value)); + } + break; + } + // Without any promises from the user, range deletion via other `Write()` + // APIs are still banned. + ASSERT_OK(db->Put(WriteOptions(), "a", "val")); + ASSERT_NOK(db->Write(WriteOptions(), &wb)); + ASSERT_OK(db->Get(ReadOptions(), "a", &value)); + } + } +} + TEST_P(TransactionTest, DeferSnapshotTest) { WriteOptions write_options; ReadOptions read_options; diff --git a/utilities/transactions/write_prepared_txn_db.cc b/utilities/transactions/write_prepared_txn_db.cc index af0df6604..167d2e80c 100644 --- a/utilities/transactions/write_prepared_txn_db.cc +++ b/utilities/transactions/write_prepared_txn_db.cc @@ -157,7 +157,9 @@ Status WritePreparedTxnDB::WriteInternal(const WriteOptions& write_options_orig, // TODO(myabandeh): add an option to allow user skipping this cost SubBatchCounter counter(*GetCFComparatorMap()); auto s = batch->Iterate(&counter); - assert(s.ok()); + if (!s.ok()) { + return s; + } batch_cnt = counter.BatchCount(); WPRecordTick(TXN_DUPLICATE_KEY_OVERHEAD); ROCKS_LOG_DETAILS(info_log_, "Duplicate key overhead: %" PRIu64 " batches",