Allow range deletions in *TransactionDB only when safe (#7929)

Summary:
Explicitly reject all range deletions on `TransactionDB` or `OptimisticTransactionDB`, except when the user provides sufficient promises that allow us to proceed safely. The necessary promises are described in the API doc for `TransactionDB::DeleteRange()`. There is currently no way to provide enough promises to make it safe in `OptimisticTransactionDB`.

Fixes https://github.com/facebook/rocksdb/issues/7913.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7929

Test Plan: unit tests covering the cases it's permitted/rejected

Reviewed By: ltamasi

Differential Revision: D26240254

Pulled By: ajkr

fbshipit-source-id: 2834a0ce64cc3e4c3799e35b885a5e79c2f4f6d9
This commit is contained in:
Andrew Kryczka 2021-02-05 15:55:34 -08:00
parent a471d31e04
commit 1315375542
7 changed files with 98 additions and 1 deletions

View File

@ -1,4 +1,9 @@
# Rocksdb Change Log
## Unreleased
### Bug Fixes
* Since 6.15.0, `TransactionDB` returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. There are certain cases where range deletion can still be used on such DBs; see the API doc on `TransactionDB::DeleteRange()` for details.
* `OptimisticTransactionDB` now returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees.
## 6.17.1 (01/28/2021)
### Behavior Changes
* When retryable IO error occurs during compaction, it is mapped to soft error and set the BG error. However, auto resume is not called to clean the soft error since compaction will reschedule by itself. In this change, When retryable IO error occurs during compaction, BG error is not set. User will be informed the error via EventHelper.

View File

@ -51,6 +51,8 @@ struct OptimisticTransactionDBOptions {
uint32_t occ_lock_buckets = (1 << 20);
};
// Range deletions (including those in `WriteBatch`es passed to `Write()`) are
// incompatible with `OptimisticTransactionDB` and will return a non-OK `Status`
class OptimisticTransactionDB : public StackableDB {
public:
// Open an OptimisticTransactionDB similar to DB::Open().

View File

@ -344,6 +344,17 @@ class TransactionDB : public StackableDB {
// falls back to the un-optimized version of ::Write
return Write(opts, updates);
}
// Transactional `DeleteRange()` is not yet supported.
// However, users who know their deleted range does not conflict with
// anything can still use it via the `Write()` API. In all cases, the
// `Write()` overload specifying `TransactionDBWriteOptimizations` must be
// used and `skip_concurrency_control` must be set. When using either
// WRITE_PREPARED or WRITE_UNPREPARED , `skip_duplicate_key_check` must
// additionally be set.
virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
const Slice&, const Slice&) override {
return Status::NotSupported();
}
// Open a TransactionDB similar to DB::Open().
// Internally call PrepareWrap() and WrapDB()
// If the return status is not ok, then dbptr is set to nullptr.

View File

@ -46,6 +46,22 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
const OptimisticTransactionOptions& txn_options,
Transaction* old_txn) override;
// Transactional `DeleteRange()` is not yet supported.
virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
const Slice&, const Slice&) override {
return Status::NotSupported();
}
// Range deletions also must not be snuck into `WriteBatch`es as they are
// incompatible with `OptimisticTransactionDB`.
virtual Status Write(const WriteOptions& write_opts,
WriteBatch* batch) override {
if (batch->HasDeleteRange()) {
return Status::NotSupported();
}
return OptimisticTransactionDB::Write(write_opts, batch);
}
size_t GetLockBucketsSize() const { return bucketed_locks_.size(); }
OccValidationPolicy GetValidatePolicy() const { return validate_policy_; }

View File

@ -1033,6 +1033,17 @@ TEST_P(OptimisticTransactionTest, IteratorTest) {
delete txn;
}
TEST_P(OptimisticTransactionTest, DeleteRangeSupportTest) {
// `OptimisticTransactionDB` does not allow range deletion in any API.
ASSERT_TRUE(
txn_db
->DeleteRange(WriteOptions(), txn_db->DefaultColumnFamily(), "a", "b")
.IsNotSupported());
WriteBatch wb;
ASSERT_OK(wb.DeleteRange("a", "b"));
ASSERT_NOK(txn_db->Write(WriteOptions(), &wb));
}
TEST_P(OptimisticTransactionTest, SavepointTest) {
WriteOptions write_options;
ReadOptions read_options, snapshot_read_options;

View File

@ -4835,6 +4835,56 @@ TEST_P(TransactionTest, MergeTest) {
ASSERT_EQ("a,3", value);
}
TEST_P(TransactionTest, DeleteRangeSupportTest) {
// The `DeleteRange()` API is banned everywhere.
ASSERT_TRUE(
db->DeleteRange(WriteOptions(), db->DefaultColumnFamily(), "a", "b")
.IsNotSupported());
// But range deletions can be added via the `Write()` API by specifying the
// proper flags to promise there are no conflicts according to the DB type
// (see `TransactionDB::DeleteRange()` API doc for details).
for (bool skip_concurrency_control : {false, true}) {
for (bool skip_duplicate_key_check : {false, true}) {
ASSERT_OK(db->Put(WriteOptions(), "a", "val"));
WriteBatch wb;
ASSERT_OK(wb.DeleteRange("a", "b"));
TransactionDBWriteOptimizations flags;
flags.skip_concurrency_control = skip_concurrency_control;
flags.skip_duplicate_key_check = skip_duplicate_key_check;
Status s = db->Write(WriteOptions(), flags, &wb);
std::string value;
switch (txn_db_options.write_policy) {
case WRITE_COMMITTED:
if (skip_concurrency_control) {
ASSERT_OK(s);
ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound());
} else {
ASSERT_NOK(s);
ASSERT_OK(db->Get(ReadOptions(), "a", &value));
}
break;
case WRITE_PREPARED:
// Intentional fall-through
case WRITE_UNPREPARED:
if (skip_concurrency_control && skip_duplicate_key_check) {
ASSERT_OK(s);
ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound());
} else {
ASSERT_NOK(s);
ASSERT_OK(db->Get(ReadOptions(), "a", &value));
}
break;
}
// Without any promises from the user, range deletion via other `Write()`
// APIs are still banned.
ASSERT_OK(db->Put(WriteOptions(), "a", "val"));
ASSERT_NOK(db->Write(WriteOptions(), &wb));
ASSERT_OK(db->Get(ReadOptions(), "a", &value));
}
}
}
TEST_P(TransactionTest, DeferSnapshotTest) {
WriteOptions write_options;
ReadOptions read_options;

View File

@ -157,7 +157,9 @@ Status WritePreparedTxnDB::WriteInternal(const WriteOptions& write_options_orig,
// TODO(myabandeh): add an option to allow user skipping this cost
SubBatchCounter counter(*GetCFComparatorMap());
auto s = batch->Iterate(&counter);
assert(s.ok());
if (!s.ok()) {
return s;
}
batch_cnt = counter.BatchCount();
WPRecordTick(TXN_DUPLICATE_KEY_OVERHEAD);
ROCKS_LOG_DETAILS(info_log_, "Duplicate key overhead: %" PRIu64 " batches",