Allow range deletions in *TransactionDB
only when safe (#7929)
Summary: Explicitly reject all range deletions on `TransactionDB` or `OptimisticTransactionDB`, except when the user provides sufficient promises that allow us to proceed safely. The necessary promises are described in the API doc for `TransactionDB::DeleteRange()`. There is currently no way to provide enough promises to make it safe in `OptimisticTransactionDB`. Fixes https://github.com/facebook/rocksdb/issues/7913. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7929 Test Plan: unit tests covering the cases it's permitted/rejected Reviewed By: ltamasi Differential Revision: D26240254 Pulled By: ajkr fbshipit-source-id: 2834a0ce64cc3e4c3799e35b885a5e79c2f4f6d9
This commit is contained in:
parent
a471d31e04
commit
1315375542
@ -1,4 +1,9 @@
|
||||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
### Bug Fixes
|
||||
* Since 6.15.0, `TransactionDB` returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. There are certain cases where range deletion can still be used on such DBs; see the API doc on `TransactionDB::DeleteRange()` for details.
|
||||
* `OptimisticTransactionDB` now returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees.
|
||||
|
||||
## 6.17.1 (01/28/2021)
|
||||
### Behavior Changes
|
||||
* When retryable IO error occurs during compaction, it is mapped to soft error and set the BG error. However, auto resume is not called to clean the soft error since compaction will reschedule by itself. In this change, When retryable IO error occurs during compaction, BG error is not set. User will be informed the error via EventHelper.
|
||||
|
@ -51,6 +51,8 @@ struct OptimisticTransactionDBOptions {
|
||||
uint32_t occ_lock_buckets = (1 << 20);
|
||||
};
|
||||
|
||||
// Range deletions (including those in `WriteBatch`es passed to `Write()`) are
|
||||
// incompatible with `OptimisticTransactionDB` and will return a non-OK `Status`
|
||||
class OptimisticTransactionDB : public StackableDB {
|
||||
public:
|
||||
// Open an OptimisticTransactionDB similar to DB::Open().
|
||||
|
@ -344,6 +344,17 @@ class TransactionDB : public StackableDB {
|
||||
// falls back to the un-optimized version of ::Write
|
||||
return Write(opts, updates);
|
||||
}
|
||||
// Transactional `DeleteRange()` is not yet supported.
|
||||
// However, users who know their deleted range does not conflict with
|
||||
// anything can still use it via the `Write()` API. In all cases, the
|
||||
// `Write()` overload specifying `TransactionDBWriteOptimizations` must be
|
||||
// used and `skip_concurrency_control` must be set. When using either
|
||||
// WRITE_PREPARED or WRITE_UNPREPARED , `skip_duplicate_key_check` must
|
||||
// additionally be set.
|
||||
virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
|
||||
const Slice&, const Slice&) override {
|
||||
return Status::NotSupported();
|
||||
}
|
||||
// Open a TransactionDB similar to DB::Open().
|
||||
// Internally call PrepareWrap() and WrapDB()
|
||||
// If the return status is not ok, then dbptr is set to nullptr.
|
||||
|
@ -46,6 +46,22 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
|
||||
const OptimisticTransactionOptions& txn_options,
|
||||
Transaction* old_txn) override;
|
||||
|
||||
// Transactional `DeleteRange()` is not yet supported.
|
||||
virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
|
||||
const Slice&, const Slice&) override {
|
||||
return Status::NotSupported();
|
||||
}
|
||||
|
||||
// Range deletions also must not be snuck into `WriteBatch`es as they are
|
||||
// incompatible with `OptimisticTransactionDB`.
|
||||
virtual Status Write(const WriteOptions& write_opts,
|
||||
WriteBatch* batch) override {
|
||||
if (batch->HasDeleteRange()) {
|
||||
return Status::NotSupported();
|
||||
}
|
||||
return OptimisticTransactionDB::Write(write_opts, batch);
|
||||
}
|
||||
|
||||
size_t GetLockBucketsSize() const { return bucketed_locks_.size(); }
|
||||
|
||||
OccValidationPolicy GetValidatePolicy() const { return validate_policy_; }
|
||||
|
@ -1033,6 +1033,17 @@ TEST_P(OptimisticTransactionTest, IteratorTest) {
|
||||
delete txn;
|
||||
}
|
||||
|
||||
TEST_P(OptimisticTransactionTest, DeleteRangeSupportTest) {
|
||||
// `OptimisticTransactionDB` does not allow range deletion in any API.
|
||||
ASSERT_TRUE(
|
||||
txn_db
|
||||
->DeleteRange(WriteOptions(), txn_db->DefaultColumnFamily(), "a", "b")
|
||||
.IsNotSupported());
|
||||
WriteBatch wb;
|
||||
ASSERT_OK(wb.DeleteRange("a", "b"));
|
||||
ASSERT_NOK(txn_db->Write(WriteOptions(), &wb));
|
||||
}
|
||||
|
||||
TEST_P(OptimisticTransactionTest, SavepointTest) {
|
||||
WriteOptions write_options;
|
||||
ReadOptions read_options, snapshot_read_options;
|
||||
|
@ -4835,6 +4835,56 @@ TEST_P(TransactionTest, MergeTest) {
|
||||
ASSERT_EQ("a,3", value);
|
||||
}
|
||||
|
||||
TEST_P(TransactionTest, DeleteRangeSupportTest) {
|
||||
// The `DeleteRange()` API is banned everywhere.
|
||||
ASSERT_TRUE(
|
||||
db->DeleteRange(WriteOptions(), db->DefaultColumnFamily(), "a", "b")
|
||||
.IsNotSupported());
|
||||
|
||||
// But range deletions can be added via the `Write()` API by specifying the
|
||||
// proper flags to promise there are no conflicts according to the DB type
|
||||
// (see `TransactionDB::DeleteRange()` API doc for details).
|
||||
for (bool skip_concurrency_control : {false, true}) {
|
||||
for (bool skip_duplicate_key_check : {false, true}) {
|
||||
ASSERT_OK(db->Put(WriteOptions(), "a", "val"));
|
||||
WriteBatch wb;
|
||||
ASSERT_OK(wb.DeleteRange("a", "b"));
|
||||
TransactionDBWriteOptimizations flags;
|
||||
flags.skip_concurrency_control = skip_concurrency_control;
|
||||
flags.skip_duplicate_key_check = skip_duplicate_key_check;
|
||||
Status s = db->Write(WriteOptions(), flags, &wb);
|
||||
std::string value;
|
||||
switch (txn_db_options.write_policy) {
|
||||
case WRITE_COMMITTED:
|
||||
if (skip_concurrency_control) {
|
||||
ASSERT_OK(s);
|
||||
ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound());
|
||||
} else {
|
||||
ASSERT_NOK(s);
|
||||
ASSERT_OK(db->Get(ReadOptions(), "a", &value));
|
||||
}
|
||||
break;
|
||||
case WRITE_PREPARED:
|
||||
// Intentional fall-through
|
||||
case WRITE_UNPREPARED:
|
||||
if (skip_concurrency_control && skip_duplicate_key_check) {
|
||||
ASSERT_OK(s);
|
||||
ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound());
|
||||
} else {
|
||||
ASSERT_NOK(s);
|
||||
ASSERT_OK(db->Get(ReadOptions(), "a", &value));
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Without any promises from the user, range deletion via other `Write()`
|
||||
// APIs are still banned.
|
||||
ASSERT_OK(db->Put(WriteOptions(), "a", "val"));
|
||||
ASSERT_NOK(db->Write(WriteOptions(), &wb));
|
||||
ASSERT_OK(db->Get(ReadOptions(), "a", &value));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(TransactionTest, DeferSnapshotTest) {
|
||||
WriteOptions write_options;
|
||||
ReadOptions read_options;
|
||||
|
@ -157,7 +157,9 @@ Status WritePreparedTxnDB::WriteInternal(const WriteOptions& write_options_orig,
|
||||
// TODO(myabandeh): add an option to allow user skipping this cost
|
||||
SubBatchCounter counter(*GetCFComparatorMap());
|
||||
auto s = batch->Iterate(&counter);
|
||||
assert(s.ok());
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
batch_cnt = counter.BatchCount();
|
||||
WPRecordTick(TXN_DUPLICATE_KEY_OVERHEAD);
|
||||
ROCKS_LOG_DETAILS(info_log_, "Duplicate key overhead: %" PRIu64 " batches",
|
||||
|
Loading…
Reference in New Issue
Block a user