Allow range deletions in *TransactionDB only when safe (#7929)

Summary:
Explicitly reject all range deletions on `TransactionDB` or `OptimisticTransactionDB`, except when the user provides sufficient promises that allow us to proceed safely. The necessary promises are described in the API doc for `TransactionDB::DeleteRange()`. There is currently no way to provide enough promises to make it safe in `OptimisticTransactionDB`.

Fixes https://github.com/facebook/rocksdb/issues/7913.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7929

Test Plan: unit tests covering the cases it's permitted/rejected

Reviewed By: ltamasi

Differential Revision: D26240254

Pulled By: ajkr

fbshipit-source-id: 2834a0ce64cc3e4c3799e35b885a5e79c2f4f6d9
This commit is contained in:
Andrew Kryczka 2021-02-05 15:55:34 -08:00 committed by Facebook GitHub Bot
parent 4fba83b4c2
commit 8d2bbdd04f
7 changed files with 97 additions and 1 deletions

View File

@ -7,6 +7,10 @@
### New Features
* Add support for key-value integrity protection in live updates from the user buffers provided to `WriteBatch` through the write to RocksDB's in-memory update buffer (memtable). This is intended to detect some cases of in-memory data corruption, due to either software or hardware errors. Users can enable protection by constructing their `WriteBatch` with `protection_bytes_per_key == 8`.
### Bug Fixes
* Since 6.15.0, `TransactionDB` returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. There are certain cases where range deletion can still be used on such DBs; see the API doc on `TransactionDB::DeleteRange()` for details.
* `OptimisticTransactionDB` now returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees.
## 6.17.0 (01/15/2021)
### Behavior Changes
* When verifying full file checksum with `DB::VerifyFileChecksums()`, we now fail with `Status::InvalidArgument` if the name of the checksum generator used for verification does not match the name of the checksum generator used for protecting the file when it was created.

View File

@ -51,6 +51,8 @@ struct OptimisticTransactionDBOptions {
uint32_t occ_lock_buckets = (1 << 20);
};
// Range deletions (including those in `WriteBatch`es passed to `Write()`) are
// incompatible with `OptimisticTransactionDB` and will return a non-OK `Status`
class OptimisticTransactionDB : public StackableDB {
public:
// Open an OptimisticTransactionDB similar to DB::Open().

View File

@ -344,6 +344,17 @@ class TransactionDB : public StackableDB {
// falls back to the un-optimized version of ::Write
return Write(opts, updates);
}
// Transactional `DeleteRange()` is not yet supported.
// However, users who know their deleted range does not conflict with
// anything can still use it via the `Write()` API. In all cases, the
// `Write()` overload specifying `TransactionDBWriteOptimizations` must be
// used and `skip_concurrency_control` must be set. When using either
// WRITE_PREPARED or WRITE_UNPREPARED , `skip_duplicate_key_check` must
// additionally be set.
virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
const Slice&, const Slice&) override {
return Status::NotSupported();
}
// Open a TransactionDB similar to DB::Open().
// Internally call PrepareWrap() and WrapDB()
// If the return status is not ok, then dbptr is set to nullptr.

View File

@ -46,6 +46,22 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
const OptimisticTransactionOptions& txn_options,
Transaction* old_txn) override;
// Transactional `DeleteRange()` is not yet supported.
virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
const Slice&, const Slice&) override {
return Status::NotSupported();
}
// Range deletions also must not be snuck into `WriteBatch`es as they are
// incompatible with `OptimisticTransactionDB`.
virtual Status Write(const WriteOptions& write_opts,
WriteBatch* batch) override {
if (batch->HasDeleteRange()) {
return Status::NotSupported();
}
return OptimisticTransactionDB::Write(write_opts, batch);
}
size_t GetLockBucketsSize() const { return bucketed_locks_.size(); }
OccValidationPolicy GetValidatePolicy() const { return validate_policy_; }

View File

@ -1035,6 +1035,17 @@ TEST_P(OptimisticTransactionTest, IteratorTest) {
delete txn;
}
TEST_P(OptimisticTransactionTest, DeleteRangeSupportTest) {
// `OptimisticTransactionDB` does not allow range deletion in any API.
ASSERT_TRUE(
txn_db
->DeleteRange(WriteOptions(), txn_db->DefaultColumnFamily(), "a", "b")
.IsNotSupported());
WriteBatch wb;
ASSERT_OK(wb.DeleteRange("a", "b"));
ASSERT_NOK(txn_db->Write(WriteOptions(), &wb));
}
TEST_P(OptimisticTransactionTest, SavepointTest) {
WriteOptions write_options;
ReadOptions read_options, snapshot_read_options;

View File

@ -4835,6 +4835,56 @@ TEST_P(TransactionTest, MergeTest) {
ASSERT_EQ("a,3", value);
}
TEST_P(TransactionTest, DeleteRangeSupportTest) {
// The `DeleteRange()` API is banned everywhere.
ASSERT_TRUE(
db->DeleteRange(WriteOptions(), db->DefaultColumnFamily(), "a", "b")
.IsNotSupported());
// But range deletions can be added via the `Write()` API by specifying the
// proper flags to promise there are no conflicts according to the DB type
// (see `TransactionDB::DeleteRange()` API doc for details).
for (bool skip_concurrency_control : {false, true}) {
for (bool skip_duplicate_key_check : {false, true}) {
ASSERT_OK(db->Put(WriteOptions(), "a", "val"));
WriteBatch wb;
ASSERT_OK(wb.DeleteRange("a", "b"));
TransactionDBWriteOptimizations flags;
flags.skip_concurrency_control = skip_concurrency_control;
flags.skip_duplicate_key_check = skip_duplicate_key_check;
Status s = db->Write(WriteOptions(), flags, &wb);
std::string value;
switch (txn_db_options.write_policy) {
case WRITE_COMMITTED:
if (skip_concurrency_control) {
ASSERT_OK(s);
ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound());
} else {
ASSERT_NOK(s);
ASSERT_OK(db->Get(ReadOptions(), "a", &value));
}
break;
case WRITE_PREPARED:
// Intentional fall-through
case WRITE_UNPREPARED:
if (skip_concurrency_control && skip_duplicate_key_check) {
ASSERT_OK(s);
ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound());
} else {
ASSERT_NOK(s);
ASSERT_OK(db->Get(ReadOptions(), "a", &value));
}
break;
}
// Without any promises from the user, range deletion via other `Write()`
// APIs are still banned.
ASSERT_OK(db->Put(WriteOptions(), "a", "val"));
ASSERT_NOK(db->Write(WriteOptions(), &wb));
ASSERT_OK(db->Get(ReadOptions(), "a", &value));
}
}
}
TEST_P(TransactionTest, DeferSnapshotTest) {
WriteOptions write_options;
ReadOptions read_options;

View File

@ -157,7 +157,9 @@ Status WritePreparedTxnDB::WriteInternal(const WriteOptions& write_options_orig,
// TODO(myabandeh): add an option to allow user skipping this cost
SubBatchCounter counter(*GetCFComparatorMap());
auto s = batch->Iterate(&counter);
assert(s.ok());
if (!s.ok()) {
return s;
}
batch_cnt = counter.BatchCount();
WPRecordTick(TXN_DUPLICATE_KEY_OVERHEAD);
ROCKS_LOG_DETAILS(info_log_, "Duplicate key overhead: %" PRIu64 " batches",