rocksdb/utilities/transactions/optimistic_transaction_db_impl.h

89 lines
2.9 KiB
C
Raw Normal View History

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <mutex>
#include <vector>
#include <algorithm>
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/utilities/optimistic_transaction_db.h"
namespace ROCKSDB_NAMESPACE {
class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
public:
explicit OptimisticTransactionDBImpl(
DB* db, const OptimisticTransactionDBOptions& occ_options,
bool take_ownership = true)
: OptimisticTransactionDB(db),
db_owner_(take_ownership),
validate_policy_(occ_options.validate_policy) {
if (validate_policy_ == OccValidationPolicy::kValidateParallel) {
uint32_t bucket_size = std::max(16u, occ_options.occ_lock_buckets);
bucketed_locks_.reserve(bucket_size);
for (size_t i = 0; i < bucket_size; ++i) {
bucketed_locks_.emplace_back(
std::unique_ptr<std::mutex>(new std::mutex));
}
}
}
~OptimisticTransactionDBImpl() {
// Prevent this stackable from destroying
// base db
if (!db_owner_) {
db_ = nullptr;
}
}
Transaction* BeginTransaction(const WriteOptions& write_options,
const OptimisticTransactionOptions& txn_options,
Transaction* old_txn) override;
// Transactional `DeleteRange()` is not yet supported.
Revise APIs related to user-defined timestamp (#8946) Summary: ajkr reminded me that we have a rule of not including per-kv related data in `WriteOptions`. Namely, `WriteOptions` should not include information about "what-to-write", but should just include information about "how-to-write". According to this rule, `WriteOptions::timestamp` (experimental) is clearly a violation. Therefore, this PR removes `WriteOptions::timestamp` for compliance. After the removal, we need to pass timestamp info via another set of APIs. This PR proposes a set of overloaded functions `Put(write_opts, key, value, ts)`, `Delete(write_opts, key, ts)`, and `SingleDelete(write_opts, key, ts)`. Planned to add `Write(write_opts, batch, ts)`, but its complexity made me reconsider doing it in another PR (maybe). For better checking and returning error early, we also add a new set of APIs to `WriteBatch` that take extra `timestamp` information when writing to `WriteBatch`es. These set of APIs in `WriteBatchWithIndex` are currently not supported, and are on our TODO list. Removed `WriteBatch::AssignTimestamps()` and renamed `WriteBatch::AssignTimestamp()` to `WriteBatch::UpdateTimestamps()` since this method require that all keys have space for timestamps allocated already and multiple timestamps can be updated. The constructor of `WriteBatch` now takes a fourth argument `default_cf_ts_sz` which is the timestamp size of the default column family. This will be used to allocate space when calling APIs that do not specify a column family handle. Also, updated `DB::Get()`, `DB::MultiGet()`, `DB::NewIterator()`, `DB::NewIterators()` methods, replacing some assertions about timestamp to returning Status code. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8946 Test Plan: make check ./db_bench -benchmarks=fillseq,fillrandom,readrandom,readseq,deleterandom -user_timestamp_size=8 ./db_stress --user_timestamp_size=8 -nooverwritepercent=0 -test_secondary=0 -secondary_catch_up_one_in=0 -continuous_verification_interval=0 Make sure there is no perf regression by running the following ``` ./db_bench_opt -db=/dev/shm/rocksdb -use_existing_db=0 -level0_stop_writes_trigger=256 -level0_slowdown_writes_trigger=256 -level0_file_num_compaction_trigger=256 -disable_wal=1 -duration=10 -benchmarks=fillrandom ``` Before this PR ``` DB path: [/dev/shm/rocksdb] fillrandom : 1.831 micros/op 546235 ops/sec; 60.4 MB/s ``` After this PR ``` DB path: [/dev/shm/rocksdb] fillrandom : 1.820 micros/op 549404 ops/sec; 60.8 MB/s ``` Reviewed By: ltamasi Differential Revision: D33721359 Pulled By: riversand963 fbshipit-source-id: c131561534272c120ffb80711d42748d21badf09
2022-02-02 07:17:46 +01:00
using StackableDB::DeleteRange;
virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
const Slice&, const Slice&) override {
return Status::NotSupported();
}
// Range deletions also must not be snuck into `WriteBatch`es as they are
// incompatible with `OptimisticTransactionDB`.
virtual Status Write(const WriteOptions& write_opts,
WriteBatch* batch) override {
if (batch->HasDeleteRange()) {
return Status::NotSupported();
}
return OptimisticTransactionDB::Write(write_opts, batch);
}
size_t GetLockBucketsSize() const { return bucketed_locks_.size(); }
OccValidationPolicy GetValidatePolicy() const { return validate_policy_; }
std::unique_lock<std::mutex> LockBucket(size_t idx);
private:
// NOTE: used in validation phase. Each key is hashed into some
// bucket. We then take the lock in the hash value order to avoid deadlock.
std::vector<std::unique_ptr<std::mutex>> bucketed_locks_;
bool db_owner_;
const OccValidationPolicy validate_policy_;
void ReinitializeTransaction(Transaction* txn,
const WriteOptions& write_options,
const OptimisticTransactionOptions& txn_options =
OptimisticTransactionOptions());
};
} // namespace ROCKSDB_NAMESPACE
#endif // ROCKSDB_LITE