2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
|
2016-02-02 02:07:05 +01:00
|
|
|
#include <mutex>
|
2016-04-18 20:15:50 +02:00
|
|
|
#include <queue>
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
#include <string>
|
2016-02-02 02:07:05 +01:00
|
|
|
#include <unordered_map>
|
2016-04-18 20:15:50 +02:00
|
|
|
#include <vector>
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/options.h"
|
|
|
|
#include "rocksdb/utilities/transaction_db.h"
|
|
|
|
#include "utilities/transactions/transaction_impl.h"
|
|
|
|
#include "utilities/transactions/transaction_lock_mgr.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
class TransactionDBImpl : public TransactionDB {
|
|
|
|
public:
|
|
|
|
explicit TransactionDBImpl(DB* db,
|
|
|
|
const TransactionDBOptions& txn_db_options);
|
|
|
|
|
2016-05-13 19:37:46 +02:00
|
|
|
~TransactionDBImpl();
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
|
|
|
Transaction* BeginTransaction(const WriteOptions& write_options,
|
2016-02-03 04:19:17 +01:00
|
|
|
const TransactionOptions& txn_options,
|
|
|
|
Transaction* old_txn) override;
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
|
|
|
using StackableDB::Put;
|
|
|
|
virtual Status Put(const WriteOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
|
|
const Slice& val) override;
|
|
|
|
|
|
|
|
using StackableDB::Delete;
|
|
|
|
virtual Status Delete(const WriteOptions& wopts,
|
|
|
|
ColumnFamilyHandle* column_family,
|
|
|
|
const Slice& key) override;
|
|
|
|
|
|
|
|
using StackableDB::Merge;
|
|
|
|
virtual Status Merge(const WriteOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
|
|
const Slice& value) override;
|
|
|
|
|
|
|
|
using StackableDB::Write;
|
|
|
|
virtual Status Write(const WriteOptions& opts, WriteBatch* updates) override;
|
|
|
|
|
|
|
|
using StackableDB::CreateColumnFamily;
|
|
|
|
virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
|
|
|
|
const std::string& column_family_name,
|
|
|
|
ColumnFamilyHandle** handle) override;
|
|
|
|
|
|
|
|
using StackableDB::DropColumnFamily;
|
|
|
|
virtual Status DropColumnFamily(ColumnFamilyHandle* column_family) override;
|
|
|
|
|
|
|
|
Status TryLock(TransactionImpl* txn, uint32_t cfh_id, const std::string& key);
|
|
|
|
|
2015-09-12 03:10:50 +02:00
|
|
|
void UnLock(TransactionImpl* txn, const TransactionKeyMap* keys);
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
void UnLock(TransactionImpl* txn, uint32_t cfh_id, const std::string& key);
|
|
|
|
|
|
|
|
void AddColumnFamily(const ColumnFamilyHandle* handle);
|
|
|
|
|
|
|
|
static TransactionDBOptions ValidateTxnDBOptions(
|
|
|
|
const TransactionDBOptions& txn_db_options);
|
|
|
|
|
|
|
|
const TransactionDBOptions& GetTxnDBOptions() const {
|
|
|
|
return txn_db_options_;
|
|
|
|
}
|
|
|
|
|
2016-02-02 02:07:05 +01:00
|
|
|
void InsertExpirableTransaction(TransactionID tx_id, TransactionImpl* tx);
|
|
|
|
void RemoveExpirableTransaction(TransactionID tx_id);
|
|
|
|
|
|
|
|
// If transaction is no longer available, locks can be stolen
|
|
|
|
// If transaction is available, try stealing locks directly from transaction
|
|
|
|
// It is the caller's responsibility to ensure that the referred transaction
|
|
|
|
// is expirable (GetExpirationTime() > 0) and that it is expired.
|
|
|
|
bool TryStealingExpiredTransactionLocks(TransactionID tx_id);
|
|
|
|
|
2016-04-18 20:15:50 +02:00
|
|
|
Transaction* GetTransactionByName(const TransactionName& name) override;
|
|
|
|
|
|
|
|
void RegisterTransaction(Transaction* txn);
|
|
|
|
void UnregisterTransaction(Transaction* txn);
|
|
|
|
|
|
|
|
// not thread safe. current use case is during recovery (single thread)
|
|
|
|
void GetAllPreparedTransactions(std::vector<Transaction*>* trans) override;
|
|
|
|
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
private:
|
2016-02-03 04:19:17 +01:00
|
|
|
void ReinitializeTransaction(
|
|
|
|
Transaction* txn, const WriteOptions& write_options,
|
|
|
|
const TransactionOptions& txn_options = TransactionOptions());
|
|
|
|
|
2016-04-18 20:15:50 +02:00
|
|
|
DBImpl* db_impl_;
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
const TransactionDBOptions txn_db_options_;
|
|
|
|
TransactionLockMgr lock_mgr_;
|
|
|
|
|
|
|
|
// Must be held when adding/dropping column families.
|
|
|
|
InstrumentedMutex column_family_mutex_;
|
|
|
|
Transaction* BeginInternalTransaction(const WriteOptions& options);
|
|
|
|
Status WriteHelper(WriteBatch* updates, TransactionImpl* txn_impl);
|
2016-02-02 02:07:05 +01:00
|
|
|
|
|
|
|
// Used to ensure that no locks are stolen from an expirable transaction
|
|
|
|
// that has started a commit. Only transactions with an expiration time
|
|
|
|
// should be in this map.
|
|
|
|
std::mutex map_mutex_;
|
|
|
|
std::unordered_map<TransactionID, TransactionImpl*>
|
|
|
|
expirable_transactions_map_;
|
2016-04-18 20:15:50 +02:00
|
|
|
|
|
|
|
// map from name to two phase transaction instance
|
|
|
|
std::mutex name_map_mutex_;
|
|
|
|
std::unordered_map<TransactionName, Transaction*> transactions_;
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
#endif // ROCKSDB_LITE
|