2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
|
2015-09-08 21:36:48 +02:00
|
|
|
#include "utilities/transactions/transaction_db_impl.h"
|
|
|
|
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
#include <string>
|
2015-11-20 10:07:24 +01:00
|
|
|
#include <unordered_set>
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "db/db_impl.h"
|
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/options.h"
|
|
|
|
#include "rocksdb/utilities/transaction_db.h"
|
2015-09-08 21:36:48 +02:00
|
|
|
#include "utilities/transactions/transaction_db_mutex_impl.h"
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
#include "utilities/transactions/transaction_impl.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
TransactionDBImpl::TransactionDBImpl(DB* db,
|
|
|
|
const TransactionDBOptions& txn_db_options)
|
|
|
|
: TransactionDB(db),
|
2016-04-18 20:15:50 +02:00
|
|
|
db_impl_(dynamic_cast<DBImpl*>(db)),
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
txn_db_options_(txn_db_options),
|
2016-02-02 02:07:05 +01:00
|
|
|
lock_mgr_(this, txn_db_options_.num_stripes, txn_db_options.max_num_locks,
|
2015-09-08 21:36:48 +02:00
|
|
|
txn_db_options_.custom_mutex_factory
|
|
|
|
? txn_db_options_.custom_mutex_factory
|
|
|
|
: std::shared_ptr<TransactionDBMutexFactory>(
|
2016-04-18 20:15:50 +02:00
|
|
|
new TransactionDBMutexFactoryImpl())) {
|
|
|
|
assert(db_impl_ != nullptr);
|
|
|
|
}
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
2016-08-11 23:19:33 +02:00
|
|
|
// Support initiliazing TransactionDBImpl from a stackable db
|
|
|
|
//
|
|
|
|
// TransactionDBImpl
|
|
|
|
// ^ ^
|
|
|
|
// | |
|
|
|
|
// | +
|
|
|
|
// | StackableDB
|
|
|
|
// | ^
|
|
|
|
// | |
|
|
|
|
// + +
|
|
|
|
// DBImpl
|
|
|
|
// ^
|
|
|
|
// |(inherit)
|
|
|
|
// +
|
|
|
|
// DB
|
|
|
|
//
|
|
|
|
TransactionDBImpl::TransactionDBImpl(StackableDB* db,
|
|
|
|
const TransactionDBOptions& txn_db_options)
|
|
|
|
: TransactionDB(db),
|
|
|
|
db_impl_(dynamic_cast<DBImpl*>(db->GetRootDB())),
|
|
|
|
txn_db_options_(txn_db_options),
|
|
|
|
lock_mgr_(this, txn_db_options_.num_stripes, txn_db_options.max_num_locks,
|
|
|
|
txn_db_options_.custom_mutex_factory
|
|
|
|
? txn_db_options_.custom_mutex_factory
|
|
|
|
: std::shared_ptr<TransactionDBMutexFactory>(
|
|
|
|
new TransactionDBMutexFactoryImpl())) {
|
|
|
|
assert(db_impl_ != nullptr);
|
|
|
|
}
|
|
|
|
|
2016-05-13 19:37:46 +02:00
|
|
|
TransactionDBImpl::~TransactionDBImpl() {
|
|
|
|
while (!transactions_.empty()) {
|
|
|
|
delete transactions_.begin()->second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-11 23:19:33 +02:00
|
|
|
Status TransactionDBImpl::Initialize(
|
|
|
|
const std::vector<size_t>& compaction_enabled_cf_indices,
|
|
|
|
const std::vector<ColumnFamilyHandle*>& handles) {
|
|
|
|
for (auto cf_ptr : handles) {
|
|
|
|
AddColumnFamily(cf_ptr);
|
|
|
|
}
|
|
|
|
// Re-enable compaction for the column families that initially had
|
|
|
|
// compaction enabled.
|
|
|
|
std::vector<ColumnFamilyHandle*> compaction_enabled_cf_handles;
|
|
|
|
compaction_enabled_cf_handles.reserve(compaction_enabled_cf_indices.size());
|
|
|
|
for (auto index : compaction_enabled_cf_indices) {
|
|
|
|
compaction_enabled_cf_handles.push_back(handles[index]);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status s = EnableAutoCompaction(compaction_enabled_cf_handles);
|
|
|
|
|
|
|
|
// create 'real' transactions from recovered shell transactions
|
|
|
|
auto dbimpl = reinterpret_cast<DBImpl*>(GetRootDB());
|
|
|
|
assert(dbimpl != nullptr);
|
|
|
|
auto rtrxs = dbimpl->recovered_transactions();
|
|
|
|
|
|
|
|
for (auto it = rtrxs.begin(); it != rtrxs.end(); it++) {
|
|
|
|
auto recovered_trx = it->second;
|
|
|
|
assert(recovered_trx);
|
|
|
|
assert(recovered_trx->log_number_);
|
|
|
|
assert(recovered_trx->name_.length());
|
|
|
|
|
|
|
|
WriteOptions w_options;
|
|
|
|
w_options.sync = true;
|
|
|
|
TransactionOptions t_options;
|
|
|
|
|
|
|
|
Transaction* real_trx = BeginTransaction(w_options, t_options, nullptr);
|
|
|
|
assert(real_trx);
|
|
|
|
real_trx->SetLogNumber(recovered_trx->log_number_);
|
|
|
|
|
|
|
|
s = real_trx->SetName(recovered_trx->name_);
|
|
|
|
if (!s.ok()) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
s = real_trx->RebuildFromWriteBatch(recovered_trx->batch_);
|
2016-10-05 22:39:00 +02:00
|
|
|
real_trx->SetState(Transaction::PREPARED);
|
2016-08-11 23:19:33 +02:00
|
|
|
if (!s.ok()) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (s.ok()) {
|
|
|
|
dbimpl->DeleteAllRecoveredTransactions();
|
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
Transaction* TransactionDBImpl::BeginTransaction(
|
2016-02-03 04:19:17 +01:00
|
|
|
const WriteOptions& write_options, const TransactionOptions& txn_options,
|
|
|
|
Transaction* old_txn) {
|
|
|
|
if (old_txn != nullptr) {
|
|
|
|
ReinitializeTransaction(old_txn, write_options, txn_options);
|
|
|
|
return old_txn;
|
|
|
|
} else {
|
|
|
|
return new TransactionImpl(this, write_options, txn_options);
|
|
|
|
}
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
TransactionDBOptions TransactionDBImpl::ValidateTxnDBOptions(
|
|
|
|
const TransactionDBOptions& txn_db_options) {
|
|
|
|
TransactionDBOptions validated = txn_db_options;
|
|
|
|
|
|
|
|
if (txn_db_options.num_stripes == 0) {
|
|
|
|
validated.num_stripes = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return validated;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status TransactionDB::Open(const Options& options,
|
|
|
|
const TransactionDBOptions& txn_db_options,
|
|
|
|
const std::string& dbname, TransactionDB** dbptr) {
|
|
|
|
DBOptions db_options(options);
|
|
|
|
ColumnFamilyOptions cf_options(options);
|
|
|
|
std::vector<ColumnFamilyDescriptor> column_families;
|
|
|
|
column_families.push_back(
|
|
|
|
ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options));
|
|
|
|
std::vector<ColumnFamilyHandle*> handles;
|
|
|
|
Status s = TransactionDB::Open(db_options, txn_db_options, dbname,
|
|
|
|
column_families, &handles, dbptr);
|
|
|
|
if (s.ok()) {
|
|
|
|
assert(handles.size() == 1);
|
|
|
|
// i can delete the handle since DBImpl is always holding a reference to
|
|
|
|
// default column family
|
|
|
|
delete handles[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status TransactionDB::Open(
|
|
|
|
const DBOptions& db_options, const TransactionDBOptions& txn_db_options,
|
|
|
|
const std::string& dbname,
|
|
|
|
const std::vector<ColumnFamilyDescriptor>& column_families,
|
|
|
|
std::vector<ColumnFamilyHandle*>* handles, TransactionDB** dbptr) {
|
|
|
|
Status s;
|
|
|
|
DB* db;
|
|
|
|
|
|
|
|
std::vector<ColumnFamilyDescriptor> column_families_copy = column_families;
|
2015-11-20 10:07:24 +01:00
|
|
|
std::vector<size_t> compaction_enabled_cf_indices;
|
2016-04-18 20:15:50 +02:00
|
|
|
DBOptions db_options_2pc = db_options;
|
2016-08-11 23:19:33 +02:00
|
|
|
PrepareWrap(&db_options_2pc, &column_families_copy,
|
|
|
|
&compaction_enabled_cf_indices);
|
2016-04-18 20:15:50 +02:00
|
|
|
s = DB::Open(db_options_2pc, dbname, column_families_copy, handles, &db);
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
if (s.ok()) {
|
2016-08-11 23:19:33 +02:00
|
|
|
s = WrapDB(db, txn_db_options, compaction_enabled_cf_indices, *handles,
|
|
|
|
dbptr);
|
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
2016-08-11 23:19:33 +02:00
|
|
|
void TransactionDB::PrepareWrap(
|
|
|
|
DBOptions* db_options, std::vector<ColumnFamilyDescriptor>* column_families,
|
|
|
|
std::vector<size_t>* compaction_enabled_cf_indices) {
|
|
|
|
compaction_enabled_cf_indices->clear();
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
2016-08-11 23:19:33 +02:00
|
|
|
// Enable MemTable History if not already enabled
|
|
|
|
for (size_t i = 0; i < column_families->size(); i++) {
|
|
|
|
ColumnFamilyOptions* cf_options = &(*column_families)[i].options;
|
2015-11-20 10:07:24 +01:00
|
|
|
|
2016-08-11 23:19:33 +02:00
|
|
|
if (cf_options->max_write_buffer_number_to_maintain == 0) {
|
|
|
|
// Setting to -1 will set the History size to max_write_buffer_number.
|
|
|
|
cf_options->max_write_buffer_number_to_maintain = -1;
|
2016-04-18 20:15:50 +02:00
|
|
|
}
|
2016-08-11 23:19:33 +02:00
|
|
|
if (!cf_options->disable_auto_compactions) {
|
|
|
|
// Disable compactions momentarily to prevent race with DB::Open
|
|
|
|
cf_options->disable_auto_compactions = true;
|
|
|
|
compaction_enabled_cf_indices->push_back(i);
|
2016-05-13 19:37:46 +02:00
|
|
|
}
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
}
|
2016-08-11 23:19:33 +02:00
|
|
|
db_options->allow_2pc = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status TransactionDB::WrapDB(
|
|
|
|
// make sure this db is already opened with memtable history enabled,
|
|
|
|
// auto compaction distabled and 2 phase commit enabled
|
|
|
|
DB* db, const TransactionDBOptions& txn_db_options,
|
|
|
|
const std::vector<size_t>& compaction_enabled_cf_indices,
|
|
|
|
const std::vector<ColumnFamilyHandle*>& handles, TransactionDB** dbptr) {
|
|
|
|
TransactionDBImpl* txn_db = new TransactionDBImpl(
|
|
|
|
db, TransactionDBImpl::ValidateTxnDBOptions(txn_db_options));
|
|
|
|
*dbptr = txn_db;
|
|
|
|
Status s = txn_db->Initialize(compaction_enabled_cf_indices, handles);
|
|
|
|
return s;
|
|
|
|
}
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
2016-08-11 23:19:33 +02:00
|
|
|
Status TransactionDB::WrapStackableDB(
|
|
|
|
// make sure this stackable_db is already opened with memtable history
|
|
|
|
// enabled,
|
|
|
|
// auto compaction distabled and 2 phase commit enabled
|
|
|
|
StackableDB* db, const TransactionDBOptions& txn_db_options,
|
|
|
|
const std::vector<size_t>& compaction_enabled_cf_indices,
|
|
|
|
const std::vector<ColumnFamilyHandle*>& handles, TransactionDB** dbptr) {
|
|
|
|
TransactionDBImpl* txn_db = new TransactionDBImpl(
|
|
|
|
db, TransactionDBImpl::ValidateTxnDBOptions(txn_db_options));
|
|
|
|
*dbptr = txn_db;
|
|
|
|
Status s = txn_db->Initialize(compaction_enabled_cf_indices, handles);
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Let TransactionLockMgr know that this column family exists so it can
|
|
|
|
// allocate a LockMap for it.
|
|
|
|
void TransactionDBImpl::AddColumnFamily(const ColumnFamilyHandle* handle) {
|
|
|
|
lock_mgr_.AddColumnFamily(handle->GetID());
|
|
|
|
}
|
|
|
|
|
|
|
|
Status TransactionDBImpl::CreateColumnFamily(
|
|
|
|
const ColumnFamilyOptions& options, const std::string& column_family_name,
|
|
|
|
ColumnFamilyHandle** handle) {
|
|
|
|
InstrumentedMutexLock l(&column_family_mutex_);
|
|
|
|
|
|
|
|
Status s = db_->CreateColumnFamily(options, column_family_name, handle);
|
|
|
|
if (s.ok()) {
|
|
|
|
lock_mgr_.AddColumnFamily((*handle)->GetID());
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Let TransactionLockMgr know that it can deallocate the LockMap for this
|
|
|
|
// column family.
|
|
|
|
Status TransactionDBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) {
|
|
|
|
InstrumentedMutexLock l(&column_family_mutex_);
|
|
|
|
|
|
|
|
Status s = db_->DropColumnFamily(column_family);
|
|
|
|
if (s.ok()) {
|
|
|
|
lock_mgr_.RemoveColumnFamily(column_family->GetID());
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status TransactionDBImpl::TryLock(TransactionImpl* txn, uint32_t cfh_id,
|
2016-12-06 02:18:14 +01:00
|
|
|
const std::string& key, bool exclusive) {
|
|
|
|
return lock_mgr_.TryLock(txn, cfh_id, key, GetEnv(), exclusive);
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
}
|
|
|
|
|
2015-09-12 03:10:50 +02:00
|
|
|
void TransactionDBImpl::UnLock(TransactionImpl* txn,
|
|
|
|
const TransactionKeyMap* keys) {
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
lock_mgr_.UnLock(txn, keys, GetEnv());
|
|
|
|
}
|
|
|
|
|
|
|
|
void TransactionDBImpl::UnLock(TransactionImpl* txn, uint32_t cfh_id,
|
|
|
|
const std::string& key) {
|
|
|
|
lock_mgr_.UnLock(txn, cfh_id, key, GetEnv());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Used when wrapping DB write operations in a transaction
|
|
|
|
Transaction* TransactionDBImpl::BeginInternalTransaction(
|
|
|
|
const WriteOptions& options) {
|
|
|
|
TransactionOptions txn_options;
|
2016-02-03 04:19:17 +01:00
|
|
|
Transaction* txn = BeginTransaction(options, txn_options, nullptr);
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
|
|
|
// Use default timeout for non-transactional writes
|
2016-07-08 01:34:41 +02:00
|
|
|
txn->SetLockTimeout(txn_db_options_.default_lock_timeout);
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
return txn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// All user Put, Merge, Delete, and Write requests must be intercepted to make
|
|
|
|
// sure that they lock all keys that they are writing to avoid causing conflicts
|
|
|
|
// with any concurent transactions. The easiest way to do this is to wrap all
|
|
|
|
// write operations in a transaction.
|
|
|
|
//
|
|
|
|
// Put(), Merge(), and Delete() only lock a single key per call. Write() will
|
|
|
|
// sort its keys before locking them. This guarantees that TransactionDB write
|
|
|
|
// methods cannot deadlock with eachother (but still could deadlock with a
|
|
|
|
// Transaction).
|
|
|
|
Status TransactionDBImpl::Put(const WriteOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family,
|
|
|
|
const Slice& key, const Slice& val) {
|
|
|
|
Status s;
|
|
|
|
|
|
|
|
Transaction* txn = BeginInternalTransaction(options);
|
2015-10-09 22:31:10 +02:00
|
|
|
txn->DisableIndexing();
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
|
|
|
// Since the client didn't create a transaction, they don't care about
|
|
|
|
// conflict checking for this write. So we just need to do PutUntracked().
|
|
|
|
s = txn->PutUntracked(column_family, key, val);
|
|
|
|
|
|
|
|
if (s.ok()) {
|
|
|
|
s = txn->Commit();
|
|
|
|
}
|
|
|
|
|
|
|
|
delete txn;
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status TransactionDBImpl::Delete(const WriteOptions& wopts,
|
|
|
|
ColumnFamilyHandle* column_family,
|
|
|
|
const Slice& key) {
|
|
|
|
Status s;
|
|
|
|
|
|
|
|
Transaction* txn = BeginInternalTransaction(wopts);
|
2015-10-09 22:31:10 +02:00
|
|
|
txn->DisableIndexing();
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
|
|
|
// Since the client didn't create a transaction, they don't care about
|
|
|
|
// conflict checking for this write. So we just need to do
|
|
|
|
// DeleteUntracked().
|
|
|
|
s = txn->DeleteUntracked(column_family, key);
|
|
|
|
|
|
|
|
if (s.ok()) {
|
|
|
|
s = txn->Commit();
|
|
|
|
}
|
|
|
|
|
|
|
|
delete txn;
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status TransactionDBImpl::Merge(const WriteOptions& options,
|
|
|
|
ColumnFamilyHandle* column_family,
|
|
|
|
const Slice& key, const Slice& value) {
|
|
|
|
Status s;
|
|
|
|
|
|
|
|
Transaction* txn = BeginInternalTransaction(options);
|
2015-10-09 22:31:10 +02:00
|
|
|
txn->DisableIndexing();
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
|
|
|
// Since the client didn't create a transaction, they don't care about
|
|
|
|
// conflict checking for this write. So we just need to do
|
|
|
|
// MergeUntracked().
|
|
|
|
s = txn->MergeUntracked(column_family, key, value);
|
|
|
|
|
|
|
|
if (s.ok()) {
|
|
|
|
s = txn->Commit();
|
|
|
|
}
|
|
|
|
|
|
|
|
delete txn;
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status TransactionDBImpl::Write(const WriteOptions& opts, WriteBatch* updates) {
|
|
|
|
// Need to lock all keys in this batch to prevent write conflicts with
|
|
|
|
// concurrent transactions.
|
|
|
|
Transaction* txn = BeginInternalTransaction(opts);
|
2015-10-09 22:31:10 +02:00
|
|
|
txn->DisableIndexing();
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
|
|
|
|
assert(dynamic_cast<TransactionImpl*>(txn) != nullptr);
|
|
|
|
auto txn_impl = reinterpret_cast<TransactionImpl*>(txn);
|
|
|
|
|
|
|
|
// Since commitBatch sorts the keys before locking, concurrent Write()
|
|
|
|
// operations will not cause a deadlock.
|
|
|
|
// In order to avoid a deadlock with a concurrent Transaction, Transactions
|
|
|
|
// should use a lock timeout.
|
|
|
|
Status s = txn_impl->CommitBatch(updates);
|
|
|
|
|
|
|
|
delete txn;
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2016-02-02 02:07:05 +01:00
|
|
|
void TransactionDBImpl::InsertExpirableTransaction(TransactionID tx_id,
|
|
|
|
TransactionImpl* tx) {
|
|
|
|
assert(tx->GetExpirationTime() > 0);
|
|
|
|
std::lock_guard<std::mutex> lock(map_mutex_);
|
|
|
|
expirable_transactions_map_.insert({tx_id, tx});
|
|
|
|
}
|
|
|
|
|
|
|
|
void TransactionDBImpl::RemoveExpirableTransaction(TransactionID tx_id) {
|
|
|
|
std::lock_guard<std::mutex> lock(map_mutex_);
|
|
|
|
expirable_transactions_map_.erase(tx_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool TransactionDBImpl::TryStealingExpiredTransactionLocks(
|
|
|
|
TransactionID tx_id) {
|
|
|
|
std::lock_guard<std::mutex> lock(map_mutex_);
|
|
|
|
|
|
|
|
auto tx_it = expirable_transactions_map_.find(tx_id);
|
|
|
|
if (tx_it == expirable_transactions_map_.end()) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
TransactionImpl& tx = *(tx_it->second);
|
|
|
|
return tx.TryStealingLocks();
|
|
|
|
}
|
|
|
|
|
2016-02-03 04:19:17 +01:00
|
|
|
void TransactionDBImpl::ReinitializeTransaction(
|
|
|
|
Transaction* txn, const WriteOptions& write_options,
|
|
|
|
const TransactionOptions& txn_options) {
|
|
|
|
assert(dynamic_cast<TransactionImpl*>(txn) != nullptr);
|
|
|
|
auto txn_impl = reinterpret_cast<TransactionImpl*>(txn);
|
|
|
|
|
2016-03-04 00:36:26 +01:00
|
|
|
txn_impl->Reinitialize(this, write_options, txn_options);
|
2016-02-03 04:19:17 +01:00
|
|
|
}
|
|
|
|
|
2016-04-18 20:15:50 +02:00
|
|
|
Transaction* TransactionDBImpl::GetTransactionByName(
|
|
|
|
const TransactionName& name) {
|
|
|
|
std::lock_guard<std::mutex> lock(name_map_mutex_);
|
|
|
|
auto it = transactions_.find(name);
|
|
|
|
if (it == transactions_.end()) {
|
|
|
|
return nullptr;
|
|
|
|
} else {
|
|
|
|
return it->second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void TransactionDBImpl::GetAllPreparedTransactions(
|
|
|
|
std::vector<Transaction*>* transv) {
|
|
|
|
assert(transv);
|
|
|
|
transv->clear();
|
|
|
|
std::lock_guard<std::mutex> lock(name_map_mutex_);
|
|
|
|
for (auto it = transactions_.begin(); it != transactions_.end(); it++) {
|
2016-10-05 22:39:00 +02:00
|
|
|
if (it->second->GetState() == Transaction::PREPARED) {
|
2016-04-18 20:15:50 +02:00
|
|
|
transv->push_back(it->second);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-28 02:43:06 +02:00
|
|
|
TransactionLockMgr::LockStatusData TransactionDBImpl::GetLockStatusData() {
|
|
|
|
return lock_mgr_.GetLockStatusData();
|
|
|
|
}
|
|
|
|
|
2016-04-18 20:15:50 +02:00
|
|
|
void TransactionDBImpl::RegisterTransaction(Transaction* txn) {
|
|
|
|
assert(txn);
|
|
|
|
assert(txn->GetName().length() > 0);
|
|
|
|
assert(GetTransactionByName(txn->GetName()) == nullptr);
|
2016-10-05 22:39:00 +02:00
|
|
|
assert(txn->GetState() == Transaction::STARTED);
|
2016-04-18 20:15:50 +02:00
|
|
|
std::lock_guard<std::mutex> lock(name_map_mutex_);
|
|
|
|
transactions_[txn->GetName()] = txn;
|
|
|
|
}
|
|
|
|
|
|
|
|
void TransactionDBImpl::UnregisterTransaction(Transaction* txn) {
|
|
|
|
assert(txn);
|
|
|
|
std::lock_guard<std::mutex> lock(name_map_mutex_);
|
|
|
|
auto it = transactions_.find(txn->GetName());
|
|
|
|
assert(it != transactions_.end());
|
|
|
|
transactions_.erase(it);
|
|
|
|
}
|
|
|
|
|
Pessimistic Transactions
Summary:
Initial implementation of Pessimistic Transactions. This diff contains the api changes discussed in D38913. This diff is pretty large, so let me know if people would prefer to meet up to discuss it.
MyRocks folks: please take a look at the API in include/rocksdb/utilities/transaction[_db].h and let me know if you have any issues.
Also, you'll notice a couple of TODOs in the implementation of RollbackToSavePoint(). After chatting with Siying, I'm going to send out a separate diff for an alternate implementation of this feature that implements the rollback inside of WriteBatch/WriteBatchWithIndex. We can then decide which route is preferable.
Next, I'm planning on doing some perf testing and then integrating this diff into MongoRocks for further testing.
Test Plan: Unit tests, db_bench parallel testing.
Reviewers: igor, rven, sdong, yhchiang, yoshinorim
Reviewed By: sdong
Subscribers: hermanlee4, maykov, spetrunia, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D40869
2015-05-26 02:37:33 +02:00
|
|
|
} // namespace rocksdb
|
|
|
|
#endif // ROCKSDB_LITE
|