18dcf7f98d
Summary: Add PreReleaseCallback to be called at the end of WriteImpl but before publishing the sequence number. The callback is used in WritePrepareTxn to i) update the commit map, ii) update the last published sequence number in the 2nd write queue. It also ensures that all the commits will go to the 2nd queue. These changes will ensure that the commit map is updated before the sequence number is published and used by reading snapshots. If we use two write queues, the snapshots will use the seq number published by the 2nd queue. If we use one write queue (the default, the snapshots will use the last seq number in the memtable, which also indicates the last published seq number. Closes https://github.com/facebook/rocksdb/pull/3205 Differential Revision: D6438959 Pulled By: maysamyabandeh fbshipit-source-id: f8b6c434e94bc5f5ab9cb696879d4c23e2577ab9
97 lines
3.4 KiB
C++
97 lines
3.4 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#pragma once
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
#include <algorithm>
|
|
#include <atomic>
|
|
#include <mutex>
|
|
#include <stack>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#include "db/write_callback.h"
|
|
#include "rocksdb/db.h"
|
|
#include "rocksdb/slice.h"
|
|
#include "rocksdb/snapshot.h"
|
|
#include "rocksdb/status.h"
|
|
#include "rocksdb/types.h"
|
|
#include "rocksdb/utilities/transaction.h"
|
|
#include "rocksdb/utilities/transaction_db.h"
|
|
#include "rocksdb/utilities/write_batch_with_index.h"
|
|
#include "util/autovector.h"
|
|
#include "utilities/transactions/pessimistic_transaction.h"
|
|
#include "utilities/transactions/pessimistic_transaction_db.h"
|
|
#include "utilities/transactions/transaction_base.h"
|
|
#include "utilities/transactions/transaction_util.h"
|
|
|
|
namespace rocksdb {
|
|
|
|
class WritePreparedTxnDB;
|
|
|
|
// This impl could write to DB also uncomitted data and then later tell apart
|
|
// committed data from uncomitted data. Uncommitted data could be after the
|
|
// Prepare phase in 2PC (WritePreparedTxn) or before that
|
|
// (WriteUnpreparedTxnImpl).
|
|
class WritePreparedTxn : public PessimisticTransaction {
|
|
public:
|
|
WritePreparedTxn(WritePreparedTxnDB* db, const WriteOptions& write_options,
|
|
const TransactionOptions& txn_options);
|
|
|
|
virtual ~WritePreparedTxn() {}
|
|
|
|
// To make WAL commit markers visible, the snapshot will be based on the last
|
|
// seq in the WAL that is also published, LastPublishedSequence, as opposed to
|
|
// the last seq in the memtable.
|
|
using Transaction::Get;
|
|
virtual Status Get(const ReadOptions& options,
|
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
PinnableSlice* value) override;
|
|
|
|
// To make WAL commit markers visible, the snapshot will be based on the last
|
|
// seq in the WAL that is also published, LastPublishedSequence, as opposed to
|
|
// the last seq in the memtable.
|
|
using Transaction::GetIterator;
|
|
virtual Iterator* GetIterator(const ReadOptions& options) override;
|
|
virtual Iterator* GetIterator(const ReadOptions& options,
|
|
ColumnFamilyHandle* column_family) override;
|
|
|
|
private:
|
|
friend class WritePreparedTransactionTest_BasicRecoveryTest_Test;
|
|
|
|
Status PrepareInternal() override;
|
|
|
|
Status CommitWithoutPrepareInternal() override;
|
|
|
|
Status CommitBatchInternal(WriteBatch* batch) override;
|
|
|
|
// Since the data is already written to memtables at the Prepare phase, the
|
|
// commit entails writing only a commit marker in the WAL. The sequence number
|
|
// of the commit marker is then the commit timestamp of the transaction. To
|
|
// make WAL commit markers visible, the snapshot will be based on the last seq
|
|
// in the WAL that is also published, LastPublishedSequence, as opposed to the
|
|
// last seq in the memtable.
|
|
Status CommitInternal() override;
|
|
|
|
Status RollbackInternal() override;
|
|
|
|
virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family,
|
|
const Slice& key,
|
|
SequenceNumber* tracked_at_seq) override;
|
|
|
|
// No copying allowed
|
|
WritePreparedTxn(const WritePreparedTxn&);
|
|
void operator=(const WritePreparedTxn&);
|
|
|
|
WritePreparedTxnDB* wpt_db_;
|
|
};
|
|
|
|
} // namespace rocksdb
|
|
|
|
#endif // ROCKSDB_LITE
|