rocksdb/utilities/transactions/write_unprepared_txn.cc
Manuel Ung a16e00b7b9 WriteUnPrepared Txn: Disable seek to snapshot optimization (#3955)
Summary:
This is implemented by extending ReadCallback with another function `MaxUnpreparedSequenceNumber` which returns the largest visible sequence number for the current transaction, if there is uncommitted data written to DB. Otherwise, it returns zero, indicating no uncommitted data.

There are the places where reads had to be modified.
- Get and Seek/Next was just updated to seek to max(snapshot_seq, MaxUnpreparedSequenceNumber()) instead, and iterate until a key was visible.
- Prev did not need need updates since it did not use the Seek to sequence number optimization. Assuming that locks were held when writing unprepared keys, and ValidateSnapshot runs, there should only be committed keys and unprepared keys of the current transaction, all of which are visible. Prev will simply iterate to get the last visible key.
- Reseeking to skip keys optimization was also disabled for write unprepared, since it's possible to hit the max_skip condition even while reseeking. There needs to be some way to resolve infinite looping in this case.
Closes https://github.com/facebook/rocksdb/pull/3955

Differential Revision: D8286688

Pulled By: lth

fbshipit-source-id: 25e42f47fdeb5f7accea0f4fd350ef35198caafe
2018-06-27 12:23:07 -07:00

91 lines
3.2 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#ifndef ROCKSDB_LITE
#include "utilities/transactions/write_unprepared_txn.h"
#include "db/db_impl.h"
#include "util/cast_util.h"
#include "utilities/transactions/write_unprepared_txn_db.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
namespace rocksdb {
bool WriteUnpreparedTxnReadCallback::IsVisible(SequenceNumber seq) {
auto unprep_seqs = txn_->GetUnpreparedSequenceNumbers();
// Since unprep_seqs maps prep_seq => prepare_batch_cnt, to check if seq is
// in unprep_seqs, we have to check if seq is equal to prep_seq or any of
// the prepare_batch_cnt seq nums after it.
//
// TODO(lth): Can be optimized with std::lower_bound if unprep_seqs is
// large.
for (const auto& it : unprep_seqs) {
if (it.first <= seq && seq < it.first + it.second) {
return true;
}
}
return db_->IsInSnapshot(seq, snapshot_, min_uncommitted_);
}
SequenceNumber WriteUnpreparedTxnReadCallback::MaxUnpreparedSequenceNumber() {
auto unprep_seqs = txn_->GetUnpreparedSequenceNumbers();
if (unprep_seqs.size()) {
return unprep_seqs.rbegin()->first + unprep_seqs.rbegin()->second - 1;
}
return 0;
}
WriteUnpreparedTxn::WriteUnpreparedTxn(WriteUnpreparedTxnDB* txn_db,
const WriteOptions& write_options,
const TransactionOptions& txn_options)
: WritePreparedTxn(txn_db, write_options, txn_options), wupt_db_(txn_db) {}
Status WriteUnpreparedTxn::Get(const ReadOptions& options,
ColumnFamilyHandle* column_family,
const Slice& key, PinnableSlice* value) {
auto snapshot = options.snapshot;
auto snap_seq =
snapshot != nullptr ? snapshot->GetSequenceNumber() : kMaxSequenceNumber;
SequenceNumber min_uncommitted = 0; // by default disable the optimization
if (snapshot != nullptr) {
min_uncommitted =
static_cast_with_check<const SnapshotImpl, const Snapshot>(snapshot)
->min_uncommitted_;
}
WriteUnpreparedTxnReadCallback callback(wupt_db_, snap_seq, min_uncommitted,
this);
return write_batch_.GetFromBatchAndDB(db_, options, column_family, key, value,
&callback);
}
Iterator* WriteUnpreparedTxn::GetIterator(const ReadOptions& options) {
return GetIterator(options, wupt_db_->DefaultColumnFamily());
}
Iterator* WriteUnpreparedTxn::GetIterator(const ReadOptions& options,
ColumnFamilyHandle* column_family) {
// Make sure to get iterator from WriteUnprepareTxnDB, not the root db.
Iterator* db_iter = wupt_db_->NewIterator(options, column_family, this);
assert(db_iter);
return write_batch_.NewIteratorWithBase(column_family, db_iter);
}
const std::map<SequenceNumber, size_t>&
WriteUnpreparedTxn::GetUnpreparedSequenceNumbers() {
return unprep_seqs_;
}
} // namespace rocksdb
#endif // ROCKSDB_LITE