rocksdb/utilities/transactions/transaction_util.h
Manuel Ung 230b909da8 Fix PopSavePoint to merge info into the previous savepoint (#5628)
Summary:
Transaction::RollbackToSavePoint undoes the modifications made since the beginning of the SavePoint, and also unlocks the corresponding keys, which are tracked in the last SavePoint. Currently ::PopSavePoint simply discards these tracked keys, leaving them locked in the lock manager. This breaks subsequent ::RollbackToSavePoint behavior, as it loses track of such keys and thus cannot unlock them. The patch fixes ::PopSavePoint by passing the tracked key information on to the previous SavePoint.
Fixes https://github.com/facebook/rocksdb/issues/5618
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5628

Differential Revision: D16505325

Pulled By: lth

fbshipit-source-id: 2bc3b30963ab4d36d996d1f66543c93abf358980
2019-07-26 11:39:30 -07:00

104 lines
3.6 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include <unordered_map>
#include "db/dbformat.h"
#include "db/read_callback.h"
#include "rocksdb/db.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "rocksdb/types.h"
namespace rocksdb {
// Per-key bookkeeping record stored in a TransactionKeyMap.
struct TransactionKeyMapInfo {
  // Earliest sequence number that is relevant to this transaction for this key
  SequenceNumber seq;

  // Write and read counters. Both begin at zero and are summed by Merge().
  uint32_t num_writes = 0;
  uint32_t num_reads = 0;

  // Begins false; Merge() turns it on (and never off) when the merged-in
  // record has it set.
  bool exclusive = false;

  // Creates a record for `seq_no` with zeroed counters and `exclusive` unset.
  explicit TransactionKeyMapInfo(SequenceNumber seq_no) : seq(seq_no) {}

  // Used in PopSavePoint to collapse two savepoints together: folds `info`
  // into this record by adding its counters and OR-ing its `exclusive` flag.
  // `seq` is left untouched; builds with assertions enabled check that it is
  // not later than `info.seq`.
  void Merge(const TransactionKeyMapInfo& info) {
    assert(seq <= info.seq);
    exclusive = exclusive || info.exclusive;
    num_writes += info.num_writes;
    num_reads += info.num_reads;
  }
};
// Two-level map: uint32_t id -> (key string -> TransactionKeyMapInfo).
// NOTE(review): the uint32_t is presumably a column family ID -- confirm
// against the callers that populate this map.
using TransactionKeyMap =
std::unordered_map<uint32_t,
std::unordered_map<std::string, TransactionKeyMapInfo>>;
// Forward declarations. DBImpl and SuperVersion are only used through
// pointers in the declarations below; WriteBatchWithIndex is not referenced
// by any declaration visible in this header.
class DBImpl;
struct SuperVersion;
class WriteBatchWithIndex;
// Collection of static helpers that check whether keys have been written to
// since a given sequence number. The class declares only static member
// functions and holds no data members.
class TransactionUtil {
public:
// Verifies there have been no commits to this key in the db since this
// sequence number.
//
// If cache_only is true, then this function will not attempt to read any
// SST files. This makes it more likely that the function returns an error
// because it is unable to determine whether there are any conflicts.
//
// See comment of CheckKey() for explanation of `snap_seq`, `snap_checker`
// and `min_uncommitted`. When omitted, `snap_checker` defaults to nullptr
// and `min_uncommitted` defaults to kMaxSequenceNumber.
//
// Returns OK on success, BUSY if there is a conflicting write, or other error
// status for any unexpected errors.
static Status CheckKeyForConflicts(
DBImpl* db_impl, ColumnFamilyHandle* column_family,
const std::string& key, SequenceNumber snap_seq, bool cache_only,
ReadCallback* snap_checker = nullptr,
SequenceNumber min_uncommitted = kMaxSequenceNumber);
// For each key,SequenceNumber pair in the TransactionKeyMap, this function
// will verify there have been no writes to the key in the db since that
// sequence number.
// NOTE(review): the per-key sequence number is presumably
// TransactionKeyMapInfo::seq -- confirm in the implementation.
//
// `cache_only` is not described here; see CheckKeyForConflicts() for the
// meaning documented for the parameter of the same name.
//
// Returns OK on success, BUSY if there is a conflicting write, or other error
// status for any unexpected errors.
//
// REQUIRED: this function should only be called on the write thread or if the
// mutex is held.
static Status CheckKeysForConflicts(DBImpl* db_impl,
const TransactionKeyMap& keys,
bool cache_only);
private:
// If `snap_checker` == nullptr, writes are always committed in sequence
// number order. A write with a sequence number <= `snap_seq` will not
// conflict, and any write to `key` with a sequence number > `snap_seq` will
// trigger a conflict.
// If `snap_checker` != nullptr, writes may not commit in sequence number
// order. In this case `min_uncommitted` is a lower bound.
// seq < `min_uncommitted`: no conflict
// seq > `snap_seq`: applicable to conflict
// `min_uncommitted` <= seq <= `snap_seq`: call `snap_checker` to determine.
//
// NOTE(review): `earliest_seq` and `sv` are not described by this comment;
// consult the implementation for their exact roles.
static Status CheckKey(DBImpl* db_impl, SuperVersion* sv,
SequenceNumber earliest_seq, SequenceNumber snap_seq,
const std::string& key, bool cache_only,
ReadCallback* snap_checker = nullptr,
SequenceNumber min_uncommitted = kMaxSequenceNumber);
};
} // namespace rocksdb
#endif // ROCKSDB_LITE