rocksdb/utilities/transactions/transaction_util.h
sdong 58c4aee42e TransactionUtil::CheckKey() to skip unnecessary history (#4941)
Summary:
If a memtable definitely covers a key, there isn't a need to check older memtables.
We can skip them by checking the earliest sequence number.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4941

Differential Revision: D13932666

fbshipit-source-id: b9d52f234b8ad9dd3bf6547645cd457175a3ca9b
2019-06-11 11:46:42 -07:00

96 lines
3.4 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include <unordered_map>
#include "db/dbformat.h"
#include "db/read_callback.h"
#include "rocksdb/db.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "rocksdb/types.h"
namespace rocksdb {
// Bookkeeping kept for a single key inside a TransactionKeyMap.
struct TransactionKeyMapInfo {
  // Earliest sequence number that is relevant to this transaction for this key
  SequenceNumber seq;
  // The counters and the flag below always start out zeroed/false; only `seq`
  // is supplied by the caller at construction time.
  uint32_t num_writes = 0;
  uint32_t num_reads = 0;
  bool exclusive = false;

  // Creates an entry for a key first seen at sequence number `seq_no`.
  explicit TransactionKeyMapInfo(SequenceNumber seq_no) : seq(seq_no) {}
};
// Two-level map of the keys tracked by a transaction. The inner map goes from
// a key to its TransactionKeyMapInfo; the outer map groups those inner maps by
// a uint32_t id (presumably the column family ID -- confirm against callers).
using TransactionKeyMap =
std::unordered_map<uint32_t,
std::unordered_map<std::string, TransactionKeyMapInfo>>;
// Forward declarations. DBImpl and SuperVersion are only used through pointers
// by the declarations in this header, so their full definitions are not needed
// here. NOTE(review): WriteBatchWithIndex does not appear to be referenced by
// any declaration in this header -- confirm whether it is still required.
class DBImpl;
struct SuperVersion;
class WriteBatchWithIndex;
// Collection of static helpers used to detect conflicting writes to keys that
// a transaction is tracking.
class TransactionUtil {
public:
// Verifies there have been no commits to `key` in the db since sequence
// number `snap_seq`.
//
// If `cache_only` is true, then this function will not attempt to read any
// SST files. This makes it more likely that the function returns an error,
// because it may be unable to determine whether there are any conflicts.
//
// See the comment on CheckKey() for an explanation of `snap_seq`,
// `snap_checker` and `min_uncommitted`.
//
// Returns OK on success, BUSY if there is a conflicting write, or other error
// status for any unexpected errors.
static Status CheckKeyForConflicts(
DBImpl* db_impl, ColumnFamilyHandle* column_family,
const std::string& key, SequenceNumber snap_seq, bool cache_only,
ReadCallback* snap_checker = nullptr,
SequenceNumber min_uncommitted = kMaxSequenceNumber);
// For each (key, SequenceNumber) pair in the TransactionKeyMap, this function
// will verify there have been no writes to the key in the db since that
// sequence number.
//
// See CheckKeyForConflicts() for the meaning of `cache_only`.
//
// Returns OK on success, BUSY if there is a conflicting write, or other error
// status for any unexpected errors.
//
// REQUIRED: this function should only be called on the write thread or if the
// mutex is held.
static Status CheckKeysForConflicts(DBImpl* db_impl,
const TransactionKeyMap& keys,
bool cache_only);
private:
// Shared conflict check for a single `key`.
//
// If `snap_checker` == nullptr, writes are always committed in sequence
// number order. Writes with sequence number <= `snap_seq` do not conflict,
// and any write to `key` with sequence number > `snap_seq` triggers a
// conflict.
// If `snap_checker` != nullptr, writes may not commit in sequence number
// order. In this case `min_uncommitted` is a lower bound.
// seq < `min_uncommitted`: no conflict
// seq > `snap_seq`: applicable to conflict
// `min_uncommitted` <= seq <= `snap_seq`: call `snap_checker` to determine.
//
// NOTE(review): `earliest_seq` is not documented here; presumably it is the
// earliest sequence number the in-memory state is known to cover -- confirm
// against the implementation.
static Status CheckKey(DBImpl* db_impl, SuperVersion* sv,
SequenceNumber earliest_seq, SequenceNumber snap_seq,
const std::string& key, bool cache_only,
ReadCallback* snap_checker = nullptr,
SequenceNumber min_uncommitted = kMaxSequenceNumber);
};
} // namespace rocksdb
#endif // ROCKSDB_LITE