rocksdb/table/get_context.h
Manuel Ung a16e00b7b9 WriteUnPrepared Txn: Disable seek to snapshot optimization (#3955)
Summary:
This is implemented by extending ReadCallback with another function `MaxUnpreparedSequenceNumber` which returns the largest visible sequence number for the current transaction, if there is uncommitted data written to DB. Otherwise, it returns zero, indicating no uncommitted data.

There are the places where reads had to be modified.
- Get and Seek/Next was just updated to seek to max(snapshot_seq, MaxUnpreparedSequenceNumber()) instead, and iterate until a key was visible.
- Prev did not need need updates since it did not use the Seek to sequence number optimization. Assuming that locks were held when writing unprepared keys, and ValidateSnapshot runs, there should only be committed keys and unprepared keys of the current transaction, all of which are visible. Prev will simply iterate to get the last visible key.
- Reseeking to skip keys optimization was also disabled for write unprepared, since it's possible to hit the max_skip condition even while reseeking. There needs to be some way to resolve infinite looping in this case.
Closes https://github.com/facebook/rocksdb/pull/3955

Differential Revision: D8286688

Pulled By: lth

fbshipit-source-id: 25e42f47fdeb5f7accea0f4fd350ef35198caafe
2018-06-27 12:23:07 -07:00

112 lines
3.7 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <string>
#include "db/merge_context.h"
#include "db/range_del_aggregator.h"
#include "db/read_callback.h"
#include "rocksdb/env.h"
#include "rocksdb/statistics.h"
#include "rocksdb/types.h"
#include "table/block.h"
namespace rocksdb {
class MergeContext;
class PinnedIteratorsManager;
class GetContext {
public:
enum GetState {
kNotFound,
kFound,
kDeleted,
kCorrupt,
kMerge, // saver contains the current merge result (the operands)
kBlobIndex,
};
uint64_t tickers_value[Tickers::TICKER_ENUM_MAX] = {0};
GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
Logger* logger, Statistics* statistics, GetState init_state,
const Slice& user_key, PinnableSlice* value, bool* value_found,
MergeContext* merge_context, RangeDelAggregator* range_del_agg,
Env* env, SequenceNumber* seq = nullptr,
PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
ReadCallback* callback = nullptr, bool* is_blob_index = nullptr);
void MarkKeyMayExist();
// Records this key, value, and any meta-data (such as sequence number and
// state) into this GetContext.
//
// If the parsed_key matches the user key that we are looking for, sets
// mathced to true.
//
// Returns True if more keys need to be read (due to merges) or
// False if the complete value has been found.
bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
bool* matched, Cleanable* value_pinner = nullptr);
// Simplified version of the previous function. Should only be used when we
// know that the operation is a Put.
void SaveValue(const Slice& value, SequenceNumber seq);
GetState State() const { return state_; }
RangeDelAggregator* range_del_agg() { return range_del_agg_; }
PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; }
// If a non-null string is passed, all the SaveValue calls will be
// logged into the string. The operations can then be replayed on
// another GetContext with replayGetContextLog.
void SetReplayLog(std::string* replay_log) { replay_log_ = replay_log; }
// Do we need to fetch the SequenceNumber for this key?
bool NeedToReadSequence() const { return (seq_ != nullptr); }
bool sample() const { return sample_; }
bool CheckCallback(SequenceNumber seq) {
if (callback_) {
return callback_->IsVisible(seq);
}
return true;
}
void RecordCounters(Tickers ticker, size_t val);
private:
const Comparator* ucmp_;
const MergeOperator* merge_operator_;
// the merge operations encountered;
Logger* logger_;
Statistics* statistics_;
GetState state_;
Slice user_key_;
PinnableSlice* pinnable_val_;
bool* value_found_; // Is value set correctly? Used by KeyMayExist
MergeContext* merge_context_;
RangeDelAggregator* range_del_agg_;
Env* env_;
// If a key is found, seq_ will be set to the SequenceNumber of most recent
// write to the key or kMaxSequenceNumber if unknown
SequenceNumber* seq_;
std::string* replay_log_;
// Used to temporarily pin blocks when state_ == GetContext::kMerge
PinnedIteratorsManager* pinned_iters_mgr_;
ReadCallback* callback_;
bool sample_;
bool* is_blob_index_;
};
void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
GetContext* get_context,
Cleanable* value_pinner = nullptr);
} // namespace rocksdb