rocksdb/include/rocksdb/utilities/write_batch_with_index.h
anand76 1c8cbf315f Extend MultiGet batching to Transactions (#5210)
Summary:
MultiGet batching was implemented in #5011 in order to reduce CPU utilization when looking up multiple keys at once. This PR implements corresponding ```MultiGet``` and ```MultiGetSingleCFForUpdate``` in ```rocksdb::Transaction``` that call the underlying batching implementation.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5210

Differential Revision: D15048164

Pulled By: anand1976

fbshipit-source-id: c52f6043102ab0cbc723f4cba2a7b7d1767f6f52
2019-04-23 14:11:26 -07:00

268 lines
10 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A WriteBatchWithIndex with a binary searchable index built for all the keys
// inserted.
#pragma once
#ifndef ROCKSDB_LITE
#include <memory>
#include <string>
#include <vector>
#include "rocksdb/comparator.h"
#include "rocksdb/iterator.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "rocksdb/write_batch.h"
#include "rocksdb/write_batch_base.h"
namespace rocksdb {
class ColumnFamilyHandle;
class Comparator;
class DB;
class ReadCallback;
struct ReadOptions;
struct DBOptions;
enum WriteType {
kPutRecord,
kMergeRecord,
kDeleteRecord,
kSingleDeleteRecord,
kDeleteRangeRecord,
kLogDataRecord,
kXIDRecord,
};
// an entry for Put, Merge, Delete, or SingleDelete entry for write batches.
// Used in WBWIIterator.
struct WriteEntry {
WriteType type;
Slice key;
Slice value;
};
// Iterator of one column family out of a WriteBatchWithIndex.
class WBWIIterator {
public:
virtual ~WBWIIterator() {}
virtual bool Valid() const = 0;
virtual void SeekToFirst() = 0;
virtual void SeekToLast() = 0;
virtual void Seek(const Slice& key) = 0;
virtual void SeekForPrev(const Slice& key) = 0;
virtual void Next() = 0;
virtual void Prev() = 0;
// the return WriteEntry is only valid until the next mutation of
// WriteBatchWithIndex
virtual WriteEntry Entry() const = 0;
virtual Status status() const = 0;
};
// A WriteBatchWithIndex with a binary searchable index built for all the keys
// inserted.
// In Put(), Merge() Delete(), or SingleDelete(), the same function of the
// wrapped will be called. At the same time, indexes will be built.
// By calling GetWriteBatch(), a user will get the WriteBatch for the data
// they inserted, which can be used for DB::Write().
// A user can call NewIterator() to create an iterator.
class WriteBatchWithIndex : public WriteBatchBase {
public:
// backup_index_comparator: the backup comparator used to compare keys
// within the same column family, if column family is not given in the
// interface, or we can't find a column family from the column family handle
// passed in, backup_index_comparator will be used for the column family.
// reserved_bytes: reserved bytes in underlying WriteBatch
// max_bytes: maximum size of underlying WriteBatch in bytes
// overwrite_key: if true, overwrite the key in the index when inserting
// the same key as previously, so iterator will never
// show two entries with the same key.
explicit WriteBatchWithIndex(
const Comparator* backup_index_comparator = BytewiseComparator(),
size_t reserved_bytes = 0, bool overwrite_key = false,
size_t max_bytes = 0);
~WriteBatchWithIndex() override;
using WriteBatchBase::Put;
Status Put(ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value) override;
Status Put(const Slice& key, const Slice& value) override;
using WriteBatchBase::Merge;
Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value) override;
Status Merge(const Slice& key, const Slice& value) override;
using WriteBatchBase::Delete;
Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override;
Status Delete(const Slice& key) override;
using WriteBatchBase::SingleDelete;
Status SingleDelete(ColumnFamilyHandle* column_family,
const Slice& key) override;
Status SingleDelete(const Slice& key) override;
using WriteBatchBase::DeleteRange;
Status DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key,
const Slice& end_key) override;
Status DeleteRange(const Slice& begin_key, const Slice& end_key) override;
using WriteBatchBase::PutLogData;
Status PutLogData(const Slice& blob) override;
using WriteBatchBase::Clear;
void Clear() override;
using WriteBatchBase::GetWriteBatch;
WriteBatch* GetWriteBatch() override;
// Create an iterator of a column family. User can call iterator.Seek() to
// search to the next entry of or after a key. Keys will be iterated in the
// order given by index_comparator. For multiple updates on the same key,
// each update will be returned as a separate entry, in the order of update
// time.
//
// The returned iterator should be deleted by the caller.
WBWIIterator* NewIterator(ColumnFamilyHandle* column_family);
// Create an iterator of the default column family.
WBWIIterator* NewIterator();
// Will create a new Iterator that will use WBWIIterator as a delta and
// base_iterator as base.
//
// This function is only supported if the WriteBatchWithIndex was
// constructed with overwrite_key=true.
//
// The returned iterator should be deleted by the caller.
// The base_iterator is now 'owned' by the returned iterator. Deleting the
// returned iterator will also delete the base_iterator.
//
// Updating write batch with the current key of the iterator is not safe.
// We strongly recommand users not to do it. It will invalidate the current
// key() and value() of the iterator. This invalidation happens even before
// the write batch update finishes. The state may recover after Next() is
// called.
Iterator* NewIteratorWithBase(ColumnFamilyHandle* column_family,
Iterator* base_iterator);
// default column family
Iterator* NewIteratorWithBase(Iterator* base_iterator);
// Similar to DB::Get() but will only read the key from this batch.
// If the batch does not have enough data to resolve Merge operations,
// MergeInProgress status may be returned.
Status GetFromBatch(ColumnFamilyHandle* column_family,
const DBOptions& options, const Slice& key,
std::string* value);
// Similar to previous function but does not require a column_family.
// Note: An InvalidArgument status will be returned if there are any Merge
// operators for this key. Use previous method instead.
Status GetFromBatch(const DBOptions& options, const Slice& key,
std::string* value) {
return GetFromBatch(nullptr, options, key, value);
}
// Similar to DB::Get() but will also read writes from this batch.
//
// This function will query both this batch and the DB and then merge
// the results using the DB's merge operator (if the batch contains any
// merge requests).
//
// Setting read_options.snapshot will affect what is read from the DB
// but will NOT change which keys are read from the batch (the keys in
// this batch do not yet belong to any snapshot and will be fetched
// regardless).
Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
const Slice& key, std::string* value);
// An overload of the above method that receives a PinnableSlice
Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
const Slice& key, PinnableSlice* value);
Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
ColumnFamilyHandle* column_family, const Slice& key,
std::string* value);
// An overload of the above method that receives a PinnableSlice
Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
ColumnFamilyHandle* column_family, const Slice& key,
PinnableSlice* value);
void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options,
ColumnFamilyHandle* column_family,
const size_t num_keys, const Slice* keys,
PinnableSlice* values, Status* statuses,
bool sorted_input);
// Records the state of the batch for future calls to RollbackToSavePoint().
// May be called multiple times to set multiple save points.
void SetSavePoint() override;
// Remove all entries in this batch (Put, Merge, Delete, SingleDelete,
// PutLogData) since the most recent call to SetSavePoint() and removes the
// most recent save point.
// If there is no previous call to SetSavePoint(), behaves the same as
// Clear().
//
// Calling RollbackToSavePoint invalidates any open iterators on this batch.
//
// Returns Status::OK() on success,
// Status::NotFound() if no previous call to SetSavePoint(),
// or other Status on corruption.
Status RollbackToSavePoint() override;
// Pop the most recent save point.
// If there is no previous call to SetSavePoint(), Status::NotFound()
// will be returned.
// Otherwise returns Status::OK().
Status PopSavePoint() override;
void SetMaxBytes(size_t max_bytes) override;
size_t GetDataSize() const;
private:
friend class PessimisticTransactionDB;
friend class WritePreparedTxn;
friend class WriteUnpreparedTxn;
friend class WriteBatchWithIndex_SubBatchCnt_Test;
// Returns the number of sub-batches inside the write batch. A sub-batch
// starts right before inserting a key that is a duplicate of a key in the
// last sub-batch.
size_t SubBatchCnt();
Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
ColumnFamilyHandle* column_family, const Slice& key,
PinnableSlice* value, ReadCallback* callback);
void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options,
ColumnFamilyHandle* column_family,
const size_t num_keys, const Slice* keys,
PinnableSlice* values, Status* statuses,
bool sorted_input, ReadCallback* callback);
struct Rep;
std::unique_ptr<Rep> rep;
};
} // namespace rocksdb
#endif // !ROCKSDB_LITE