rocksdb/include/rocksdb/write_batch.h
sdong 28b5c76004 WriteBatchWithIndex: a wrapper of WriteBatch, with a searchable index
Summary:
Add WriteBatchWithIndex so that a user can query data out of a WriteBatch, to support MongoDB's read-its-own-write.

WriteBatchWithIndex uses a skiplist to store the binary index. The index stores the offset of the entry in the write batch. When searching for a key, the key for the entry is read by read the entry from the write batch from the offset.

Define a new iterator class for querying data out of WriteBatchWithIndex. A user can create an iterator of the write batch for one column family, seek to a key and keep calling Next() to see next entries.

I will add more unit tests if people are OK about this API.

Test Plan:
make all check
Add unit tests.

Reviewers: yhchiang, igor, MarkCallaghan, ljin

Reviewed By: ljin

Subscribers: dhruba, leveldb, xjin

Differential Revision: https://reviews.facebook.net/D21381
2014-08-18 16:37:38 -07:00

164 lines
6.1 KiB
C++

// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// WriteBatch holds a collection of updates to apply atomically to a DB.
//
// The updates are applied in the order in which they are added
// to the WriteBatch. For example, the value of "key" will be "v3"
// after the following batch is written:
//
// batch.Put("key", "v1");
// batch.Delete("key");
// batch.Put("key", "v2");
// batch.Put("key", "v3");
//
// Multiple threads can invoke const methods on a WriteBatch without
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same WriteBatch must use
// external synchronization.
#ifndef STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_
#define STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_
#include <string>
#include "rocksdb/status.h"
namespace rocksdb {
class Slice;
class ColumnFamilyHandle;
struct SliceParts;
class WriteBatch {
public:
explicit WriteBatch(size_t reserved_bytes = 0);
~WriteBatch();
// Store the mapping "key->value" in the database.
void Put(ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value);
void Put(const Slice& key, const Slice& value) {
Put(nullptr, key, value);
}
// Variant of Put() that gathers output like writev(2). The key and value
// that will be written to the database are concatentations of arrays of
// slices.
void Put(ColumnFamilyHandle* column_family, const SliceParts& key,
const SliceParts& value);
void Put(const SliceParts& key, const SliceParts& value) {
Put(nullptr, key, value);
}
// Merge "value" with the existing value of "key" in the database.
// "key->merge(existing, value)"
void Merge(ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value);
void Merge(const Slice& key, const Slice& value) {
Merge(nullptr, key, value);
}
// If the database contains a mapping for "key", erase it. Else do nothing.
void Delete(ColumnFamilyHandle* column_family, const Slice& key);
void Delete(const Slice& key) { Delete(nullptr, key); }
// variant that takes SliceParts
void Delete(ColumnFamilyHandle* column_family, const SliceParts& key);
void Delete(const SliceParts& key) { Delete(nullptr, key); }
// Append a blob of arbitrary size to the records in this batch. The blob will
// be stored in the transaction log but not in any other file. In particular,
// it will not be persisted to the SST files. When iterating over this
// WriteBatch, WriteBatch::Handler::LogData will be called with the contents
// of the blob as it is encountered. Blobs, puts, deletes, and merges will be
// encountered in the same order in thich they were inserted. The blob will
// NOT consume sequence number(s) and will NOT increase the count of the batch
//
// Example application: add timestamps to the transaction log for use in
// replication.
void PutLogData(const Slice& blob);
// Clear all updates buffered in this batch.
void Clear();
// Support for iterating over the contents of a batch.
class Handler {
public:
virtual ~Handler();
// default implementation will just call Put without column family for
// backwards compatibility. If the column family is not default,
// the function is noop
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
const Slice& value) {
if (column_family_id == 0) {
// Put() historically doesn't return status. We didn't want to be
// backwards incompatible so we didn't change the return status
// (this is a public API). We do an ordinary get and return Status::OK()
Put(key, value);
return Status::OK();
}
return Status::InvalidArgument(
"non-default column family and PutCF not implemented");
}
virtual void Put(const Slice& key, const Slice& value);
// Merge and LogData are not pure virtual. Otherwise, we would break
// existing clients of Handler on a source code level. The default
// implementation of Merge simply throws a runtime exception.
virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
const Slice& value) {
if (column_family_id == 0) {
Merge(key, value);
return Status::OK();
}
return Status::InvalidArgument(
"non-default column family and MergeCF not implemented");
}
virtual void Merge(const Slice& key, const Slice& value);
// The default implementation of LogData does nothing.
virtual void LogData(const Slice& blob);
virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) {
if (column_family_id == 0) {
Delete(key);
return Status::OK();
}
return Status::InvalidArgument(
"non-default column family and DeleteCF not implemented");
}
virtual void Delete(const Slice& key);
// Continue is called by WriteBatch::Iterate. If it returns false,
// iteration is halted. Otherwise, it continues iterating. The default
// implementation always returns true.
virtual bool Continue();
};
Status Iterate(Handler* handler) const;
// Retrieve the serialized version of this batch.
const std::string& Data() const { return rep_; }
// Retrieve data size of the batch.
size_t GetDataSize() const { return rep_.size(); }
// Returns the number of updates in the batch
int Count() const;
// Constructor with a serialized string object
explicit WriteBatch(std::string rep): rep_(rep) {}
private:
friend class WriteBatchInternal;
protected:
std::string rep_; // See comment in write_batch.cc for the format of rep_
// Intentionally copyable
};
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_