rocksdb/db/write_batch_internal.h
Nathan Bronson 6ce42dd075 Don't merge WriteBatch-es if WAL is disabled
Summary:
There's no need for WriteImpl to flatten the write batch group
into a single WriteBatch if the WAL is disabled.  This diff moves the
flattening into the WAL step, and skips flattening entirely if it isn't
needed.  It's good for about 5% speedup on a multi-threaded workload
with no WAL.

This diff also adds clarifying comments about the chance for partial
failure of WriteBatchInternal::InsertInto, and always sets bg_error_ if
the memtable state diverges from the logged state or if a WriteBatch
succeeds only partially.

Benchmark for speedup:
  db_bench -benchmarks=fillrandom -threads=16 -batch_size=1 -memtablerep=skip_list -value_size=0 --num=200000 -level0_slowdown_writes_trigger=9999 -level0_stop_writes_trigger=9999 -disable_auto_compactions --max_write_buffer_number=8 -max_background_flushes=8 --disable_wal --write_buffer_size=160000000

Test Plan: asserts + make check

Reviewers: sdong, igor

Reviewed By: igor

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D50583
2015-11-12 10:50:38 -08:00

152 lines
5.6 KiB
C++

// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include "rocksdb/types.h"
#include "rocksdb/write_batch.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "util/autovector.h"
namespace rocksdb {
class MemTable;
class ColumnFamilyMemTables {
public:
virtual ~ColumnFamilyMemTables() {}
virtual bool Seek(uint32_t column_family_id) = 0;
// returns true if the update to memtable should be ignored
// (useful when recovering from log whose updates have already
// been processed)
virtual uint64_t GetLogNumber() const = 0;
virtual MemTable* GetMemTable() const = 0;
virtual ColumnFamilyHandle* GetColumnFamilyHandle() = 0;
virtual void CheckMemtableFull() = 0;
};
class ColumnFamilyMemTablesDefault : public ColumnFamilyMemTables {
public:
explicit ColumnFamilyMemTablesDefault(MemTable* mem)
: ok_(false), mem_(mem) {}
bool Seek(uint32_t column_family_id) override {
ok_ = (column_family_id == 0);
return ok_;
}
uint64_t GetLogNumber() const override { return 0; }
MemTable* GetMemTable() const override {
assert(ok_);
return mem_;
}
ColumnFamilyHandle* GetColumnFamilyHandle() override { return nullptr; }
void CheckMemtableFull() override {}
private:
bool ok_;
MemTable* mem_;
};
// WriteBatchInternal provides static methods for manipulating a
// WriteBatch that we don't want in the public WriteBatch interface.
class WriteBatchInternal {
public:
// WriteBatch methods with column_family_id instead of ColumnFamilyHandle*
static void Put(WriteBatch* batch, uint32_t column_family_id,
const Slice& key, const Slice& value);
static void Put(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key, const SliceParts& value);
static void Delete(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key);
static void Delete(WriteBatch* batch, uint32_t column_family_id,
const Slice& key);
static void SingleDelete(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key);
static void SingleDelete(WriteBatch* batch, uint32_t column_family_id,
const Slice& key);
static void Merge(WriteBatch* batch, uint32_t column_family_id,
const Slice& key, const Slice& value);
static void Merge(WriteBatch* batch, uint32_t column_family_id,
const SliceParts& key, const SliceParts& value);
// Return the number of entries in the batch.
static int Count(const WriteBatch* batch);
// Set the count for the number of entries in the batch.
static void SetCount(WriteBatch* batch, int n);
// Return the seqeunce number for the start of this batch.
static SequenceNumber Sequence(const WriteBatch* batch);
// Store the specified number as the seqeunce number for the start of
// this batch.
static void SetSequence(WriteBatch* batch, SequenceNumber seq);
// Returns the offset of the first entry in the batch.
// This offset is only valid if the batch is not empty.
static size_t GetFirstOffset(WriteBatch* batch);
static Slice Contents(const WriteBatch* batch) {
return Slice(batch->rep_);
}
static size_t ByteSize(const WriteBatch* batch) {
return batch->rep_.size();
}
static void SetContents(WriteBatch* batch, const Slice& contents);
// Inserts batches[i] into memtable, for i in 0..num_batches-1 inclusive.
//
// If dont_filter_deletes is false AND options.filter_deletes is true
// AND db->KeyMayExist is false, then a Delete won't modify the memtable.
//
// If ignore_missing_column_families == true. WriteBatch
// referencing non-existing column family will be ignored.
// If ignore_missing_column_families == false, processing of the
// batches will be stopped if a reference is found to a non-existing
// column family and InvalidArgument() will be returned. The writes
// in batches may be only partially applied at that point.
//
// If log_number is non-zero, the memtable will be updated only if
// memtables->GetLogNumber() >= log_number.
static Status InsertInto(const autovector<WriteBatch*>& batches,
SequenceNumber sequence,
ColumnFamilyMemTables* memtables,
bool ignore_missing_column_families = false,
uint64_t log_number = 0, DB* db = nullptr,
const bool dont_filter_deletes = true);
// Convenience form of InsertInto when you have only one batch
static Status InsertInto(const WriteBatch* batch,
ColumnFamilyMemTables* memtables,
bool ignore_missing_column_families = false,
uint64_t log_number = 0, DB* db = nullptr,
const bool dont_filter_deletes = true);
static void Append(WriteBatch* dst, const WriteBatch* src);
// Returns the byte size of appending a WriteBatch with ByteSize
// leftByteSize and a WriteBatch with ByteSize rightByteSize
static size_t AppendedByteSize(size_t leftByteSize, size_t rightByteSize);
};
} // namespace rocksdb