bf66c10b13
Summary: Introduced KeyMayExist checking during writebatch-delete and removed from Outer Delete API because it uses writebatch-delete. Added code to skip getting Table from disk if not already present in table_cache. Some renaming of variables. Introduced KeyMayExistImpl which allows checking since specified sequence number in GetImpl useful to check partially written writebatch. Changed KeyMayExist to not be pure virtual and provided a default implementation. Expanded unit-tests in db_test to check appropriately. Ran db_stress for 1 hour with ./db_stress --max_key=100000 --ops_per_thread=10000000 --delpercent=50 --filter_deletes=1 --statistics=1. Test Plan: db_stress;make check Reviewers: dhruba, haobo Reviewed By: dhruba CC: leveldb, xjin Differential Revision: https://reviews.facebook.net/D11745
200 lines
5.6 KiB
C++
200 lines
5.6 KiB
C++
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
//
|
|
// WriteBatch::rep_ :=
|
|
// sequence: fixed64
|
|
// count: fixed32
|
|
// data: record[count]
|
|
// record :=
|
|
// kTypeValue varstring varstring
|
|
// kTypeMerge varstring varstring
|
|
// kTypeDeletion varstring
|
|
// varstring :=
|
|
// len: varint32
|
|
// data: uint8[len]
|
|
|
|
#include "leveldb/write_batch.h"
|
|
|
|
#include "leveldb/statistics.h"
|
|
#include "db/dbformat.h"
|
|
#include "db/db_impl.h"
|
|
#include "db/memtable.h"
|
|
#include "db/write_batch_internal.h"
|
|
#include "util/coding.h"
|
|
#include <stdexcept>
|
|
|
|
namespace leveldb {
|
|
|
|
// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
|
|
static const size_t kHeader = 12;
|
|
|
|
WriteBatch::WriteBatch() {
|
|
Clear();
|
|
}
|
|
|
|
WriteBatch::~WriteBatch() { }
|
|
|
|
WriteBatch::Handler::~Handler() { }
|
|
|
|
void WriteBatch::Handler::Merge(const Slice& key, const Slice& value) {
|
|
throw std::runtime_error("Handler::Merge not implemented!");
|
|
}
|
|
|
|
void WriteBatch::Clear() {
|
|
rep_.clear();
|
|
rep_.resize(kHeader);
|
|
}
|
|
|
|
int WriteBatch::Count() const {
|
|
return WriteBatchInternal::Count(this);
|
|
}
|
|
|
|
Status WriteBatch::Iterate(Handler* handler) const {
|
|
Slice input(rep_);
|
|
if (input.size() < kHeader) {
|
|
return Status::Corruption("malformed WriteBatch (too small)");
|
|
}
|
|
|
|
input.remove_prefix(kHeader);
|
|
Slice key, value;
|
|
int found = 0;
|
|
while (!input.empty()) {
|
|
found++;
|
|
char tag = input[0];
|
|
input.remove_prefix(1);
|
|
switch (tag) {
|
|
case kTypeValue:
|
|
if (GetLengthPrefixedSlice(&input, &key) &&
|
|
GetLengthPrefixedSlice(&input, &value)) {
|
|
handler->Put(key, value);
|
|
} else {
|
|
return Status::Corruption("bad WriteBatch Put");
|
|
}
|
|
break;
|
|
case kTypeDeletion:
|
|
if (GetLengthPrefixedSlice(&input, &key)) {
|
|
handler->Delete(key);
|
|
} else {
|
|
return Status::Corruption("bad WriteBatch Delete");
|
|
}
|
|
break;
|
|
case kTypeMerge:
|
|
if (GetLengthPrefixedSlice(&input, &key) &&
|
|
GetLengthPrefixedSlice(&input, &value)) {
|
|
handler->Merge(key, value);
|
|
} else {
|
|
return Status::Corruption("bad WriteBatch Merge");
|
|
}
|
|
break;
|
|
default:
|
|
return Status::Corruption("unknown WriteBatch tag");
|
|
}
|
|
}
|
|
if (found != WriteBatchInternal::Count(this)) {
|
|
return Status::Corruption("WriteBatch has wrong count");
|
|
} else {
|
|
return Status::OK();
|
|
}
|
|
}
|
|
|
|
int WriteBatchInternal::Count(const WriteBatch* b) {
|
|
return DecodeFixed32(b->rep_.data() + 8);
|
|
}
|
|
|
|
void WriteBatchInternal::SetCount(WriteBatch* b, int n) {
|
|
EncodeFixed32(&b->rep_[8], n);
|
|
}
|
|
|
|
SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) {
|
|
return SequenceNumber(DecodeFixed64(b->rep_.data()));
|
|
}
|
|
|
|
void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) {
|
|
EncodeFixed64(&b->rep_[0], seq);
|
|
}
|
|
|
|
void WriteBatch::Put(const Slice& key, const Slice& value) {
|
|
WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
|
|
rep_.push_back(static_cast<char>(kTypeValue));
|
|
PutLengthPrefixedSlice(&rep_, key);
|
|
PutLengthPrefixedSlice(&rep_, value);
|
|
}
|
|
|
|
void WriteBatch::Delete(const Slice& key) {
|
|
WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
|
|
rep_.push_back(static_cast<char>(kTypeDeletion));
|
|
PutLengthPrefixedSlice(&rep_, key);
|
|
}
|
|
|
|
void WriteBatch::Merge(const Slice& key, const Slice& value) {
|
|
WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
|
|
rep_.push_back(static_cast<char>(kTypeMerge));
|
|
PutLengthPrefixedSlice(&rep_, key);
|
|
PutLengthPrefixedSlice(&rep_, value);
|
|
}
|
|
|
|
|
|
namespace {
|
|
class MemTableInserter : public WriteBatch::Handler {
|
|
public:
|
|
SequenceNumber sequence_;
|
|
MemTable* mem_;
|
|
const Options* options_;
|
|
DBImpl* db_;
|
|
const bool filter_deletes_;
|
|
|
|
MemTableInserter(SequenceNumber sequence, MemTable* mem, const Options* opts,
|
|
DB* db, const bool filter_deletes)
|
|
: sequence_(sequence),
|
|
mem_(mem),
|
|
options_(opts),
|
|
db_(reinterpret_cast<DBImpl*>(db)),
|
|
filter_deletes_(filter_deletes) {
|
|
assert(mem_);
|
|
if (filter_deletes_) {
|
|
assert(options_);
|
|
assert(db_);
|
|
}
|
|
}
|
|
|
|
virtual void Put(const Slice& key, const Slice& value) {
|
|
mem_->Add(sequence_, kTypeValue, key, value);
|
|
sequence_++;
|
|
}
|
|
virtual void Merge(const Slice& key, const Slice& value) {
|
|
mem_->Add(sequence_, kTypeMerge, key, value);
|
|
sequence_++;
|
|
}
|
|
virtual void Delete(const Slice& key) {
|
|
if (filter_deletes_ && !db_->KeyMayExistImpl(key, sequence_)) {
|
|
RecordTick(options_->statistics, NUMBER_FILTERED_DELETES);
|
|
return;
|
|
}
|
|
mem_->Add(sequence_, kTypeDeletion, key, Slice());
|
|
sequence_++;
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
Status WriteBatchInternal::InsertInto(const WriteBatch* b, MemTable* mem,
|
|
const Options* opts, DB* db,
|
|
const bool filter_deletes) {
|
|
MemTableInserter inserter(WriteBatchInternal::Sequence(b), mem, opts, db,
|
|
filter_deletes);
|
|
return b->Iterate(&inserter);
|
|
}
|
|
|
|
void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
|
|
assert(contents.size() >= kHeader);
|
|
b->rep_.assign(contents.data(), contents.size());
|
|
}
|
|
|
|
void WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src) {
|
|
SetCount(dst, Count(dst) + Count(src));
|
|
assert(src->rep_.size() >= kHeader);
|
|
dst->rep_.append(src->rep_.data() + kHeader, src->rep_.size() - kHeader);
|
|
}
|
|
|
|
} // namespace leveldb
|