2011-03-18 23:37:00 +01:00
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "db/builder.h"
|
|
|
|
|
|
|
|
#include "db/filename.h"
|
|
|
|
#include "db/dbformat.h"
|
2013-03-21 23:59:47 +01:00
|
|
|
#include "db/merge_helper.h"
|
2011-03-18 23:37:00 +01:00
|
|
|
#include "db/table_cache.h"
|
|
|
|
#include "db/version_edit.h"
|
2011-03-30 20:35:40 +02:00
|
|
|
#include "leveldb/db.h"
|
|
|
|
#include "leveldb/env.h"
|
|
|
|
#include "leveldb/iterator.h"
|
2013-06-05 20:06:21 +02:00
|
|
|
#include "util/stop_watch.h"
|
2011-03-18 23:37:00 +01:00
|
|
|
|
|
|
|
namespace leveldb {
|
|
|
|
|
|
|
|
// Builds a table file from the entries produced by *iter and describes the
// result in *meta.
//
// The output file name is TableFileName(dbname, meta->number). *iter is
// rewound with SeekToFirst() and consumed to its end. If it yields no
// entries, no file is created and meta->file_size stays 0. Otherwise
// meta->smallest is decoded from the first key, meta->largest from the last
// key written, and meta->file_size is set once the table has been finished
// successfully.
//
// Redundant entries are dropped only when
// options.purge_redundant_kvs_while_flush is set AND
// earliest_seqno_in_memtable > newest_snapshot. In that mode only the first
// entry encountered for each user key is written, and runs that start with a
// kTypeMerge entry are collapsed through MergeHelper. In every other case all
// entries are copied to the table unchanged.
//
// Returns:
//   - the error from env->NewWritableFile() immediately if the output file
//     cannot be created;
//   - otherwise the first error hit while finishing, syncing, closing or
//     re-opening the table, except that a non-OK iter->status() always takes
//     precedence.
// Unless a non-empty table was produced successfully, env->DeleteFile() is
// called on the output file name before returning (the early return above
// excepted).
Status BuildTable(const std::string& dbname,
                  Env* env,
                  const Options& options,
                  const StorageOptions& soptions,
                  TableCache* table_cache,
                  Iterator* iter,
                  FileMetaData* meta,
                  const Comparator* user_comparator,
                  const SequenceNumber newest_snapshot,
                  const SequenceNumber earliest_seqno_in_memtable) {
  Status s;
  meta->file_size = 0;
  iter->SeekToFirst();

  // If the sequence number of the smallest entry in the memtable is
  // smaller than (or equal to) the most recent snapshot, then we do not
  // trigger removal of duplicate/deleted keys as part of this builder.
  const bool purge = options.purge_redundant_kvs_while_flush &&
                     earliest_seqno_in_memtable > newest_snapshot;

  const std::string fname = TableFileName(dbname, meta->number);
  if (iter->Valid()) {
    unique_ptr<WritableFile> file;
    s = env->NewWritableFile(fname, &file, soptions);
    if (!s.ok()) {
      return s;
    }
    // Declared after `file` so it can never outlive the file it writes to.
    unique_ptr<TableBuilder> builder(
        new TableBuilder(options, file.get(), 0));

    // the first key is the smallest key
    Slice key = iter->key();
    meta->smallest.DecodeFrom(key);

    MergeHelper merge(user_comparator, options.merge_operator,
                      options.info_log.get(),
                      true /* internal key corruption is not ok */);

    if (purge) {
      // Ugly workaround to avoid compiler errors in release builds: `ok` is
      // only read by assert(), and ikey.type is pre-set before parsing.
      // TODO: find a clean way to treat in memory key corruption
      ParsedInternalKey ikey;
      ikey.type = kTypeValue;
      auto ok __attribute__((unused)) = ParseInternalKey(key, &ikey);
      // in-memory key corruption is not ok;
      assert(ok);

      // The pending entry: it is written only once an entry with a
      // different user key shows up (or the input ends).
      ParsedInternalKey prev_ikey;
      std::string prev_key;
      std::string prev_value;

      if (ikey.type == kTypeMerge) {
        // merge values if the first entry is of merge type
        merge.MergeUntil(iter, 0 /* don't worry about snapshot */);
        prev_key.assign(merge.key().data(), merge.key().size());
        ok = ParseInternalKey(Slice(prev_key), &prev_ikey);
        assert(ok);
        prev_value.assign(merge.value().data(), merge.value().size());
      } else {
        // store first key-value
        prev_key.assign(key.data(), key.size());
        prev_value.assign(iter->value().data(), iter->value().size());
        ok = ParseInternalKey(Slice(prev_key), &prev_ikey);
        assert(ok);
        assert(prev_ikey.sequence >= earliest_seqno_in_memtable);
        iter->Next();
      }

      while (iter->Valid()) {
        // Set when MergeUntil() has already moved the iterator forward, so
        // the loop must not advance it a second time.
        bool iterator_at_next = false;
        ParsedInternalKey this_ikey;
        const Slice this_key = iter->key();
        ok = ParseInternalKey(this_key, &this_ikey);
        assert(ok);
        assert(this_ikey.sequence >= earliest_seqno_in_memtable);

        if (user_comparator->Compare(prev_ikey.user_key,
                                     this_ikey.user_key) != 0) {
          // This key is different from previous key.
          // Output prev key and remember current key
          builder->Add(Slice(prev_key), Slice(prev_value));
          if (this_ikey.type == kTypeMerge) {
            merge.MergeUntil(iter, 0 /* don't worry about snapshot */);
            iterator_at_next = true;
            prev_key.assign(merge.key().data(), merge.key().size());
            ok = ParseInternalKey(Slice(prev_key), &prev_ikey);
            assert(ok);
            prev_value.assign(merge.value().data(), merge.value().size());
          } else {
            prev_key.assign(this_key.data(), this_key.size());
            prev_value.assign(iter->value().data(), iter->value().size());
            ok = ParseInternalKey(Slice(prev_key), &prev_ikey);
            assert(ok);
          }
        } else {
          // seqno within the same key are in decreasing order
          assert(this_ikey.sequence < prev_ikey.sequence);
          // This key is an earlier version of the same key in prev_key.
          // Skip current key.
        }

        if (!iterator_at_next) {
          iter->Next();
        }
      }
      // output last key
      builder->Add(Slice(prev_key), Slice(prev_value));
      meta->largest.DecodeFrom(Slice(prev_key));
    } else {
      // Copy everything; the last key seen ends up as the largest key.
      for (; iter->Valid(); iter->Next()) {
        const Slice cur_key = iter->key();
        meta->largest.DecodeFrom(cur_key);
        builder->Add(cur_key, iter->value());
      }
    }

    // `s` is still OK here (any earlier failure has returned). Pick up an
    // input iterator failure now so that a truncated table is abandoned
    // instead of being finished, synced and re-opened for nothing.
    s = iter->status();

    // Finish and check for builder errors
    if (s.ok()) {
      s = builder->Finish();
      if (s.ok()) {
        meta->file_size = builder->FileSize();
        assert(meta->file_size > 0);
      }
    } else {
      builder->Abandon();
    }
    // Release the builder before the file is synced and closed.
    builder.reset();

    // Finish and check for file errors
    if (s.ok() && !options.disableDataSync) {
      StopWatch sw(env, options.statistics, TABLE_SYNC_MICROS);
      s = options.use_fsync ? file->Fsync() : file->Sync();
    }
    if (s.ok()) {
      s = file->Close();
    }

    if (s.ok()) {
      // Verify that the table is usable
      unique_ptr<Iterator> it(table_cache->NewIterator(ReadOptions(),
                                                       soptions,
                                                       meta->number,
                                                       meta->file_size));
      s = it->status();
    }
  }

  // Check for input iterator errors. This also covers an iterator that was
  // never valid, and lets an iterator error override any later file error.
  if (!iter->status().ok()) {
    s = iter->status();
  }

  // Keep the file only if a non-empty table was written successfully.
  if (!(s.ok() && meta->file_size > 0)) {
    env->DeleteFile(fname);
  }
  return s;
}
|
|
|
|
|
2011-10-31 18:22:06 +01:00
|
|
|
} // namespace leveldb
|