2013-10-28 17:54:09 -07:00
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include <algorithm>
|
|
|
|
#include <set>
|
|
|
|
|
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/filter_policy.h"
|
|
|
|
#include "db/db_impl.h"
|
|
|
|
#include "db/filename.h"
|
|
|
|
#include "db/version_set.h"
|
|
|
|
#include "db/write_batch_internal.h"
|
2014-01-17 12:46:06 -08:00
|
|
|
#include "rocksdb/statistics.h"
|
2013-10-28 17:54:09 -07:00
|
|
|
#include "rocksdb/cache.h"
|
|
|
|
#include "rocksdb/compaction_filter.h"
|
|
|
|
#include "rocksdb/env.h"
|
|
|
|
#include "rocksdb/table.h"
|
2014-01-27 21:58:46 -08:00
|
|
|
#include "rocksdb/table_properties.h"
|
|
|
|
#include "table/table_builder.h"
|
2013-10-28 17:54:09 -07:00
|
|
|
#include "util/hash.h"
|
|
|
|
#include "util/logging.h"
|
|
|
|
#include "util/mutexlock.h"
|
|
|
|
#include "util/testharness.h"
|
|
|
|
#include "util/testutil.h"
|
|
|
|
#include "utilities/merge_operators.h"
|
|
|
|
|
|
|
|
using std::unique_ptr;
|
|
|
|
|
2014-01-30 17:18:17 -08:00
|
|
|
// IS THIS FILE STILL NEEDED?
|
2013-10-28 17:54:09 -07:00
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
// SimpleTable is a simple table format for UNIT TEST ONLY. It is not built
|
|
|
|
// as production quality.
|
|
|
|
// SimpleTable requires the input key size to be fixed 16 bytes, value cannot
|
|
|
|
// be longer than 150000 bytes and stored data on disk in this format:
|
|
|
|
// +--------------------------------------------+ <= key1 offset
|
|
|
|
// | key1 | value_size (4 bytes) | |
|
|
|
|
// +----------------------------------------+ |
|
|
|
|
// | value1 |
|
|
|
|
// | |
|
|
|
|
// +----------------------------------------+---+ <= key2 offset
|
|
|
|
// | key2 | value_size (4 bytes) | |
|
|
|
|
// +----------------------------------------+ |
|
|
|
|
// | value2 |
|
|
|
|
// | |
|
|
|
|
// | ...... |
|
|
|
|
// +-----------------+--------------------------+ <= index_block_offset
|
|
|
|
// | key1 | key1 offset (8 bytes) |
|
|
|
|
// +-----------------+--------------------------+
|
|
|
|
// | key2 | key2 offset (8 bytes) |
|
|
|
|
// +-----------------+--------------------------+
|
|
|
|
// | key3 | key3 offset (8 bytes) |
|
|
|
|
// +-----------------+--------------------------+
|
|
|
|
// | ...... |
|
|
|
|
// +-----------------+------------+-------------+
|
|
|
|
// | index_block_offset (8 bytes) |
|
|
|
|
// +------------------------------+
|
|
|
|
|
|
|
|
// SimpleTable is a simple table format for UNIT TEST ONLY. It is not built
|
|
|
|
// as production quality.
|
2013-10-30 10:52:33 -07:00
|
|
|
class SimpleTableReader: public TableReader {
|
|
|
|
public:
|
2013-10-28 17:54:09 -07:00
|
|
|
// Attempt to open the table that is stored in bytes [0..file_size)
|
|
|
|
// of "file", and read the metadata entries necessary to allow
|
|
|
|
// retrieving data from the table.
|
|
|
|
//
|
|
|
|
// If successful, returns ok and sets "*table" to the newly opened
|
|
|
|
// table. The client should delete "*table" when no longer needed.
|
|
|
|
// If there was an error while initializing the table, sets "*table"
|
|
|
|
// to nullptr and returns a non-ok status. Does not take ownership of
|
|
|
|
// "*source", but the client must ensure that "source" remains live
|
|
|
|
// for the duration of the returned table's lifetime.
|
|
|
|
//
|
|
|
|
// *file must remain live while this Table is in use.
|
2013-10-30 10:52:33 -07:00
|
|
|
static Status Open(const Options& options, const EnvOptions& soptions,
|
|
|
|
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
|
|
|
|
unique_ptr<TableReader>* table_reader);
|
2013-10-28 17:54:09 -07:00
|
|
|
|
In DB::NewIterator(), try to allocate the whole iterator tree in an arena
Summary:
In this patch, try to allocate the whole iterator tree starting from DBIter from an arena
1. ArenaWrappedDBIter is created when serves as the entry point of an iterator tree, with an arena in it.
2. Add an option to create iterator from arena for following iterators: DBIter, MergingIterator, MemtableIterator, all mem table's iterators, all table reader's iterators and two level iterator.
3. MergeIteratorBuilder is created to incrementally build the tree of internal iterators. It is passed to mem table list and version set and add iterators to it.
Limitations:
(1) Only DB::NewIterator() without tailing uses the arena. Other cases, including readonly DB and compactions are still from malloc
(2) Two level iterator itself is allocated in arena, but not iterators inside it.
Test Plan: make all check
Reviewers: ljin, haobo
Reviewed By: haobo
Subscribers: leveldb, dhruba, yhchiang, igor
Differential Revision: https://reviews.facebook.net/D18513
2014-06-02 16:38:00 -07:00
|
|
|
Iterator* NewIterator(const ReadOptions&, Arena* arena) override;
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2014-01-27 13:53:22 -08:00
|
|
|
Status Get(const ReadOptions&, const Slice& key, void* arg,
|
|
|
|
bool (*handle_result)(void* arg, const ParsedInternalKey& k,
|
2014-06-20 10:23:02 +02:00
|
|
|
const Slice& v),
|
2014-01-27 13:53:22 -08:00
|
|
|
void (*mark_key_may_exist)(void*) = nullptr) override;
|
2013-10-28 17:54:09 -07:00
|
|
|
|
|
|
|
uint64_t ApproximateOffsetOf(const Slice& key) override;
|
|
|
|
|
|
|
|
void SetupForCompaction() override;
|
|
|
|
|
2014-02-07 19:26:49 -08:00
|
|
|
std::shared_ptr<const TableProperties> GetTableProperties() const override;
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
~SimpleTableReader();
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
private:
|
2013-10-28 17:54:09 -07:00
|
|
|
struct Rep;
|
|
|
|
Rep* rep_;
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
explicit SimpleTableReader(Rep* rep) {
|
2013-10-28 17:54:09 -07:00
|
|
|
rep_ = rep;
|
|
|
|
}
|
|
|
|
friend class TableCache;
|
|
|
|
friend class SimpleTableIterator;
|
|
|
|
|
|
|
|
Status GetOffset(const Slice& target, uint64_t* offset);
|
|
|
|
|
|
|
|
// No copying allowed
|
2013-10-30 10:52:33 -07:00
|
|
|
explicit SimpleTableReader(const TableReader&) = delete;
|
|
|
|
void operator=(const TableReader&) = delete;
|
2013-10-28 17:54:09 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
// Iterator to iterate SimpleTable
|
|
|
|
class SimpleTableIterator: public Iterator {
|
|
|
|
public:
|
2013-10-30 10:52:33 -07:00
|
|
|
explicit SimpleTableIterator(SimpleTableReader* table);
|
2013-10-28 17:54:09 -07:00
|
|
|
~SimpleTableIterator();
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
bool Valid() const;
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
void SeekToFirst();
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
void SeekToLast();
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
void Seek(const Slice& target);
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
void Next();
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
void Prev();
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
Slice key() const;
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
Slice value() const;
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
Status status() const;
|
2013-10-28 17:54:09 -07:00
|
|
|
|
|
|
|
private:
|
2013-10-30 10:52:33 -07:00
|
|
|
SimpleTableReader* table_;
|
|
|
|
uint64_t offset_;
|
|
|
|
uint64_t next_offset_;
|
|
|
|
Slice key_;
|
|
|
|
Slice value_;
|
|
|
|
char tmp_str_[4];
|
|
|
|
char* key_str_;
|
|
|
|
char* value_str_;
|
|
|
|
int value_str_len_;
|
|
|
|
Status status_;
|
|
|
|
// No copying allowed
|
|
|
|
SimpleTableIterator(const SimpleTableIterator&) = delete;
|
|
|
|
void operator=(const Iterator&) = delete;
|
2013-10-28 17:54:09 -07:00
|
|
|
};
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
// Private state of a SimpleTableReader.
struct SimpleTableReader::Rep {
  ~Rep() {
  }
  // storage_options is copied: keeping a reference to the caller's object
  // would dangle if the caller's EnvOptions did not outlive the reader.
  Rep(const EnvOptions& storage_options, uint64_t index_offset,
      int entry_count) :
      soptions(storage_options), index_start_offset(index_offset),
      num_entries(entry_count) {
  }

  Options options;
  const EnvOptions soptions;
  Status status;
  unique_ptr<RandomAccessFile> file;
  // File offset at which the index block starts; data entries live in
  // [0, index_start_offset).
  uint64_t index_start_offset;
  // Number of (key, offset) records in the index block.
  int num_entries;
  std::shared_ptr<TableProperties> table_properties;

  // Fixed sizes (in bytes) of the on-disk fields.
  static const int user_key_size = 16;
  static const int offset_length = 8;
  static const int key_footer_len = 8;

  // Size of an internal key: the user key followed by its 8-byte footer.
  static int GetInternalKeyLength() {
    return user_key_size + key_footer_len;
  }
};
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
// Releases the Rep handed to the private constructor.
SimpleTableReader::~SimpleTableReader() {
  Rep* const owned = rep_;
  delete owned;
}
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
// Opens a SimpleTable stored in bytes [0..size) of "file".
//
// Reads the 8-byte footer to find where the index block starts, derives the
// number of index entries from the index block's length, and on success
// stores a new reader (which takes ownership of "file") in "*table_reader".
// On any failure "*table_reader" is left empty and a non-ok status is
// returned.
Status SimpleTableReader::Open(const Options& options,
                               const EnvOptions& soptions,
                               unique_ptr<RandomAccessFile> && file,
                               uint64_t size,
                               unique_ptr<TableReader>* table_reader) {
  table_reader->reset();

  const uint64_t footer_len = Rep::offset_length;
  if (size < footer_len) {
    // Guards the unsigned subtraction below.
    return Status::Corruption("SimpleTable file is too short for a footer");
  }

  char footer_space[Rep::offset_length];
  Slice footer_input;
  Status s = file->Read(size - footer_len, Rep::offset_length, &footer_input,
                        footer_space);
  if (!s.ok()) {
    return s;
  }
  if (footer_input.size() != footer_len) {
    return Status::Corruption("truncated SimpleTable footer");
  }

  // Decode from the returned slice: Read() is not required to copy the bytes
  // into the scratch buffer, it may point the result at its own storage.
  const uint64_t index_start_offset = DecodeFixed64(footer_input.data());
  const uint64_t index_end_offset = size - footer_len;
  if (index_start_offset > index_end_offset) {
    return Status::Corruption("SimpleTable index offset is past the footer");
  }

  const uint64_t index_entry_len =
      Rep::GetInternalKeyLength() + Rep::offset_length;
  const uint64_t entry_count =
      (index_end_offset - index_start_offset) / index_entry_len;
  const int num_entries = static_cast<int>(entry_count);
  if (num_entries < 0 ||
      static_cast<uint64_t>(num_entries) != entry_count) {
    // Rep stores the count as an int; refuse counts that do not fit.
    return Status::Corruption("SimpleTable index has too many entries");
  }

  SimpleTableReader::Rep* rep = new SimpleTableReader::Rep(soptions,
                                                           index_start_offset,
                                                           num_entries);
  rep->file = std::move(file);
  rep->options = options;
  table_reader->reset(new SimpleTableReader(rep));
  return s;
}
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
// SimpleTable needs no preparation before a compaction; intentionally empty.
void SimpleTableReader::SetupForCompaction() {}
|
|
|
|
|
2014-02-07 19:26:49 -08:00
|
|
|
std::shared_ptr<const TableProperties> SimpleTableReader::GetTableProperties()
|
|
|
|
const {
|
2013-11-19 16:29:42 -08:00
|
|
|
return rep_->table_properties;
|
2013-10-28 17:54:09 -07:00
|
|
|
}
|
|
|
|
|
In DB::NewIterator(), try to allocate the whole iterator tree in an arena
Summary:
In this patch, try to allocate the whole iterator tree starting from DBIter from an arena
1. ArenaWrappedDBIter is created to serve as the entry point of an iterator tree, and it holds an arena.
2. Add an option to create iterator from arena for following iterators: DBIter, MergingIterator, MemtableIterator, all mem table's iterators, all table reader's iterators and two level iterator.
3. MergeIteratorBuilder is created to incrementally build the tree of internal iterators. It is passed to mem table list and version set and add iterators to it.
Limitations:
(1) Only DB::NewIterator() without tailing uses the arena. Other cases, including readonly DB and compactions are still from malloc
(2) Two level iterator itself is allocated in arena, but not iterators inside it.
Test Plan: make all check
Reviewers: ljin, haobo
Reviewed By: haobo
Subscribers: leveldb, dhruba, yhchiang, igor
Differential Revision: https://reviews.facebook.net/D18513
2014-06-02 16:38:00 -07:00
|
|
|
// Creates an iterator over this table. With a non-null arena the iterator is
// placement-constructed in arena memory; otherwise it is heap-allocated.
Iterator* SimpleTableReader::NewIterator(const ReadOptions& options,
                                         Arena* arena) {
  if (arena != nullptr) {
    auto slot = arena->AllocateAligned(sizeof(SimpleTableIterator));
    return new (slot) SimpleTableIterator(this);
  }
  return new SimpleTableIterator(this);
}
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
// Binary-searches the index block for the first entry whose internal key is
// >= "target" and stores that entry's data offset in "*offset".
//
// When every key is smaller than "target" (or the table has no entries),
// "*offset" is set to the index start offset, i.e. the end of the data.
// Returns a non-ok status if an index read fails or comes back short.
Status SimpleTableReader::GetOffset(const Slice& target, uint64_t* offset) {
  // Sized from the integral constants so this is a fixed-size array rather
  // than a variable-length one.
  char key_chars[Rep::user_key_size + Rep::key_footer_len];
  const size_t key_len = sizeof(key_chars);
  // 64-bit arithmetic so that entry_len * index cannot wrap.
  const uint64_t entry_len = key_len + Rep::offset_length;
  const uint64_t num_entries =
      rep_->num_entries > 0 ? static_cast<uint64_t>(rep_->num_entries) : 0;

  InternalKeyComparator ikc(rep_->options.comparator);
  Slice entry_key;

  // Invariant: the answer lies in [lo, hi]. An empty table skips the loop.
  uint64_t lo = 0;
  uint64_t hi = num_entries;
  while (lo < hi) {
    const uint64_t mid = lo + (hi - lo) / 2;
    const uint64_t offset_to_read = rep_->index_start_offset + entry_len * mid;
    Status s = rep_->file->Read(offset_to_read, key_len, &entry_key,
                                key_chars);
    if (!s.ok()) {
      return s;
    }
    if (entry_key.size() != key_len) {
      return Status::Corruption("truncated SimpleTable index key");
    }

    if (ikc.Compare(entry_key, target) < 0) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }

  if (lo >= num_entries) {
    *offset = rep_->index_start_offset;
    return Status::OK();
  }

  char value_offset_chars[Rep::offset_length];
  Slice offset_slice;
  const uint64_t offset_for_value_offset =
      rep_->index_start_offset + entry_len * lo + key_len;
  Status s = rep_->file->Read(offset_for_value_offset, Rep::offset_length,
                              &offset_slice, value_offset_chars);
  if (!s.ok()) {
    return s;
  }
  if (offset_slice.size() != sizeof(value_offset_chars)) {
    return Status::Corruption("truncated SimpleTable index offset");
  }
  // Decode from the returned slice; the scratch buffer is not guaranteed to
  // have been filled by Read().
  *offset = DecodeFixed64(offset_slice.data());
  return s;
}
|
|
|
|
|
2014-01-27 13:53:22 -08:00
|
|
|
// Seeks to "k" and passes each subsequent entry to "saver" until it returns
// false or the table is exhausted.
//
// Returns Corruption if an entry's key cannot be parsed as an internal key,
// otherwise the iterator's final status. "mark_key_may_exist" is unused.
Status SimpleTableReader::Get(const ReadOptions& options, const Slice& k,
                              void* arg,
                              bool (*saver)(void*, const ParsedInternalKey&,
                                            const Slice&),
                              void (*mark_key_may_exist)(void*)) {
  // Stack-allocated so the iterator is destroyed on every exit path,
  // including the early Corruption return.
  SimpleTableIterator iter(this);
  for (iter.Seek(k); iter.Valid(); iter.Next()) {
    ParsedInternalKey parsed_key;
    if (!ParseInternalKey(iter.key(), &parsed_key)) {
      return Status::Corruption(Slice());
    }

    if (!(*saver)(arg, parsed_key, iter.value())) {
      break;
    }
  }
  return iter.status();
}
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
// SimpleTable does not estimate offsets; every key reports offset 0.
uint64_t SimpleTableReader::ApproximateOffsetOf(const Slice& key) {
  const uint64_t kNoEstimate = 0;
  return kNoEstimate;
}
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
// Allocates the key scratch buffer, marks the value buffer as not yet
// allocated (-1), and positions the iterator on the first entry.
SimpleTableIterator::SimpleTableIterator(SimpleTableReader* table)
    : table_(table),
      key_str_(new char[SimpleTableReader::Rep::GetInternalKeyLength()]),
      value_str_len_(-1) {
  SeekToFirst();
}
|
|
|
|
|
|
|
|
// Frees the scratch buffers. The value buffer exists only once
// value_str_len_ has become non-negative.
SimpleTableIterator::~SimpleTableIterator() {
  const bool value_buffer_allocated = (value_str_len_ >= 0);
  if (value_buffer_allocated) {
    delete[] value_str_;
  }
  delete[] key_str_;
}
|
|
|
|
|
|
|
|
bool SimpleTableIterator::Valid() const {
|
2013-11-12 20:05:28 -08:00
|
|
|
return offset_ < table_->rep_->index_start_offset;
|
2013-10-28 17:54:09 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Rewinds to offset 0 and loads the entry stored there (if any).
void SimpleTableIterator::SeekToFirst() {
  const uint64_t kFirstEntryOffset = 0;
  next_offset_ = kFirstEntryOffset;
  Next();
}
|
|
|
|
|
|
|
|
// Backward positioning is not supported by SimpleTable; calling this trips
// an assertion.
void SimpleTableIterator::SeekToLast() {
  assert(false);
}
|
|
|
|
|
|
|
|
void SimpleTableIterator::Seek(const Slice& target) {
|
|
|
|
Status s = table_->GetOffset(target, &next_offset_);
|
|
|
|
if (!s.ok()) {
|
|
|
|
status_ = s;
|
|
|
|
}
|
|
|
|
Next();
|
|
|
|
}
|
|
|
|
|
|
|
|
void SimpleTableIterator::Next() {
|
|
|
|
offset_ = next_offset_;
|
|
|
|
if (offset_ >= table_->rep_->index_start_offset) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
Slice result;
|
2013-10-30 10:52:33 -07:00
|
|
|
int internal_key_size = SimpleTableReader::Rep::GetInternalKeyLength();
|
2013-10-28 17:54:09 -07:00
|
|
|
|
|
|
|
Status s = table_->rep_->file->Read(next_offset_, internal_key_size, &result,
|
|
|
|
key_str_);
|
|
|
|
next_offset_ += internal_key_size;
|
|
|
|
key_ = result;
|
|
|
|
|
|
|
|
Slice value_size_slice;
|
|
|
|
s = table_->rep_->file->Read(next_offset_, 4, &value_size_slice, tmp_str_);
|
|
|
|
next_offset_ += 4;
|
|
|
|
uint32_t value_size = DecodeFixed32(tmp_str_);
|
|
|
|
|
|
|
|
Slice value_slice;
|
|
|
|
if ((int) value_size > value_str_len_) {
|
|
|
|
if (value_str_len_ >= 0) {
|
2013-10-29 14:29:03 -07:00
|
|
|
delete[] value_str_;
|
2013-10-28 17:54:09 -07:00
|
|
|
}
|
|
|
|
value_str_ = new char[value_size];
|
|
|
|
value_str_len_ = value_size;
|
|
|
|
}
|
|
|
|
s = table_->rep_->file->Read(next_offset_, value_size, &value_slice,
|
|
|
|
value_str_);
|
|
|
|
next_offset_ += value_size;
|
|
|
|
value_ = value_slice;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Backward iteration is not supported by SimpleTable; calling this trips an
// assertion.
void SimpleTableIterator::Prev() {
  assert(false);
}
|
|
|
|
|
|
|
|
// Returns the internal key of the current entry.
// (A leftover debugging log statement that fired on every call was removed.)
Slice SimpleTableIterator::key() const {
  return key_;
}
|
|
|
|
|
|
|
|
// Returns the value of the current entry as loaded by the last Next().
Slice SimpleTableIterator::value() const {
  const Slice& current = value_;
  return current;
}
|
|
|
|
|
|
|
|
// Returns the status recorded by the iterator so far.
Status SimpleTableIterator::status() const {
  const Status& recorded = status_;
  return recorded;
}
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
class SimpleTableBuilder: public TableBuilder {
|
|
|
|
public:
|
2013-10-28 17:54:09 -07:00
|
|
|
// Create a builder that will store the contents of the table it is
|
|
|
|
// building in *file. Does not close the file. It is up to the
|
|
|
|
// caller to close the file after calling Finish(). The output file
|
|
|
|
// will be part of level specified by 'level'. A value of -1 means
|
|
|
|
// that the caller does not know which level the output file will reside.
|
2013-10-30 10:52:33 -07:00
|
|
|
SimpleTableBuilder(const Options& options, WritableFile* file,
|
|
|
|
CompressionType compression_type);
|
2013-10-28 17:54:09 -07:00
|
|
|
|
|
|
|
// REQUIRES: Either Finish() or Abandon() has been called.
|
|
|
|
~SimpleTableBuilder();
|
|
|
|
|
|
|
|
// Add key,value to the table being constructed.
|
|
|
|
// REQUIRES: key is after any previously added key according to comparator.
|
|
|
|
// REQUIRES: Finish(), Abandon() have not been called
|
|
|
|
void Add(const Slice& key, const Slice& value) override;
|
|
|
|
|
|
|
|
// Return non-ok iff some error has been detected.
|
|
|
|
Status status() const override;
|
|
|
|
|
|
|
|
// Finish building the table. Stops using the file passed to the
|
|
|
|
// constructor after this function returns.
|
|
|
|
// REQUIRES: Finish(), Abandon() have not been called
|
|
|
|
Status Finish() override;
|
|
|
|
|
|
|
|
// Indicate that the contents of this builder should be abandoned. Stops
|
|
|
|
// using the file passed to the constructor after this function returns.
|
|
|
|
// If the caller is not going to call Finish(), it must call Abandon()
|
|
|
|
// before destroying this builder.
|
|
|
|
// REQUIRES: Finish(), Abandon() have not been called
|
|
|
|
void Abandon() override;
|
|
|
|
|
|
|
|
// Number of calls to Add() so far.
|
|
|
|
uint64_t NumEntries() const override;
|
|
|
|
|
|
|
|
// Size of the file generated so far. If invoked after a successful
|
|
|
|
// Finish() call, returns the size of the final generated file.
|
|
|
|
uint64_t FileSize() const override;
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
private:
|
2013-10-28 17:54:09 -07:00
|
|
|
struct Rep;
|
|
|
|
Rep* rep_;
|
|
|
|
|
|
|
|
// No copying allowed
|
|
|
|
SimpleTableBuilder(const SimpleTableBuilder&) = delete;
|
|
|
|
void operator=(const SimpleTableBuilder&) = delete;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Private state of a SimpleTableBuilder.
struct SimpleTableBuilder::Rep {
  Rep(const Options& opt, WritableFile* f)
      : options(opt), file(f) {}
  ~Rep() {}

  Options options;
  WritableFile* file;    // destination file
  uint64_t offset = 0;   // running count of bytes accounted for so far
  Status status;

  uint64_t num_entries = 0;  // incremented once per Add()

  bool closed = false;  // Either Finish() or Abandon() has been called.

  // Fixed sizes (in bytes) of the on-disk fields.
  static const int user_key_size = 16;
  static const int offset_length = 8;
  static const int key_footer_len = 8;

  // Size of an internal key: the user key followed by its 8-byte footer.
  static int GetInternalKeyLength() {
    return user_key_size + key_footer_len;
  }

  // Accumulated index block: one (internal key, 8-byte offset) record per
  // added entry.
  std::string index;
};
|
|
|
|
|
|
|
|
// Sets up the builder state for writing to "file". compression_type is not
// used.
SimpleTableBuilder::SimpleTableBuilder(const Options& options,
                                       WritableFile* file,
                                       CompressionType compression_type)
    : rep_(new SimpleTableBuilder::Rep(options, file)) {}
|
|
|
|
|
|
|
|
// Releases the builder state allocated by the constructor.
SimpleTableBuilder::~SimpleTableBuilder() {
  Rep* const owned = rep_;
  delete owned;
}
|
|
|
|
|
|
|
|
void SimpleTableBuilder::Add(const Slice& key, const Slice& value) {
|
2013-10-30 10:52:33 -07:00
|
|
|
assert((int ) key.size() == Rep::GetInternalKeyLength());
|
2013-10-28 17:54:09 -07:00
|
|
|
|
|
|
|
// Update index
|
|
|
|
rep_->index.append(key.data(), key.size());
|
|
|
|
PutFixed64(&(rep_->index), rep_->offset);
|
|
|
|
|
|
|
|
// Write key-value pair
|
|
|
|
rep_->file->Append(key);
|
|
|
|
rep_->offset += Rep::GetInternalKeyLength();
|
|
|
|
|
|
|
|
std::string size;
|
|
|
|
int value_size = value.size();
|
|
|
|
PutFixed32(&size, value_size);
|
|
|
|
Slice sizeSlice(size);
|
|
|
|
rep_->file->Append(sizeSlice);
|
|
|
|
rep_->file->Append(value);
|
|
|
|
rep_->offset += value_size + 4;
|
|
|
|
|
|
|
|
rep_->num_entries++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the status stored in the builder's Rep instead of an
// unconditional OK, so any error recorded there is visible to callers.
Status SimpleTableBuilder::status() const {
  return rep_->status;
}
|
|
|
|
|
|
|
|
// Completes the table: appends the accumulated index block followed by an
// 8-byte footer holding the index block's starting offset.
//
// Returns the first write error encountered (also stored in the Rep), or an
// error already recorded in the Rep, instead of unconditionally OK.
// REQUIRES: Finish(), Abandon() have not been called
Status SimpleTableBuilder::Finish() {
  Rep* r = rep_;
  assert(!r->closed);
  r->closed = true;

  if (!r->status.ok()) {
    // Do not write an index for data that failed to reach the file.
    return r->status;
  }

  const uint64_t index_offset = r->offset;
  Status s = r->file->Append(Slice(r->index));
  if (s.ok()) {
    r->offset += r->index.size();

    std::string index_offset_str;
    PutFixed64(&index_offset_str, index_offset);
    s = r->file->Append(Slice(index_offset_str));
    if (s.ok()) {
      r->offset += index_offset_str.size();
    }
  }

  if (!s.ok()) {
    r->status = s;
  }
  return s;
}
|
|
|
|
|
|
|
|
void SimpleTableBuilder::Abandon() {
|
|
|
|
rep_->closed = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t SimpleTableBuilder::NumEntries() const {
|
|
|
|
return rep_->num_entries;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t SimpleTableBuilder::FileSize() const {
|
|
|
|
return rep_->offset;
|
|
|
|
}
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
class SimpleTableFactory: public TableFactory {
|
|
|
|
public:
|
|
|
|
~SimpleTableFactory() {
|
|
|
|
}
|
|
|
|
SimpleTableFactory() {
|
|
|
|
}
|
2013-10-28 17:54:09 -07:00
|
|
|
const char* Name() const override {
|
|
|
|
return "SimpleTable";
|
|
|
|
}
|
2014-01-27 21:58:46 -08:00
|
|
|
Status NewTableReader(const Options& options, const EnvOptions& soptions,
|
2014-01-27 13:53:22 -08:00
|
|
|
const InternalKeyComparator& internal_key,
|
2014-01-27 21:58:46 -08:00
|
|
|
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
|
2013-10-30 10:52:33 -07:00
|
|
|
unique_ptr<TableReader>* table_reader) const;
|
2013-10-28 17:54:09 -07:00
|
|
|
|
2014-01-27 13:53:22 -08:00
|
|
|
TableBuilder* NewTableBuilder(const Options& options,
|
|
|
|
const InternalKeyComparator& internal_key,
|
|
|
|
WritableFile* file,
|
2013-10-30 10:52:33 -07:00
|
|
|
CompressionType compression_type) const;
|
2013-10-28 17:54:09 -07:00
|
|
|
};
|
|
|
|
|
2014-01-27 21:58:46 -08:00
|
|
|
// Delegates to SimpleTableReader::Open; internal_key is not used.
Status SimpleTableFactory::NewTableReader(
    const Options& options, const EnvOptions& soptions,
    const InternalKeyComparator& internal_key,
    unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
    unique_ptr<TableReader>* table_reader) const {
  Status open_status = SimpleTableReader::Open(
      options, soptions, std::move(file), file_size, table_reader);
  return open_status;
}
|
|
|
|
|
2014-01-27 21:58:46 -08:00
|
|
|
// Returns a new heap-allocated SimpleTableBuilder; internal_key is not used.
TableBuilder* SimpleTableFactory::NewTableBuilder(
    const Options& options, const InternalKeyComparator& internal_key,
    WritableFile* file, CompressionType compression_type) const {
  TableBuilder* builder =
      new SimpleTableBuilder(options, file, compression_type);
  return builder;
}
|
|
|
|
|
|
|
|
class SimpleTableDBTest {
|
2013-10-30 10:52:33 -07:00
|
|
|
protected:
|
|
|
|
public:
|
2013-10-28 17:54:09 -07:00
|
|
|
std::string dbname_;
|
2013-10-30 10:52:33 -07:00
|
|
|
Env* env_;
|
2013-10-28 17:54:09 -07:00
|
|
|
DB* db_;
|
|
|
|
|
|
|
|
Options last_options_;
|
|
|
|
|
2013-10-30 10:52:33 -07:00
|
|
|
SimpleTableDBTest() :
|
|
|
|
env_(Env::Default()) {
|
2013-10-28 17:54:09 -07:00
|
|
|
dbname_ = test::TmpDir() + "/simple_table_db_test";
|
|
|
|
ASSERT_OK(DestroyDB(dbname_, Options()));
|
|
|
|
db_ = nullptr;
|
|
|
|
Reopen();
|
|
|
|
}
|
|
|
|
|
|
|
|
~SimpleTableDBTest() {
|
|
|
|
delete db_;
|
|
|
|
ASSERT_OK(DestroyDB(dbname_, Options()));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the current option configuration.
|
|
|
|
Options CurrentOptions() {
|
|
|
|
Options options;
|
|
|
|
options.table_factory.reset(new SimpleTableFactory());
|
|
|
|
return options;
|
|
|
|
}
|
|
|
|
|
|
|
|
DBImpl* dbfull() {
|
|
|
|
return reinterpret_cast<DBImpl*>(db_);
|
|
|
|
}
|
|
|
|
|
|
|
|
void Reopen(Options* options = nullptr) {
|
|
|
|
ASSERT_OK(TryReopen(options));
|
|
|
|
}
|
|
|
|
|
|
|
|
void Close() {
|
|
|
|
delete db_;
|
|
|
|
db_ = nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
void DestroyAndReopen(Options* options = nullptr) {
|
|
|
|
//Destroy using last options
|
|
|
|
Destroy(&last_options_);
|
|
|
|
ASSERT_OK(TryReopen(options));
|
|
|
|
}
|
|
|
|
|
|
|
|
void Destroy(Options* options) {
|
|
|
|
delete db_;
|
|
|
|
db_ = nullptr;
|
|
|
|
ASSERT_OK(DestroyDB(dbname_, *options));
|
|
|
|
}
|
|
|
|
|
|
|
|
Status PureReopen(Options* options, DB** db) {
|
|
|
|
return DB::Open(*options, dbname_, db);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status TryReopen(Options* options = nullptr) {
|
|
|
|
delete db_;
|
|
|
|
db_ = nullptr;
|
|
|
|
Options opts;
|
|
|
|
if (options != nullptr) {
|
|
|
|
opts = *options;
|
|
|
|
} else {
|
|
|
|
opts = CurrentOptions();
|
|
|
|
opts.create_if_missing = true;
|
|
|
|
}
|
|
|
|
last_options_ = opts;
|
|
|
|
|
|
|
|
return DB::Open(opts, dbname_, &db_);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status Put(const Slice& k, const Slice& v) {
|
|
|
|
return db_->Put(WriteOptions(), k, v);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status Delete(const std::string& k) {
|
|
|
|
return db_->Delete(WriteOptions(), k);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
|
|
|
|
ReadOptions options;
|
|
|
|
options.snapshot = snapshot;
|
|
|
|
std::string result;
|
|
|
|
Status s = db_->Get(options, k, &result);
|
|
|
|
if (s.IsNotFound()) {
|
|
|
|
result = "NOT_FOUND";
|
|
|
|
} else if (!s.ok()) {
|
|
|
|
result = s.ToString();
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int NumTableFilesAtLevel(int level) {
|
|
|
|
std::string property;
|
|
|
|
ASSERT_TRUE(
|
|
|
|
db_->GetProperty("rocksdb.num-files-at-level" + NumberToString(level),
|
|
|
|
&property));
|
|
|
|
return atoi(property.c_str());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return spread of files per level
|
|
|
|
std::string FilesPerLevel() {
|
|
|
|
std::string result;
|
|
|
|
int last_non_zero_offset = 0;
|
|
|
|
for (int level = 0; level < db_->NumberLevels(); level++) {
|
|
|
|
int f = NumTableFilesAtLevel(level);
|
|
|
|
char buf[100];
|
|
|
|
snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
|
|
|
|
result += buf;
|
|
|
|
if (f > 0) {
|
|
|
|
last_non_zero_offset = result.size();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
result.resize(last_non_zero_offset);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string IterStatus(Iterator* iter) {
|
|
|
|
std::string result;
|
|
|
|
if (iter->Valid()) {
|
|
|
|
result = iter->key().ToString() + "->" + iter->value().ToString();
|
|
|
|
} else {
|
|
|
|
result = "(invalid)";
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// A freshly opened database exists and contains no keys.
TEST(SimpleTableDBTest, Empty) {
  const std::string kFoo = "0000000000000foo";
  ASSERT_TRUE(db_ != nullptr);
  ASSERT_EQ("NOT_FOUND", Get(kFoo));
}
|
|
|
|
|
|
|
|
// Values can be written, overwritten and read back.
TEST(SimpleTableDBTest, ReadWrite) {
  const std::string kFoo = "0000000000000foo";
  const std::string kBar = "0000000000000bar";

  ASSERT_OK(Put(kFoo, "v1"));
  ASSERT_EQ("v1", Get(kFoo));

  ASSERT_OK(Put(kBar, "v2"));
  ASSERT_OK(Put(kFoo, "v3"));
  ASSERT_EQ("v3", Get(kFoo));
  ASSERT_EQ("v2", Get(kBar));
}
|
|
|
|
|
|
|
|
// The latest values remain readable after the memtable has been flushed.
TEST(SimpleTableDBTest, Flush) {
  const std::string kFoo = "0000000000000foo";
  const std::string kBar = "0000000000000bar";

  ASSERT_OK(Put(kFoo, "v1"));
  ASSERT_OK(Put(kBar, "v2"));
  ASSERT_OK(Put(kFoo, "v3"));

  dbfull()->TEST_FlushMemTable();

  ASSERT_EQ("v3", Get(kFoo));
  ASSERT_EQ("v2", Get(kBar));
}
|
|
|
|
|
|
|
|
// Overwrites and deletes spread across several flushes resolve to the newest
// state.
TEST(SimpleTableDBTest, Flush2) {
  const std::string kBar = "0000000000000bar";
  const std::string kFoo = "0000000000000foo";
  const std::string kEee = "0000000000000eee";

  ASSERT_OK(Put(kBar, "b"));
  ASSERT_OK(Put(kFoo, "v1"));
  dbfull()->TEST_FlushMemTable();

  ASSERT_OK(Put(kFoo, "v2"));
  dbfull()->TEST_FlushMemTable();
  ASSERT_EQ("v2", Get(kFoo));

  ASSERT_OK(Put(kEee, "v3"));
  dbfull()->TEST_FlushMemTable();
  ASSERT_EQ("v3", Get(kEee));

  ASSERT_OK(Delete(kBar));
  dbfull()->TEST_FlushMemTable();
  ASSERT_EQ("NOT_FOUND", Get(kBar));

  ASSERT_OK(Put(kEee, "v5"));
  dbfull()->TEST_FlushMemTable();
  ASSERT_EQ("v5", Get(kEee));
}
|
|
|
|
|
|
|
|
// Builds a test key: the prefix "key_______" followed by "i" zero-padded to
// at least six digits (16 bytes for 0 <= i <= 999999).
static std::string Key(int i) {
  char formatted[100];
  snprintf(formatted, sizeof(formatted), "key_______%06d", i);
  std::string key(formatted);
  return key;
}
|
|
|
|
|
|
|
|
// Convenience wrapper around test::RandomString that returns the generated
// string by value.
static std::string RandomString(Random* rnd, int len) {
  std::string generated;
  test::RandomString(rnd, len, &generated);
  return generated;
}
|
|
|
|
|
|
|
|
// Fills level 0 up to one file short of the compaction trigger, then writes
// one more batch and expects the level-0 files to be compacted into a single
// level-1 file.
TEST(SimpleTableDBTest, CompactionTrigger) {
  Options options = CurrentOptions();
  options.write_buffer_size = 100 << 10;  // 100KB
  options.num_levels = 3;
  options.max_mem_compaction_level = 0;
  options.level0_file_num_compaction_trigger = 3;
  Reopen(&options);

  Random rnd(301);
  const int kValuesPerBatch = 12;
  const int kValueSize = 10000;
  const int trigger = options.level0_file_num_compaction_trigger;

  // Each batch writes 120KB (12 values, each 10K), which exceeds the write
  // buffer; after the n-th batch level 0 is expected to hold n files.
  for (int expected_files = 1; expected_files < trigger; expected_files++) {
    for (int i = 0; i < kValuesPerBatch; i++) {
      const std::string value = RandomString(&rnd, kValueSize);
      ASSERT_OK(Put(Key(i), value));
    }
    dbfull()->TEST_WaitForFlushMemTable();
    ASSERT_EQ(NumTableFilesAtLevel(0), expected_files);
  }

  // generate one more file in level-0, and should trigger level-0 compaction
  for (int i = 0; i < kValuesPerBatch; i++) {
    const std::string value = RandomString(&rnd, kValueSize);
    ASSERT_OK(Put(Key(i), value));
  }
  dbfull()->TEST_WaitForCompact();

  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
}
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
return rocksdb::test::RunAllTests();
|
|
|
|
}
|