Adding a new SST table builder based on Cuckoo Hashing
Summary: Cuckoo Hashing based SST table builder. Contains: - Cuckoo Hashing logic and file storage logic. - Unit tests for logic Test Plan: make cuckoo_table_builder_test ./cuckoo_table_builder_test make check all Reviewers: yhchiang, igor, sdong, ljin Reviewed By: ljin Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D19545
This commit is contained in:
parent
f6f1533c6f
commit
cf3da899b0
6
Makefile
6
Makefile
@ -115,7 +115,8 @@ TESTS = \
|
|||||||
table_test \
|
table_test \
|
||||||
thread_local_test \
|
thread_local_test \
|
||||||
geodb_test \
|
geodb_test \
|
||||||
rate_limiter_test
|
rate_limiter_test \
|
||||||
|
cuckoo_table_builder_test
|
||||||
|
|
||||||
TOOLS = \
|
TOOLS = \
|
||||||
sst_dump \
|
sst_dump \
|
||||||
@ -410,6 +411,9 @@ deletefile_test: db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
|||||||
geodb_test: utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
geodb_test: utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CXX) utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
$(CXX) utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||||
|
|
||||||
|
cuckoo_table_builder_test: table/cuckoo_table_builder_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
|
$(CXX) table/cuckoo_table_builder_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||||
|
|
||||||
$(MEMENVLIBRARY) : $(MEMENVOBJECTS)
|
$(MEMENVLIBRARY) : $(MEMENVOBJECTS)
|
||||||
rm -f $@
|
rm -f $@
|
||||||
$(AR) -rs $@ $(MEMENVOBJECTS)
|
$(AR) -rs $@ $(MEMENVOBJECTS)
|
||||||
|
@ -184,6 +184,12 @@ EncodingType encoding_type = kPlain;
|
|||||||
extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
|
extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
|
||||||
PlainTableOptions());
|
PlainTableOptions());
|
||||||
|
|
||||||
|
// Keys of the user-collected table properties that the cuckoo table builder
// stores in the properties block of every file it writes.
struct CuckooTablePropertyNames {
  // Raw contents of the filler bucket written into every empty slot.
  static const std::string kEmptyBucket;
  // Number of hash functions in use when the table was finished
  // (stored as a decimal string).
  static const std::string kNumHashTable;
  // Total number of buckets in the table (stored as a varint32).
  static const std::string kMaxNumBuckets;
};
|
||||||
|
|
||||||
#endif // ROCKSDB_LITE
|
#endif // ROCKSDB_LITE
|
||||||
|
|
||||||
// A base class for table factories.
|
// A base class for table factories.
|
||||||
|
333
table/cuckoo_table_builder.cc
Normal file
333
table/cuckoo_table_builder.cc
Normal file
@ -0,0 +1,333 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#ifndef ROCKSDB_LITE
|
||||||
|
#include "table/cuckoo_table_builder.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "db/dbformat.h"
|
||||||
|
#include "rocksdb/env.h"
|
||||||
|
#include "rocksdb/table.h"
|
||||||
|
#include "table/block_builder.h"
|
||||||
|
#include "table/format.h"
|
||||||
|
#include "table/meta_blocks.h"
|
||||||
|
#include "util/autovector.h"
|
||||||
|
#include "util/random.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
// Out-of-line definitions of the property-name constants declared in
// rocksdb/table.h.
const std::string CuckooTablePropertyNames::kEmptyBucket =
    "rocksdb.cuckoo.bucket.empty.bucket";
const std::string CuckooTablePropertyNames::kNumHashTable =
    "rocksdb.cuckoo.hash.num";
const std::string CuckooTablePropertyNames::kMaxNumBuckets =
    "rocksdb.cuckoo.bucket.maxnum";

// Magic number stored in the footer of cuckoo table files.
// Obtained by running: echo rocksdb.table.cuckoo | sha1sum
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
|
||||||
|
|
||||||
|
// Sets up an in-memory table of file_size / (key length + value length)
// empty buckets. Nothing is written to `file` until Finish().
CuckooTableBuilder::CuckooTableBuilder(
    WritableFile* file, unsigned int fixed_key_length,
    unsigned int fixed_value_length, double hash_table_ratio,
    unsigned int file_size, unsigned int max_num_hash_table,
    unsigned int max_search_depth,
    unsigned int (*GetSliceHashPtr)(const Slice&, unsigned int,
                                    unsigned int))
    // Begin with at most four hash functions; Add() raises the count when an
    // insertion cannot be placed.
    : num_hash_table_(std::min(4u, max_num_hash_table)),
      file_(file),
      key_length_(fixed_key_length),
      value_length_(fixed_value_length),
      bucket_size_(fixed_key_length + fixed_value_length),
      hash_table_ratio_(hash_table_ratio),
      max_num_buckets_(file_size / bucket_size_),
      max_num_hash_table_(max_num_hash_table),
      max_search_depth_(max_search_depth),
      buckets_(max_num_buckets_),
      GetSliceHash(GetSliceHashPtr) {
  // bucket_size_ assumes full internal keys. Last-level files store only the
  // user key, so this size is not yet optimal for them.
  // TODO(rbs): Find how we can determine if last level or not
  // before we start adding entries into the table.

  // The whole hash table is treated as a single data block, and the format
  // has neither an index nor a filter.
  properties_.num_data_blocks = 1;
  properties_.num_entries = 0;
  properties_.index_size = 0;
  properties_.filter_size = 0;
}
|
||||||
|
|
||||||
|
// The builder does not own file_, so there is nothing to release here.
CuckooTableBuilder::~CuckooTableBuilder() = default;
|
||||||
|
|
||||||
|
void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||||
|
if (NumEntries() == max_num_buckets_) {
|
||||||
|
status_ = Status::Corruption("Hash Table is full.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
unsigned int bucket_id;
|
||||||
|
bool bucket_found = false;
|
||||||
|
autovector<unsigned int> hash_vals;
|
||||||
|
ParsedInternalKey ikey;
|
||||||
|
if (!ParseInternalKey(key, &ikey)) {
|
||||||
|
status_ = Status::Corruption("Unable to parse key into inernal key.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Slice user_key = ikey.user_key;
|
||||||
|
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||||
|
unsigned int hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
|
||||||
|
if (buckets_[hash_val].is_empty) {
|
||||||
|
bucket_id = hash_val;
|
||||||
|
bucket_found = true;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
if (user_key.compare(ExtractUserKey(buckets_[hash_val].key)) == 0) {
|
||||||
|
status_ = Status::Corruption("Same key is being inserted again.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
hash_vals.push_back(hash_val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (!bucket_found && !MakeSpaceForKey(key, &bucket_id, hash_vals)) {
|
||||||
|
// Rehash by increashing number of hash tables.
|
||||||
|
if (num_hash_table_ >= max_num_hash_table_) {
|
||||||
|
status_ = Status::Corruption("Too many collissions. Unable to hash.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// We don't really need to rehash the entire table because old hashes are
|
||||||
|
// still valid and we only increased the number of hash functions.
|
||||||
|
unsigned int old_num_hash = num_hash_table_;
|
||||||
|
num_hash_table_ = std::min(num_hash_table_ + 1, max_num_hash_table_);
|
||||||
|
for (unsigned int i = old_num_hash; i < num_hash_table_; i++) {
|
||||||
|
unsigned int hash_val = GetSliceHash(user_key, i, max_num_buckets_);
|
||||||
|
if (buckets_[hash_val].is_empty) {
|
||||||
|
bucket_found = true;
|
||||||
|
bucket_id = hash_val;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
hash_vals.push_back(hash_val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buckets_[bucket_id].key = key;
|
||||||
|
buckets_[bucket_id].value = value;
|
||||||
|
buckets_[bucket_id].is_empty = false;
|
||||||
|
|
||||||
|
if (ikey.sequence != 0) {
|
||||||
|
// This is not a last level file.
|
||||||
|
is_last_level_file_ = false;
|
||||||
|
}
|
||||||
|
properties_.num_entries++;
|
||||||
|
|
||||||
|
// We assume that the keys are inserted in sorted order. To identify an
|
||||||
|
// unused key, which will be used in filling empty buckets in the table,
|
||||||
|
// we try to find gaps between successive keys inserted. This is done by
|
||||||
|
// maintaining the previous key and comparing it with next key.
|
||||||
|
if (unused_user_key_.empty()) {
|
||||||
|
if (prev_key_.empty()) {
|
||||||
|
prev_key_ = user_key.ToString();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::string new_user_key = prev_key_;
|
||||||
|
new_user_key.back()++;
|
||||||
|
// We ignore carry-overs and check that it is larger than previous key.
|
||||||
|
if ((new_user_key > prev_key_) &&
|
||||||
|
(new_user_key < user_key.ToString())) {
|
||||||
|
unused_user_key_ = new_user_key;
|
||||||
|
} else {
|
||||||
|
prev_key_ = user_key.ToString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the most recent error recorded by Add(), or OK if none occurred.
Status CuckooTableBuilder::status() const {
  return status_;
}
|
||||||
|
|
||||||
|
// Writes the complete table to file_: all buckets (empty ones filled with an
// unused key), then the properties block, the metaindex block and the footer.
//
// Returns the first error encountered. If an earlier Add() failed, that error
// is returned and nothing is written, because the in-memory table would not
// contain every entry the caller added.
// REQUIRES: Finish(), Abandon() have not been called.
Status CuckooTableBuilder::Finish() {
  assert(!closed_);
  closed_ = true;

  // Do not silently emit an incomplete table after a failed Add().
  if (!status_.ok()) {
    return status_;
  }

  if (unused_user_key_.empty()) {
    // No gap was found between inserted keys; try "last key + 1".
    if (prev_key_.empty()) {
      return Status::Corruption("Unable to find unused key");
    }
    std::string new_user_key = prev_key_;
    new_user_key.back()++;
    // We ignore carry-overs and check that it is larger than previous key.
    if (new_user_key > prev_key_) {
      unused_user_key_ = new_user_key;
    } else {
      return Status::Corruption("Unable to find unused key");
    }
  }

  // Last level files store bare user keys; other files store internal keys.
  std::string unused_bucket;
  if (is_last_level_file_) {
    unused_bucket = unused_user_key_;
  } else {
    ParsedInternalKey ikey(unused_user_key_, 0, kTypeValue);
    AppendInternalKey(&unused_bucket, ikey);
  }
  properties_.fixed_key_len = unused_bucket.size();
  const unsigned int bucket_size = unused_bucket.size() + value_length_;
  // Pad the filler bucket so that it is exactly one bucket long.
  unused_bucket.resize(bucket_size, 'a');

  // Write the table, one bucket after another.
  for (const auto& bucket : buckets_) {
    Status s;
    if (bucket.is_empty) {
      s = file_->Append(Slice(unused_bucket));
    } else {
      const Slice stored_key =
          is_last_level_file_ ? ExtractUserKey(bucket.key) : bucket.key;
      s = file_->Append(stored_key);
      if (s.ok()) {
        s = file_->Append(bucket.value);
      }
    }
    if (!s.ok()) {
      return s;
    }
  }

  // Offset of the first byte after the bucket array.
  uint64_t offset = static_cast<uint64_t>(buckets_.size()) * bucket_size;
  properties_.user_collected_properties[
      CuckooTablePropertyNames::kEmptyBucket] = unused_bucket;
  properties_.user_collected_properties[
      CuckooTablePropertyNames::kNumHashTable] =
      std::to_string(num_hash_table_);
  PutVarint32(&properties_.user_collected_properties[
      CuckooTablePropertyNames::kMaxNumBuckets], max_num_buckets_);

  // Write meta blocks.
  MetaIndexBuilder meta_index_builder;
  PropertyBlockBuilder property_block_builder;

  property_block_builder.AddTableProperty(properties_);
  property_block_builder.Add(properties_.user_collected_properties);
  Slice property_block = property_block_builder.Finish();
  BlockHandle property_block_handle;
  property_block_handle.set_offset(offset);
  property_block_handle.set_size(property_block.size());
  Status s = file_->Append(property_block);
  if (!s.ok()) {
    return s;
  }
  offset += property_block.size();

  meta_index_builder.Add(kPropertiesBlock, property_block_handle);
  Slice meta_index_block = meta_index_builder.Finish();

  BlockHandle meta_index_block_handle;
  meta_index_block_handle.set_offset(offset);
  meta_index_block_handle.set_size(meta_index_block.size());
  s = file_->Append(meta_index_block);
  if (!s.ok()) {
    return s;
  }

  // The footer carries the metaindex handle; this format has no index block.
  Footer footer(kCuckooTableMagicNumber);
  footer.set_metaindex_handle(meta_index_block_handle);
  footer.set_index_handle(BlockHandle::NullBlockHandle());
  std::string footer_encoding;
  footer.EncodeTo(&footer_encoding);
  s = file_->Append(footer_encoding);
  return s;
}
|
||||||
|
|
||||||
|
// Marks the builder as closed without writing anything to the file.
// REQUIRES: Finish(), Abandon() have not been called.
void CuckooTableBuilder::Abandon() {
  assert(!closed_);
  closed_ = true;
}
|
||||||
|
|
||||||
|
uint64_t CuckooTableBuilder::NumEntries() const {
|
||||||
|
return properties_.num_entries;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t CuckooTableBuilder::FileSize() const {
|
||||||
|
if (closed_) {
|
||||||
|
return file_->GetFileSize();
|
||||||
|
} else {
|
||||||
|
// This is not the actual size of the file as we need to account for
|
||||||
|
// hash table ratio. This returns the size of filled buckets in the table
|
||||||
|
// scaled up by a factor of 1/hash table ratio.
|
||||||
|
return (properties_.num_entries * bucket_size_) / hash_table_ratio_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
|
||||||
|
unsigned int *bucket_id, autovector<unsigned int> hash_vals) {
|
||||||
|
struct CuckooNode {
|
||||||
|
unsigned int bucket_id;
|
||||||
|
unsigned int depth;
|
||||||
|
int parent_pos;
|
||||||
|
CuckooNode(unsigned int bucket_id, unsigned int depth, int parent_pos)
|
||||||
|
: bucket_id(bucket_id), depth(depth), parent_pos(parent_pos) {}
|
||||||
|
};
|
||||||
|
// This is BFS search tree that is stored simply as a vector.
|
||||||
|
// Each node stores the index of parent node in the vector.
|
||||||
|
std::vector<CuckooNode> tree;
|
||||||
|
// This is a very bad way to keep track of visited nodes.
|
||||||
|
// TODO(rbs): Change this by adding a 'GetKeyPathId' field to the bucket
|
||||||
|
// and use it to track visited nodes.
|
||||||
|
std::vector<bool> buckets_visited(max_num_buckets_, false);
|
||||||
|
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||||
|
unsigned int bucket_id = hash_vals[hash_cnt];
|
||||||
|
buckets_visited[bucket_id] = true;
|
||||||
|
tree.push_back(CuckooNode(bucket_id, 0, -1));
|
||||||
|
}
|
||||||
|
bool null_found = false;
|
||||||
|
unsigned int curr_pos = 0;
|
||||||
|
while (!null_found && curr_pos < tree.size()) {
|
||||||
|
CuckooNode& curr_node = tree[curr_pos];
|
||||||
|
if (curr_node.depth >= max_search_depth_) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
|
||||||
|
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||||
|
unsigned int child_bucket_id = GetSliceHash(
|
||||||
|
ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
|
||||||
|
if (child_bucket_id == curr_node.bucket_id) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (buckets_visited[child_bucket_id]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
buckets_visited[child_bucket_id] = true;
|
||||||
|
tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1,
|
||||||
|
curr_pos));
|
||||||
|
if (buckets_[child_bucket_id].is_empty) {
|
||||||
|
null_found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
++curr_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (null_found) {
|
||||||
|
int bucket_to_replace_pos = tree.size()-1;
|
||||||
|
while (bucket_to_replace_pos >= 0) {
|
||||||
|
CuckooNode& curr_node = tree[bucket_to_replace_pos];
|
||||||
|
if (curr_node.parent_pos != -1) {
|
||||||
|
buckets_[curr_node.bucket_id] = buckets_[curr_node.parent_pos];
|
||||||
|
bucket_to_replace_pos = curr_node.parent_pos;
|
||||||
|
} else {
|
||||||
|
*bucket_id = curr_node.bucket_id;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
#endif // ROCKSDB_LITE
|
97
table/cuckoo_table_builder.h
Normal file
97
table/cuckoo_table_builder.h
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#ifndef ROCKSDB_LITE
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include "rocksdb/status.h"
|
||||||
|
#include "table/table_builder.h"
|
||||||
|
#include "rocksdb/table.h"
|
||||||
|
#include "rocksdb/table_properties.h"
|
||||||
|
#include "util/autovector.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
struct CuckooBucket {
|
||||||
|
CuckooBucket(): is_empty(true) {}
|
||||||
|
Slice key;
|
||||||
|
Slice value;
|
||||||
|
bool is_empty;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CuckooTableBuilder: public TableBuilder {
|
||||||
|
public:
|
||||||
|
CuckooTableBuilder(
|
||||||
|
WritableFile* file, unsigned int fixed_key_length,
|
||||||
|
unsigned int fixed_value_length, double hash_table_ratio,
|
||||||
|
unsigned int file_size, unsigned int max_num_hash_table,
|
||||||
|
unsigned int max_search_depth,
|
||||||
|
unsigned int (*GetSliceHash)(const Slice&, unsigned int,
|
||||||
|
unsigned int));
|
||||||
|
|
||||||
|
// REQUIRES: Either Finish() or Abandon() has been called.
|
||||||
|
~CuckooTableBuilder();
|
||||||
|
|
||||||
|
// Add key,value to the table being constructed.
|
||||||
|
// REQUIRES: key is after any previously added key according to comparator.
|
||||||
|
// REQUIRES: Finish(), Abandon() have not been called
|
||||||
|
void Add(const Slice& key, const Slice& value) override;
|
||||||
|
|
||||||
|
// Return non-ok iff some error has been detected.
|
||||||
|
Status status() const override;
|
||||||
|
|
||||||
|
// Finish building the table. Stops using the file passed to the
|
||||||
|
// constructor after this function returns.
|
||||||
|
// REQUIRES: Finish(), Abandon() have not been called
|
||||||
|
Status Finish() override;
|
||||||
|
|
||||||
|
// Indicate that the contents of this builder should be abandoned. Stops
|
||||||
|
// using the file passed to the constructor after this function returns.
|
||||||
|
// If the caller is not going to call Finish(), it must call Abandon()
|
||||||
|
// before destroying this builder.
|
||||||
|
// REQUIRES: Finish(), Abandon() have not been called
|
||||||
|
void Abandon() override;
|
||||||
|
|
||||||
|
// Number of calls to Add() so far.
|
||||||
|
uint64_t NumEntries() const override;
|
||||||
|
|
||||||
|
// Size of the file generated so far. If invoked after a successful
|
||||||
|
// Finish() call, returns the size of the final generated file.
|
||||||
|
uint64_t FileSize() const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool MakeSpaceForKey(const Slice& key, unsigned int* bucket_id,
|
||||||
|
autovector<unsigned int> hash_vals);
|
||||||
|
|
||||||
|
unsigned int num_hash_table_;
|
||||||
|
WritableFile* file_;
|
||||||
|
const unsigned int key_length_;
|
||||||
|
const unsigned int value_length_;
|
||||||
|
const unsigned int bucket_size_;
|
||||||
|
const double hash_table_ratio_;
|
||||||
|
const unsigned int max_num_buckets_;
|
||||||
|
const unsigned int max_num_hash_table_;
|
||||||
|
const unsigned int max_search_depth_;
|
||||||
|
Status status_;
|
||||||
|
std::vector<CuckooBucket> buckets_;
|
||||||
|
bool is_last_level_file_ = true;
|
||||||
|
TableProperties properties_;
|
||||||
|
unsigned int (*GetSliceHash)(const Slice& s, unsigned int index,
|
||||||
|
unsigned int max_num_buckets);
|
||||||
|
std::string unused_user_key_ = "";
|
||||||
|
std::string prev_key_;
|
||||||
|
|
||||||
|
bool closed_ = false; // Either Finish() or Abandon() has been called.
|
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
CuckooTableBuilder(const CuckooTableBuilder&) = delete;
|
||||||
|
void operator=(const CuckooTableBuilder&) = delete;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
#endif // ROCKSDB_LITE
|
468
table/cuckoo_table_builder_test.cc
Normal file
468
table/cuckoo_table_builder_test.cc
Normal file
@ -0,0 +1,468 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include <map>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "table/cuckoo_table_builder.h"
#include "table/meta_blocks.h"
#include "util/random.h"
#include "util/testharness.h"
#include "util/testutil.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
extern const uint64_t kCuckooTableMagicNumber;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
std::unordered_map<std::string, std::vector<unsigned int>> hash_map;
|
||||||
|
|
||||||
|
void AddHashLookups(const std::string& s, unsigned int bucket_id,
|
||||||
|
unsigned int num_hash_fun) {
|
||||||
|
std::vector<unsigned int> v;
|
||||||
|
for (unsigned int i = 0; i < num_hash_fun; i++) {
|
||||||
|
v.push_back(bucket_id + i);
|
||||||
|
}
|
||||||
|
hash_map[s] = v;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int GetSliceHash(const Slice& s, unsigned int index,
|
||||||
|
unsigned int max_num_buckets) {
|
||||||
|
return hash_map[s.ToString()][index];
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
class CuckooBuilderTest {
|
||||||
|
public:
|
||||||
|
CuckooBuilderTest() {
|
||||||
|
env_ = Env::Default();
|
||||||
|
}
|
||||||
|
|
||||||
|
void CheckFileContents(const std::string& expected_data) {
|
||||||
|
// Read file
|
||||||
|
unique_ptr<RandomAccessFile> read_file;
|
||||||
|
ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
|
||||||
|
uint64_t read_file_size;
|
||||||
|
ASSERT_OK(env_->GetFileSize(fname, &read_file_size));
|
||||||
|
|
||||||
|
// Assert Table Properties.
|
||||||
|
TableProperties* props = nullptr;
|
||||||
|
ASSERT_OK(ReadTableProperties(read_file.get(), read_file_size,
|
||||||
|
kCuckooTableMagicNumber, env_, nullptr, &props));
|
||||||
|
ASSERT_EQ(props->num_entries, num_items);
|
||||||
|
ASSERT_EQ(props->fixed_key_len, key_length);
|
||||||
|
|
||||||
|
// Check unused bucket.
|
||||||
|
std::string unused_bucket = props->user_collected_properties[
|
||||||
|
CuckooTablePropertyNames::kEmptyBucket];
|
||||||
|
ASSERT_EQ(expected_unused_bucket, unused_bucket);
|
||||||
|
|
||||||
|
unsigned int max_buckets;
|
||||||
|
Slice max_buckets_slice = Slice(props->user_collected_properties[
|
||||||
|
CuckooTablePropertyNames::kMaxNumBuckets]);
|
||||||
|
GetVarint32(&max_buckets_slice, &max_buckets);
|
||||||
|
ASSERT_EQ(expected_max_buckets, max_buckets);
|
||||||
|
// Check contents of the bucket.
|
||||||
|
std::string read_data;
|
||||||
|
read_data.resize(expected_data.size());
|
||||||
|
Slice read_slice;
|
||||||
|
ASSERT_OK(read_file->Read(0, expected_data.size(),
|
||||||
|
&read_slice, &read_data[0]));
|
||||||
|
ASSERT_EQ(expected_data, read_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
Env* env_;
|
||||||
|
const EnvOptions env_options_;
|
||||||
|
std::string fname;
|
||||||
|
std::string expected_unused_bucket;
|
||||||
|
unsigned int file_size = 100000;
|
||||||
|
unsigned int num_items = 20;
|
||||||
|
unsigned int num_hash_fun = 64;
|
||||||
|
double hash_table_ratio = 0.9;
|
||||||
|
unsigned int ikey_length;
|
||||||
|
unsigned int user_key_length;
|
||||||
|
unsigned int key_length;
|
||||||
|
unsigned int value_length;
|
||||||
|
unsigned int bucket_length;
|
||||||
|
unsigned int expected_max_buckets;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Every key hashes to its own disjoint range of buckets, so each key must
// land in the first bucket of its range, stored as a full internal key.
TEST(CuckooBuilderTest, NoCollision) {
  hash_map.clear();
  num_items = 20;
  num_hash_fun = 64;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  unsigned int bucket_ids = 0;
  for (unsigned int i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    AddHashLookups(user_keys[i], bucket_ids, num_hash_fun);
    bucket_ids += num_hash_fun;
  }

  ikey_length = keys[0].size();
  key_length = ikey_length;
  value_length = values[0].size();
  bucket_length = ikey_length + value_length;
  expected_max_buckets = file_size / bucket_length;
  std::string expected_unused_user_key = "keys10:";
  ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
  AppendInternalKey(&expected_unused_bucket, ikey);
  expected_unused_bucket.resize(bucket_length, 'a');
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/BasicTest_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  // The builder lives on the stack so it is destroyed at the end of the test
  // (and before writable_file, which it points to).
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio,
      file_size, num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  unsigned int key_idx = 0;
  std::string expected_file_data = "";
  for (unsigned int i = 0; i < expected_max_buckets; i++) {
    if (key_idx * num_hash_fun == i && key_idx < num_items) {
      cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
      ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
      ASSERT_OK(cuckoo_builder.status());
      expected_file_data.append(keys[key_idx] + values[key_idx]);
      ++key_idx;
    } else {
      expected_file_data.append(expected_unused_bucket);
    }
  }
  ASSERT_OK(cuckoo_builder.Finish());
  ASSERT_OK(writable_file->Close());
  CheckFileContents(expected_file_data);
}
|
||||||
|
|
||||||
|
// Same layout as the collision-free case, but all sequence numbers are zero,
// so the builder treats the file as last level and stores bare user keys.
TEST(CuckooBuilderTest, NoCollisionLastLevel) {
  hash_map.clear();
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  unsigned int bucket_ids = 0;
  for (unsigned int i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    // Set zero sequence number in all keys.
    ParsedInternalKey ikey(user_keys[i], 0, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    AddHashLookups(user_keys[i], bucket_ids, num_hash_fun);
    bucket_ids += num_hash_fun;
  }
  ikey_length = keys[0].size();
  user_key_length = user_keys[0].size();
  key_length = user_key_length;
  value_length = values[0].size();
  bucket_length = key_length + value_length;
  expected_max_buckets = file_size / bucket_length;
  expected_unused_bucket = "keys10:";
  expected_unused_bucket.resize(bucket_length, 'a');
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/NoCollisionLastLevel_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  // The builder lives on the stack so it is destroyed at the end of the test
  // (and before writable_file, which it points to).
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), key_length,
      value_length, hash_table_ratio,
      file_size, num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  unsigned int key_idx = 0;
  std::string expected_file_data = "";
  for (unsigned int i = 0; i < expected_max_buckets; i++) {
    if (key_idx * num_hash_fun == i && key_idx < num_items) {
      cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
      ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
      ASSERT_OK(cuckoo_builder.status());
      expected_file_data.append(user_keys[key_idx] + values[key_idx]);
      ++key_idx;
    } else {
      expected_file_data.append(expected_unused_bucket);
    }
  }
  ASSERT_OK(cuckoo_builder.Finish());
  ASSERT_OK(writable_file->Close());
  CheckFileContents(expected_file_data);
}
|
||||||
|
|
||||||
|
// All keys share the same candidate buckets. With as many hash functions
// available as there are keys, every key must still find a bucket, and key i
// is expected in bucket i.
TEST(CuckooBuilderTest, WithCollision) {
  // Take keys with colliding hash function values.
  hash_map.clear();
  num_hash_fun = 20;
  num_items = num_hash_fun;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (unsigned int i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide.
    AddHashLookups(user_keys[i], 0, num_hash_fun);
  }
  ikey_length = keys[0].size();
  value_length = values[0].size();
  key_length = ikey_length;
  bucket_length = key_length + value_length;
  expected_max_buckets = file_size / bucket_length;
  std::string expected_unused_user_key = "keys10:";
  ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
  AppendInternalKey(&expected_unused_bucket, ikey);
  expected_unused_bucket.resize(bucket_length, 'a');
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/WithCollision_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  // The builder lives on the stack so it is destroyed at the end of the test
  // (and before writable_file, which it points to).
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), key_length, value_length, hash_table_ratio,
      file_size, num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  unsigned int key_idx = 0;
  std::string expected_file_data = "";
  for (unsigned int i = 0; i < expected_max_buckets; i++) {
    if (key_idx == i && key_idx < num_items) {
      cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
      ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
      ASSERT_OK(cuckoo_builder.status());
      expected_file_data.append(keys[key_idx] + values[key_idx]);
      ++key_idx;
    } else {
      expected_file_data.append(expected_unused_bucket);
    }
  }
  ASSERT_OK(cuckoo_builder.Finish());
  ASSERT_OK(writable_file->Close());
  CheckFileContents(expected_file_data);
}
|
||||||
|
|
||||||
|
// All keys share the same candidate buckets and there is one more key than
// hash functions, so the final Add() must fail with a Corruption status.
TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
  // Take keys with colliding hash function values.
  // Take more keys than the number of hash functions.
  hash_map.clear();
  num_hash_fun = 20;
  num_items = num_hash_fun + 1;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (unsigned int i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide.
    AddHashLookups(user_keys[i], 0, num_hash_fun);
  }
  ikey_length = keys[0].size();
  value_length = values[0].size();
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWithTooManyCollisions_writable";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  // The builder lives on the stack so it is destroyed at the end of the test
  // (and before writable_file, which it points to).
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      100, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
    cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder.status());
    ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
  }
  cuckoo_builder.Add(Slice(keys.back()), Slice(values.back()));
  ASSERT_TRUE(cuckoo_builder.status().IsCorruption());
  cuckoo_builder.Abandon();
  ASSERT_OK(writable_file->Close());
}
|
||||||
|
|
||||||
|
// Checks that adding two internal keys that share one user key (and differ
// only in sequence number) makes the builder report Corruption.
TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
  hash_map.clear();
  std::string user_key = "repeatedkey";
  AddHashLookups(user_key, 0, num_hash_fun);
  // Two internal keys for the same user key with sequence numbers 1000/1001.
  std::string key_to_reuse1, key_to_reuse2;
  ParsedInternalKey ikey1(user_key, 1000, kTypeValue);
  ParsedInternalKey ikey2(user_key, 1001, kTypeValue);
  AppendInternalKey(&key_to_reuse1, ikey1);
  AppendInternalKey(&key_to_reuse2, ikey2);
  std::string value = "value";
  ikey_length = key_to_reuse1.size();
  value_length = value.size();
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWhenSameKeyInserted_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  // Hold the builder in a unique_ptr so it is released when the test ends.
  // It is declared after writable_file, hence destroyed before the file it
  // points at.
  unique_ptr<CuckooTableBuilder> cuckoo_builder(new CuckooTableBuilder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      100, GetSliceHash));
  ASSERT_OK(cuckoo_builder->status());
  // First insertion of the user key succeeds.
  cuckoo_builder->Add(Slice(key_to_reuse1), Slice(value));
  ASSERT_OK(cuckoo_builder->status());
  ASSERT_EQ(cuckoo_builder->NumEntries(), 1);
  // Second insertion of the same user key must be rejected.
  cuckoo_builder->Add(Slice(key_to_reuse2), Slice(value));
  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
  cuckoo_builder->Abandon();
  ASSERT_OK(writable_file->Close());
}
|
||||||
|
|
||||||
|
// Builds a table in which the last inserted key forces a chain of
// displacements, then compares the written file with the expected layout.
TEST(CuckooBuilderTest, WithACollisionPath) {
  hash_map.clear();
  // Have two hash functions. Insert elements with overlapping hashes.
  // Finally insert an element which will displace all the current elements.
  num_hash_fun = 2;

  unsigned int max_search_depth = 100;
  num_items = max_search_depth + 2;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  std::vector<unsigned int> expected_bucket_id(num_items);
  for (unsigned int i = 0; i < num_items - 1; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide with the next element.
    AddHashLookups(user_keys[i], i, num_hash_fun);
    // Every key is expected one bucket past its index ...
    expected_bucket_id[i] = i+1;
  }
  // ... except the first key, which is expected to stay in bucket 0.
  expected_bucket_id[0] = 0;
  user_keys.back() = "keys" + std::to_string(num_items + 99);
  ParsedInternalKey ikey(user_keys.back(), num_items + 1000, kTypeValue);
  AppendInternalKey(&keys.back(), ikey);
  values.back() = "value" + std::to_string(num_items+100);
  // Make both hash values collide with first element.
  AddHashLookups(user_keys.back(), 0, num_hash_fun);
  // The last key is expected to end up in bucket 1.
  expected_bucket_id.back() = 1;

  ikey_length = keys[0].size();
  value_length = values[0].size();
  key_length = ikey_length;
  bucket_length = key_length + value_length;

  expected_max_buckets = file_size / bucket_length;
  // Expected content of a bucket that holds no entry: an internal key for
  // "keys10:" padded with 'a' up to bucket_length.
  // NOTE(review): presumably mirrors the unused-bucket marker chosen by
  // CuckooTableBuilder -- confirm against the builder.
  std::string expected_unused_user_key = "keys10:";
  ikey = ParsedInternalKey(expected_unused_user_key, 0, kTypeValue);
  AppendInternalKey(&expected_unused_bucket, ikey);
  expected_unused_bucket.resize(bucket_length, 'a');
  // Start from a file image made only of unused buckets; used buckets are
  // patched in as keys are added below.
  std::string expected_file_data = "";
  for (unsigned int i = 0; i < expected_max_buckets; i++) {
    expected_file_data += expected_unused_bucket;
  }

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/WithCollisionPath_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  // Hold the builder in a unique_ptr so it is released when the test ends.
  // It is declared after writable_file, hence destroyed before the file it
  // points at.
  unique_ptr<CuckooTableBuilder> cuckoo_builder(new CuckooTableBuilder(
      writable_file.get(), key_length,
      value_length, hash_table_ratio, file_size,
      num_hash_fun, max_search_depth, GetSliceHash));
  ASSERT_OK(cuckoo_builder->status());
  for (unsigned int key_idx = 0; key_idx < num_items; key_idx++) {
    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder->status());
    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
    // Overwrite the expected bucket with this entry's key followed by value.
    expected_file_data.replace(expected_bucket_id[key_idx]*bucket_length,
        bucket_length, keys[key_idx] + values[key_idx]);
  }
  ASSERT_OK(cuckoo_builder->Finish());
  // The file is read back right after this, so a failed close must not be
  // ignored.
  ASSERT_OK(writable_file->Close());
  CheckFileContents(expected_file_data);
}
|
||||||
|
|
||||||
|
// Checks that the builder reports Corruption when the last key is added after
// a chain of max_search_depth + 2 keys with overlapping hash lookups.
TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
  hash_map.clear();
  // Have two hash functions. Insert elements with overlapping hashes.
  // Finally insert an element which will displace all the current elements.
  num_hash_fun = 2;

  unsigned int max_search_depth = 100;
  num_items = max_search_depth + 3;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (unsigned int i = 0; i < num_items - 1; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide with the next element.
    AddHashLookups(user_keys[i], i, num_hash_fun);
  }
  user_keys.back() = "keys" + std::to_string(num_items + 99);
  ParsedInternalKey ikey(user_keys.back(), num_items + 1000, kTypeValue);
  AppendInternalKey(&keys.back(), ikey);
  // Assign to the string itself. Assigning through a temporary Slice would
  // leave values.back() empty and hand the builder a zero-length value.
  values.back() = "value" + std::to_string(num_items+100);
  // Make both hash values collide with first element.
  AddHashLookups(user_keys.back(), 0, num_hash_fun);

  ikey_length = keys[0].size();
  value_length = values[0].size();
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWhenCollisionPathTooLong_writable";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  // Hold the builder in a unique_ptr so it is released when the test ends.
  // It is declared after writable_file, hence destroyed before the file it
  // points at.
  unique_ptr<CuckooTableBuilder> cuckoo_builder(new CuckooTableBuilder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      max_search_depth, GetSliceHash));
  ASSERT_OK(cuckoo_builder->status());
  // Every key but the last must be accepted.
  for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder->status());
    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
  }
  // The last key is the one expected to be rejected.
  cuckoo_builder->Add(Slice(keys.back()), Slice(values.back()));
  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
  cuckoo_builder->Abandon();
  ASSERT_OK(writable_file->Close());
}
|
||||||
|
|
||||||
|
// Checks that the builder reports Corruption when one more entry is added
// than fits into file_size bytes of buckets.
TEST(CuckooBuilderTest, FailWhenTableIsFull) {
  hash_map.clear();
  file_size = 160;

  num_items = 7;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (unsigned int i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+1000);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    AddHashLookups(user_keys[i], i, num_hash_fun);
  }
  ikey_length = keys[0].size();
  value_length = values[0].size();
  bucket_length = ikey_length + value_length;
  // Check that number of items is tight: num_items buckets exceed file_size
  // while num_items-1 buckets still fit.
  ASSERT_GT(bucket_length * num_items, file_size);
  ASSERT_LE(bucket_length * (num_items-1), file_size);

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWhenTabelIsFull_writable";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  // Hold the builder in a unique_ptr so it is released when the test ends.
  // It is declared after writable_file, hence destroyed before the file it
  // points at.
  unique_ptr<CuckooTableBuilder> cuckoo_builder(new CuckooTableBuilder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      100, GetSliceHash));
  ASSERT_OK(cuckoo_builder->status());
  // Every key but the last must be accepted.
  for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder->status());
    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
  }
  // The last key is the one expected to be rejected.
  cuckoo_builder->Add(Slice(keys.back()), Slice(values.back()));
  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
  cuckoo_builder->Abandon();
  ASSERT_OK(writable_file->Close());
}
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
// Entry point of the test binary: runs every registered test and uses the
// harness result as the process exit code. The command-line arguments are
// not consulted.
int main(int argc, char** argv) {
  const int exit_code = rocksdb::test::RunAllTests();
  return exit_code;
}
|
32
table/cuckoo_table_factory.h
Normal file
32
table/cuckoo_table_factory.h
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#ifndef ROCKSDB_LITE
|
||||||
|
|
||||||
|
#include "util/murmurhash.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
// Number of entries in the seed table used by GetSliceMurmurHash below; an
// index passed to that function must stay below this value.
static const unsigned int kMaxNumHashTable = 64;
|
||||||
|
|
||||||
|
unsigned int GetSliceMurmurHash(const Slice& s, unsigned int index,
|
||||||
|
unsigned int max_num_buckets) {
|
||||||
|
static constexpr unsigned int seeds[kMaxNumHashTable] = {
|
||||||
|
816922183, 506425713, 949485004, 22513986, 421427259, 500437285,
|
||||||
|
888981693, 847587269, 511007211, 722295391, 934013645, 566947683,
|
||||||
|
193618736, 428277388, 770956674, 819994962, 755946528, 40807421,
|
||||||
|
263144466, 241420041, 444294464, 731606396, 304158902, 563235655,
|
||||||
|
968740453, 336996831, 462831574, 407970157, 985877240, 637708754,
|
||||||
|
736932700, 205026023, 755371467, 729648411, 807744117, 46482135,
|
||||||
|
847092855, 620960699, 102476362, 314094354, 625838942, 550889395,
|
||||||
|
639071379, 834567510, 397667304, 151945969, 443634243, 196618243,
|
||||||
|
421986347, 407218337, 964502417, 327741231, 493359459, 452453139,
|
||||||
|
692216398, 108161624, 816246924, 234779764, 618949448, 496133787,
|
||||||
|
156374056, 316589799, 982915425, 553105889 };
|
||||||
|
return MurmurHash(s.data(), s.size(), seeds[index]) % max_num_buckets;
|
||||||
|
}
|
||||||
|
} // namespace rocksdb
|
||||||
|
#endif // ROCKSDB_LITE
|
Loading…
Reference in New Issue
Block a user