Addressing TODOs in CuckooTableBuilder
Summary:
Contains the following changes in CuckooTableBuilder:
- Take an extra parameter in the constructor to identify a last-level file.
- Implement a better way to identify whether a bucket has already been inserted into the tree during the BFS search.
- Fix minor typos.

Test Plan:
make cuckoo_table_builder
./cuckoo_table_builder
make valgrind_check

Reviewers: sdong, igor, yhchiang, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D20445
This commit is contained in:
parent
4b61a3d67d
commit
07a7d870b8
@ -185,7 +185,8 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
|
|||||||
PlainTableOptions());
|
PlainTableOptions());
|
||||||
|
|
||||||
struct CuckooTablePropertyNames {
|
struct CuckooTablePropertyNames {
|
||||||
static const std::string kEmptyBucket;
|
static const std::string kEmptyKey;
|
||||||
|
static const std::string kValueLength;
|
||||||
static const std::string kNumHashTable;
|
static const std::string kNumHashTable;
|
||||||
static const std::string kMaxNumBuckets;
|
static const std::string kMaxNumBuckets;
|
||||||
};
|
};
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
// This source code is licensed under the BSD-style license found in the
|
// This source code is licensed under the BSD-style license found in the
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
@ -21,37 +21,38 @@
|
|||||||
#include "util/random.h"
|
#include "util/random.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
const std::string CuckooTablePropertyNames::kEmptyBucket =
|
const std::string CuckooTablePropertyNames::kEmptyKey =
|
||||||
"rocksdb.cuckoo.bucket.empty.bucket";
|
"rocksdb.cuckoo.bucket.empty.key";
|
||||||
const std::string CuckooTablePropertyNames::kNumHashTable =
|
const std::string CuckooTablePropertyNames::kNumHashTable =
|
||||||
"rocksdb.cuckoo.hash.num";
|
"rocksdb.cuckoo.hash.num";
|
||||||
const std::string CuckooTablePropertyNames::kMaxNumBuckets =
|
const std::string CuckooTablePropertyNames::kMaxNumBuckets =
|
||||||
"rocksdb.cuckoo.bucket.maxnum";
|
"rocksdb.cuckoo.bucket.maxnum";
|
||||||
|
const std::string CuckooTablePropertyNames::kValueLength =
|
||||||
|
"rocksdb.cuckoo.value.length";
|
||||||
|
|
||||||
// Obtained by running echo rocksdb.table.cuckoo | sha1sum
|
// Obtained by running echo rocksdb.table.cuckoo | sha1sum
|
||||||
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
|
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
|
||||||
|
|
||||||
CuckooTableBuilder::CuckooTableBuilder(
|
CuckooTableBuilder::CuckooTableBuilder(
|
||||||
WritableFile* file, unsigned int fixed_key_length,
|
WritableFile* file, uint32_t fixed_key_length,
|
||||||
unsigned int fixed_value_length, double hash_table_ratio,
|
uint32_t fixed_value_length, double hash_table_ratio,
|
||||||
unsigned int file_size, unsigned int max_num_hash_table,
|
uint64_t file_size, uint32_t max_num_hash_table,
|
||||||
unsigned int max_search_depth,
|
uint32_t max_search_depth, bool is_last_level,
|
||||||
unsigned int (*GetSliceHashPtr)(const Slice&, unsigned int,
|
uint64_t (*GetSliceHashPtr)(const Slice&, uint32_t, uint64_t))
|
||||||
unsigned int))
|
: num_hash_table_(2),
|
||||||
: num_hash_table_(std::min((unsigned int) 4, max_num_hash_table)),
|
|
||||||
file_(file),
|
file_(file),
|
||||||
value_length_(fixed_value_length),
|
value_length_(fixed_value_length),
|
||||||
bucket_size_(fixed_key_length + fixed_value_length),
|
// 8 is the difference between sizes of user key and InternalKey.
|
||||||
|
bucket_size_(fixed_key_length +
|
||||||
|
fixed_value_length - (is_last_level ? 8 : 0)),
|
||||||
hash_table_ratio_(hash_table_ratio),
|
hash_table_ratio_(hash_table_ratio),
|
||||||
max_num_buckets_(file_size / bucket_size_),
|
max_num_buckets_(file_size / bucket_size_),
|
||||||
max_num_hash_table_(max_num_hash_table),
|
max_num_hash_table_(max_num_hash_table),
|
||||||
max_search_depth_(max_search_depth),
|
max_search_depth_(max_search_depth),
|
||||||
|
is_last_level_file_(is_last_level),
|
||||||
buckets_(max_num_buckets_),
|
buckets_(max_num_buckets_),
|
||||||
|
make_space_for_key_call_id_(0),
|
||||||
GetSliceHash(GetSliceHashPtr) {
|
GetSliceHash(GetSliceHashPtr) {
|
||||||
// The bucket_size is currently not optimized for last level.
|
|
||||||
// In last level, the bucket will not contain full key.
|
|
||||||
// TODO(rbs): Find how we can determine if last level or not
|
|
||||||
// before we start adding entries into the table.
|
|
||||||
properties_.num_entries = 0;
|
properties_.num_entries = 0;
|
||||||
// Data is in a huge block.
|
// Data is in a huge block.
|
||||||
properties_.num_data_blocks = 1;
|
properties_.num_data_blocks = 1;
|
||||||
@ -67,17 +68,17 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
|||||||
status_ = Status::Corruption("Hash Table is full.");
|
status_ = Status::Corruption("Hash Table is full.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
unsigned int bucket_id;
|
uint64_t bucket_id;
|
||||||
bool bucket_found = false;
|
bool bucket_found = false;
|
||||||
autovector<unsigned int> hash_vals;
|
autovector<uint64_t> hash_vals;
|
||||||
ParsedInternalKey ikey;
|
ParsedInternalKey ikey;
|
||||||
if (!ParseInternalKey(key, &ikey)) {
|
if (!ParseInternalKey(key, &ikey)) {
|
||||||
status_ = Status::Corruption("Unable to parse key into inernal key.");
|
status_ = Status::Corruption("Unable to parse key into inernal key.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Slice user_key = ikey.user_key;
|
Slice user_key = ikey.user_key;
|
||||||
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||||
unsigned int hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
|
uint64_t hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
|
||||||
if (buckets_[hash_val].is_empty) {
|
if (buckets_[hash_val].is_empty) {
|
||||||
bucket_id = hash_val;
|
bucket_id = hash_val;
|
||||||
bucket_found = true;
|
bucket_found = true;
|
||||||
@ -98,10 +99,9 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
|||||||
}
|
}
|
||||||
// We don't really need to rehash the entire table because old hashes are
|
// We don't really need to rehash the entire table because old hashes are
|
||||||
// still valid and we only increased the number of hash functions.
|
// still valid and we only increased the number of hash functions.
|
||||||
unsigned int old_num_hash = num_hash_table_;
|
uint64_t hash_val = GetSliceHash(user_key,
|
||||||
num_hash_table_ = std::min(num_hash_table_ + 1, max_num_hash_table_);
|
num_hash_table_, max_num_buckets_);
|
||||||
for (unsigned int i = old_num_hash; i < num_hash_table_; i++) {
|
++num_hash_table_;
|
||||||
unsigned int hash_val = GetSliceHash(user_key, i, max_num_buckets_);
|
|
||||||
if (buckets_[hash_val].is_empty) {
|
if (buckets_[hash_val].is_empty) {
|
||||||
bucket_found = true;
|
bucket_found = true;
|
||||||
bucket_id = hash_val;
|
bucket_id = hash_val;
|
||||||
@ -110,15 +110,10 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
|||||||
hash_vals.push_back(hash_val);
|
hash_vals.push_back(hash_val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
buckets_[bucket_id].key = key;
|
buckets_[bucket_id].key = key;
|
||||||
buckets_[bucket_id].value = value;
|
buckets_[bucket_id].value = value;
|
||||||
buckets_[bucket_id].is_empty = false;
|
buckets_[bucket_id].is_empty = false;
|
||||||
|
|
||||||
if (ikey.sequence != 0) {
|
|
||||||
// This is not a last level file.
|
|
||||||
is_last_level_file_ = false;
|
|
||||||
}
|
|
||||||
properties_.num_entries++;
|
properties_.num_entries++;
|
||||||
|
|
||||||
// We assume that the keys are inserted in sorted order. To identify an
|
// We assume that the keys are inserted in sorted order. To identify an
|
||||||
@ -169,10 +164,11 @@ Status CuckooTableBuilder::Finish() {
|
|||||||
AppendInternalKey(&unused_bucket, ikey);
|
AppendInternalKey(&unused_bucket, ikey);
|
||||||
}
|
}
|
||||||
properties_.fixed_key_len = unused_bucket.size();
|
properties_.fixed_key_len = unused_bucket.size();
|
||||||
unsigned int bucket_size = unused_bucket.size() + value_length_;
|
properties_.user_collected_properties[
|
||||||
// Resize to bucket size.
|
CuckooTablePropertyNames::kValueLength].assign(
|
||||||
unused_bucket.resize(bucket_size, 'a');
|
reinterpret_cast<const char*>(&value_length_), sizeof(value_length_));
|
||||||
|
|
||||||
|
unused_bucket.resize(bucket_size_, 'a');
|
||||||
// Write the table.
|
// Write the table.
|
||||||
for (auto& bucket : buckets_) {
|
for (auto& bucket : buckets_) {
|
||||||
Status s;
|
Status s;
|
||||||
@ -197,16 +193,20 @@ Status CuckooTableBuilder::Finish() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int offset = buckets_.size() * bucket_size;
|
uint64_t offset = buckets_.size() * bucket_size_;
|
||||||
|
unused_bucket.resize(properties_.fixed_key_len);
|
||||||
properties_.user_collected_properties[
|
properties_.user_collected_properties[
|
||||||
CuckooTablePropertyNames::kEmptyBucket] = unused_bucket;
|
CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
|
||||||
properties_.user_collected_properties[
|
properties_.user_collected_properties[
|
||||||
CuckooTablePropertyNames::kNumHashTable] = std::to_string(num_hash_table_);
|
CuckooTablePropertyNames::kNumHashTable].assign(
|
||||||
PutVarint32(&properties_.user_collected_properties[
|
reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_));
|
||||||
CuckooTablePropertyNames::kMaxNumBuckets], max_num_buckets_);
|
properties_.user_collected_properties[
|
||||||
|
CuckooTablePropertyNames::kMaxNumBuckets].assign(
|
||||||
|
reinterpret_cast<const char*>(&max_num_buckets_),
|
||||||
|
sizeof(max_num_buckets_));
|
||||||
|
|
||||||
// Write meta blocks.
|
// Write meta blocks.
|
||||||
MetaIndexBuilder meta_index_builer;
|
MetaIndexBuilder meta_index_builder;
|
||||||
PropertyBlockBuilder property_block_builder;
|
PropertyBlockBuilder property_block_builder;
|
||||||
|
|
||||||
property_block_builder.AddTableProperty(properties_);
|
property_block_builder.AddTableProperty(properties_);
|
||||||
@ -221,8 +221,8 @@ Status CuckooTableBuilder::Finish() {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
meta_index_builer.Add(kPropertiesBlock, property_block_handle);
|
meta_index_builder.Add(kPropertiesBlock, property_block_handle);
|
||||||
Slice meta_index_block = meta_index_builer.Finish();
|
Slice meta_index_block = meta_index_builder.Finish();
|
||||||
|
|
||||||
BlockHandle meta_index_block_handle;
|
BlockHandle meta_index_block_handle;
|
||||||
meta_index_block_handle.set_offset(offset);
|
meta_index_block_handle.set_offset(offset);
|
||||||
@ -262,44 +262,52 @@ uint64_t CuckooTableBuilder::FileSize() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
|
bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
|
||||||
unsigned int *bucket_id, autovector<unsigned int> hash_vals) {
|
uint64_t *bucket_id, autovector<uint64_t> hash_vals) {
|
||||||
struct CuckooNode {
|
struct CuckooNode {
|
||||||
unsigned int bucket_id;
|
uint64_t bucket_id;
|
||||||
unsigned int depth;
|
uint32_t depth;
|
||||||
int parent_pos;
|
int parent_pos;
|
||||||
CuckooNode(unsigned int bucket_id, unsigned int depth, int parent_pos)
|
CuckooNode(uint64_t bucket_id, uint32_t depth, int parent_pos)
|
||||||
: bucket_id(bucket_id), depth(depth), parent_pos(parent_pos) {}
|
: bucket_id(bucket_id), depth(depth), parent_pos(parent_pos) {}
|
||||||
};
|
};
|
||||||
// This is BFS search tree that is stored simply as a vector.
|
// This is BFS search tree that is stored simply as a vector.
|
||||||
// Each node stores the index of parent node in the vector.
|
// Each node stores the index of parent node in the vector.
|
||||||
std::vector<CuckooNode> tree;
|
std::vector<CuckooNode> tree;
|
||||||
// This is a very bad way to keep track of visited nodes.
|
// We want to identify already visited buckets in the current method call so
|
||||||
// TODO(rbs): Change this by adding a 'GetKeyPathId' field to the bucket
|
// that we don't add same buckets again for exploration in the tree.
|
||||||
// and use it to track visited nodes.
|
// We do this by maintaining a count of current method call, which acts as a
|
||||||
std::vector<bool> buckets_visited(max_num_buckets_, false);
|
// unique id for this invocation of the method. We store this number into
|
||||||
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
// the nodes that we explore in current method call.
|
||||||
unsigned int bucket_id = hash_vals[hash_cnt];
|
// It is unlikely for the increment operation to overflow because the maximum
|
||||||
buckets_visited[bucket_id] = true;
|
// number of times this will be called is <= max_num_hash_table_ +
|
||||||
|
// max_num_buckets_.
|
||||||
|
++make_space_for_key_call_id_;
|
||||||
|
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||||
|
uint64_t bucket_id = hash_vals[hash_cnt];
|
||||||
|
buckets_[bucket_id].make_space_for_key_call_id =
|
||||||
|
make_space_for_key_call_id_;
|
||||||
tree.push_back(CuckooNode(bucket_id, 0, -1));
|
tree.push_back(CuckooNode(bucket_id, 0, -1));
|
||||||
}
|
}
|
||||||
bool null_found = false;
|
bool null_found = false;
|
||||||
unsigned int curr_pos = 0;
|
uint32_t curr_pos = 0;
|
||||||
while (!null_found && curr_pos < tree.size()) {
|
while (!null_found && curr_pos < tree.size()) {
|
||||||
CuckooNode& curr_node = tree[curr_pos];
|
CuckooNode& curr_node = tree[curr_pos];
|
||||||
if (curr_node.depth >= max_search_depth_) {
|
if (curr_node.depth >= max_search_depth_) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
|
CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
|
||||||
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||||
unsigned int child_bucket_id = GetSliceHash(
|
uint64_t child_bucket_id = GetSliceHash(
|
||||||
ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
|
ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
|
||||||
if (child_bucket_id == curr_node.bucket_id) {
|
if (child_bucket_id == curr_node.bucket_id) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (buckets_visited[child_bucket_id]) {
|
if (buckets_[child_bucket_id].make_space_for_key_call_id ==
|
||||||
|
make_space_for_key_call_id_) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
buckets_visited[child_bucket_id] = true;
|
buckets_[child_bucket_id].make_space_for_key_call_id =
|
||||||
|
make_space_for_key_call_id_;
|
||||||
tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1,
|
tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1,
|
||||||
curr_pos));
|
curr_pos));
|
||||||
if (buckets_[child_bucket_id].is_empty) {
|
if (buckets_[child_bucket_id].is_empty) {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
// This source code is licensed under the BSD-style license found in the
|
// This source code is licensed under the BSD-style license found in the
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
@ -16,22 +16,14 @@
|
|||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
struct CuckooBucket {
|
|
||||||
CuckooBucket(): is_empty(true) {}
|
|
||||||
Slice key;
|
|
||||||
Slice value;
|
|
||||||
bool is_empty;
|
|
||||||
};
|
|
||||||
|
|
||||||
class CuckooTableBuilder: public TableBuilder {
|
class CuckooTableBuilder: public TableBuilder {
|
||||||
public:
|
public:
|
||||||
CuckooTableBuilder(
|
CuckooTableBuilder(
|
||||||
WritableFile* file, unsigned int fixed_key_length,
|
WritableFile* file, uint32_t fixed_key_length,
|
||||||
unsigned int fixed_value_length, double hash_table_ratio,
|
uint32_t fixed_value_length, double hash_table_ratio,
|
||||||
unsigned int file_size, unsigned int max_num_hash_table,
|
uint64_t file_size, uint32_t max_num_hash_table,
|
||||||
unsigned int max_search_depth,
|
uint32_t max_search_depth, bool is_last_level,
|
||||||
unsigned int (*GetSliceHash)(const Slice&, unsigned int,
|
uint64_t (*GetSliceHash)(const Slice&, uint32_t, uint64_t));
|
||||||
unsigned int));
|
|
||||||
|
|
||||||
// REQUIRES: Either Finish() or Abandon() has been called.
|
// REQUIRES: Either Finish() or Abandon() has been called.
|
||||||
~CuckooTableBuilder();
|
~CuckooTableBuilder();
|
||||||
@ -64,23 +56,32 @@ class CuckooTableBuilder: public TableBuilder {
|
|||||||
uint64_t FileSize() const override;
|
uint64_t FileSize() const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool MakeSpaceForKey(const Slice& key, unsigned int* bucket_id,
|
struct CuckooBucket {
|
||||||
autovector<unsigned int> hash_vals);
|
CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {}
|
||||||
|
Slice key;
|
||||||
|
Slice value;
|
||||||
|
bool is_empty;
|
||||||
|
uint64_t make_space_for_key_call_id;
|
||||||
|
};
|
||||||
|
|
||||||
unsigned int num_hash_table_;
|
bool MakeSpaceForKey(const Slice& key, uint64_t* bucket_id,
|
||||||
|
autovector<uint64_t> hash_vals);
|
||||||
|
|
||||||
|
uint32_t num_hash_table_;
|
||||||
WritableFile* file_;
|
WritableFile* file_;
|
||||||
const unsigned int value_length_;
|
const uint32_t value_length_;
|
||||||
const unsigned int bucket_size_;
|
const uint32_t bucket_size_;
|
||||||
const double hash_table_ratio_;
|
const double hash_table_ratio_;
|
||||||
const unsigned int max_num_buckets_;
|
const uint64_t max_num_buckets_;
|
||||||
const unsigned int max_num_hash_table_;
|
const uint32_t max_num_hash_table_;
|
||||||
const unsigned int max_search_depth_;
|
const uint32_t max_search_depth_;
|
||||||
|
const bool is_last_level_file_;
|
||||||
Status status_;
|
Status status_;
|
||||||
std::vector<CuckooBucket> buckets_;
|
std::vector<CuckooBucket> buckets_;
|
||||||
bool is_last_level_file_ = true;
|
|
||||||
TableProperties properties_;
|
TableProperties properties_;
|
||||||
unsigned int (*GetSliceHash)(const Slice& s, unsigned int index,
|
uint64_t make_space_for_key_call_id_;
|
||||||
unsigned int max_num_buckets);
|
uint64_t (*GetSliceHash)(const Slice& s, uint32_t index,
|
||||||
|
uint64_t max_num_buckets);
|
||||||
std::string unused_user_key_ = "";
|
std::string unused_user_key_ = "";
|
||||||
std::string prev_key_;
|
std::string prev_key_;
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
// This source code is licensed under the BSD-style license found in the
|
// This source code is licensed under the BSD-style license found in the
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
@ -19,20 +19,19 @@ namespace rocksdb {
|
|||||||
extern const uint64_t kCuckooTableMagicNumber;
|
extern const uint64_t kCuckooTableMagicNumber;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
std::unordered_map<std::string, std::vector<unsigned int>> hash_map;
|
std::unordered_map<std::string, std::vector<uint64_t>> hash_map;
|
||||||
|
|
||||||
void AddHashLookups(const std::string& s, unsigned int bucket_id,
|
void AddHashLookups(const std::string& s, uint64_t bucket_id,
|
||||||
unsigned int num_hash_fun) {
|
uint32_t num_hash_fun) {
|
||||||
std::vector<unsigned int> v;
|
std::vector<uint64_t> v;
|
||||||
for (unsigned int i = 0; i < num_hash_fun; i++) {
|
for (uint32_t i = 0; i < num_hash_fun; i++) {
|
||||||
v.push_back(bucket_id + i);
|
v.push_back(bucket_id + i);
|
||||||
}
|
}
|
||||||
hash_map[s] = v;
|
hash_map[s] = v;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int GetSliceHash(const Slice& s, unsigned int index,
|
uint64_t GetSliceHash(const Slice& s, uint32_t index,
|
||||||
unsigned int max_num_buckets) {
|
uint64_t max_num_buckets) {
|
||||||
return hash_map[s.ToString()][index];
|
return hash_map[s.ToString()][index];
|
||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
@ -43,7 +42,9 @@ class CuckooBuilderTest {
|
|||||||
env_ = Env::Default();
|
env_ = Env::Default();
|
||||||
}
|
}
|
||||||
|
|
||||||
void CheckFileContents(const std::string& expected_data) {
|
void CheckFileContents(const std::string& expected_data,
|
||||||
|
std::string expected_unused_bucket, uint64_t expected_max_buckets,
|
||||||
|
uint32_t expected_num_hash_fun) {
|
||||||
// Read file
|
// Read file
|
||||||
unique_ptr<RandomAccessFile> read_file;
|
unique_ptr<RandomAccessFile> read_file;
|
||||||
ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
|
ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
|
||||||
@ -58,15 +59,22 @@ class CuckooBuilderTest {
|
|||||||
ASSERT_EQ(props->fixed_key_len, key_length);
|
ASSERT_EQ(props->fixed_key_len, key_length);
|
||||||
|
|
||||||
// Check unused bucket.
|
// Check unused bucket.
|
||||||
std::string unused_bucket = props->user_collected_properties[
|
std::string unused_key = props->user_collected_properties[
|
||||||
CuckooTablePropertyNames::kEmptyBucket];
|
CuckooTablePropertyNames::kEmptyKey];
|
||||||
ASSERT_EQ(expected_unused_bucket, unused_bucket);
|
ASSERT_EQ(expected_unused_bucket.substr(0, key_length), unused_key);
|
||||||
|
|
||||||
unsigned int max_buckets;
|
uint32_t value_len_found =
|
||||||
Slice max_buckets_slice = Slice(props->user_collected_properties[
|
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
|
||||||
CuckooTablePropertyNames::kMaxNumBuckets]);
|
CuckooTablePropertyNames::kValueLength].data());
|
||||||
GetVarint32(&max_buckets_slice, &max_buckets);
|
ASSERT_EQ(value_length, value_len_found);
|
||||||
|
const uint64_t max_buckets =
|
||||||
|
*reinterpret_cast<const uint64_t*>(props->user_collected_properties[
|
||||||
|
CuckooTablePropertyNames::kMaxNumBuckets].data());
|
||||||
ASSERT_EQ(expected_max_buckets, max_buckets);
|
ASSERT_EQ(expected_max_buckets, max_buckets);
|
||||||
|
const uint32_t num_hash_fun_found =
|
||||||
|
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
|
||||||
|
CuckooTablePropertyNames::kNumHashTable].data());
|
||||||
|
ASSERT_EQ(expected_num_hash_fun, num_hash_fun_found);
|
||||||
delete props;
|
delete props;
|
||||||
// Check contents of the bucket.
|
// Check contents of the bucket.
|
||||||
std::string read_data;
|
std::string read_data;
|
||||||
@ -80,28 +88,25 @@ class CuckooBuilderTest {
|
|||||||
Env* env_;
|
Env* env_;
|
||||||
const EnvOptions env_options_;
|
const EnvOptions env_options_;
|
||||||
std::string fname;
|
std::string fname;
|
||||||
std::string expected_unused_bucket;
|
uint64_t file_size = 100000;
|
||||||
unsigned int file_size = 100000;
|
uint32_t num_items = 20;
|
||||||
unsigned int num_items = 20;
|
uint32_t num_hash_fun = 64;
|
||||||
unsigned int num_hash_fun = 64;
|
|
||||||
double hash_table_ratio = 0.9;
|
double hash_table_ratio = 0.9;
|
||||||
unsigned int ikey_length;
|
uint32_t ikey_length;
|
||||||
unsigned int user_key_length;
|
uint32_t user_key_length;
|
||||||
unsigned int key_length;
|
uint32_t key_length;
|
||||||
unsigned int value_length;
|
uint32_t value_length;
|
||||||
unsigned int bucket_length;
|
uint32_t bucket_length;
|
||||||
unsigned int expected_max_buckets;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST(CuckooBuilderTest, NoCollision) {
|
TEST(CuckooBuilderTest, NoCollision) {
|
||||||
hash_map.clear();
|
hash_map.clear();
|
||||||
num_items = 20;
|
uint32_t expected_num_hash_fun = 2;
|
||||||
num_hash_fun = 64;
|
|
||||||
std::vector<std::string> user_keys(num_items);
|
std::vector<std::string> user_keys(num_items);
|
||||||
std::vector<std::string> keys(num_items);
|
std::vector<std::string> keys(num_items);
|
||||||
std::vector<std::string> values(num_items);
|
std::vector<std::string> values(num_items);
|
||||||
unsigned int bucket_ids = 0;
|
uint64_t bucket_ids = 0;
|
||||||
for (unsigned int i = 0; i < num_items; i++) {
|
for (uint32_t i = 0; i < num_items; i++) {
|
||||||
user_keys[i] = "keys" + std::to_string(i+100);
|
user_keys[i] = "keys" + std::to_string(i+100);
|
||||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||||
AppendInternalKey(&keys[i], ikey);
|
AppendInternalKey(&keys[i], ikey);
|
||||||
@ -114,9 +119,10 @@ TEST(CuckooBuilderTest, NoCollision) {
|
|||||||
key_length = ikey_length;
|
key_length = ikey_length;
|
||||||
value_length = values[0].size();
|
value_length = values[0].size();
|
||||||
bucket_length = ikey_length + value_length;
|
bucket_length = ikey_length + value_length;
|
||||||
expected_max_buckets = file_size / bucket_length;
|
uint64_t expected_max_buckets = file_size / bucket_length;
|
||||||
std::string expected_unused_user_key = "keys10:";
|
std::string expected_unused_user_key = "keys10:";
|
||||||
ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
|
ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
|
||||||
|
std::string expected_unused_bucket;
|
||||||
AppendInternalKey(&expected_unused_bucket, ikey);
|
AppendInternalKey(&expected_unused_bucket, ikey);
|
||||||
expected_unused_bucket.resize(bucket_length, 'a');
|
expected_unused_bucket.resize(bucket_length, 'a');
|
||||||
unique_ptr<WritableFile> writable_file;
|
unique_ptr<WritableFile> writable_file;
|
||||||
@ -125,11 +131,11 @@ TEST(CuckooBuilderTest, NoCollision) {
|
|||||||
CuckooTableBuilder cuckoo_builder(
|
CuckooTableBuilder cuckoo_builder(
|
||||||
writable_file.get(), ikey_length,
|
writable_file.get(), ikey_length,
|
||||||
value_length, hash_table_ratio,
|
value_length, hash_table_ratio,
|
||||||
file_size, num_hash_fun, 100, GetSliceHash);
|
file_size, num_hash_fun, 100, false, GetSliceHash);
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
unsigned int key_idx = 0;
|
uint32_t key_idx = 0;
|
||||||
std::string expected_file_data = "";
|
std::string expected_file_data = "";
|
||||||
for (unsigned int i = 0; i < expected_max_buckets; i++) {
|
for (uint32_t i = 0; i < expected_max_buckets; i++) {
|
||||||
if (key_idx * num_hash_fun == i && key_idx < num_items) {
|
if (key_idx * num_hash_fun == i && key_idx < num_items) {
|
||||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||||
@ -142,16 +148,18 @@ TEST(CuckooBuilderTest, NoCollision) {
|
|||||||
}
|
}
|
||||||
ASSERT_OK(cuckoo_builder.Finish());
|
ASSERT_OK(cuckoo_builder.Finish());
|
||||||
writable_file->Close();
|
writable_file->Close();
|
||||||
CheckFileContents(expected_file_data);
|
CheckFileContents(expected_file_data, expected_unused_bucket,
|
||||||
|
expected_max_buckets, expected_num_hash_fun);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(CuckooBuilderTest, NoCollisionLastLevel) {
|
TEST(CuckooBuilderTest, NoCollisionLastLevel) {
|
||||||
hash_map.clear();
|
hash_map.clear();
|
||||||
|
uint32_t expected_num_hash_fun = 2;
|
||||||
std::vector<std::string> user_keys(num_items);
|
std::vector<std::string> user_keys(num_items);
|
||||||
std::vector<std::string> keys(num_items);
|
std::vector<std::string> keys(num_items);
|
||||||
std::vector<std::string> values(num_items);
|
std::vector<std::string> values(num_items);
|
||||||
unsigned int bucket_ids = 0;
|
uint64_t bucket_ids = 0;
|
||||||
for (unsigned int i = 0; i < num_items; i++) {
|
for (uint32_t i = 0; i < num_items; i++) {
|
||||||
user_keys[i] = "keys" + std::to_string(i+100);
|
user_keys[i] = "keys" + std::to_string(i+100);
|
||||||
// Set zero sequence number in all keys.
|
// Set zero sequence number in all keys.
|
||||||
ParsedInternalKey ikey(user_keys[i], 0, kTypeValue);
|
ParsedInternalKey ikey(user_keys[i], 0, kTypeValue);
|
||||||
@ -165,20 +173,20 @@ TEST(CuckooBuilderTest, NoCollisionLastLevel) {
|
|||||||
key_length = user_key_length;
|
key_length = user_key_length;
|
||||||
value_length = values[0].size();
|
value_length = values[0].size();
|
||||||
bucket_length = key_length + value_length;
|
bucket_length = key_length + value_length;
|
||||||
expected_max_buckets = file_size / bucket_length;
|
uint64_t expected_max_buckets = file_size / bucket_length;
|
||||||
expected_unused_bucket = "keys10:";
|
std::string expected_unused_bucket = "keys10:";
|
||||||
expected_unused_bucket.resize(bucket_length, 'a');
|
expected_unused_bucket.resize(bucket_length, 'a');
|
||||||
unique_ptr<WritableFile> writable_file;
|
unique_ptr<WritableFile> writable_file;
|
||||||
fname = test::TmpDir() + "/NoCollisionLastLevel_writable_file";
|
fname = test::TmpDir() + "/NoCollisionLastLevel_writable_file";
|
||||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||||
CuckooTableBuilder cuckoo_builder(
|
CuckooTableBuilder cuckoo_builder(
|
||||||
writable_file.get(), key_length,
|
writable_file.get(), ikey_length,
|
||||||
value_length, hash_table_ratio,
|
value_length, hash_table_ratio,
|
||||||
file_size, num_hash_fun, 100, GetSliceHash);
|
file_size, num_hash_fun, 100, true, GetSliceHash);
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
unsigned int key_idx = 0;
|
uint32_t key_idx = 0;
|
||||||
std::string expected_file_data = "";
|
std::string expected_file_data = "";
|
||||||
for (unsigned int i = 0; i < expected_max_buckets; i++) {
|
for (uint32_t i = 0; i < expected_max_buckets; i++) {
|
||||||
if (key_idx * num_hash_fun == i && key_idx < num_items) {
|
if (key_idx * num_hash_fun == i && key_idx < num_items) {
|
||||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||||
@ -191,7 +199,8 @@ TEST(CuckooBuilderTest, NoCollisionLastLevel) {
|
|||||||
}
|
}
|
||||||
ASSERT_OK(cuckoo_builder.Finish());
|
ASSERT_OK(cuckoo_builder.Finish());
|
||||||
writable_file->Close();
|
writable_file->Close();
|
||||||
CheckFileContents(expected_file_data);
|
CheckFileContents(expected_file_data, expected_unused_bucket,
|
||||||
|
expected_max_buckets, expected_num_hash_fun);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(CuckooBuilderTest, WithCollision) {
|
TEST(CuckooBuilderTest, WithCollision) {
|
||||||
@ -199,10 +208,11 @@ TEST(CuckooBuilderTest, WithCollision) {
|
|||||||
hash_map.clear();
|
hash_map.clear();
|
||||||
num_hash_fun = 20;
|
num_hash_fun = 20;
|
||||||
num_items = num_hash_fun;
|
num_items = num_hash_fun;
|
||||||
|
uint32_t expected_num_hash_fun = num_hash_fun;
|
||||||
std::vector<std::string> user_keys(num_items);
|
std::vector<std::string> user_keys(num_items);
|
||||||
std::vector<std::string> keys(num_items);
|
std::vector<std::string> keys(num_items);
|
||||||
std::vector<std::string> values(num_items);
|
std::vector<std::string> values(num_items);
|
||||||
for (unsigned int i = 0; i < num_items; i++) {
|
for (uint32_t i = 0; i < num_items; i++) {
|
||||||
user_keys[i] = "keys" + std::to_string(i+100);
|
user_keys[i] = "keys" + std::to_string(i+100);
|
||||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||||
AppendInternalKey(&keys[i], ikey);
|
AppendInternalKey(&keys[i], ikey);
|
||||||
@ -214,9 +224,10 @@ TEST(CuckooBuilderTest, WithCollision) {
|
|||||||
value_length = values[0].size();
|
value_length = values[0].size();
|
||||||
key_length = ikey_length;
|
key_length = ikey_length;
|
||||||
bucket_length = key_length + value_length;
|
bucket_length = key_length + value_length;
|
||||||
expected_max_buckets = file_size / bucket_length;
|
uint64_t expected_max_buckets = file_size / bucket_length;
|
||||||
std::string expected_unused_user_key = "keys10:";
|
std::string expected_unused_user_key = "keys10:";
|
||||||
ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
|
ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
|
||||||
|
std::string expected_unused_bucket;
|
||||||
AppendInternalKey(&expected_unused_bucket, ikey);
|
AppendInternalKey(&expected_unused_bucket, ikey);
|
||||||
expected_unused_bucket.resize(bucket_length, 'a');
|
expected_unused_bucket.resize(bucket_length, 'a');
|
||||||
unique_ptr<WritableFile> writable_file;
|
unique_ptr<WritableFile> writable_file;
|
||||||
@ -224,11 +235,11 @@ TEST(CuckooBuilderTest, WithCollision) {
|
|||||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||||
CuckooTableBuilder cuckoo_builder(
|
CuckooTableBuilder cuckoo_builder(
|
||||||
writable_file.get(), key_length, value_length, hash_table_ratio,
|
writable_file.get(), key_length, value_length, hash_table_ratio,
|
||||||
file_size, num_hash_fun, 100, GetSliceHash);
|
file_size, num_hash_fun, 100, false, GetSliceHash);
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
unsigned int key_idx = 0;
|
uint32_t key_idx = 0;
|
||||||
std::string expected_file_data = "";
|
std::string expected_file_data = "";
|
||||||
for (unsigned int i = 0; i < expected_max_buckets; i++) {
|
for (uint32_t i = 0; i < expected_max_buckets; i++) {
|
||||||
if (key_idx == i && key_idx < num_items) {
|
if (key_idx == i && key_idx < num_items) {
|
||||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||||
@ -241,7 +252,8 @@ TEST(CuckooBuilderTest, WithCollision) {
|
|||||||
}
|
}
|
||||||
ASSERT_OK(cuckoo_builder.Finish());
|
ASSERT_OK(cuckoo_builder.Finish());
|
||||||
writable_file->Close();
|
writable_file->Close();
|
||||||
CheckFileContents(expected_file_data);
|
CheckFileContents(expected_file_data, expected_unused_bucket,
|
||||||
|
expected_max_buckets, expected_num_hash_fun);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
|
TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
|
||||||
@ -253,7 +265,7 @@ TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
|
|||||||
std::vector<std::string> user_keys(num_items);
|
std::vector<std::string> user_keys(num_items);
|
||||||
std::vector<std::string> keys(num_items);
|
std::vector<std::string> keys(num_items);
|
||||||
std::vector<std::string> values(num_items);
|
std::vector<std::string> values(num_items);
|
||||||
for (unsigned int i = 0; i < num_items; i++) {
|
for (uint32_t i = 0; i < num_items; i++) {
|
||||||
user_keys[i] = "keys" + std::to_string(i+100);
|
user_keys[i] = "keys" + std::to_string(i+100);
|
||||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||||
AppendInternalKey(&keys[i], ikey);
|
AppendInternalKey(&keys[i], ikey);
|
||||||
@ -269,9 +281,9 @@ TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
|
|||||||
CuckooTableBuilder cuckoo_builder(
|
CuckooTableBuilder cuckoo_builder(
|
||||||
writable_file.get(), ikey_length,
|
writable_file.get(), ikey_length,
|
||||||
value_length, hash_table_ratio, file_size, num_hash_fun,
|
value_length, hash_table_ratio, file_size, num_hash_fun,
|
||||||
100, GetSliceHash);
|
100, false, GetSliceHash);
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
|
for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
|
||||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||||
@ -300,7 +312,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
|
|||||||
CuckooTableBuilder cuckoo_builder(
|
CuckooTableBuilder cuckoo_builder(
|
||||||
writable_file.get(), ikey_length,
|
writable_file.get(), ikey_length,
|
||||||
value_length, hash_table_ratio, file_size, num_hash_fun,
|
value_length, hash_table_ratio, file_size, num_hash_fun,
|
||||||
100, GetSliceHash);
|
100, false, GetSliceHash);
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
cuckoo_builder.Add(Slice(key_to_reuse1), Slice(value));
|
cuckoo_builder.Add(Slice(key_to_reuse1), Slice(value));
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
@ -316,14 +328,14 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
|
|||||||
// Have two hash functions. Insert elements with overlapping hashes.
|
// Have two hash functions. Insert elements with overlapping hashes.
|
||||||
// Finally insert an element which will displace all the current elements.
|
// Finally insert an element which will displace all the current elements.
|
||||||
num_hash_fun = 2;
|
num_hash_fun = 2;
|
||||||
|
uint32_t expected_num_hash_fun = num_hash_fun;
|
||||||
unsigned int max_search_depth = 100;
|
uint32_t max_search_depth = 100;
|
||||||
num_items = max_search_depth + 2;
|
num_items = max_search_depth + 2;
|
||||||
std::vector<std::string> user_keys(num_items);
|
std::vector<std::string> user_keys(num_items);
|
||||||
std::vector<std::string> keys(num_items);
|
std::vector<std::string> keys(num_items);
|
||||||
std::vector<std::string> values(num_items);
|
std::vector<std::string> values(num_items);
|
||||||
std::vector<unsigned int> expected_bucket_id(num_items);
|
std::vector<uint64_t> expected_bucket_id(num_items);
|
||||||
for (unsigned int i = 0; i < num_items - 1; i++) {
|
for (uint32_t i = 0; i < num_items - 1; i++) {
|
||||||
user_keys[i] = "keys" + std::to_string(i+100);
|
user_keys[i] = "keys" + std::to_string(i+100);
|
||||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||||
AppendInternalKey(&keys[i], ikey);
|
AppendInternalKey(&keys[i], ikey);
|
||||||
@ -346,13 +358,14 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
|
|||||||
key_length = ikey_length;
|
key_length = ikey_length;
|
||||||
bucket_length = key_length + value_length;
|
bucket_length = key_length + value_length;
|
||||||
|
|
||||||
expected_max_buckets = file_size / bucket_length;
|
uint64_t expected_max_buckets = file_size / bucket_length;
|
||||||
std::string expected_unused_user_key = "keys10:";
|
std::string expected_unused_user_key = "keys10:";
|
||||||
ikey = ParsedInternalKey(expected_unused_user_key, 0, kTypeValue);
|
ikey = ParsedInternalKey(expected_unused_user_key, 0, kTypeValue);
|
||||||
|
std::string expected_unused_bucket;
|
||||||
AppendInternalKey(&expected_unused_bucket, ikey);
|
AppendInternalKey(&expected_unused_bucket, ikey);
|
||||||
expected_unused_bucket.resize(bucket_length, 'a');
|
expected_unused_bucket.resize(bucket_length, 'a');
|
||||||
std::string expected_file_data = "";
|
std::string expected_file_data = "";
|
||||||
for (unsigned int i = 0; i < expected_max_buckets; i++) {
|
for (uint32_t i = 0; i < expected_max_buckets; i++) {
|
||||||
expected_file_data += expected_unused_bucket;
|
expected_file_data += expected_unused_bucket;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -362,9 +375,9 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
|
|||||||
CuckooTableBuilder cuckoo_builder(
|
CuckooTableBuilder cuckoo_builder(
|
||||||
writable_file.get(), key_length,
|
writable_file.get(), key_length,
|
||||||
value_length, hash_table_ratio, file_size,
|
value_length, hash_table_ratio, file_size,
|
||||||
num_hash_fun, max_search_depth, GetSliceHash);
|
num_hash_fun, max_search_depth, false, GetSliceHash);
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
for (unsigned int key_idx = 0; key_idx < num_items; key_idx++) {
|
for (uint32_t key_idx = 0; key_idx < num_items; key_idx++) {
|
||||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||||
@ -373,7 +386,8 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
|
|||||||
}
|
}
|
||||||
ASSERT_OK(cuckoo_builder.Finish());
|
ASSERT_OK(cuckoo_builder.Finish());
|
||||||
writable_file->Close();
|
writable_file->Close();
|
||||||
CheckFileContents(expected_file_data);
|
CheckFileContents(expected_file_data, expected_unused_bucket,
|
||||||
|
expected_max_buckets, expected_num_hash_fun);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
||||||
@ -382,12 +396,12 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
|||||||
// Finally insert an element which will displace all the current elements.
|
// Finally insert an element which will displace all the current elements.
|
||||||
num_hash_fun = 2;
|
num_hash_fun = 2;
|
||||||
|
|
||||||
unsigned int max_search_depth = 100;
|
uint32_t max_search_depth = 100;
|
||||||
num_items = max_search_depth + 3;
|
num_items = max_search_depth + 3;
|
||||||
std::vector<std::string> user_keys(num_items);
|
std::vector<std::string> user_keys(num_items);
|
||||||
std::vector<std::string> keys(num_items);
|
std::vector<std::string> keys(num_items);
|
||||||
std::vector<std::string> values(num_items);
|
std::vector<std::string> values(num_items);
|
||||||
for (unsigned int i = 0; i < num_items - 1; i++) {
|
for (uint32_t i = 0; i < num_items - 1; i++) {
|
||||||
user_keys[i] = "keys" + std::to_string(i+100);
|
user_keys[i] = "keys" + std::to_string(i+100);
|
||||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||||
AppendInternalKey(&keys[i], ikey);
|
AppendInternalKey(&keys[i], ikey);
|
||||||
@ -410,9 +424,9 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
|||||||
CuckooTableBuilder cuckoo_builder(
|
CuckooTableBuilder cuckoo_builder(
|
||||||
writable_file.get(), ikey_length,
|
writable_file.get(), ikey_length,
|
||||||
value_length, hash_table_ratio, file_size, num_hash_fun,
|
value_length, hash_table_ratio, file_size, num_hash_fun,
|
||||||
max_search_depth, GetSliceHash);
|
max_search_depth, false, GetSliceHash);
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
|
for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
|
||||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||||
@ -431,7 +445,7 @@ TEST(CuckooBuilderTest, FailWhenTableIsFull) {
|
|||||||
std::vector<std::string> user_keys(num_items);
|
std::vector<std::string> user_keys(num_items);
|
||||||
std::vector<std::string> keys(num_items);
|
std::vector<std::string> keys(num_items);
|
||||||
std::vector<std::string> values(num_items);
|
std::vector<std::string> values(num_items);
|
||||||
for (unsigned int i = 0; i < num_items; i++) {
|
for (uint32_t i = 0; i < num_items; i++) {
|
||||||
user_keys[i] = "keys" + std::to_string(i+1000);
|
user_keys[i] = "keys" + std::to_string(i+1000);
|
||||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||||
AppendInternalKey(&keys[i], ikey);
|
AppendInternalKey(&keys[i], ikey);
|
||||||
@ -451,9 +465,9 @@ TEST(CuckooBuilderTest, FailWhenTableIsFull) {
|
|||||||
CuckooTableBuilder cuckoo_builder(
|
CuckooTableBuilder cuckoo_builder(
|
||||||
writable_file.get(), ikey_length,
|
writable_file.get(), ikey_length,
|
||||||
value_length, hash_table_ratio, file_size, num_hash_fun,
|
value_length, hash_table_ratio, file_size, num_hash_fun,
|
||||||
100, GetSliceHash);
|
100, false, GetSliceHash);
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
|
for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
|
||||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||||
ASSERT_OK(cuckoo_builder.status());
|
ASSERT_OK(cuckoo_builder.status());
|
||||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
// This source code is licensed under the BSD-style license found in the
|
// This source code is licensed under the BSD-style license found in the
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
@ -10,11 +10,11 @@
|
|||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
static const unsigned int kMaxNumHashTable = 64;
|
static const uint32_t kMaxNumHashTable = 64;
|
||||||
|
|
||||||
unsigned int GetSliceMurmurHash(const Slice& s, unsigned int index,
|
uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index,
|
||||||
unsigned int max_num_buckets) {
|
uint64_t max_num_buckets) {
|
||||||
static constexpr unsigned int seeds[kMaxNumHashTable] = {
|
static constexpr uint32_t seeds[kMaxNumHashTable] = {
|
||||||
816922183, 506425713, 949485004, 22513986, 421427259, 500437285,
|
816922183, 506425713, 949485004, 22513986, 421427259, 500437285,
|
||||||
888981693, 847587269, 511007211, 722295391, 934013645, 566947683,
|
888981693, 847587269, 511007211, 722295391, 934013645, 566947683,
|
||||||
193618736, 428277388, 770956674, 819994962, 755946528, 40807421,
|
193618736, 428277388, 770956674, 819994962, 755946528, 40807421,
|
||||||
|
Loading…
Reference in New Issue
Block a user