Addressing TODOs in CuckooTableBuilder
Summary: Contains the following changes in CuckooTableBuilder: - Take an extra parameter in the constructor to identify a last-level file. - Implement a better way to identify whether a bucket has already been inserted into the tree during the BFS search. - Fix minor typos. Test Plan: make cuckoo_table_builder ./cuckoo_table_builder make valgrind_check Reviewers: sdong, igor, yhchiang, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D20445
This commit is contained in:
parent
4b61a3d67d
commit
07a7d870b8
@ -185,7 +185,8 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
|
||||
PlainTableOptions());
|
||||
|
||||
struct CuckooTablePropertyNames {
|
||||
static const std::string kEmptyBucket;
|
||||
static const std::string kEmptyKey;
|
||||
static const std::string kValueLength;
|
||||
static const std::string kNumHashTable;
|
||||
static const std::string kMaxNumBuckets;
|
||||
};
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
@ -21,37 +21,38 @@
|
||||
#include "util/random.h"
|
||||
|
||||
namespace rocksdb {
|
||||
const std::string CuckooTablePropertyNames::kEmptyBucket =
|
||||
"rocksdb.cuckoo.bucket.empty.bucket";
|
||||
const std::string CuckooTablePropertyNames::kEmptyKey =
|
||||
"rocksdb.cuckoo.bucket.empty.key";
|
||||
const std::string CuckooTablePropertyNames::kNumHashTable =
|
||||
"rocksdb.cuckoo.hash.num";
|
||||
const std::string CuckooTablePropertyNames::kMaxNumBuckets =
|
||||
"rocksdb.cuckoo.bucket.maxnum";
|
||||
const std::string CuckooTablePropertyNames::kValueLength =
|
||||
"rocksdb.cuckoo.value.length";
|
||||
|
||||
// Obtained by running echo rocksdb.table.cuckoo | sha1sum
|
||||
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
|
||||
|
||||
CuckooTableBuilder::CuckooTableBuilder(
|
||||
WritableFile* file, unsigned int fixed_key_length,
|
||||
unsigned int fixed_value_length, double hash_table_ratio,
|
||||
unsigned int file_size, unsigned int max_num_hash_table,
|
||||
unsigned int max_search_depth,
|
||||
unsigned int (*GetSliceHashPtr)(const Slice&, unsigned int,
|
||||
unsigned int))
|
||||
: num_hash_table_(std::min((unsigned int) 4, max_num_hash_table)),
|
||||
WritableFile* file, uint32_t fixed_key_length,
|
||||
uint32_t fixed_value_length, double hash_table_ratio,
|
||||
uint64_t file_size, uint32_t max_num_hash_table,
|
||||
uint32_t max_search_depth, bool is_last_level,
|
||||
uint64_t (*GetSliceHashPtr)(const Slice&, uint32_t, uint64_t))
|
||||
: num_hash_table_(2),
|
||||
file_(file),
|
||||
value_length_(fixed_value_length),
|
||||
bucket_size_(fixed_key_length + fixed_value_length),
|
||||
// 8 is the difference between sizes of user key and InternalKey.
|
||||
bucket_size_(fixed_key_length +
|
||||
fixed_value_length - (is_last_level ? 8 : 0)),
|
||||
hash_table_ratio_(hash_table_ratio),
|
||||
max_num_buckets_(file_size / bucket_size_),
|
||||
max_num_hash_table_(max_num_hash_table),
|
||||
max_search_depth_(max_search_depth),
|
||||
is_last_level_file_(is_last_level),
|
||||
buckets_(max_num_buckets_),
|
||||
make_space_for_key_call_id_(0),
|
||||
GetSliceHash(GetSliceHashPtr) {
|
||||
// The bucket_size is currently not optimized for last level.
|
||||
// In last level, the bucket will not contain full key.
|
||||
// TODO(rbs): Find how we can determine if last level or not
|
||||
// before we start adding entries into the table.
|
||||
properties_.num_entries = 0;
|
||||
// Data is in a huge block.
|
||||
properties_.num_data_blocks = 1;
|
||||
@ -67,17 +68,17 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
status_ = Status::Corruption("Hash Table is full.");
|
||||
return;
|
||||
}
|
||||
unsigned int bucket_id;
|
||||
uint64_t bucket_id;
|
||||
bool bucket_found = false;
|
||||
autovector<unsigned int> hash_vals;
|
||||
autovector<uint64_t> hash_vals;
|
||||
ParsedInternalKey ikey;
|
||||
if (!ParseInternalKey(key, &ikey)) {
|
||||
status_ = Status::Corruption("Unable to parse key into inernal key.");
|
||||
return;
|
||||
}
|
||||
Slice user_key = ikey.user_key;
|
||||
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||
unsigned int hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||
uint64_t hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
|
||||
if (buckets_[hash_val].is_empty) {
|
||||
bucket_id = hash_val;
|
||||
bucket_found = true;
|
||||
@ -98,10 +99,9 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
}
|
||||
// We don't really need to rehash the entire table because old hashes are
|
||||
// still valid and we only increased the number of hash functions.
|
||||
unsigned int old_num_hash = num_hash_table_;
|
||||
num_hash_table_ = std::min(num_hash_table_ + 1, max_num_hash_table_);
|
||||
for (unsigned int i = old_num_hash; i < num_hash_table_; i++) {
|
||||
unsigned int hash_val = GetSliceHash(user_key, i, max_num_buckets_);
|
||||
uint64_t hash_val = GetSliceHash(user_key,
|
||||
num_hash_table_, max_num_buckets_);
|
||||
++num_hash_table_;
|
||||
if (buckets_[hash_val].is_empty) {
|
||||
bucket_found = true;
|
||||
bucket_id = hash_val;
|
||||
@ -110,15 +110,10 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
hash_vals.push_back(hash_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
buckets_[bucket_id].key = key;
|
||||
buckets_[bucket_id].value = value;
|
||||
buckets_[bucket_id].is_empty = false;
|
||||
|
||||
if (ikey.sequence != 0) {
|
||||
// This is not a last level file.
|
||||
is_last_level_file_ = false;
|
||||
}
|
||||
properties_.num_entries++;
|
||||
|
||||
// We assume that the keys are inserted in sorted order. To identify an
|
||||
@ -169,10 +164,11 @@ Status CuckooTableBuilder::Finish() {
|
||||
AppendInternalKey(&unused_bucket, ikey);
|
||||
}
|
||||
properties_.fixed_key_len = unused_bucket.size();
|
||||
unsigned int bucket_size = unused_bucket.size() + value_length_;
|
||||
// Resize to bucket size.
|
||||
unused_bucket.resize(bucket_size, 'a');
|
||||
properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kValueLength].assign(
|
||||
reinterpret_cast<const char*>(&value_length_), sizeof(value_length_));
|
||||
|
||||
unused_bucket.resize(bucket_size_, 'a');
|
||||
// Write the table.
|
||||
for (auto& bucket : buckets_) {
|
||||
Status s;
|
||||
@ -197,16 +193,20 @@ Status CuckooTableBuilder::Finish() {
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int offset = buckets_.size() * bucket_size;
|
||||
uint64_t offset = buckets_.size() * bucket_size_;
|
||||
unused_bucket.resize(properties_.fixed_key_len);
|
||||
properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kEmptyBucket] = unused_bucket;
|
||||
CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
|
||||
properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kNumHashTable] = std::to_string(num_hash_table_);
|
||||
PutVarint32(&properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kMaxNumBuckets], max_num_buckets_);
|
||||
CuckooTablePropertyNames::kNumHashTable].assign(
|
||||
reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_));
|
||||
properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kMaxNumBuckets].assign(
|
||||
reinterpret_cast<const char*>(&max_num_buckets_),
|
||||
sizeof(max_num_buckets_));
|
||||
|
||||
// Write meta blocks.
|
||||
MetaIndexBuilder meta_index_builer;
|
||||
MetaIndexBuilder meta_index_builder;
|
||||
PropertyBlockBuilder property_block_builder;
|
||||
|
||||
property_block_builder.AddTableProperty(properties_);
|
||||
@ -221,8 +221,8 @@ Status CuckooTableBuilder::Finish() {
|
||||
return s;
|
||||
}
|
||||
|
||||
meta_index_builer.Add(kPropertiesBlock, property_block_handle);
|
||||
Slice meta_index_block = meta_index_builer.Finish();
|
||||
meta_index_builder.Add(kPropertiesBlock, property_block_handle);
|
||||
Slice meta_index_block = meta_index_builder.Finish();
|
||||
|
||||
BlockHandle meta_index_block_handle;
|
||||
meta_index_block_handle.set_offset(offset);
|
||||
@ -262,44 +262,52 @@ uint64_t CuckooTableBuilder::FileSize() const {
|
||||
}
|
||||
|
||||
bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
|
||||
unsigned int *bucket_id, autovector<unsigned int> hash_vals) {
|
||||
uint64_t *bucket_id, autovector<uint64_t> hash_vals) {
|
||||
struct CuckooNode {
|
||||
unsigned int bucket_id;
|
||||
unsigned int depth;
|
||||
uint64_t bucket_id;
|
||||
uint32_t depth;
|
||||
int parent_pos;
|
||||
CuckooNode(unsigned int bucket_id, unsigned int depth, int parent_pos)
|
||||
CuckooNode(uint64_t bucket_id, uint32_t depth, int parent_pos)
|
||||
: bucket_id(bucket_id), depth(depth), parent_pos(parent_pos) {}
|
||||
};
|
||||
// This is BFS search tree that is stored simply as a vector.
|
||||
// Each node stores the index of parent node in the vector.
|
||||
std::vector<CuckooNode> tree;
|
||||
// This is a very bad way to keep track of visited nodes.
|
||||
// TODO(rbs): Change this by adding a 'GetKeyPathId' field to the bucket
|
||||
// and use it to track visited nodes.
|
||||
std::vector<bool> buckets_visited(max_num_buckets_, false);
|
||||
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||
unsigned int bucket_id = hash_vals[hash_cnt];
|
||||
buckets_visited[bucket_id] = true;
|
||||
// We want to identify already visited buckets in the current method call so
|
||||
// that we don't add same buckets again for exploration in the tree.
|
||||
// We do this by maintaining a count of current method call, which acts as a
|
||||
// unique id for this invocation of the method. We store this number into
|
||||
// the nodes that we explore in current method call.
|
||||
// It is unlikely for the increment operation to overflow because the maximum
|
||||
// number of times this will be called is <= max_num_hash_table_ +
|
||||
// max_num_buckets_.
|
||||
++make_space_for_key_call_id_;
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||
uint64_t bucket_id = hash_vals[hash_cnt];
|
||||
buckets_[bucket_id].make_space_for_key_call_id =
|
||||
make_space_for_key_call_id_;
|
||||
tree.push_back(CuckooNode(bucket_id, 0, -1));
|
||||
}
|
||||
bool null_found = false;
|
||||
unsigned int curr_pos = 0;
|
||||
uint32_t curr_pos = 0;
|
||||
while (!null_found && curr_pos < tree.size()) {
|
||||
CuckooNode& curr_node = tree[curr_pos];
|
||||
if (curr_node.depth >= max_search_depth_) {
|
||||
break;
|
||||
}
|
||||
CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
|
||||
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||
unsigned int child_bucket_id = GetSliceHash(
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||
uint64_t child_bucket_id = GetSliceHash(
|
||||
ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
|
||||
if (child_bucket_id == curr_node.bucket_id) {
|
||||
continue;
|
||||
}
|
||||
if (buckets_visited[child_bucket_id]) {
|
||||
if (buckets_[child_bucket_id].make_space_for_key_call_id ==
|
||||
make_space_for_key_call_id_) {
|
||||
continue;
|
||||
}
|
||||
buckets_visited[child_bucket_id] = true;
|
||||
buckets_[child_bucket_id].make_space_for_key_call_id =
|
||||
make_space_for_key_call_id_;
|
||||
tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1,
|
||||
curr_pos));
|
||||
if (buckets_[child_bucket_id].is_empty) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
@ -16,22 +16,14 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
struct CuckooBucket {
|
||||
CuckooBucket(): is_empty(true) {}
|
||||
Slice key;
|
||||
Slice value;
|
||||
bool is_empty;
|
||||
};
|
||||
|
||||
class CuckooTableBuilder: public TableBuilder {
|
||||
public:
|
||||
CuckooTableBuilder(
|
||||
WritableFile* file, unsigned int fixed_key_length,
|
||||
unsigned int fixed_value_length, double hash_table_ratio,
|
||||
unsigned int file_size, unsigned int max_num_hash_table,
|
||||
unsigned int max_search_depth,
|
||||
unsigned int (*GetSliceHash)(const Slice&, unsigned int,
|
||||
unsigned int));
|
||||
WritableFile* file, uint32_t fixed_key_length,
|
||||
uint32_t fixed_value_length, double hash_table_ratio,
|
||||
uint64_t file_size, uint32_t max_num_hash_table,
|
||||
uint32_t max_search_depth, bool is_last_level,
|
||||
uint64_t (*GetSliceHash)(const Slice&, uint32_t, uint64_t));
|
||||
|
||||
// REQUIRES: Either Finish() or Abandon() has been called.
|
||||
~CuckooTableBuilder();
|
||||
@ -64,23 +56,32 @@ class CuckooTableBuilder: public TableBuilder {
|
||||
uint64_t FileSize() const override;
|
||||
|
||||
private:
|
||||
bool MakeSpaceForKey(const Slice& key, unsigned int* bucket_id,
|
||||
autovector<unsigned int> hash_vals);
|
||||
struct CuckooBucket {
|
||||
CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {}
|
||||
Slice key;
|
||||
Slice value;
|
||||
bool is_empty;
|
||||
uint64_t make_space_for_key_call_id;
|
||||
};
|
||||
|
||||
unsigned int num_hash_table_;
|
||||
bool MakeSpaceForKey(const Slice& key, uint64_t* bucket_id,
|
||||
autovector<uint64_t> hash_vals);
|
||||
|
||||
uint32_t num_hash_table_;
|
||||
WritableFile* file_;
|
||||
const unsigned int value_length_;
|
||||
const unsigned int bucket_size_;
|
||||
const uint32_t value_length_;
|
||||
const uint32_t bucket_size_;
|
||||
const double hash_table_ratio_;
|
||||
const unsigned int max_num_buckets_;
|
||||
const unsigned int max_num_hash_table_;
|
||||
const unsigned int max_search_depth_;
|
||||
const uint64_t max_num_buckets_;
|
||||
const uint32_t max_num_hash_table_;
|
||||
const uint32_t max_search_depth_;
|
||||
const bool is_last_level_file_;
|
||||
Status status_;
|
||||
std::vector<CuckooBucket> buckets_;
|
||||
bool is_last_level_file_ = true;
|
||||
TableProperties properties_;
|
||||
unsigned int (*GetSliceHash)(const Slice& s, unsigned int index,
|
||||
unsigned int max_num_buckets);
|
||||
uint64_t make_space_for_key_call_id_;
|
||||
uint64_t (*GetSliceHash)(const Slice& s, uint32_t index,
|
||||
uint64_t max_num_buckets);
|
||||
std::string unused_user_key_ = "";
|
||||
std::string prev_key_;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
@ -19,20 +19,19 @@ namespace rocksdb {
|
||||
extern const uint64_t kCuckooTableMagicNumber;
|
||||
|
||||
namespace {
|
||||
std::unordered_map<std::string, std::vector<unsigned int>> hash_map;
|
||||
std::unordered_map<std::string, std::vector<uint64_t>> hash_map;
|
||||
|
||||
void AddHashLookups(const std::string& s, unsigned int bucket_id,
|
||||
unsigned int num_hash_fun) {
|
||||
std::vector<unsigned int> v;
|
||||
for (unsigned int i = 0; i < num_hash_fun; i++) {
|
||||
void AddHashLookups(const std::string& s, uint64_t bucket_id,
|
||||
uint32_t num_hash_fun) {
|
||||
std::vector<uint64_t> v;
|
||||
for (uint32_t i = 0; i < num_hash_fun; i++) {
|
||||
v.push_back(bucket_id + i);
|
||||
}
|
||||
hash_map[s] = v;
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned int GetSliceHash(const Slice& s, unsigned int index,
|
||||
unsigned int max_num_buckets) {
|
||||
uint64_t GetSliceHash(const Slice& s, uint32_t index,
|
||||
uint64_t max_num_buckets) {
|
||||
return hash_map[s.ToString()][index];
|
||||
}
|
||||
} // namespace
|
||||
@ -43,7 +42,9 @@ class CuckooBuilderTest {
|
||||
env_ = Env::Default();
|
||||
}
|
||||
|
||||
void CheckFileContents(const std::string& expected_data) {
|
||||
void CheckFileContents(const std::string& expected_data,
|
||||
std::string expected_unused_bucket, uint64_t expected_max_buckets,
|
||||
uint32_t expected_num_hash_fun) {
|
||||
// Read file
|
||||
unique_ptr<RandomAccessFile> read_file;
|
||||
ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
|
||||
@ -58,15 +59,22 @@ class CuckooBuilderTest {
|
||||
ASSERT_EQ(props->fixed_key_len, key_length);
|
||||
|
||||
// Check unused bucket.
|
||||
std::string unused_bucket = props->user_collected_properties[
|
||||
CuckooTablePropertyNames::kEmptyBucket];
|
||||
ASSERT_EQ(expected_unused_bucket, unused_bucket);
|
||||
std::string unused_key = props->user_collected_properties[
|
||||
CuckooTablePropertyNames::kEmptyKey];
|
||||
ASSERT_EQ(expected_unused_bucket.substr(0, key_length), unused_key);
|
||||
|
||||
unsigned int max_buckets;
|
||||
Slice max_buckets_slice = Slice(props->user_collected_properties[
|
||||
CuckooTablePropertyNames::kMaxNumBuckets]);
|
||||
GetVarint32(&max_buckets_slice, &max_buckets);
|
||||
uint32_t value_len_found =
|
||||
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
|
||||
CuckooTablePropertyNames::kValueLength].data());
|
||||
ASSERT_EQ(value_length, value_len_found);
|
||||
const uint64_t max_buckets =
|
||||
*reinterpret_cast<const uint64_t*>(props->user_collected_properties[
|
||||
CuckooTablePropertyNames::kMaxNumBuckets].data());
|
||||
ASSERT_EQ(expected_max_buckets, max_buckets);
|
||||
const uint32_t num_hash_fun_found =
|
||||
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
|
||||
CuckooTablePropertyNames::kNumHashTable].data());
|
||||
ASSERT_EQ(expected_num_hash_fun, num_hash_fun_found);
|
||||
delete props;
|
||||
// Check contents of the bucket.
|
||||
std::string read_data;
|
||||
@ -80,28 +88,25 @@ class CuckooBuilderTest {
|
||||
Env* env_;
|
||||
const EnvOptions env_options_;
|
||||
std::string fname;
|
||||
std::string expected_unused_bucket;
|
||||
unsigned int file_size = 100000;
|
||||
unsigned int num_items = 20;
|
||||
unsigned int num_hash_fun = 64;
|
||||
uint64_t file_size = 100000;
|
||||
uint32_t num_items = 20;
|
||||
uint32_t num_hash_fun = 64;
|
||||
double hash_table_ratio = 0.9;
|
||||
unsigned int ikey_length;
|
||||
unsigned int user_key_length;
|
||||
unsigned int key_length;
|
||||
unsigned int value_length;
|
||||
unsigned int bucket_length;
|
||||
unsigned int expected_max_buckets;
|
||||
uint32_t ikey_length;
|
||||
uint32_t user_key_length;
|
||||
uint32_t key_length;
|
||||
uint32_t value_length;
|
||||
uint32_t bucket_length;
|
||||
};
|
||||
|
||||
TEST(CuckooBuilderTest, NoCollision) {
|
||||
hash_map.clear();
|
||||
num_items = 20;
|
||||
num_hash_fun = 64;
|
||||
uint32_t expected_num_hash_fun = 2;
|
||||
std::vector<std::string> user_keys(num_items);
|
||||
std::vector<std::string> keys(num_items);
|
||||
std::vector<std::string> values(num_items);
|
||||
unsigned int bucket_ids = 0;
|
||||
for (unsigned int i = 0; i < num_items; i++) {
|
||||
uint64_t bucket_ids = 0;
|
||||
for (uint32_t i = 0; i < num_items; i++) {
|
||||
user_keys[i] = "keys" + std::to_string(i+100);
|
||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||
AppendInternalKey(&keys[i], ikey);
|
||||
@ -114,9 +119,10 @@ TEST(CuckooBuilderTest, NoCollision) {
|
||||
key_length = ikey_length;
|
||||
value_length = values[0].size();
|
||||
bucket_length = ikey_length + value_length;
|
||||
expected_max_buckets = file_size / bucket_length;
|
||||
uint64_t expected_max_buckets = file_size / bucket_length;
|
||||
std::string expected_unused_user_key = "keys10:";
|
||||
ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
|
||||
std::string expected_unused_bucket;
|
||||
AppendInternalKey(&expected_unused_bucket, ikey);
|
||||
expected_unused_bucket.resize(bucket_length, 'a');
|
||||
unique_ptr<WritableFile> writable_file;
|
||||
@ -125,11 +131,11 @@ TEST(CuckooBuilderTest, NoCollision) {
|
||||
CuckooTableBuilder cuckoo_builder(
|
||||
writable_file.get(), ikey_length,
|
||||
value_length, hash_table_ratio,
|
||||
file_size, num_hash_fun, 100, GetSliceHash);
|
||||
file_size, num_hash_fun, 100, false, GetSliceHash);
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
unsigned int key_idx = 0;
|
||||
uint32_t key_idx = 0;
|
||||
std::string expected_file_data = "";
|
||||
for (unsigned int i = 0; i < expected_max_buckets; i++) {
|
||||
for (uint32_t i = 0; i < expected_max_buckets; i++) {
|
||||
if (key_idx * num_hash_fun == i && key_idx < num_items) {
|
||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||
@ -142,16 +148,18 @@ TEST(CuckooBuilderTest, NoCollision) {
|
||||
}
|
||||
ASSERT_OK(cuckoo_builder.Finish());
|
||||
writable_file->Close();
|
||||
CheckFileContents(expected_file_data);
|
||||
CheckFileContents(expected_file_data, expected_unused_bucket,
|
||||
expected_max_buckets, expected_num_hash_fun);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, NoCollisionLastLevel) {
|
||||
hash_map.clear();
|
||||
uint32_t expected_num_hash_fun = 2;
|
||||
std::vector<std::string> user_keys(num_items);
|
||||
std::vector<std::string> keys(num_items);
|
||||
std::vector<std::string> values(num_items);
|
||||
unsigned int bucket_ids = 0;
|
||||
for (unsigned int i = 0; i < num_items; i++) {
|
||||
uint64_t bucket_ids = 0;
|
||||
for (uint32_t i = 0; i < num_items; i++) {
|
||||
user_keys[i] = "keys" + std::to_string(i+100);
|
||||
// Set zero sequence number in all keys.
|
||||
ParsedInternalKey ikey(user_keys[i], 0, kTypeValue);
|
||||
@ -165,20 +173,20 @@ TEST(CuckooBuilderTest, NoCollisionLastLevel) {
|
||||
key_length = user_key_length;
|
||||
value_length = values[0].size();
|
||||
bucket_length = key_length + value_length;
|
||||
expected_max_buckets = file_size / bucket_length;
|
||||
expected_unused_bucket = "keys10:";
|
||||
uint64_t expected_max_buckets = file_size / bucket_length;
|
||||
std::string expected_unused_bucket = "keys10:";
|
||||
expected_unused_bucket.resize(bucket_length, 'a');
|
||||
unique_ptr<WritableFile> writable_file;
|
||||
fname = test::TmpDir() + "/NoCollisionLastLevel_writable_file";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder cuckoo_builder(
|
||||
writable_file.get(), key_length,
|
||||
writable_file.get(), ikey_length,
|
||||
value_length, hash_table_ratio,
|
||||
file_size, num_hash_fun, 100, GetSliceHash);
|
||||
file_size, num_hash_fun, 100, true, GetSliceHash);
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
unsigned int key_idx = 0;
|
||||
uint32_t key_idx = 0;
|
||||
std::string expected_file_data = "";
|
||||
for (unsigned int i = 0; i < expected_max_buckets; i++) {
|
||||
for (uint32_t i = 0; i < expected_max_buckets; i++) {
|
||||
if (key_idx * num_hash_fun == i && key_idx < num_items) {
|
||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||
@ -191,7 +199,8 @@ TEST(CuckooBuilderTest, NoCollisionLastLevel) {
|
||||
}
|
||||
ASSERT_OK(cuckoo_builder.Finish());
|
||||
writable_file->Close();
|
||||
CheckFileContents(expected_file_data);
|
||||
CheckFileContents(expected_file_data, expected_unused_bucket,
|
||||
expected_max_buckets, expected_num_hash_fun);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, WithCollision) {
|
||||
@ -199,10 +208,11 @@ TEST(CuckooBuilderTest, WithCollision) {
|
||||
hash_map.clear();
|
||||
num_hash_fun = 20;
|
||||
num_items = num_hash_fun;
|
||||
uint32_t expected_num_hash_fun = num_hash_fun;
|
||||
std::vector<std::string> user_keys(num_items);
|
||||
std::vector<std::string> keys(num_items);
|
||||
std::vector<std::string> values(num_items);
|
||||
for (unsigned int i = 0; i < num_items; i++) {
|
||||
for (uint32_t i = 0; i < num_items; i++) {
|
||||
user_keys[i] = "keys" + std::to_string(i+100);
|
||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||
AppendInternalKey(&keys[i], ikey);
|
||||
@ -214,9 +224,10 @@ TEST(CuckooBuilderTest, WithCollision) {
|
||||
value_length = values[0].size();
|
||||
key_length = ikey_length;
|
||||
bucket_length = key_length + value_length;
|
||||
expected_max_buckets = file_size / bucket_length;
|
||||
uint64_t expected_max_buckets = file_size / bucket_length;
|
||||
std::string expected_unused_user_key = "keys10:";
|
||||
ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
|
||||
std::string expected_unused_bucket;
|
||||
AppendInternalKey(&expected_unused_bucket, ikey);
|
||||
expected_unused_bucket.resize(bucket_length, 'a');
|
||||
unique_ptr<WritableFile> writable_file;
|
||||
@ -224,11 +235,11 @@ TEST(CuckooBuilderTest, WithCollision) {
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder cuckoo_builder(
|
||||
writable_file.get(), key_length, value_length, hash_table_ratio,
|
||||
file_size, num_hash_fun, 100, GetSliceHash);
|
||||
file_size, num_hash_fun, 100, false, GetSliceHash);
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
unsigned int key_idx = 0;
|
||||
uint32_t key_idx = 0;
|
||||
std::string expected_file_data = "";
|
||||
for (unsigned int i = 0; i < expected_max_buckets; i++) {
|
||||
for (uint32_t i = 0; i < expected_max_buckets; i++) {
|
||||
if (key_idx == i && key_idx < num_items) {
|
||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||
@ -241,7 +252,8 @@ TEST(CuckooBuilderTest, WithCollision) {
|
||||
}
|
||||
ASSERT_OK(cuckoo_builder.Finish());
|
||||
writable_file->Close();
|
||||
CheckFileContents(expected_file_data);
|
||||
CheckFileContents(expected_file_data, expected_unused_bucket,
|
||||
expected_max_buckets, expected_num_hash_fun);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
|
||||
@ -253,7 +265,7 @@ TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
|
||||
std::vector<std::string> user_keys(num_items);
|
||||
std::vector<std::string> keys(num_items);
|
||||
std::vector<std::string> values(num_items);
|
||||
for (unsigned int i = 0; i < num_items; i++) {
|
||||
for (uint32_t i = 0; i < num_items; i++) {
|
||||
user_keys[i] = "keys" + std::to_string(i+100);
|
||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||
AppendInternalKey(&keys[i], ikey);
|
||||
@ -269,9 +281,9 @@ TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
|
||||
CuckooTableBuilder cuckoo_builder(
|
||||
writable_file.get(), ikey_length,
|
||||
value_length, hash_table_ratio, file_size, num_hash_fun,
|
||||
100, GetSliceHash);
|
||||
100, false, GetSliceHash);
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
|
||||
for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
|
||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||
@ -300,7 +312,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
|
||||
CuckooTableBuilder cuckoo_builder(
|
||||
writable_file.get(), ikey_length,
|
||||
value_length, hash_table_ratio, file_size, num_hash_fun,
|
||||
100, GetSliceHash);
|
||||
100, false, GetSliceHash);
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
cuckoo_builder.Add(Slice(key_to_reuse1), Slice(value));
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
@ -316,14 +328,14 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
|
||||
// Have two hash functions. Insert elements with overlapping hashes.
|
||||
// Finally insert an element which will displace all the current elements.
|
||||
num_hash_fun = 2;
|
||||
|
||||
unsigned int max_search_depth = 100;
|
||||
uint32_t expected_num_hash_fun = num_hash_fun;
|
||||
uint32_t max_search_depth = 100;
|
||||
num_items = max_search_depth + 2;
|
||||
std::vector<std::string> user_keys(num_items);
|
||||
std::vector<std::string> keys(num_items);
|
||||
std::vector<std::string> values(num_items);
|
||||
std::vector<unsigned int> expected_bucket_id(num_items);
|
||||
for (unsigned int i = 0; i < num_items - 1; i++) {
|
||||
std::vector<uint64_t> expected_bucket_id(num_items);
|
||||
for (uint32_t i = 0; i < num_items - 1; i++) {
|
||||
user_keys[i] = "keys" + std::to_string(i+100);
|
||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||
AppendInternalKey(&keys[i], ikey);
|
||||
@ -346,13 +358,14 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
|
||||
key_length = ikey_length;
|
||||
bucket_length = key_length + value_length;
|
||||
|
||||
expected_max_buckets = file_size / bucket_length;
|
||||
uint64_t expected_max_buckets = file_size / bucket_length;
|
||||
std::string expected_unused_user_key = "keys10:";
|
||||
ikey = ParsedInternalKey(expected_unused_user_key, 0, kTypeValue);
|
||||
std::string expected_unused_bucket;
|
||||
AppendInternalKey(&expected_unused_bucket, ikey);
|
||||
expected_unused_bucket.resize(bucket_length, 'a');
|
||||
std::string expected_file_data = "";
|
||||
for (unsigned int i = 0; i < expected_max_buckets; i++) {
|
||||
for (uint32_t i = 0; i < expected_max_buckets; i++) {
|
||||
expected_file_data += expected_unused_bucket;
|
||||
}
|
||||
|
||||
@ -362,9 +375,9 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
|
||||
CuckooTableBuilder cuckoo_builder(
|
||||
writable_file.get(), key_length,
|
||||
value_length, hash_table_ratio, file_size,
|
||||
num_hash_fun, max_search_depth, GetSliceHash);
|
||||
num_hash_fun, max_search_depth, false, GetSliceHash);
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
for (unsigned int key_idx = 0; key_idx < num_items; key_idx++) {
|
||||
for (uint32_t key_idx = 0; key_idx < num_items; key_idx++) {
|
||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||
@ -373,7 +386,8 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
|
||||
}
|
||||
ASSERT_OK(cuckoo_builder.Finish());
|
||||
writable_file->Close();
|
||||
CheckFileContents(expected_file_data);
|
||||
CheckFileContents(expected_file_data, expected_unused_bucket,
|
||||
expected_max_buckets, expected_num_hash_fun);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
||||
@ -382,12 +396,12 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
||||
// Finally insert an element which will displace all the current elements.
|
||||
num_hash_fun = 2;
|
||||
|
||||
unsigned int max_search_depth = 100;
|
||||
uint32_t max_search_depth = 100;
|
||||
num_items = max_search_depth + 3;
|
||||
std::vector<std::string> user_keys(num_items);
|
||||
std::vector<std::string> keys(num_items);
|
||||
std::vector<std::string> values(num_items);
|
||||
for (unsigned int i = 0; i < num_items - 1; i++) {
|
||||
for (uint32_t i = 0; i < num_items - 1; i++) {
|
||||
user_keys[i] = "keys" + std::to_string(i+100);
|
||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||
AppendInternalKey(&keys[i], ikey);
|
||||
@ -410,9 +424,9 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
||||
CuckooTableBuilder cuckoo_builder(
|
||||
writable_file.get(), ikey_length,
|
||||
value_length, hash_table_ratio, file_size, num_hash_fun,
|
||||
max_search_depth, GetSliceHash);
|
||||
max_search_depth, false, GetSliceHash);
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
|
||||
for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
|
||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||
@ -431,7 +445,7 @@ TEST(CuckooBuilderTest, FailWhenTableIsFull) {
|
||||
std::vector<std::string> user_keys(num_items);
|
||||
std::vector<std::string> keys(num_items);
|
||||
std::vector<std::string> values(num_items);
|
||||
for (unsigned int i = 0; i < num_items; i++) {
|
||||
for (uint32_t i = 0; i < num_items; i++) {
|
||||
user_keys[i] = "keys" + std::to_string(i+1000);
|
||||
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
||||
AppendInternalKey(&keys[i], ikey);
|
||||
@ -451,9 +465,9 @@ TEST(CuckooBuilderTest, FailWhenTableIsFull) {
|
||||
CuckooTableBuilder cuckoo_builder(
|
||||
writable_file.get(), ikey_length,
|
||||
value_length, hash_table_ratio, file_size, num_hash_fun,
|
||||
100, GetSliceHash);
|
||||
100, false, GetSliceHash);
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
|
||||
for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
|
||||
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||
ASSERT_OK(cuckoo_builder.status());
|
||||
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
@ -10,11 +10,11 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
static const unsigned int kMaxNumHashTable = 64;
|
||||
static const uint32_t kMaxNumHashTable = 64;
|
||||
|
||||
unsigned int GetSliceMurmurHash(const Slice& s, unsigned int index,
|
||||
unsigned int max_num_buckets) {
|
||||
static constexpr unsigned int seeds[kMaxNumHashTable] = {
|
||||
uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index,
|
||||
uint64_t max_num_buckets) {
|
||||
static constexpr uint32_t seeds[kMaxNumHashTable] = {
|
||||
816922183, 506425713, 949485004, 22513986, 421427259, 500437285,
|
||||
888981693, 847587269, 511007211, 722295391, 934013645, 566947683,
|
||||
193618736, 428277388, 770956674, 819994962, 755946528, 40807421,
|
||||
|
Loading…
Reference in New Issue
Block a user