Option to choose modulo when calculating CuckooTable hash
Summary:
Using modulo to calculate the hash makes lookups ~8% slower, but it has its
benefits: the file size is more predictable and more space efficient.

Test Plan: db_bench

Reviewers: igor, yhchiang, sdong

Reviewed By: sdong

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D23691
parent fbd2dafc9f
commit 581442d446
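The tradeoff described in the summary comes down to how a 64-bit hash value is mapped to a bucket: modulo works for any table size, while the bitwise-AND mask requires the bucket count to be rounded up to a power of two. Below is a standalone sketch (illustrative names, not RocksDB code) of the two mappings and their effect on table size.

#include <cstdint>
#include <cstdio>

// With modulo, the table can hold exactly num_entries / ratio buckets.
// With bitwise AND, the bucket count must be rounded up to a power of two,
// so up to ~2x the space can be wasted, but the mapping is a single AND.
uint64_t BucketModulo(uint64_t hash, uint64_t table_size) {
  return hash % table_size;  // valid for any table_size
}

uint64_t BucketBitAnd(uint64_t hash, uint64_t table_size) {
  // REQUIRES: table_size is a power of two.
  return hash & (table_size - 1);
}

int main() {
  const uint64_t num_entries = 1000000;
  const double ratio = 0.9;
  // Modulo: table size follows the data exactly.
  uint64_t modulo_size = static_cast<uint64_t>(num_entries / ratio);  // 1111111
  // Bit-and: size doubles from 2 until it satisfies the ratio.
  uint64_t bitand_size = 2;
  while (bitand_size < num_entries / ratio) bitand_size *= 2;  // 2097152
  printf("modulo buckets: %llu, bit-and buckets: %llu\n",
         (unsigned long long)modulo_size, (unsigned long long)bitand_size);
  return 0;
}

With one million entries at a 0.9 load factor, modulo needs about 1.11M buckets while the power-of-two table needs 2M; that gap is where the space savings come from.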
@@ -253,6 +253,8 @@ struct CuckooTablePropertyNames {
   static const std::string kIsLastLevel;
   // Indicate if using identity function for the first hash function.
   static const std::string kIdentityAsFirstHash;
+  // Indicate if using modulo or bitwise AND to calculate the hash value.
+  static const std::string kUseModuleHash;
 };

 struct CuckooTableOptions {
@@ -271,11 +273,17 @@ struct CuckooTableOptions {
   // function. This makes lookups more cache friendly in case
   // of collisions.
   uint32_t cuckoo_block_size = 5;
-  // If this options is enabled, user key is treated as uint64_t and its value
+  // If this option is enabled, user key is treated as uint64_t and its value
   // is used as hash value directly. This option changes builder's behavior.
   // Reader ignore this option and behave according to what specified in table
   // property.
   bool identity_as_first_hash = false;
+  // If this option is set to true, modulo is used during hash calculation.
+  // This often yields better space efficiency at the cost of performance.
+  // If this option is set to false, the number of entries in the table is
+  // constrained to be a power of two, and bitwise AND is used to calculate
+  // the hash, which is faster in general.
+  bool use_module_hash = true;
 };

 // Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing
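For context on how the new field would be used: the option travels through CuckooTableOptions into the factory. A hedged usage sketch, assuming the NewCuckooTableFactory(const CuckooTableOptions&) helper declared alongside this struct:

#include "rocksdb/options.h"
#include "rocksdb/table.h"

// Sketch: opt out of modulo hashing to trade space for lookup speed.
rocksdb::Options MakeCuckooOptions() {
  rocksdb::CuckooTableOptions cuckoo_opts;
  cuckoo_opts.hash_table_ratio = 0.9;   // target load factor
  cuckoo_opts.use_module_hash = false;  // power-of-two table + bitwise AND
  rocksdb::Options options;
  options.table_factory.reset(rocksdb::NewCuckooTableFactory(cuckoo_opts));
  return options;
}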
@@ -37,6 +37,8 @@ const std::string CuckooTablePropertyNames::kCuckooBlockSize =
     "rocksdb.cuckoo.hash.cuckooblocksize";
 const std::string CuckooTablePropertyNames::kIdentityAsFirstHash =
     "rocksdb.cuckoo.hash.identityfirst";
+const std::string CuckooTablePropertyNames::kUseModuleHash =
+    "rocksdb.cuckoo.hash.usemodule";

 // Obtained by running echo rocksdb.table.cuckoo | sha1sum
 extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
@@ -45,7 +47,7 @@ CuckooTableBuilder::CuckooTableBuilder(
     WritableFile* file, double max_hash_table_ratio,
     uint32_t max_num_hash_table, uint32_t max_search_depth,
     const Comparator* user_comparator, uint32_t cuckoo_block_size,
-    bool identity_as_first_hash,
+    bool use_module_hash, bool identity_as_first_hash,
     uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
     : num_hash_func_(2),
       file_(file),
@@ -53,10 +55,11 @@ CuckooTableBuilder::CuckooTableBuilder(
       max_num_hash_func_(max_num_hash_table),
       max_search_depth_(max_search_depth),
       cuckoo_block_size_(std::max(1U, cuckoo_block_size)),
-      hash_table_size_(2),
+      hash_table_size_(use_module_hash ? 0 : 2),
       is_last_level_file_(false),
       has_seen_first_key_(false),
       ucomp_(user_comparator),
+      use_module_hash_(use_module_hash),
      identity_as_first_hash_(identity_as_first_hash),
       get_slice_hash_(get_slice_hash),
       closed_(false) {
@@ -105,14 +108,15 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
   } else if (ikey.user_key.compare(largest_user_key_) > 0) {
     largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
   }
-  if (hash_table_size_ < kvs_.size() / max_hash_table_ratio_) {
-    hash_table_size_ *= 2;
+  if (!use_module_hash_) {
+    if (hash_table_size_ < kvs_.size() / max_hash_table_ratio_) {
+      hash_table_size_ *= 2;
+    }
   }
 }

 Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
-  uint64_t hash_table_size_minus_one = hash_table_size_ - 1;
-  buckets->resize(hash_table_size_minus_one + cuckoo_block_size_);
+  buckets->resize(hash_table_size_ + cuckoo_block_size_ - 1);
   uint64_t make_space_for_key_call_id = 0;
   for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) {
     uint64_t bucket_id;
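This hunk splits the sizing policy: in bitwise-AND mode the table keeps doubling as keys are added, while in modulo mode hash_table_size_ stays at 0 until Finish() computes it exactly (see the Finish() hunk below). A condensed, illustrative restatement of the two policies (a hypothetical struct, not the actual builder):

#include <cstddef>
#include <cstdint>

// Illustrative only: how the two modes arrive at the final bucket count.
struct SizingSketch {
  bool use_module_hash;
  double max_hash_table_ratio;
  uint64_t hash_table_size;  // starts at 0 (modulo) or 2 (bit-and)

  void OnAdd(size_t num_entries_so_far) {
    // Bit-and mode: keep a power of two ahead of the load factor.
    if (!use_module_hash &&
        hash_table_size < num_entries_so_far / max_hash_table_ratio) {
      hash_table_size *= 2;
    }
  }

  void OnFinish(size_t num_entries) {
    // Modulo mode: exact size, no rounding to a power of two needed.
    if (use_module_hash) {
      hash_table_size =
          static_cast<uint64_t>(num_entries / max_hash_table_ratio);
    }
  }
};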
@@ -122,8 +126,8 @@ Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
         ExtractUserKey(kvs_[vector_idx].first);
     for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found;
         ++hash_cnt) {
-      uint64_t hash_val = CuckooHash(user_key, hash_cnt,
-          hash_table_size_minus_one, identity_as_first_hash_, get_slice_hash_);
+      uint64_t hash_val = CuckooHash(user_key, hash_cnt, use_module_hash_,
+          hash_table_size_, identity_as_first_hash_, get_slice_hash_);
       // If there is a collision, check next cuckoo_block_size_ locations for
       // empty locations. While checking, if we reach end of the hash table,
       // stop searching and proceed for next hash function.
@@ -152,8 +156,8 @@ Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
       }
       // We don't really need to rehash the entire table because old hashes are
       // still valid and we only increased the number of hash functions.
-      uint64_t hash_val = CuckooHash(user_key, num_hash_func_,
-          hash_table_size_minus_one, identity_as_first_hash_, get_slice_hash_);
+      uint64_t hash_val = CuckooHash(user_key, num_hash_func_, use_module_hash_,
+          hash_table_size_, identity_as_first_hash_, get_slice_hash_);
       ++num_hash_func_;
       for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
           ++block_idx, ++hash_val) {
@@ -178,6 +182,10 @@ Status CuckooTableBuilder::Finish() {
   Status s;
   std::string unused_bucket;
   if (!kvs_.empty()) {
+    // Calculate the real hash table size if modulo hashing is enabled.
+    if (use_module_hash_) {
+      hash_table_size_ = kvs_.size() / max_hash_table_ratio_;
+    }
     s = MakeHashTable(&buckets);
     if (!s.ok()) {
       return s;
@@ -252,11 +260,10 @@ Status CuckooTableBuilder::Finish() {
       CuckooTablePropertyNames::kNumHashFunc].assign(
       reinterpret_cast<char*>(&num_hash_func_), sizeof(num_hash_func_));

-  uint64_t hash_table_size = buckets.size() - cuckoo_block_size_ + 1;
   properties_.user_collected_properties[
       CuckooTablePropertyNames::kHashTableSize].assign(
-      reinterpret_cast<const char*>(&hash_table_size),
-      sizeof(hash_table_size));
+      reinterpret_cast<const char*>(&hash_table_size_),
+      sizeof(hash_table_size_));
   properties_.user_collected_properties[
       CuckooTablePropertyNames::kIsLastLevel].assign(
       reinterpret_cast<const char*>(&is_last_level_file_),
@@ -269,6 +276,10 @@ Status CuckooTableBuilder::Finish() {
       CuckooTablePropertyNames::kIdentityAsFirstHash].assign(
       reinterpret_cast<const char*>(&identity_as_first_hash_),
       sizeof(identity_as_first_hash_));
+  properties_.user_collected_properties[
+      CuckooTablePropertyNames::kUseModuleHash].assign(
+      reinterpret_cast<const char*>(&use_module_hash_),
+      sizeof(use_module_hash_));

   // Write meta blocks.
   MetaIndexBuilder meta_index_builder;
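Like the existing bool properties, the new flag is persisted by copying its raw bytes into the property string and reinterpreting them on read. A self-contained sketch of that round trip, using a plain std::map in place of the real property collection:

#include <cassert>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::string> user_props;
  bool use_module_hash = true;

  // Writer side: store the raw bytes of the bool.
  user_props["rocksdb.cuckoo.hash.usemodule"].assign(
      reinterpret_cast<const char*>(&use_module_hash),
      sizeof(use_module_hash));

  // Reader side: reinterpret the stored bytes.
  bool decoded = *reinterpret_cast<const bool*>(
      user_props["rocksdb.cuckoo.hash.usemodule"].data());
  assert(decoded == use_module_hash);
  return 0;
}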
@@ -322,16 +333,22 @@ uint64_t CuckooTableBuilder::FileSize() const {
     return 0;
   }

-  // Account for buckets being a power of two.
-  // As elements are added, file size remains constant for a while and doubles
-  // its size. Since compaction algorithm stops adding elements only after it
-  // exceeds the file limit, we account for the extra element being added here.
-  uint64_t expected_hash_table_size = hash_table_size_;
-  if (expected_hash_table_size < (kvs_.size() + 1) / max_hash_table_ratio_) {
-    expected_hash_table_size *= 2;
-  }
-  return (kvs_[0].first.size() + kvs_[0].second.size()) *
-      expected_hash_table_size - 1;
+  if (use_module_hash_) {
+    return (kvs_[0].first.size() + kvs_[0].second.size()) * kvs_.size() /
+        max_hash_table_ratio_;
+  } else {
+    // Account for buckets being a power of two.
+    // As elements are added, file size remains constant for a while and
+    // doubles its size. Since compaction algorithm stops adding elements
+    // only after it exceeds the file limit, we account for the extra element
+    // being added here.
+    uint64_t expected_hash_table_size = hash_table_size_;
+    if (expected_hash_table_size < (kvs_.size() + 1) / max_hash_table_ratio_) {
+      expected_hash_table_size *= 2;
+    }
+    return (kvs_[0].first.size() + kvs_[0].second.size()) *
+        expected_hash_table_size - 1;
+  }
 }

 // This method is invoked when there is no place to insert the target key.
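A quick numeric check of the two FileSize() estimates, with hypothetical 16-byte keys, 8-byte values, 1000 entries, and a 0.9 hash table ratio:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t bucket_size = 16 + 8;  // key bytes + value bytes (hypothetical)
  const uint64_t num_entries = 1000;
  const double ratio = 0.9;

  // Modulo mode: the estimate tracks the entry count directly.
  uint64_t modulo_bytes =
      static_cast<uint64_t>(bucket_size * num_entries / ratio);  // 26666

  // Bit-and mode: next power of two past num_entries / ratio buckets.
  uint64_t buckets = 2;
  while (buckets < (num_entries + 1) / ratio) buckets *= 2;  // 2048
  uint64_t bitand_bytes = bucket_size * buckets - 1;          // 49151

  printf("modulo: %llu bytes, bit-and: %llu bytes\n",
         (unsigned long long)modulo_bytes, (unsigned long long)bitand_bytes);
  return 0;
}

This is the "more predictable file size" claim from the summary in miniature: the modulo estimate grows smoothly with the data, while the power-of-two estimate jumps at each doubling.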
@@ -373,7 +390,6 @@ bool CuckooTableBuilder::MakeSpaceForKey(
           make_space_for_key_call_id;
     tree.push_back(CuckooNode(bucket_id, 0, 0));
   }
-  uint64_t hash_table_size_minus_one = hash_table_size_ - 1;
   bool null_found = false;
   uint32_t curr_pos = 0;
   while (!null_found && curr_pos < tree.size()) {
@@ -388,7 +404,7 @@ bool CuckooTableBuilder::MakeSpaceForKey(
       uint64_t child_bucket_id = CuckooHash(
           (is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first :
            ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first))),
-          hash_cnt, hash_table_size_minus_one, identity_as_first_hash_,
+          hash_cnt, use_module_hash_, hash_table_size_, identity_as_first_hash_,
           get_slice_hash_);
       // Iterate inside Cuckoo Block.
       for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
@@ -24,7 +24,7 @@ class CuckooTableBuilder: public TableBuilder {
       WritableFile* file, double max_hash_table_ratio,
       uint32_t max_num_hash_func, uint32_t max_search_depth,
       const Comparator* user_comparator, uint32_t cuckoo_block_size,
-      bool identity_as_first_hash,
+      bool use_module_hash, bool identity_as_first_hash,
       uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));

   // REQUIRES: Either Finish() or Abandon() has been called.
@@ -88,6 +88,7 @@ class CuckooTableBuilder: public TableBuilder {
   TableProperties properties_;
   bool has_seen_first_key_;
   const Comparator* ucomp_;
+  bool use_module_hash_;
   bool identity_as_first_hash_;
   uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
       uint64_t max_num_buckets);
@@ -50,12 +50,6 @@ class CuckooBuilderTest {
     TableProperties* props = nullptr;
     ASSERT_OK(ReadTableProperties(read_file.get(), read_file_size,
         kCuckooTableMagicNumber, env_, nullptr, &props));
-    ASSERT_EQ(props->num_entries, keys.size());
-    ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size());
-    ASSERT_EQ(props->data_size, expected_unused_bucket.size() *
-        (expected_table_size + expected_cuckoo_block_size - 1));
-    ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len);
-
     // Check unused bucket.
     std::string unused_key = props->user_collected_properties[
         CuckooTablePropertyNames::kEmptyKey];
@@ -83,6 +77,12 @@ class CuckooBuilderTest {
         *reinterpret_cast<const bool*>(props->user_collected_properties[
         CuckooTablePropertyNames::kIsLastLevel].data());
     ASSERT_EQ(expected_is_last_level, is_last_level_found);
+
+    ASSERT_EQ(props->num_entries, keys.size());
+    ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size());
+    ASSERT_EQ(props->data_size, expected_unused_bucket.size() *
+        (expected_table_size + expected_cuckoo_block_size - 1));
+    ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len);
     delete props;

     // Check contents of the bucket.
@@ -133,12 +133,12 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) {
   fname = test::TmpDir() + "/EmptyFile";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      4, 100, BytewiseComparator(), 1, false, GetSliceHash);
+      4, 100, BytewiseComparator(), 1, false, false, GetSliceHash);
   ASSERT_OK(builder.status());
   ASSERT_EQ(0UL, builder.FileSize());
   ASSERT_OK(builder.Finish());
   ASSERT_OK(writable_file->Close());
-  CheckFileContents({}, {}, {}, "", 0, 2, false);
+  CheckFileContents({}, {}, {}, "", 2, 2, false);
 }

 TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
@@ -162,7 +162,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
   fname = test::TmpDir() + "/NoCollisionFullKey";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
+      num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash);
   ASSERT_OK(builder.status());
   for (uint32_t i = 0; i < user_keys.size(); i++) {
     builder.Add(Slice(keys[i]), Slice(values[i]));
@@ -202,7 +202,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
   fname = test::TmpDir() + "/WithCollisionFullKey";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
+      num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash);
   ASSERT_OK(builder.status());
   for (uint32_t i = 0; i < user_keys.size(); i++) {
     builder.Add(Slice(keys[i]), Slice(values[i]));
@@ -243,8 +243,8 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
   fname = test::TmpDir() + "/WithCollisionFullKey2";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, false,
-      GetSliceHash);
+      num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size,
+      false, false, GetSliceHash);
   ASSERT_OK(builder.status());
   for (uint32_t i = 0; i < user_keys.size(); i++) {
     builder.Add(Slice(keys[i]), Slice(values[i]));
@@ -289,7 +289,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
   fname = test::TmpDir() + "/WithCollisionPathFullKey";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
+      num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash);
   ASSERT_OK(builder.status());
   for (uint32_t i = 0; i < user_keys.size(); i++) {
     builder.Add(Slice(keys[i]), Slice(values[i]));
@@ -331,7 +331,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
   fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      num_hash_fun, 100, BytewiseComparator(), 2, false, GetSliceHash);
+      num_hash_fun, 100, BytewiseComparator(), 2, false, false, GetSliceHash);
   ASSERT_OK(builder.status());
   for (uint32_t i = 0; i < user_keys.size(); i++) {
     builder.Add(Slice(keys[i]), Slice(values[i]));
@@ -367,7 +367,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
   fname = test::TmpDir() + "/NoCollisionUserKey";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
+      num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash);
   ASSERT_OK(builder.status());
   for (uint32_t i = 0; i < user_keys.size(); i++) {
     builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@@ -403,7 +403,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
   fname = test::TmpDir() + "/WithCollisionUserKey";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
+      num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash);
   ASSERT_OK(builder.status());
   for (uint32_t i = 0; i < user_keys.size(); i++) {
     builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@@ -441,7 +441,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
   fname = test::TmpDir() + "/WithCollisionPathUserKey";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      num_hash_fun, 2, BytewiseComparator(), 1, false, GetSliceHash);
+      num_hash_fun, 2, BytewiseComparator(), 1, false, false, GetSliceHash);
   ASSERT_OK(builder.status());
   for (uint32_t i = 0; i < user_keys.size(); i++) {
     builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@@ -479,7 +479,7 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
   fname = test::TmpDir() + "/WithCollisionPathUserKey";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      num_hash_fun, 2, BytewiseComparator(), 1, false, GetSliceHash);
+      num_hash_fun, 2, BytewiseComparator(), 1, false, false, GetSliceHash);
   ASSERT_OK(builder.status());
   for (uint32_t i = 0; i < user_keys.size(); i++) {
     builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
@@ -499,7 +499,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
   fname = test::TmpDir() + "/FailWhenSameKeyInserted";
   ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
   CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
-      num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
+      num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash);
   ASSERT_OK(builder.status());

   builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));
@@ -30,10 +30,11 @@ TableBuilder* CuckooTableFactory::NewTableBuilder(
     const InternalKeyComparator& internal_comparator,
     WritableFile* file, const CompressionType,
     const CompressionOptions&) const {
+  // TODO: change builder to take the option struct
   return new CuckooTableBuilder(file, table_options_.hash_table_ratio, 64,
       table_options_.max_search_depth, internal_comparator.user_comparator(),
-      table_options_.cuckoo_block_size, table_options_.identity_as_first_hash,
-      nullptr);
+      table_options_.cuckoo_block_size, table_options_.use_module_hash,
+      table_options_.identity_as_first_hash, nullptr);
 }

 std::string CuckooTableFactory::GetPrintableTableOptions() const {
@@ -15,21 +15,27 @@ namespace rocksdb {

 const uint32_t kCuckooMurmurSeedMultiplier = 816922183;
 static inline uint64_t CuckooHash(
-    const Slice& user_key, uint32_t hash_cnt, uint64_t table_size_minus_one,
-    bool identity_as_first_hash,
+    const Slice& user_key, uint32_t hash_cnt, bool use_module_hash,
+    uint64_t table_size_, bool identity_as_first_hash,
     uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) {
 #ifndef NDEBUG
   // This part is used only in unit tests.
   if (get_slice_hash != nullptr) {
-    return get_slice_hash(user_key, hash_cnt, table_size_minus_one + 1);
+    return get_slice_hash(user_key, hash_cnt, table_size_);
   }
 #endif
+  uint64_t value = 0;
   if (hash_cnt == 0 && identity_as_first_hash) {
-    return (*reinterpret_cast<const int64_t*>(user_key.data())) &
-        table_size_minus_one;
+    value = (*reinterpret_cast<const int64_t*>(user_key.data()));
+  } else {
+    value = MurmurHash(user_key.data(), user_key.size(),
+        kCuckooMurmurSeedMultiplier * hash_cnt);
+  }
+  if (use_module_hash) {
+    return value % table_size_;
+  } else {
+    return value & (table_size_ - 1);
   }
-  return MurmurHash(user_key.data(), user_key.size(),
-      kCuckooMurmurSeedMultiplier * hash_cnt) & table_size_minus_one;
 }

 // Cuckoo Table is designed for applications that require fast point lookups
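The reworked CuckooHash now computes one 64-bit value and then maps it with either % or &. To see why modulo admits arbitrary table sizes, here is a standalone sketch with MurmurHash replaced by a stub mixer (only the final mapping matters for the demo):

#include <cstdint>
#include <cstdio>

// Stub standing in for rocksdb::MurmurHash; any 64-bit mixer works here.
uint64_t StubHash(uint64_t x) {
  x ^= x >> 33;
  x *= 0xff51afd7ed558ccdULL;
  x ^= x >> 33;
  return x;
}

int main() {
  const uint64_t value = StubHash(42);

  // Modulo mode: any table size is valid, e.g. 1,111,111 buckets.
  const uint64_t table_size_mod = 1111111;
  printf("modulo bucket:  %llu\n",
         (unsigned long long)(value % table_size_mod));

  // Bit-and mode: the mask is only correct for a power-of-two table size.
  const uint64_t table_size_pow2 = 1 << 21;  // 2097152
  printf("bit-and bucket: %llu\n",
         (unsigned long long)(value & (table_size_pow2 - 1)));
  return 0;
}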
@@ -77,8 +77,9 @@ CuckooTableReader::CuckooTableReader(
     status_ = Status::Corruption("Hash table size not found");
     return;
   }
-  table_size_minus_one_ = *reinterpret_cast<const uint64_t*>(
-      hash_table_size->second.data()) - 1;
+  table_size_ = *reinterpret_cast<const uint64_t*>(
+      hash_table_size->second.data());
+
   auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel);
   if (is_last_level == user_props.end()) {
     status_ = Status::Corruption("Is last level not found");
@@ -95,6 +96,15 @@ CuckooTableReader::CuckooTableReader(
   identity_as_first_hash_ = *reinterpret_cast<const bool*>(
       identity_as_first_hash->second.data());

+  auto use_module_hash = user_props.find(
+      CuckooTablePropertyNames::kUseModuleHash);
+  if (use_module_hash == user_props.end()) {
+    status_ = Status::Corruption("hash type is not found");
+    return;
+  }
+  use_module_hash_ = *reinterpret_cast<const bool*>(
+      use_module_hash->second.data());
+  fprintf(stderr, "use_module_hash %d\n", use_module_hash_);
   auto cuckoo_block_size = user_props.find(
       CuckooTablePropertyNames::kCuckooBlockSize);
   if (cuckoo_block_size == user_props.end()) {
@@ -116,8 +126,8 @@ Status CuckooTableReader::Get(
   Slice user_key = ExtractUserKey(key);
   for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
     uint64_t offset = bucket_length_ * CuckooHash(
-        user_key, hash_cnt, table_size_minus_one_, identity_as_first_hash_,
-        get_slice_hash_);
+        user_key, hash_cnt, use_module_hash_, table_size_,
+        identity_as_first_hash_, get_slice_hash_);
     const char* bucket = &file_data_.data()[offset];
     for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
         ++block_idx, bucket += bucket_length_) {
@@ -151,7 +161,7 @@ void CuckooTableReader::Prepare(const Slice& key) {
   // Prefetch the first Cuckoo Block.
   Slice user_key = ExtractUserKey(key);
   uint64_t addr = reinterpret_cast<uint64_t>(file_data_.data()) +
-      bucket_length_ * CuckooHash(user_key, 0, table_size_minus_one_,
+      bucket_length_ * CuckooHash(user_key, 0, use_module_hash_, table_size_,
      identity_as_first_hash_, nullptr);
   uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_;
   for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) {
@@ -219,8 +229,7 @@ CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)

 void CuckooTableIterator::LoadKeysFromReader() {
   key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries);
-  uint64_t num_buckets = reader_->table_size_minus_one_ +
-      reader_->cuckoo_block_size_;
+  uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1;
   for (uint32_t bucket_id = 0; bucket_id < num_buckets; bucket_id++) {
     Slice read_key;
     status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_,
@@ -65,6 +65,7 @@ class CuckooTableReader: public TableReader {
   Slice file_data_;
   bool is_last_level_;
   bool identity_as_first_hash_;
+  bool use_module_hash_;
   std::shared_ptr<const TableProperties> table_props_;
   Status status_;
   uint32_t num_hash_func_;
@@ -74,7 +75,7 @@ class CuckooTableReader: public TableReader {
   uint32_t bucket_length_;
   uint32_t cuckoo_block_size_;
   uint32_t cuckoo_block_bytes_minus_one_;
-  uint64_t table_size_minus_one_;
+  uint64_t table_size_;
   const Comparator* ucomp_;
   uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
       uint64_t max_num_buckets);
@@ -110,8 +110,8 @@ class CuckooReaderTest {
     std::unique_ptr<WritableFile> writable_file;
     ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
     CuckooTableBuilder builder(
-        writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, false,
-        GetSliceHash);
+        writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2,
+        false, false, GetSliceHash);
     ASSERT_OK(builder.status());
     for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
       builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
@@ -434,7 +434,7 @@ void WriteFile(const std::vector<std::string>& keys,
   CuckooTableBuilder builder(
       writable_file.get(), hash_ratio,
       64, 1000, test::Uint64Comparator(), 5,
-      FLAGS_identity_as_first_hash, nullptr);
+      false, FLAGS_identity_as_first_hash, nullptr);
   ASSERT_OK(builder.status());
   for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
     // Value is just a part of key.