Minor changes to CuckooTableBuilder
Summary: - Copy the key and value to in-memory hash table during Add operation. Also modified cuckoo_table_reader_test to use this. - Store only the user_key in in-memory hash table if it is last level file. - Handle Carryover while chosing unused key in Finish() method in case unused key was never found before Finish() call. Test Plan: cuckoo_table_reader_test --enable_perf cuckoo_table_builder_test valgrind_check asan_check Reviewers: sdong, yhchiang, igor, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D20715
This commit is contained in:
parent
f04356e660
commit
91c01485d1
@ -86,7 +86,9 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
bucket_found = true;
|
||||
break;
|
||||
} else {
|
||||
if (user_key.compare(ExtractUserKey(buckets_[hash_val].key)) == 0) {
|
||||
if (user_key.compare(
|
||||
is_last_level_file_ ? Slice(buckets_[hash_val].key)
|
||||
: ExtractUserKey(Slice(buckets_[hash_val].key))) == 0) {
|
||||
status_ = Status::Corruption("Same key is being inserted again.");
|
||||
return;
|
||||
}
|
||||
@ -112,8 +114,12 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
hash_vals.push_back(hash_val);
|
||||
}
|
||||
}
|
||||
buckets_[bucket_id].key = key;
|
||||
buckets_[bucket_id].value = value;
|
||||
if (is_last_level_file_) {
|
||||
buckets_[bucket_id].key.assign(user_key.data(), user_key.size());
|
||||
} else {
|
||||
buckets_[bucket_id].key.assign(key.data(), key.size());
|
||||
}
|
||||
buckets_[bucket_id].value.assign(value.data(), value.size());
|
||||
buckets_[bucket_id].is_empty = false;
|
||||
|
||||
properties_.num_entries++;
|
||||
@ -149,12 +155,18 @@ Status CuckooTableBuilder::Finish() {
|
||||
if (prev_key_.empty()) {
|
||||
return Status::Corruption("Unable to find unused key");
|
||||
}
|
||||
// Try to find the key next to prev_key_ by handling carryovers.
|
||||
std::string new_user_key = prev_key_;
|
||||
new_user_key.back()++;
|
||||
// We ignore carry-overs and check that it is larger than previous key.
|
||||
if (new_user_key > prev_key_) {
|
||||
unused_user_key_ = new_user_key;
|
||||
} else {
|
||||
int curr_pos = new_user_key.size() - 1;
|
||||
while (curr_pos >= 0) {
|
||||
++new_user_key[curr_pos];
|
||||
if (new_user_key > prev_key_) {
|
||||
unused_user_key_ = new_user_key;
|
||||
break;
|
||||
}
|
||||
--curr_pos;
|
||||
}
|
||||
if (curr_pos < 0) {
|
||||
return Status::Corruption("Unable to find unused key");
|
||||
}
|
||||
}
|
||||
@ -179,17 +191,9 @@ Status CuckooTableBuilder::Finish() {
|
||||
s = file_->Append(Slice(unused_bucket));
|
||||
} else {
|
||||
++num_added;
|
||||
if (is_last_level_file_) {
|
||||
Slice user_key = ExtractUserKey(bucket.key);
|
||||
s = file_->Append(user_key);
|
||||
if (s.ok()) {
|
||||
s = file_->Append(bucket.value);
|
||||
}
|
||||
} else {
|
||||
s = file_->Append(bucket.key);
|
||||
if (s.ok()) {
|
||||
s = file_->Append(bucket.value);
|
||||
}
|
||||
s = file_->Append(Slice(bucket.key));
|
||||
if (s.ok()) {
|
||||
s = file_->Append(Slice(bucket.value));
|
||||
}
|
||||
}
|
||||
if (!s.ok()) {
|
||||
@ -307,7 +311,9 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
|
||||
CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||
uint64_t child_bucket_id = GetSliceHash(
|
||||
ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
|
||||
is_last_level_file_ ? curr_bucket.key
|
||||
: ExtractUserKey(Slice(curr_bucket.key)),
|
||||
hash_cnt, max_num_buckets_);
|
||||
if (buckets_[child_bucket_id].make_space_for_key_call_id ==
|
||||
make_space_for_key_call_id_) {
|
||||
continue;
|
||||
|
@ -58,8 +58,8 @@ class CuckooTableBuilder: public TableBuilder {
|
||||
private:
|
||||
struct CuckooBucket {
|
||||
CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {}
|
||||
Slice key;
|
||||
Slice value;
|
||||
std::string key;
|
||||
std::string value;
|
||||
bool is_empty;
|
||||
uint64_t make_space_for_key_call_id;
|
||||
};
|
||||
|
@ -236,18 +236,6 @@ TEST(CuckooReaderTest, WhenKeyNotFound) {
|
||||
|
||||
// Performance tests
|
||||
namespace {
|
||||
void GenerateKeys(uint64_t num, std::vector<std::string>* keys,
|
||||
uint32_t user_key_length) {
|
||||
for (uint64_t i = 0; i < num; ++i) {
|
||||
std::string new_key(reinterpret_cast<char*>(&i), sizeof(i));
|
||||
new_key = std::string(user_key_length - new_key.size(), 'k') + new_key;
|
||||
ParsedInternalKey ikey(new_key, num, kTypeValue);
|
||||
std::string full_key;
|
||||
AppendInternalKey(&full_key, ikey);
|
||||
keys->push_back(full_key);
|
||||
}
|
||||
}
|
||||
|
||||
bool DoNothing(void* arg, const ParsedInternalKey& k, const Slice& v) {
|
||||
// Deliberately empty.
|
||||
return false;
|
||||
@ -266,6 +254,7 @@ bool CheckValue(void* cnt_ptr, const ParsedInternalKey& k, const Slice& v) {
|
||||
void BM_CuckooRead(uint64_t num, uint32_t key_length,
|
||||
uint32_t value_length, uint64_t num_reads, double hash_ratio) {
|
||||
assert(value_length <= key_length);
|
||||
assert(8 <= key_length);
|
||||
std::vector<std::string> keys;
|
||||
Options options;
|
||||
options.allow_mmap_reads = true;
|
||||
@ -277,21 +266,26 @@ void BM_CuckooRead(uint64_t num, uint32_t key_length,
|
||||
}
|
||||
std::string fname = FLAGS_file_dir + "/cuckoo_read_benchmark";
|
||||
|
||||
GenerateKeys(num, &keys, key_length);
|
||||
uint64_t predicted_file_size =
|
||||
num * (key_length + value_length) / hash_ratio + 1024;
|
||||
|
||||
unique_ptr<WritableFile> writable_file;
|
||||
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
|
||||
CuckooTableBuilder builder(
|
||||
writable_file.get(), keys[0].size(), value_length, hash_ratio,
|
||||
writable_file.get(), key_length + 8, value_length, hash_ratio,
|
||||
predicted_file_size, kMaxNumHashTable, 1000, true, GetSliceMurmurHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t key_idx = 0; key_idx < num; ++key_idx) {
|
||||
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
|
||||
// Value is just a part of key.
|
||||
builder.Add(Slice(keys[key_idx]), Slice(&keys[key_idx][0], value_length));
|
||||
std::string new_key(reinterpret_cast<char*>(&key_idx), sizeof(key_idx));
|
||||
new_key = std::string(key_length - new_key.size(), 'k') + new_key;
|
||||
ParsedInternalKey ikey(new_key, num, kTypeValue);
|
||||
std::string full_key;
|
||||
AppendInternalKey(&full_key, ikey);
|
||||
builder.Add(Slice(full_key), Slice(&full_key[0], value_length));
|
||||
ASSERT_EQ(builder.NumEntries(), key_idx + 1);
|
||||
ASSERT_OK(builder.status());
|
||||
keys.push_back(full_key);
|
||||
}
|
||||
ASSERT_OK(builder.Finish());
|
||||
ASSERT_EQ(num, builder.NumEntries());
|
||||
|
Loading…
Reference in New Issue
Block a user