From cf3da899b04a9a6a76609bf76a49ff1217411f9f Mon Sep 17 00:00:00 2001
From: Radheshyam Balasundaram
Date: Mon, 21 Jul 2014 13:26:09 -0700
Subject: [PATCH] Adding a new SST table builder based on Cuckoo Hashing

Summary:
Cuckoo Hashing based SST table builder. Contains:
- Cuckoo Hashing logic and file storage logic.
- Unit tests for logic

Test Plan:
make cuckoo_table_builder_test
./cuckoo_table_builder_test
make check all

Reviewers: yhchiang, igor, sdong, ljin

Reviewed By: ljin

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D19545
---
 Makefile                           |   6 +-
 include/rocksdb/table.h            |   6 +
 table/cuckoo_table_builder.cc      | 345 ++++++++++++++++++++
 table/cuckoo_table_builder.h       |  97 ++++++
 table/cuckoo_table_builder_test.cc | 468 +++++++++++++++++++++++++++++
 table/cuckoo_table_factory.h       |  37 ++
 6 files changed, 958 insertions(+), 1 deletion(-)
 create mode 100644 table/cuckoo_table_builder.cc
 create mode 100644 table/cuckoo_table_builder.h
 create mode 100644 table/cuckoo_table_builder_test.cc
 create mode 100644 table/cuckoo_table_factory.h

diff --git a/Makefile b/Makefile
index b085c008f..a87c79241 100644
--- a/Makefile
+++ b/Makefile
@@ -115,7 +115,8 @@ TESTS = \
 	table_test \
 	thread_local_test \
 	geodb_test \
-	rate_limiter_test
+	rate_limiter_test \
+	cuckoo_table_builder_test
 
 TOOLS = \
 	sst_dump \
@@ -410,6 +411,9 @@ deletefile_test: db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS)
 geodb_test: utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
 
+cuckoo_table_builder_test: table/cuckoo_table_builder_test.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) table/cuckoo_table_builder_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
+
 $(MEMENVLIBRARY) : $(MEMENVOBJECTS)
 	rm -f $@
 	$(AR) -rs $@ $(MEMENVOBJECTS)
diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h
index 96c67f956..65f717a60 100644
--- a/include/rocksdb/table.h
+++ b/include/rocksdb/table.h
@@ -184,6 +184,12 @@ EncodingType encoding_type = kPlain;
 extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
     PlainTableOptions());
 
+struct CuckooTablePropertyNames {
+  static const std::string kEmptyBucket;
+  static const std::string kNumHashTable;
+  static const std::string kMaxNumBuckets;
+};
+
 #endif  // ROCKSDB_LITE
 
 // A base class for table factories.
diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc
new file mode 100644
index 000000000..e92ef3735
--- /dev/null
+++ b/table/cuckoo_table_builder.cc
@@ -0,0 +1,345 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
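+//
+// CuckooTableBuilder lays the table out as one array of fixed-size buckets
+// (bucket_size = fixed_key_length + fixed_value_length), followed by the
+// meta blocks:
+//
+//   [bucket 0][bucket 1]...[bucket max_num_buckets-1]
+//   [property block][meta index block][footer]
+//
+// Empty buckets are written as copies of a designated "unused" bucket, built
+// from a user key that is guaranteed not to equal any inserted key.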
+
+#ifndef ROCKSDB_LITE
+#include "table/cuckoo_table_builder.h"
+
+#include <assert.h>
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "db/dbformat.h"
+#include "rocksdb/env.h"
+#include "rocksdb/table.h"
+#include "table/block_builder.h"
+#include "table/format.h"
+#include "table/meta_blocks.h"
+#include "util/autovector.h"
+#include "util/random.h"
+
+namespace rocksdb {
+const std::string CuckooTablePropertyNames::kEmptyBucket =
+    "rocksdb.cuckoo.bucket.empty.bucket";
+const std::string CuckooTablePropertyNames::kNumHashTable =
+    "rocksdb.cuckoo.hash.num";
+const std::string CuckooTablePropertyNames::kMaxNumBuckets =
+    "rocksdb.cuckoo.bucket.maxnum";
+
+// Obtained by running echo rocksdb.table.cuckoo | sha1sum
+extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
+
+CuckooTableBuilder::CuckooTableBuilder(
+    WritableFile* file, unsigned int fixed_key_length,
+    unsigned int fixed_value_length, double hash_table_ratio,
+    unsigned int file_size, unsigned int max_num_hash_table,
+    unsigned int max_search_depth,
+    unsigned int (*GetSliceHashPtr)(const Slice&, unsigned int,
+                                    unsigned int))
+    : num_hash_table_(std::min((unsigned int) 4, max_num_hash_table)),
+      file_(file),
+      key_length_(fixed_key_length),
+      value_length_(fixed_value_length),
+      bucket_size_(fixed_key_length + fixed_value_length),
+      hash_table_ratio_(hash_table_ratio),
+      max_num_buckets_(file_size / bucket_size_),
+      max_num_hash_table_(max_num_hash_table),
+      max_search_depth_(max_search_depth),
+      buckets_(max_num_buckets_),
+      GetSliceHash(GetSliceHashPtr) {
+  // The bucket_size is currently not optimized for the last level.
+  // In the last level, the bucket will not contain the full key.
+  // TODO(rbs): Find how we can determine if last level or not
+  // before we start adding entries into the table.
+  properties_.num_entries = 0;
+  // Data is in a huge block.
+  properties_.num_data_blocks = 1;
+  properties_.index_size = 0;
+  properties_.filter_size = 0;
+}
+
+CuckooTableBuilder::~CuckooTableBuilder() {
+}
+
+void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
+  if (NumEntries() == max_num_buckets_) {
+    status_ = Status::Corruption("Hash Table is full.");
+    return;
+  }
+  unsigned int bucket_id;
+  bool bucket_found = false;
+  autovector<unsigned int> hash_vals;
+  ParsedInternalKey ikey;
+  if (!ParseInternalKey(key, &ikey)) {
+    status_ = Status::Corruption("Unable to parse key into internal key.");
+    return;
+  }
+  Slice user_key = ikey.user_key;
+  for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
+    unsigned int hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
+    if (buckets_[hash_val].is_empty) {
+      bucket_id = hash_val;
+      bucket_found = true;
+      break;
+    } else {
+      if (user_key.compare(ExtractUserKey(buckets_[hash_val].key)) == 0) {
+        status_ = Status::Corruption("Same key is being inserted again.");
+        return;
+      }
+      hash_vals.push_back(hash_val);
+    }
+  }
+  while (!bucket_found && !MakeSpaceForKey(key, &bucket_id, hash_vals)) {
+    // Rehash by increasing the number of hash tables.
+    if (num_hash_table_ >= max_num_hash_table_) {
+      status_ = Status::Corruption("Too many collisions. Unable to hash.");
+      return;
+    }
+    // We don't really need to rehash the entire table because old hashes are
+    // still valid and we only increased the number of hash functions.
+    unsigned int old_num_hash = num_hash_table_;
+    num_hash_table_ = std::min(num_hash_table_ + 1, max_num_hash_table_);
+    for (unsigned int i = old_num_hash; i < num_hash_table_; i++) {
+      unsigned int hash_val = GetSliceHash(user_key, i, max_num_buckets_);
+      if (buckets_[hash_val].is_empty) {
+        bucket_found = true;
+        bucket_id = hash_val;
+        break;
+      } else {
+        hash_vals.push_back(hash_val);
+      }
+    }
+  }
+  buckets_[bucket_id].key = key;
+  buckets_[bucket_id].value = value;
+  buckets_[bucket_id].is_empty = false;
+
+  if (ikey.sequence != 0) {
+    // This is not a last level file.
+    is_last_level_file_ = false;
+  }
+  properties_.num_entries++;
+
+  // We assume that the keys are inserted in sorted order. To identify an
+  // unused key, which will be used to fill empty buckets in the table, we
+  // look for gaps between successively inserted keys. This is done by
+  // maintaining the previous key and comparing it with the next key.
+  if (unused_user_key_.empty()) {
+    if (prev_key_.empty()) {
+      prev_key_ = user_key.ToString();
+      return;
+    }
+    std::string new_user_key = prev_key_;
+    new_user_key.back()++;
+    // We ignore carry-overs and check that it is larger than the previous key.
+    if ((new_user_key > prev_key_) &&
+        (new_user_key < user_key.ToString())) {
+      unused_user_key_ = new_user_key;
+    } else {
+      prev_key_ = user_key.ToString();
+    }
+  }
+}
+
+Status CuckooTableBuilder::status() const { return status_; }
+
+Status CuckooTableBuilder::Finish() {
+  assert(!closed_);
+  closed_ = true;
+
+  if (unused_user_key_.empty()) {
+    if (prev_key_.empty()) {
+      return Status::Corruption("Unable to find unused key");
+    }
+    std::string new_user_key = prev_key_;
+    new_user_key.back()++;
+    // We ignore carry-overs and check that it is larger than the previous key.
+    if (new_user_key > prev_key_) {
+      unused_user_key_ = new_user_key;
+    } else {
+      return Status::Corruption("Unable to find unused key");
+    }
+  }
+  std::string unused_bucket;
+  if (is_last_level_file_) {
+    unused_bucket = unused_user_key_;
+  } else {
+    ParsedInternalKey ikey(unused_user_key_, 0, kTypeValue);
+    AppendInternalKey(&unused_bucket, ikey);
+  }
+  properties_.fixed_key_len = unused_bucket.size();
+  unsigned int bucket_size = unused_bucket.size() + value_length_;
+  // Resize to bucket size.
+  unused_bucket.resize(bucket_size, 'a');
+
+  // Write the table.
+  for (auto& bucket : buckets_) {
+    Status s;
+    if (bucket.is_empty) {
+      s = file_->Append(Slice(unused_bucket));
+    } else {
+      if (is_last_level_file_) {
+        Slice user_key = ExtractUserKey(bucket.key);
+        s = file_->Append(user_key);
+        if (s.ok()) {
+          s = file_->Append(bucket.value);
+        }
+      } else {
+        s = file_->Append(bucket.key);
+        if (s.ok()) {
+          s = file_->Append(bucket.value);
+        }
+      }
+    }
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  unsigned int offset = buckets_.size() * bucket_size;
+  properties_.user_collected_properties[
+      CuckooTablePropertyNames::kEmptyBucket] = unused_bucket;
+  properties_.user_collected_properties[
+      CuckooTablePropertyNames::kNumHashTable] = std::to_string(num_hash_table_);
+  PutVarint32(&properties_.user_collected_properties[
+      CuckooTablePropertyNames::kMaxNumBuckets], max_num_buckets_);
+
+  // Write meta blocks.
+  MetaIndexBuilder meta_index_builder;
+  PropertyBlockBuilder property_block_builder;
+
+  property_block_builder.AddTableProperty(properties_);
+  property_block_builder.Add(properties_.user_collected_properties);
+  Slice property_block = property_block_builder.Finish();
+  BlockHandle property_block_handle;
+  property_block_handle.set_offset(offset);
+  property_block_handle.set_size(property_block.size());
+  Status s = file_->Append(property_block);
+  offset += property_block.size();
+  if (!s.ok()) {
+    return s;
+  }
+
+  meta_index_builder.Add(kPropertiesBlock, property_block_handle);
+  Slice meta_index_block = meta_index_builder.Finish();
+
+  BlockHandle meta_index_block_handle;
+  meta_index_block_handle.set_offset(offset);
+  meta_index_block_handle.set_size(meta_index_block.size());
+  s = file_->Append(meta_index_block);
+  if (!s.ok()) {
+    return s;
+  }
+
+  Footer footer(kCuckooTableMagicNumber);
+  footer.set_metaindex_handle(meta_index_block_handle);
+  footer.set_index_handle(BlockHandle::NullBlockHandle());
+  std::string footer_encoding;
+  footer.EncodeTo(&footer_encoding);
+  s = file_->Append(footer_encoding);
+  return s;
+}
+
+void CuckooTableBuilder::Abandon() {
+  assert(!closed_);
+  closed_ = true;
+}
+
+uint64_t CuckooTableBuilder::NumEntries() const {
+  return properties_.num_entries;
+}
+
+uint64_t CuckooTableBuilder::FileSize() const {
+  if (closed_) {
+    return file_->GetFileSize();
+  } else {
+    // This is not the actual size of the file, as we need to account for
+    // the hash table ratio. This returns the size of the filled buckets in
+    // the table, scaled up by a factor of 1/hash_table_ratio.
+    return (properties_.num_entries * bucket_size_) / hash_table_ratio_;
+  }
+}
+
+bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
+    unsigned int *bucket_id, autovector<unsigned int> hash_vals) {
+  struct CuckooNode {
+    unsigned int bucket_id;
+    unsigned int depth;
+    int parent_pos;
+    CuckooNode(unsigned int bucket_id, unsigned int depth, int parent_pos)
+        : bucket_id(bucket_id), depth(depth), parent_pos(parent_pos) {}
+  };
+  // This is a BFS search tree that is stored simply as a vector.
+  // Each node stores the index of its parent node in the vector.
+  std::vector<CuckooNode> tree;
+  // This is a very bad way to keep track of visited nodes.
+  // TODO(rbs): Change this by adding a 'GetKeyPathId' field to the bucket
+  // and use it to track visited nodes.
+  std::vector<bool> buckets_visited(max_num_buckets_, false);
+  for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
+    unsigned int bucket_id = hash_vals[hash_cnt];
+    buckets_visited[bucket_id] = true;
+    tree.push_back(CuckooNode(bucket_id, 0, -1));
+  }
+  bool null_found = false;
+  unsigned int curr_pos = 0;
+  while (!null_found && curr_pos < tree.size()) {
+    CuckooNode& curr_node = tree[curr_pos];
+    if (curr_node.depth >= max_search_depth_) {
+      break;
+    }
+    CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
+    for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
+      unsigned int child_bucket_id = GetSliceHash(
+          ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
+      if (child_bucket_id == curr_node.bucket_id) {
+        continue;
+      }
+      if (buckets_visited[child_bucket_id]) {
+        continue;
+      }
+      buckets_visited[child_bucket_id] = true;
+      tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1,
+            curr_pos));
+      if (buckets_[child_bucket_id].is_empty) {
+        null_found = true;
+        break;
+      }
+    }
+    ++curr_pos;
+  }
+
+  if (null_found) {
+    // Walk up the tree from the empty bucket to the root, moving each
+    // parent's entry into its child's bucket. This frees the root bucket,
+    // which will hold the new key.
+    int bucket_to_replace_pos = tree.size() - 1;
+    while (bucket_to_replace_pos >= 0) {
+      CuckooNode& curr_node = tree[bucket_to_replace_pos];
+      if (curr_node.parent_pos != -1) {
+        buckets_[curr_node.bucket_id] =
+            buckets_[tree[curr_node.parent_pos].bucket_id];
+        bucket_to_replace_pos = curr_node.parent_pos;
+      } else {
+        *bucket_id = curr_node.bucket_id;
+        return true;
+      }
+    }
+    return true;
+  } else {
+    return false;
+  }
+}
+
+}  // namespace rocksdb
+#endif  // ROCKSDB_LITE
diff --git a/table/cuckoo_table_builder.h b/table/cuckoo_table_builder.h
new file mode 100644
index 000000000..baee9a48a
--- /dev/null
+++ b/table/cuckoo_table_builder.h
@@ -0,0 +1,97 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+
+#pragma once
+#ifndef ROCKSDB_LITE
+#include <stdint.h>
+#include <string>
+#include <vector>
+#include "rocksdb/status.h"
+#include "table/table_builder.h"
+#include "rocksdb/table.h"
+#include "rocksdb/table_properties.h"
+#include "util/autovector.h"
+
+namespace rocksdb {
+
+struct CuckooBucket {
+  CuckooBucket(): is_empty(true) {}
+  Slice key;
+  Slice value;
+  bool is_empty;
+};
+
+class CuckooTableBuilder: public TableBuilder {
+ public:
+  CuckooTableBuilder(
+      WritableFile* file, unsigned int fixed_key_length,
+      unsigned int fixed_value_length, double hash_table_ratio,
+      unsigned int file_size, unsigned int max_num_hash_table,
+      unsigned int max_search_depth,
+      unsigned int (*GetSliceHash)(const Slice&, unsigned int,
+                                   unsigned int));
+
+  // REQUIRES: Either Finish() or Abandon() has been called.
+  ~CuckooTableBuilder();
+
+  // Add key,value to the table being constructed.
+  // REQUIRES: key is after any previously added key according to comparator.
+  // REQUIRES: Finish(), Abandon() have not been called
+  void Add(const Slice& key, const Slice& value) override;
+
+  // Return non-ok iff some error has been detected.
+  Status status() const override;
+
+  // Finish building the table. Stops using the file passed to the
+  // constructor after this function returns.
+  // REQUIRES: Finish(), Abandon() have not been called
+  Status Finish() override;
+
+  // Indicate that the contents of this builder should be abandoned. Stops
+  // using the file passed to the constructor after this function returns.
+  // If the caller is not going to call Finish(), it must call Abandon()
+  // before destroying this builder.
+  // REQUIRES: Finish(), Abandon() have not been called
+  void Abandon() override;
+
+  // Number of calls to Add() so far.
+  uint64_t NumEntries() const override;
+
+  // Size of the file generated so far. If invoked after a successful
+  // Finish() call, returns the size of the final generated file.
+  uint64_t FileSize() const override;
+
+ private:
+  bool MakeSpaceForKey(const Slice& key, unsigned int* bucket_id,
+                       autovector<unsigned int> hash_vals);
+
+  unsigned int num_hash_table_;
+  WritableFile* file_;
+  const unsigned int key_length_;
+  const unsigned int value_length_;
+  const unsigned int bucket_size_;
+  const double hash_table_ratio_;
+  const unsigned int max_num_buckets_;
+  const unsigned int max_num_hash_table_;
+  const unsigned int max_search_depth_;
+  Status status_;
+  std::vector<CuckooBucket> buckets_;
+  bool is_last_level_file_ = true;
+  TableProperties properties_;
+  unsigned int (*GetSliceHash)(const Slice& s, unsigned int index,
+                               unsigned int max_num_buckets);
+  std::string unused_user_key_ = "";
+  std::string prev_key_;
+
+  bool closed_ = false;  // Either Finish() or Abandon() has been called.
+
+  // No copying allowed
+  CuckooTableBuilder(const CuckooTableBuilder&) = delete;
+  void operator=(const CuckooTableBuilder&) = delete;
+};
+
+}  // namespace rocksdb
+
+#endif  // ROCKSDB_LITE
diff --git a/table/cuckoo_table_builder_test.cc b/table/cuckoo_table_builder_test.cc
new file mode 100644
index 000000000..f20463e12
--- /dev/null
+++ b/table/cuckoo_table_builder_test.cc
@@ -0,0 +1,468 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "table/meta_blocks.h"
+#include "table/cuckoo_table_builder.h"
+#include "util/random.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+
+namespace rocksdb {
+
+extern const uint64_t kCuckooTableMagicNumber;
+
+namespace {
+std::unordered_map<std::string, std::vector<unsigned int>> hash_map;
+
+void AddHashLookups(const std::string& s, unsigned int bucket_id,
+                    unsigned int num_hash_fun) {
+  std::vector<unsigned int> v;
+  for (unsigned int i = 0; i < num_hash_fun; i++) {
+    v.push_back(bucket_id + i);
+  }
+  hash_map[s] = v;
+  return;
+}
+
+unsigned int GetSliceHash(const Slice& s, unsigned int index,
+                          unsigned int max_num_buckets) {
+  return hash_map[s.ToString()][index];
+}
+}  // namespace
+
+class CuckooBuilderTest {
+ public:
+  CuckooBuilderTest() {
+    env_ = Env::Default();
+  }
+
+  void CheckFileContents(const std::string& expected_data) {
+    // Read file
+    unique_ptr<RandomAccessFile> read_file;
+    ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
+    uint64_t read_file_size;
+    ASSERT_OK(env_->GetFileSize(fname, &read_file_size));
+
+    // Assert Table Properties.
+    TableProperties* props = nullptr;
+    ASSERT_OK(ReadTableProperties(read_file.get(), read_file_size,
+        kCuckooTableMagicNumber, env_, nullptr, &props));
+    ASSERT_EQ(props->num_entries, num_items);
+    ASSERT_EQ(props->fixed_key_len, key_length);
+
+    // Check unused bucket.
+    std::string unused_bucket = props->user_collected_properties[
+        CuckooTablePropertyNames::kEmptyBucket];
+    ASSERT_EQ(expected_unused_bucket, unused_bucket);
+
+    unsigned int max_buckets;
+    Slice max_buckets_slice = Slice(props->user_collected_properties[
+        CuckooTablePropertyNames::kMaxNumBuckets]);
+    GetVarint32(&max_buckets_slice, &max_buckets);
+    ASSERT_EQ(expected_max_buckets, max_buckets);
+    // Check contents of the bucket.
+    std::string read_data;
+    read_data.resize(expected_data.size());
+    Slice read_slice;
+    ASSERT_OK(read_file->Read(0, expected_data.size(),
+          &read_slice, &read_data[0]));
+    ASSERT_EQ(expected_data, read_data);
+  }
+
+  Env* env_;
+  const EnvOptions env_options_;
+  std::string fname;
+  std::string expected_unused_bucket;
+  unsigned int file_size = 100000;
+  unsigned int num_items = 20;
+  unsigned int num_hash_fun = 64;
+  double hash_table_ratio = 0.9;
+  unsigned int ikey_length;
+  unsigned int user_key_length;
+  unsigned int key_length;
+  unsigned int value_length;
+  unsigned int bucket_length;
+  unsigned int expected_max_buckets;
+};
+
+
+TEST(CuckooBuilderTest, NoCollision) {
+  hash_map.clear();
+  num_items = 20;
+  num_hash_fun = 64;
+  std::vector<std::string> user_keys(num_items);
+  std::vector<std::string> keys(num_items);
+  std::vector<std::string> values(num_items);
+  unsigned int bucket_ids = 0;
+  for (unsigned int i = 0; i < num_items; i++) {
+    user_keys[i] = "keys" + std::to_string(i+100);
+    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + std::to_string(i+100);
+    AddHashLookups(user_keys[i], bucket_ids, num_hash_fun);
+    bucket_ids += num_hash_fun;
+  }
+
+  ikey_length = keys[0].size();
+  key_length = ikey_length;
+  value_length = values[0].size();
+  bucket_length = ikey_length + value_length;
+  expected_max_buckets = file_size / bucket_length;
+  std::string expected_unused_user_key = "keys10:";
+  ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
+  AppendInternalKey(&expected_unused_bucket, ikey);
+  expected_unused_bucket.resize(bucket_length, 'a');
+  unique_ptr<WritableFile> writable_file;
+  fname = test::TmpDir() + "/BasicTest_writable_file";
+  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
+  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
+      writable_file.get(), ikey_length,
+      value_length, hash_table_ratio,
+      file_size, num_hash_fun, 100, GetSliceHash);
+  ASSERT_OK(cuckoo_builder->status());
+  unsigned int key_idx = 0;
+  std::string expected_file_data = "";
+  for (unsigned int i = 0; i < expected_max_buckets; i++) {
+    if (key_idx * num_hash_fun == i && key_idx < num_items) {
+      cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
+      ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
+      ASSERT_OK(cuckoo_builder->status());
+      expected_file_data.append(keys[key_idx] + values[key_idx]);
+      ++key_idx;
+    } else {
+      expected_file_data.append(expected_unused_bucket);
+    }
+  }
+  ASSERT_OK(cuckoo_builder->Finish());
+  writable_file->Close();
+  CheckFileContents(expected_file_data);
+}
+
+TEST(CuckooBuilderTest, NoCollisionLastLevel) {
+  hash_map.clear();
+  std::vector<std::string> user_keys(num_items);
+  std::vector<std::string> keys(num_items);
+  std::vector<std::string> values(num_items);
+  unsigned int bucket_ids = 0;
+  for (unsigned int i = 0; i < num_items; i++) {
+    user_keys[i] = "keys" + std::to_string(i+100);
+    // Set zero sequence number in all keys.
+    ParsedInternalKey ikey(user_keys[i], 0, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + std::to_string(i+100);
+    AddHashLookups(user_keys[i], bucket_ids, num_hash_fun);
+    bucket_ids += num_hash_fun;
+  }
+  ikey_length = keys[0].size();
+  user_key_length = user_keys[0].size();
+  key_length = user_key_length;
+  value_length = values[0].size();
+  bucket_length = key_length + value_length;
+  expected_max_buckets = file_size / bucket_length;
+  expected_unused_bucket = "keys10:";
+  expected_unused_bucket.resize(bucket_length, 'a');
+  unique_ptr<WritableFile> writable_file;
+  fname = test::TmpDir() + "/NoCollisionLastLevel_writable_file";
+  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
+  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
+      writable_file.get(), key_length,
+      value_length, hash_table_ratio,
+      file_size, num_hash_fun, 100, GetSliceHash);
+  ASSERT_OK(cuckoo_builder->status());
+  unsigned int key_idx = 0;
+  std::string expected_file_data = "";
+  for (unsigned int i = 0; i < expected_max_buckets; i++) {
+    if (key_idx * num_hash_fun == i && key_idx < num_items) {
+      cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
+      ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
+      ASSERT_OK(cuckoo_builder->status());
+      expected_file_data.append(user_keys[key_idx] + values[key_idx]);
+      ++key_idx;
+    } else {
+      expected_file_data.append(expected_unused_bucket);
+    }
+  }
+  ASSERT_OK(cuckoo_builder->Finish());
+  writable_file->Close();
+  CheckFileContents(expected_file_data);
+}
+
+TEST(CuckooBuilderTest, WithCollision) {
+  // Take keys with colliding hash function values.
+  hash_map.clear();
+  num_hash_fun = 20;
+  num_items = num_hash_fun;
+  std::vector<std::string> user_keys(num_items);
+  std::vector<std::string> keys(num_items);
+  std::vector<std::string> values(num_items);
+  for (unsigned int i = 0; i < num_items; i++) {
+    user_keys[i] = "keys" + std::to_string(i+100);
+    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + std::to_string(i+100);
+    // Make all hash values collide.
+    AddHashLookups(user_keys[i], 0, num_hash_fun);
+  }
+  ikey_length = keys[0].size();
+  value_length = values[0].size();
+  key_length = ikey_length;
+  bucket_length = key_length + value_length;
+  expected_max_buckets = file_size / bucket_length;
+  std::string expected_unused_user_key = "keys10:";
+  ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
+  AppendInternalKey(&expected_unused_bucket, ikey);
+  expected_unused_bucket.resize(bucket_length, 'a');
+  unique_ptr<WritableFile> writable_file;
+  fname = test::TmpDir() + "/WithCollision_writable_file";
+  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
+  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
+      writable_file.get(), key_length, value_length, hash_table_ratio,
+      file_size, num_hash_fun, 100, GetSliceHash);
+  ASSERT_OK(cuckoo_builder->status());
+  unsigned int key_idx = 0;
+  std::string expected_file_data = "";
+  for (unsigned int i = 0; i < expected_max_buckets; i++) {
+    if (key_idx == i && key_idx < num_items) {
+      cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
+      ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
+      ASSERT_OK(cuckoo_builder->status());
+      expected_file_data.append(keys[key_idx] + values[key_idx]);
+      ++key_idx;
+    } else {
+      expected_file_data.append(expected_unused_bucket);
+    }
+  }
+  ASSERT_OK(cuckoo_builder->Finish());
+  writable_file->Close();
+  CheckFileContents(expected_file_data);
+}
+
+TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
+  // Take keys with colliding hash function values.
+  // Take more keys than the number of hash functions.
+  hash_map.clear();
+  num_hash_fun = 20;
+  num_items = num_hash_fun + 1;
+  std::vector<std::string> user_keys(num_items);
+  std::vector<std::string> keys(num_items);
+  std::vector<std::string> values(num_items);
+  for (unsigned int i = 0; i < num_items; i++) {
+    user_keys[i] = "keys" + std::to_string(i+100);
+    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + std::to_string(i+100);
+    // Make all hash values collide.
+    AddHashLookups(user_keys[i], 0, num_hash_fun);
+  }
+  ikey_length = keys[0].size();
+  value_length = values[0].size();
+  unique_ptr<WritableFile> writable_file;
+  fname = test::TmpDir() + "/FailWithTooManyCollisions_writable";
+  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
+  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
+      writable_file.get(), ikey_length,
+      value_length, hash_table_ratio, file_size, num_hash_fun,
+      100, GetSliceHash);
+  ASSERT_OK(cuckoo_builder->status());
+  for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
+    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
+    ASSERT_OK(cuckoo_builder->status());
+    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
+  }
+  cuckoo_builder->Add(Slice(keys.back()), Slice(values.back()));
+  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
+  cuckoo_builder->Abandon();
+  writable_file->Close();
+}
+
+TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
+  hash_map.clear();
+  std::string user_key = "repeatedkey";
+  AddHashLookups(user_key, 0, num_hash_fun);
+  std::string key_to_reuse1, key_to_reuse2;
+  ParsedInternalKey ikey1(user_key, 1000, kTypeValue);
+  ParsedInternalKey ikey2(user_key, 1001, kTypeValue);
+  AppendInternalKey(&key_to_reuse1, ikey1);
+  AppendInternalKey(&key_to_reuse2, ikey2);
+  std::string value = "value";
+  ikey_length = key_to_reuse1.size();
+  value_length = value.size();
+  unique_ptr<WritableFile> writable_file;
+  fname = test::TmpDir() + "/FailWhenSameKeyInserted_writable_file";
+  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
+  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
+      writable_file.get(), ikey_length,
+      value_length, hash_table_ratio, file_size, num_hash_fun,
+      100, GetSliceHash);
+  ASSERT_OK(cuckoo_builder->status());
+  cuckoo_builder->Add(Slice(key_to_reuse1), Slice(value));
+  ASSERT_OK(cuckoo_builder->status());
+  ASSERT_EQ(cuckoo_builder->NumEntries(), 1);
+  cuckoo_builder->Add(Slice(key_to_reuse2), Slice(value));
+  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
+  cuckoo_builder->Abandon();
+  writable_file->Close();
+}
+
+TEST(CuckooBuilderTest, WithACollisionPath) {
+  hash_map.clear();
+  // Have two hash functions. Insert elements with overlapping hashes.
+  // Finally insert an element which will displace all the current elements.
+  num_hash_fun = 2;
+
+  unsigned int max_search_depth = 100;
+  num_items = max_search_depth + 2;
+  std::vector<std::string> user_keys(num_items);
+  std::vector<std::string> keys(num_items);
+  std::vector<std::string> values(num_items);
+  std::vector<unsigned int> expected_bucket_id(num_items);
+  for (unsigned int i = 0; i < num_items - 1; i++) {
+    user_keys[i] = "keys" + std::to_string(i+100);
+    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + std::to_string(i+100);
+    // Make all hash values collide with the next element.
+    AddHashLookups(user_keys[i], i, num_hash_fun);
+    expected_bucket_id[i] = i+1;
+  }
+  expected_bucket_id[0] = 0;
+  user_keys.back() = "keys" + std::to_string(num_items + 99);
+  ParsedInternalKey ikey(user_keys.back(), num_items + 1000, kTypeValue);
+  AppendInternalKey(&keys.back(), ikey);
+  values.back() = "value" + std::to_string(num_items+100);
+  // Make both hash values collide with the first element.
+  AddHashLookups(user_keys.back(), 0, num_hash_fun);
+  expected_bucket_id.back() = 1;
+
+  ikey_length = keys[0].size();
+  value_length = values[0].size();
+  key_length = ikey_length;
+  bucket_length = key_length + value_length;
+
+  expected_max_buckets = file_size / bucket_length;
+  std::string expected_unused_user_key = "keys10:";
+  ikey = ParsedInternalKey(expected_unused_user_key, 0, kTypeValue);
+  AppendInternalKey(&expected_unused_bucket, ikey);
+  expected_unused_bucket.resize(bucket_length, 'a');
+  std::string expected_file_data = "";
+  for (unsigned int i = 0; i < expected_max_buckets; i++) {
+    expected_file_data += expected_unused_bucket;
+  }
+
+  unique_ptr<WritableFile> writable_file;
+  fname = test::TmpDir() + "/WithCollisionPath_writable_file";
+  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
+  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
+      writable_file.get(), key_length,
+      value_length, hash_table_ratio, file_size,
+      num_hash_fun, max_search_depth, GetSliceHash);
+  ASSERT_OK(cuckoo_builder->status());
+  for (unsigned int key_idx = 0; key_idx < num_items; key_idx++) {
+    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
+    ASSERT_OK(cuckoo_builder->status());
+    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
+    expected_file_data.replace(expected_bucket_id[key_idx]*bucket_length,
+        bucket_length, keys[key_idx] + values[key_idx]);
+  }
+  ASSERT_OK(cuckoo_builder->Finish());
+  writable_file->Close();
+  CheckFileContents(expected_file_data);
+}
+
+TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
+  hash_map.clear();
+  // Have two hash functions. Insert elements with overlapping hashes.
+  // Finally insert an element which will displace all the current elements.
+  num_hash_fun = 2;
+
+  unsigned int max_search_depth = 100;
+  num_items = max_search_depth + 3;
+  std::vector<std::string> user_keys(num_items);
+  std::vector<std::string> keys(num_items);
+  std::vector<std::string> values(num_items);
+  for (unsigned int i = 0; i < num_items - 1; i++) {
+    user_keys[i] = "keys" + std::to_string(i+100);
+    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + std::to_string(i+100);
+    // Make all hash values collide with the next element.
+    AddHashLookups(user_keys[i], i, num_hash_fun);
+  }
+  user_keys.back() = "keys" + std::to_string(num_items + 99);
+  ParsedInternalKey ikey(user_keys.back(), num_items + 1000, kTypeValue);
+  AppendInternalKey(&keys.back(), ikey);
+  values.back() = "value" + std::to_string(num_items+100);
+  // Make both hash values collide with the first element.
+  AddHashLookups(user_keys.back(), 0, num_hash_fun);
+
+  ikey_length = keys[0].size();
+  value_length = values[0].size();
+  unique_ptr<WritableFile> writable_file;
+  fname = test::TmpDir() + "/FailWhenCollisionPathTooLong_writable";
+  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
+  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
+      writable_file.get(), ikey_length,
+      value_length, hash_table_ratio, file_size, num_hash_fun,
+      max_search_depth, GetSliceHash);
+  ASSERT_OK(cuckoo_builder->status());
+  for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
+    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
+    ASSERT_OK(cuckoo_builder->status());
+    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
+  }
+  cuckoo_builder->Add(Slice(keys.back()), Slice(values.back()));
+  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
+  cuckoo_builder->Abandon();
+  writable_file->Close();
+}
+
+TEST(CuckooBuilderTest, FailWhenTableIsFull) {
+  hash_map.clear();
+  file_size = 160;
+
+  num_items = 7;
+  std::vector<std::string> user_keys(num_items);
+  std::vector<std::string> keys(num_items);
+  std::vector<std::string> values(num_items);
+  for (unsigned int i = 0; i < num_items; i++) {
+    user_keys[i] = "keys" + std::to_string(i+1000);
+    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + std::to_string(i+100);
+    AddHashLookups(user_keys[i], i, num_hash_fun);
+  }
+  ikey_length = keys[0].size();
+  value_length = values[0].size();
+  bucket_length = ikey_length + value_length;
+  // Check that the number of items is tight.
+  ASSERT_GT(bucket_length * num_items, file_size);
+  ASSERT_LE(bucket_length * (num_items-1), file_size);
+
+  unique_ptr<WritableFile> writable_file;
+  fname = test::TmpDir() + "/FailWhenTableIsFull_writable";
+  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
+  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
+      writable_file.get(), ikey_length,
+      value_length, hash_table_ratio, file_size, num_hash_fun,
+      100, GetSliceHash);
+  ASSERT_OK(cuckoo_builder->status());
+  for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
+    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
+    ASSERT_OK(cuckoo_builder->status());
+    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
+  }
+  cuckoo_builder->Add(Slice(keys.back()), Slice(values.back()));
+  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
+  cuckoo_builder->Abandon();
+  writable_file->Close();
+}
+}  // namespace rocksdb
+
+int main(int argc, char** argv) { return rocksdb::test::RunAllTests(); }
diff --git a/table/cuckoo_table_factory.h b/table/cuckoo_table_factory.h
new file mode 100644
index 000000000..65f14fc59
--- /dev/null
+++ b/table/cuckoo_table_factory.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
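+//
+// GetSliceMurmurHash returns the index-th hash value for a slice by seeding
+// MurmurHash with a fixed per-index seed and reducing the result modulo
+// max_num_buckets. It provides up to kMaxNumHashTable independent hash
+// functions for the cuckoo table builder.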
+
+#pragma once
+#ifndef ROCKSDB_LITE
+
+#include "util/murmurhash.h"
+
+namespace rocksdb {
+
+static const unsigned int kMaxNumHashTable = 64;
+
+unsigned int GetSliceMurmurHash(const Slice& s, unsigned int index,
+                                unsigned int max_num_buckets) {
+  static constexpr unsigned int seeds[kMaxNumHashTable] = {
+      816922183, 506425713, 949485004, 22513986, 421427259, 500437285,
+      888981693, 847587269, 511007211, 722295391, 934013645, 566947683,
+      193618736, 428277388, 770956674, 819994962, 755946528, 40807421,
+      263144466, 241420041, 444294464, 731606396, 304158902, 563235655,
+      968740453, 336996831, 462831574, 407970157, 985877240, 637708754,
+      736932700, 205026023, 755371467, 729648411, 807744117, 46482135,
+      847092855, 620960699, 102476362, 314094354, 625838942, 550889395,
+      639071379, 834567510, 397667304, 151945969, 443634243, 196618243,
+      421986347, 407218337, 964502417, 327741231, 493359459, 452453139,
+      692216398, 108161624, 816246924, 234779764, 618949448, 496133787,
+      156374056, 316589799, 982915425, 553105889};
+  return MurmurHash(s.data(), s.size(), seeds[index]) % max_num_buckets;
+}
+}  // namespace rocksdb
+#endif  // ROCKSDB_LITE
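
Usage sketch (illustrative, not part of the patch): the snippet below mirrors
the flow exercised by the unit tests above: build fixed-size internal keys in
sorted order, keep them in buffers that outlive the builder (the builder
stores Slices pointing into the caller's memory), and drive CuckooTableBuilder
with GetSliceMurmurHash from table/cuckoo_table_factory.h. The function name
BuildExampleCuckooTable, the file name argument, and the key/value sizes are
assumptions made up for this example; error handling is kept minimal.

#include <memory>
#include <string>
#include <vector>

#include "db/dbformat.h"
#include "rocksdb/env.h"
#include "table/cuckoo_table_builder.h"
#include "table/cuckoo_table_factory.h"

namespace rocksdb {

// Builds a small example cuckoo table at fname (hypothetical helper).
Status BuildExampleCuckooTable(const std::string& fname) {
  Env* env = Env::Default();
  EnvOptions env_options;
  std::unique_ptr<WritableFile> file;
  Status s = env->NewWritableFile(fname, &file, env_options);
  if (!s.ok()) {
    return s;
  }

  // The builder stores Slices that point into the caller's buffers, so the
  // keys and values must stay alive until Finish() (the tests do the same).
  std::vector<std::string> user_keys(20), keys(20), values(20);
  for (int i = 0; i < 20; i++) {
    user_keys[i] = "key" + std::to_string(i + 100);  // Same length each.
    AppendInternalKey(&keys[i],
                      ParsedInternalKey(user_keys[i], 1, kTypeValue));
    values[i] = "value" + std::to_string(i + 100);   // Same length each.
  }

  CuckooTableBuilder builder(
      file.get(), keys[0].size() /* fixed_key_length */,
      values[0].size() /* fixed_value_length */, 0.9 /* hash_table_ratio */,
      100000 /* file_size */, kMaxNumHashTable, 100 /* max_search_depth */,
      GetSliceMurmurHash);

  // Keys must be added in sorted order so that the builder can find an
  // unused user key for marking empty buckets.
  for (int i = 0; i < 20; i++) {
    builder.Add(Slice(keys[i]), Slice(values[i]));
    if (!builder.status().ok()) {
      builder.Abandon();
      return builder.status();
    }
  }
  s = builder.Finish();
  if (s.ok()) {
    s = file->Close();
  }
  return s;
}

}  // namespace rocksdb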