diff --git a/HISTORY.md b/HISTORY.md index 7a05c54e8..b64e12b42 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,11 +2,12 @@ ## Unreleased (will be released with 3.6) ### Disk format changes -* If you're using RocksDB on ARM platforms and you're using default bloom filter, there is a disk format change you need to be aware of. There are three steps you need to do when you convert to new release: 1. turn off filter policy, 2. compact the whole database, 3. turn on filter policy +* If you're using RocksDB on ARM platforms and you're using default bloom filter, there is a disk format change you need to be aware of. There are three steps you need to do when you convert to new release: 1. turn off filter policy, 2. compact the whole database, 3. turn on filter policy ### Behavior changes * We have refactored our system of stalling writes. Any stall-related statistics' meanings are changed. Instead of per-write stall counts, we now count stalls per-epoch, where epochs are periods between flushes and compactions. You'll find more information in our Tuning Perf Guide once we release RocksDB 3.6. * When disableDataSync=true, we no longer sync the MANIFEST file. +* Add identity_as_first_hash property to CuckooTable. Existing CuckooTable SST files must be rebuilt before the reader can open them. ----- Past Releases ----- diff --git a/db/db_bench.cc b/db/db_bench.cc index eada95b6b..08e61e46b 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -514,6 +514,9 @@ DEFINE_int64(keys_per_prefix, 0, "control average number of keys generated " "i.e. use the prefix comes with the generated random number."); DEFINE_bool(enable_io_prio, false, "Lower the background flush/compaction " "threads' IO priority"); +DEFINE_bool(identity_as_first_hash, false, "the first hash function of cuckoo " + "table becomes an identity function. 
This is only valid when key " + "is 8 bytes"); enum RepFactory { kSkipList, @@ -1739,8 +1742,11 @@ class Benchmark { fprintf(stderr, "Invalid cuckoo_hash_ratio\n"); exit(1); } + rocksdb::CuckooTableOptions table_options; + table_options.hash_table_ratio = FLAGS_cuckoo_hash_ratio; + table_options.identity_as_first_hash = FLAGS_identity_as_first_hash; options.table_factory = std::shared_ptr( - NewCuckooTableFactory(FLAGS_cuckoo_hash_ratio)); + NewCuckooTableFactory(table_options)); } else { BlockBasedTableOptions block_based_options; if (FLAGS_use_hash_search) { diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 2fb4f50dd..2b0255a97 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -251,23 +251,36 @@ struct CuckooTablePropertyNames { // Denotes if the key sorted in the file is Internal Key (if false) // or User Key only (if true). static const std::string kIsLastLevel; + // Indicates whether the identity function is used as the first hash function. + static const std::string kIdentityAsFirstHash; +}; + +struct CuckooTableOptions { + // Determines the utilization of hash tables. Smaller values + // result in larger hash tables with fewer collisions. + double hash_table_ratio = 0.9; + // A property used by builder to determine the depth to go to + // to search for a path to displace elements in case of + // collision. See Builder.MakeSpaceForKey method. Higher + // values result in more efficient hash tables with fewer + // lookups but take more time to build. + uint32_t max_search_depth = 100; + // In case of collision while inserting, the builder + // attempts to insert in the next cuckoo_block_size + // locations before skipping over to the next Cuckoo hash + // function. This makes lookups more cache friendly in case + // of collisions. + uint32_t cuckoo_block_size = 5; + // If this option is enabled, user key is treated as uint64_t and its value + // is used as hash value directly. This option changes builder's behavior. 
+ // The reader ignores this option and behaves according to what is specified + // in the table property. + bool identity_as_first_hash = false; }; // Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing -// @hash_table_ratio: Determines the utilization of hash tables. Smaller values -// result in larger hash tables with fewer collisions. -// @max_search_depth: A property used by builder to determine the depth to go to -// to search for a path to displace elements in case of -// collision. See Builder.MakeSpaceForKey method. Higher -// values result in more efficient hash tables with fewer -// lookups but take more time to build. -// @cuckoo_block_size: In case of collision while inserting, the builder -// attempts to insert in the next cuckoo_block_size -// locations before skipping over to the next Cuckoo hash -// function. This makes lookups more cache friendly in case -// of collisions. -extern TableFactory* NewCuckooTableFactory(double hash_table_ratio = 0.9, - uint32_t max_search_depth = 100, uint32_t cuckoo_block_size = 5); +extern TableFactory* NewCuckooTableFactory( + const CuckooTableOptions& table_options = CuckooTableOptions()); #endif // ROCKSDB_LITE diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc index 1cf19e3aa..51c80d9df 100644 --- a/table/cuckoo_table_builder.cc +++ b/table/cuckoo_table_builder.cc @@ -35,6 +35,8 @@ const std::string CuckooTablePropertyNames::kIsLastLevel = "rocksdb.cuckoo.file.islastlevel"; const std::string CuckooTablePropertyNames::kCuckooBlockSize = "rocksdb.cuckoo.hash.cuckooblocksize"; +const std::string CuckooTablePropertyNames::kIdentityAsFirstHash = + "rocksdb.cuckoo.hash.identityfirst"; // Obtained by running echo rocksdb.table.cuckoo | sha1sum extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; @@ -43,6 +45,7 @@ CuckooTableBuilder::CuckooTableBuilder( WritableFile* file, double max_hash_table_ratio, uint32_t max_num_hash_table, uint32_t max_search_depth, const 
Comparator* user_comparator, uint32_t cuckoo_block_size, + bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) : num_hash_func_(2), file_(file), @@ -54,6 +57,7 @@ CuckooTableBuilder::CuckooTableBuilder( is_last_level_file_(false), has_seen_first_key_(false), ucomp_(user_comparator), + identity_as_first_hash_(identity_as_first_hash), get_slice_hash_(get_slice_hash), closed_(false) { // Data is in a huge block. @@ -119,7 +123,7 @@ Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found; ++hash_cnt) { uint64_t hash_val = CuckooHash(user_key, hash_cnt, - hash_table_size_minus_one, get_slice_hash_); + hash_table_size_minus_one, identity_as_first_hash_, get_slice_hash_); // If there is a collision, check next cuckoo_block_size_ locations for // empty locations. While checking, if we reach end of the hash table, // stop searching and proceed for next hash function. @@ -149,7 +153,7 @@ Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { // We don't really need to rehash the entire table because old hashes are // still valid and we only increased the number of hash functions. uint64_t hash_val = CuckooHash(user_key, num_hash_func_, - hash_table_size_minus_one, get_slice_hash_); + hash_table_size_minus_one, identity_as_first_hash_, get_slice_hash_); ++num_hash_func_; for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, ++hash_val) { @@ -261,6 +265,10 @@ Status CuckooTableBuilder::Finish() { CuckooTablePropertyNames::kCuckooBlockSize].assign( reinterpret_cast(&cuckoo_block_size_), sizeof(cuckoo_block_size_)); + properties_.user_collected_properties[ + CuckooTablePropertyNames::kIdentityAsFirstHash].assign( + reinterpret_cast(&identity_as_first_hash_), + sizeof(identity_as_first_hash_)); // Write meta blocks. 
MetaIndexBuilder meta_index_builder; @@ -380,7 +388,8 @@ bool CuckooTableBuilder::MakeSpaceForKey( uint64_t child_bucket_id = CuckooHash( (is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first : ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first))), - hash_cnt, hash_table_size_minus_one, get_slice_hash_); + hash_cnt, hash_table_size_minus_one, identity_as_first_hash_, + get_slice_hash_); // Iterate inside Cuckoo Block. for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, ++child_bucket_id) { diff --git a/table/cuckoo_table_builder.h b/table/cuckoo_table_builder.h index 2bf206102..45cf49315 100644 --- a/table/cuckoo_table_builder.h +++ b/table/cuckoo_table_builder.h @@ -24,6 +24,7 @@ class CuckooTableBuilder: public TableBuilder { WritableFile* file, double max_hash_table_ratio, uint32_t max_num_hash_func, uint32_t max_search_depth, const Comparator* user_comparator, uint32_t cuckoo_block_size, + bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)); // REQUIRES: Either Finish() or Abandon() has been called. 
@@ -87,6 +88,7 @@ class CuckooTableBuilder: public TableBuilder { TableProperties properties_; bool has_seen_first_key_; const Comparator* ucomp_; + bool identity_as_first_hash_; uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index, uint64_t max_num_buckets); std::string largest_user_key_ = ""; diff --git a/table/cuckoo_table_builder_test.cc b/table/cuckoo_table_builder_test.cc index 62183dd9c..d25950728 100644 --- a/table/cuckoo_table_builder_test.cc +++ b/table/cuckoo_table_builder_test.cc @@ -133,7 +133,7 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) { fname = test::TmpDir() + "/EmptyFile"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - 4, 100, BytewiseComparator(), 1, GetSliceHash); + 4, 100, BytewiseComparator(), 1, false, GetSliceHash); ASSERT_OK(builder.status()); ASSERT_EQ(0UL, builder.FileSize()); ASSERT_OK(builder.Finish()); @@ -162,7 +162,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { fname = test::TmpDir() + "/NoCollisionFullKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -202,7 +202,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { fname = test::TmpDir() + "/WithCollisionFullKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -243,7 
+243,8 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) { fname = test::TmpDir() + "/WithCollisionFullKey2"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, false, + GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -288,7 +289,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) { fname = test::TmpDir() + "/WithCollisionPathFullKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -330,7 +331,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) { fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 2, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 2, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -366,7 +367,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { fname = test::TmpDir() + "/NoCollisionUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, 
GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); @@ -402,7 +403,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { fname = test::TmpDir() + "/WithCollisionUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); @@ -440,7 +441,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) { fname = test::TmpDir() + "/WithCollisionPathUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash); + num_hash_fun, 2, BytewiseComparator(), 1, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); @@ -478,7 +479,7 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) { fname = test::TmpDir() + "/WithCollisionPathUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash); + num_hash_fun, 2, BytewiseComparator(), 1, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value")); @@ -498,7 +499,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) { fname = test::TmpDir() + "/FailWhenSameKeyInserted"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); 
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); ASSERT_OK(builder.status()); builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1")); diff --git a/table/cuckoo_table_factory.cc b/table/cuckoo_table_factory.cc index 5727a91c0..18db54ed7 100644 --- a/table/cuckoo_table_factory.cc +++ b/table/cuckoo_table_factory.cc @@ -30,9 +30,10 @@ TableBuilder* CuckooTableFactory::NewTableBuilder( const InternalKeyComparator& internal_comparator, WritableFile* file, const CompressionType, const CompressionOptions&) const { - return new CuckooTableBuilder(file, hash_table_ratio_, 64, - max_search_depth_, internal_comparator.user_comparator(), - cuckoo_block_size_, nullptr); + return new CuckooTableBuilder(file, table_options_.hash_table_ratio, 64, + table_options_.max_search_depth, internal_comparator.user_comparator(), + table_options_.cuckoo_block_size, table_options_.identity_as_first_hash, + nullptr); } std::string CuckooTableFactory::GetPrintableTableOptions() const { @@ -42,21 +43,22 @@ std::string CuckooTableFactory::GetPrintableTableOptions() const { char buffer[kBufferSize]; snprintf(buffer, kBufferSize, " hash_table_ratio: %lf\n", - hash_table_ratio_); + table_options_.hash_table_ratio); ret.append(buffer); snprintf(buffer, kBufferSize, " max_search_depth: %u\n", - max_search_depth_); + table_options_.max_search_depth); ret.append(buffer); snprintf(buffer, kBufferSize, " cuckoo_block_size: %u\n", - cuckoo_block_size_); + table_options_.cuckoo_block_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " identity_as_first_hash: %d\n", + table_options_.identity_as_first_hash); ret.append(buffer); return ret; } -TableFactory* NewCuckooTableFactory(double hash_table_ratio, - uint32_t max_search_depth, uint32_t cuckoo_block_size) { - return new CuckooTableFactory( - hash_table_ratio, max_search_depth, 
cuckoo_block_size); +TableFactory* NewCuckooTableFactory(const CuckooTableOptions& table_options) { + return new CuckooTableFactory(table_options); } } // namespace rocksdb diff --git a/table/cuckoo_table_factory.h b/table/cuckoo_table_factory.h index 2b575dc45..7b2f32ce3 100644 --- a/table/cuckoo_table_factory.h +++ b/table/cuckoo_table_factory.h @@ -16,6 +16,7 @@ namespace rocksdb { const uint32_t kCuckooMurmurSeedMultiplier = 816922183; static inline uint64_t CuckooHash( const Slice& user_key, uint32_t hash_cnt, uint64_t table_size_minus_one, + bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) { #ifndef NDEBUG // This part is used only in unit tests. @@ -23,6 +24,10 @@ static inline uint64_t CuckooHash( return get_slice_hash(user_key, hash_cnt, table_size_minus_one + 1); } #endif + if (hash_cnt == 0 && identity_as_first_hash) { + return (*reinterpret_cast(user_key.data())) & + table_size_minus_one; + } return MurmurHash(user_key.data(), user_key.size(), kCuckooMurmurSeedMultiplier * hash_cnt) & table_size_minus_one; } @@ -36,11 +41,8 @@ static inline uint64_t CuckooHash( // - Does not support Merge operations. 
class CuckooTableFactory : public TableFactory { public: - CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth, - uint32_t cuckoo_block_size) - : hash_table_ratio_(hash_table_ratio), - max_search_depth_(max_search_depth), - cuckoo_block_size_(cuckoo_block_size) {} + explicit CuckooTableFactory(const CuckooTableOptions& table_options) + : table_options_(table_options) {} ~CuckooTableFactory() {} const char* Name() const override { return "CuckooTable"; } @@ -63,9 +65,7 @@ class CuckooTableFactory : public TableFactory { std::string GetPrintableTableOptions() const override; private: - const double hash_table_ratio_; - const uint32_t max_search_depth_; - const uint32_t cuckoo_block_size_; + const CuckooTableOptions table_options_; }; } // namespace rocksdb diff --git a/table/cuckoo_table_reader.cc b/table/cuckoo_table_reader.cc index 1fdbc4475..63b8a2c8c 100644 --- a/table/cuckoo_table_reader.cc +++ b/table/cuckoo_table_reader.cc @@ -50,13 +50,13 @@ CuckooTableReader::CuckooTableReader( auto& user_props = props->user_collected_properties; auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc); if (hash_funs == user_props.end()) { - status_ = Status::InvalidArgument("Number of hash functions not found"); + status_ = Status::Corruption("Number of hash functions not found"); return; } num_hash_func_ = *reinterpret_cast(hash_funs->second.data()); auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey); if (unused_key == user_props.end()) { - status_ = Status::InvalidArgument("Empty bucket value not found"); + status_ = Status::Corruption("Empty bucket value not found"); return; } unused_key_ = unused_key->second; @@ -64,7 +64,7 @@ CuckooTableReader::CuckooTableReader( key_length_ = props->fixed_key_len; auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength); if (value_length == user_props.end()) { - status_ = Status::InvalidArgument("Value length not found"); + status_ = Status::Corruption("Value 
length not found"); return; } value_length_ = *reinterpret_cast( @@ -74,21 +74,31 @@ CuckooTableReader::CuckooTableReader( auto hash_table_size = user_props.find( CuckooTablePropertyNames::kHashTableSize); if (hash_table_size == user_props.end()) { - status_ = Status::InvalidArgument("Hash table size not found"); + status_ = Status::Corruption("Hash table size not found"); return; } table_size_minus_one_ = *reinterpret_cast( hash_table_size->second.data()) - 1; auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel); if (is_last_level == user_props.end()) { - status_ = Status::InvalidArgument("Is last level not found"); + status_ = Status::Corruption("Is last level not found"); return; } is_last_level_ = *reinterpret_cast(is_last_level->second.data()); + + auto identity_as_first_hash = user_props.find( + CuckooTablePropertyNames::kIdentityAsFirstHash); + if (identity_as_first_hash == user_props.end()) { + status_ = Status::Corruption("identity as first hash not found"); + return; + } + identity_as_first_hash_ = *reinterpret_cast( + identity_as_first_hash->second.data()); + auto cuckoo_block_size = user_props.find( CuckooTablePropertyNames::kCuckooBlockSize); if (cuckoo_block_size == user_props.end()) { - status_ = Status::InvalidArgument("Cuckoo block size not found"); + status_ = Status::Corruption("Cuckoo block size not found"); return; } cuckoo_block_size_ = *reinterpret_cast( @@ -106,7 +116,8 @@ Status CuckooTableReader::Get( Slice user_key = ExtractUserKey(key); for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) { uint64_t offset = bucket_length_ * CuckooHash( - user_key, hash_cnt, table_size_minus_one_, get_slice_hash_); + user_key, hash_cnt, table_size_minus_one_, identity_as_first_hash_, + get_slice_hash_); const char* bucket = &file_data_.data()[offset]; for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, bucket += bucket_length_) { @@ -117,7 +128,7 @@ Status CuckooTableReader::Get( // Here, we 
compare only the user key part as we support only one entry // per user key and we don't support sanpshot. if (ucomp_->Compare(user_key, Slice(bucket, user_key.size())) == 0) { - Slice value = Slice(&bucket[key_length_], value_length_); + Slice value(bucket + key_length_, value_length_); if (is_last_level_) { ParsedInternalKey found_ikey( Slice(bucket, key_length_), 0, kTypeValue); @@ -140,7 +151,8 @@ void CuckooTableReader::Prepare(const Slice& key) { // Prefetch the first Cuckoo Block. Slice user_key = ExtractUserKey(key); uint64_t addr = reinterpret_cast(file_data_.data()) + - bucket_length_ * CuckooHash(user_key, 0, table_size_minus_one_, nullptr); + bucket_length_ * CuckooHash(user_key, 0, table_size_minus_one_, + identity_as_first_hash_, nullptr); uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_; for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) { PREFETCH(reinterpret_cast(addr), 0, 3); diff --git a/table/cuckoo_table_reader.h b/table/cuckoo_table_reader.h index 61e048eb6..f9e93abf4 100644 --- a/table/cuckoo_table_reader.h +++ b/table/cuckoo_table_reader.h @@ -64,6 +64,7 @@ class CuckooTableReader: public TableReader { std::unique_ptr file_; Slice file_data_; bool is_last_level_; + bool identity_as_first_hash_; std::shared_ptr table_props_; Status status_; uint32_t num_hash_func_; diff --git a/table/cuckoo_table_reader_test.cc b/table/cuckoo_table_reader_test.cc index 3138fb9ef..3b170b638 100644 --- a/table/cuckoo_table_reader_test.cc +++ b/table/cuckoo_table_reader_test.cc @@ -38,6 +38,7 @@ DEFINE_string(file_dir, "", "Directory where the files will be created" DEFINE_bool(enable_perf, false, "Run Benchmark Tests too."); DEFINE_bool(write, false, "Should write new values to file in performance tests?"); +DEFINE_bool(identity_as_first_hash, true, "use identity as first hash"); namespace rocksdb { @@ -109,7 +110,8 @@ class CuckooReaderTest { std::unique_ptr writable_file; ASSERT_OK(env->NewWritableFile(fname, &writable_file, 
env_options)); CuckooTableBuilder builder( - writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, GetSliceHash); + writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, false, + GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) { builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); @@ -375,8 +377,15 @@ TEST(CuckooReaderTest, WhenKeyNotFound) { // Performance tests namespace { +int64_t found_count = 0; +std::string value; bool DoNothing(void* arg, const ParsedInternalKey& k, const Slice& v) { // Deliberately empty. + if (*reinterpret_cast(k.user_key.data()) == + *reinterpret_cast(v.data())) { + ++found_count; + value.assign(v.data(), v.size()); + } return false; } @@ -389,12 +398,14 @@ bool CheckValue(void* cnt_ptr, const ParsedInternalKey& k, const Slice& v) { } void GetKeys(uint64_t num, std::vector* keys) { + keys->clear(); IterKey k; k.SetInternalKey("", 0, kTypeValue); std::string internal_key_suffix = k.GetKey().ToString(); ASSERT_EQ(static_cast(8), internal_key_suffix.size()); for (uint64_t key_idx = 0; key_idx < num; ++key_idx) { - std::string new_key(reinterpret_cast(&key_idx), sizeof(key_idx)); + uint64_t value = 2 * key_idx; + std::string new_key(reinterpret_cast(&value), sizeof(value)); new_key += internal_key_suffix; keys->push_back(new_key); } @@ -422,7 +433,8 @@ void WriteFile(const std::vector& keys, ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); CuckooTableBuilder builder( writable_file.get(), hash_ratio, - 64, 1000, test::Uint64Comparator(), 5, nullptr); + 64, 1000, test::Uint64Comparator(), 5, + FLAGS_identity_as_first_hash, nullptr); ASSERT_OK(builder.status()); for (uint64_t key_idx = 0; key_idx < num; ++key_idx) { // Value is just a part of key. 
@@ -482,27 +494,36 @@ void ReadKeys(uint64_t num, uint32_t batch_size) { " hash functions: %u.\n", num, num * 100.0 / (table_size), num_hash_fun); ReadOptions r_options; + std::vector keys; + keys.reserve(num); + for (uint64_t i = 0; i < num; ++i) { + keys.push_back(2 * i); + } + std::random_shuffle(keys.begin(), keys.end()); + + found_count = 0; uint64_t start_time = env->NowMicros(); if (batch_size > 0) { for (uint64_t i = 0; i < num; i += batch_size) { for (uint64_t j = i; j < i+batch_size && j < num; ++j) { - reader.Prepare(Slice(reinterpret_cast(&j), 16)); + reader.Prepare(Slice(reinterpret_cast(&keys[j]), 16)); } for (uint64_t j = i; j < i+batch_size && j < num; ++j) { - reader.Get(r_options, Slice(reinterpret_cast(&j), 16), - nullptr, DoNothing, nullptr); + reader.Get(r_options, Slice(reinterpret_cast(&keys[j]), 16), + nullptr, DoNothing, nullptr); } } } else { for (uint64_t i = 0; i < num; i++) { - reader.Get(r_options, Slice(reinterpret_cast(&i), 16), nullptr, - DoNothing, nullptr); + reader.Get(r_options, Slice(reinterpret_cast(&keys[i]), 16), + nullptr, DoNothing, nullptr); } } float time_per_op = (env->NowMicros() - start_time) * 1.0 / num; fprintf(stderr, - "Time taken per op is %.3fus (%.1f Mqps) with batch size of %u\n", - time_per_op, 1.0 / time_per_op, batch_size); + "Time taken per op is %.3fus (%.1f Mqps) with batch size of %u, " + "# of found keys %ld\n", + time_per_op, 1.0 / time_per_op, batch_size, found_count); } } // namespace. @@ -514,16 +535,16 @@ TEST(CuckooReaderTest, TestReadPerformance) { // These numbers are chosen to have a hash utilizaiton % close to // 0.9, 0.75, 0.6 and 0.5 respectively. // They all create 128 M buckets. - std::vector nums = {120*1000*1000, 100*1000*1000, 80*1000*1000, - 70*1000*1000}; + std::vector nums = {120*1024*1024, 100*1024*1024, 80*1024*1024, + 70*1024*1024}; #ifndef NDEBUG fprintf(stdout, "WARNING: Not compiled with DNDEBUG. 
Performance tests may be slow.\n"); #endif std::vector keys; - GetKeys(*std::max_element(nums.begin(), nums.end()), &keys); for (uint64_t num : nums) { if (FLAGS_write || !Env::Default()->FileExists(GetFileName(num))) { + GetKeys(num, &keys); WriteFile(keys, num, hash_ratio); } ReadKeys(num, 0); diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index 584937587..aa791f4c4 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -260,8 +260,9 @@ int main(int argc, char** argv) { if (FLAGS_table_factory == "cuckoo_hash") { options.allow_mmap_reads = true; env_options.use_mmap_reads = true; - - tf.reset(rocksdb::NewCuckooTableFactory(0.75)); + rocksdb::CuckooTableOptions table_options; + table_options.hash_table_ratio = 0.75; + tf.reset(rocksdb::NewCuckooTableFactory(table_options)); } else if (FLAGS_table_factory == "plain_table") { options.allow_mmap_reads = true; env_options.use_mmap_reads = true;