2016-02-09 15:12:00 -08:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 16:03:42 -07:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2014-07-21 13:26:09 -07:00
|
|
|
|
2015-07-20 10:50:46 -07:00
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
|
2021-01-28 22:08:46 -08:00
|
|
|
#include "table/cuckoo/cuckoo_table_builder.h"
|
|
|
|
|
2014-07-21 13:26:09 -07:00
|
|
|
#include <map>
|
2021-01-28 22:08:46 -08:00
|
|
|
#include <string>
|
2014-07-21 13:26:09 -07:00
|
|
|
#include <utility>
|
2021-01-28 22:08:46 -08:00
|
|
|
#include <vector>
|
2014-07-21 13:26:09 -07:00
|
|
|
|
2019-09-16 10:31:27 -07:00
|
|
|
#include "file/random_access_file_reader.h"
|
|
|
|
#include "file/writable_file_writer.h"
|
2021-01-28 22:08:46 -08:00
|
|
|
#include "rocksdb/file_system.h"
|
2019-05-30 17:39:43 -07:00
|
|
|
#include "table/meta_blocks.h"
|
2019-05-30 11:21:38 -07:00
|
|
|
#include "test_util/testharness.h"
|
|
|
|
#include "test_util/testutil.h"
|
2014-07-21 13:26:09 -07:00
|
|
|
|
2020-02-20 12:07:53 -08:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2014-07-21 13:26:09 -07:00
|
|
|
extern const uint64_t kCuckooTableMagicNumber;
|
|
|
|
|
|
|
|
namespace {
|
2014-07-24 10:07:41 -07:00
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hash_map;
|
2014-07-21 13:26:09 -07:00
|
|
|
|
2014-07-24 10:07:41 -07:00
|
|
|
uint64_t GetSliceHash(const Slice& s, uint32_t index,
|
2018-03-05 13:08:17 -08:00
|
|
|
uint64_t /*max_num_buckets*/) {
|
2014-07-21 13:26:09 -07:00
|
|
|
return hash_map[s.ToString()][index];
|
|
|
|
}
|
|
|
|
} // namespace
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
class CuckooBuilderTest : public testing::Test {
|
2014-07-21 13:26:09 -07:00
|
|
|
public:
|
|
|
|
CuckooBuilderTest() {
|
|
|
|
env_ = Env::Default();
|
2014-08-05 20:55:46 -07:00
|
|
|
Options options;
|
|
|
|
options.allow_mmap_reads = true;
|
2021-01-28 22:08:46 -08:00
|
|
|
file_options_ = FileOptions(options);
|
2014-07-21 13:26:09 -07:00
|
|
|
}
|
|
|
|
|
2014-08-05 20:55:46 -07:00
|
|
|
void CheckFileContents(const std::vector<std::string>& keys,
|
|
|
|
const std::vector<std::string>& values,
|
|
|
|
const std::vector<uint64_t>& expected_locations,
|
2014-08-28 10:42:23 -07:00
|
|
|
std::string expected_unused_bucket, uint64_t expected_table_size,
|
|
|
|
uint32_t expected_num_hash_func, bool expected_is_last_level,
|
|
|
|
uint32_t expected_cuckoo_block_size = 1) {
|
2018-10-30 15:29:58 -07:00
|
|
|
uint64_t num_deletions = 0;
|
|
|
|
for (const auto& key : keys) {
|
|
|
|
ParsedInternalKey parsed;
|
2020-10-28 10:11:13 -07:00
|
|
|
Status pik_status =
|
|
|
|
ParseInternalKey(key, &parsed, true /* log_err_key */);
|
|
|
|
if (pik_status.ok() && parsed.type == kTypeDeletion) {
|
2018-10-30 15:29:58 -07:00
|
|
|
num_deletions++;
|
|
|
|
}
|
|
|
|
}
|
2014-07-21 13:26:09 -07:00
|
|
|
// Read file
|
|
|
|
uint64_t read_file_size;
|
|
|
|
ASSERT_OK(env_->GetFileSize(fname, &read_file_size));
|
2021-01-28 22:08:46 -08:00
|
|
|
std::unique_ptr<RandomAccessFileReader> file_reader;
|
|
|
|
ASSERT_OK(RandomAccessFileReader::Create(
|
|
|
|
env_->GetFileSystem(), fname, file_options_, &file_reader, nullptr));
|
2014-07-21 13:26:09 -07:00
|
|
|
|
2020-04-20 11:37:36 -07:00
|
|
|
Options options;
|
|
|
|
options.allow_mmap_reads = true;
|
2021-05-05 13:59:21 -07:00
|
|
|
ImmutableOptions ioptions(options);
|
2016-07-19 09:44:03 -07:00
|
|
|
|
2014-07-21 13:26:09 -07:00
|
|
|
// Assert Table Properties.
|
|
|
|
TableProperties* props = nullptr;
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 16:16:11 -07:00
|
|
|
ASSERT_OK(ReadTableProperties(file_reader.get(), read_file_size,
|
2016-07-19 09:44:03 -07:00
|
|
|
kCuckooTableMagicNumber, ioptions,
|
2018-06-15 19:24:21 -07:00
|
|
|
&props, true /* compression_type_missing */));
|
2014-07-21 13:26:09 -07:00
|
|
|
// Check unused bucket.
|
2014-07-24 10:07:41 -07:00
|
|
|
std::string unused_key = props->user_collected_properties[
|
|
|
|
CuckooTablePropertyNames::kEmptyKey];
|
2014-08-05 20:55:46 -07:00
|
|
|
ASSERT_EQ(expected_unused_bucket.substr(0,
|
|
|
|
props->fixed_key_len), unused_key);
|
2014-07-21 13:26:09 -07:00
|
|
|
|
2017-04-21 20:41:37 -07:00
|
|
|
uint64_t value_len_found =
|
|
|
|
*reinterpret_cast<const uint64_t*>(props->user_collected_properties[
|
2014-07-24 10:07:41 -07:00
|
|
|
CuckooTablePropertyNames::kValueLength].data());
|
2014-08-05 20:55:46 -07:00
|
|
|
ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found);
|
2014-08-11 20:21:07 -07:00
|
|
|
ASSERT_EQ(props->raw_value_size, values.size()*value_len_found);
|
2014-08-28 10:42:23 -07:00
|
|
|
const uint64_t table_size =
|
2014-07-24 10:07:41 -07:00
|
|
|
*reinterpret_cast<const uint64_t*>(props->user_collected_properties[
|
2014-08-28 10:42:23 -07:00
|
|
|
CuckooTablePropertyNames::kHashTableSize].data());
|
|
|
|
ASSERT_EQ(expected_table_size, table_size);
|
|
|
|
const uint32_t num_hash_func_found =
|
2014-07-24 10:07:41 -07:00
|
|
|
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
|
2014-08-28 10:42:23 -07:00
|
|
|
CuckooTablePropertyNames::kNumHashFunc].data());
|
|
|
|
ASSERT_EQ(expected_num_hash_func, num_hash_func_found);
|
|
|
|
const uint32_t cuckoo_block_size =
|
|
|
|
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
|
|
|
|
CuckooTablePropertyNames::kCuckooBlockSize].data());
|
|
|
|
ASSERT_EQ(expected_cuckoo_block_size, cuckoo_block_size);
|
2014-07-25 16:37:32 -07:00
|
|
|
const bool is_last_level_found =
|
|
|
|
*reinterpret_cast<const bool*>(props->user_collected_properties[
|
|
|
|
CuckooTablePropertyNames::kIsLastLevel].data());
|
|
|
|
ASSERT_EQ(expected_is_last_level, is_last_level_found);
|
2014-09-25 13:53:27 -07:00
|
|
|
|
|
|
|
ASSERT_EQ(props->num_entries, keys.size());
|
2018-10-30 15:29:58 -07:00
|
|
|
ASSERT_EQ(props->num_deletions, num_deletions);
|
2014-09-25 13:53:27 -07:00
|
|
|
ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size());
|
|
|
|
ASSERT_EQ(props->data_size, expected_unused_bucket.size() *
|
|
|
|
(expected_table_size + expected_cuckoo_block_size - 1));
|
|
|
|
ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len);
|
2016-04-06 23:10:32 -07:00
|
|
|
ASSERT_EQ(props->column_family_id, 0);
|
|
|
|
ASSERT_EQ(props->column_family_name, kDefaultColumnFamilyName);
|
2014-07-22 09:49:04 -07:00
|
|
|
delete props;
|
2014-08-05 20:55:46 -07:00
|
|
|
|
2014-07-21 13:26:09 -07:00
|
|
|
// Check contents of the bucket.
|
2014-08-05 20:55:46 -07:00
|
|
|
std::vector<bool> keys_found(keys.size(), false);
|
2014-11-11 16:47:22 -05:00
|
|
|
size_t bucket_size = expected_unused_bucket.size();
|
2020-06-02 15:02:44 -07:00
|
|
|
for (uint32_t i = 0; i + 1 < table_size + cuckoo_block_size; ++i) {
|
2014-08-05 20:55:46 -07:00
|
|
|
Slice read_slice;
|
2020-04-30 14:48:51 -07:00
|
|
|
ASSERT_OK(file_reader->Read(IOOptions(), i * bucket_size, bucket_size,
|
|
|
|
&read_slice, nullptr, nullptr));
|
2014-11-11 16:47:22 -05:00
|
|
|
size_t key_idx =
|
|
|
|
std::find(expected_locations.begin(), expected_locations.end(), i) -
|
|
|
|
expected_locations.begin();
|
2014-08-05 20:55:46 -07:00
|
|
|
if (key_idx == keys.size()) {
|
2017-05-17 23:03:54 -07:00
|
|
|
// i is not one of the expected locations. Empty bucket.
|
2017-08-23 10:45:17 -07:00
|
|
|
if (read_slice.data() == nullptr) {
|
|
|
|
ASSERT_EQ(0, expected_unused_bucket.size());
|
|
|
|
} else {
|
|
|
|
ASSERT_EQ(read_slice.compare(expected_unused_bucket), 0);
|
|
|
|
}
|
2014-08-05 20:55:46 -07:00
|
|
|
} else {
|
|
|
|
keys_found[key_idx] = true;
|
|
|
|
ASSERT_EQ(read_slice.compare(keys[key_idx] + values[key_idx]), 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (auto key_found : keys_found) {
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 16:16:11 -07:00
|
|
|
// Check that all keys wereReader found.
|
2014-08-05 20:55:46 -07:00
|
|
|
ASSERT_TRUE(key_found);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-30 15:29:58 -07:00
|
|
|
std::string GetInternalKey(Slice user_key, bool zero_seqno,
|
|
|
|
ValueType type = kTypeValue) {
|
2014-08-05 20:55:46 -07:00
|
|
|
IterKey ikey;
|
2018-10-30 15:29:58 -07:00
|
|
|
ikey.SetInternalKey(user_key, zero_seqno ? 0 : 1000, type);
|
2017-04-04 14:17:16 -07:00
|
|
|
return ikey.GetInternalKey().ToString();
|
2014-07-21 13:26:09 -07:00
|
|
|
}
|
|
|
|
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-29 19:06:15 -07:00
|
|
|
uint64_t NextPowOf2(uint64_t num) {
|
|
|
|
uint64_t n = 2;
|
|
|
|
while (n <= num) {
|
|
|
|
n *= 2;
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2015-11-19 11:47:12 -08:00
|
|
|
uint64_t GetExpectedTableSize(uint64_t num) {
|
|
|
|
return NextPowOf2(static_cast<uint64_t>(num / kHashTableRatio));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-07-21 13:26:09 -07:00
|
|
|
Env* env_;
|
2021-01-28 22:08:46 -08:00
|
|
|
FileOptions file_options_;
|
2014-07-21 13:26:09 -07:00
|
|
|
std::string fname;
|
2014-08-05 20:55:46 -07:00
|
|
|
const double kHashTableRatio = 0.9;
|
2014-07-21 13:26:09 -07:00
|
|
|
};
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Finishing a builder without adding any entry must succeed and produce a
// file whose properties describe an empty table.
TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) {
  fname = test::PerThreadDBPath("EmptyFile");
  std::unique_ptr<WritableFileWriter> file_writer;
  ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                       file_options_, &file_writer, nullptr));
  CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, 4, 100,
                             BytewiseComparator(), 1, false, false,
                             GetSliceHash, 0 /* column_family_id */,
                             kDefaultColumnFamilyName);
  ASSERT_OK(builder.status());
  // Nothing has been added yet, so the builder reports a zero file size.
  ASSERT_EQ(0UL, builder.FileSize());
  ASSERT_OK(builder.Finish());
  ASSERT_OK(file_writer->Close());
  // No keys, empty unused bucket, table size 2, 2 hash functions.
  CheckFileContents({}, {}, {}, "", 2, 2, false);
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Four internal keys whose first hash values are all distinct: each key is
// expected to land in the bucket given by its first hash. The scenario runs
// once with value entries and once with deletion entries (empty values).
TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
  for (auto entry_type : {kTypeValue, kTypeDeletion}) {
    uint32_t hash_func_count = 4;
    std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
    std::vector<std::string> values;
    if (entry_type != kTypeValue) {
      values = {"", "", "", ""};
    } else {
      values = {"v01", "v02", "v03", "v04"};
    }
    // Need to have a temporary variable here as VS compiler does not currently
    // support operator= with initializer_list as a parameter
    std::unordered_map<std::string, std::vector<uint64_t>> programmed_hashes =
        {{user_keys[0], {0, 1, 2, 3}},
         {user_keys[1], {1, 2, 3, 4}},
         {user_keys[2], {2, 3, 4, 5}},
         {user_keys[3], {3, 4, 5, 6}}};
    hash_map = std::move(programmed_hashes);

    std::vector<uint64_t> expected_locations = {0, 1, 2, 3};

    // Wrap every user key into an internal key of the current entry type.
    std::vector<std::string> keys;
    for (size_t k = 0; k < user_keys.size(); ++k) {
      keys.push_back(GetInternalKey(user_keys[k], false, entry_type));
    }
    uint64_t expected_table_size = GetExpectedTableSize(keys.size());

    fname = test::PerThreadDBPath("NoCollisionFullKey");
    std::unique_ptr<WritableFileWriter> file_writer;
    ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                         file_options_, &file_writer, nullptr));
    CuckooTableBuilder builder(
        file_writer.get(), kHashTableRatio, hash_func_count, 100,
        BytewiseComparator(), 1, false, false, GetSliceHash,
        0 /* column_family_id */, kDefaultColumnFamilyName);
    ASSERT_OK(builder.status());

    // Feed the entries one by one; the entry count must follow along.
    for (uint32_t idx = 0; idx < user_keys.size(); idx++) {
      builder.Add(Slice(keys[idx]), Slice(values[idx]));
      ASSERT_EQ(builder.NumEntries(), idx + 1);
      ASSERT_OK(builder.status());
    }

    size_t bucket_size = keys.front().size() + values.front().size();
    ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
    ASSERT_OK(builder.Finish());
    ASSERT_OK(file_writer->Close());
    ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());

    // Expected filler for empty buckets: internal key of "key00" with
    // sequence number 0, followed by one 'a' per value byte.
    std::string expected_unused_bucket = GetInternalKey("key00", true);
    expected_unused_bucket.append(values.front().size(), 'a');
    CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
                      expected_table_size, 2, false);
  }
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Four internal keys that all share the same four hash values {0, 1, 2, 3}.
// The test expects key i to end up in bucket i and all four hash functions
// to be recorded in the table properties.
TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
  uint32_t hash_func_count = 4;
  std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04"};

  // Every key is programmed with the identical list of hash values.
  const std::vector<uint64_t> shared_hashes = {0, 1, 2, 3};
  std::unordered_map<std::string, std::vector<uint64_t>> programmed_hashes;
  for (const auto& user_key : user_keys) {
    programmed_hashes.emplace(user_key, shared_hashes);
  }
  hash_map = std::move(programmed_hashes);

  std::vector<uint64_t> expected_locations = {0, 1, 2, 3};

  std::vector<std::string> keys;
  for (size_t k = 0; k < user_keys.size(); ++k) {
    keys.push_back(GetInternalKey(user_keys[k], false));
  }
  uint64_t expected_table_size = GetExpectedTableSize(keys.size());

  fname = test::PerThreadDBPath("WithCollisionFullKey");
  std::unique_ptr<WritableFileWriter> file_writer;
  ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                       file_options_, &file_writer, nullptr));
  CuckooTableBuilder builder(
      file_writer.get(), kHashTableRatio, hash_func_count, 100,
      BytewiseComparator(), 1, false, false, GetSliceHash,
      0 /* column_family_id */, kDefaultColumnFamilyName);
  ASSERT_OK(builder.status());

  // Feed the entries one by one; the entry count must follow along.
  for (uint32_t idx = 0; idx < user_keys.size(); idx++) {
    builder.Add(Slice(keys[idx]), Slice(values[idx]));
    ASSERT_EQ(builder.NumEntries(), idx + 1);
    ASSERT_OK(builder.status());
  }

  size_t bucket_size = keys.front().size() + values.front().size();
  ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
  ASSERT_OK(builder.Finish());
  ASSERT_OK(file_writer->Close());
  ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());

  // Expected filler for empty buckets: internal key of "key00" with sequence
  // number 0, followed by one 'a' per value byte.
  std::string expected_unused_bucket = GetInternalKey("key00", true);
  expected_unused_bucket.append(values.front().size(), 'a');
  CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
                    expected_table_size, 4, false);
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Same colliding hashes as the full-key collision test, but the builder is
// created with a cuckoo block size of 2. The test expects key i in bucket i
// and 3 hash functions recorded in the table properties.
TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
  uint32_t hash_func_count = 4;
  std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04"};

  // Every key is programmed with the identical list of hash values.
  const std::vector<uint64_t> shared_hashes = {0, 1, 2, 3};
  std::unordered_map<std::string, std::vector<uint64_t>> programmed_hashes;
  for (const auto& user_key : user_keys) {
    programmed_hashes.emplace(user_key, shared_hashes);
  }
  hash_map = std::move(programmed_hashes);

  std::vector<uint64_t> expected_locations = {0, 1, 2, 3};

  std::vector<std::string> keys;
  for (size_t k = 0; k < user_keys.size(); ++k) {
    keys.push_back(GetInternalKey(user_keys[k], false));
  }
  uint64_t expected_table_size = GetExpectedTableSize(keys.size());

  std::unique_ptr<WritableFileWriter> file_writer;
  uint32_t cuckoo_block_size = 2;
  fname = test::PerThreadDBPath("WithCollisionFullKey2");
  ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                       file_options_, &file_writer, nullptr));
  CuckooTableBuilder builder(file_writer.get(), kHashTableRatio,
                             hash_func_count, 100, BytewiseComparator(),
                             cuckoo_block_size, false, false, GetSliceHash,
                             0 /* column_family_id */,
                             kDefaultColumnFamilyName);
  ASSERT_OK(builder.status());

  // Feed the entries one by one; the entry count must follow along.
  for (uint32_t idx = 0; idx < user_keys.size(); idx++) {
    builder.Add(Slice(keys[idx]), Slice(values[idx]));
    ASSERT_EQ(builder.NumEntries(), idx + 1);
    ASSERT_OK(builder.status());
  }

  size_t bucket_size = keys.front().size() + values.front().size();
  ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
  ASSERT_OK(builder.Finish());
  ASSERT_OK(file_writer->Close());
  ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());

  // Expected filler for empty buckets: internal key of "key00" with sequence
  // number 0, followed by one 'a' per value byte.
  std::string expected_unused_bucket = GetInternalKey("key00", true);
  expected_unused_bucket.append(values.front().size(), 'a');
  CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
                    expected_table_size, 3, false, cuckoo_block_size);
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Two hash functions and five keys with overlapping hash values. The last
// key hashes into the middle of the occupied range, so the elements after it
// are expected to be displaced (see expected_locations).
TEST_F(CuckooBuilderTest, WithCollisionPathFullKey) {
  uint32_t hash_func_count = 2;
  std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04",
                                        "key05"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
  // Need to have a temporary variable here as VS compiler does not currently
  // support operator= with initializer_list as a parameter
  std::unordered_map<std::string, std::vector<uint64_t>> programmed_hashes = {
      {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}},
      {user_keys[3], {3, 4}}, {user_keys[4], {0, 2}},
  };
  hash_map = std::move(programmed_hashes);

  std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};

  std::vector<std::string> keys;
  for (size_t k = 0; k < user_keys.size(); ++k) {
    keys.push_back(GetInternalKey(user_keys[k], false));
  }
  uint64_t expected_table_size = GetExpectedTableSize(keys.size());

  std::unique_ptr<WritableFileWriter> file_writer;
  fname = test::PerThreadDBPath("WithCollisionPathFullKey");
  ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                       file_options_, &file_writer, nullptr));
  CuckooTableBuilder builder(
      file_writer.get(), kHashTableRatio, hash_func_count, 100,
      BytewiseComparator(), 1, false, false, GetSliceHash,
      0 /* column_family_id */, kDefaultColumnFamilyName);
  ASSERT_OK(builder.status());

  // Feed the entries one by one; the entry count must follow along.
  for (uint32_t idx = 0; idx < user_keys.size(); idx++) {
    builder.Add(Slice(keys[idx]), Slice(values[idx]));
    ASSERT_EQ(builder.NumEntries(), idx + 1);
    ASSERT_OK(builder.status());
  }

  size_t bucket_size = keys.front().size() + values.front().size();
  ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
  ASSERT_OK(builder.Finish());
  ASSERT_OK(file_writer->Close());
  ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());

  // Expected filler for empty buckets: internal key of "key00" with sequence
  // number 0, followed by one 'a' per value byte.
  std::string expected_unused_bucket = GetInternalKey("key00", true);
  expected_unused_bucket.append(values.front().size(), 'a');
  CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
                    expected_table_size, 2, false);
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Inserts five full (internal) keys whose two candidate buckets overlap, using
// a builder configured with a cuckoo block size of 2, and verifies that every
// key ends up in the expected bucket of the written file.
TEST_F(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
  uint32_t hash_fun_count = 2;
  std::vector<std::string> user_keys = {"key01", "key02", "key03",
                                        "key04", "key05"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};

  // The table is built in a temporary and then moved into the global because
  // the VS compiler does not currently support operator= taking an
  // initializer_list.
  // GetSliceHash() looks the candidate buckets up in hash_map, so each entry
  // lists the bucket returned for hash index 0 and hash index 1.
  std::unordered_map<std::string, std::vector<uint64_t>> fake_hashes = {
      {user_keys[0], {0, 1}},
      {user_keys[1], {1, 2}},
      {user_keys[2], {3, 4}},
      {user_keys[3], {4, 5}},
      {user_keys[4], {0, 3}},
  };
  hash_map = std::move(fake_hashes);

  // expected_locations[i] is the bucket in which keys[i] must be found.
  std::vector<uint64_t> expected_locations = {2, 1, 3, 4, 0};

  // This test feeds the builder internal keys, so derive them up front.
  std::vector<std::string> keys;
  keys.reserve(user_keys.size());
  for (size_t k = 0; k < user_keys.size(); ++k) {
    keys.emplace_back(GetInternalKey(user_keys[k], false));
  }
  uint64_t expected_table_size = GetExpectedTableSize(keys.size());

  std::unique_ptr<WritableFileWriter> file_writer;
  fname = test::PerThreadDBPath("WithCollisionPathFullKeyAndCuckooBlock");
  ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                       file_options_, &file_writer, nullptr));

  CuckooTableBuilder builder(file_writer.get(), kHashTableRatio,
                             hash_fun_count, 100, BytewiseComparator(), 2,
                             false, false, GetSliceHash,
                             0 /* column_family_id */,
                             kDefaultColumnFamilyName);
  ASSERT_OK(builder.status());

  const size_t key_count = user_keys.size();
  for (size_t idx = 0; idx < key_count; ++idx) {
    builder.Add(Slice(keys[idx]), Slice(values[idx]));
    ASSERT_EQ(builder.NumEntries(), idx + 1);
    ASSERT_OK(builder.status());
  }

  // A bucket holds one key followed by one value; all keys and all values in
  // this test have the same length, so the first pair is representative.
  const size_t bucket_size = keys[0].size() + values[0].size();
  const uint64_t bucket_bytes = expected_table_size * bucket_size;
  ASSERT_EQ(bucket_bytes - 1, builder.FileSize());
  ASSERT_OK(builder.Finish());
  ASSERT_OK(file_writer->Close());
  ASSERT_LE(bucket_bytes, builder.FileSize());

  // Contents expected in buckets that hold no key: the internal key for
  // "key00" followed by one 'a' per value byte.
  std::string expected_unused_bucket = GetInternalKey("key00", true);
  expected_unused_bucket.append(values[0].size(), 'a');
  CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
                    expected_table_size, 2, false, 2);
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Inserts four keys whose first candidate buckets are all distinct and checks
// the written file against the user keys (not the internal keys).
TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
  uint32_t hash_fun_count = 4;
  std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04"};

  // The table is built in a temporary and then moved into the global because
  // the VS compiler does not currently support operator= taking an
  // initializer_list.
  // GetSliceHash() reads hash_map; every key's first candidate is unique.
  std::unordered_map<std::string, std::vector<uint64_t>> fake_hashes = {
      {user_keys[0], {0, 1, 2, 3}},
      {user_keys[1], {1, 2, 3, 4}},
      {user_keys[2], {2, 3, 4, 5}},
      {user_keys[3], {3, 4, 5, 6}}};
  hash_map = std::move(fake_hashes);

  // expected_locations[i] is the bucket in which user_keys[i] must be found.
  std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
  uint64_t expected_table_size = GetExpectedTableSize(user_keys.size());

  std::unique_ptr<WritableFileWriter> file_writer;
  fname = test::PerThreadDBPath("NoCollisionUserKey");
  ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                       file_options_, &file_writer, nullptr));

  CuckooTableBuilder builder(file_writer.get(), kHashTableRatio,
                             hash_fun_count, 100, BytewiseComparator(), 1,
                             false, false, GetSliceHash,
                             0 /* column_family_id */,
                             kDefaultColumnFamilyName);
  ASSERT_OK(builder.status());

  const size_t key_count = user_keys.size();
  for (size_t idx = 0; idx < key_count; ++idx) {
    // NOTE(review): the `true` flag presumably selects the key form that lets
    // the builder store only the user key -- confirm against GetInternalKey.
    builder.Add(Slice(GetInternalKey(user_keys[idx], true)),
                Slice(values[idx]));
    ASSERT_EQ(builder.NumEntries(), idx + 1);
    ASSERT_OK(builder.status());
  }

  // Bucket size is computed from the user key length here, not from the
  // length of the internal key handed to Add().
  const size_t bucket_size = user_keys[0].size() + values[0].size();
  const uint64_t bucket_bytes = expected_table_size * bucket_size;
  ASSERT_EQ(bucket_bytes - 1, builder.FileSize());
  ASSERT_OK(builder.Finish());
  ASSERT_OK(file_writer->Close());
  ASSERT_LE(bucket_bytes, builder.FileSize());

  // Contents expected in buckets that hold no key: "key00" followed by one
  // 'a' per value byte.
  std::string expected_unused_bucket = "key00";
  expected_unused_bucket.append(values[0].size(), 'a');
  CheckFileContents(user_keys, values, expected_locations,
                    expected_unused_bucket, expected_table_size, 2, true);
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Inserts four keys that all share the same four candidate buckets and checks
// that they are spread over buckets 0..3 in insertion order.
TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
  uint32_t hash_fun_count = 4;
  std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04"};

  // The table is built in a temporary and then moved into the global because
  // the VS compiler does not currently support operator= taking an
  // initializer_list.
  // GetSliceHash() reads hash_map; every key maps to the same candidates.
  std::unordered_map<std::string, std::vector<uint64_t>> fake_hashes = {
      {user_keys[0], {0, 1, 2, 3}},
      {user_keys[1], {0, 1, 2, 3}},
      {user_keys[2], {0, 1, 2, 3}},
      {user_keys[3], {0, 1, 2, 3}},
  };
  hash_map = std::move(fake_hashes);

  // expected_locations[i] is the bucket in which user_keys[i] must be found.
  std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
  uint64_t expected_table_size = GetExpectedTableSize(user_keys.size());

  std::unique_ptr<WritableFileWriter> file_writer;
  fname = test::PerThreadDBPath("WithCollisionUserKey");
  ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                       file_options_, &file_writer, nullptr));

  CuckooTableBuilder builder(file_writer.get(), kHashTableRatio,
                             hash_fun_count, 100, BytewiseComparator(), 1,
                             false, false, GetSliceHash,
                             0 /* column_family_id */,
                             kDefaultColumnFamilyName);
  ASSERT_OK(builder.status());

  const size_t key_count = user_keys.size();
  for (size_t idx = 0; idx < key_count; ++idx) {
    builder.Add(Slice(GetInternalKey(user_keys[idx], true)),
                Slice(values[idx]));
    ASSERT_EQ(builder.NumEntries(), idx + 1);
    ASSERT_OK(builder.status());
  }

  // Bucket size is computed from the user key length here, not from the
  // length of the internal key handed to Add().
  const size_t bucket_size = user_keys[0].size() + values[0].size();
  const uint64_t bucket_bytes = expected_table_size * bucket_size;
  ASSERT_EQ(bucket_bytes - 1, builder.FileSize());
  ASSERT_OK(builder.Finish());
  ASSERT_OK(file_writer->Close());
  ASSERT_LE(bucket_bytes, builder.FileSize());

  // Contents expected in buckets that hold no key: "key00" followed by one
  // 'a' per value byte.
  std::string expected_unused_bucket = "key00";
  expected_unused_bucket.append(values[0].size(), 'a');
  CheckFileContents(user_keys, values, expected_locations,
                    expected_unused_bucket, expected_table_size, 4, true);
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Inserts five keys with two overlapping candidate buckets each, so that the
// last key can only be placed by moving earlier keys, and checks the final
// bucket of every key in the written file.
TEST_F(CuckooBuilderTest, WithCollisionPathUserKey) {
  uint32_t hash_fun_count = 2;
  std::vector<std::string> user_keys = {"key01", "key02", "key03",
                                        "key04", "key05"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};

  // The table is built in a temporary and then moved into the global because
  // the VS compiler does not currently support operator= taking an
  // initializer_list.
  // GetSliceHash() looks the candidate buckets up in hash_map, so each entry
  // lists the bucket returned for hash index 0 and hash index 1.
  std::unordered_map<std::string, std::vector<uint64_t>> fake_hashes = {
      {user_keys[0], {0, 1}},
      {user_keys[1], {1, 2}},
      {user_keys[2], {2, 3}},
      {user_keys[3], {3, 4}},
      {user_keys[4], {0, 2}},
  };
  hash_map = std::move(fake_hashes);

  // expected_locations[i] is the bucket in which user_keys[i] must be found.
  std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};
  uint64_t expected_table_size = GetExpectedTableSize(user_keys.size());

  std::unique_ptr<WritableFileWriter> file_writer;
  fname = test::PerThreadDBPath("WithCollisionPathUserKey");
  ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                       file_options_, &file_writer, nullptr));

  CuckooTableBuilder builder(file_writer.get(), kHashTableRatio,
                             hash_fun_count, 2, BytewiseComparator(), 1, false,
                             false, GetSliceHash, 0 /* column_family_id */,
                             kDefaultColumnFamilyName);
  ASSERT_OK(builder.status());

  const size_t key_count = user_keys.size();
  for (size_t idx = 0; idx < key_count; ++idx) {
    builder.Add(Slice(GetInternalKey(user_keys[idx], true)),
                Slice(values[idx]));
    ASSERT_EQ(builder.NumEntries(), idx + 1);
    ASSERT_OK(builder.status());
  }

  // Bucket size is computed from the user key length here, not from the
  // length of the internal key handed to Add().
  const size_t bucket_size = user_keys[0].size() + values[0].size();
  const uint64_t bucket_bytes = expected_table_size * bucket_size;
  ASSERT_EQ(bucket_bytes - 1, builder.FileSize());
  ASSERT_OK(builder.Finish());
  ASSERT_OK(file_writer->Close());
  ASSERT_LE(bucket_bytes, builder.FileSize());

  // Contents expected in buckets that hold no key: "key00" followed by one
  // 'a' per value byte.
  std::string expected_unused_bucket = "key00";
  expected_unused_bucket.append(values[0].size(), 'a');
  CheckFileContents(user_keys, values, expected_locations,
                    expected_unused_bucket, expected_table_size, 2, true);
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Verifies that Finish() reports NotSupported when a key cannot be placed.
//
// Have two hash functions. Insert elements with overlapping hashes.
// Finally try inserting an element with hash value somewhere in the middle
// and it should fail because the no. of elements to displace is too high.
TEST_F(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
  uint32_t num_hash_fun = 2;
  std::vector<std::string> user_keys = {"key01", "key02", "key03",
                                        "key04", "key05"};

  // Need to have a temporary variable here as VS compiler does not currently
  // support operator= with initializer_list as a parameter.
  // GetSliceHash() looks the candidate buckets up in hash_map, so each entry
  // lists the bucket returned for hash index 0 and hash index 1. The last key
  // has the same candidates as the first one.
  std::unordered_map<std::string, std::vector<uint64_t>> hm = {
      {user_keys[0], {0, 1}},
      {user_keys[1], {1, 2}},
      {user_keys[2], {2, 3}},
      {user_keys[3], {3, 4}},
      {user_keys[4], {0, 1}},
  };
  hash_map = std::move(hm);

  std::unique_ptr<WritableFileWriter> file_writer;
  // Use a file name derived from this test, like every other test in this
  // file, instead of reusing the one that belongs to WithCollisionPathUserKey.
  fname = test::PerThreadDBPath("FailWhenCollisionPathTooLong");
  ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                       file_options_, &file_writer, nullptr));
  CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
                             2, BytewiseComparator(), 1, false, false,
                             GetSliceHash, 0 /* column_family_id */,
                             kDefaultColumnFamilyName);
  ASSERT_OK(builder.status());

  // Add() itself is expected to succeed for every key; the failure is only
  // asserted on Finish() below.
  for (uint32_t i = 0; i < user_keys.size(); i++) {
    builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
    ASSERT_EQ(builder.NumEntries(), i + 1);
    ASSERT_OK(builder.status());
  }
  ASSERT_TRUE(builder.Finish().IsNotSupported());
  // The writer must still close cleanly after the failed Finish().
  ASSERT_OK(file_writer->Close());
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Adds the same user key twice and verifies that both Add() calls are accepted
// but Finish() reports NotSupported.
TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) {
  std::string user_key = "repeatedkey";
  uint32_t hash_fun_count = 4;

  // The table is built in a temporary and then moved into the global because
  // the VS compiler does not currently support operator= taking an
  // initializer_list.
  std::unordered_map<std::string, std::vector<uint64_t>> fake_hashes = {
      {"repeatedkey", {0, 1, 2, 3}}};
  hash_map = std::move(fake_hashes);

  std::unique_ptr<WritableFileWriter> file_writer;
  fname = test::PerThreadDBPath("FailWhenSameKeyInserted");
  ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
                                       file_options_, &file_writer, nullptr));

  CuckooTableBuilder builder(file_writer.get(), kHashTableRatio,
                             hash_fun_count, 100, BytewiseComparator(), 1,
                             false, false, GetSliceHash,
                             0 /* column_family_id */,
                             kDefaultColumnFamilyName);
  ASSERT_OK(builder.status());

  // First copy of the key.
  // NOTE(review): the two inserts differ only in the GetInternalKey flag and
  // the value; presumably the flag changes the sequence number -- confirm.
  builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));
  ASSERT_EQ(builder.NumEntries(), 1u);
  ASSERT_OK(builder.status());

  // Second copy of the same user key is still counted and still reports OK.
  builder.Add(Slice(GetInternalKey(user_key, true)), Slice("value2"));
  ASSERT_EQ(builder.NumEntries(), 2u);
  ASSERT_OK(builder.status());

  // The duplicate is only rejected when the table is finalized.
  ASSERT_TRUE(builder.Finish().IsNotSupported());
  ASSERT_OK(file_writer->Close());
}
|
2020-02-20 12:07:53 -08:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2014-07-21 13:26:09 -07:00
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
int main(int argc, char** argv) {
|
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|
2015-07-20 10:50:46 -07:00
|
|
|
|
|
|
|
#else
|
|
|
|
#include <stdio.h>
|
|
|
|
|
2018-04-15 17:19:57 -07:00
|
|
|
// Entry point used when ROCKSDB_LITE is defined: no tests are compiled in, so
// just report the skip on stderr and exit successfully.
int main(int /*argc*/, char** /*argv*/) {
  fputs("SKIPPED as Cuckoo table is not supported in ROCKSDB_LITE\n", stderr);
  return 0;
}
|
|
|
|
|
|
|
|
#endif // ROCKSDB_LITE
|