2014-07-26 01:37:32 +02:00
|
|
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#ifndef GFLAGS
|
|
|
|
#include <cstdio>
|
|
|
|
// Entry point compiled only when GFLAGS is not defined: prints a notice asking
// for gflags on stderr and exits with status 1.
int main() {
  std::fputs("Please install gflags to run this test\n", stderr);
  return 1;
}
|
|
|
|
#else
|
|
|
|
|
2014-08-01 05:52:13 +02:00
|
|
|
#define __STDC_FORMAT_MACROS
|
|
|
|
#include <inttypes.h>
|
2014-07-26 01:37:32 +02:00
|
|
|
#include <gflags/gflags.h>
|
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
|
|
|
#include <map>
|
|
|
|
|
|
|
|
#include "table/meta_blocks.h"
|
|
|
|
#include "table/cuckoo_table_builder.h"
|
|
|
|
#include "table/cuckoo_table_reader.h"
|
|
|
|
#include "table/cuckoo_table_factory.h"
|
2014-08-06 01:35:02 +02:00
|
|
|
#include "util/arena.h"
|
2014-07-26 01:37:32 +02:00
|
|
|
#include "util/random.h"
|
|
|
|
#include "util/testharness.h"
|
|
|
|
#include "util/testutil.h"
|
|
|
|
|
|
|
|
using GFLAGS::ParseCommandLineFlags;
|
|
|
|
using GFLAGS::SetUsageMessage;
|
|
|
|
|
|
|
|
// --file_dir: directory in which the benchmark files are created.
DEFINE_string(file_dir, "",
              "Directory where the files will be created for benchmark. "
              "Added for using tmpfs.");
// --enable_perf: opt-in switch for the benchmark tests.
DEFINE_bool(enable_perf, false, "Run Benchmark Tests too.");
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (we get better results if we use an int64 comparator instead of a string comparator; TBD in future diffs):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
// --write: controls whether the performance tests write new values to file.
DEFINE_bool(write, false,
            "Should write new values to file in performance tests?");
|
2014-07-26 01:37:32 +02:00
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
extern const uint64_t kCuckooTableMagicNumber;
|
2014-08-12 05:21:07 +02:00
|
|
|
extern const uint64_t kMaxNumHashTable;
|
2014-07-26 01:37:32 +02:00
|
|
|
|
|
|
|
namespace {
|
|
|
|
// Number of hash functions (candidate buckets per key) used by these tests.
const uint32_t kNumHashFunc = 10;

// Methods, variables related to Hash functions.

// Test-controlled hash table: maps a key string to the bucket ids that
// GetSliceHash returns for it, indexed by hash-function number.
std::unordered_map<std::string, std::vector<uint64_t>> hash_map;

// Registers `num_hash_fun` consecutive bucket ids, starting at `bucket_id`,
// as the hash values of `s`. Any ids previously registered for `s` are
// replaced; `num_hash_fun == 0` registers an empty list.
void AddHashLookups(const std::string& s, uint64_t bucket_id,
                    uint32_t num_hash_fun) {
  std::vector<uint64_t> buckets;
  buckets.reserve(num_hash_fun);  // single allocation, no regrowth
  for (uint32_t i = 0; i < num_hash_fun; ++i) {
    buckets.push_back(bucket_id + i);
  }
  // Hand the freshly built vector to the map instead of copying it.
  hash_map[s] = std::move(buckets);
}
|
|
|
|
|
|
|
|
uint64_t GetSliceHash(const Slice& s, uint32_t index,
|
|
|
|
uint64_t max_num_buckets) {
|
|
|
|
return hash_map[s.ToString()][index];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Methods, variables for checking key and values read.
|
|
|
|
struct ValuesToAssert {
|
|
|
|
ValuesToAssert(const std::string& key, const Slice& value)
|
|
|
|
: expected_user_key(key),
|
|
|
|
expected_value(value),
|
|
|
|
call_count(0) {}
|
|
|
|
std::string expected_user_key;
|
|
|
|
Slice expected_value;
|
|
|
|
int call_count;
|
|
|
|
};
|
|
|
|
|
|
|
|
bool AssertValues(void* assert_obj,
|
|
|
|
const ParsedInternalKey& k, const Slice& v) {
|
|
|
|
ValuesToAssert *ptr = reinterpret_cast<ValuesToAssert*>(assert_obj);
|
|
|
|
ASSERT_EQ(ptr->expected_value.ToString(), v.ToString());
|
|
|
|
ASSERT_EQ(ptr->expected_user_key, k.user_key.ToString());
|
|
|
|
++ptr->call_count;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
class CuckooReaderTest {
|
|
|
|
public:
|
|
|
|
CuckooReaderTest() {
|
|
|
|
options.allow_mmap_reads = true;
|
|
|
|
env = options.env;
|
|
|
|
env_options = EnvOptions(options);
|
|
|
|
}
|
|
|
|
|
|
|
|
void SetUp(int num_items) {
|
|
|
|
this->num_items = num_items;
|
|
|
|
hash_map.clear();
|
|
|
|
keys.clear();
|
|
|
|
keys.resize(num_items);
|
|
|
|
user_keys.clear();
|
|
|
|
user_keys.resize(num_items);
|
|
|
|
values.clear();
|
|
|
|
values.resize(num_items);
|
|
|
|
}
|
|
|
|
|
2014-08-06 01:35:02 +02:00
|
|
|
std::string NumToStr(int64_t i) {
|
|
|
|
return std::string(reinterpret_cast<char*>(&i), sizeof(i));
|
|
|
|
}
|
|
|
|
|
2014-08-06 05:55:46 +02:00
|
|
|
void CreateCuckooFileAndCheckReader() {
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
std::unique_ptr<WritableFile> writable_file;
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
|
|
|
|
CuckooTableBuilder builder(
|
2014-08-06 05:55:46 +02:00
|
|
|
writable_file.get(), 0.9, kNumHashFunc, 100, GetSliceHash);
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
|
|
|
|
builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
|
|
|
ASSERT_OK(builder.status());
|
2014-08-06 01:35:02 +02:00
|
|
|
ASSERT_EQ(builder.NumEntries(), key_idx + 1);
|
2014-07-26 01:37:32 +02:00
|
|
|
}
|
|
|
|
ASSERT_OK(builder.Finish());
|
|
|
|
ASSERT_EQ(num_items, builder.NumEntries());
|
|
|
|
file_size = builder.FileSize();
|
|
|
|
ASSERT_OK(writable_file->Close());
|
|
|
|
|
2014-08-06 05:55:46 +02:00
|
|
|
// Check reader now.
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
std::unique_ptr<RandomAccessFile> read_file;
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
|
|
|
|
CuckooTableReader reader(
|
|
|
|
options,
|
|
|
|
std::move(read_file),
|
|
|
|
file_size,
|
|
|
|
GetSliceHash);
|
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
for (uint32_t i = 0; i < num_items; ++i) {
|
|
|
|
ValuesToAssert v(user_keys[i], values[i]);
|
|
|
|
ASSERT_OK(reader.Get(
|
|
|
|
ReadOptions(), Slice(keys[i]), &v, AssertValues, nullptr));
|
|
|
|
ASSERT_EQ(1, v.call_count);
|
|
|
|
}
|
|
|
|
}
|
2014-08-06 05:55:46 +02:00
|
|
|
void UpdateKeys(bool with_zero_seqno) {
|
|
|
|
for (uint32_t i = 0; i < num_items; i++) {
|
|
|
|
ParsedInternalKey ikey(user_keys[i],
|
|
|
|
with_zero_seqno ? 0 : i + 1000, kTypeValue);
|
|
|
|
keys[i].clear();
|
|
|
|
AppendInternalKey(&keys[i], ikey);
|
|
|
|
}
|
|
|
|
}
|
2014-07-26 01:37:32 +02:00
|
|
|
|
2014-08-06 01:35:02 +02:00
|
|
|
void CheckIterator() {
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
std::unique_ptr<RandomAccessFile> read_file;
|
2014-08-06 01:35:02 +02:00
|
|
|
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
|
|
|
|
CuckooTableReader reader(
|
|
|
|
options,
|
|
|
|
std::move(read_file),
|
|
|
|
file_size,
|
|
|
|
GetSliceHash);
|
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
Iterator* it = reader.NewIterator(ReadOptions(), nullptr);
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(!it->Valid());
|
|
|
|
it->SeekToFirst();
|
|
|
|
int cnt = 0;
|
|
|
|
while (it->Valid()) {
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(Slice(keys[cnt]) == it->key());
|
|
|
|
ASSERT_TRUE(Slice(values[cnt]) == it->value());
|
|
|
|
++cnt;
|
|
|
|
it->Next();
|
|
|
|
}
|
2014-08-07 11:06:07 +02:00
|
|
|
ASSERT_EQ(static_cast<uint32_t>(cnt), num_items);
|
2014-08-06 01:35:02 +02:00
|
|
|
|
|
|
|
it->SeekToLast();
|
|
|
|
cnt = num_items - 1;
|
|
|
|
ASSERT_TRUE(it->Valid());
|
|
|
|
while (it->Valid()) {
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(Slice(keys[cnt]) == it->key());
|
|
|
|
ASSERT_TRUE(Slice(values[cnt]) == it->value());
|
|
|
|
--cnt;
|
|
|
|
it->Prev();
|
|
|
|
}
|
|
|
|
ASSERT_EQ(cnt, -1);
|
|
|
|
|
|
|
|
cnt = num_items / 2;
|
|
|
|
it->Seek(keys[cnt]);
|
|
|
|
while (it->Valid()) {
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(Slice(keys[cnt]) == it->key());
|
|
|
|
ASSERT_TRUE(Slice(values[cnt]) == it->value());
|
|
|
|
++cnt;
|
|
|
|
it->Next();
|
|
|
|
}
|
2014-08-07 11:06:07 +02:00
|
|
|
ASSERT_EQ(static_cast<uint32_t>(cnt), num_items);
|
2014-08-06 01:35:02 +02:00
|
|
|
delete it;
|
|
|
|
|
|
|
|
Arena arena;
|
|
|
|
it = reader.NewIterator(ReadOptions(), &arena);
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(!it->Valid());
|
|
|
|
it->Seek(keys[num_items/2]);
|
|
|
|
ASSERT_TRUE(it->Valid());
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(keys[num_items/2] == it->key());
|
|
|
|
ASSERT_TRUE(values[num_items/2] == it->value());
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
it->~Iterator();
|
|
|
|
}
|
|
|
|
|
2014-07-26 01:37:32 +02:00
|
|
|
std::vector<std::string> keys;
|
|
|
|
std::vector<std::string> user_keys;
|
|
|
|
std::vector<std::string> values;
|
2014-08-06 01:35:02 +02:00
|
|
|
uint64_t num_items;
|
2014-07-26 01:37:32 +02:00
|
|
|
std::string fname;
|
|
|
|
uint64_t file_size;
|
|
|
|
Options options;
|
|
|
|
Env* env;
|
|
|
|
EnvOptions env_options;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Builds tables in which every looked-up key exists and verifies all lookups:
// first with disjoint hash values, then with every key hashing to the same
// buckets. Each layout is checked with non-zero and with zero sequence
// numbers.
TEST(CuckooReaderTest, WhenKeyExists) {
  SetUp(kNumHashFunc);
  fname = test::TmpDir() + "/CuckooReader_WhenKeyExists";
  for (uint64_t idx = 0; idx < num_items; ++idx) {
    user_keys[idx] = "key" + NumToStr(idx);
    values[idx] = "value" + NumToStr(idx);
    ParsedInternalKey ikey(user_keys[idx], idx + 1000, kTypeValue);
    AppendInternalKey(&keys[idx], ikey);
    // Give disjoint hash values.
    AddHashLookups(user_keys[idx], idx, kNumHashFunc);
  }
  CreateCuckooFileAndCheckReader();

  // Last level file.
  UpdateKeys(true);
  CreateCuckooFileAndCheckReader();

  // Test with collision. Make all hash values collide.
  hash_map.clear();
  for (const std::string& user_key : user_keys) {
    AddHashLookups(user_key, 0, kNumHashFunc);
  }
  UpdateKeys(false);
  CreateCuckooFileAndCheckReader();

  // Last level file.
  UpdateKeys(true);
  CreateCuckooFileAndCheckReader();
}
|
|
|
|
|
2014-08-06 01:35:02 +02:00
|
|
|
// Builds a table of 2 * kNumHashFunc entries whose hash values are disjoint
// and assigned in reverse key order, then runs the iterator checks on it --
// once as written and once after the keys are rebuilt via UpdateKeys(true).
TEST(CuckooReaderTest, CheckIterator) {
  SetUp(2*kNumHashFunc);
  fname = test::TmpDir() + "/CuckooReader_CheckIterator";
  for (uint64_t idx = 0; idx < num_items; ++idx) {
    user_keys[idx] = "key" + NumToStr(idx);
    values[idx] = "value" + NumToStr(idx);
    ParsedInternalKey ikey(user_keys[idx], 0, kTypeValue);
    AppendInternalKey(&keys[idx], ikey);
    // Give disjoint hash values, in reverse order.
    const uint64_t first_bucket = num_items - idx - 1;
    AddHashLookups(user_keys[idx], first_bucket, kNumHashFunc);
  }
  CreateCuckooFileAndCheckReader();
  CheckIterator();

  // Last level file.
  UpdateKeys(true);
  CreateCuckooFileAndCheckReader();
  CheckIterator();
}
|
|
|
|
|
2014-07-26 01:37:32 +02:00
|
|
|
// Builds a table whose keys all hash to the same buckets, then checks that
// CuckooTableReader::Get never invokes the callback for keys that are absent,
// malformed, of the wrong length, or equal to the table's "empty key"
// property, and that it returns the expected status in each case.
TEST(CuckooReaderTest, WhenKeyNotFound) {
  // Add keys with colliding hash values.
  SetUp(kNumHashFunc);
  fname = test::TmpDir() + "/CuckooReader_WhenKeyNotFound";
  for (uint64_t i = 0; i < num_items; i++) {
    user_keys[i] = "key" + NumToStr(i);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + NumToStr(i);
    // Make all hash values collide.
    AddHashLookups(user_keys[i], 0, kNumHashFunc);
  }
  CreateCuckooFileAndCheckReader();
  std::unique_ptr<RandomAccessFile> read_file;
  ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
  CuckooTableReader reader(
      options,
      std::move(read_file),
      file_size,
      GetSliceHash);
  ASSERT_OK(reader.status());

  // Search for a key with colliding hash values.
  std::string not_found_user_key = "key" + NumToStr(num_items);
  std::string not_found_key;
  AddHashLookups(not_found_user_key, 0, kNumHashFunc);
  ParsedInternalKey ikey(not_found_user_key, 1000, kTypeValue);
  // The address-of below had been mangled into a '¬' character.
  AppendInternalKey(&not_found_key, ikey);
  ValuesToAssert v("", "");
  ASSERT_OK(reader.Get(
      ReadOptions(), Slice(not_found_key), &v, AssertValues, nullptr));
  ASSERT_EQ(0, v.call_count);
  ASSERT_OK(reader.status());

  // Search for a key with an independent hash value.
  std::string not_found_user_key2 = "key" + NumToStr(num_items + 1);
  AddHashLookups(not_found_user_key2, kNumHashFunc, kNumHashFunc);
  ParsedInternalKey ikey2(not_found_user_key2, 1000, kTypeValue);
  std::string not_found_key2;
  AppendInternalKey(&not_found_key2, ikey2);
  ASSERT_OK(reader.Get(
      ReadOptions(), Slice(not_found_key2), &v, AssertValues, nullptr));
  ASSERT_EQ(0, v.call_count);
  ASSERT_OK(reader.status());

  // Test read with corrupted key.
  Slice corrupt_key("corrupt_ikey");
  ASSERT_TRUE(!ParseInternalKey(corrupt_key, &ikey));
  ASSERT_TRUE(reader.Get(
      ReadOptions(), corrupt_key, &v,
      AssertValues, nullptr).IsCorruption());
  ASSERT_EQ(0, v.call_count);
  ASSERT_OK(reader.status());

  // Test read with key of invalid length.
  IterKey k;
  k.SetInternalKey("very_long_key", 0, kTypeValue);
  ASSERT_TRUE(reader.Get(
      ReadOptions(), k.GetKey(), &v,
      AssertValues, nullptr).IsInvalidArgument());
  ASSERT_EQ(0, v.call_count);
  ASSERT_OK(reader.status());
  k.Clear();
  k.SetInternalKey("s", 0, kTypeValue);
  ASSERT_TRUE(reader.Get(
      ReadOptions(), k.GetKey(), &v,
      AssertValues, nullptr).IsInvalidArgument());
  ASSERT_EQ(0, v.call_count);
  ASSERT_OK(reader.status());

  // Test read when key is unused key.
  std::string unused_key =
      reader.GetTableProperties()->user_collected_properties.at(
          CuckooTablePropertyNames::kEmptyKey);
  // Add hash values that map to empty buckets.
  AddHashLookups(ExtractUserKey(unused_key).ToString(),
                 kNumHashFunc, kNumHashFunc);
  ASSERT_OK(reader.Get(
      ReadOptions(), Slice(unused_key), &v, AssertValues, nullptr));
  ASSERT_EQ(0, v.call_count);
  ASSERT_OK(reader.status());
}
|
|
|
|
|
|
|
|
// Performance tests
|
|
|
|
namespace {
|
|
|
|
// No-op Get() callback: ignores all of its arguments and always reports
// false. The parameter names are kept as comments only, since none is read.
bool DoNothing(void* /*arg*/, const ParsedInternalKey& /*k*/,
               const Slice& /*v*/) {
  return false;
}
|
|
|
|
|
|
|
|
bool CheckValue(void* cnt_ptr, const ParsedInternalKey& k, const Slice& v) {
|
|
|
|
++*reinterpret_cast<int*>(cnt_ptr);
|
|
|
|
std::string expected_value;
|
|
|
|
AppendInternalKey(&expected_value, k);
|
|
|
|
ASSERT_EQ(0, v.compare(Slice(&expected_value[0], v.size())));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
void GetKeys(uint64_t num, std::vector<std::string>* keys) {
|
|
|
|
IterKey k;
|
|
|
|
k.SetInternalKey("", 0, kTypeValue);
|
|
|
|
std::string internal_key_suffix = k.GetKey().ToString();
|
2014-08-22 03:11:33 +02:00
|
|
|
ASSERT_EQ(static_cast<size_t>(8), internal_key_suffix.size());
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
|
|
|
|
std::string new_key(reinterpret_cast<char*>(&key_idx), sizeof(key_idx));
|
|
|
|
new_key += internal_key_suffix;
|
|
|
|
keys->push_back(new_key);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string GetFileName(uint64_t num, double hash_ratio) {
|
|
|
|
if (FLAGS_file_dir.empty()) {
|
|
|
|
FLAGS_file_dir = test::TmpDir();
|
|
|
|
}
|
|
|
|
return FLAGS_file_dir + "/cuckoo_read_benchmark" +
|
|
|
|
std::to_string(num/1000000) + "Mratio" +
|
|
|
|
std::to_string(static_cast<int>(100*hash_ratio));
|
|
|
|
}
|
|
|
|
|
2014-07-26 01:37:32 +02:00
|
|
|
// Create last level file as we are interested in measuring performance of
|
|
|
|
// last level file only.
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
void WriteFile(const std::vector<std::string>& keys,
|
|
|
|
const uint64_t num, double hash_ratio) {
|
2014-07-26 01:37:32 +02:00
|
|
|
Options options;
|
|
|
|
options.allow_mmap_reads = true;
|
|
|
|
Env* env = options.env;
|
|
|
|
EnvOptions env_options = EnvOptions(options);
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
std::string fname = GetFileName(num, hash_ratio);
|
2014-07-26 01:37:32 +02:00
|
|
|
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
std::unique_ptr<WritableFile> writable_file;
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
|
|
|
|
CuckooTableBuilder builder(
|
2014-08-06 05:55:46 +02:00
|
|
|
writable_file.get(), hash_ratio,
|
|
|
|
kMaxNumHashTable, 1000, GetSliceMurmurHash);
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(builder.status());
|
2014-07-29 02:14:25 +02:00
|
|
|
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
|
2014-07-26 01:37:32 +02:00
|
|
|
// Value is just a part of key.
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
builder.Add(Slice(keys[key_idx]), Slice(&keys[key_idx][0], 4));
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_EQ(builder.NumEntries(), key_idx + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
}
|
|
|
|
ASSERT_OK(builder.Finish());
|
|
|
|
ASSERT_EQ(num, builder.NumEntries());
|
|
|
|
ASSERT_OK(writable_file->Close());
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
|
|
|
|
uint64_t file_size;
|
|
|
|
env->GetFileSize(fname, &file_size);
|
|
|
|
std::unique_ptr<RandomAccessFile> read_file;
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
|
|
|
|
|
|
|
|
CuckooTableReader reader(
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
options, std::move(read_file), file_size, GetSliceMurmurHash);
|
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
ReadOptions r_options;
|
|
|
|
for (const auto& key : keys) {
|
|
|
|
int cnt = 0;
|
|
|
|
ASSERT_OK(reader.Get(r_options, Slice(key), &cnt, CheckValue, nullptr));
|
|
|
|
if (cnt != 1) {
|
2014-08-22 03:11:33 +02:00
|
|
|
fprintf(stderr, "%" PRIx64 " not found.\n",
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
*reinterpret_cast<const uint64_t*>(key.data()));
|
|
|
|
ASSERT_EQ(1, cnt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void ReadKeys(const std::vector<std::string>& keys, uint64_t num,
|
|
|
|
double hash_ratio, uint32_t batch_size) {
|
|
|
|
Options options;
|
|
|
|
options.allow_mmap_reads = true;
|
|
|
|
Env* env = options.env;
|
|
|
|
EnvOptions env_options = EnvOptions(options);
|
|
|
|
std::string fname = GetFileName(num, hash_ratio);
|
|
|
|
|
|
|
|
uint64_t file_size;
|
|
|
|
env->GetFileSize(fname, &file_size);
|
|
|
|
std::unique_ptr<RandomAccessFile> read_file;
|
|
|
|
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
|
|
|
|
|
|
|
|
CuckooTableReader reader(
|
|
|
|
options, std::move(read_file), file_size, GetSliceMurmurHash);
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
const UserCollectedProperties user_props =
|
|
|
|
reader.GetTableProperties()->user_collected_properties;
|
|
|
|
const uint32_t num_hash_fun = *reinterpret_cast<const uint32_t*>(
|
|
|
|
user_props.at(CuckooTablePropertyNames::kNumHashTable).data());
|
2014-08-01 05:52:13 +02:00
|
|
|
fprintf(stderr, "With %" PRIu64 " items and hash table ratio %f, number of"
|
|
|
|
" hash functions used: %u.\n", num, hash_ratio, num_hash_fun);
|
2014-07-26 01:37:32 +02:00
|
|
|
ReadOptions r_options;
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
|
|
|
|
uint64_t start_time = env->NowMicros();
|
|
|
|
if (batch_size > 0) {
|
|
|
|
for (uint64_t i = 0; i < num; i += batch_size) {
|
|
|
|
for (uint64_t j = i; j < i+batch_size && j < num; ++j) {
|
|
|
|
reader.Prepare(Slice(keys[j]));
|
|
|
|
}
|
|
|
|
for (uint64_t j = i; j < i+batch_size && j < num; ++j) {
|
|
|
|
reader.Get(r_options, Slice(keys[j]), nullptr, DoNothing, nullptr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (uint64_t i = 0; i < num; i++) {
|
|
|
|
reader.Get(r_options, Slice(keys[i]), nullptr, DoNothing, nullptr);
|
|
|
|
}
|
2014-07-26 01:37:32 +02:00
|
|
|
}
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
float time_per_op = (env->NowMicros() - start_time) * 1.0 / num;
|
|
|
|
fprintf(stderr,
|
|
|
|
"Time taken per op is %.3fus (%.1f Mqps) with batch size of %u\n",
|
|
|
|
time_per_op, 1.0 / time_per_op, batch_size);
|
2014-07-26 01:37:32 +02:00
|
|
|
}
|
|
|
|
} // namespace.
|
|
|
|
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
// Read-performance benchmark; does nothing unless --enable_perf is given.
// For each hash table ratio the table file is (re)written when --write is set
// or the file does not exist yet, and is then read back once per batch size.
TEST(CuckooReaderTest, TestReadPerformance) {
  if (!FLAGS_enable_perf) {
    return;
  }
  uint64_t num = 1000*1000*100;
#ifndef NDEBUG
  fprintf(stdout,
      "WARNING: Not compiled with DNDEBUG. Performance tests may be slow.\n");
#endif
  std::vector<std::string> keys;
  GetKeys(num, &keys);

  const std::vector<double> hash_ratios({0.5, 0.6, 0.75, 0.9});
  // A batch size of 0 is the plain one-Get-per-key run.
  const uint32_t batch_sizes[] = {0, 10, 25, 50, 100};

  for (double hash_ratio : hash_ratios) {
    // The existence check is skipped entirely when --write forces a rewrite.
    const bool must_write = FLAGS_write ||
        !Env::Default()->FileExists(GetFileName(num, hash_ratio));
    if (must_write) {
      WriteFile(keys, num, hash_ratio);
    }
    for (uint32_t batch_size : batch_sizes) {
      ReadKeys(keys, num, hash_ratio, batch_size);
    }
    fprintf(stderr, "\n");
  }
}
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
// Test binary entry point: parses the gflags command line (removing the
// recognized flags from argv) and runs every registered test.
int main(int argc, char** argv) {
  ParseCommandLineFlags(&argc, &argv, true);
  // Hand the harness result to the caller instead of always reporting success.
  return rocksdb::test::RunAllTests();
}
|
|
|
|
|
|
|
|
#endif // GFLAGS.
|