2014-07-26 01:37:32 +02:00
|
|
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#ifndef GFLAGS
|
|
|
|
#include <cstdio>
|
|
|
|
int main() {
|
|
|
|
fprintf(stderr, "Please install gflags to run this test\n");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
|
2014-08-01 05:52:13 +02:00
|
|
|
#define __STDC_FORMAT_MACROS
|
|
|
|
#include <inttypes.h>
|
2014-07-26 01:37:32 +02:00
|
|
|
#include <gflags/gflags.h>
|
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
|
|
|
#include <map>
|
|
|
|
|
|
|
|
#include "table/meta_blocks.h"
|
|
|
|
#include "table/cuckoo_table_builder.h"
|
|
|
|
#include "table/cuckoo_table_reader.h"
|
|
|
|
#include "table/cuckoo_table_factory.h"
|
2014-08-06 01:35:02 +02:00
|
|
|
#include "util/arena.h"
|
2014-07-26 01:37:32 +02:00
|
|
|
#include "util/random.h"
|
|
|
|
#include "util/testharness.h"
|
|
|
|
#include "util/testutil.h"
|
|
|
|
|
|
|
|
using GFLAGS::ParseCommandLineFlags;
|
|
|
|
using GFLAGS::SetUsageMessage;
|
|
|
|
|
|
|
|
DEFINE_string(file_dir, "", "Directory where the files will be created"
|
|
|
|
" for benchmark. Added for using tmpfs.");
|
|
|
|
DEFINE_bool(enable_perf, false, "Run Benchmark Tests too.");
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
DEFINE_bool(write, false,
|
|
|
|
"Should write new values to file in performance tests?");
|
2014-07-26 01:37:32 +02:00
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
const uint32_t kNumHashFunc = 10;
|
|
|
|
// Methods, variables related to Hash functions.
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hash_map;
|
|
|
|
|
|
|
|
void AddHashLookups(const std::string& s, uint64_t bucket_id,
|
|
|
|
uint32_t num_hash_fun) {
|
|
|
|
std::vector<uint64_t> v;
|
|
|
|
for (uint32_t i = 0; i < num_hash_fun; i++) {
|
|
|
|
v.push_back(bucket_id + i);
|
|
|
|
}
|
|
|
|
hash_map[s] = v;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t GetSliceHash(const Slice& s, uint32_t index,
|
|
|
|
uint64_t max_num_buckets) {
|
|
|
|
return hash_map[s.ToString()][index];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Methods, variables for checking key and values read.
|
|
|
|
struct ValuesToAssert {
|
|
|
|
ValuesToAssert(const std::string& key, const Slice& value)
|
|
|
|
: expected_user_key(key),
|
|
|
|
expected_value(value),
|
|
|
|
call_count(0) {}
|
|
|
|
std::string expected_user_key;
|
|
|
|
Slice expected_value;
|
|
|
|
int call_count;
|
|
|
|
};
|
|
|
|
|
|
|
|
bool AssertValues(void* assert_obj,
|
|
|
|
const ParsedInternalKey& k, const Slice& v) {
|
|
|
|
ValuesToAssert *ptr = reinterpret_cast<ValuesToAssert*>(assert_obj);
|
|
|
|
ASSERT_EQ(ptr->expected_value.ToString(), v.ToString());
|
|
|
|
ASSERT_EQ(ptr->expected_user_key, k.user_key.ToString());
|
|
|
|
++ptr->call_count;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
class CuckooReaderTest {
|
|
|
|
public:
|
|
|
|
CuckooReaderTest() {
|
|
|
|
options.allow_mmap_reads = true;
|
|
|
|
env = options.env;
|
|
|
|
env_options = EnvOptions(options);
|
|
|
|
}
|
|
|
|
|
|
|
|
void SetUp(int num_items) {
|
|
|
|
this->num_items = num_items;
|
|
|
|
hash_map.clear();
|
|
|
|
keys.clear();
|
|
|
|
keys.resize(num_items);
|
|
|
|
user_keys.clear();
|
|
|
|
user_keys.resize(num_items);
|
|
|
|
values.clear();
|
|
|
|
values.resize(num_items);
|
|
|
|
}
|
|
|
|
|
2014-08-06 01:35:02 +02:00
|
|
|
std::string NumToStr(int64_t i) {
|
|
|
|
return std::string(reinterpret_cast<char*>(&i), sizeof(i));
|
|
|
|
}
|
|
|
|
|
2014-08-27 19:39:31 +02:00
|
|
|
void CreateCuckooFileAndCheckReader(
|
|
|
|
const Comparator* ucomp = BytewiseComparator()) {
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
std::unique_ptr<WritableFile> writable_file;
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
|
|
|
|
CuckooTableBuilder builder(
|
2014-08-28 19:42:23 +02:00
|
|
|
writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, GetSliceHash);
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
|
|
|
|
builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
|
|
|
ASSERT_OK(builder.status());
|
2014-08-06 01:35:02 +02:00
|
|
|
ASSERT_EQ(builder.NumEntries(), key_idx + 1);
|
2014-07-26 01:37:32 +02:00
|
|
|
}
|
|
|
|
ASSERT_OK(builder.Finish());
|
|
|
|
ASSERT_EQ(num_items, builder.NumEntries());
|
|
|
|
file_size = builder.FileSize();
|
|
|
|
ASSERT_OK(writable_file->Close());
|
|
|
|
|
2014-08-06 05:55:46 +02:00
|
|
|
// Check reader now.
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
std::unique_ptr<RandomAccessFile> read_file;
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
|
|
|
|
CuckooTableReader reader(
|
|
|
|
options,
|
|
|
|
std::move(read_file),
|
|
|
|
file_size,
|
2014-08-27 19:39:31 +02:00
|
|
|
ucomp,
|
2014-07-26 01:37:32 +02:00
|
|
|
GetSliceHash);
|
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
for (uint32_t i = 0; i < num_items; ++i) {
|
|
|
|
ValuesToAssert v(user_keys[i], values[i]);
|
|
|
|
ASSERT_OK(reader.Get(
|
|
|
|
ReadOptions(), Slice(keys[i]), &v, AssertValues, nullptr));
|
|
|
|
ASSERT_EQ(1, v.call_count);
|
|
|
|
}
|
|
|
|
}
|
2014-08-06 05:55:46 +02:00
|
|
|
void UpdateKeys(bool with_zero_seqno) {
|
|
|
|
for (uint32_t i = 0; i < num_items; i++) {
|
|
|
|
ParsedInternalKey ikey(user_keys[i],
|
|
|
|
with_zero_seqno ? 0 : i + 1000, kTypeValue);
|
|
|
|
keys[i].clear();
|
|
|
|
AppendInternalKey(&keys[i], ikey);
|
|
|
|
}
|
|
|
|
}
|
2014-07-26 01:37:32 +02:00
|
|
|
|
2014-08-27 19:39:31 +02:00
|
|
|
void CheckIterator(const Comparator* ucomp = BytewiseComparator()) {
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
std::unique_ptr<RandomAccessFile> read_file;
|
2014-08-06 01:35:02 +02:00
|
|
|
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
|
|
|
|
CuckooTableReader reader(
|
|
|
|
options,
|
|
|
|
std::move(read_file),
|
|
|
|
file_size,
|
2014-08-27 19:39:31 +02:00
|
|
|
ucomp,
|
2014-08-06 01:35:02 +02:00
|
|
|
GetSliceHash);
|
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
Iterator* it = reader.NewIterator(ReadOptions(), nullptr);
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(!it->Valid());
|
|
|
|
it->SeekToFirst();
|
|
|
|
int cnt = 0;
|
|
|
|
while (it->Valid()) {
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(Slice(keys[cnt]) == it->key());
|
|
|
|
ASSERT_TRUE(Slice(values[cnt]) == it->value());
|
|
|
|
++cnt;
|
|
|
|
it->Next();
|
|
|
|
}
|
2014-08-07 11:06:07 +02:00
|
|
|
ASSERT_EQ(static_cast<uint32_t>(cnt), num_items);
|
2014-08-06 01:35:02 +02:00
|
|
|
|
|
|
|
it->SeekToLast();
|
|
|
|
cnt = num_items - 1;
|
|
|
|
ASSERT_TRUE(it->Valid());
|
|
|
|
while (it->Valid()) {
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(Slice(keys[cnt]) == it->key());
|
|
|
|
ASSERT_TRUE(Slice(values[cnt]) == it->value());
|
|
|
|
--cnt;
|
|
|
|
it->Prev();
|
|
|
|
}
|
|
|
|
ASSERT_EQ(cnt, -1);
|
|
|
|
|
|
|
|
cnt = num_items / 2;
|
|
|
|
it->Seek(keys[cnt]);
|
|
|
|
while (it->Valid()) {
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(Slice(keys[cnt]) == it->key());
|
|
|
|
ASSERT_TRUE(Slice(values[cnt]) == it->value());
|
|
|
|
++cnt;
|
|
|
|
it->Next();
|
|
|
|
}
|
2014-08-07 11:06:07 +02:00
|
|
|
ASSERT_EQ(static_cast<uint32_t>(cnt), num_items);
|
2014-08-06 01:35:02 +02:00
|
|
|
delete it;
|
|
|
|
|
|
|
|
Arena arena;
|
|
|
|
it = reader.NewIterator(ReadOptions(), &arena);
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(!it->Valid());
|
|
|
|
it->Seek(keys[num_items/2]);
|
|
|
|
ASSERT_TRUE(it->Valid());
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
ASSERT_TRUE(keys[num_items/2] == it->key());
|
|
|
|
ASSERT_TRUE(values[num_items/2] == it->value());
|
|
|
|
ASSERT_OK(it->status());
|
|
|
|
it->~Iterator();
|
|
|
|
}
|
|
|
|
|
2014-07-26 01:37:32 +02:00
|
|
|
std::vector<std::string> keys;
|
|
|
|
std::vector<std::string> user_keys;
|
|
|
|
std::vector<std::string> values;
|
2014-08-06 01:35:02 +02:00
|
|
|
uint64_t num_items;
|
2014-07-26 01:37:32 +02:00
|
|
|
std::string fname;
|
|
|
|
uint64_t file_size;
|
|
|
|
Options options;
|
|
|
|
Env* env;
|
|
|
|
EnvOptions env_options;
|
|
|
|
};
|
|
|
|
|
|
|
|
TEST(CuckooReaderTest, WhenKeyExists) {
|
2014-08-06 01:35:02 +02:00
|
|
|
SetUp(kNumHashFunc);
|
2014-07-26 01:37:32 +02:00
|
|
|
fname = test::TmpDir() + "/CuckooReader_WhenKeyExists";
|
2014-08-06 01:35:02 +02:00
|
|
|
for (uint64_t i = 0; i < num_items; i++) {
|
|
|
|
user_keys[i] = "key" + NumToStr(i);
|
2014-07-26 01:37:32 +02:00
|
|
|
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
|
|
|
AppendInternalKey(&keys[i], ikey);
|
2014-08-06 01:35:02 +02:00
|
|
|
values[i] = "value" + NumToStr(i);
|
|
|
|
// Give disjoint hash values.
|
2014-08-06 05:55:46 +02:00
|
|
|
AddHashLookups(user_keys[i], i, kNumHashFunc);
|
2014-07-26 01:37:32 +02:00
|
|
|
}
|
2014-08-06 05:55:46 +02:00
|
|
|
CreateCuckooFileAndCheckReader();
|
2014-07-26 01:37:32 +02:00
|
|
|
// Last level file.
|
2014-08-06 05:55:46 +02:00
|
|
|
UpdateKeys(true);
|
|
|
|
CreateCuckooFileAndCheckReader();
|
2014-07-26 01:37:32 +02:00
|
|
|
// Test with collision. Make all hash values collide.
|
|
|
|
hash_map.clear();
|
|
|
|
for (uint32_t i = 0; i < num_items; i++) {
|
|
|
|
AddHashLookups(user_keys[i], 0, kNumHashFunc);
|
|
|
|
}
|
2014-08-06 05:55:46 +02:00
|
|
|
UpdateKeys(false);
|
|
|
|
CreateCuckooFileAndCheckReader();
|
2014-07-26 01:37:32 +02:00
|
|
|
// Last level file.
|
2014-08-06 05:55:46 +02:00
|
|
|
UpdateKeys(true);
|
|
|
|
CreateCuckooFileAndCheckReader();
|
2014-07-26 01:37:32 +02:00
|
|
|
}
|
|
|
|
|
2014-08-27 19:39:31 +02:00
|
|
|
TEST(CuckooReaderTest, WhenKeyExistsWithUint64Comparator) {
|
|
|
|
SetUp(kNumHashFunc);
|
|
|
|
fname = test::TmpDir() + "/CuckooReaderUint64_WhenKeyExists";
|
|
|
|
for (uint64_t i = 0; i < num_items; i++) {
|
|
|
|
user_keys[i].resize(8);
|
|
|
|
memcpy(&user_keys[i][0], static_cast<void*>(&i), 8);
|
|
|
|
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
|
|
|
AppendInternalKey(&keys[i], ikey);
|
|
|
|
values[i] = "value" + NumToStr(i);
|
|
|
|
// Give disjoint hash values.
|
|
|
|
AddHashLookups(user_keys[i], i, kNumHashFunc);
|
|
|
|
}
|
|
|
|
CreateCuckooFileAndCheckReader(test::Uint64Comparator());
|
|
|
|
// Last level file.
|
|
|
|
UpdateKeys(true);
|
|
|
|
CreateCuckooFileAndCheckReader(test::Uint64Comparator());
|
|
|
|
// Test with collision. Make all hash values collide.
|
|
|
|
hash_map.clear();
|
|
|
|
for (uint32_t i = 0; i < num_items; i++) {
|
|
|
|
AddHashLookups(user_keys[i], 0, kNumHashFunc);
|
|
|
|
}
|
|
|
|
UpdateKeys(false);
|
|
|
|
CreateCuckooFileAndCheckReader(test::Uint64Comparator());
|
|
|
|
// Last level file.
|
|
|
|
UpdateKeys(true);
|
|
|
|
CreateCuckooFileAndCheckReader(test::Uint64Comparator());
|
|
|
|
}
|
|
|
|
|
2014-08-06 01:35:02 +02:00
|
|
|
TEST(CuckooReaderTest, CheckIterator) {
|
|
|
|
SetUp(2*kNumHashFunc);
|
|
|
|
fname = test::TmpDir() + "/CuckooReader_CheckIterator";
|
|
|
|
for (uint64_t i = 0; i < num_items; i++) {
|
|
|
|
user_keys[i] = "key" + NumToStr(i);
|
2014-08-27 19:39:31 +02:00
|
|
|
ParsedInternalKey ikey(user_keys[i], 1000, kTypeValue);
|
2014-08-06 01:35:02 +02:00
|
|
|
AppendInternalKey(&keys[i], ikey);
|
|
|
|
values[i] = "value" + NumToStr(i);
|
|
|
|
// Give disjoint hash values, in reverse order.
|
2014-08-06 05:55:46 +02:00
|
|
|
AddHashLookups(user_keys[i], num_items-i-1, kNumHashFunc);
|
2014-08-06 01:35:02 +02:00
|
|
|
}
|
2014-08-06 05:55:46 +02:00
|
|
|
CreateCuckooFileAndCheckReader();
|
2014-08-06 01:35:02 +02:00
|
|
|
CheckIterator();
|
|
|
|
// Last level file.
|
2014-08-06 05:55:46 +02:00
|
|
|
UpdateKeys(true);
|
|
|
|
CreateCuckooFileAndCheckReader();
|
2014-08-06 01:35:02 +02:00
|
|
|
CheckIterator();
|
|
|
|
}
|
|
|
|
|
2014-08-27 19:39:31 +02:00
|
|
|
TEST(CuckooReaderTest, CheckIteratorUint64) {
|
|
|
|
SetUp(2*kNumHashFunc);
|
|
|
|
fname = test::TmpDir() + "/CuckooReader_CheckIterator";
|
|
|
|
for (uint64_t i = 0; i < num_items; i++) {
|
|
|
|
user_keys[i].resize(8);
|
|
|
|
memcpy(&user_keys[i][0], static_cast<void*>(&i), 8);
|
|
|
|
ParsedInternalKey ikey(user_keys[i], 1000, kTypeValue);
|
|
|
|
AppendInternalKey(&keys[i], ikey);
|
|
|
|
values[i] = "value" + NumToStr(i);
|
|
|
|
// Give disjoint hash values, in reverse order.
|
|
|
|
AddHashLookups(user_keys[i], num_items-i-1, kNumHashFunc);
|
|
|
|
}
|
|
|
|
CreateCuckooFileAndCheckReader(test::Uint64Comparator());
|
|
|
|
CheckIterator(test::Uint64Comparator());
|
|
|
|
// Last level file.
|
|
|
|
UpdateKeys(true);
|
|
|
|
CreateCuckooFileAndCheckReader(test::Uint64Comparator());
|
|
|
|
CheckIterator(test::Uint64Comparator());
|
|
|
|
}
|
|
|
|
|
2014-07-26 01:37:32 +02:00
|
|
|
TEST(CuckooReaderTest, WhenKeyNotFound) {
|
|
|
|
// Add keys with colliding hash values.
|
2014-08-06 05:55:46 +02:00
|
|
|
SetUp(kNumHashFunc);
|
2014-07-26 01:37:32 +02:00
|
|
|
fname = test::TmpDir() + "/CuckooReader_WhenKeyNotFound";
|
2014-08-06 01:35:02 +02:00
|
|
|
for (uint64_t i = 0; i < num_items; i++) {
|
|
|
|
user_keys[i] = "key" + NumToStr(i);
|
2014-07-26 01:37:32 +02:00
|
|
|
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
|
|
|
|
AppendInternalKey(&keys[i], ikey);
|
2014-08-06 01:35:02 +02:00
|
|
|
values[i] = "value" + NumToStr(i);
|
2014-07-26 01:37:32 +02:00
|
|
|
// Make all hash values collide.
|
|
|
|
AddHashLookups(user_keys[i], 0, kNumHashFunc);
|
|
|
|
}
|
2014-08-06 05:55:46 +02:00
|
|
|
CreateCuckooFileAndCheckReader();
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
std::unique_ptr<RandomAccessFile> read_file;
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
|
|
|
|
CuckooTableReader reader(
|
|
|
|
options,
|
|
|
|
std::move(read_file),
|
|
|
|
file_size,
|
2014-08-27 19:39:31 +02:00
|
|
|
BytewiseComparator(),
|
2014-07-26 01:37:32 +02:00
|
|
|
GetSliceHash);
|
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
// Search for a key with colliding hash values.
|
2014-08-06 01:35:02 +02:00
|
|
|
std::string not_found_user_key = "key" + NumToStr(num_items);
|
2014-07-26 01:37:32 +02:00
|
|
|
std::string not_found_key;
|
|
|
|
AddHashLookups(not_found_user_key, 0, kNumHashFunc);
|
|
|
|
ParsedInternalKey ikey(not_found_user_key, 1000, kTypeValue);
|
|
|
|
AppendInternalKey(¬_found_key, ikey);
|
|
|
|
ValuesToAssert v("", "");
|
|
|
|
ASSERT_OK(reader.Get(
|
|
|
|
ReadOptions(), Slice(not_found_key), &v, AssertValues, nullptr));
|
|
|
|
ASSERT_EQ(0, v.call_count);
|
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
// Search for a key with an independent hash value.
|
2014-08-06 01:35:02 +02:00
|
|
|
std::string not_found_user_key2 = "key" + NumToStr(num_items + 1);
|
2014-07-26 01:37:32 +02:00
|
|
|
AddHashLookups(not_found_user_key2, kNumHashFunc, kNumHashFunc);
|
|
|
|
ParsedInternalKey ikey2(not_found_user_key2, 1000, kTypeValue);
|
2014-08-06 01:35:02 +02:00
|
|
|
std::string not_found_key2;
|
2014-07-26 01:37:32 +02:00
|
|
|
AppendInternalKey(¬_found_key2, ikey2);
|
|
|
|
ASSERT_OK(reader.Get(
|
|
|
|
ReadOptions(), Slice(not_found_key2), &v, AssertValues, nullptr));
|
|
|
|
ASSERT_EQ(0, v.call_count);
|
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
|
|
|
|
// Test read when key is unused key.
|
2014-08-06 01:35:02 +02:00
|
|
|
std::string unused_key =
|
|
|
|
reader.GetTableProperties()->user_collected_properties.at(
|
|
|
|
CuckooTablePropertyNames::kEmptyKey);
|
2014-07-26 01:37:32 +02:00
|
|
|
// Add hash values that map to empty buckets.
|
2014-08-06 01:35:02 +02:00
|
|
|
AddHashLookups(ExtractUserKey(unused_key).ToString(),
|
|
|
|
kNumHashFunc, kNumHashFunc);
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(reader.Get(
|
|
|
|
ReadOptions(), Slice(unused_key), &v, AssertValues, nullptr));
|
|
|
|
ASSERT_EQ(0, v.call_count);
|
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Performance tests
|
|
|
|
namespace {
|
|
|
|
bool DoNothing(void* arg, const ParsedInternalKey& k, const Slice& v) {
|
|
|
|
// Deliberately empty.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool CheckValue(void* cnt_ptr, const ParsedInternalKey& k, const Slice& v) {
|
|
|
|
++*reinterpret_cast<int*>(cnt_ptr);
|
|
|
|
std::string expected_value;
|
|
|
|
AppendInternalKey(&expected_value, k);
|
|
|
|
ASSERT_EQ(0, v.compare(Slice(&expected_value[0], v.size())));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
void GetKeys(uint64_t num, std::vector<std::string>* keys) {
|
|
|
|
IterKey k;
|
|
|
|
k.SetInternalKey("", 0, kTypeValue);
|
|
|
|
std::string internal_key_suffix = k.GetKey().ToString();
|
2014-08-22 03:11:33 +02:00
|
|
|
ASSERT_EQ(static_cast<size_t>(8), internal_key_suffix.size());
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
|
|
|
|
std::string new_key(reinterpret_cast<char*>(&key_idx), sizeof(key_idx));
|
|
|
|
new_key += internal_key_suffix;
|
|
|
|
keys->push_back(new_key);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
std::string GetFileName(uint64_t num) {
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
if (FLAGS_file_dir.empty()) {
|
|
|
|
FLAGS_file_dir = test::TmpDir();
|
|
|
|
}
|
|
|
|
return FLAGS_file_dir + "/cuckoo_read_benchmark" +
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
std::to_string(num/1000000) + "Mkeys";
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
}
|
|
|
|
|
2014-07-26 01:37:32 +02:00
|
|
|
// Create last level file as we are interested in measuring performance of
|
|
|
|
// last level file only.
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
void WriteFile(const std::vector<std::string>& keys,
|
|
|
|
const uint64_t num, double hash_ratio) {
|
2014-07-26 01:37:32 +02:00
|
|
|
Options options;
|
|
|
|
options.allow_mmap_reads = true;
|
|
|
|
Env* env = options.env;
|
|
|
|
EnvOptions env_options = EnvOptions(options);
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
std::string fname = GetFileName(num);
|
2014-07-26 01:37:32 +02:00
|
|
|
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
std::unique_ptr<WritableFile> writable_file;
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
|
|
|
|
CuckooTableBuilder builder(
|
2014-08-06 05:55:46 +02:00
|
|
|
writable_file.get(), hash_ratio,
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
64, 1000, test::Uint64Comparator(), 5, nullptr);
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(builder.status());
|
2014-07-29 02:14:25 +02:00
|
|
|
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
|
2014-07-26 01:37:32 +02:00
|
|
|
// Value is just a part of key.
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
builder.Add(Slice(keys[key_idx]), Slice(&keys[key_idx][0], 4));
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_EQ(builder.NumEntries(), key_idx + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
}
|
|
|
|
ASSERT_OK(builder.Finish());
|
|
|
|
ASSERT_EQ(num, builder.NumEntries());
|
|
|
|
ASSERT_OK(writable_file->Close());
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
|
|
|
|
uint64_t file_size;
|
|
|
|
env->GetFileSize(fname, &file_size);
|
|
|
|
std::unique_ptr<RandomAccessFile> read_file;
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
|
|
|
|
|
|
|
|
CuckooTableReader reader(
|
2014-08-27 19:39:31 +02:00
|
|
|
options, std::move(read_file), file_size,
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
test::Uint64Comparator(), nullptr);
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
ReadOptions r_options;
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
for (uint64_t i = 0; i < num; ++i) {
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
int cnt = 0;
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
ASSERT_OK(reader.Get(r_options, Slice(keys[i]), &cnt, CheckValue, nullptr));
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
if (cnt != 1) {
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
fprintf(stderr, "%" PRIu64 " not found.\n", i);
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
ASSERT_EQ(1, cnt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
void ReadKeys(uint64_t num, uint32_t batch_size) {
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
Options options;
|
|
|
|
options.allow_mmap_reads = true;
|
|
|
|
Env* env = options.env;
|
|
|
|
EnvOptions env_options = EnvOptions(options);
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
std::string fname = GetFileName(num);
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
|
|
|
|
uint64_t file_size;
|
|
|
|
env->GetFileSize(fname, &file_size);
|
|
|
|
std::unique_ptr<RandomAccessFile> read_file;
|
|
|
|
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
|
|
|
|
|
|
|
|
CuckooTableReader reader(
|
2014-08-27 19:39:31 +02:00
|
|
|
options, std::move(read_file), file_size, test::Uint64Comparator(),
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
nullptr);
|
2014-07-26 01:37:32 +02:00
|
|
|
ASSERT_OK(reader.status());
|
|
|
|
const UserCollectedProperties user_props =
|
|
|
|
reader.GetTableProperties()->user_collected_properties;
|
|
|
|
const uint32_t num_hash_fun = *reinterpret_cast<const uint32_t*>(
|
2014-08-28 19:42:23 +02:00
|
|
|
user_props.at(CuckooTablePropertyNames::kNumHashFunc).data());
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
const uint64_t table_size = *reinterpret_cast<const uint64_t*>(
|
|
|
|
user_props.at(CuckooTablePropertyNames::kHashTableSize).data());
|
|
|
|
fprintf(stderr, "With %" PRIu64 " items, utilization is %.2f%%, number of"
|
|
|
|
" hash functions: %u.\n", num, num * 100.0 / (table_size), num_hash_fun);
|
2014-07-26 01:37:32 +02:00
|
|
|
ReadOptions r_options;
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
|
|
|
|
uint64_t start_time = env->NowMicros();
|
|
|
|
if (batch_size > 0) {
|
|
|
|
for (uint64_t i = 0; i < num; i += batch_size) {
|
|
|
|
for (uint64_t j = i; j < i+batch_size && j < num; ++j) {
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
reader.Prepare(Slice(reinterpret_cast<char*>(&j), 16));
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
}
|
|
|
|
for (uint64_t j = i; j < i+batch_size && j < num; ++j) {
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
reader.Get(r_options, Slice(reinterpret_cast<char*>(&j), 16),
|
|
|
|
nullptr, DoNothing, nullptr);
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (uint64_t i = 0; i < num; i++) {
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
reader.Get(r_options, Slice(reinterpret_cast<char*>(&i), 16), nullptr,
|
|
|
|
DoNothing, nullptr);
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
}
|
2014-07-26 01:37:32 +02:00
|
|
|
}
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
float time_per_op = (env->NowMicros() - start_time) * 1.0 / num;
|
|
|
|
fprintf(stderr,
|
|
|
|
"Time taken per op is %.3fus (%.1f Mqps) with batch size of %u\n",
|
|
|
|
time_per_op, 1.0 / time_per_op, batch_size);
|
2014-07-26 01:37:32 +02:00
|
|
|
}
|
|
|
|
} // namespace.
|
|
|
|
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
TEST(CuckooReaderTest, TestReadPerformance) {
|
2014-07-26 01:37:32 +02:00
|
|
|
if (!FLAGS_enable_perf) {
|
|
|
|
return;
|
|
|
|
}
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
double hash_ratio = 0.95;
|
|
|
|
// These numbers are chosen to have a hash utilizaiton % close to
|
|
|
|
// 0.9, 0.75, 0.6 and 0.5 respectively.
|
|
|
|
// They all create 128 M buckets.
|
|
|
|
std::vector<uint64_t> nums = {120*1000*1000, 100*1000*1000, 80*1000*1000,
|
|
|
|
70*1000*1000};
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
#ifndef NDEBUG
|
|
|
|
fprintf(stdout,
|
|
|
|
"WARNING: Not compiled with DNDEBUG. Performance tests may be slow.\n");
|
|
|
|
#endif
|
|
|
|
std::vector<std::string> keys;
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
GetKeys(*std::max_element(nums.begin(), nums.end()), &keys);
|
|
|
|
for (uint64_t num : nums) {
|
|
|
|
if (FLAGS_write || !Env::Default()->FileExists(GetFileName(num))) {
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
WriteFile(keys, num, hash_ratio);
|
|
|
|
}
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 04:06:15 +02:00
|
|
|
ReadKeys(num, 0);
|
|
|
|
ReadKeys(num, 10);
|
|
|
|
ReadKeys(num, 25);
|
|
|
|
ReadKeys(num, 50);
|
|
|
|
ReadKeys(num, 100);
|
Implement Prepare method in CuckooTableReader
Summary:
- Implement Prepare method
- Rewrite performance tests in cuckoo_table_reader_test to write new file only if one doesn't already exist.
- Add performance tests for batch lookup along with prefetching.
Test Plan:
./cuckoo_table_reader_test --enable_perf
Results (We get better results if we used int64 comparator instead of string comparator (TBD in future diffs)):
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.208us (4.8 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.182us (5.5 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.161us (6.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.500000, number of hash functions used: 2.
Time taken per op is 0.163us (6.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.252us (4.0 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.192us (5.2 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.195us (5.1 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.191us (5.2 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.600000, number of hash functions used: 3.
Time taken per op is 0.194us (5.1 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.228us (4.4 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.185us (5.4 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.186us (5.4 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.750000, number of hash functions used: 3.
Time taken per op is 0.188us (5.3 Mqps) with batch size of 100
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.325us (3.1 Mqps) with batch size of 0
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 10
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.199us (5.0 Mqps) with batch size of 25
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.196us (5.1 Mqps) with batch size of 50
With 100000000 items and hash table ratio 0.900000, number of hash functions used: 3.
Time taken per op is 0.209us (4.8 Mqps) with batch size of 100
Reviewers: sdong, yhchiang, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22167
2014-08-21 03:35:35 +02:00
|
|
|
fprintf(stderr, "\n");
|
|
|
|
}
|
2014-07-26 01:37:32 +02:00
|
|
|
}
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
ParseCommandLineFlags(&argc, &argv, true);
|
|
|
|
rocksdb::test::RunAllTests();
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif // GFLAGS.
|