2013-11-27 14:27:02 -08:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
2014-05-09 08:34:18 -07:00
|
|
|
#ifndef GFLAGS
|
|
|
|
#include <cstdio>
|
|
|
|
// Fallback entry point used when the binary is built without gflags: report
// the missing dependency on stderr and exit with a failure status.
int main() {
  fputs("Please install gflags to run rocksdb tools\n", stderr);
  return 1;
}
|
|
|
|
#else
|
|
|
|
|
2014-04-03 16:04:10 -07:00
|
|
|
#define __STDC_FORMAT_MACROS
|
|
|
|
#include <inttypes.h>
|
2014-03-28 09:21:20 -07:00
|
|
|
#include <algorithm>
|
2013-11-27 14:27:02 -08:00
|
|
|
#include <gflags/gflags.h>
|
|
|
|
|
|
|
|
#include "dynamic_bloom.h"
|
2014-03-28 09:21:20 -07:00
|
|
|
#include "port/port.h"
|
2013-11-27 14:27:02 -08:00
|
|
|
#include "util/logging.h"
|
|
|
|
#include "util/testharness.h"
|
|
|
|
#include "util/testutil.h"
|
2014-03-28 09:21:20 -07:00
|
|
|
#include "util/stop_watch.h"
|
2013-11-27 14:27:02 -08:00
|
|
|
|
2014-05-09 08:34:18 -07:00
|
|
|
using GFLAGS::ParseCommandLineFlags;
|
|
|
|
|
2013-11-27 14:27:02 -08:00
|
|
|
// Command-line knobs for the tests below. Each flag carries a help string so
// that --help describes what it controls.
DEFINE_int32(bits_per_key, 10,
             "Bloom bits budgeted per key when sizing filters in the "
             "VaryingLengths test");
DEFINE_int32(num_probes, 6,
             "Number of probes passed to every DynamicBloom built by the "
             "VaryingLengths and perf tests");
DEFINE_bool(enable_perf, false,
            "Run the perf test (it returns immediately when this is false)");
|
2013-11-27 14:27:02 -08:00
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
2014-03-28 09:21:20 -07:00
|
|
|
// Builds a fixed-width key from `i` by copying its raw in-memory bytes into
// `buffer`, and returns a Slice over those bytes.
// `buffer` must have room for at least sizeof(uint64_t) bytes; the returned
// Slice points into it, so it is overwritten by the next call that reuses the
// same buffer.
static Slice Key(uint64_t i, char* buffer) {
  const size_t key_len = sizeof(i);
  memcpy(buffer, &i, key_len);
  return Slice(buffer, key_len);
}
|
|
|
|
|
|
|
|
// Empty fixture type; it only provides the name that the TEST(...) cases in
// this file are grouped under.
class DynamicBloomTest {};
|
|
|
|
|
|
|
|
// A filter that has had nothing added to it must report every probed key as
// absent. Checked for a 100-bit filter with locality off (second constructor
// argument 0) and for a filter of (CACHE_LINE_SIZE * 8 * 2 - 1) bits with
// locality on (second constructor argument 1); both use 2 probes.
TEST(DynamicBloomTest, EmptyFilter) {
  DynamicBloom plain_filter(100, 0, 2);
  ASSERT_TRUE(!plain_filter.MayContain("hello"));
  ASSERT_TRUE(!plain_filter.MayContain("world"));

  DynamicBloom locality_filter(CACHE_LINE_SIZE * 8 * 2 - 1, 1, 2);
  ASSERT_TRUE(!locality_filter.MayContain("hello"));
  ASSERT_TRUE(!locality_filter.MayContain("world"));
}
|
|
|
|
|
|
|
|
// After adding "hello" and "world", both keys must be reported as possibly
// present, while the never-added keys "x" and "foo" are expected to be
// reported absent. Run once with locality off and once with locality on,
// using the same filter sizes as the EmptyFilter test.
TEST(DynamicBloomTest, Small) {
  const char* const added_keys[] = {"hello", "world"};
  const char* const missing_keys[] = {"x", "foo"};

  DynamicBloom plain_filter(100, 0, 2);
  for (const char* key : added_keys) {
    plain_filter.Add(key);
  }
  for (const char* key : added_keys) {
    ASSERT_TRUE(plain_filter.MayContain(key));
  }
  for (const char* key : missing_keys) {
    ASSERT_TRUE(!plain_filter.MayContain(key));
  }

  DynamicBloom locality_filter(CACHE_LINE_SIZE * 8 * 2 - 1, 1, 2);
  for (const char* key : added_keys) {
    locality_filter.Add(key);
  }
  for (const char* key : added_keys) {
    ASSERT_TRUE(locality_filter.MayContain(key));
  }
  for (const char* key : missing_keys) {
    ASSERT_TRUE(!locality_filter.MayContain(key));
  }
}
|
|
|
|
|
2014-03-28 09:21:20 -07:00
|
|
|
// Returns the next key count to test after `num`. The increment grows with
// the magnitude of `num`: +1 below 10, +10 below 100, +100 below 1000, and
// +1000 from there on.
static uint32_t NextNum(uint32_t num) {
  uint32_t step = 1000;
  if (num < 10) {
    step = 1;
  } else if (num < 100) {
    step = 10;
  } else if (num < 1000) {
    step = 100;
  }
  return num + step;
}
|
|
|
|
|
|
|
|
// Builds filters for a range of key counts (1 .. 10000, stepped by NextNum),
// once with locality disabled and once with it enabled, and checks that:
//   * every added key is reported as possibly present, and
//   * filters whose measured false positive rate exceeds 1.25% ("mediocre")
//     number at most a fifth (integer division) of the remaining "good" ones.
// The good/mediocre counters are not reset between the two locality passes,
// so the second pass's check covers the filters of both passes.
TEST(DynamicBloomTest, VaryingLengths) {
  char buffer[sizeof(uint64_t)];

  // Count number of filters that significantly exceed the false positive rate
  int mediocre_filters = 0;
  int good_filters = 0;
  uint32_t num_probes = static_cast<uint32_t>(FLAGS_num_probes);

  // num_probes is unsigned, so it is printed with %u.
  fprintf(stderr, "bits_per_key: %d num_probes: %u\n",
          FLAGS_bits_per_key, num_probes);

  for (uint32_t enable_locality = 0; enable_locality < 2; ++enable_locality) {
    for (uint32_t num = 1; num <= 10000; num = NextNum(num)) {
      // Size the filter at bits_per_key bits per key, with a floor of 64 bits
      // when locality is off and of CACHE_LINE_SIZE * 8 bits when it is on.
      uint32_t bloom_bits = 0;
      if (enable_locality == 0) {
        bloom_bits = std::max(num * FLAGS_bits_per_key, 64U);
      } else {
        bloom_bits = std::max(num * FLAGS_bits_per_key,
                              enable_locality * CACHE_LINE_SIZE * 8);
      }
      DynamicBloom bloom(bloom_bits, enable_locality, num_probes);
      for (uint64_t i = 0; i < num; i++) {
        bloom.Add(Key(i, buffer));
        ASSERT_TRUE(bloom.MayContain(Key(i, buffer)));
      }

      // All added keys must match
      for (uint64_t i = 0; i < num; i++) {
        ASSERT_TRUE(bloom.MayContain(Key(i, buffer)))
            << "Num " << num << "; key " << i;
      }

      // Check false positive rate: probe 10000 keys offset by 1000000000,
      // which lies outside the range [0, num) that was added.
      int result = 0;
      for (uint64_t i = 0; i < 10000; i++) {
        if (bloom.MayContain(Key(i + 1000000000, buffer))) {
          result++;
        }
      }
      double rate = result / 10000.0;

      fprintf(stderr,
              "False positives: %5.2f%% @ num = %6u, bloom_bits = %6u, "
              "enable locality?%u\n",
              rate * 100.0, num, bloom_bits, enable_locality);

      if (rate > 0.0125)
        mediocre_filters++;  // Allowed, but not too often
      else
        good_filters++;
    }

    fprintf(stderr, "Filters: %d good, %d mediocre\n",
            good_filters, mediocre_filters);
    ASSERT_LE(mediocre_filters, good_filters/5);
  }
}
|
|
|
|
|
|
|
|
// Optional micro-benchmark: returns immediately unless --enable_perf is set.
// For key counts of 8M, 16M, ..., 64M it times adding every key and then
// querying every key, first on a filter with locality disabled and then on
// one with locality enabled, and prints average per-operation latencies
// computed from timer.ElapsedNanos().
//
// Each query phase asserts that every added key was found BEFORE printing
// the average. The average is divided by num_keys, the number of queries
// actually issued, which is always non-zero. Once the assertion has passed
// this equals count, and it avoids a division by zero when a broken filter
// matches nothing.
TEST(DynamicBloomTest, perf) {
  StopWatchNano timer(Env::Default());
  uint32_t num_probes = static_cast<uint32_t>(FLAGS_num_probes);

  if (!FLAGS_enable_perf) {
    return;
  }

  for (uint64_t m = 1; m <= 8; ++m) {
    const uint64_t num_keys = m * 8 * 1024 * 1024;
    fprintf(stderr, "testing %" PRIu64 "M keys\n", m * 8);

    // Locality disabled version, sized at 10 bits per key.
    DynamicBloom std_bloom(num_keys * 10, 0, num_probes);

    timer.Start();
    for (uint64_t i = 1; i <= num_keys; ++i) {
      std_bloom.Add(Slice(reinterpret_cast<const char*>(&i), 8));
    }

    uint64_t elapsed = timer.ElapsedNanos();
    fprintf(stderr, "standard bloom, avg add latency %" PRIu64 "\n",
            elapsed / num_keys);

    uint64_t count = 0;
    timer.Start();
    for (uint64_t i = 1; i <= num_keys; ++i) {
      if (std_bloom.MayContain(Slice(reinterpret_cast<const char*>(&i), 8))) {
        ++count;
      }
    }
    elapsed = timer.ElapsedNanos();
    // Every added key must be found; check this before reporting latency.
    ASSERT_TRUE(count == num_keys);
    fprintf(stderr, "standard bloom, avg query latency %" PRIu64 "\n",
            elapsed / num_keys);

    // Locality enabled version
    DynamicBloom blocked_bloom(num_keys * 10, 1, num_probes);

    timer.Start();
    for (uint64_t i = 1; i <= num_keys; ++i) {
      blocked_bloom.Add(Slice(reinterpret_cast<const char*>(&i), 8));
    }

    elapsed = timer.ElapsedNanos();
    fprintf(stderr,
            "blocked bloom(enable locality), avg add latency %" PRIu64 "\n",
            elapsed / num_keys);

    count = 0;
    timer.Start();
    for (uint64_t i = 1; i <= num_keys; ++i) {
      if (blocked_bloom.MayContain(
              Slice(reinterpret_cast<const char*>(&i), 8))) {
        ++count;
      }
    }

    elapsed = timer.ElapsedNanos();
    // Every added key must be found; check this before reporting latency.
    ASSERT_TRUE(count == num_keys);
    fprintf(stderr,
            "blocked bloom(enable locality), avg query latency %" PRIu64 "\n",
            elapsed / num_keys);
  }
}
|
2013-11-27 14:27:02 -08:00
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2014-05-09 08:34:18 -07:00
|
|
|
ParseCommandLineFlags(&argc, &argv, true);
|
2013-11-27 14:27:02 -08:00
|
|
|
|
|
|
|
return rocksdb::test::RunAllTests();
|
|
|
|
}
|
2014-05-09 08:34:18 -07:00
|
|
|
|
|
|
|
#endif // GFLAGS
|