Fix some implicit conversions in filter_bench (#5894)
Summary: Fixed some spots where a size_t or uint_fast32_t value was being implicitly converted to uint32_t. Wrapped mt19937 in a new Random32 class to avoid such traps in the future. NB: I tried using Random32::Uniform (std::uniform_int_distribution) in filter_bench instead of fastrange, but that more than doubled the dry run time! So I added fastrange as Random32::Uniformish. ;) Pull Request resolved: https://github.com/facebook/rocksdb/pull/5894 Test Plan: USE_CLANG=1 build, and manual re-run of filter_bench Differential Revision: D17825131 Pulled By: pdillinger fbshipit-source-id: 68feee333b5f8193c084ded760e3d6679b405ecd
This commit is contained in:
parent
167cdc9f17
commit
90e285efde
@ -13,7 +13,6 @@ int main() {
|
|||||||
|
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <random>
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
@ -23,13 +22,14 @@ int main() {
|
|||||||
#include "table/block_based/mock_block_based_table.h"
|
#include "table/block_based/mock_block_based_table.h"
|
||||||
#include "util/gflags_compat.h"
|
#include "util/gflags_compat.h"
|
||||||
#include "util/hash.h"
|
#include "util/hash.h"
|
||||||
|
#include "util/random.h"
|
||||||
#include "util/stop_watch.h"
|
#include "util/stop_watch.h"
|
||||||
|
|
||||||
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
|
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
|
||||||
using GFLAGS_NAMESPACE::RegisterFlagValidator;
|
using GFLAGS_NAMESPACE::RegisterFlagValidator;
|
||||||
using GFLAGS_NAMESPACE::SetUsageMessage;
|
using GFLAGS_NAMESPACE::SetUsageMessage;
|
||||||
|
|
||||||
DEFINE_int64(seed, 0, "Seed for random number generators");
|
DEFINE_uint32(seed, 0, "Seed for random number generators");
|
||||||
|
|
||||||
DEFINE_double(working_mem_size_mb, 200,
|
DEFINE_double(working_mem_size_mb, 200,
|
||||||
"MB of memory to get up to among all filters");
|
"MB of memory to get up to among all filters");
|
||||||
@ -70,6 +70,7 @@ using rocksdb::fastrange32;
|
|||||||
using rocksdb::FilterBitsBuilder;
|
using rocksdb::FilterBitsBuilder;
|
||||||
using rocksdb::FilterBitsReader;
|
using rocksdb::FilterBitsReader;
|
||||||
using rocksdb::FullFilterBlockReader;
|
using rocksdb::FullFilterBlockReader;
|
||||||
|
using rocksdb::Random32;
|
||||||
using rocksdb::Slice;
|
using rocksdb::Slice;
|
||||||
using rocksdb::mock::MockBlockBasedTableTester;
|
using rocksdb::mock::MockBlockBasedTableTester;
|
||||||
|
|
||||||
@ -154,7 +155,7 @@ const char *TestModeToString(TestMode tm) {
|
|||||||
struct FilterBench : public MockBlockBasedTableTester {
|
struct FilterBench : public MockBlockBasedTableTester {
|
||||||
std::vector<KeyMaker> kms_;
|
std::vector<KeyMaker> kms_;
|
||||||
std::vector<FilterInfo> infos_;
|
std::vector<FilterInfo> infos_;
|
||||||
std::mt19937 random_;
|
Random32 random_;
|
||||||
|
|
||||||
FilterBench()
|
FilterBench()
|
||||||
: MockBlockBasedTableTester(
|
: MockBlockBasedTableTester(
|
||||||
@ -193,9 +194,10 @@ void FilterBench::Go() {
|
|||||||
rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true);
|
rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true);
|
||||||
|
|
||||||
while (total_memory_used < 1024 * 1024 * FLAGS_working_mem_size_mb) {
|
while (total_memory_used < 1024 * 1024 * FLAGS_working_mem_size_mb) {
|
||||||
uint32_t filter_id = random_();
|
uint32_t filter_id = random_.Next();
|
||||||
uint32_t keys_to_add = FLAGS_average_keys_per_filter +
|
uint32_t keys_to_add = FLAGS_average_keys_per_filter +
|
||||||
(random_() & variance_mask) - (variance_mask / 2);
|
(random_.Next() & variance_mask) -
|
||||||
|
(variance_mask / 2);
|
||||||
for (uint32_t i = 0; i < keys_to_add; ++i) {
|
for (uint32_t i = 0; i < keys_to_add; ++i) {
|
||||||
builder->AddKey(kms_[0].Get(filter_id, i));
|
builder->AddKey(kms_[0].Get(filter_id, i));
|
||||||
}
|
}
|
||||||
@ -256,19 +258,19 @@ void FilterBench::Go() {
|
|||||||
|
|
||||||
std::cout << "----------------------------" << std::endl;
|
std::cout << "----------------------------" << std::endl;
|
||||||
std::cout << "Inside queries..." << std::endl;
|
std::cout << "Inside queries..." << std::endl;
|
||||||
random_.seed(FLAGS_seed + 1);
|
random_.Seed(FLAGS_seed + 1);
|
||||||
RandomQueryTest(/*inside*/ true, /*dry_run*/ true, kRandomFilter);
|
RandomQueryTest(/*inside*/ true, /*dry_run*/ true, kRandomFilter);
|
||||||
for (TestMode tm : testModes) {
|
for (TestMode tm : testModes) {
|
||||||
random_.seed(FLAGS_seed + 1);
|
random_.Seed(FLAGS_seed + 1);
|
||||||
RandomQueryTest(/*inside*/ true, /*dry_run*/ false, tm);
|
RandomQueryTest(/*inside*/ true, /*dry_run*/ false, tm);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "----------------------------" << std::endl;
|
std::cout << "----------------------------" << std::endl;
|
||||||
std::cout << "Outside queries..." << std::endl;
|
std::cout << "Outside queries..." << std::endl;
|
||||||
random_.seed(FLAGS_seed + 2);
|
random_.Seed(FLAGS_seed + 2);
|
||||||
RandomQueryTest(/*inside*/ false, /*dry_run*/ true, kRandomFilter);
|
RandomQueryTest(/*inside*/ false, /*dry_run*/ true, kRandomFilter);
|
||||||
for (TestMode tm : testModes) {
|
for (TestMode tm : testModes) {
|
||||||
random_.seed(FLAGS_seed + 2);
|
random_.Seed(FLAGS_seed + 2);
|
||||||
RandomQueryTest(/*inside*/ false, /*dry_run*/ false, tm);
|
RandomQueryTest(/*inside*/ false, /*dry_run*/ false, tm);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -282,13 +284,14 @@ void FilterBench::RandomQueryTest(bool inside, bool dry_run, TestMode mode) {
|
|||||||
info.false_positives_ = 0;
|
info.false_positives_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t num_infos = static_cast<uint32_t>(infos_.size());
|
||||||
uint32_t dry_run_hash = 0;
|
uint32_t dry_run_hash = 0;
|
||||||
uint64_t max_queries =
|
uint64_t max_queries =
|
||||||
static_cast<uint64_t>(FLAGS_m_queries * 1000000 + 0.50);
|
static_cast<uint64_t>(FLAGS_m_queries * 1000000 + 0.50);
|
||||||
// Some filters may be considered secondary in order to implement skewed
|
// Some filters may be considered secondary in order to implement skewed
|
||||||
// queries. num_primary_filters is the number that are to be treated as
|
// queries. num_primary_filters is the number that are to be treated as
|
||||||
// equal, and any remainder will be treated as secondary.
|
// equal, and any remainder will be treated as secondary.
|
||||||
size_t num_primary_filters = infos_.size();
|
uint32_t num_primary_filters = num_infos;
|
||||||
// The proportion (when divided by 2^32 - 1) of filter queries going to
|
// The proportion (when divided by 2^32 - 1) of filter queries going to
|
||||||
// the primary filters (default = all). The remainder of queries are
|
// the primary filters (default = all). The remainder of queries are
|
||||||
// against secondary filters.
|
// against secondary filters.
|
||||||
@ -307,14 +310,14 @@ void FilterBench::RandomQueryTest(bool inside, bool dry_run, TestMode mode) {
|
|||||||
// to 20% of filters
|
// to 20% of filters
|
||||||
num_primary_filters = (num_primary_filters + 4) / 5;
|
num_primary_filters = (num_primary_filters + 4) / 5;
|
||||||
}
|
}
|
||||||
size_t batch_size = 1;
|
uint32_t batch_size = 1;
|
||||||
std::unique_ptr<Slice *[]> batch_slices;
|
std::unique_ptr<Slice *[]> batch_slices;
|
||||||
std::unique_ptr<bool[]> batch_results;
|
std::unique_ptr<bool[]> batch_results;
|
||||||
if (mode == kBatchPrepared || mode == kBatchUnprepared) {
|
if (mode == kBatchPrepared || mode == kBatchUnprepared) {
|
||||||
batch_size = kms_.size();
|
batch_size = static_cast<uint32_t>(kms_.size());
|
||||||
batch_slices.reset(new Slice *[batch_size]);
|
batch_slices.reset(new Slice *[batch_size]);
|
||||||
batch_results.reset(new bool[batch_size]);
|
batch_results.reset(new bool[batch_size]);
|
||||||
for (size_t i = 0; i < batch_size; ++i) {
|
for (uint32_t i = 0; i < batch_size; ++i) {
|
||||||
batch_slices[i] = &kms_[i].slice_;
|
batch_slices[i] = &kms_[i].slice_;
|
||||||
batch_results[i] = false;
|
batch_results[i] = false;
|
||||||
}
|
}
|
||||||
@ -324,31 +327,30 @@ void FilterBench::RandomQueryTest(bool inside, bool dry_run, TestMode mode) {
|
|||||||
|
|
||||||
for (uint64_t q = 0; q < max_queries; q += batch_size) {
|
for (uint64_t q = 0; q < max_queries; q += batch_size) {
|
||||||
uint32_t filter_index;
|
uint32_t filter_index;
|
||||||
if (random_() <= primary_filter_threshold) {
|
if (random_.Next() <= primary_filter_threshold) {
|
||||||
filter_index = fastrange32(num_primary_filters, random_());
|
filter_index = random_.Uniformish(num_primary_filters);
|
||||||
} else {
|
} else {
|
||||||
// secondary
|
// secondary
|
||||||
filter_index =
|
filter_index = num_primary_filters +
|
||||||
num_primary_filters +
|
random_.Uniformish(num_infos - num_primary_filters);
|
||||||
fastrange32(infos_.size() - num_primary_filters, random_());
|
|
||||||
}
|
}
|
||||||
FilterInfo &info = infos_[filter_index];
|
FilterInfo &info = infos_[filter_index];
|
||||||
for (size_t i = 0; i < batch_size; ++i) {
|
for (uint32_t i = 0; i < batch_size; ++i) {
|
||||||
if (inside) {
|
if (inside) {
|
||||||
kms_[i].Get(info.filter_id_, fastrange32(info.keys_added_, random_()));
|
kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_));
|
||||||
} else {
|
} else {
|
||||||
kms_[i].Get(info.filter_id_, random_() | 0x80000000);
|
kms_[i].Get(info.filter_id_, random_.Next() | uint32_t{0x80000000});
|
||||||
info.outside_queries_++;
|
info.outside_queries_++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// TODO: implement batched interface to full block reader
|
// TODO: implement batched interface to full block reader
|
||||||
if (mode == kBatchPrepared && !dry_run && !FLAGS_use_full_block_reader) {
|
if (mode == kBatchPrepared && !dry_run && !FLAGS_use_full_block_reader) {
|
||||||
for (size_t i = 0; i < batch_size; ++i) {
|
for (uint32_t i = 0; i < batch_size; ++i) {
|
||||||
batch_results[i] = false;
|
batch_results[i] = false;
|
||||||
}
|
}
|
||||||
info.reader_->MayMatch(batch_size, batch_slices.get(),
|
info.reader_->MayMatch(batch_size, batch_slices.get(),
|
||||||
batch_results.get());
|
batch_results.get());
|
||||||
for (size_t i = 0; i < batch_size; ++i) {
|
for (uint32_t i = 0; i < batch_size; ++i) {
|
||||||
if (inside) {
|
if (inside) {
|
||||||
ALWAYS_ASSERT(batch_results[i]);
|
ALWAYS_ASSERT(batch_results[i]);
|
||||||
} else {
|
} else {
|
||||||
@ -356,7 +358,7 @@ void FilterBench::RandomQueryTest(bool inside, bool dry_run, TestMode mode) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (size_t i = 0; i < batch_size; ++i) {
|
for (uint32_t i = 0; i < batch_size; ++i) {
|
||||||
if (dry_run) {
|
if (dry_run) {
|
||||||
dry_run_hash ^= rocksdb::BloomHash(kms_[i].slice_);
|
dry_run_hash ^= rocksdb::BloomHash(kms_[i].slice_);
|
||||||
} else {
|
} else {
|
||||||
|
@ -77,7 +77,51 @@ class Random {
|
|||||||
static Random* GetTLSInstance();
|
static Random* GetTLSInstance();
|
||||||
};
|
};
|
||||||
|
|
||||||
// A simple 64bit random number generator based on std::mt19937_64
|
// A good 32-bit random number generator based on std::mt19937.
|
||||||
|
// This exists in part to avoid compiler variance in warning about coercing
|
||||||
|
// uint_fast32_t from mt19937 to uint32_t.
|
||||||
|
class Random32 {
 private:
  // Underlying engine. Its result type is uint_fast32_t, so every value that
  // leaves this class is narrowed to uint32_t with an explicit cast rather
  // than relying on an implicit conversion.
  std::mt19937 generator_;

 public:
  // Constructs a generator whose sequence is determined by the seed `s`.
  explicit Random32(uint32_t s) : generator_(s) {}

  // Generates the next random number
  uint32_t Next() { return static_cast<uint32_t>(generator_()); }

  // Returns a uniformly distributed value in the range [0..n-1]
  // REQUIRES: n > 0
  uint32_t Uniform(uint32_t n) {
    return static_cast<uint32_t>(
        std::uniform_int_distribution<std::mt19937::result_type>(
            0, n - 1)(generator_));
  }

  // Returns an *almost* uniformly distributed value in the range [0..n-1].
  // Much faster than Uniform().
  // REQUIRES: n > 0
  uint32_t Uniformish(uint32_t n) {
    // fastrange (without the header): scale one 32-bit draw by n in 64-bit
    // arithmetic and keep the high 32 bits of the product.
    const uint64_t draw = static_cast<uint64_t>(generator_());
    return static_cast<uint32_t>((draw * static_cast<uint64_t>(n)) >> 32);
  }

  // Randomly returns true ~"1/n" of the time, and false otherwise.
  // REQUIRES: n > 0
  bool OneIn(uint32_t n) { return Uniform(n) == 0; }

  // Skewed: pick "base" uniformly from range [0,max_log] and then
  // return "base" random bits. The effect is to pick a number in the
  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
  // REQUIRES: 0 <= max_log <= 31 (a larger value could shift a 32-bit
  // quantity by 32 or more; a negative one violates Uniform's n > 0).
  uint32_t Skewed(int max_log) {
    // Convert explicitly instead of letting the int argument be coerced to
    // uint32_t at the call to Uniform().
    const uint32_t base = Uniform(static_cast<uint32_t>(max_log) + 1);
    return Uniform(uint32_t{1} << base);
  }

  // Reset the seed of the generator to the given value
  void Seed(uint32_t new_seed) { generator_.seed(new_seed); }
};
|
||||||
|
|
||||||
|
// A good 64-bit random number generator based on std::mt19937_64
|
||||||
class Random64 {
|
class Random64 {
|
||||||
private:
|
private:
|
||||||
std::mt19937_64 generator_;
|
std::mt19937_64 generator_;
|
||||||
|
Loading…
Reference in New Issue
Block a user