speed up db_bench filluniquerandom mode
Summary: filluniquerandom is painfully slow because of the naive bitmap check used to find out whether a key has been seen before; the majority of the time is spent searching for the last few keys. Split a giant BitSet into smaller ones so that we can quickly check whether a BitSet is full and skip it. It used to take over one hour to run filluniquerandom for 100M keys; now it takes about 10 minutes. Test Plan: unit test; also verified correctness in db_bench and made sure all keys are generated. Reviewers: igor, haobo, yhchiang Reviewed By: igor CC: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D17607
This commit is contained in:
parent
2014915d32
commit
4824014e3b
@ -28,7 +28,6 @@
|
||||
#include "rocksdb/statistics.h"
|
||||
#include "rocksdb/perf_context.h"
|
||||
#include "port/port.h"
|
||||
#include "util/bit_set.h"
|
||||
#include "util/crc32c.h"
|
||||
#include "util/histogram.h"
|
||||
#include "util/mutexlock.h"
|
||||
@ -151,6 +150,7 @@ static bool ValidateKeySize(const char* flagname, int32_t value) {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
DEFINE_int32(key_size, 16, "size of each key");
|
||||
|
||||
DEFINE_double(compression_ratio, 0.5, "Arrange to generate values that shrink"
|
||||
@ -1680,15 +1680,56 @@ class Benchmark {
|
||||
DoWrite(thread, UNIQUE_RANDOM);
|
||||
}
|
||||
|
||||
class KeyGenerator {
|
||||
public:
|
||||
KeyGenerator(Random64* rand, WriteMode mode,
|
||||
uint64_t num, uint64_t num_per_set = 64 * 1024)
|
||||
: rand_(rand),
|
||||
mode_(mode),
|
||||
num_(num),
|
||||
next_(0) {
|
||||
if (mode_ == UNIQUE_RANDOM) {
|
||||
// NOTE: if memory consumption of this approach becomes a concern,
|
||||
// we can either break it into pieces and only random shuffle a section
|
||||
// each time. Alternatively, use a bit map implementation
|
||||
// (https://reviews.facebook.net/differential/diff/54627/)
|
||||
values_.resize(num_);
|
||||
for (uint64_t i = 0; i < num_; ++i) {
|
||||
values_[i] = i;
|
||||
}
|
||||
std::shuffle(values_.begin(), values_.end(),
|
||||
std::default_random_engine(FLAGS_seed));
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t Next() {
|
||||
switch (mode_) {
|
||||
case SEQUENTIAL:
|
||||
return next_++;
|
||||
case RANDOM:
|
||||
return rand_->Next() % num_;
|
||||
case UNIQUE_RANDOM:
|
||||
return values_[next_++];
|
||||
}
|
||||
assert(false);
|
||||
return std::numeric_limits<uint64_t>::max();
|
||||
}
|
||||
|
||||
private:
|
||||
Random64* rand_;
|
||||
WriteMode mode_;
|
||||
const uint64_t num_;
|
||||
uint64_t next_;
|
||||
std::vector<uint64_t> values_;
|
||||
};
|
||||
|
||||
|
||||
void DoWrite(ThreadState* thread, WriteMode write_mode) {
|
||||
const int test_duration = write_mode == RANDOM ? FLAGS_duration : 0;
|
||||
const int64_t num_ops = writes_ == 0 ? num_ : writes_;
|
||||
Duration duration(test_duration, num_ops);
|
||||
unique_ptr<BitSet> bit_set;
|
||||
|
||||
if (write_mode == UNIQUE_RANDOM) {
|
||||
bit_set.reset(new BitSet(num_ops));
|
||||
}
|
||||
KeyGenerator key_gen(&(thread->rand), write_mode, num_ops);
|
||||
|
||||
if (num_ != FLAGS_num) {
|
||||
char msg[100];
|
||||
@ -1700,52 +1741,13 @@ class Benchmark {
|
||||
WriteBatch batch;
|
||||
Status s;
|
||||
int64_t bytes = 0;
|
||||
int64_t i = 0;
|
||||
|
||||
Slice key = AllocateKey();
|
||||
std::unique_ptr<const char[]> key_guard(key.data());
|
||||
while (!duration.Done(entries_per_batch_)) {
|
||||
batch.Clear();
|
||||
for (int64_t j = 0; j < entries_per_batch_; j++) {
|
||||
int64_t k = 0;
|
||||
switch(write_mode) {
|
||||
case SEQUENTIAL:
|
||||
k = i +j;
|
||||
break;
|
||||
case RANDOM:
|
||||
k = thread->rand.Next() % FLAGS_num;
|
||||
break;
|
||||
case UNIQUE_RANDOM:
|
||||
{
|
||||
const int64_t t = thread->rand.Next() % FLAGS_num;
|
||||
if (!bit_set->test(t)) {
|
||||
// best case
|
||||
k = t;
|
||||
} else {
|
||||
bool found = false;
|
||||
// look forward
|
||||
for (size_t i = t + 1; i < bit_set->size(); ++i) {
|
||||
if (!bit_set->test(i)) {
|
||||
found = true;
|
||||
k = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
for (size_t i = t; i-- > 0;) {
|
||||
if (!bit_set->test(i)) {
|
||||
found = true;
|
||||
k = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
bit_set->set(k);
|
||||
break;
|
||||
}
|
||||
};
|
||||
GenerateKeyFromInt(k, FLAGS_num, &key);
|
||||
GenerateKeyFromInt(key_gen.Next(), FLAGS_num, &key);
|
||||
batch.Put(key, gen.Generate(value_size_));
|
||||
bytes += value_size_ + key_size_;
|
||||
thread->stats.FinishedSingleOp(db_);
|
||||
@ -1755,7 +1757,6 @@ class Benchmark {
|
||||
fprintf(stderr, "put error: %s\n", s.ToString().c_str());
|
||||
exit(1);
|
||||
}
|
||||
i += entries_per_batch_;
|
||||
}
|
||||
thread->stats.AddBytes(bytes);
|
||||
}
|
||||
|
@ -1,71 +0,0 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
#pragma once
|
||||
#include <cassert>
#include <cstddef>
#include <vector>
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
/**
 * A fixed-size set of bits packed into unsigned long words.
 *
 * Bits are addressed by a zero-based index in [0, size()). The number of bits
 * is fixed at construction.
 */
class BitSet {
 public:
  /**
   * Create a bit set of numBits, with the bits set to either true or false.
   */
  explicit BitSet(size_t numBits, bool initial=false)
    : numBits_(numBits),
      data_(numWords(), initial ? ~0UL : 0UL) {
  }

  /**
   * Set bit b to 1. b must be less than size().
   */
  void set(size_t b) {
    assert(b < numBits_);
    data_[word(b)] |= wordOffsetMask(b);
  }

  /**
   * Set bit b to 0. b must be less than size().
   */
  void reset(size_t b) {
    assert(b < numBits_);
    data_[word(b)] &= ~wordOffsetMask(b);
  }

  /**
   * Get a bit. b must be less than size().
   *
   * The index is a size_t, like set() and reset(), so that every index valid
   * for a bit set of size() bits can be passed without truncation.
   */
  bool test(size_t b) const {
    assert(b < numBits_);
    return (data_[word(b)] & wordOffsetMask(b)) != 0;
  }

  /**
   * Return the size of the BitSet, in bits.
   */
  size_t size() const {
    return numBits_;
  }

 private:

  /** Number of bits stored in one element of data_. */
  inline static size_t bitsPerWord() {
    return 8 * sizeof(unsigned long);
  }

  /** Number of words needed to hold numBits_ bits (0 for an empty set). */
  inline size_t numWords() const {
    if (numBits_ == 0) return 0;
    return 1 + (numBits_ - 1) / bitsPerWord();
  }

  /** Index into data_ of the word that contains bit b. */
  inline static size_t word(size_t b) {
    return b / bitsPerWord();
  }

  /** Position of bit b inside its word. */
  inline static size_t wordOffset(size_t b) {
    return b % bitsPerWord();
  }

  /** Mask with only the bit for b set, relative to its word. */
  inline static unsigned long wordOffsetMask(size_t b) {
    return 1UL << wordOffset(b);
  }

  // numBits_ must be declared before data_: data_'s initializer calls
  // numWords(), which reads numBits_.
  size_t numBits_;
  std::vector<unsigned long> data_;
};
|
||||
|
||||
} // namespace rocksdb
|
Loading…
x
Reference in New Issue
Block a user