options.delayed_write_rate use the rate of rate_limiter by default.

Summary:
It's hard for RocksDB to come up with a good default for the delayed write rate. Use the rate given by the rate limiter if it is available. This provides the I/O order of magnitude.
Closes https://github.com/facebook/rocksdb/pull/2357

Differential Revision: D5115324

Pulled By: siying

fbshipit-source-id: 341065ad2211c981fc804011c0f0e59a50c7e754
This commit is contained in:
Siying Dong 2017-05-24 09:52:08 -07:00 committed by Facebook Github Bot
parent 5068034666
commit 41cbb72749
9 changed files with 47 additions and 4 deletions

View File

@ -1,5 +1,8 @@
# RocksDB default options change log
## Unreleased
* delayed_write_rate takes the rate given by rate_limiter if not specified.
## 5.2
* Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files.
## 5.0 (11/17/2016)

View File

@ -1,7 +1,8 @@
# Rocksdb Change Log
## Unreleased
### Public API Change
*Scheduling flushes and compactions in the same thread pool is no longer supported by setting `max_background_flushes=0`. Instead, users can achieve this by configuring their high-pri thread pool to have zero threads.
* Scheduling flushes and compactions in the same thread pool is no longer supported by setting `max_background_flushes=0`. Instead, users can achieve this by configuring their high-pri thread pool to have zero threads.
* options.delayed_write_rate by default takes the rate of options.rate_limiter, if one is configured.
### New Features
* Change ticker/histogram statistics implementations to use core-local storage. This improves aggregation speed compared to our previous thread-local approach, particularly for applications with many threads.

View File

@ -18,6 +18,7 @@
#include "db/builder.h"
#include "options/options_helper.h"
#include "rocksdb/wal_filter.h"
#include "util/rate_limiter.h"
#include "util/sst_file_manager_impl.h"
#include "util/sync_point.h"
@ -72,6 +73,15 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
}
}
if (result.delayed_write_rate == 0) {
if (result.rate_limiter.get() != nullptr) {
result.delayed_write_rate = result.rate_limiter->GetBytesPerSecond();
}
if (result.delayed_write_rate == 0) {
result.delayed_write_rate = 16 * 1024 * 1024;
}
}
if (result.WAL_ttl_seconds > 0 || result.WAL_size_limit_MB > 0) {
result.recycle_log_file_num = false;
}

View File

@ -19,6 +19,7 @@
#include "port/stack_trace.h"
#include "rocksdb/cache.h"
#include "rocksdb/convenience.h"
#include "rocksdb/rate_limiter.h"
#include "util/random.h"
#include "util/sync_point.h"
#include "util/testutil.h"
@ -404,6 +405,17 @@ TEST_F(DBOptionsTest, MaxOpenFilesChange) {
Close();
}
// Checks how a zero `delayed_write_rate` is resolved when the DB is opened:
// with no rate limiter it becomes 16MB/s, and with a rate limiter it becomes
// the rate the limiter was created with.
TEST_F(DBOptionsTest, SanitizeDelayedWriteRate) {
  const int kFallbackRate = 16 * 1024 * 1024;
  const int kLimiterRate = 31 * 1024 * 1024;

  Options opts;
  opts.delayed_write_rate = 0;

  // Case 1: no rate limiter is configured.
  Reopen(opts);
  ASSERT_EQ(kFallbackRate, dbfull()->GetDBOptions().delayed_write_rate);

  // Case 2: a rate limiter is configured, so its rate is picked up.
  opts.rate_limiter.reset(NewGenericRateLimiter(kLimiterRate));
  Reopen(opts);
  ASSERT_EQ(kLimiterRate, dbfull()->GetDBOptions().delayed_write_rate);
}
#endif // ROCKSDB_LITE
} // namespace rocksdb

View File

@ -746,10 +746,15 @@ struct DBOptions {
// calculated using size of user write requests before compression.
// RocksDB may decide to slow down more if the compaction still
// gets behind further.
// If the value is 0, we will infer a value from `rate_limiter` value
// if it is not empty, or 16MB if `rate_limiter` is empty. Note that
// if users change the rate in `rate_limiter` after DB is opened,
// `delayed_write_rate` won't be adjusted.
//
// Unit: byte per second.
//
// Default: 16MB/s
uint64_t delayed_write_rate = 16 * 1024U * 1024U;
// Default: 0
uint64_t delayed_write_rate = 0;
// By default, a single write thread queue is maintained. The thread gets
// to the head of the queue becomes write batch group leader and responsible

View File

@ -53,6 +53,8 @@ class RateLimiter {
// Total # of requests that go through rate limiter
virtual int64_t GetTotalRequests(
const Env::IOPriority pri = Env::IO_TOTAL) const = 0;
// Returns the rate of this limiter, in bytes per second.
// Pure virtual: every concrete RateLimiter must provide it.
virtual int64_t GetBytesPerSecond() const = 0;
};
// Create a RateLimiter object, which can be shared among RocksDB instances to

View File

@ -486,8 +486,10 @@ DBOptions* DBOptions::OldDefaults(int rocksdb_major_version,
if (rocksdb_major_version < 5 ||
(rocksdb_major_version == 5 && rocksdb_minor_version < 2)) {
delayed_write_rate = 2 * 1024U * 1024U;
} else if (rocksdb_major_version < 5 ||
(rocksdb_major_version == 5 && rocksdb_minor_version < 6)) {
delayed_write_rate = 16 * 1024U * 1024U;
}
max_open_files = 5000;
base_background_compactions = -1;
wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;

View File

@ -32,6 +32,7 @@ GenericRateLimiter::GenericRateLimiter(int64_t rate_bytes_per_sec,
int64_t refill_period_us,
int32_t fairness)
: refill_period_us_(refill_period_us),
rate_bytes_per_sec_(rate_bytes_per_sec),
refill_bytes_per_period_(
CalculateRefillBytesPerPeriod(rate_bytes_per_sec)),
env_(Env::Default()),
@ -68,6 +69,7 @@ GenericRateLimiter::~GenericRateLimiter() {
// This API allows user to dynamically change rate limiter's bytes per second.
void GenericRateLimiter::SetBytesPerSecond(int64_t bytes_per_second) {
assert(bytes_per_second > 0);
rate_bytes_per_sec_ = bytes_per_second;
refill_bytes_per_period_.store(
CalculateRefillBytesPerPeriod(bytes_per_second),
std::memory_order_relaxed);

View File

@ -62,6 +62,10 @@ class GenericRateLimiter : public RateLimiter {
return total_requests_[pri];
}
// Returns the current value of rate_bytes_per_sec_, i.e. the rate passed to
// the constructor or the one most recently given to SetBytesPerSecond().
// NOTE(review): rate_bytes_per_sec_ is a plain int64_t (unlike the atomic
// refill_bytes_per_period_) and SetBytesPerSecond() writes it; confirm
// whether callers may read and write it concurrently.
virtual int64_t GetBytesPerSecond() const override {
return rate_bytes_per_sec_;
}
private:
void Refill();
int64_t CalculateRefillBytesPerPeriod(int64_t rate_bytes_per_sec);
@ -75,6 +79,8 @@ class GenericRateLimiter : public RateLimiter {
const int64_t kMinRefillBytesPerPeriod = 100;
const int64_t refill_period_us_;
int64_t rate_bytes_per_sec_;
// This variable can be changed dynamically.
std::atomic<int64_t> refill_bytes_per_period_;
Env* const env_;