options.delayed_write_rate uses the rate of rate_limiter by default.
Summary: It's hard for RocksDB to come up with a good default for the delayed write rate. Use the rate given by the rate limiter if it is available. This provides the I/O order of magnitude. Closes https://github.com/facebook/rocksdb/pull/2357 Differential Revision: D5115324 Pulled By: siying fbshipit-source-id: 341065ad2211c981fc804011c0f0e59a50c7e754
This commit is contained in:
parent
5068034666
commit
41cbb72749
@ -1,5 +1,8 @@
|
|||||||
# RocksDB default options change log
|
# RocksDB default options change log
|
||||||
## Unreleased
|
## Unreleased
|
||||||
|
* delayed_write_rate takes the rate given by rate_limiter if not specified.
|
||||||
|
|
||||||
|
## 5.2
|
||||||
* Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files.
|
* Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files.
|
||||||
|
|
||||||
## 5.0 (11/17/2016)
|
## 5.0 (11/17/2016)
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
# Rocksdb Change Log
|
# Rocksdb Change Log
|
||||||
## Unreleased
|
## Unreleased
|
||||||
### Public API Change
|
### Public API Change
|
||||||
*Scheduling flushes and compactions in the same thread pool is no longer supported by setting `max_background_flushes=0`. Instead, users can achieve this by configuring their high-pri thread pool to have zero threads.
|
* Scheduling flushes and compactions in the same thread pool is no longer supported by setting `max_background_flushes=0`. Instead, users can achieve this by configuring their high-pri thread pool to have zero threads.
|
||||||
|
* options.delayed_write_rate takes the value of options.rate_limiter rate by default.
|
||||||
|
|
||||||
### New Features
|
### New Features
|
||||||
* Change ticker/histogram statistics implementations to use core-local storage. This improves aggregation speed compared to our previous thread-local approach, particularly for applications with many threads.
|
* Change ticker/histogram statistics implementations to use core-local storage. This improves aggregation speed compared to our previous thread-local approach, particularly for applications with many threads.
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include "db/builder.h"
|
#include "db/builder.h"
|
||||||
#include "options/options_helper.h"
|
#include "options/options_helper.h"
|
||||||
#include "rocksdb/wal_filter.h"
|
#include "rocksdb/wal_filter.h"
|
||||||
|
#include "util/rate_limiter.h"
|
||||||
#include "util/sst_file_manager_impl.h"
|
#include "util/sst_file_manager_impl.h"
|
||||||
#include "util/sync_point.h"
|
#include "util/sync_point.h"
|
||||||
|
|
||||||
@ -72,6 +73,15 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (result.delayed_write_rate == 0) {
|
||||||
|
if (result.rate_limiter.get() != nullptr) {
|
||||||
|
result.delayed_write_rate = result.rate_limiter->GetBytesPerSecond();
|
||||||
|
}
|
||||||
|
if (result.delayed_write_rate == 0) {
|
||||||
|
result.delayed_write_rate = 16 * 1024 * 1024;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (result.WAL_ttl_seconds > 0 || result.WAL_size_limit_MB > 0) {
|
if (result.WAL_ttl_seconds > 0 || result.WAL_size_limit_MB > 0) {
|
||||||
result.recycle_log_file_num = false;
|
result.recycle_log_file_num = false;
|
||||||
}
|
}
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
#include "port/stack_trace.h"
|
#include "port/stack_trace.h"
|
||||||
#include "rocksdb/cache.h"
|
#include "rocksdb/cache.h"
|
||||||
#include "rocksdb/convenience.h"
|
#include "rocksdb/convenience.h"
|
||||||
|
#include "rocksdb/rate_limiter.h"
|
||||||
#include "util/random.h"
|
#include "util/random.h"
|
||||||
#include "util/sync_point.h"
|
#include "util/sync_point.h"
|
||||||
#include "util/testutil.h"
|
#include "util/testutil.h"
|
||||||
@ -404,6 +405,17 @@ TEST_F(DBOptionsTest, MaxOpenFilesChange) {
|
|||||||
Close();
|
Close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(DBOptionsTest, SanitizeDelayedWriteRate) {
|
||||||
|
Options options;
|
||||||
|
options.delayed_write_rate = 0;
|
||||||
|
Reopen(options);
|
||||||
|
ASSERT_EQ(16 * 1024 * 1024, dbfull()->GetDBOptions().delayed_write_rate);
|
||||||
|
|
||||||
|
options.rate_limiter.reset(NewGenericRateLimiter(31 * 1024 * 1024));
|
||||||
|
Reopen(options);
|
||||||
|
ASSERT_EQ(31 * 1024 * 1024, dbfull()->GetDBOptions().delayed_write_rate);
|
||||||
|
}
|
||||||
|
|
||||||
#endif // ROCKSDB_LITE
|
#endif // ROCKSDB_LITE
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -746,10 +746,15 @@ struct DBOptions {
|
|||||||
// calculated using size of user write requests before compression.
|
// calculated using size of user write requests before compression.
|
||||||
// RocksDB may decide to slow down more if the compaction still
|
// RocksDB may decide to slow down more if the compaction still
|
||||||
// gets behind further.
|
// gets behind further.
|
||||||
|
// If the value is 0, we will infer a value from `rate_limiter` value
|
||||||
|
// if it is not empty, or 16MB if `rate_limiter` is empty. Note that
|
||||||
|
// if users change the rate in `rate_limiter` after DB is opened,
|
||||||
|
// `delayed_write_rate` won't be adjusted.
|
||||||
|
//
|
||||||
// Unit: byte per second.
|
// Unit: byte per second.
|
||||||
//
|
//
|
||||||
// Default: 16MB/s
|
// Default: 0
|
||||||
uint64_t delayed_write_rate = 16 * 1024U * 1024U;
|
uint64_t delayed_write_rate = 0;
|
||||||
|
|
||||||
// By default, a single write thread queue is maintained. The thread gets
|
// By default, a single write thread queue is maintained. The thread gets
|
||||||
// to the head of the queue becomes write batch group leader and responsible
|
// to the head of the queue becomes write batch group leader and responsible
|
||||||
|
@ -53,6 +53,8 @@ class RateLimiter {
|
|||||||
// Total # of requests that go through rate limiter
|
// Total # of requests that go through rate limiter
|
||||||
virtual int64_t GetTotalRequests(
|
virtual int64_t GetTotalRequests(
|
||||||
const Env::IOPriority pri = Env::IO_TOTAL) const = 0;
|
const Env::IOPriority pri = Env::IO_TOTAL) const = 0;
|
||||||
|
|
||||||
|
virtual int64_t GetBytesPerSecond() const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Create a RateLimiter object, which can be shared among RocksDB instances to
|
// Create a RateLimiter object, which can be shared among RocksDB instances to
|
||||||
|
@ -486,8 +486,10 @@ DBOptions* DBOptions::OldDefaults(int rocksdb_major_version,
|
|||||||
if (rocksdb_major_version < 5 ||
|
if (rocksdb_major_version < 5 ||
|
||||||
(rocksdb_major_version == 5 && rocksdb_minor_version < 2)) {
|
(rocksdb_major_version == 5 && rocksdb_minor_version < 2)) {
|
||||||
delayed_write_rate = 2 * 1024U * 1024U;
|
delayed_write_rate = 2 * 1024U * 1024U;
|
||||||
|
} else if (rocksdb_major_version < 5 ||
|
||||||
|
(rocksdb_major_version == 5 && rocksdb_minor_version < 6)) {
|
||||||
|
delayed_write_rate = 16 * 1024U * 1024U;
|
||||||
}
|
}
|
||||||
|
|
||||||
max_open_files = 5000;
|
max_open_files = 5000;
|
||||||
base_background_compactions = -1;
|
base_background_compactions = -1;
|
||||||
wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;
|
wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;
|
||||||
|
@ -32,6 +32,7 @@ GenericRateLimiter::GenericRateLimiter(int64_t rate_bytes_per_sec,
|
|||||||
int64_t refill_period_us,
|
int64_t refill_period_us,
|
||||||
int32_t fairness)
|
int32_t fairness)
|
||||||
: refill_period_us_(refill_period_us),
|
: refill_period_us_(refill_period_us),
|
||||||
|
rate_bytes_per_sec_(rate_bytes_per_sec),
|
||||||
refill_bytes_per_period_(
|
refill_bytes_per_period_(
|
||||||
CalculateRefillBytesPerPeriod(rate_bytes_per_sec)),
|
CalculateRefillBytesPerPeriod(rate_bytes_per_sec)),
|
||||||
env_(Env::Default()),
|
env_(Env::Default()),
|
||||||
@ -68,6 +69,7 @@ GenericRateLimiter::~GenericRateLimiter() {
|
|||||||
// This API allows user to dynamically change rate limiter's bytes per second.
|
// This API allows user to dynamically change rate limiter's bytes per second.
|
||||||
void GenericRateLimiter::SetBytesPerSecond(int64_t bytes_per_second) {
|
void GenericRateLimiter::SetBytesPerSecond(int64_t bytes_per_second) {
|
||||||
assert(bytes_per_second > 0);
|
assert(bytes_per_second > 0);
|
||||||
|
rate_bytes_per_sec_ = bytes_per_second;
|
||||||
refill_bytes_per_period_.store(
|
refill_bytes_per_period_.store(
|
||||||
CalculateRefillBytesPerPeriod(bytes_per_second),
|
CalculateRefillBytesPerPeriod(bytes_per_second),
|
||||||
std::memory_order_relaxed);
|
std::memory_order_relaxed);
|
||||||
|
@ -62,6 +62,10 @@ class GenericRateLimiter : public RateLimiter {
|
|||||||
return total_requests_[pri];
|
return total_requests_[pri];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual int64_t GetBytesPerSecond() const override {
|
||||||
|
return rate_bytes_per_sec_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Refill();
|
void Refill();
|
||||||
int64_t CalculateRefillBytesPerPeriod(int64_t rate_bytes_per_sec);
|
int64_t CalculateRefillBytesPerPeriod(int64_t rate_bytes_per_sec);
|
||||||
@ -75,6 +79,8 @@ class GenericRateLimiter : public RateLimiter {
|
|||||||
const int64_t kMinRefillBytesPerPeriod = 100;
|
const int64_t kMinRefillBytesPerPeriod = 100;
|
||||||
|
|
||||||
const int64_t refill_period_us_;
|
const int64_t refill_period_us_;
|
||||||
|
|
||||||
|
int64_t rate_bytes_per_sec_;
|
||||||
// This variable can be changed dynamically.
|
// This variable can be changed dynamically.
|
||||||
std::atomic<int64_t> refill_bytes_per_period_;
|
std::atomic<int64_t> refill_bytes_per_period_;
|
||||||
Env* const env_;
|
Env* const env_;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user