3dff28cf9b
Summary:
For performance purposes, the lower level routines were changed to use a
SystemClock* instead of a std::shared_ptr<SystemClock>. The shared_ptr has
some performance degradation on certain hardware classes.

For most of the system, there is no risk of the pointer being
deleted/invalid, because the shared_ptr is stored elsewhere. For example,
the ImmutableDBOptions stores the Env, which holds a
std::shared_ptr<SystemClock> in it. The SystemClock* within the
ImmutableDBOptions is essentially a "short cut" to gain access to this
constant resource. There were a few classes (PeriodicWorkScheduler?) where
the "short cut" property did not hold. In those cases, the shared pointer
was preserved.

Using db_bench readrandom with perf_level=3 on my EC2 box, this change
performed as well as or better than 6.17:

6.17: readrandom : 28.046 micros/op 854902 ops/sec; 61.3 MB/s (355999 of 355999 found)
6.18: readrandom : 32.615 micros/op 735306 ops/sec; 52.7 MB/s (290999 of 290999 found)
PR:   readrandom : 27.500 micros/op 871909 ops/sec; 62.5 MB/s (367999 of 367999 found)

(Note that the times for 6.18 are prior to the revert of the SystemClock
change.)

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8033
Reviewed By: pdillinger
Differential Revision: D27014563
Pulled By: mrambacher
fbshipit-source-id: ad0459eba03182e454391b5926bf5cdd45657b67
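To illustrate the ownership pattern the summary describes, here is a minimal
sketch. The names EnvLike and ImmutableDBOptionsLike are hypothetical stand-ins,
not the actual RocksDB classes: the shared_ptr owner keeps the clock alive, and
dependent objects hold a raw SystemClock* "short cut" into it.

    #include <cstdint>
    #include <memory>

    class SystemClock {
     public:
      virtual ~SystemClock() = default;
      virtual uint64_t NowNanos() = 0;
    };

    struct EnvLike {
      // Owning reference: keeps the SystemClock alive for the DB's lifetime.
      std::shared_ptr<SystemClock> clock;
    };

    struct ImmutableDBOptionsLike {
      explicit ImmutableDBOptionsLike(const EnvLike& env)
          : clock(env.clock.get()) {}  // raw-pointer "short cut"; safe only
                                       // because env outlives these options
      SystemClock* clock;
    };
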
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "db/write_controller.h"
|
|
|
|
#include <atomic>
|
|
#include <cassert>
|
|
#include <ratio>
|
|
|
|
#include "rocksdb/system_clock.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
std::unique_ptr<WriteControllerToken> WriteController::GetStopToken() {
  ++total_stopped_;
  return std::unique_ptr<WriteControllerToken>(new StopWriteToken(this));
}

std::unique_ptr<WriteControllerToken> WriteController::GetDelayToken(
    uint64_t write_rate) {
  total_delayed_++;
  // Reset counters.
  last_refill_time_ = 0;
  bytes_left_ = 0;
  set_delayed_write_rate(write_rate);
  return std::unique_ptr<WriteControllerToken>(new DelayWriteToken(this));
}

std::unique_ptr<WriteControllerToken>
WriteController::GetCompactionPressureToken() {
  ++total_compaction_pressure_;
  return std::unique_ptr<WriteControllerToken>(
      new CompactionPressureToken(this));
}

bool WriteController::IsStopped() const {
  return total_stopped_.load(std::memory_order_relaxed) > 0;
}
// This is called inside the DB mutex, so we can't sleep and need to minimize
// how frequently we fetch the time.
// If it turns out to be a performance issue, we can redesign the thread
// synchronization model here.
// The function trusts that the caller will sleep for the number of micros
// returned.
uint64_t WriteController::GetDelay(SystemClock* clock, uint64_t num_bytes) {
  if (total_stopped_.load(std::memory_order_relaxed) > 0) {
    return 0;
  }
  if (total_delayed_.load(std::memory_order_relaxed) == 0) {
    return 0;
  }

  const uint64_t kMicrosPerSecond = 1000000;
  const uint64_t kRefillInterval = 1024U;

  if (bytes_left_ >= num_bytes) {
    bytes_left_ -= num_bytes;
    return 0;
  }
  // The time is fetched inside the DB mutex less than once per refill
  // interval.
  auto time_now = NowMicrosMonotonic(clock);

  uint64_t sleep_debt = 0;
  uint64_t time_since_last_refill = 0;
  if (last_refill_time_ != 0) {
    if (last_refill_time_ > time_now) {
      sleep_debt = last_refill_time_ - time_now;
    } else {
      time_since_last_refill = time_now - last_refill_time_;
      bytes_left_ +=
          static_cast<uint64_t>(static_cast<double>(time_since_last_refill) /
                                kMicrosPerSecond * delayed_write_rate_);
      if (time_since_last_refill >= kRefillInterval &&
          bytes_left_ > num_bytes) {
        // If a refill interval has already passed and we have enough bytes,
        // return without extra sleeping.
        last_refill_time_ = time_now;
        bytes_left_ -= num_bytes;
        return 0;
      }
    }
  }

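  // Illustrative arithmetic (an assumed rate, not from the original source):
  // at delayed_write_rate_ = 16'000'000 bytes/sec and kRefillInterval =
  // 1024 microseconds, a single interval refills
  // 16'000'000 * 1024 / 1'000'000 = 16'384 bytes.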
  uint64_t single_refill_amount =
      delayed_write_rate_ * kRefillInterval / kMicrosPerSecond;
  if (bytes_left_ + single_refill_amount >= num_bytes) {
    // Wait for one refill interval. Never expire in less than one refill
    // interval, to avoid fetching the time too frequently.
    bytes_left_ = bytes_left_ + single_refill_amount - num_bytes;
    last_refill_time_ = time_now + kRefillInterval;
    return kRefillInterval + sleep_debt;
  }

  // Need to refill more than one interval's worth, so we need to sleep
  // longer.

  // Sleep just until `num_bytes` is allowed.
  uint64_t sleep_amount =
      static_cast<uint64_t>(num_bytes /
                            static_cast<long double>(delayed_write_rate_) *
                            kMicrosPerSecond) +
      sleep_debt;
  last_refill_time_ = time_now + sleep_amount;
  return sleep_amount;
}

uint64_t WriteController::NowMicrosMonotonic(SystemClock* clock) {
  // NowNanos() is in nanoseconds; dividing by std::milli::den (1000)
  // yields microseconds.
  return clock->NowNanos() / std::milli::den;
}

StopWriteToken::~StopWriteToken() {
  assert(controller_->total_stopped_ >= 1);
  --controller_->total_stopped_;
}

DelayWriteToken::~DelayWriteToken() {
  controller_->total_delayed_--;
  assert(controller_->total_delayed_.load() >= 0);
}

CompactionPressureToken::~CompactionPressureToken() {
  controller_->total_compaction_pressure_--;
  assert(controller_->total_compaction_pressure_ >= 0);
}

}  // namespace ROCKSDB_NAMESPACE
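
// Usage sketch (illustrative, not part of the original file): how a write
// path might combine the token with GetDelay(). Assumes a WriteController
// `wc` and a SystemClock* `clock`, and that SystemClock provides
// SleepForMicroseconds() as declared in rocksdb/system_clock.h.
//
//   auto token = wc.GetDelayToken(16 << 20);  // throttle writes to ~16 MB/s
//   uint64_t micros = wc.GetDelay(clock, num_bytes);
//   if (micros > 0) {
//     clock->SleepForMicroseconds(static_cast<int>(micros));
//   }
//   // Destroying `token` decrements total_delayed_ and ends the delay state.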