cd7c4143d7
Summary: Current write stalling system has the problem of lacking of positive feedback if the restricted rate is already too low. Users sometimes stack in very low slowdown value. With the diff, we add a positive feedback (increasing the slowdown value) if we recover from slowdown state back to normal. To avoid the positive feedback to keep the slowdown value to be to high, we add issue a negative feedback every time we are close to the stop condition. Experiments show it is easier to reach a relative balance than before. Also increase level0_stop_writes_trigger default from 24 to 32. Since level0_slowdown_writes_trigger default is 20, stop trigger 24 only gives four files as the buffer time to slowdown writes. In order to avoid stop in four files while 20 files have been accumulated, the slowdown value must be very low, which is amost the same as stop. It also doesn't give enough time for the slowdown value to converge. Increase it to 32 will smooth out the system. Closes https://github.com/facebook/rocksdb/pull/1562 Differential Revision: D4218519 Pulled By: siying fbshipit-source-id: 95e4088
134 lines
4.3 KiB
C++
134 lines
4.3 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under the BSD-style license found in the
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
#pragma once
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <memory>
|
|
|
|
namespace rocksdb {
|
|
|
|
class Env;
|
|
class WriteControllerToken;
|
|
|
|
// WriteController is controlling write stalls in our write code-path. Write
|
|
// stalls happen when compaction can't keep up with write rate.
|
|
// All of the methods here (including WriteControllerToken's destructors) need
|
|
// to be called while holding DB mutex
|
|
class WriteController {
|
|
public:
|
|
explicit WriteController(uint64_t _delayed_write_rate = 1024u * 1024u * 32u)
|
|
: total_stopped_(0),
|
|
total_delayed_(0),
|
|
total_compaction_pressure_(0),
|
|
bytes_left_(0),
|
|
last_refill_time_(0) {
|
|
set_max_delayed_write_rate(_delayed_write_rate);
|
|
}
|
|
~WriteController() = default;
|
|
|
|
// When an actor (column family) requests a stop token, all writes will be
|
|
// stopped until the stop token is released (deleted)
|
|
std::unique_ptr<WriteControllerToken> GetStopToken();
|
|
// When an actor (column family) requests a delay token, total delay for all
|
|
// writes to the DB will be controlled under the delayed write rate. Every
|
|
// write needs to call GetDelay() with number of bytes writing to the DB,
|
|
// which returns number of microseconds to sleep.
|
|
std::unique_ptr<WriteControllerToken> GetDelayToken(
|
|
uint64_t delayed_write_rate);
|
|
// When an actor (column family) requests a moderate token, compaction
|
|
// threads will be increased
|
|
std::unique_ptr<WriteControllerToken> GetCompactionPressureToken();
|
|
|
|
// these three metods are querying the state of the WriteController
|
|
bool IsStopped() const;
|
|
bool NeedsDelay() const { return total_delayed_ > 0; }
|
|
bool NeedSpeedupCompaction() const {
|
|
return IsStopped() || NeedsDelay() || total_compaction_pressure_ > 0;
|
|
}
|
|
// return how many microseconds the caller needs to sleep after the call
|
|
// num_bytes: how many number of bytes to put into the DB.
|
|
// Prerequisite: DB mutex held.
|
|
uint64_t GetDelay(Env* env, uint64_t num_bytes);
|
|
void set_delayed_write_rate(uint64_t write_rate) {
|
|
// avoid divide 0
|
|
if (write_rate == 0) {
|
|
write_rate = 1u;
|
|
} else if (write_rate > max_delayed_write_rate()) {
|
|
write_rate = max_delayed_write_rate();
|
|
}
|
|
delayed_write_rate_ = write_rate;
|
|
}
|
|
|
|
void set_max_delayed_write_rate(uint64_t write_rate) {
|
|
// avoid divide 0
|
|
if (write_rate == 0) {
|
|
write_rate = 1u;
|
|
}
|
|
max_delayed_write_rate_ = write_rate;
|
|
// update delayed_write_rate_ as well
|
|
delayed_write_rate_ = write_rate;
|
|
}
|
|
|
|
uint64_t delayed_write_rate() const { return delayed_write_rate_; }
|
|
|
|
uint64_t max_delayed_write_rate() const { return max_delayed_write_rate_; }
|
|
|
|
private:
|
|
friend class WriteControllerToken;
|
|
friend class StopWriteToken;
|
|
friend class DelayWriteToken;
|
|
friend class CompactionPressureToken;
|
|
|
|
int total_stopped_;
|
|
int total_delayed_;
|
|
int total_compaction_pressure_;
|
|
uint64_t bytes_left_;
|
|
uint64_t last_refill_time_;
|
|
// write rate set when initialization or by `DBImpl::SetDBOptions`
|
|
uint64_t max_delayed_write_rate_;
|
|
// current write rate
|
|
uint64_t delayed_write_rate_;
|
|
};
|
|
|
|
class WriteControllerToken {
|
|
public:
|
|
explicit WriteControllerToken(WriteController* controller)
|
|
: controller_(controller) {}
|
|
virtual ~WriteControllerToken() {}
|
|
|
|
protected:
|
|
WriteController* controller_;
|
|
|
|
private:
|
|
// no copying allowed
|
|
WriteControllerToken(const WriteControllerToken&) = delete;
|
|
void operator=(const WriteControllerToken&) = delete;
|
|
};
|
|
|
|
class StopWriteToken : public WriteControllerToken {
|
|
public:
|
|
explicit StopWriteToken(WriteController* controller)
|
|
: WriteControllerToken(controller) {}
|
|
virtual ~StopWriteToken();
|
|
};
|
|
|
|
class DelayWriteToken : public WriteControllerToken {
|
|
public:
|
|
explicit DelayWriteToken(WriteController* controller)
|
|
: WriteControllerToken(controller) {}
|
|
virtual ~DelayWriteToken();
|
|
};
|
|
|
|
class CompactionPressureToken : public WriteControllerToken {
|
|
public:
|
|
explicit CompactionPressureToken(WriteController* controller)
|
|
: WriteControllerToken(controller) {}
|
|
virtual ~CompactionPressureToken();
|
|
};
|
|
|
|
} // namespace rocksdb
|