cff7819dff
Summary: **Context:** Some existing internal calls of `GenericRateLimiter::Request()` in backupable_db.cc, and newly added internal calls in https://github.com/facebook/rocksdb/pull/8722/, do not ensure `bytes <= GetSingleBurstBytes()` as required by rate_limiter.h (https://github.com/facebook/rocksdb/blob/master/include/rocksdb/rate_limiter.h#L47). **Impacts of this bug include:** (1) In a debug build, when `GenericRateLimiter::Request()` requests bytes greater than `GenericRateLimiter::kMinRefillBytesPerPeriod = 100` bytes, the process will crash due to an assertion failure. See https://github.com/facebook/rocksdb/pull/9063#discussion_r737034133 for a possible scenario. (2) In a production build, although the above crash will not occur because assertions are disabled, the bug can cause a request for a small number of bytes to be blocked for a long time by a same-priority request for an extremely large number of bytes from a different thread. See the updated https://github.com/facebook/rocksdb/wiki/Rate-Limiter ("Notice that although....the maximum bytes that can be granted in a single request have to be bounded...") for more info. There is an ongoing effort to move rate limiting to the file wrapper level, so rate limiting in `BackupEngine` and this PR might be made obsolete in the future. **Summary:** - Implemented loop-calling `GenericRateLimiter::Request()` with `bytes <= GetSingleBurstBytes()` as a static private helper function `BackupEngineImpl::LoopRateLimitRequestHelper` -- Considering making this a util function in `RateLimiter` later, or doing something with `RateLimiter::RequestToken()` - Replaced buggy internal callers with this helper function wherever the requested bytes are not pre-limited by `GetSingleBurstBytes()` - Removed the minimum refill bytes per period enforced by `GenericRateLimiter`, since it is useless and prevents testing `GenericRateLimiter` for the extreme case of small refill bytes per period.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9063 Test Plan: - Added a new test that failed the assertion before this change and now passes - It exposed bugs in [the write during creation in `CopyOrCreateFile()`](df7cc66e17/utilities/backupable/backupable_db.cc#L2034-L2043), [the read of table properties in `GetFileDbIdentities()`](df7cc66e17/utilities/backupable/backupable_db.cc#L2372-L2378), [some read of metadata in `BackupMeta::LoadFromFile()`](df7cc66e17/utilities/backupable/backupable_db.cc#L2726) - Passing existing tests Reviewed By: ajkr Differential Revision: D31824535 Pulled By: hx235 fbshipit-source-id: d2b3dea7a64e2a4b1e6a59fca322f0800a4fcbcc
140 lines
4.4 KiB
C++
140 lines
4.4 KiB
C++
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
#pragma once
|
|
|
|
#include <algorithm>
|
|
#include <atomic>
|
|
#include <chrono>
|
|
#include <deque>
|
|
|
|
#include "port/port.h"
|
|
#include "rocksdb/env.h"
|
|
#include "rocksdb/rate_limiter.h"
|
|
#include "rocksdb/status.h"
|
|
#include "rocksdb/system_clock.h"
|
|
#include "util/mutexlock.h"
|
|
#include "util/random.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
class GenericRateLimiter : public RateLimiter {
|
|
public:
|
|
GenericRateLimiter(int64_t refill_bytes, int64_t refill_period_us,
|
|
int32_t fairness, RateLimiter::Mode mode,
|
|
const std::shared_ptr<SystemClock>& clock,
|
|
bool auto_tuned);
|
|
|
|
virtual ~GenericRateLimiter();
|
|
|
|
// This API allows user to dynamically change rate limiter's bytes per second.
|
|
virtual void SetBytesPerSecond(int64_t bytes_per_second) override;
|
|
|
|
// Request for token to write bytes. If this request can not be satisfied,
|
|
// the call is blocked. Caller is responsible to make sure
|
|
// bytes <= GetSingleBurstBytes() and bytes >= 0. Negative bytes
|
|
// passed in will be rounded up to 0.
|
|
using RateLimiter::Request;
|
|
virtual void Request(const int64_t bytes, const Env::IOPriority pri,
|
|
Statistics* stats) override;
|
|
|
|
virtual int64_t GetSingleBurstBytes() const override {
|
|
return refill_bytes_per_period_.load(std::memory_order_relaxed);
|
|
}
|
|
|
|
virtual int64_t GetTotalBytesThrough(
|
|
const Env::IOPriority pri = Env::IO_TOTAL) const override {
|
|
MutexLock g(&request_mutex_);
|
|
if (pri == Env::IO_TOTAL) {
|
|
int64_t total_bytes_through_sum = 0;
|
|
for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) {
|
|
total_bytes_through_sum += total_bytes_through_[i];
|
|
}
|
|
return total_bytes_through_sum;
|
|
}
|
|
return total_bytes_through_[pri];
|
|
}
|
|
|
|
virtual int64_t GetTotalRequests(
|
|
const Env::IOPriority pri = Env::IO_TOTAL) const override {
|
|
MutexLock g(&request_mutex_);
|
|
if (pri == Env::IO_TOTAL) {
|
|
int64_t total_requests_sum = 0;
|
|
for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) {
|
|
total_requests_sum += total_requests_[i];
|
|
}
|
|
return total_requests_sum;
|
|
}
|
|
return total_requests_[pri];
|
|
}
|
|
|
|
virtual Status GetTotalPendingRequests(
|
|
int64_t* total_pending_requests,
|
|
const Env::IOPriority pri = Env::IO_TOTAL) const override {
|
|
assert(total_pending_requests != nullptr);
|
|
MutexLock g(&request_mutex_);
|
|
if (pri == Env::IO_TOTAL) {
|
|
int64_t total_pending_requests_sum = 0;
|
|
for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) {
|
|
total_pending_requests_sum += static_cast<int64_t>(queue_[i].size());
|
|
}
|
|
*total_pending_requests = total_pending_requests_sum;
|
|
} else {
|
|
*total_pending_requests = static_cast<int64_t>(queue_[pri].size());
|
|
}
|
|
return Status::OK();
|
|
}
|
|
|
|
virtual int64_t GetBytesPerSecond() const override {
|
|
return rate_bytes_per_sec_;
|
|
}
|
|
|
|
private:
|
|
void RefillBytesAndGrantRequests();
|
|
std::vector<Env::IOPriority> GeneratePriorityIterationOrder();
|
|
int64_t CalculateRefillBytesPerPeriod(int64_t rate_bytes_per_sec);
|
|
Status Tune();
|
|
|
|
uint64_t NowMicrosMonotonic() { return clock_->NowNanos() / std::milli::den; }
|
|
|
|
// This mutex guard all internal states
|
|
mutable port::Mutex request_mutex_;
|
|
|
|
const int64_t refill_period_us_;
|
|
|
|
int64_t rate_bytes_per_sec_;
|
|
// This variable can be changed dynamically.
|
|
std::atomic<int64_t> refill_bytes_per_period_;
|
|
std::shared_ptr<SystemClock> clock_;
|
|
|
|
bool stop_;
|
|
port::CondVar exit_cv_;
|
|
int32_t requests_to_wait_;
|
|
|
|
int64_t total_requests_[Env::IO_TOTAL];
|
|
int64_t total_bytes_through_[Env::IO_TOTAL];
|
|
int64_t available_bytes_;
|
|
int64_t next_refill_us_;
|
|
|
|
int32_t fairness_;
|
|
Random rnd_;
|
|
|
|
struct Req;
|
|
std::deque<Req*> queue_[Env::IO_TOTAL];
|
|
bool wait_until_refill_pending_;
|
|
|
|
bool auto_tuned_;
|
|
int64_t num_drains_;
|
|
int64_t prev_num_drains_;
|
|
const int64_t max_bytes_per_sec_;
|
|
std::chrono::microseconds tuned_time_;
|
|
};
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|