2014-03-23 21:49:14 -07:00
|
|
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#include "util/sync_point.h"
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a thin layer for better portability. Less platform-dependent code should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and move rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Envs in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 16:16:11 -07:00
|
|
|
#include "port/port.h"
|
|
|
|
#include "util/random.h"
|
|
|
|
|
|
|
|
// Global odds value for random kill testing, zero-initialized. Defined at
// global scope ahead of the `#ifndef NDEBUG` guard, so it is compiled into
// every build type.
// NOTE(review): not read anywhere in this file -- presumably consumed by the
// kill-point macros declared elsewhere; confirm against the header.
int rocksdb_kill_odds = 0;
|
2015-10-14 14:08:50 -07:00
|
|
|
// Kill-point name prefixes that are exempt from random kills:
// TestKillRandom() returns without crashing when its kill_point argument
// starts with any entry of this list. Defined at global scope ahead of the
// `#ifndef NDEBUG` guard.
std::vector<std::string> rocksdb_kill_prefix_blacklist;
|
2014-03-23 21:49:14 -07:00
|
|
|
|
2014-04-17 10:49:58 -07:00
|
|
|
#ifndef NDEBUG
|
2014-03-23 21:49:14 -07:00
|
|
|
namespace rocksdb {
|
|
|
|
|
2015-10-14 14:08:50 -07:00
|
|
|
void TestKillRandom(std::string kill_point, int odds,
|
|
|
|
const std::string& srcfile, int srcline) {
|
|
|
|
for (auto& p : rocksdb_kill_prefix_blacklist) {
|
|
|
|
if (kill_point.substr(0, p.length()) == p) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 16:16:11 -07:00
|
|
|
time_t curtime = time(nullptr);
|
|
|
|
Random r((uint32_t)curtime);
|
|
|
|
|
|
|
|
assert(odds > 0);
|
2015-10-26 16:02:32 -07:00
|
|
|
if (odds % 7 == 0) {
|
|
|
|
// class Rarndom uses multiplier 16807, which is 7^5. If odds are
|
|
|
|
// multiplier of 7, the first random value might have limited values.
|
|
|
|
odds++;
|
|
|
|
}
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 16:16:11 -07:00
|
|
|
bool crash = r.OneIn(odds);
|
|
|
|
if (crash) {
|
|
|
|
port::Crash(srcfile, srcline);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-23 21:49:14 -07:00
|
|
|
// Returns the address of the single SyncPoint object, a function-local
// static that is constructed the first time this function is called.
SyncPoint* SyncPoint::GetInstance() {
  static SyncPoint instance;
  return &instance;
}
|
|
|
|
|
|
|
|
void SyncPoint::LoadDependency(const std::vector<Dependency>& dependencies) {
|
2015-02-26 15:11:50 -08:00
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
2014-03-23 21:49:14 -07:00
|
|
|
successors_.clear();
|
|
|
|
predecessors_.clear();
|
|
|
|
cleared_points_.clear();
|
|
|
|
for (const auto& dependency : dependencies) {
|
|
|
|
successors_[dependency.predecessor].push_back(dependency.successor);
|
|
|
|
predecessors_[dependency.successor].push_back(dependency.predecessor);
|
|
|
|
}
|
2015-02-26 15:11:50 -08:00
|
|
|
cv_.notify_all();
|
2014-03-23 21:49:14 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
bool SyncPoint::PredecessorsAllCleared(const std::string& point) {
|
|
|
|
for (const auto& pred : predecessors_[point]) {
|
|
|
|
if (cleared_points_.count(pred) == 0) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 11:44:17 -08:00
|
|
|
void SyncPoint::SetCallBack(const std::string point,
|
2015-04-14 01:55:19 -07:00
|
|
|
std::function<void(void*)> callback) {
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 11:44:17 -08:00
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
callbacks_[point] = callback;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SyncPoint::ClearAllCallBacks() {
|
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
while (num_callbacks_running_ > 0) {
|
|
|
|
cv_.wait(lock);
|
|
|
|
}
|
|
|
|
callbacks_.clear();
|
|
|
|
}
|
|
|
|
|
2014-03-23 21:49:14 -07:00
|
|
|
void SyncPoint::EnableProcessing() {
|
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
enabled_ = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SyncPoint::DisableProcessing() {
|
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
enabled_ = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SyncPoint::ClearTrace() {
|
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
cleared_points_.clear();
|
|
|
|
}
|
|
|
|
|
2015-04-14 01:55:19 -07:00
|
|
|
void SyncPoint::Process(const std::string& point, void* cb_arg) {
|
2014-03-23 21:49:14 -07:00
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
|
|
|
|
if (!enabled_) return;
|
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 11:44:17 -08:00
|
|
|
auto callback_pair = callbacks_.find(point);
|
|
|
|
if (callback_pair != callbacks_.end()) {
|
|
|
|
num_callbacks_running_++;
|
|
|
|
mutex_.unlock();
|
2015-04-14 01:55:19 -07:00
|
|
|
callback_pair->second(cb_arg);
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 11:44:17 -08:00
|
|
|
mutex_.lock();
|
|
|
|
num_callbacks_running_--;
|
|
|
|
cv_.notify_all();
|
|
|
|
}
|
|
|
|
|
2014-03-23 21:49:14 -07:00
|
|
|
while (!PredecessorsAllCleared(point)) {
|
|
|
|
cv_.wait(lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
cleared_points_.insert(point);
|
|
|
|
cv_.notify_all();
|
|
|
|
}
|
|
|
|
} // namespace rocksdb
|
2014-04-17 10:49:58 -07:00
|
|
|
#endif // NDEBUG
|