2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2014-03-24 05:49:14 +01:00
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#include "util/sync_point.h"
|
2016-07-07 20:29:14 +02:00
|
|
|
#include <thread>
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a thin layer for better portability. Less platform-dependent code should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Envs in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-18 01:16:11 +02:00
|
|
|
#include "port/port.h"
|
|
|
|
#include "util/random.h"
|
|
|
|
|
|
|
|
// Global kill-odds setting, initialised to 0. It is not read anywhere in the
// code visible in this file.
// NOTE(review): presumably callers pass this as the `odds` argument of
// TestKillRandom() -- confirm against the kill-point macros.
int rocksdb_kill_odds = 0;
|
2015-10-14 23:08:50 +02:00
|
|
|
// Kill-point name prefixes consulted by TestKillRandom(): a kill point whose
// name starts with any entry in this list returns without crashing.
std::vector<std::string> rocksdb_kill_prefix_blacklist;
|
2014-03-24 05:49:14 +01:00
|
|
|
|
2014-04-17 19:49:58 +02:00
|
|
|
#ifndef NDEBUG
|
2014-03-24 05:49:14 +01:00
|
|
|
namespace rocksdb {
|
|
|
|
|
2015-10-14 23:08:50 +02:00
|
|
|
void TestKillRandom(std::string kill_point, int odds,
|
|
|
|
const std::string& srcfile, int srcline) {
|
|
|
|
for (auto& p : rocksdb_kill_prefix_blacklist) {
|
|
|
|
if (kill_point.substr(0, p.length()) == p) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-18 01:16:11 +02:00
|
|
|
assert(odds > 0);
|
2015-10-27 00:02:32 +01:00
|
|
|
if (odds % 7 == 0) {
|
Crash test to make kill decision for every kill point
Summary:
In crash test, when coming to each kill point, we start a random class using seed as current second. With this approach, for every second, the random number used is the same. However, in each second, there are multiple kill points with different frequency. It makes it hard to reason about chance of kill point to trigger. With this commit, we use thread local random seed to generate the random number, so that it will take different values per second, hoping it makes chances of killing much easier to reason about.
Also significantly reduce the kill odd to make sure time before kiling is similar as before.
Test Plan: Run white box crash test and see the killing happens as expected and the run time time before killing reasonable.
Reviewers: kradhakrishnan, IslamAbdelRahman, rven, yhchiang, andrewkr, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52971
2016-01-19 23:40:54 +01:00
|
|
|
// class Random uses multiplier 16807, which is 7^5. If odds are
|
|
|
|
// multiplier of 7, there might be limited values generated.
|
2015-10-27 00:02:32 +01:00
|
|
|
odds++;
|
|
|
|
}
|
Crash test to make kill decision for every kill point
Summary:
In crash test, when coming to each kill point, we start a random class using seed as current second. With this approach, for every second, the random number used is the same. However, in each second, there are multiple kill points with different frequency. It makes it hard to reason about chance of kill point to trigger. With this commit, we use thread local random seed to generate the random number, so that it will take different values per second, hoping it makes chances of killing much easier to reason about.
Also significantly reduce the kill odd to make sure time before kiling is similar as before.
Test Plan: Run white box crash test and see the killing happens as expected and the run time time before killing reasonable.
Reviewers: kradhakrishnan, IslamAbdelRahman, rven, yhchiang, andrewkr, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52971
2016-01-19 23:40:54 +01:00
|
|
|
auto* r = Random::GetTLSInstance();
|
|
|
|
bool crash = r->OneIn(odds);
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-18 01:16:11 +02:00
|
|
|
if (crash) {
|
|
|
|
port::Crash(srcfile, srcline);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-24 05:49:14 +01:00
|
|
|
// Returns the address of the single SyncPoint object, a function-local
// static that is constructed the first time this function is called.
SyncPoint* SyncPoint::GetInstance() {
  static SyncPoint instance;
  return &instance;
}
|
|
|
|
|
2016-07-07 20:29:14 +02:00
|
|
|
void SyncPoint::LoadDependency(const std::vector<SyncPointPair>& dependencies) {
|
2015-02-27 00:11:50 +01:00
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
2014-03-24 05:49:14 +01:00
|
|
|
successors_.clear();
|
|
|
|
predecessors_.clear();
|
|
|
|
cleared_points_.clear();
|
|
|
|
for (const auto& dependency : dependencies) {
|
|
|
|
successors_[dependency.predecessor].push_back(dependency.successor);
|
|
|
|
predecessors_[dependency.successor].push_back(dependency.predecessor);
|
|
|
|
}
|
2015-02-27 00:11:50 +01:00
|
|
|
cv_.notify_all();
|
2014-03-24 05:49:14 +01:00
|
|
|
}
|
|
|
|
|
2016-07-07 20:29:14 +02:00
|
|
|
void SyncPoint::LoadDependencyAndMarkers(
|
|
|
|
const std::vector<SyncPointPair>& dependencies,
|
|
|
|
const std::vector<SyncPointPair>& markers) {
|
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
successors_.clear();
|
|
|
|
predecessors_.clear();
|
|
|
|
cleared_points_.clear();
|
|
|
|
markers_.clear();
|
|
|
|
marked_thread_id_.clear();
|
|
|
|
for (const auto& dependency : dependencies) {
|
|
|
|
successors_[dependency.predecessor].push_back(dependency.successor);
|
|
|
|
predecessors_[dependency.successor].push_back(dependency.predecessor);
|
|
|
|
}
|
|
|
|
for (const auto& marker : markers) {
|
|
|
|
successors_[marker.predecessor].push_back(marker.successor);
|
|
|
|
predecessors_[marker.successor].push_back(marker.predecessor);
|
|
|
|
markers_[marker.predecessor].push_back(marker.successor);
|
|
|
|
}
|
|
|
|
cv_.notify_all();
|
|
|
|
}
|
|
|
|
|
2014-03-24 05:49:14 +01:00
|
|
|
bool SyncPoint::PredecessorsAllCleared(const std::string& point) {
|
|
|
|
for (const auto& pred : predecessors_[point]) {
|
|
|
|
if (cleared_points_.count(pred) == 0) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
void SyncPoint::SetCallBack(const std::string point,
|
2015-04-14 10:55:19 +02:00
|
|
|
std::function<void(void*)> callback) {
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
callbacks_[point] = callback;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SyncPoint::ClearAllCallBacks() {
|
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
while (num_callbacks_running_ > 0) {
|
|
|
|
cv_.wait(lock);
|
|
|
|
}
|
|
|
|
callbacks_.clear();
|
|
|
|
}
|
|
|
|
|
2014-03-24 05:49:14 +01:00
|
|
|
void SyncPoint::EnableProcessing() {
|
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
enabled_ = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SyncPoint::DisableProcessing() {
|
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
enabled_ = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SyncPoint::ClearTrace() {
|
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
|
|
|
cleared_points_.clear();
|
|
|
|
}
|
|
|
|
|
2016-07-07 20:29:14 +02:00
|
|
|
bool SyncPoint::DisabledByMarker(const std::string& point,
|
|
|
|
std::thread::id thread_id) {
|
|
|
|
auto marked_point_iter = marked_thread_id_.find(point);
|
|
|
|
return marked_point_iter != marked_thread_id_.end() &&
|
|
|
|
thread_id != marked_point_iter->second;
|
|
|
|
}
|
|
|
|
|
2015-04-14 10:55:19 +02:00
|
|
|
void SyncPoint::Process(const std::string& point, void* cb_arg) {
|
2014-03-24 05:49:14 +01:00
|
|
|
std::unique_lock<std::mutex> lock(mutex_);
|
2016-07-07 20:29:14 +02:00
|
|
|
auto thread_id = std::this_thread::get_id();
|
|
|
|
|
|
|
|
auto marker_iter = markers_.find(point);
|
|
|
|
if (marker_iter != markers_.end()) {
|
|
|
|
for (auto marked_point : marker_iter->second) {
|
|
|
|
marked_thread_id_.insert(std::make_pair(marked_point, thread_id));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DisabledByMarker(point, thread_id)) {
|
|
|
|
return;
|
|
|
|
}
|
2014-03-24 05:49:14 +01:00
|
|
|
|
2016-07-07 20:29:14 +02:00
|
|
|
if (!enabled_) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (!PredecessorsAllCleared(point)) {
|
|
|
|
cv_.wait(lock);
|
|
|
|
if (DisabledByMarker(point, thread_id)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
2014-03-24 05:49:14 +01:00
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
auto callback_pair = callbacks_.find(point);
|
|
|
|
if (callback_pair != callbacks_.end()) {
|
|
|
|
num_callbacks_running_++;
|
|
|
|
mutex_.unlock();
|
2015-04-14 10:55:19 +02:00
|
|
|
callback_pair->second(cb_arg);
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
mutex_.lock();
|
|
|
|
num_callbacks_running_--;
|
|
|
|
cv_.notify_all();
|
|
|
|
}
|
|
|
|
|
2014-03-24 05:49:14 +01:00
|
|
|
cleared_points_.insert(point);
|
|
|
|
cv_.notify_all();
|
|
|
|
}
|
|
|
|
} // namespace rocksdb
|
2014-04-17 19:49:58 +02:00
|
|
|
#endif // NDEBUG
|