Add more timers to perf_context and iostats_context
Summary: We occasionally get write stalls (Write() calls taking >1s) on HDD under read load. The following timers explain almost all of the stalls:
- perf_context.db_mutex_lock_nanos
- perf_context.db_condition_wait_nanos
- iostats_context.open_nanos
- iostats_context.allocate_nanos
- iostats_context.write_nanos
- iostats_context.range_sync_nanos
- iostats_context.logger_nanos

In my experiments, each of these occasionally takes >1s on the write path under some workloads. There are rare cases when Write() takes a long time but none of these timers does.

Test Plan: Added code to our application to log the listed timings for slow writes. They usually add up to almost exactly the time the Write() call took.

Reviewers: rven, yhchiang, sdong
Reviewed By: sdong
Subscribers: march, dhruba, tnovak
Differential Revision: https://reviews.facebook.net/D39177
This commit is contained in:
parent
4266d4fd90
commit
ec7a944360
@ -3322,7 +3322,10 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
|
|||||||
if (context.schedule_bg_work_) {
|
if (context.schedule_bg_work_) {
|
||||||
MaybeScheduleFlushOrCompaction();
|
MaybeScheduleFlushOrCompaction();
|
||||||
}
|
}
|
||||||
|
PERF_TIMER_STOP(write_pre_and_post_process_time);
|
||||||
|
PERF_TIMER_GUARD(write_delay_time);
|
||||||
status = DelayWrite(expiration_time);
|
status = DelayWrite(expiration_time);
|
||||||
|
PERF_TIMER_START(write_pre_and_post_process_time);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (UNLIKELY(status.ok() && has_timeout &&
|
if (UNLIKELY(status.ok() && has_timeout &&
|
||||||
|
@ -9,7 +9,11 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "rocksdb/perf_level.h"
|
||||||
|
|
||||||
// A thread local context for gathering io-stats efficiently and transparently.
|
// A thread local context for gathering io-stats efficiently and transparently.
|
||||||
|
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
struct IOStatsContext {
|
struct IOStatsContext {
|
||||||
@ -25,6 +29,18 @@ struct IOStatsContext {
|
|||||||
uint64_t bytes_written;
|
uint64_t bytes_written;
|
||||||
// number of bytes that has been read.
|
// number of bytes that has been read.
|
||||||
uint64_t bytes_read;
|
uint64_t bytes_read;
|
||||||
|
|
||||||
|
// time spent in open() and fopen().
|
||||||
|
uint64_t open_nanos;
|
||||||
|
// time spent in fallocate().
|
||||||
|
uint64_t allocate_nanos;
|
||||||
|
// time spent in write() and pwrite().
|
||||||
|
uint64_t write_nanos;
|
||||||
|
// time spent in sync_file_range().
|
||||||
|
uint64_t range_sync_nanos;
|
||||||
|
|
||||||
|
// time spent in Logger::Logv().
|
||||||
|
uint64_t logger_nanos;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef IOS_CROSS_COMPILE
|
#ifndef IOS_CROSS_COMPILE
|
||||||
|
@ -9,22 +9,13 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "rocksdb/perf_level.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
enum PerfLevel {
|
|
||||||
kDisable = 0, // disable perf stats
|
|
||||||
kEnableCount = 1, // enable only count stats
|
|
||||||
kEnableTime = 2 // enable time stats too
|
|
||||||
};
|
|
||||||
|
|
||||||
// set the perf stats level
|
|
||||||
void SetPerfLevel(PerfLevel level);
|
|
||||||
|
|
||||||
// get current perf stats level
|
|
||||||
PerfLevel GetPerfLevel();
|
|
||||||
|
|
||||||
// A thread local context for gathering performance counter efficiently
|
// A thread local context for gathering performance counter efficiently
|
||||||
// and transparently.
|
// and transparently.
|
||||||
|
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
|
||||||
|
|
||||||
struct PerfContext {
|
struct PerfContext {
|
||||||
|
|
||||||
@ -64,11 +55,16 @@ struct PerfContext {
|
|||||||
uint64_t seek_internal_seek_time;
|
uint64_t seek_internal_seek_time;
|
||||||
// total time spent on iterating internal entries to find the next user entry
|
// total time spent on iterating internal entries to find the next user entry
|
||||||
uint64_t find_next_user_entry_time;
|
uint64_t find_next_user_entry_time;
|
||||||
// total time spent on pre or post processing when writing a record
|
|
||||||
uint64_t write_pre_and_post_process_time;
|
// total time spent on writing to WAL
|
||||||
uint64_t write_wal_time; // total time spent on writing to WAL
|
uint64_t write_wal_time;
|
||||||
// total time spent on writing to mem tables
|
// total time spent on writing to mem tables
|
||||||
uint64_t write_memtable_time;
|
uint64_t write_memtable_time;
|
||||||
|
// total time spent on delaying write
|
||||||
|
uint64_t write_delay_time;
|
||||||
|
// total time spent on writing a record, excluding the above three times
|
||||||
|
uint64_t write_pre_and_post_process_time;
|
||||||
|
|
||||||
uint64_t db_mutex_lock_nanos; // time spent on acquiring DB mutex.
|
uint64_t db_mutex_lock_nanos; // time spent on acquiring DB mutex.
|
||||||
// Time spent on waiting with a condition variable created with DB mutex.
|
// Time spent on waiting with a condition variable created with DB mutex.
|
||||||
uint64_t db_condition_wait_nanos;
|
uint64_t db_condition_wait_nanos;
|
||||||
|
30
include/rocksdb/perf_level.h
Normal file
30
include/rocksdb/perf_level.h
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#ifndef INCLUDE_ROCKSDB_PERF_LEVEL_H_
|
||||||
|
#define INCLUDE_ROCKSDB_PERF_LEVEL_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
// How much perf stats to collect. Affects perf_context and iostats_context.
|
||||||
|
|
||||||
|
enum PerfLevel {
|
||||||
|
kDisable = 0, // disable perf stats
|
||||||
|
kEnableCount = 1, // enable only count stats
|
||||||
|
kEnableTime = 2 // enable time stats too
|
||||||
|
};
|
||||||
|
|
||||||
|
// set the perf stats level for current thread
|
||||||
|
void SetPerfLevel(PerfLevel level);
|
||||||
|
|
||||||
|
// get current perf stats level for current thread
|
||||||
|
PerfLevel GetPerfLevel();
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
#endif // INCLUDE_ROCKSDB_PERF_LEVEL_H_
|
1
src.mk
1
src.mk
@ -126,6 +126,7 @@ LIB_SOURCES = \
|
|||||||
util/options.cc \
|
util/options.cc \
|
||||||
util/options_helper.cc \
|
util/options_helper.cc \
|
||||||
util/perf_context.cc \
|
util/perf_context.cc \
|
||||||
|
util/perf_level.cc \
|
||||||
util/rate_limiter.cc \
|
util/rate_limiter.cc \
|
||||||
util/skiplistrep.cc \
|
util/skiplistrep.cc \
|
||||||
util/slice.cc \
|
util/slice.cc \
|
||||||
|
@ -439,14 +439,17 @@ class PosixMmapFile : public WritableFile {
|
|||||||
|
|
||||||
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
||||||
// we can't fallocate with FALLOC_FL_KEEP_SIZE here
|
// we can't fallocate with FALLOC_FL_KEEP_SIZE here
|
||||||
int alloc_status = fallocate(fd_, 0, file_offset_, map_size_);
|
{
|
||||||
if (alloc_status != 0) {
|
IOSTATS_TIMER_GUARD(allocate_nanos);
|
||||||
// fallback to posix_fallocate
|
int alloc_status = fallocate(fd_, 0, file_offset_, map_size_);
|
||||||
alloc_status = posix_fallocate(fd_, file_offset_, map_size_);
|
if (alloc_status != 0) {
|
||||||
}
|
// fallback to posix_fallocate
|
||||||
if (alloc_status != 0) {
|
alloc_status = posix_fallocate(fd_, file_offset_, map_size_);
|
||||||
return Status::IOError("Error allocating space to file : " + filename_ +
|
}
|
||||||
"Error : " + strerror(alloc_status));
|
if (alloc_status != 0) {
|
||||||
|
return Status::IOError("Error allocating space to file : " + filename_ +
|
||||||
|
"Error : " + strerror(alloc_status));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
||||||
@ -635,6 +638,7 @@ class PosixMmapFile : public WritableFile {
|
|||||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||||
virtual Status Allocate(off_t offset, off_t len) override {
|
virtual Status Allocate(off_t offset, off_t len) override {
|
||||||
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
||||||
|
IOSTATS_TIMER_GUARD(allocate_nanos);
|
||||||
int alloc_status = fallocate(
|
int alloc_status = fallocate(
|
||||||
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
|
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
|
||||||
if (alloc_status == 0) {
|
if (alloc_status == 0) {
|
||||||
@ -721,7 +725,12 @@ class PosixWritableFile : public WritableFile {
|
|||||||
cursize_ += left;
|
cursize_ += left;
|
||||||
} else {
|
} else {
|
||||||
while (left != 0) {
|
while (left != 0) {
|
||||||
ssize_t done = write(fd_, src, RequestToken(left));
|
ssize_t done;
|
||||||
|
size_t size = RequestToken(left);
|
||||||
|
{
|
||||||
|
IOSTATS_TIMER_GUARD(write_nanos);
|
||||||
|
done = write(fd_, src, size);
|
||||||
|
}
|
||||||
if (done < 0) {
|
if (done < 0) {
|
||||||
if (errno == EINTR) {
|
if (errno == EINTR) {
|
||||||
continue;
|
continue;
|
||||||
@ -769,6 +778,7 @@ class PosixWritableFile : public WritableFile {
|
|||||||
// tmpfs (since Linux 3.5)
|
// tmpfs (since Linux 3.5)
|
||||||
// We ignore error since failure of this operation does not affect
|
// We ignore error since failure of this operation does not affect
|
||||||
// correctness.
|
// correctness.
|
||||||
|
IOSTATS_TIMER_GUARD(allocate_nanos);
|
||||||
fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
|
fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
|
||||||
filesize_, block_size * last_allocated_block - filesize_);
|
filesize_, block_size * last_allocated_block - filesize_);
|
||||||
#endif
|
#endif
|
||||||
@ -787,7 +797,12 @@ class PosixWritableFile : public WritableFile {
|
|||||||
size_t left = cursize_;
|
size_t left = cursize_;
|
||||||
char* src = buf_.get();
|
char* src = buf_.get();
|
||||||
while (left != 0) {
|
while (left != 0) {
|
||||||
ssize_t done = write(fd_, src, RequestToken(left));
|
ssize_t done;
|
||||||
|
size_t size = RequestToken(left);
|
||||||
|
{
|
||||||
|
IOSTATS_TIMER_GUARD(write_nanos);
|
||||||
|
done = write(fd_, src, size);
|
||||||
|
}
|
||||||
if (done < 0) {
|
if (done < 0) {
|
||||||
if (errno == EINTR) {
|
if (errno == EINTR) {
|
||||||
continue;
|
continue;
|
||||||
@ -861,7 +876,9 @@ class PosixWritableFile : public WritableFile {
|
|||||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||||
virtual Status Allocate(off_t offset, off_t len) override {
|
virtual Status Allocate(off_t offset, off_t len) override {
|
||||||
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
||||||
int alloc_status = fallocate(
|
int alloc_status;
|
||||||
|
IOSTATS_TIMER_GUARD(allocate_nanos);
|
||||||
|
alloc_status = fallocate(
|
||||||
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
|
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
|
||||||
if (alloc_status == 0) {
|
if (alloc_status == 0) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
@ -871,6 +888,7 @@ class PosixWritableFile : public WritableFile {
|
|||||||
}
|
}
|
||||||
|
|
||||||
virtual Status RangeSync(off_t offset, off_t nbytes) override {
|
virtual Status RangeSync(off_t offset, off_t nbytes) override {
|
||||||
|
IOSTATS_TIMER_GUARD(range_sync_nanos);
|
||||||
if (sync_file_range(fd_, offset, nbytes, SYNC_FILE_RANGE_WRITE) == 0) {
|
if (sync_file_range(fd_, offset, nbytes, SYNC_FILE_RANGE_WRITE) == 0) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
} else {
|
} else {
|
||||||
@ -929,7 +947,11 @@ class PosixRandomRWFile : public RandomRWFile {
|
|||||||
pending_fsync_ = true;
|
pending_fsync_ = true;
|
||||||
|
|
||||||
while (left != 0) {
|
while (left != 0) {
|
||||||
ssize_t done = pwrite(fd_, src, left, offset);
|
ssize_t done;
|
||||||
|
{
|
||||||
|
IOSTATS_TIMER_GUARD(write_nanos);
|
||||||
|
done = pwrite(fd_, src, left, offset);
|
||||||
|
}
|
||||||
if (done < 0) {
|
if (done < 0) {
|
||||||
if (errno == EINTR) {
|
if (errno == EINTR) {
|
||||||
continue;
|
continue;
|
||||||
@ -1001,6 +1023,7 @@ class PosixRandomRWFile : public RandomRWFile {
|
|||||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||||
virtual Status Allocate(off_t offset, off_t len) override {
|
virtual Status Allocate(off_t offset, off_t len) override {
|
||||||
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
||||||
|
IOSTATS_TIMER_GUARD(allocate_nanos);
|
||||||
int alloc_status = fallocate(
|
int alloc_status = fallocate(
|
||||||
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
|
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
|
||||||
if (alloc_status == 0) {
|
if (alloc_status == 0) {
|
||||||
@ -1109,6 +1132,7 @@ class PosixEnv : public Env {
|
|||||||
result->reset();
|
result->reset();
|
||||||
FILE* f = nullptr;
|
FILE* f = nullptr;
|
||||||
do {
|
do {
|
||||||
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
f = fopen(fname.c_str(), "r");
|
f = fopen(fname.c_str(), "r");
|
||||||
} while (f == nullptr && errno == EINTR);
|
} while (f == nullptr && errno == EINTR);
|
||||||
if (f == nullptr) {
|
if (f == nullptr) {
|
||||||
@ -1127,7 +1151,11 @@ class PosixEnv : public Env {
|
|||||||
const EnvOptions& options) override {
|
const EnvOptions& options) override {
|
||||||
result->reset();
|
result->reset();
|
||||||
Status s;
|
Status s;
|
||||||
int fd = open(fname.c_str(), O_RDONLY);
|
int fd;
|
||||||
|
{
|
||||||
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
|
fd = open(fname.c_str(), O_RDONLY);
|
||||||
|
}
|
||||||
SetFD_CLOEXEC(fd, &options);
|
SetFD_CLOEXEC(fd, &options);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
s = IOError(fname, errno);
|
s = IOError(fname, errno);
|
||||||
@ -1160,6 +1188,7 @@ class PosixEnv : public Env {
|
|||||||
Status s;
|
Status s;
|
||||||
int fd = -1;
|
int fd = -1;
|
||||||
do {
|
do {
|
||||||
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
|
fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
|
||||||
} while (fd < 0 && errno == EINTR);
|
} while (fd < 0 && errno == EINTR);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
@ -1200,7 +1229,11 @@ class PosixEnv : public Env {
|
|||||||
return Status::NotSupported("No support for mmap read/write yet");
|
return Status::NotSupported("No support for mmap read/write yet");
|
||||||
}
|
}
|
||||||
Status s;
|
Status s;
|
||||||
const int fd = open(fname.c_str(), O_CREAT | O_RDWR, 0644);
|
int fd;
|
||||||
|
{
|
||||||
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
|
fd = open(fname.c_str(), O_CREAT | O_RDWR, 0644);
|
||||||
|
}
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
s = IOError(fname, errno);
|
s = IOError(fname, errno);
|
||||||
} else {
|
} else {
|
||||||
@ -1213,7 +1246,11 @@ class PosixEnv : public Env {
|
|||||||
virtual Status NewDirectory(const std::string& name,
|
virtual Status NewDirectory(const std::string& name,
|
||||||
unique_ptr<Directory>* result) override {
|
unique_ptr<Directory>* result) override {
|
||||||
result->reset();
|
result->reset();
|
||||||
const int fd = open(name.c_str(), 0);
|
int fd;
|
||||||
|
{
|
||||||
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
|
fd = open(name.c_str(), 0);
|
||||||
|
}
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
return IOError(name, errno);
|
return IOError(name, errno);
|
||||||
} else {
|
} else {
|
||||||
@ -1325,7 +1362,11 @@ class PosixEnv : public Env {
|
|||||||
virtual Status LockFile(const std::string& fname, FileLock** lock) override {
|
virtual Status LockFile(const std::string& fname, FileLock** lock) override {
|
||||||
*lock = nullptr;
|
*lock = nullptr;
|
||||||
Status result;
|
Status result;
|
||||||
int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
|
int fd;
|
||||||
|
{
|
||||||
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
|
fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
|
||||||
|
}
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
result = IOError(fname, errno);
|
result = IOError(fname, errno);
|
||||||
} else if (LockOrUnlock(fname, fd, true) == -1) {
|
} else if (LockOrUnlock(fname, fd, true) == -1) {
|
||||||
@ -1396,7 +1437,11 @@ class PosixEnv : public Env {
|
|||||||
|
|
||||||
virtual Status NewLogger(const std::string& fname,
|
virtual Status NewLogger(const std::string& fname,
|
||||||
shared_ptr<Logger>* result) override {
|
shared_ptr<Logger>* result) override {
|
||||||
FILE* f = fopen(fname.c_str(), "w");
|
FILE* f;
|
||||||
|
{
|
||||||
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
|
f = fopen(fname.c_str(), "w");
|
||||||
|
}
|
||||||
if (f == nullptr) {
|
if (f == nullptr) {
|
||||||
result->reset();
|
result->reset();
|
||||||
return IOError(fname, errno);
|
return IOError(fname, errno);
|
||||||
|
@ -17,6 +17,11 @@ void IOStatsContext::Reset() {
|
|||||||
thread_pool_id = Env::Priority::TOTAL;
|
thread_pool_id = Env::Priority::TOTAL;
|
||||||
bytes_read = 0;
|
bytes_read = 0;
|
||||||
bytes_written = 0;
|
bytes_written = 0;
|
||||||
|
open_nanos = 0;
|
||||||
|
allocate_nanos = 0;
|
||||||
|
write_nanos = 0;
|
||||||
|
range_sync_nanos = 0;
|
||||||
|
logger_nanos = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define OUTPUT(counter) #counter << " = " << counter << ", "
|
#define OUTPUT(counter) #counter << " = " << counter << ", "
|
||||||
@ -25,7 +30,13 @@ std::string IOStatsContext::ToString() const {
|
|||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << OUTPUT(thread_pool_id)
|
ss << OUTPUT(thread_pool_id)
|
||||||
<< OUTPUT(bytes_read)
|
<< OUTPUT(bytes_read)
|
||||||
<< OUTPUT(bytes_written);
|
<< OUTPUT(bytes_written)
|
||||||
|
<< OUTPUT(open_nanos)
|
||||||
|
<< OUTPUT(allocate_nanos)
|
||||||
|
<< OUTPUT(write_nanos)
|
||||||
|
<< OUTPUT(range_sync_nanos)
|
||||||
|
<< OUTPUT(logger_nanos);
|
||||||
|
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
//
|
//
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "rocksdb/iostats_context.h"
|
#include "rocksdb/iostats_context.h"
|
||||||
|
#include "util/perf_step_timer.h"
|
||||||
|
|
||||||
#ifndef IOS_CROSS_COMPILE
|
#ifndef IOS_CROSS_COMPILE
|
||||||
|
|
||||||
@ -33,6 +34,18 @@
|
|||||||
#define IOSTATS(metric) \
|
#define IOSTATS(metric) \
|
||||||
(iostats_context.metric)
|
(iostats_context.metric)
|
||||||
|
|
||||||
|
// Stop the timer and update the metric
|
||||||
|
#define IOSTATS_TIMER_STOP(metric) \
|
||||||
|
iostats_step_timer_ ## metric.Stop();
|
||||||
|
|
||||||
|
#define IOSTATS_TIMER_START(metric) \
|
||||||
|
iostats_step_timer_ ## metric.Start();
|
||||||
|
|
||||||
|
// Declare and set start time of the timer
|
||||||
|
#define IOSTATS_TIMER_GUARD(metric) \
|
||||||
|
PerfStepTimer iostats_step_timer_ ## metric(&(iostats_context.metric)); \
|
||||||
|
iostats_step_timer_ ## metric.Start();
|
||||||
|
|
||||||
#else // IOS_CROSS_COMPILE
|
#else // IOS_CROSS_COMPILE
|
||||||
|
|
||||||
#define IOSTATS_ADD(metric, value)
|
#define IOSTATS_ADD(metric, value)
|
||||||
@ -43,4 +56,8 @@
|
|||||||
#define IOSTATS_THREAD_POOL_ID()
|
#define IOSTATS_THREAD_POOL_ID()
|
||||||
#define IOSTATS(metric) 0
|
#define IOSTATS(metric) 0
|
||||||
|
|
||||||
|
#define IOSTATS_TIMER_GUARD(metric)
|
||||||
|
#define IOSTATS_TIMER_STOP(metric)
|
||||||
|
#define IOSTATS_TIMER_START(metric)
|
||||||
|
|
||||||
#endif // IOS_CROSS_COMPILE
|
#endif // IOS_CROSS_COMPILE
|
||||||
|
@ -10,22 +10,12 @@
|
|||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
|
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
|
||||||
PerfLevel perf_level = kEnableCount;
|
|
||||||
// This is a dummy variable since some place references it
|
// This is a dummy variable since some place references it
|
||||||
PerfContext perf_context;
|
PerfContext perf_context;
|
||||||
#else
|
#else
|
||||||
__thread PerfLevel perf_level = kEnableCount;
|
|
||||||
__thread PerfContext perf_context;
|
__thread PerfContext perf_context;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void SetPerfLevel(PerfLevel level) {
|
|
||||||
perf_level = level;
|
|
||||||
}
|
|
||||||
|
|
||||||
PerfLevel GetPerfLevel() {
|
|
||||||
return perf_level;
|
|
||||||
}
|
|
||||||
|
|
||||||
void PerfContext::Reset() {
|
void PerfContext::Reset() {
|
||||||
#if !defined(NPERF_CONTEXT) && !defined(IOS_CROSS_COMPILE)
|
#if !defined(NPERF_CONTEXT) && !defined(IOS_CROSS_COMPILE)
|
||||||
user_key_comparison_count = 0;
|
user_key_comparison_count = 0;
|
||||||
@ -53,6 +43,7 @@ void PerfContext::Reset() {
|
|||||||
find_next_user_entry_time = 0;
|
find_next_user_entry_time = 0;
|
||||||
write_pre_and_post_process_time = 0;
|
write_pre_and_post_process_time = 0;
|
||||||
write_memtable_time = 0;
|
write_memtable_time = 0;
|
||||||
|
write_delay_time = 0;
|
||||||
db_mutex_lock_nanos = 0;
|
db_mutex_lock_nanos = 0;
|
||||||
db_condition_wait_nanos = 0;
|
db_condition_wait_nanos = 0;
|
||||||
merge_operator_time_nanos = 0;
|
merge_operator_time_nanos = 0;
|
||||||
@ -79,7 +70,7 @@ std::string PerfContext::ToString() const {
|
|||||||
<< OUTPUT(seek_internal_seek_time) << OUTPUT(find_next_user_entry_time)
|
<< OUTPUT(seek_internal_seek_time) << OUTPUT(find_next_user_entry_time)
|
||||||
<< OUTPUT(write_pre_and_post_process_time) << OUTPUT(write_memtable_time)
|
<< OUTPUT(write_pre_and_post_process_time) << OUTPUT(write_memtable_time)
|
||||||
<< OUTPUT(db_mutex_lock_nanos) << OUTPUT(db_condition_wait_nanos)
|
<< OUTPUT(db_mutex_lock_nanos) << OUTPUT(db_condition_wait_nanos)
|
||||||
<< OUTPUT(merge_operator_time_nanos);
|
<< OUTPUT(merge_operator_time_nanos) << OUTPUT(write_delay_time);
|
||||||
return ss.str();
|
return ss.str();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
//
|
//
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "rocksdb/perf_context.h"
|
#include "rocksdb/perf_context.h"
|
||||||
|
#include "util/perf_step_timer.h"
|
||||||
#include "util/stop_watch.h"
|
#include "util/stop_watch.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
@ -19,49 +20,6 @@ namespace rocksdb {
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
extern __thread PerfLevel perf_level;
|
|
||||||
|
|
||||||
class PerfStepTimer {
|
|
||||||
public:
|
|
||||||
PerfStepTimer(uint64_t* metric)
|
|
||||||
: enabled_(perf_level >= PerfLevel::kEnableTime),
|
|
||||||
env_(enabled_ ? Env::Default() : nullptr),
|
|
||||||
start_(0),
|
|
||||||
metric_(metric) {
|
|
||||||
}
|
|
||||||
|
|
||||||
~PerfStepTimer() {
|
|
||||||
Stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Start() {
|
|
||||||
if (enabled_) {
|
|
||||||
start_ = env_->NowNanos();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Measure() {
|
|
||||||
if (start_) {
|
|
||||||
uint64_t now = env_->NowNanos();
|
|
||||||
*metric_ += now - start_;
|
|
||||||
start_ = now;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Stop() {
|
|
||||||
if (start_) {
|
|
||||||
*metric_ += env_->NowNanos() - start_;
|
|
||||||
start_ = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
const bool enabled_;
|
|
||||||
Env* const env_;
|
|
||||||
uint64_t start_;
|
|
||||||
uint64_t* metric_;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Stop the timer and update the metric
|
// Stop the timer and update the metric
|
||||||
#define PERF_TIMER_STOP(metric) \
|
#define PERF_TIMER_STOP(metric) \
|
||||||
perf_step_timer_ ## metric.Stop();
|
perf_step_timer_ ## metric.Stop();
|
||||||
@ -70,8 +28,8 @@ class PerfStepTimer {
|
|||||||
perf_step_timer_ ## metric.Start();
|
perf_step_timer_ ## metric.Start();
|
||||||
|
|
||||||
// Declare and set start time of the timer
|
// Declare and set start time of the timer
|
||||||
#define PERF_TIMER_GUARD(metric) \
|
#define PERF_TIMER_GUARD(metric) \
|
||||||
PerfStepTimer perf_step_timer_ ## metric(&(perf_context.metric)); \
|
PerfStepTimer perf_step_timer_ ## metric(&(perf_context.metric)); \
|
||||||
perf_step_timer_ ## metric.Start();
|
perf_step_timer_ ## metric.Start();
|
||||||
|
|
||||||
// Update metric with time elapsed since last START. start time is reset
|
// Update metric with time elapsed since last START. start time is reset
|
||||||
|
26
util/perf_level.cc
Normal file
26
util/perf_level.cc
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <sstream>
|
||||||
|
#include "util/perf_level_imp.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
#if defined(IOS_CROSS_COMPILE)
|
||||||
|
PerfLevel perf_level = kEnableCount;
|
||||||
|
#else
|
||||||
|
__thread PerfLevel perf_level = kEnableCount;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void SetPerfLevel(PerfLevel level) {
|
||||||
|
perf_level = level;
|
||||||
|
}
|
||||||
|
|
||||||
|
PerfLevel GetPerfLevel() {
|
||||||
|
return perf_level;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
17
util/perf_level_imp.h
Normal file
17
util/perf_level_imp.h
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
#pragma once
|
||||||
|
#include "rocksdb/perf_level.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
#if defined(IOS_CROSS_COMPILE)
|
||||||
|
extern PerfLevel perf_level;
|
||||||
|
#else
|
||||||
|
extern __thread PerfLevel perf_level;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
54
util/perf_step_timer.h
Normal file
54
util/perf_step_timer.h
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
#pragma once
|
||||||
|
#include "rocksdb/env.h"
|
||||||
|
#include "util/perf_level_imp.h"
|
||||||
|
#include "util/stop_watch.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
class PerfStepTimer {
|
||||||
|
public:
|
||||||
|
PerfStepTimer(uint64_t* metric)
|
||||||
|
: enabled_(perf_level >= PerfLevel::kEnableTime),
|
||||||
|
env_(enabled_ ? Env::Default() : nullptr),
|
||||||
|
start_(0),
|
||||||
|
metric_(metric) {
|
||||||
|
}
|
||||||
|
|
||||||
|
~PerfStepTimer() {
|
||||||
|
Stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Start() {
|
||||||
|
if (enabled_) {
|
||||||
|
start_ = env_->NowNanos();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Measure() {
|
||||||
|
if (start_) {
|
||||||
|
uint64_t now = env_->NowNanos();
|
||||||
|
*metric_ += now - start_;
|
||||||
|
start_ = now;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Stop() {
|
||||||
|
if (start_) {
|
||||||
|
*metric_ += env_->NowNanos() - start_;
|
||||||
|
start_ = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const bool enabled_;
|
||||||
|
Env* const env_;
|
||||||
|
uint64_t start_;
|
||||||
|
uint64_t* metric_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -21,6 +21,7 @@
|
|||||||
#include <linux/falloc.h>
|
#include <linux/falloc.h>
|
||||||
#endif
|
#endif
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
|
#include "util/iostats_context_imp.h"
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
@ -61,6 +62,8 @@ class PosixLogger : public Logger {
|
|||||||
|
|
||||||
using Logger::Logv;
|
using Logger::Logv;
|
||||||
virtual void Logv(const char* format, va_list ap) override {
|
virtual void Logv(const char* format, va_list ap) override {
|
||||||
|
IOSTATS_TIMER_GUARD(logger_nanos);
|
||||||
|
|
||||||
const uint64_t thread_id = (*gettid_)();
|
const uint64_t thread_id = (*gettid_)();
|
||||||
|
|
||||||
// We try twice: the first time with a fixed-size stack allocated buffer,
|
// We try twice: the first time with a fixed-size stack allocated buffer,
|
||||||
|
Loading…
Reference in New Issue
Block a user