more times in perf_context and iostats_context

Summary:
We occasionally get write stalls (>1s Write() calls) on HDD under read load. The following timers explain almost all of the stalls:
 - perf_context.db_mutex_lock_nanos
 - perf_context.db_condition_wait_nanos
 - iostats_context.open_time
 - iostats_context.allocate_time
 - iostats_context.write_time
 - iostats_context.range_sync_time
 - iostats_context.logger_time

In my experiments each of these occasionally takes >1s on write path under some workload. There are rare cases when Write() takes long but none of these takes long.

Test Plan: Added code to our application to write the listed timings to log for slow writes. They usually add up to almost exactly the time Write() call took.

Reviewers: rven, yhchiang, sdong

Reviewed By: sdong

Subscribers: march, dhruba, tnovak

Differential Revision: https://reviews.facebook.net/D39177
This commit is contained in:
Mike Kolupaev 2015-06-02 02:07:58 -07:00
parent 4266d4fd90
commit ec7a944360
14 changed files with 257 additions and 89 deletions

View File

@ -3322,7 +3322,10 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
if (context.schedule_bg_work_) { if (context.schedule_bg_work_) {
MaybeScheduleFlushOrCompaction(); MaybeScheduleFlushOrCompaction();
} }
PERF_TIMER_STOP(write_pre_and_post_process_time);
PERF_TIMER_GUARD(write_delay_time);
status = DelayWrite(expiration_time); status = DelayWrite(expiration_time);
PERF_TIMER_START(write_pre_and_post_process_time);
} }
if (UNLIKELY(status.ok() && has_timeout && if (UNLIKELY(status.ok() && has_timeout &&

View File

@ -9,7 +9,11 @@
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
#include "rocksdb/perf_level.h"
// A thread local context for gathering io-stats efficiently and transparently. // A thread local context for gathering io-stats efficiently and transparently.
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
namespace rocksdb { namespace rocksdb {
struct IOStatsContext { struct IOStatsContext {
@ -25,6 +29,18 @@ struct IOStatsContext {
uint64_t bytes_written; uint64_t bytes_written;
// number of bytes that has been read. // number of bytes that has been read.
uint64_t bytes_read; uint64_t bytes_read;
// time spent in open() and fopen().
uint64_t open_nanos;
// time spent in fallocate().
uint64_t allocate_nanos;
// time spent in write() and pwrite().
uint64_t write_nanos;
// time spent in sync_file_range().
uint64_t range_sync_nanos;
// time spent in Logger::Logv().
uint64_t logger_nanos;
}; };
#ifndef IOS_CROSS_COMPILE #ifndef IOS_CROSS_COMPILE

View File

@ -9,22 +9,13 @@
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
#include "rocksdb/perf_level.h"
namespace rocksdb { namespace rocksdb {
enum PerfLevel {
kDisable = 0, // disable perf stats
kEnableCount = 1, // enable only count stats
kEnableTime = 2 // enable time stats too
};
// set the perf stats level
void SetPerfLevel(PerfLevel level);
// get current perf stats level
PerfLevel GetPerfLevel();
// A thread local context for gathering performance counter efficiently // A thread local context for gathering performance counter efficiently
// and transparently. // and transparently.
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
struct PerfContext { struct PerfContext {
@ -64,11 +55,16 @@ struct PerfContext {
uint64_t seek_internal_seek_time; uint64_t seek_internal_seek_time;
// total time spent on iterating internal entries to find the next user entry // total time spent on iterating internal entries to find the next user entry
uint64_t find_next_user_entry_time; uint64_t find_next_user_entry_time;
// total time spent on pre or post processing when writing a record
uint64_t write_pre_and_post_process_time; // total time spent on writing to WAL
uint64_t write_wal_time; // total time spent on writing to WAL uint64_t write_wal_time;
// total time spent on writing to mem tables // total time spent on writing to mem tables
uint64_t write_memtable_time; uint64_t write_memtable_time;
// total time spent on delaying write
uint64_t write_delay_time;
// total time spent on writing a record, excluding the above three times
uint64_t write_pre_and_post_process_time;
uint64_t db_mutex_lock_nanos; // time spent on acquiring DB mutex. uint64_t db_mutex_lock_nanos; // time spent on acquiring DB mutex.
// Time spent on waiting with a condition variable created with DB mutex. // Time spent on waiting with a condition variable created with DB mutex.
uint64_t db_condition_wait_nanos; uint64_t db_condition_wait_nanos;

View File

@ -0,0 +1,30 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef INCLUDE_ROCKSDB_PERF_LEVEL_H_
#define INCLUDE_ROCKSDB_PERF_LEVEL_H_
#include <stdint.h>
#include <string>
namespace rocksdb {
// How much perf stats to collect. Affects perf_context and iostats_context.
enum PerfLevel {
kDisable = 0, // disable perf stats
kEnableCount = 1, // enable only count stats
kEnableTime = 2 // enable time stats too
};
// set the perf stats level for current thread
void SetPerfLevel(PerfLevel level);
// get current perf stats level for current thread
PerfLevel GetPerfLevel();
} // namespace rocksdb
#endif // INCLUDE_ROCKSDB_PERF_LEVEL_H_

1
src.mk
View File

@ -126,6 +126,7 @@ LIB_SOURCES = \
util/options.cc \ util/options.cc \
util/options_helper.cc \ util/options_helper.cc \
util/perf_context.cc \ util/perf_context.cc \
util/perf_level.cc \
util/rate_limiter.cc \ util/rate_limiter.cc \
util/skiplistrep.cc \ util/skiplistrep.cc \
util/slice.cc \ util/slice.cc \

View File

@ -439,14 +439,17 @@ class PosixMmapFile : public WritableFile {
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
// we can't fallocate with FALLOC_FL_KEEP_SIZE here // we can't fallocate with FALLOC_FL_KEEP_SIZE here
int alloc_status = fallocate(fd_, 0, file_offset_, map_size_); {
if (alloc_status != 0) { IOSTATS_TIMER_GUARD(allocate_nanos);
// fallback to posix_fallocate int alloc_status = fallocate(fd_, 0, file_offset_, map_size_);
alloc_status = posix_fallocate(fd_, file_offset_, map_size_); if (alloc_status != 0) {
} // fallback to posix_fallocate
if (alloc_status != 0) { alloc_status = posix_fallocate(fd_, file_offset_, map_size_);
return Status::IOError("Error allocating space to file : " + filename_ + }
"Error : " + strerror(alloc_status)); if (alloc_status != 0) {
return Status::IOError("Error allocating space to file : " + filename_ +
"Error : " + strerror(alloc_status));
}
} }
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
@ -635,6 +638,7 @@ class PosixMmapFile : public WritableFile {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override { virtual Status Allocate(off_t offset, off_t len) override {
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status = fallocate( int alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
if (alloc_status == 0) { if (alloc_status == 0) {
@ -721,7 +725,12 @@ class PosixWritableFile : public WritableFile {
cursize_ += left; cursize_ += left;
} else { } else {
while (left != 0) { while (left != 0) {
ssize_t done = write(fd_, src, RequestToken(left)); ssize_t done;
size_t size = RequestToken(left);
{
IOSTATS_TIMER_GUARD(write_nanos);
done = write(fd_, src, size);
}
if (done < 0) { if (done < 0) {
if (errno == EINTR) { if (errno == EINTR) {
continue; continue;
@ -769,6 +778,7 @@ class PosixWritableFile : public WritableFile {
// tmpfs (since Linux 3.5) // tmpfs (since Linux 3.5)
// We ignore error since failure of this operation does not affect // We ignore error since failure of this operation does not affect
// correctness. // correctness.
IOSTATS_TIMER_GUARD(allocate_nanos);
fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
filesize_, block_size * last_allocated_block - filesize_); filesize_, block_size * last_allocated_block - filesize_);
#endif #endif
@ -787,7 +797,12 @@ class PosixWritableFile : public WritableFile {
size_t left = cursize_; size_t left = cursize_;
char* src = buf_.get(); char* src = buf_.get();
while (left != 0) { while (left != 0) {
ssize_t done = write(fd_, src, RequestToken(left)); ssize_t done;
size_t size = RequestToken(left);
{
IOSTATS_TIMER_GUARD(write_nanos);
done = write(fd_, src, size);
}
if (done < 0) { if (done < 0) {
if (errno == EINTR) { if (errno == EINTR) {
continue; continue;
@ -861,7 +876,9 @@ class PosixWritableFile : public WritableFile {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override { virtual Status Allocate(off_t offset, off_t len) override {
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
int alloc_status = fallocate( int alloc_status;
IOSTATS_TIMER_GUARD(allocate_nanos);
alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
if (alloc_status == 0) { if (alloc_status == 0) {
return Status::OK(); return Status::OK();
@ -871,6 +888,7 @@ class PosixWritableFile : public WritableFile {
} }
virtual Status RangeSync(off_t offset, off_t nbytes) override { virtual Status RangeSync(off_t offset, off_t nbytes) override {
IOSTATS_TIMER_GUARD(range_sync_nanos);
if (sync_file_range(fd_, offset, nbytes, SYNC_FILE_RANGE_WRITE) == 0) { if (sync_file_range(fd_, offset, nbytes, SYNC_FILE_RANGE_WRITE) == 0) {
return Status::OK(); return Status::OK();
} else { } else {
@ -929,7 +947,11 @@ class PosixRandomRWFile : public RandomRWFile {
pending_fsync_ = true; pending_fsync_ = true;
while (left != 0) { while (left != 0) {
ssize_t done = pwrite(fd_, src, left, offset); ssize_t done;
{
IOSTATS_TIMER_GUARD(write_nanos);
done = pwrite(fd_, src, left, offset);
}
if (done < 0) { if (done < 0) {
if (errno == EINTR) { if (errno == EINTR) {
continue; continue;
@ -1001,6 +1023,7 @@ class PosixRandomRWFile : public RandomRWFile {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override { virtual Status Allocate(off_t offset, off_t len) override {
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status = fallocate( int alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
if (alloc_status == 0) { if (alloc_status == 0) {
@ -1109,6 +1132,7 @@ class PosixEnv : public Env {
result->reset(); result->reset();
FILE* f = nullptr; FILE* f = nullptr;
do { do {
IOSTATS_TIMER_GUARD(open_nanos);
f = fopen(fname.c_str(), "r"); f = fopen(fname.c_str(), "r");
} while (f == nullptr && errno == EINTR); } while (f == nullptr && errno == EINTR);
if (f == nullptr) { if (f == nullptr) {
@ -1127,7 +1151,11 @@ class PosixEnv : public Env {
const EnvOptions& options) override { const EnvOptions& options) override {
result->reset(); result->reset();
Status s; Status s;
int fd = open(fname.c_str(), O_RDONLY); int fd;
{
IOSTATS_TIMER_GUARD(open_nanos);
fd = open(fname.c_str(), O_RDONLY);
}
SetFD_CLOEXEC(fd, &options); SetFD_CLOEXEC(fd, &options);
if (fd < 0) { if (fd < 0) {
s = IOError(fname, errno); s = IOError(fname, errno);
@ -1160,6 +1188,7 @@ class PosixEnv : public Env {
Status s; Status s;
int fd = -1; int fd = -1;
do { do {
IOSTATS_TIMER_GUARD(open_nanos);
fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
} while (fd < 0 && errno == EINTR); } while (fd < 0 && errno == EINTR);
if (fd < 0) { if (fd < 0) {
@ -1200,7 +1229,11 @@ class PosixEnv : public Env {
return Status::NotSupported("No support for mmap read/write yet"); return Status::NotSupported("No support for mmap read/write yet");
} }
Status s; Status s;
const int fd = open(fname.c_str(), O_CREAT | O_RDWR, 0644); int fd;
{
IOSTATS_TIMER_GUARD(open_nanos);
fd = open(fname.c_str(), O_CREAT | O_RDWR, 0644);
}
if (fd < 0) { if (fd < 0) {
s = IOError(fname, errno); s = IOError(fname, errno);
} else { } else {
@ -1213,7 +1246,11 @@ class PosixEnv : public Env {
virtual Status NewDirectory(const std::string& name, virtual Status NewDirectory(const std::string& name,
unique_ptr<Directory>* result) override { unique_ptr<Directory>* result) override {
result->reset(); result->reset();
const int fd = open(name.c_str(), 0); int fd;
{
IOSTATS_TIMER_GUARD(open_nanos);
fd = open(name.c_str(), 0);
}
if (fd < 0) { if (fd < 0) {
return IOError(name, errno); return IOError(name, errno);
} else { } else {
@ -1325,7 +1362,11 @@ class PosixEnv : public Env {
virtual Status LockFile(const std::string& fname, FileLock** lock) override { virtual Status LockFile(const std::string& fname, FileLock** lock) override {
*lock = nullptr; *lock = nullptr;
Status result; Status result;
int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644); int fd;
{
IOSTATS_TIMER_GUARD(open_nanos);
fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
}
if (fd < 0) { if (fd < 0) {
result = IOError(fname, errno); result = IOError(fname, errno);
} else if (LockOrUnlock(fname, fd, true) == -1) { } else if (LockOrUnlock(fname, fd, true) == -1) {
@ -1396,7 +1437,11 @@ class PosixEnv : public Env {
virtual Status NewLogger(const std::string& fname, virtual Status NewLogger(const std::string& fname,
shared_ptr<Logger>* result) override { shared_ptr<Logger>* result) override {
FILE* f = fopen(fname.c_str(), "w"); FILE* f;
{
IOSTATS_TIMER_GUARD(open_nanos);
f = fopen(fname.c_str(), "w");
}
if (f == nullptr) { if (f == nullptr) {
result->reset(); result->reset();
return IOError(fname, errno); return IOError(fname, errno);

View File

@ -17,6 +17,11 @@ void IOStatsContext::Reset() {
thread_pool_id = Env::Priority::TOTAL; thread_pool_id = Env::Priority::TOTAL;
bytes_read = 0; bytes_read = 0;
bytes_written = 0; bytes_written = 0;
open_nanos = 0;
allocate_nanos = 0;
write_nanos = 0;
range_sync_nanos = 0;
logger_nanos = 0;
} }
#define OUTPUT(counter) #counter << " = " << counter << ", " #define OUTPUT(counter) #counter << " = " << counter << ", "
@ -25,7 +30,13 @@ std::string IOStatsContext::ToString() const {
std::ostringstream ss; std::ostringstream ss;
ss << OUTPUT(thread_pool_id) ss << OUTPUT(thread_pool_id)
<< OUTPUT(bytes_read) << OUTPUT(bytes_read)
<< OUTPUT(bytes_written); << OUTPUT(bytes_written)
<< OUTPUT(open_nanos)
<< OUTPUT(allocate_nanos)
<< OUTPUT(write_nanos)
<< OUTPUT(range_sync_nanos)
<< OUTPUT(logger_nanos);
return ss.str(); return ss.str();
} }

View File

@ -5,6 +5,7 @@
// //
#pragma once #pragma once
#include "rocksdb/iostats_context.h" #include "rocksdb/iostats_context.h"
#include "util/perf_step_timer.h"
#ifndef IOS_CROSS_COMPILE #ifndef IOS_CROSS_COMPILE
@ -33,6 +34,18 @@
#define IOSTATS(metric) \ #define IOSTATS(metric) \
(iostats_context.metric) (iostats_context.metric)
// Stop the timer and update the metric
#define IOSTATS_TIMER_STOP(metric) \
iostats_step_timer_ ## metric.Stop();
#define IOSTATS_TIMER_START(metric) \
iostats_step_timer_ ## metric.Start();
// Declare and set start time of the timer
#define IOSTATS_TIMER_GUARD(metric) \
PerfStepTimer iostats_step_timer_ ## metric(&(iostats_context.metric)); \
iostats_step_timer_ ## metric.Start();
#else // IOS_CROSS_COMPILE #else // IOS_CROSS_COMPILE
#define IOSTATS_ADD(metric, value) #define IOSTATS_ADD(metric, value)
@ -43,4 +56,8 @@
#define IOSTATS_THREAD_POOL_ID() #define IOSTATS_THREAD_POOL_ID()
#define IOSTATS(metric) 0 #define IOSTATS(metric) 0
#define IOSTATS_TIMER_GUARD(metric)
#define IOSTATS_TIMER_STOP(metric)
#define IOSTATS_TIMER_START(metric)
#endif // IOS_CROSS_COMPILE #endif // IOS_CROSS_COMPILE

View File

@ -10,22 +10,12 @@
namespace rocksdb { namespace rocksdb {
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) #if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
PerfLevel perf_level = kEnableCount;
// This is a dummy variable since some place references it // This is a dummy variable since some place references it
PerfContext perf_context; PerfContext perf_context;
#else #else
__thread PerfLevel perf_level = kEnableCount;
__thread PerfContext perf_context; __thread PerfContext perf_context;
#endif #endif
void SetPerfLevel(PerfLevel level) {
perf_level = level;
}
PerfLevel GetPerfLevel() {
return perf_level;
}
void PerfContext::Reset() { void PerfContext::Reset() {
#if !defined(NPERF_CONTEXT) && !defined(IOS_CROSS_COMPILE) #if !defined(NPERF_CONTEXT) && !defined(IOS_CROSS_COMPILE)
user_key_comparison_count = 0; user_key_comparison_count = 0;
@ -53,6 +43,7 @@ void PerfContext::Reset() {
find_next_user_entry_time = 0; find_next_user_entry_time = 0;
write_pre_and_post_process_time = 0; write_pre_and_post_process_time = 0;
write_memtable_time = 0; write_memtable_time = 0;
write_delay_time = 0;
db_mutex_lock_nanos = 0; db_mutex_lock_nanos = 0;
db_condition_wait_nanos = 0; db_condition_wait_nanos = 0;
merge_operator_time_nanos = 0; merge_operator_time_nanos = 0;
@ -79,7 +70,7 @@ std::string PerfContext::ToString() const {
<< OUTPUT(seek_internal_seek_time) << OUTPUT(find_next_user_entry_time) << OUTPUT(seek_internal_seek_time) << OUTPUT(find_next_user_entry_time)
<< OUTPUT(write_pre_and_post_process_time) << OUTPUT(write_memtable_time) << OUTPUT(write_pre_and_post_process_time) << OUTPUT(write_memtable_time)
<< OUTPUT(db_mutex_lock_nanos) << OUTPUT(db_condition_wait_nanos) << OUTPUT(db_mutex_lock_nanos) << OUTPUT(db_condition_wait_nanos)
<< OUTPUT(merge_operator_time_nanos); << OUTPUT(merge_operator_time_nanos) << OUTPUT(write_delay_time);
return ss.str(); return ss.str();
#endif #endif
} }

View File

@ -5,6 +5,7 @@
// //
#pragma once #pragma once
#include "rocksdb/perf_context.h" #include "rocksdb/perf_context.h"
#include "util/perf_step_timer.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
namespace rocksdb { namespace rocksdb {
@ -19,49 +20,6 @@ namespace rocksdb {
#else #else
extern __thread PerfLevel perf_level;
class PerfStepTimer {
public:
PerfStepTimer(uint64_t* metric)
: enabled_(perf_level >= PerfLevel::kEnableTime),
env_(enabled_ ? Env::Default() : nullptr),
start_(0),
metric_(metric) {
}
~PerfStepTimer() {
Stop();
}
void Start() {
if (enabled_) {
start_ = env_->NowNanos();
}
}
void Measure() {
if (start_) {
uint64_t now = env_->NowNanos();
*metric_ += now - start_;
start_ = now;
}
}
void Stop() {
if (start_) {
*metric_ += env_->NowNanos() - start_;
start_ = 0;
}
}
private:
const bool enabled_;
Env* const env_;
uint64_t start_;
uint64_t* metric_;
};
// Stop the timer and update the metric // Stop the timer and update the metric
#define PERF_TIMER_STOP(metric) \ #define PERF_TIMER_STOP(metric) \
perf_step_timer_ ## metric.Stop(); perf_step_timer_ ## metric.Stop();
@ -70,8 +28,8 @@ class PerfStepTimer {
perf_step_timer_ ## metric.Start(); perf_step_timer_ ## metric.Start();
// Declare and set start time of the timer // Declare and set start time of the timer
#define PERF_TIMER_GUARD(metric) \ #define PERF_TIMER_GUARD(metric) \
PerfStepTimer perf_step_timer_ ## metric(&(perf_context.metric)); \ PerfStepTimer perf_step_timer_ ## metric(&(perf_context.metric)); \
perf_step_timer_ ## metric.Start(); perf_step_timer_ ## metric.Start();
// Update metric with time elapsed since last START. start time is reset // Update metric with time elapsed since last START. start time is reset

26
util/perf_level.cc Normal file
View File

@ -0,0 +1,26 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#include <sstream>
#include "util/perf_level_imp.h"
namespace rocksdb {
#if defined(IOS_CROSS_COMPILE)
PerfLevel perf_level = kEnableCount;
#else
__thread PerfLevel perf_level = kEnableCount;
#endif
void SetPerfLevel(PerfLevel level) {
perf_level = level;
}
PerfLevel GetPerfLevel() {
return perf_level;
}
} // namespace rocksdb

17
util/perf_level_imp.h Normal file
View File

@ -0,0 +1,17 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#pragma once
#include "rocksdb/perf_level.h"
namespace rocksdb {
#if defined(IOS_CROSS_COMPILE)
extern PerfLevel perf_level;
#else
extern __thread PerfLevel perf_level;
#endif
} // namespace rocksdb

54
util/perf_step_timer.h Normal file
View File

@ -0,0 +1,54 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#pragma once
#include "rocksdb/env.h"
#include "util/perf_level_imp.h"
#include "util/stop_watch.h"
namespace rocksdb {
class PerfStepTimer {
public:
PerfStepTimer(uint64_t* metric)
: enabled_(perf_level >= PerfLevel::kEnableTime),
env_(enabled_ ? Env::Default() : nullptr),
start_(0),
metric_(metric) {
}
~PerfStepTimer() {
Stop();
}
void Start() {
if (enabled_) {
start_ = env_->NowNanos();
}
}
void Measure() {
if (start_) {
uint64_t now = env_->NowNanos();
*metric_ += now - start_;
start_ = now;
}
}
void Stop() {
if (start_) {
*metric_ += env_->NowNanos() - start_;
start_ = 0;
}
}
private:
const bool enabled_;
Env* const env_;
uint64_t start_;
uint64_t* metric_;
};
} // namespace rocksdb

View File

@ -21,6 +21,7 @@
#include <linux/falloc.h> #include <linux/falloc.h>
#endif #endif
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "util/iostats_context_imp.h"
#include <atomic> #include <atomic>
namespace rocksdb { namespace rocksdb {
@ -61,6 +62,8 @@ class PosixLogger : public Logger {
using Logger::Logv; using Logger::Logv;
virtual void Logv(const char* format, va_list ap) override { virtual void Logv(const char* format, va_list ap) override {
IOSTATS_TIMER_GUARD(logger_nanos);
const uint64_t thread_id = (*gettid_)(); const uint64_t thread_id = (*gettid_)();
// We try twice: the first time with a fixed-size stack allocated buffer, // We try twice: the first time with a fixed-size stack allocated buffer,