diff --git a/db/db_impl.cc b/db/db_impl.cc index 412146a3e..4720742ae 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3103,25 +3103,36 @@ Status DBImpl::Write(const WriteOptions& write_options, WriteBatch* my_batch) { // REQUIRES: mutex_ is held // REQUIRES: this thread is currently at the front of the writer queue Status DBImpl::DelayWrite(uint64_t expiration_time) { - StopWatch sw(env_, stats_, WRITE_STALL); - bool has_timeout = (expiration_time > 0); - auto delay = write_controller_.GetDelay(); - if (write_controller_.IsStopped() == false && delay > 0) { - mutex_.Unlock(); - // hopefully we don't have to sleep more than 2 billion microseconds - env_->SleepForMicroseconds(static_cast(delay)); - mutex_.Lock(); - } - - while (bg_error_.ok() && write_controller_.IsStopped()) { - if (has_timeout) { - bg_cv_.TimedWait(expiration_time); - if (env_->NowMicros() > expiration_time) { - return Status::TimedOut(); - } - } else { - bg_cv_.Wait(); + uint64_t time_delayed = 0; + bool delayed = false; + { + StopWatch sw(env_, stats_, WRITE_STALL, &time_delayed); + bool has_timeout = (expiration_time > 0); + auto delay = write_controller_.GetDelay(); + if (write_controller_.IsStopped() == false && delay > 0) { + mutex_.Unlock(); + delayed = true; + // hopefully we don't have to sleep more than 2 billion microseconds + env_->SleepForMicroseconds(static_cast(delay)); + mutex_.Lock(); } + + while (bg_error_.ok() && write_controller_.IsStopped()) { + delayed = true; + if (has_timeout) { + bg_cv_.TimedWait(expiration_time); + if (env_->NowMicros() > expiration_time) { + return Status::TimedOut(); + } + } else { + bg_cv_.Wait(); + } + } + } + if (delayed) { + default_cf_internal_stats_->AddDBStats(InternalStats::WRITE_STALL_MICROS, + time_delayed); + RecordTick(stats_, STALL_MICROS, time_delayed); } return bg_error_; diff --git a/db/internal_stats.cc b/db/internal_stats.cc index c14a03c12..e27e74de7 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -300,6 +300,7 @@ void InternalStats::DumpDBStats(std::string* value) { uint64_t wal_bytes = db_stats_[InternalStats::WAL_FILE_BYTES]; uint64_t wal_synced = db_stats_[InternalStats::WAL_FILE_SYNCED]; uint64_t write_with_wal = db_stats_[InternalStats::WRITE_WITH_WAL]; + uint64_t write_stall_micros = db_stats_[InternalStats::WRITE_STALL_MICROS]; // Data // writes: total number of write requests. // keys: total number of key updates issued by all the write requests @@ -311,10 +312,11 @@ void InternalStats::DumpDBStats(std::string* value) { // The format is the same for interval stats. snprintf(buf, sizeof(buf), "Cumulative writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64 - " batches, %.1f writes per batch, %.2f GB user ingest\n", + " batches, %.1f writes per batch, %.2f GB user ingest, " + "stall micros: %" PRIu64 "\n", write_other + write_self, num_keys_written, write_self, (write_other + write_self) / static_cast(write_self + 1), - user_bytes_written / kGB); + user_bytes_written / kGB, write_stall_micros); value->append(buf); // WAL snprintf(buf, sizeof(buf), @@ -332,12 +334,14 @@ void InternalStats::DumpDBStats(std::string* value) { num_keys_written - db_stats_snapshot_.num_keys_written; snprintf(buf, sizeof(buf), "Interval writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64 - " batches, %.1f writes per batch, %.1f MB user ingest\n", + " batches, %.1f writes per batch, %.1f MB user ingest, " + "stall micros: %" PRIu64 "\n", interval_write_other + interval_write_self, interval_num_keys_written, interval_write_self, static_cast(interval_write_other + interval_write_self) / (interval_write_self + 1), - (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB); + (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB, + write_stall_micros - db_stats_snapshot_.write_stall_micros); value->append(buf); uint64_t interval_write_with_wal = @@ -363,6 +367,7 @@ void InternalStats::DumpDBStats(std::string* value) { db_stats_snapshot_.wal_bytes = wal_bytes; db_stats_snapshot_.wal_synced = wal_synced; db_stats_snapshot_.write_with_wal = write_with_wal; + db_stats_snapshot_.write_stall_micros = write_stall_micros; } void InternalStats::DumpCFStats(std::string* value) { diff --git a/db/internal_stats.h b/db/internal_stats.h index 702008032..c1d77b6b6 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -77,6 +77,7 @@ class InternalStats { WRITE_DONE_BY_OTHER, WRITE_DONE_BY_SELF, WRITE_WITH_WAL, + WRITE_STALL_MICROS, INTERNAL_DB_STATS_ENUM_MAX, }; @@ -287,6 +288,8 @@ class InternalStats { // to multiple keys. num_keys_written is total number of keys updated by all // those writes. uint64_t num_keys_written; + // Total time writes delayed by stalls. + uint64_t write_stall_micros; double seconds_up; DBStatsSnapshot() @@ -297,6 +300,7 @@ class InternalStats { write_other(0), write_self(0), num_keys_written(0), + write_stall_micros(0), seconds_up(0) {} } db_stats_snapshot_; @@ -334,6 +338,7 @@ class InternalStats { WRITE_DONE_BY_OTHER, WRITE_DONE_BY_SELF, WRITE_WITH_WAL, + WRITE_STALL_MICROS, INTERNAL_DB_STATS_ENUM_MAX, }; diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 87ac321c9..7d0dad5d6 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -73,12 +73,14 @@ enum Tickers : uint32_t { NO_FILE_CLOSES, NO_FILE_OPENS, NO_FILE_ERRORS, - // Time system had to wait to do LO-L1 compactions + // DEPRECATED Time system had to wait to do LO-L1 compactions STALL_L0_SLOWDOWN_MICROS, - // Time system had to wait to move memtable to L1. + // DEPRECATED Time system had to wait to move memtable to L1. STALL_MEMTABLE_COMPACTION_MICROS, - // write throttle because of too many files in L0 + // DEPRECATED write throttle because of too many files in L0 STALL_L0_NUM_FILES_MICROS, + // Writer has to wait for compaction or flush to finish. + STALL_MICROS, RATE_LIMIT_DELAY_MILLIS, NO_ITERATORS, // number of iterators currently open @@ -160,6 +162,7 @@ const std::vector> TickersNameMap = { {STALL_L0_SLOWDOWN_MICROS, "rocksdb.l0.slowdown.micros"}, {STALL_MEMTABLE_COMPACTION_MICROS, "rocksdb.memtable.compaction.micros"}, {STALL_L0_NUM_FILES_MICROS, "rocksdb.l0.num.files.stall.micros"}, + {STALL_MICROS, "rocksdb.stall.micros"}, {RATE_LIMIT_DELAY_MILLIS, "rocksdb.rate.limit.delay.millis"}, {NO_ITERATORS, "rocksdb.num.iterators"}, {NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get"}, @@ -188,8 +191,7 @@ const std::vector> TickersNameMap = { {NUMBER_SUPERVERSION_ACQUIRES, "rocksdb.number.superversion_acquires"}, {NUMBER_SUPERVERSION_RELEASES, "rocksdb.number.superversion_releases"}, {NUMBER_SUPERVERSION_CLEANUPS, "rocksdb.number.superversion_cleanups"}, - {NUMBER_BLOCK_NOT_COMPRESSED, "rocksdb.number.block.not_compressed"}, -}; + {NUMBER_BLOCK_NOT_COMPRESSED, "rocksdb.number.block.not_compressed"}, }; /** * Keep adding histogram's here.