2016-02-09 15:12:00 -08:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2013-11-18 10:12:08 -08:00
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
2013-08-23 08:38:13 -07:00
|
|
|
#ifndef STORAGE_ROCKSDB_INCLUDE_PERF_CONTEXT_H
|
|
|
|
#define STORAGE_ROCKSDB_INCLUDE_PERF_CONTEXT_H
|
2013-08-12 23:59:04 -07:00
|
|
|
|
|
|
|
#include <stdint.h>
|
2014-03-05 10:32:54 -08:00
|
|
|
#include <string>
|
2013-08-12 23:59:04 -07:00
|
|
|
|
2015-06-02 02:07:58 -07:00
|
|
|
#include "rocksdb/perf_level.h"
|
[RocksDB] Added nano second stopwatch and new perf counters to track block read cost
Summary: The pupose of this diff is to expose per user-call level precise timing of block read, so that we can answer questions like: a Get() costs me 100ms, is that somehow related to loading blocks from file system, or sth else? We will answer that with EXACTLY how many blocks have been read, how much time was spent on transfering the bytes from os, how much time was spent on checksum verification and how much time was spent on block decompression, just for that one Get. A nano second stopwatch was introduced to track time with higher precision. The cost/precision of the stopwatch is also measured in unit-test. On my dev box, retrieving one time instance costs about 30ns, on average. The deviation of timing results is good enough to track 100ns-1us level events. And the overhead could be safely ignored for 100us level events (10000 instances/s), for example, a viewstate thrift call.
Test Plan: perf_context_test, also testing with viewstate shadow traffic.
Reviewers: dhruba
Reviewed By: dhruba
CC: leveldb, xjin
Differential Revision: https://reviews.facebook.net/D12351
2013-06-03 23:09:15 -07:00
|
|
|
|
2015-06-02 02:07:58 -07:00
|
|
|
namespace rocksdb {
|
2014-07-10 11:35:07 -07:00
|
|
|
|
2013-08-12 23:59:04 -07:00
|
|
|
// A thread local context for gathering performance counter efficiently
|
|
|
|
// and transparently.
|
2015-06-02 02:07:58 -07:00
|
|
|
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
|
2013-08-12 23:59:04 -07:00
|
|
|
|
|
|
|
struct PerfContext {
|
|
|
|
|
|
|
|
void Reset(); // reset all performance counters to zero
|
|
|
|
|
2016-02-01 13:41:13 -08:00
|
|
|
std::string ToString(bool exclude_zero_counters = false) const;
|
2014-03-05 10:32:54 -08:00
|
|
|
|
2013-08-12 23:59:04 -07:00
|
|
|
uint64_t user_key_comparison_count; // total number of user key comparisons
|
2013-10-22 23:26:51 -07:00
|
|
|
uint64_t block_cache_hit_count; // total number of block cache hits
|
|
|
|
uint64_t block_read_count; // total number of block reads (with IO)
|
|
|
|
uint64_t block_read_byte; // total number of bytes from block reads
|
2015-10-13 17:24:13 -07:00
|
|
|
uint64_t block_read_time; // total nanos spent on block reads
|
|
|
|
uint64_t block_checksum_time; // total nanos spent on block checksum
|
|
|
|
uint64_t block_decompress_time; // total nanos spent on block decompression
|
2016-03-22 09:41:31 -07:00
|
|
|
// total number of internal keys skipped over during iteration.
|
|
|
|
// There are several reasons for it:
|
2016-04-04 12:26:01 -07:00
|
|
|
// 1. when calling Next(), the iterator is in the position of the previous
|
|
|
|
// key, so that we'll need to skip it. It means this counter will always
|
|
|
|
// be incremented in Next().
|
|
|
|
// 2. when calling Next(), we need to skip internal entries for the previous
|
|
|
|
// keys that are overwritten.
|
2016-03-22 09:41:31 -07:00
|
|
|
// 3. when calling Next(), Seek() or SeekToFirst(), after previous key
|
|
|
|
// before calling Next(), the seek key in Seek() or the beginning for
|
|
|
|
// SeekToFirst(), there may be one or more deleted keys before the next
|
|
|
|
// valid key that the operation should place the iterator to. We need
|
|
|
|
// to skip both of the tombstone and updates hidden by the tombstones. The
|
|
|
|
// tombstones are not included in this counter, while previous updates
|
|
|
|
// hidden by the tombstones will be included here.
|
|
|
|
// 4. symmetric cases for Prev() and SeekToLast()
|
|
|
|
// We sometimes also skip entries of more recent updates than the snapshot
|
2016-04-04 12:26:01 -07:00
|
|
|
// we read from, but they are not included in this counter.
|
2016-03-22 09:41:31 -07:00
|
|
|
//
|
2013-10-02 10:28:25 -07:00
|
|
|
uint64_t internal_key_skipped_count;
|
2016-03-22 09:41:31 -07:00
|
|
|
// Total number of deletes and single deletes skipped over during iteration
|
2016-04-04 12:26:01 -07:00
|
|
|
// When calling Next(), Seek() or SeekToFirst(), after previous position
|
|
|
|
// before calling Next(), the seek key in Seek() or the beginning for
|
|
|
|
// SeekToFirst(), there may be one or more deleted keys before the next valid
|
|
|
|
// key. Every deleted key is counted once. We don't recount here if there are
|
|
|
|
// still older updates invalidated by the tombstones.
|
2016-03-22 09:41:31 -07:00
|
|
|
//
|
2013-10-02 10:28:25 -07:00
|
|
|
uint64_t internal_delete_skipped_count;
|
2013-11-18 11:32:54 -08:00
|
|
|
|
2015-10-13 17:24:13 -07:00
|
|
|
uint64_t get_snapshot_time; // total nanos spent on getting snapshot
|
|
|
|
uint64_t get_from_memtable_time; // total nanos spent on querying memtables
|
2013-11-18 11:32:54 -08:00
|
|
|
uint64_t get_from_memtable_count; // number of mem tables queried
|
2015-10-13 17:24:13 -07:00
|
|
|
// total nanos spent after Get() finds a key
|
2013-11-18 11:32:54 -08:00
|
|
|
uint64_t get_post_process_time;
|
2015-10-13 17:24:13 -07:00
|
|
|
uint64_t get_from_output_files_time; // total nanos reading from output files
|
|
|
|
// total nanos spent on seeking memtable
|
2015-02-27 17:06:06 -08:00
|
|
|
uint64_t seek_on_memtable_time;
|
|
|
|
// number of seeks issued on memtable
|
|
|
|
uint64_t seek_on_memtable_count;
|
2015-10-13 17:24:13 -07:00
|
|
|
// total nanos spent on seeking child iters
|
2013-11-18 11:32:54 -08:00
|
|
|
uint64_t seek_child_seek_time;
|
|
|
|
// number of seek issued in child iterators
|
|
|
|
uint64_t seek_child_seek_count;
|
2016-09-27 18:20:57 -07:00
|
|
|
uint64_t seek_min_heap_time; // total nanos spent on the merge min heap
|
|
|
|
uint64_t seek_max_heap_time; // total nanos spent on the merge max heap
|
2015-10-13 17:24:13 -07:00
|
|
|
// total nanos spent on seeking the internal entries
|
2013-11-18 11:32:54 -08:00
|
|
|
uint64_t seek_internal_seek_time;
|
2015-10-13 17:24:13 -07:00
|
|
|
// total nanos spent on iterating internal entries to find the next user entry
|
2013-11-18 11:32:54 -08:00
|
|
|
uint64_t find_next_user_entry_time;
|
2015-06-02 02:07:58 -07:00
|
|
|
|
2015-10-13 17:24:13 -07:00
|
|
|
// total nanos spent on writing to WAL
|
2015-06-02 02:07:58 -07:00
|
|
|
uint64_t write_wal_time;
|
2015-10-13 17:24:13 -07:00
|
|
|
// total nanos spent on writing to mem tables
|
2013-11-18 11:32:54 -08:00
|
|
|
uint64_t write_memtable_time;
|
2015-10-13 17:24:13 -07:00
|
|
|
// total nanos spent on delaying write
|
2015-06-02 02:07:58 -07:00
|
|
|
uint64_t write_delay_time;
|
2015-10-13 17:24:13 -07:00
|
|
|
// total nanos spent on writing a record, excluding the above three times
|
2015-06-02 02:07:58 -07:00
|
|
|
uint64_t write_pre_and_post_process_time;
|
|
|
|
|
2015-02-09 16:12:31 -08:00
|
|
|
uint64_t db_mutex_lock_nanos; // time spent on acquiring DB mutex.
|
|
|
|
// Time spent on waiting with a condition variable created with DB mutex.
|
|
|
|
uint64_t db_condition_wait_nanos;
|
2015-03-03 10:59:36 -08:00
|
|
|
// Time spent on merge operator.
|
|
|
|
uint64_t merge_operator_time_nanos;
|
2015-07-08 16:34:48 -07:00
|
|
|
|
|
|
|
// Time spent on reading index block from block cache or SST file
|
|
|
|
uint64_t read_index_block_nanos;
|
|
|
|
// Time spent on reading filter block from block cache or SST file
|
|
|
|
uint64_t read_filter_block_nanos;
|
|
|
|
// Time spent on creating data block iterator
|
|
|
|
uint64_t new_table_block_iter_nanos;
|
2015-07-10 16:09:10 -07:00
|
|
|
// Time spent on creating a iterator of an SST file.
|
2015-07-08 16:34:48 -07:00
|
|
|
uint64_t new_table_iterator_nanos;
|
|
|
|
// Time spent on seeking a key in data/index blocks
|
|
|
|
uint64_t block_seek_nanos;
|
|
|
|
// Time spent on finding or creating a table reader
|
|
|
|
uint64_t find_table_nanos;
|
2015-10-07 11:23:20 -07:00
|
|
|
// total number of mem table bloom hits
|
|
|
|
uint64_t bloom_memtable_hit_count;
|
|
|
|
// total number of mem table bloom misses
|
|
|
|
uint64_t bloom_memtable_miss_count;
|
|
|
|
// total number of SST table bloom hits
|
|
|
|
uint64_t bloom_sst_hit_count;
|
|
|
|
// total number of SST table bloom misses
|
|
|
|
uint64_t bloom_sst_miss_count;
|
2013-08-12 23:59:04 -07:00
|
|
|
};
|
|
|
|
|
2014-04-08 10:58:07 -07:00
|
|
|
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
|
2014-04-04 13:11:44 -07:00
|
|
|
extern PerfContext perf_context;
|
2015-07-16 12:10:16 -07:00
|
|
|
#elif _WIN32
|
|
|
|
extern __declspec(thread) PerfContext perf_context;
|
2014-04-04 13:11:44 -07:00
|
|
|
#else
|
2013-08-12 23:59:04 -07:00
|
|
|
extern __thread PerfContext perf_context;
|
2014-04-04 13:11:44 -07:00
|
|
|
#endif
|
2013-08-12 23:59:04 -07:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|