rocksdb/util/statistics.h
Andrew Kryczka 7c95868378 Thread-specific ticker statistics
Summary:
The global atomics we previously used for tickers had poor cache performance
since they were typically updated from different threads, causing frequent
invalidations. In this diff,

- recordTick() updates a local ticker value specific to the thread in which it was called
- When a thread exits, its local ticker value is added into merged_sum
- getTickerCount() returns the sum of all threads' local ticker values and the merged_sum
- setTickerCount() resets all threads' local ticker values and sets merged_sum to the value provided by the caller.

In a next diff I will make a similar change for histogram stats.

Test Plan:
before:

  $ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
  $ perf report -g --stdio | grep recordTick
  7.59%  db_bench     db_bench             [.] rocksdb::StatisticsImpl::recordTick
  ...

after:

  $ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
  $ perf report -g --stdio | grep recordTick
  1.46%  db_bench     db_bench             [.] rocksdb::StatisticsImpl::recordTick
  ...

Reviewers: kradhakrishnan, MarkCallaghan, IslamAbdelRahman, sdong

Reviewed By: sdong

Subscribers: yiwu, andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D62337
2016-08-24 15:42:31 -07:00

121 lines
3.9 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#pragma once
#include "rocksdb/statistics.h"
#include <vector>
#include <atomic>
#include <string>
#include "port/likely.h"
#include "port/port.h"
#include "util/histogram.h"
#include "util/mutexlock.h"
#include "util/thread_local.h"
namespace rocksdb {
enum TickersInternal : uint32_t {
INTERNAL_TICKER_ENUM_START = TICKER_ENUM_MAX,
INTERNAL_TICKER_ENUM_MAX
};
enum HistogramsInternal : uint32_t {
INTERNAL_HISTOGRAM_START = HISTOGRAM_ENUM_MAX,
INTERNAL_HISTOGRAM_ENUM_MAX
};
class StatisticsImpl : public Statistics {
public:
StatisticsImpl(std::shared_ptr<Statistics> stats,
bool enable_internal_stats);
virtual ~StatisticsImpl();
virtual uint64_t getTickerCount(uint32_t ticker_type) const override;
virtual void histogramData(uint32_t histogram_type,
HistogramData* const data) const override;
std::string getHistogramString(uint32_t histogram_type) const override;
virtual void setTickerCount(uint32_t ticker_type, uint64_t count) override;
virtual void recordTick(uint32_t ticker_type, uint64_t count) override;
virtual void measureTime(uint32_t histogram_type, uint64_t value) override;
virtual std::string ToString() const override;
virtual bool HistEnabledForType(uint32_t type) const override;
private:
std::shared_ptr<Statistics> stats_shared_;
Statistics* stats_;
bool enable_internal_stats_;
// Synchronizes setTickerCount()/getTickerCount() operations so partially
// completed setTickerCount() won't be visible.
mutable port::Mutex aggregate_lock_;
// Holds data maintained by each thread for implementing tickers.
struct ThreadTickerInfo {
std::atomic_uint_fast64_t value;
// During teardown, value will be summed into *merged_sum.
std::atomic_uint_fast64_t* merged_sum;
ThreadTickerInfo(uint_fast64_t _value,
std::atomic_uint_fast64_t* _merged_sum)
: value(_value), merged_sum(_merged_sum) {}
};
struct Ticker {
Ticker()
: thread_value(new ThreadLocalPtr(&mergeThreadValue)), merged_sum(0) {}
// Holds thread-specific pointer to ThreadTickerInfo
std::unique_ptr<ThreadLocalPtr> thread_value;
// Sum of thread-specific values for tickers that have been reset due to
// thread termination or ThreadLocalPtr destruction. Also, this is used by
// setTickerCount() to conveniently change the global value by setting this
// while simultaneously zeroing all thread-local values.
std::atomic_uint_fast64_t merged_sum;
static void mergeThreadValue(void* ptr) {
auto info_ptr = static_cast<ThreadTickerInfo*>(ptr);
*info_ptr->merged_sum += info_ptr->value;
delete info_ptr;
}
};
// Returns the info for this tickerType/thread. It sets a new info with zeroed
// counter if none exists.
ThreadTickerInfo* getThreadTickerInfo(uint32_t tickerType);
Ticker tickers_[INTERNAL_TICKER_ENUM_MAX];
// Attributes expand to nothing depending on the platform
__declspec(align(64))
HistogramImpl histograms_[INTERNAL_HISTOGRAM_ENUM_MAX]
__attribute__((aligned(64)));
};
// Utility functions
inline void MeasureTime(Statistics* statistics, uint32_t histogram_type,
uint64_t value) {
if (statistics) {
statistics->measureTime(histogram_type, value);
}
}
inline void RecordTick(Statistics* statistics, uint32_t ticker_type,
uint64_t count = 1) {
if (statistics) {
statistics->recordTick(ticker_type, count);
}
}
inline void SetTickerCount(Statistics* statistics, uint32_t ticker_type,
uint64_t count) {
if (statistics) {
statistics->setTickerCount(ticker_type, count);
}
}
}