Support GetMapProperty() with "rocksdb.dbstats" (#9057)

Summary:
This PR supports querying `GetMapProperty()` with "rocksdb.dbstats" to get the DB-level stats in a map format. It only reports cumulative stats over the DB lifetime and, as such, does not update the baseline for interval stats. Like other map properties, the string keys are not (yet) exposed in the public API.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9057

Test Plan: new unit test

Reviewed By: zhichao-cao

Differential Revision: D31781495

Pulled By: ajkr

fbshipit-source-id: 6f77d3aee8b4b1a015061b8c260a123859ceaf9b
This commit is contained in:
Andrew Kryczka 2021-10-20 13:15:33 -07:00 committed by Facebook GitHub Bot
parent c66b4429ff
commit 4217d1bce7
5 changed files with 169 additions and 22 deletions

View File

@ -18,6 +18,7 @@
* Introduce an experimental feature to dump out the blocks from block cache and insert them to the secondary cache to reduce the cache warmup time (e.g., used while migrating DB instance). More information is in `class CacheDumper` and `CacheDumpedLoader` at `rocksdb/utilities/cache_dump_load.h`. Note that this feature is still experimental and is subject to change in the future.
* Introduced a new BlobDB configuration option `blob_garbage_collection_force_threshold`, which can be used to trigger compactions targeting the SST files which reference the oldest blob files when the ratio of garbage in those blob files meets or exceeds the specified threshold. This can reduce space amplification with skewed workloads where the affected SST files might not otherwise get picked up for compaction.
* Added EXPERIMENTAL support for table file (SST) unique identifiers that are stable and universally unique, available with new function `GetUniqueIdFromTableProperties`. Only SST files from RocksDB >= 6.24 support unique IDs.
* Added `GetMapProperty()` support for "rocksdb.dbstats" (`DB::Properties::kDBStats`). As a map property, it includes DB-level internal stats accumulated over the DB's lifetime, such as user write related stats and uptime.
### Public API change
* Made SystemClock extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.

View File

@ -19,6 +19,7 @@
#include "rocksdb/perf_context.h"
#include "rocksdb/perf_level.h"
#include "rocksdb/table.h"
#include "test_util/mock_time_env.h"
#include "util/random.h"
#include "util/string_util.h"
@ -28,6 +29,25 @@ class DBPropertiesTest : public DBTestBase {
public:
DBPropertiesTest()
: DBTestBase("db_properties_test", /*env_do_fsync=*/false) {}
// Verifies the contents of a DB-stats map (as filled in by
// GetMapProperty(DB::Properties::kDBStats)) against the caller's expectations.
//
// Values in `db_stats` are strings, so every expected number is rendered with
// std::to_string() before comparison. "db.wal_syncs",
// "db.user_writes_by_other" and "db.user_write_stall_micros" are always
// expected to be "0". A key missing from `db_stats` makes at() throw.
// Checks run in the order listed below and stop at the first mismatch.
void AssertDbStats(const std::map<std::string, std::string>& db_stats,
                   double expected_uptime, int expected_user_bytes_written,
                   int expected_wal_bytes_written,
                   int expected_user_writes_by_self,
                   int expected_user_writes_with_wal) {
  const std::string zero = "0";
  // (map key, expected string value), in verification order.
  const std::pair<const char*, std::string> expectations[] = {
      {"db.uptime", std::to_string(expected_uptime)},
      {"db.wal_bytes_written", std::to_string(expected_wal_bytes_written)},
      {"db.wal_syncs", zero},
      {"db.user_bytes_written", std::to_string(expected_user_bytes_written)},
      {"db.user_writes_by_other", zero},
      {"db.user_writes_by_self", std::to_string(expected_user_writes_by_self)},
      {"db.user_writes_with_wal",
       std::to_string(expected_user_writes_with_wal)},
      {"db.user_write_stall_micros", zero},
  };
  for (const auto& expectation : expectations) {
    ASSERT_EQ(expectation.second, db_stats.at(expectation.first));
  }
}
};
#ifndef ROCKSDB_LITE
@ -1895,7 +1915,80 @@ TEST_F(DBPropertiesTest, BlockCacheProperties) {
ASSERT_EQ(0, value);
}
// Exercises GetMapProperty() with DB::Properties::kDBStats and checks the
// reported values after each step: right after reopening, after advancing a
// mock clock, after a write with WAL disabled, and after a write with WAL
// enabled.
TEST_F(DBPropertiesTest, GetMapPropertyDbStats) {
// Install a mock clock so that the reported uptime only moves when the test
// explicitly advances it via SleepForMicroseconds().
auto mock_clock = std::make_shared<MockSystemClock>(env_->GetSystemClock());
CompositeEnvWrapper env(env_, mock_clock);
Options opts = CurrentOptions();
opts.env = &env;
Reopen(opts);
{
// Right after reopening: zero uptime and all counters at zero.
std::map<std::string, std::string> db_stats;
ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats));
AssertDbStats(db_stats, 0.0 /* expected_uptime */,
0 /* expected_user_bytes_written */,
0 /* expected_wal_bytes_written */,
0 /* expected_user_writes_by_self */,
0 /* expected_user_writes_with_wal */);
}
{
// Advance the mock clock by 1.5 seconds; only the uptime is expected to
// change.
mock_clock->SleepForMicroseconds(1500000);
std::map<std::string, std::string> db_stats;
ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats));
AssertDbStats(db_stats, 1.5 /* expected_uptime */,
0 /* expected_user_bytes_written */,
0 /* expected_wal_bytes_written */,
0 /* expected_user_writes_by_self */,
0 /* expected_user_writes_with_wal */);
}
// Running total of batch sizes written by the test (both writes below).
int expected_user_bytes_written = 0;
{
// Write with WAL disabled.
// Expected effect: user bytes and writes-by-self go up, while the WAL
// bytes and writes-with-WAL counters stay at zero.
WriteOptions write_opts;
write_opts.disableWAL = true;
WriteBatch batch;
ASSERT_OK(batch.Put("key", "val"));
expected_user_bytes_written += static_cast<int>(batch.GetDataSize());
ASSERT_OK(db_->Write(write_opts, &batch));
std::map<std::string, std::string> db_stats;
ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats));
AssertDbStats(db_stats, 1.5 /* expected_uptime */,
expected_user_bytes_written,
0 /* expected_wal_bytes_written */,
1 /* expected_user_writes_by_self */,
0 /* expected_user_writes_with_wal */);
}
// Total of batch sizes written through the WAL (only the write below).
int expected_wal_bytes_written = 0;
{
// Write with WAL enabled.
// The batch size is expected to count toward both the user bytes and the
// WAL bytes; the stats are cumulative, so writes-by-self becomes 2.
WriteBatch batch;
ASSERT_OK(batch.Delete("key"));
expected_user_bytes_written += static_cast<int>(batch.GetDataSize());
expected_wal_bytes_written += static_cast<int>(batch.GetDataSize());
ASSERT_OK(db_->Write(WriteOptions(), &batch));
std::map<std::string, std::string> db_stats;
ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats));
AssertDbStats(db_stats, 1.5 /* expected_uptime */,
expected_user_bytes_written, expected_wal_bytes_written,
2 /* expected_user_writes_by_self */,
1 /* expected_user_writes_with_wal */);
}
// Close the DB while the local `env` (referenced through opts.env) is still
// alive.
Close();
}
#endif // ROCKSDB_LITE
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {

View File

@ -60,6 +60,25 @@ const std::map<LevelStatType, LevelStat> InternalStats::compaction_level_stats =
{LevelStatType::W_BLOB_GB, LevelStat{"WblobGB", "Wblob(GB)"}},
};
// Maps each DB-level internal stat type to the info (currently just the key
// name) used when the stat is reported through the DB-stats map property.
// DumpDBMapStats() looks up every value below kIntStatsNumMax with at(), so a
// stat type missing from this table would make that lookup throw.
// NOTE(review): presumably this covers the whole InternalDBStatsType enum;
// confirm whenever a new stat type is added.
const std::map<InternalStats::InternalDBStatsType, DBStatInfo>
InternalStats::db_stats_type_to_info = {
{InternalStats::kIntStatsWalFileBytes,
DBStatInfo{"db.wal_bytes_written"}},
{InternalStats::kIntStatsWalFileSynced, DBStatInfo{"db.wal_syncs"}},
{InternalStats::kIntStatsBytesWritten,
DBStatInfo{"db.user_bytes_written"}},
{InternalStats::kIntStatsNumKeysWritten,
DBStatInfo{"db.user_keys_written"}},
{InternalStats::kIntStatsWriteDoneByOther,
DBStatInfo{"db.user_writes_by_other"}},
{InternalStats::kIntStatsWriteDoneBySelf,
DBStatInfo{"db.user_writes_by_self"}},
{InternalStats::kIntStatsWriteWithWal,
DBStatInfo{"db.user_writes_with_wal"}},
{InternalStats::kIntStatsWriteStallMicros,
DBStatInfo{"db.user_write_stall_micros"}},
};
namespace {
const double kMB = 1048576.0;
const double kGB = kMB * 1024;
@ -408,7 +427,8 @@ const std::unordered_map<std::string, DBPropertyInfo>
{false, &InternalStats::HandleCFFileHistogram, nullptr, nullptr,
nullptr}},
{DB::Properties::kDBStats,
{false, &InternalStats::HandleDBStats, nullptr, nullptr, nullptr}},
{false, &InternalStats::HandleDBStats, nullptr,
&InternalStats::HandleDBMapStats, nullptr}},
{DB::Properties::kBlockCacheEntryStats,
{true, &InternalStats::HandleBlockCacheEntryStats, nullptr,
&InternalStats::HandleBlockCacheEntryStatsMap, nullptr}},
@ -898,6 +918,12 @@ bool InternalStats::HandleCFFileHistogram(std::string* value,
return true;
}
// Map-property handler for DB-level stats. Forwards to DumpDBMapStats() to
// fill `db_stats`; the property suffix is ignored. Always returns true.
bool InternalStats::HandleDBMapStats(
std::map<std::string, std::string>* db_stats, Slice /*suffix*/) {
DumpDBMapStats(db_stats);
return true;
}
bool InternalStats::HandleDBStats(std::string* value, Slice /*suffix*/) {
DumpDBStats(value);
return true;
@ -1274,10 +1300,21 @@ bool InternalStats::HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* /*db*/,
return true;
}
void InternalStats::DumpDBMapStats(
std::map<std::string, std::string>* db_stats) {
for (int i = 0; i < static_cast<int>(kIntStatsNumMax); ++i) {
InternalDBStatsType type = static_cast<InternalDBStatsType>(i);
(*db_stats)[db_stats_type_to_info.at(type).property_name] =
std::to_string(GetDBStats(type));
}
double seconds_up = (clock_->NowMicros() - started_at_) / kMicrosInSec;
(*db_stats)["db.uptime"] = std::to_string(seconds_up);
}
void InternalStats::DumpDBStats(std::string* value) {
char buf[1000];
// DB-level stats, only available from default column family
double seconds_up = (clock_->NowMicros() - started_at_ + 1) / kMicrosInSec;
double seconds_up = (clock_->NowMicros() - started_at_) / kMicrosInSec;
double interval_seconds_up = seconds_up - db_stats_snapshot_.seconds_up;
snprintf(buf, sizeof(buf),
"\n** DB Stats **\nUptime(secs): %.1f total, %.1f interval\n",
@ -1314,8 +1351,10 @@ void InternalStats::DumpDBStats(std::string* value) {
NumberToHumanString(write_other + write_self).c_str(),
NumberToHumanString(num_keys_written).c_str(),
NumberToHumanString(write_self).c_str(),
(write_other + write_self) / static_cast<double>(write_self + 1),
user_bytes_written / kGB, user_bytes_written / kMB / seconds_up);
(write_other + write_self) /
std::max(1.0, static_cast<double>(write_self)),
user_bytes_written / kGB,
user_bytes_written / kMB / std::max(seconds_up, 0.001));
value->append(buf);
// WAL
snprintf(buf, sizeof(buf),
@ -1323,8 +1362,8 @@ void InternalStats::DumpDBStats(std::string* value) {
"%.2f writes per sync, written: %.2f GB, %.2f MB/s\n",
NumberToHumanString(write_with_wal).c_str(),
NumberToHumanString(wal_synced).c_str(),
write_with_wal / static_cast<double>(wal_synced + 1),
wal_bytes / kGB, wal_bytes / kMB / seconds_up);
write_with_wal / std::max(1.0, static_cast<double>(wal_synced)),
wal_bytes / kGB, wal_bytes / kMB / std::max(seconds_up, 0.001));
value->append(buf);
// Stall
AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen, true);
@ -1347,7 +1386,7 @@ void InternalStats::DumpDBStats(std::string* value) {
NumberToHumanString(interval_num_keys_written).c_str(),
NumberToHumanString(interval_write_self).c_str(),
static_cast<double>(interval_write_other + interval_write_self) /
(interval_write_self + 1),
std::max(1.0, static_cast<double>(interval_write_self)),
(user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB,
(user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB /
std::max(interval_seconds_up, 0.001)),
@ -1358,15 +1397,15 @@ void InternalStats::DumpDBStats(std::string* value) {
uint64_t interval_wal_synced = wal_synced - db_stats_snapshot_.wal_synced;
uint64_t interval_wal_bytes = wal_bytes - db_stats_snapshot_.wal_bytes;
snprintf(
buf, sizeof(buf),
"Interval WAL: %s writes, %s syncs, "
"%.2f writes per sync, written: %.2f GB, %.2f MB/s\n",
NumberToHumanString(interval_write_with_wal).c_str(),
NumberToHumanString(interval_wal_synced).c_str(),
interval_write_with_wal / static_cast<double>(interval_wal_synced + 1),
interval_wal_bytes / kGB,
interval_wal_bytes / kMB / std::max(interval_seconds_up, 0.001));
snprintf(buf, sizeof(buf),
"Interval WAL: %s writes, %s syncs, "
"%.2f writes per sync, written: %.2f GB, %.2f MB/s\n",
NumberToHumanString(interval_write_with_wal).c_str(),
NumberToHumanString(interval_wal_synced).c_str(),
interval_write_with_wal /
std::max(1.0, static_cast<double>(interval_wal_synced)),
interval_wal_bytes / kGB,
interval_wal_bytes / kMB / std::max(interval_seconds_up, 0.001));
value->append(buf);
// Stall
@ -1614,7 +1653,7 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) {
value->append(buf);
uint64_t now_micros = clock_->NowMicros();
double seconds_up = (now_micros - started_at_ + 1) / kMicrosInSec;
double seconds_up = (now_micros - started_at_) / kMicrosInSec;
double interval_seconds_up = seconds_up - cf_stats_snapshot_.seconds_up;
snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n",
seconds_up, interval_seconds_up);
@ -1664,8 +1703,10 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) {
snprintf(buf, sizeof(buf),
"Cumulative compaction: %.2f GB write, %.2f MB/s write, "
"%.2f GB read, %.2f MB/s read, %.1f seconds\n",
compact_bytes_write / kGB, compact_bytes_write / kMB / seconds_up,
compact_bytes_read / kGB, compact_bytes_read / kMB / seconds_up,
compact_bytes_write / kGB,
compact_bytes_write / kMB / std::max(seconds_up, 0.001),
compact_bytes_read / kGB,
compact_bytes_read / kMB / std::max(seconds_up, 0.001),
compact_micros / kMicrosInSec);
value->append(buf);

View File

@ -96,6 +96,11 @@ struct LevelStat {
std::string header_name;
};
// Describes how a single DB-level stat is exposed to the user.
struct DBStatInfo {
// This is what will be the property name (the key) in the flat map returned
// to the user.
std::string property_name;
};
class InternalStats {
public:
static const std::map<LevelStatType, LevelStat> compaction_level_stats;
@ -130,6 +135,8 @@ class InternalStats {
kIntStatsNumMax,
};
static const std::map<InternalDBStatsType, DBStatInfo> db_stats_type_to_info;
InternalStats(int num_levels, SystemClock* clock, ColumnFamilyData* cfd);
// Per level compaction stats. comp_stats_[level] stores the stats for
@ -478,6 +485,7 @@ class InternalStats {
static const std::unordered_map<std::string, DBPropertyInfo> ppt_name_to_info;
private:
void DumpDBMapStats(std::map<std::string, std::string>* db_stats);
void DumpDBStats(std::string* value);
void DumpCFMapStats(std::map<std::string, std::string>* cf_stats);
void DumpCFMapStats(
@ -610,6 +618,8 @@ class InternalStats {
bool HandleCFStats(std::string* value, Slice suffix);
bool HandleCFStatsNoFileHistogram(std::string* value, Slice suffix);
bool HandleCFFileHistogram(std::string* value, Slice suffix);
// Map-property handler for DB-level stats: fills `db_stats` with the
// DB-level stats as string key/value pairs. `suffix` is unused by the
// implementation.
bool HandleDBMapStats(std::map<std::string, std::string>* db_stats,
                      Slice suffix);
bool HandleDBStats(std::string* value, Slice suffix);
bool HandleSsTables(std::string* value, Slice suffix);
bool HandleAggregatedTableProperties(std::string* value, Slice suffix);

View File

@ -789,9 +789,11 @@ class DB {
// level, as well as the histogram of latency of single requests.
static const std::string kCFFileHistogram;
// "rocksdb.dbstats" - returns a multi-line string with general database
// stats, both cumulative (over the db's lifetime) and interval (since
// the last retrieval of kDBStats).
// "rocksdb.dbstats" - As a string property, returns a multi-line string
// with general database stats, both cumulative (over the db's
// lifetime) and interval (since the last retrieval of kDBStats).
// As a map property, returns cumulative stats only and does not
// update the baseline for the interval stats.
static const std::string kDBStats;
// "rocksdb.levelstats" - returns multi-line string containing the number