diff --git a/HISTORY.md b/HISTORY.md index 596639036..1e6027c4e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -18,6 +18,7 @@ * Introduce an experimental feature to dump out the blocks from block cache and insert them to the secondary cache to reduce the cache warmup time (e.g., used while migrating DB instance). More information are in `class CacheDumper` and `CacheDumpedLoader` at `rocksdb/utilities/cache_dump_load.h` Note that, this feature is subject to the potential change in the future, it is still experimental. * Introduced a new BlobDB configuration option `blob_garbage_collection_force_threshold`, which can be used to trigger compactions targeting the SST files which reference the oldest blob files when the ratio of garbage in those blob files meets or exceeds the specified threshold. This can reduce space amplification with skewed workloads where the affected SST files might not otherwise get picked up for compaction. * Added EXPERIMENTAL support for table file (SST) unique identifiers that are stable and universally unique, available with new function `GetUniqueIdFromTableProperties`. Only SST files from RocksDB >= 6.24 support unique IDs. +* Added `GetMapProperty()` support for "rocksdb.dbstats" (`DB::Properties::kDBStats`). As a map property, it includes DB-level internal stats accumulated over the DB's lifetime, such as user write related stats and uptime. ### Public API change * Made SystemClock extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. 
diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index 8ff4a30b0..8a4157aed 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -19,6 +19,7 @@ #include "rocksdb/perf_context.h" #include "rocksdb/perf_level.h" #include "rocksdb/table.h" +#include "test_util/mock_time_env.h" #include "util/random.h" #include "util/string_util.h" @@ -28,6 +29,25 @@ class DBPropertiesTest : public DBTestBase { public: DBPropertiesTest() : DBTestBase("db_properties_test", /*env_do_fsync=*/false) {} + + void AssertDbStats(const std::map& db_stats, + double expected_uptime, int expected_user_bytes_written, + int expected_wal_bytes_written, + int expected_user_writes_by_self, + int expected_user_writes_with_wal) { + ASSERT_EQ(std::to_string(expected_uptime), db_stats.at("db.uptime")); + ASSERT_EQ(std::to_string(expected_wal_bytes_written), + db_stats.at("db.wal_bytes_written")); + ASSERT_EQ("0", db_stats.at("db.wal_syncs")); + ASSERT_EQ(std::to_string(expected_user_bytes_written), + db_stats.at("db.user_bytes_written")); + ASSERT_EQ("0", db_stats.at("db.user_writes_by_other")); + ASSERT_EQ(std::to_string(expected_user_writes_by_self), + db_stats.at("db.user_writes_by_self")); + ASSERT_EQ(std::to_string(expected_user_writes_with_wal), + db_stats.at("db.user_writes_with_wal")); + ASSERT_EQ("0", db_stats.at("db.user_write_stall_micros")); + } }; #ifndef ROCKSDB_LITE @@ -1895,7 +1915,80 @@ TEST_F(DBPropertiesTest, BlockCacheProperties) { ASSERT_EQ(0, value); } +TEST_F(DBPropertiesTest, GetMapPropertyDbStats) { + auto mock_clock = std::make_shared(env_->GetSystemClock()); + CompositeEnvWrapper env(env_, mock_clock); + + Options opts = CurrentOptions(); + opts.env = &env; + Reopen(opts); + + { + std::map db_stats; + ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats)); + AssertDbStats(db_stats, 0.0 /* expected_uptime */, + 0 /* expected_user_bytes_written */, + 0 /* expected_wal_bytes_written */, + 0 /* expected_user_writes_by_self */, + 0 /* 
expected_user_writes_with_wal */); + } + + { + mock_clock->SleepForMicroseconds(1500000); + + std::map db_stats; + ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats)); + AssertDbStats(db_stats, 1.5 /* expected_uptime */, + 0 /* expected_user_bytes_written */, + 0 /* expected_wal_bytes_written */, + 0 /* expected_user_writes_by_self */, + 0 /* expected_user_writes_with_wal */); + } + + int expected_user_bytes_written = 0; + { + // Write with WAL disabled. + WriteOptions write_opts; + write_opts.disableWAL = true; + + WriteBatch batch; + ASSERT_OK(batch.Put("key", "val")); + expected_user_bytes_written += static_cast(batch.GetDataSize()); + + ASSERT_OK(db_->Write(write_opts, &batch)); + + std::map db_stats; + ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats)); + AssertDbStats(db_stats, 1.5 /* expected_uptime */, + expected_user_bytes_written, + 0 /* expected_wal_bytes_written */, + 1 /* expected_user_writes_by_self */, + 0 /* expected_user_writes_with_wal */); + } + + int expected_wal_bytes_written = 0; + { + // Write with WAL enabled. 
+ WriteBatch batch; + ASSERT_OK(batch.Delete("key")); + expected_user_bytes_written += static_cast(batch.GetDataSize()); + expected_wal_bytes_written += static_cast(batch.GetDataSize()); + + ASSERT_OK(db_->Write(WriteOptions(), &batch)); + + std::map db_stats; + ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats)); + AssertDbStats(db_stats, 1.5 /* expected_uptime */, + expected_user_bytes_written, expected_wal_bytes_written, + 2 /* expected_user_writes_by_self */, + 1 /* expected_user_writes_with_wal */); + } + + Close(); +} + #endif // ROCKSDB_LITE + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 16d3289b9..dd618983f 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -60,6 +60,25 @@ const std::map InternalStats::compaction_level_stats = {LevelStatType::W_BLOB_GB, LevelStat{"WblobGB", "Wblob(GB)"}}, }; +const std::map + InternalStats::db_stats_type_to_info = { + {InternalStats::kIntStatsWalFileBytes, + DBStatInfo{"db.wal_bytes_written"}}, + {InternalStats::kIntStatsWalFileSynced, DBStatInfo{"db.wal_syncs"}}, + {InternalStats::kIntStatsBytesWritten, + DBStatInfo{"db.user_bytes_written"}}, + {InternalStats::kIntStatsNumKeysWritten, + DBStatInfo{"db.user_keys_written"}}, + {InternalStats::kIntStatsWriteDoneByOther, + DBStatInfo{"db.user_writes_by_other"}}, + {InternalStats::kIntStatsWriteDoneBySelf, + DBStatInfo{"db.user_writes_by_self"}}, + {InternalStats::kIntStatsWriteWithWal, + DBStatInfo{"db.user_writes_with_wal"}}, + {InternalStats::kIntStatsWriteStallMicros, + DBStatInfo{"db.user_write_stall_micros"}}, +}; + namespace { const double kMB = 1048576.0; const double kGB = kMB * 1024; @@ -408,7 +427,8 @@ const std::unordered_map {false, &InternalStats::HandleCFFileHistogram, nullptr, nullptr, nullptr}}, {DB::Properties::kDBStats, - {false, &InternalStats::HandleDBStats, nullptr, nullptr, nullptr}}, + {false, &InternalStats::HandleDBStats, nullptr, + 
&InternalStats::HandleDBMapStats, nullptr}}, {DB::Properties::kBlockCacheEntryStats, {true, &InternalStats::HandleBlockCacheEntryStats, nullptr, &InternalStats::HandleBlockCacheEntryStatsMap, nullptr}}, @@ -898,6 +918,12 @@ bool InternalStats::HandleCFFileHistogram(std::string* value, return true; } +bool InternalStats::HandleDBMapStats( + std::map* db_stats, Slice /*suffix*/) { + DumpDBMapStats(db_stats); + return true; +} + bool InternalStats::HandleDBStats(std::string* value, Slice /*suffix*/) { DumpDBStats(value); return true; @@ -1274,10 +1300,21 @@ bool InternalStats::HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* /*db*/, return true; } +void InternalStats::DumpDBMapStats( + std::map* db_stats) { + for (int i = 0; i < static_cast(kIntStatsNumMax); ++i) { + InternalDBStatsType type = static_cast(i); + (*db_stats)[db_stats_type_to_info.at(type).property_name] = + std::to_string(GetDBStats(type)); + } + double seconds_up = (clock_->NowMicros() - started_at_) / kMicrosInSec; + (*db_stats)["db.uptime"] = std::to_string(seconds_up); +} + void InternalStats::DumpDBStats(std::string* value) { char buf[1000]; // DB-level stats, only available from default column family - double seconds_up = (clock_->NowMicros() - started_at_ + 1) / kMicrosInSec; + double seconds_up = (clock_->NowMicros() - started_at_) / kMicrosInSec; double interval_seconds_up = seconds_up - db_stats_snapshot_.seconds_up; snprintf(buf, sizeof(buf), "\n** DB Stats **\nUptime(secs): %.1f total, %.1f interval\n", @@ -1314,8 +1351,10 @@ void InternalStats::DumpDBStats(std::string* value) { NumberToHumanString(write_other + write_self).c_str(), NumberToHumanString(num_keys_written).c_str(), NumberToHumanString(write_self).c_str(), - (write_other + write_self) / static_cast(write_self + 1), - user_bytes_written / kGB, user_bytes_written / kMB / seconds_up); + (write_other + write_self) / + std::max(1.0, static_cast(write_self)), + user_bytes_written / kGB, + user_bytes_written / kMB / 
std::max(seconds_up, 0.001)); value->append(buf); // WAL snprintf(buf, sizeof(buf), @@ -1323,8 +1362,8 @@ void InternalStats::DumpDBStats(std::string* value) { "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n", NumberToHumanString(write_with_wal).c_str(), NumberToHumanString(wal_synced).c_str(), - write_with_wal / static_cast(wal_synced + 1), - wal_bytes / kGB, wal_bytes / kMB / seconds_up); + write_with_wal / std::max(1.0, static_cast(wal_synced)), + wal_bytes / kGB, wal_bytes / kMB / std::max(seconds_up, 0.001)); value->append(buf); // Stall AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen, true); @@ -1347,7 +1386,7 @@ void InternalStats::DumpDBStats(std::string* value) { NumberToHumanString(interval_num_keys_written).c_str(), NumberToHumanString(interval_write_self).c_str(), static_cast(interval_write_other + interval_write_self) / - (interval_write_self + 1), + std::max(1.0, static_cast(interval_write_self)), (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB, (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB / std::max(interval_seconds_up, 0.001)), @@ -1358,15 +1397,15 @@ void InternalStats::DumpDBStats(std::string* value) { uint64_t interval_wal_synced = wal_synced - db_stats_snapshot_.wal_synced; uint64_t interval_wal_bytes = wal_bytes - db_stats_snapshot_.wal_bytes; - snprintf( - buf, sizeof(buf), - "Interval WAL: %s writes, %s syncs, " - "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n", - NumberToHumanString(interval_write_with_wal).c_str(), - NumberToHumanString(interval_wal_synced).c_str(), - interval_write_with_wal / static_cast(interval_wal_synced + 1), - interval_wal_bytes / kGB, - interval_wal_bytes / kMB / std::max(interval_seconds_up, 0.001)); + snprintf(buf, sizeof(buf), + "Interval WAL: %s writes, %s syncs, " + "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n", + NumberToHumanString(interval_write_with_wal).c_str(), + NumberToHumanString(interval_wal_synced).c_str(), + interval_write_with_wal / 
+ std::max(1.0, static_cast(interval_wal_synced)), + interval_wal_bytes / kGB, + interval_wal_bytes / kMB / std::max(interval_seconds_up, 0.001)); value->append(buf); // Stall @@ -1614,7 +1653,7 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) { value->append(buf); uint64_t now_micros = clock_->NowMicros(); - double seconds_up = (now_micros - started_at_ + 1) / kMicrosInSec; + double seconds_up = (now_micros - started_at_) / kMicrosInSec; double interval_seconds_up = seconds_up - cf_stats_snapshot_.seconds_up; snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n", seconds_up, interval_seconds_up); @@ -1664,8 +1703,10 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) { snprintf(buf, sizeof(buf), "Cumulative compaction: %.2f GB write, %.2f MB/s write, " "%.2f GB read, %.2f MB/s read, %.1f seconds\n", - compact_bytes_write / kGB, compact_bytes_write / kMB / seconds_up, - compact_bytes_read / kGB, compact_bytes_read / kMB / seconds_up, + compact_bytes_write / kGB, + compact_bytes_write / kMB / std::max(seconds_up, 0.001), + compact_bytes_read / kGB, + compact_bytes_read / kMB / std::max(seconds_up, 0.001), compact_micros / kMicrosInSec); value->append(buf); diff --git a/db/internal_stats.h b/db/internal_stats.h index d3b90421e..9e100c12f 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -96,6 +96,11 @@ struct LevelStat { std::string header_name; }; +struct DBStatInfo { + // This is what will be property_name in the flat map returned to the user + std::string property_name; +}; + class InternalStats { public: static const std::map compaction_level_stats; @@ -130,6 +135,8 @@ class InternalStats { kIntStatsNumMax, }; + static const std::map db_stats_type_to_info; + InternalStats(int num_levels, SystemClock* clock, ColumnFamilyData* cfd); // Per level compaction stats. 
comp_stats_[level] stores the stats for @@ -478,6 +485,7 @@ class InternalStats { static const std::unordered_map ppt_name_to_info; private: + void DumpDBMapStats(std::map* db_stats); void DumpDBStats(std::string* value); void DumpCFMapStats(std::map* cf_stats); void DumpCFMapStats( @@ -610,6 +618,8 @@ class InternalStats { bool HandleCFStats(std::string* value, Slice suffix); bool HandleCFStatsNoFileHistogram(std::string* value, Slice suffix); bool HandleCFFileHistogram(std::string* value, Slice suffix); + bool HandleDBMapStats(std::map* db_stats, + Slice suffix); bool HandleDBStats(std::string* value, Slice suffix); bool HandleSsTables(std::string* value, Slice suffix); bool HandleAggregatedTableProperties(std::string* value, Slice suffix); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index c1203ec6c..4b70606fa 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -789,9 +789,11 @@ class DB { // level, as well as the histogram of latency of single requests. static const std::string kCFFileHistogram; - // "rocksdb.dbstats" - returns a multi-line string with general database - // stats, both cumulative (over the db's lifetime) and interval (since - // the last retrieval of kDBStats). + // "rocksdb.dbstats" - As a string property, returns a multi-line string + // with general database stats, both cumulative (over the db's + // lifetime) and interval (since the last retrieval of kDBStats). + // As a map property, returns cumulative stats only and does not + // update the baseline for the interval stats. static const std::string kDBStats; // "rocksdb.levelstats" - returns multi-line string containing the number