diff --git a/HISTORY.md b/HISTORY.md index 62e2cdcd2..9e29e9bd2 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ * Best-efforts recovery ignores CURRENT file completely. If CURRENT file is missing during recovery, best-efforts recovery still proceeds with MANIFEST file(s). * In best-efforts recovery, an error that is not Corruption or IOError::kNotFound or IOError::kPathNotFound will be overwritten silently. Fix this by checking all non-ok cases and return early. * When `file_checksum_gen_factory` is set to `GetFileChecksumGenCrc32cFactory()`, BackupEngine will compare the crc32c checksums of table files computed when creating a backup to the expected checksums stored in the DB manifest, and will fail `CreateNewBackup()` on mismatch (corruption). If the `file_checksum_gen_factory` is not set or set to any other customized factory, there is no checksum verification to detect if SST files in a DB are corrupt when read, copied, and independently checksummed by BackupEngine. +* When a DB sets `stats_dump_period_sec > 0`, either as the initial value for DB open or as a dynamic option change, the first stats dump is staggered in the following X seconds, where X is an integer in `[0, stats_dump_period_sec)`. Subsequent stats dumps are still spaced `stats_dump_period_sec` seconds apart. ### Bug fixes * Compressed block cache was automatically disabled with read-only DBs by mistake. Now it is fixed: compressed block cache will be in effective with read-only DB too. diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index ac5f2fe99..cb5308127 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -685,9 +685,18 @@ void DBImpl::StartTimedTasks() { stats_dump_period_sec = mutable_db_options_.stats_dump_period_sec; if (stats_dump_period_sec > 0) { if (!thread_dump_stats_) { + // In case of many `DB::Open()` in rapid succession we can have all + // threads dumping at once, which causes severe lock contention in + // jemalloc. Ensure successive `DB::Open()`s are staggered by at least + // one second in the common case. + static uint64_t stats_dump_threads_started = 0; thread_dump_stats_.reset(new ROCKSDB_NAMESPACE::RepeatableThread( [this]() { DBImpl::DumpStats(); }, "dump_st", env_, - static_cast(stats_dump_period_sec) * kMicrosInSecond)); + static_cast(stats_dump_period_sec) * kMicrosInSecond, + stats_dump_threads_started % + static_cast(stats_dump_period_sec) * + kMicrosInSecond)); + ++stats_dump_threads_started; } } stats_persist_period_sec = mutable_db_options_.stats_persist_period_sec; @@ -1083,10 +1092,20 @@ Status DBImpl::SetDBOptions( mutex_.Lock(); } if (new_options.stats_dump_period_sec > 0) { + // In case many DBs have `stats_dump_period_sec` enabled in rapid + // succession, we can have all threads dumping at once, which causes + // severe lock contention in jemalloc. Ensure successive enabling of + // `stats_dump_period_sec` are staggered by at least one second in the + // common case. + static uint64_t stats_dump_threads_started = 0; thread_dump_stats_.reset(new ROCKSDB_NAMESPACE::RepeatableThread( [this]() { DBImpl::DumpStats(); }, "dump_st", env_, static_cast(new_options.stats_dump_period_sec) * + kMicrosInSecond, + stats_dump_threads_started % + static_cast(new_options.stats_dump_period_sec) * kMicrosInSecond)); + ++stats_dump_threads_started; } else { thread_dump_stats_.reset(); }