2019-06-17 15:17:43 -07:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
2020-08-14 20:11:35 -07:00
|
|
|
#include "rocksdb/stats_history.h"
|
|
|
|
|
2019-06-17 15:17:43 -07:00
|
|
|
#include <limits>
|
|
|
|
#include <string>
|
|
|
|
#include <unordered_map>
|
|
|
|
|
|
|
|
#include "db/column_family.h"
|
|
|
|
#include "db/db_impl/db_impl.h"
|
|
|
|
#include "db/db_test_util.h"
|
2020-10-01 19:12:26 -07:00
|
|
|
#include "db/periodic_work_scheduler.h"
|
2019-06-17 15:17:43 -07:00
|
|
|
#include "monitoring/persistent_stats_history.h"
|
|
|
|
#include "options/options_helper.h"
|
|
|
|
#include "port/stack_trace.h"
|
|
|
|
#include "rocksdb/cache.h"
|
|
|
|
#include "rocksdb/convenience.h"
|
|
|
|
#include "rocksdb/rate_limiter.h"
|
2021-01-25 22:07:26 -08:00
|
|
|
#include "test_util/mock_time_env.h"
|
2019-06-17 15:17:43 -07:00
|
|
|
#include "test_util/sync_point.h"
|
|
|
|
#include "test_util/testutil.h"
|
|
|
|
#include "util/random.h"
|
|
|
|
|
2020-02-20 12:07:53 -08:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2019-06-17 15:17:43 -07:00
|
|
|
|
2020-08-14 20:11:35 -07:00
|
|
|
#ifndef ROCKSDB_LITE
|
2019-06-17 15:17:43 -07:00
|
|
|
class StatsHistoryTest : public DBTestBase {
|
|
|
|
public:
|
2021-07-23 08:37:27 -07:00
|
|
|
StatsHistoryTest() : DBTestBase("stats_history_test", /*env_do_fsync=*/true) {
|
2021-01-25 22:07:26 -08:00
|
|
|
mock_clock_ = std::make_shared<MockSystemClock>(env_->GetSystemClock());
|
|
|
|
mock_env_.reset(new CompositeEnvWrapper(env_, mock_clock_));
|
Fix many tests to run with MEM_ENV and ENCRYPTED_ENV; Introduce a MemoryFileSystem class (#7566)
Summary:
This PR does a few things:
1. The MockFileSystem class was split out from the MockEnv. This change would theoretically allow a MockFileSystem to be used by other Environments as well (if we created a means of constructing one). The MockFileSystem implements a FileSystem in its entirety and does not rely on any Wrapper implementation.
2. Make the RocksDB test suite work when MOCK_ENV=1 and ENCRYPTED_ENV=1 are set. To accomplish this, a few things were needed:
- The tests that tried to use the "wrong" environment (Env::Default() instead of env_) were updated
- The MockFileSystem was changed to support the features it was missing or mishandled (such as recursively deleting files in a directory or supporting renaming of a directory).
3. Updated the test framework to have a ROCKSDB_GTEST_SKIP macro. This can be used to flag tests that are skipped. Currently, this defaults to doing nothing (marks the test as SUCCESS) but will mark the tests as SKIPPED when RocksDB is upgraded to a version of gtest that supports this (gtest-1.10).
I have run a full "make check" with MEM_ENV, ENCRYPTED_ENV, both, and neither under both MacOS and RedHat. A few tests were disabled/skipped for the MEM/ENCRYPTED cases. The error_handler_fs_test fails/hangs for MEM_ENV (presumably a timing problem) and I will introduce another PR/issue to track that problem. (I will also push a change to disable those tests soon). There is one more test in DBTest2 that also fails which I need to investigate or skip before this PR is merged.
Theoretically, this PR should also allow the test suite to run against an Env loaded from the registry, though I do not have one to try it with currently.
Finally, once this is accepted, it would be nice if there was a CircleCI job to run these tests on a checkin so this effort does not become stale. I do not know how to do that, so if someone could write that job, it would be appreciated :)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7566
Reviewed By: zhichao-cao
Differential Revision: D24408980
Pulled By: jay-zhuang
fbshipit-source-id: 911b1554a4d0da06fd51feca0c090a4abdcb4a5f
2020-10-27 10:31:34 -07:00
|
|
|
}
|
Fix+clean up handling of mock sleeps (#7101)
Summary:
We have a number of tests hanging on MacOS and windows due to
mishandling of code for mock sleeps. In addition, the code was in
terrible shape because the same variable (addon_time_) would sometimes
refer to microseconds and sometimes to seconds. One test even assumed it
was nanoseconds but was written to pass anyway.
This has been cleaned up so that DB tests generally use a SpecialEnv
function to mock sleep, for either some number of microseconds or seconds
depending on the function called. But to call one of these, the test must first
call SetMockSleep (precondition enforced with assertion), which also turns
sleeps in RocksDB into mock sleeps. To also removes accounting for actual
clock time, call SetTimeElapseOnlySleepOnReopen, which implies
SetMockSleep (on DB re-open). This latter setting only works by applying
on DB re-open, otherwise havoc can ensue if Env goes back in time with
DB open.
More specifics:
Removed some unused test classes, and updated comments on the general
problem.
Fixed DBSSTTest.GetTotalSstFilesSize using a sync point callback instead
of mock time. For this we have the only modification to production code,
inserting a sync point callback in flush_job.cc, which is not a change to
production behavior.
Removed unnecessary resetting of mock times to 0 in many tests. RocksDB
deals in relative time. Any behaviors relying on absolute date/time are likely
a bug. (The above test DBSSTTest.GetTotalSstFilesSize was the only one
clearly injecting a specific absolute time for actual testing convenience.) Just
in case I misunderstood some test, I put this note in each replacement:
// NOTE: Presumed unnecessary and removed: resetting mock time in env
Strengthened some tests like MergeTestTime, MergeCompactionTimeTest, and
FilterCompactionTimeTest in db_test.cc
stats_history_test and blob_db_test are each their own beast, rather deeply
dependent on MockTimeEnv. Each gets its own variant of a work-around for
TimedWait in a mock time environment. (Reduces redundancy and
inconsistency in stats_history_test.)
Intended follow-up:
Remove TimedWait from the public API of InstrumentedCondVar, and only
make that accessible through Env by passing in an InstrumentedCondVar and
a deadline. Then the Env implementations mocking time can fix this problem
without using sync points. (Test infrastructure using sync points interferes
with individual tests' control over sync points.)
With that change, we can simplify/consolidate the scattered work-arounds.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7101
Test Plan: make check on Linux and MacOS
Reviewed By: zhichao-cao
Differential Revision: D23032815
Pulled By: pdillinger
fbshipit-source-id: 7f33967ada8b83011fb54e8279365c008bd6610b
2020-08-11 12:39:49 -07:00
|
|
|
|
2020-08-14 20:11:35 -07:00
|
|
|
protected:
|
2021-01-25 22:07:26 -08:00
|
|
|
std::shared_ptr<MockSystemClock> mock_clock_;
|
|
|
|
std::unique_ptr<Env> mock_env_;
|
2020-08-14 20:11:35 -07:00
|
|
|
|
|
|
|
void SetUp() override {
|
2021-01-25 22:07:26 -08:00
|
|
|
mock_clock_->InstallTimedWaitFixCallback();
|
Fix+clean up handling of mock sleeps (#7101)
Summary:
We have a number of tests hanging on MacOS and windows due to
mishandling of code for mock sleeps. In addition, the code was in
terrible shape because the same variable (addon_time_) would sometimes
refer to microseconds and sometimes to seconds. One test even assumed it
was nanoseconds but was written to pass anyway.
This has been cleaned up so that DB tests generally use a SpecialEnv
function to mock sleep, for either some number of microseconds or seconds
depending on the function called. But to call one of these, the test must first
call SetMockSleep (precondition enforced with assertion), which also turns
sleeps in RocksDB into mock sleeps. To also removes accounting for actual
clock time, call SetTimeElapseOnlySleepOnReopen, which implies
SetMockSleep (on DB re-open). This latter setting only works by applying
on DB re-open, otherwise havoc can ensue if Env goes back in time with
DB open.
More specifics:
Removed some unused test classes, and updated comments on the general
problem.
Fixed DBSSTTest.GetTotalSstFilesSize using a sync point callback instead
of mock time. For this we have the only modification to production code,
inserting a sync point callback in flush_job.cc, which is not a change to
production behavior.
Removed unnecessary resetting of mock times to 0 in many tests. RocksDB
deals in relative time. Any behaviors relying on absolute date/time are likely
a bug. (The above test DBSSTTest.GetTotalSstFilesSize was the only one
clearly injecting a specific absolute time for actual testing convenience.) Just
in case I misunderstood some test, I put this note in each replacement:
// NOTE: Presumed unnecessary and removed: resetting mock time in env
Strengthened some tests like MergeTestTime, MergeCompactionTimeTest, and
FilterCompactionTimeTest in db_test.cc
stats_history_test and blob_db_test are each their own beast, rather deeply
dependent on MockTimeEnv. Each gets its own variant of a work-around for
TimedWait in a mock time environment. (Reduces redundancy and
inconsistency in stats_history_test.)
Intended follow-up:
Remove TimedWait from the public API of InstrumentedCondVar, and only
make that accessible through Env by passing in an InstrumentedCondVar and
a deadline. Then the Env implementations mocking time can fix this problem
without using sync points. (Test infrastructure using sync points interferes
with individual tests' control over sync points.)
With that change, we can simplify/consolidate the scattered work-arounds.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7101
Test Plan: make check on Linux and MacOS
Reviewed By: zhichao-cao
Differential Revision: D23032815
Pulled By: pdillinger
fbshipit-source-id: 7f33967ada8b83011fb54e8279365c008bd6610b
2020-08-11 12:39:49 -07:00
|
|
|
SyncPoint::GetInstance()->SetCallBack(
|
2020-10-01 19:12:26 -07:00
|
|
|
"DBImpl::StartPeriodicWorkScheduler:Init", [&](void* arg) {
|
|
|
|
auto* periodic_work_scheduler_ptr =
|
|
|
|
reinterpret_cast<PeriodicWorkScheduler**>(arg);
|
|
|
|
*periodic_work_scheduler_ptr =
|
2021-01-25 22:07:26 -08:00
|
|
|
PeriodicWorkTestScheduler::Default(mock_clock_);
|
Fix+clean up handling of mock sleeps (#7101)
Summary:
We have a number of tests hanging on MacOS and windows due to
mishandling of code for mock sleeps. In addition, the code was in
terrible shape because the same variable (addon_time_) would sometimes
refer to microseconds and sometimes to seconds. One test even assumed it
was nanoseconds but was written to pass anyway.
This has been cleaned up so that DB tests generally use a SpecialEnv
function to mock sleep, for either some number of microseconds or seconds
depending on the function called. But to call one of these, the test must first
call SetMockSleep (precondition enforced with assertion), which also turns
sleeps in RocksDB into mock sleeps. To also removes accounting for actual
clock time, call SetTimeElapseOnlySleepOnReopen, which implies
SetMockSleep (on DB re-open). This latter setting only works by applying
on DB re-open, otherwise havoc can ensue if Env goes back in time with
DB open.
More specifics:
Removed some unused test classes, and updated comments on the general
problem.
Fixed DBSSTTest.GetTotalSstFilesSize using a sync point callback instead
of mock time. For this we have the only modification to production code,
inserting a sync point callback in flush_job.cc, which is not a change to
production behavior.
Removed unnecessary resetting of mock times to 0 in many tests. RocksDB
deals in relative time. Any behaviors relying on absolute date/time are likely
a bug. (The above test DBSSTTest.GetTotalSstFilesSize was the only one
clearly injecting a specific absolute time for actual testing convenience.) Just
in case I misunderstood some test, I put this note in each replacement:
// NOTE: Presumed unnecessary and removed: resetting mock time in env
Strengthened some tests like MergeTestTime, MergeCompactionTimeTest, and
FilterCompactionTimeTest in db_test.cc
stats_history_test and blob_db_test are each their own beast, rather deeply
dependent on MockTimeEnv. Each gets its own variant of a work-around for
TimedWait in a mock time environment. (Reduces redundancy and
inconsistency in stats_history_test.)
Intended follow-up:
Remove TimedWait from the public API of InstrumentedCondVar, and only
make that accessible through Env by passing in an InstrumentedCondVar and
a deadline. Then the Env implementations mocking time can fix this problem
without using sync points. (Test infrastructure using sync points interferes
with individual tests' control over sync points.)
With that change, we can simplify/consolidate the scattered work-arounds.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7101
Test Plan: make check on Linux and MacOS
Reviewed By: zhichao-cao
Differential Revision: D23032815
Pulled By: pdillinger
fbshipit-source-id: 7f33967ada8b83011fb54e8279365c008bd6610b
2020-08-11 12:39:49 -07:00
|
|
|
});
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-06-17 15:17:43 -07:00
|
|
|
// Verifies that the periodic stats dump runs when stats_dump_period_sec is
// set, and that it stops running once the option is set to 0 via
// SetDBOptions.
TEST_F(StatsHistoryTest, RunStatsDumpPeriodSec) {
  constexpr int kPeriodSec = 5;
  Options options;
  options.create_if_missing = true;
  options.stats_dump_period_sec = kPeriodSec;
  options.env = mock_env_.get();

  // Counts how many times the "DBImpl::DumpStats:1" sync point is hit.
  int counter = 0;
  SyncPoint::GetInstance()->SetCallBack("DBImpl::DumpStats:1",
                                        [&](void* /*arg*/) { counter++; });
  Reopen(options);
  ASSERT_EQ(static_cast<unsigned int>(kPeriodSec),
            dbfull()->GetDBOptions().stats_dump_period_sec);

  // Wait for the first stats dump to finish, as the initial delay could be
  // different.
  dbfull()->TEST_WaitForStatsDumpRun(
      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });

  dbfull()->TEST_WaitForStatsDumpRun(
      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
  ASSERT_GE(counter, 1);

  // Test cancel job through SetOptions
  ASSERT_OK(dbfull()->SetDBOptions({{"stats_dump_period_sec", "0"}}));
  const int old_val = counter;
  // Advance the mock clock through the scheduler's wait helper, as the other
  // tests in this file do, so that a job which was not actually cancelled
  // gets a chance to run before the counter is checked. Advancing the clock
  // alone would let the assertion below pass without the scheduler ever
  // having been given the chance to fire.
  for (int i = 1; i < 20; ++i) {
    dbfull()->TEST_WaitForStatsDumpRun(
        [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
  }
  ASSERT_EQ(counter, old_val);
  Close();
}
|
|
|
|
|
|
|
|
// Test persistent stats background thread scheduling and cancelling
|
|
|
|
// Checks that stats persistence is scheduled when stats_persist_period_sec is
// non-zero and that setting it to 0 through SetDBOptions stops further runs.
TEST_F(StatsHistoryTest, StatsPersistScheduling) {
  constexpr int kPeriodSec = 5;

  // Advances the mock clock by `seconds` via the scheduler's wait helper.
  const auto advance_and_wait = [this](int seconds) {
    dbfull()->TEST_WaitForStatsDumpRun(
        [this, seconds] { mock_clock_->MockSleepForSeconds(seconds); });
  };

  Options options;
  options.create_if_missing = true;
  options.stats_persist_period_sec = kPeriodSec;
  options.env = mock_env_.get();

  // Number of times the "DBImpl::PersistStats:Entry" sync point was reached.
  int persist_runs = 0;
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::PersistStats:Entry",
      [&persist_runs](void* /*arg*/) { persist_runs++; });
  Reopen(options);
  ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_persist_period_sec);

  // Let the first stats persist complete; its initial delay may differ from
  // the regular period.
  advance_and_wait(kPeriodSec - 1);

  advance_and_wait(kPeriodSec);
  ASSERT_GE(persist_runs, 1);

  // Disable the job through SetDBOptions and make sure it no longer runs.
  ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}}));
  const int runs_before_cancel = persist_runs;
  advance_and_wait(kPeriodSec * 2);
  ASSERT_EQ(persist_runs, runs_before_cancel);

  Close();
}
|
|
|
|
|
|
|
|
// Test enabling persistent stats for the first time
|
|
|
|
// Opens the DB with stats persistence disabled, enables it afterwards through
// SetDBOptions, and checks that a persist run is then observed.
TEST_F(StatsHistoryTest, PersistentStatsFreshInstall) {
  constexpr unsigned int kPeriodSec = 5;

  Options options;
  options.create_if_missing = true;
  options.stats_persist_period_sec = 0;
  options.env = mock_env_.get();

  // Number of times the "DBImpl::PersistStats:Entry" sync point was reached.
  int persist_runs = 0;
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::PersistStats:Entry",
      [&persist_runs](void* /*arg*/) { persist_runs++; });
  Reopen(options);

  // Turn persistence on at runtime and confirm the option took effect.
  const std::string period_str = std::to_string(kPeriodSec);
  ASSERT_OK(
      dbfull()->SetDBOptions({{"stats_persist_period_sec", period_str}}));
  ASSERT_EQ(kPeriodSec, dbfull()->GetDBOptions().stats_persist_period_sec);

  // Advance mock time by one period and wait for the scheduler.
  const auto sleep_one_period = [&] {
    mock_clock_->MockSleepForSeconds(kPeriodSec);
  };
  dbfull()->TEST_WaitForStatsDumpRun(sleep_one_period);
  ASSERT_GE(persist_runs, 1);
  Close();
}
|
|
|
|
|
|
|
|
// TODO(Zhongyi): Move persistent stats related tests to a separate file
|
|
|
|
// Reads stats snapshots back through GetStatsHistory, then disables
// stats_persist_period_sec and checks that the number of stats entries does
// not grow while mock time keeps advancing.
TEST_F(StatsHistoryTest, GetStatsHistoryInMemory) {
  constexpr int kPeriodSec = 5;

  // Advances the mock clock by `seconds` via the scheduler's wait helper.
  const auto advance_and_wait = [this](int seconds) {
    dbfull()->TEST_WaitForStatsDumpRun(
        [this, seconds] { mock_clock_->MockSleepForSeconds(seconds); });
  };

  Options options;
  options.create_if_missing = true;
  options.stats_persist_period_sec = kPeriodSec;
  options.statistics = CreateDBStatistics();
  options.env = mock_env_.get();
  CreateColumnFamilies({"pikachu"}, options);
  ASSERT_OK(Put("foo", "bar"));
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  // Let the first stats persist complete.
  advance_and_wait(kPeriodSec - 1);

  // Wait for one more full-period stats persist.
  advance_and_wait(kPeriodSec);

  std::unique_ptr<StatsHistoryIterator> stats_iter;
  ASSERT_OK(
      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
  ASSERT_TRUE(stats_iter != nullptr);

  // Disable stats snapshots before walking the iterator.
  ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}}));

  size_t stats_count = 0;
  while (stats_iter->Valid()) {
    const auto& snapshot = stats_iter->GetStatsMap();
    ASSERT_EQ(stats_iter->GetStatsTime(), mock_clock_->NowSeconds());
    stats_count += snapshot.size();
    stats_iter->Next();
  }
  ASSERT_GT(stats_count, 0);

  // Advance time one second at a time and verify no more stats are found.
  for (int tick = 0; tick < 10; ++tick) {
    advance_and_wait(1);
  }
  ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter));
  ASSERT_TRUE(stats_iter != nullptr);

  size_t stats_count_new = 0;
  while (stats_iter->Valid()) {
    stats_count_new += stats_iter->GetStatsMap().size();
    stats_iter->Next();
  }
  ASSERT_EQ(stats_count_new, stats_count);
  Close();
}
|
|
|
|
|
|
|
|
// Checks that lowering stats_history_buffer_size at runtime shrinks the
// in-memory stats history: after the cap is applied, fewer slices and fewer
// stats entries are returned than before, and the estimated history size
// falls below the cap.
TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) {
  constexpr int kPeriodSec = 1;
  Options options;
  options.create_if_missing = true;
  options.statistics = CreateDBStatistics();
  options.stats_persist_period_sec = kPeriodSec;
  options.env = mock_env_.get();

  CreateColumnFamilies({"pikachu"}, options);
  ASSERT_OK(Put("foo", "bar"));
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  // some random operation to populate statistics
  ASSERT_OK(Delete("foo"));
  ASSERT_OK(Put("sol", "sol"));
  ASSERT_OK(Put("epic", "epic"));
  ASSERT_OK(Put("ltd", "ltd"));
  ASSERT_EQ("sol", Get("sol"));
  ASSERT_EQ("epic", Get("epic"));
  ASSERT_EQ("ltd", Get("ltd"));
  {
    // Scoped owner: ASSERT_* returns from the test on failure, so a raw
    // pointer with a trailing `delete` would leak the iterator.
    std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
    for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) {
      ASSERT_TRUE(iterator->key() == iterator->value());
    }
  }
  ASSERT_OK(Flush());
  ASSERT_OK(Delete("sol"));
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // second round of ops
  ASSERT_OK(Put("saigon", "saigon"));
  ASSERT_OK(Put("noodle talk", "noodle talk"));
  ASSERT_OK(Put("ping bistro", "ping bistro"));
  {
    std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
    for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) {
      ASSERT_TRUE(iterator->key() == iterator->value());
    }
  }
  ASSERT_OK(Flush());
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Let the stats dump run kIterations times, sleeping one period on the mock
  // clock for each run.
  const int kIterations = 10;
  for (int i = 0; i < kIterations; ++i) {
    dbfull()->TEST_WaitForStatsDumpRun(
        [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
  }

  std::unique_ptr<StatsHistoryIterator> stats_iter;
  ASSERT_OK(
      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
  ASSERT_TRUE(stats_iter != nullptr);
  size_t stats_count = 0;
  int slice_count = 0;
  for (; stats_iter->Valid(); stats_iter->Next()) {
    slice_count++;
    // Bind by reference: only the size is needed, so avoid copying the map.
    const auto& stats_map = stats_iter->GetStatsMap();
    stats_count += stats_map.size();
  }
  size_t stats_history_size = dbfull()->TEST_EstimateInMemoryStatsHistorySize();
  ASSERT_GE(slice_count, kIterations - 1);
  ASSERT_GE(stats_history_size, 15000);
  // capping memory cost at 15000 bytes since one slice is around 10000~15000
  ASSERT_OK(dbfull()->SetDBOptions({{"stats_history_buffer_size", "15000"}}));
  ASSERT_EQ(15000, dbfull()->GetDBOptions().stats_history_buffer_size);

  // Wait for stats persist to finish
  for (int i = 0; i < kIterations; ++i) {
    dbfull()->TEST_WaitForStatsDumpRun(
        [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
  }

  ASSERT_OK(
      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
  ASSERT_TRUE(stats_iter != nullptr);
  size_t stats_count_after_cap = 0;
  slice_count = 0;
  for (; stats_iter->Valid(); stats_iter->Next()) {
    slice_count++;
    const auto& stats_map = stats_iter->GetStatsMap();
    stats_count_after_cap += stats_map.size();
  }
  size_t stats_history_size_after_cap =
      dbfull()->TEST_EstimateInMemoryStatsHistorySize();
  // only one slice can fit under the new stats_history_buffer_size
  ASSERT_LT(slice_count, 2);
  // Separate comparisons so a failure reports the offending values.
  ASSERT_LT(stats_history_size_after_cap, 15000);
  ASSERT_GT(stats_history_size_after_cap, 0);
  ASSERT_LT(stats_count_after_cap, stats_count);
  ASSERT_GT(stats_count_after_cap, 0);
  Close();
  // TODO: may also want to verify stats timestamp to make sure we are purging
  // the correct stats snapshot
}
|
|
|
|
|
|
|
|
// Repositions `iter` to its first entry and returns how many entries are
// visited before the iterator becomes invalid. The iterator is left invalid
// (exhausted) on return; ownership stays with the caller.
int countkeys(Iterator* iter) {
  int num_keys = 0;
  iter->SeekToFirst();
  while (iter->Valid()) {
    ++num_keys;
    iter->Next();
  }
  return num_keys;
}
|
|
|
|
|
|
|
|
// With persist_stats_to_disk enabled, checks that each stats dump adds the
// same number of keys to the persistent stats column family, that
// GetStatsHistory() returns the expected slices and timestamps, and that the
// same history is returned after the DB is reopened.
TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
  constexpr int kPeriodSec = 5;
  Options options;
  options.create_if_missing = true;
  options.stats_persist_period_sec = kPeriodSec;
  options.statistics = CreateDBStatistics();
  options.persist_stats_to_disk = true;
  options.env = mock_env_.get();
  CreateColumnFamilies({"pikachu"}, options);
  ASSERT_OK(Put("foo", "bar"));
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_EQ(Get("foo"), "bar");

  // Counts the keys currently stored in the persistent stats column family.
  // The iterator is owned by a unique_ptr so it is always released.
  auto count_persisted_keys = [&] {
    std::unique_ptr<Iterator> iter(db_->NewIterator(
        ReadOptions(), dbfull()->PersistentStatsColumnFamily()));
    return countkeys(iter.get());
  };

  // Wait for the first stats persist to finish, as the initial delay could be
  // different.
  dbfull()->TEST_WaitForStatsDumpRun(
      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });

  // Wait for stats persist to finish
  dbfull()->TEST_WaitForStatsDumpRun(
      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
  const int key_count1 = count_persisted_keys();

  dbfull()->TEST_WaitForStatsDumpRun(
      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
  const int key_count2 = count_persisted_keys();

  dbfull()->TEST_WaitForStatsDumpRun(
      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
  const int key_count3 = count_persisted_keys();

  // Every period must add the same number of keys.
  ASSERT_GE(key_count2, key_count1);
  ASSERT_GE(key_count3, key_count2);
  ASSERT_EQ(key_count3 - key_count2, key_count2 - key_count1);

  std::unique_ptr<StatsHistoryIterator> stats_iter;
  ASSERT_OK(
      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
  ASSERT_TRUE(stats_iter != nullptr);
  size_t stats_count = 0;
  int slice_count = 0;
  int non_zero_count = 0;
  // `i` starts at 2: the first returned slice is expected to carry the
  // timestamp of the second dump (kPeriodSec * 2 - 1).
  // NOTE(review): presumably the first dump only establishes a baseline and
  // yields no slice -- confirm against the persist-stats implementation.
  for (int i = 2; stats_iter->Valid(); stats_iter->Next(), i++) {
    slice_count++;
    // Bind by reference to avoid copying the whole map for each slice.
    const auto& stats_map = stats_iter->GetStatsMap();
    ASSERT_EQ(stats_iter->GetStatsTime(), kPeriodSec * i - 1);
    for (const auto& stat : stats_map) {
      if (stat.second != 0) {
        non_zero_count++;
      }
    }
    stats_count += stats_map.size();
  }
  ASSERT_EQ(slice_count, 3);
  // 2 extra keys for format version
  ASSERT_EQ(stats_count, key_count3 - 2);

  // verify reopen will not cause data loss
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_OK(
      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
  ASSERT_TRUE(stats_iter != nullptr);
  size_t stats_count_reopen = 0;
  int slice_count_reopen = 0;
  int non_zero_count_recover = 0;
  for (; stats_iter->Valid(); stats_iter->Next()) {
    slice_count_reopen++;
    const auto& stats_map = stats_iter->GetStatsMap();
    for (const auto& stat : stats_map) {
      if (stat.second != 0) {
        non_zero_count_recover++;
      }
    }
    stats_count_reopen += stats_map.size();
  }

  ASSERT_EQ(non_zero_count, non_zero_count_recover);
  ASSERT_EQ(slice_count, slice_count_reopen);
  ASSERT_EQ(stats_count, stats_count_reopen);
  Close();
}
|
|
|
|
|
|
|
|
// Test persisted stats matches the value found in options.statistics and
|
|
|
|
// the stats value retains after DB reopen
|
|
|
|
TEST_F(StatsHistoryTest, PersitentStatsVerifyValue) {
|
2020-08-14 20:11:35 -07:00
|
|
|
constexpr int kPeriodSec = 5;
|
2019-06-17 15:17:43 -07:00
|
|
|
Options options;
|
|
|
|
options.create_if_missing = true;
|
2020-08-14 20:11:35 -07:00
|
|
|
options.stats_persist_period_sec = kPeriodSec;
|
Fix+clean up handling of mock sleeps (#7101)
Summary:
We have a number of tests hanging on MacOS and windows due to
mishandling of code for mock sleeps. In addition, the code was in
terrible shape because the same variable (addon_time_) would sometimes
refer to microseconds and sometimes to seconds. One test even assumed it
was nanoseconds but was written to pass anyway.
This has been cleaned up so that DB tests generally use a SpecialEnv
function to mock sleep, for either some number of microseconds or seconds
depending on the function called. But to call one of these, the test must first
call SetMockSleep (precondition enforced with assertion), which also turns
sleeps in RocksDB into mock sleeps. To also removes accounting for actual
clock time, call SetTimeElapseOnlySleepOnReopen, which implies
SetMockSleep (on DB re-open). This latter setting only works by applying
on DB re-open, otherwise havoc can ensue if Env goes back in time with
DB open.
More specifics:
Removed some unused test classes, and updated comments on the general
problem.
Fixed DBSSTTest.GetTotalSstFilesSize using a sync point callback instead
of mock time. For this we have the only modification to production code,
inserting a sync point callback in flush_job.cc, which is not a change to
production behavior.
Removed unnecessary resetting of mock times to 0 in many tests. RocksDB
deals in relative time. Any behaviors relying on absolute date/time are likely
a bug. (The above test DBSSTTest.GetTotalSstFilesSize was the only one
clearly injecting a specific absolute time for actual testing convenience.) Just
in case I misunderstood some test, I put this note in each replacement:
// NOTE: Presumed unnecessary and removed: resetting mock time in env
Strengthened some tests like MergeTestTime, MergeCompactionTimeTest, and
FilterCompactionTimeTest in db_test.cc
stats_history_test and blob_db_test are each their own beast, rather deeply
dependent on MockTimeEnv. Each gets its own variant of a work-around for
TimedWait in a mock time environment. (Reduces redundancy and
inconsistency in stats_history_test.)
Intended follow-up:
Remove TimedWait from the public API of InstrumentedCondVar, and only
make that accessible through Env by passing in an InstrumentedCondVar and
a deadline. Then the Env implementations mocking time can fix this problem
without using sync points. (Test infrastructure using sync points interferes
with individual tests' control over sync points.)
With that change, we can simplify/consolidate the scattered work-arounds.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7101
Test Plan: make check on Linux and MacOS
Reviewed By: zhichao-cao
Differential Revision: D23032815
Pulled By: pdillinger
fbshipit-source-id: 7f33967ada8b83011fb54e8279365c008bd6610b
2020-08-11 12:39:49 -07:00
|
|
|
options.statistics = CreateDBStatistics();
|
2019-06-17 15:17:43 -07:00
|
|
|
options.persist_stats_to_disk = true;
|
|
|
|
std::map<std::string, uint64_t> stats_map_before;
|
|
|
|
ASSERT_TRUE(options.statistics->getTickerMap(&stats_map_before));
|
2020-08-14 20:11:35 -07:00
|
|
|
options.env = mock_env_.get();
|
2019-06-17 15:17:43 -07:00
|
|
|
CreateColumnFamilies({"pikachu"}, options);
|
|
|
|
ASSERT_OK(Put("foo", "bar"));
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
|
|
|
ASSERT_EQ(Get("foo"), "bar");
|
|
|
|
|
2020-08-14 20:11:35 -07:00
|
|
|
// Wait for the first stats persist to finish, as the initial delay could be
|
|
|
|
// different.
|
|
|
|
dbfull()->TEST_WaitForStatsDumpRun(
|
2021-01-25 22:07:26 -08:00
|
|
|
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
|
2020-08-14 20:11:35 -07:00
|
|
|
|
2019-06-17 15:17:43 -07:00
|
|
|
// Wait for stats persist to finish
|
2020-08-14 20:11:35 -07:00
|
|
|
dbfull()->TEST_WaitForStatsDumpRun(
|
2021-01-25 22:07:26 -08:00
|
|
|
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
|
2019-06-17 15:17:43 -07:00
|
|
|
auto iter =
|
|
|
|
db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
|
|
|
|
countkeys(iter);
|
|
|
|
delete iter;
|
2020-08-14 20:11:35 -07:00
|
|
|
|
|
|
|
dbfull()->TEST_WaitForStatsDumpRun(
|
2021-01-25 22:07:26 -08:00
|
|
|
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
|
2019-06-17 15:17:43 -07:00
|
|
|
iter =
|
|
|
|
db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
|
|
|
|
countkeys(iter);
|
|
|
|
delete iter;
|
2020-08-14 20:11:35 -07:00
|
|
|
|
|
|
|
dbfull()->TEST_WaitForStatsDumpRun(
|
2021-01-25 22:07:26 -08:00
|
|
|
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
|
2019-06-17 15:17:43 -07:00
|
|
|
iter =
|
|
|
|
db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
|
|
|
|
countkeys(iter);
|
|
|
|
delete iter;
|
2020-08-14 20:11:35 -07:00
|
|
|
|
|
|
|
dbfull()->TEST_WaitForStatsDumpRun(
|
2021-01-25 22:07:26 -08:00
|
|
|
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
|
2019-06-17 15:17:43 -07:00
|
|
|
|
|
|
|
std::map<std::string, uint64_t> stats_map_after;
|
|
|
|
ASSERT_TRUE(options.statistics->getTickerMap(&stats_map_after));
|
|
|
|
std::unique_ptr<StatsHistoryIterator> stats_iter;
|
2021-01-25 22:07:26 -08:00
|
|
|
ASSERT_OK(
|
|
|
|
db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
|
2019-06-17 15:17:43 -07:00
|
|
|
ASSERT_TRUE(stats_iter != nullptr);
|
|
|
|
std::string sample = "rocksdb.num.iterator.deleted";
|
|
|
|
uint64_t recovered_value = 0;
|
2020-08-14 20:11:35 -07:00
|
|
|
for (int i = 2; stats_iter->Valid(); stats_iter->Next(), ++i) {
|
2019-06-17 15:17:43 -07:00
|
|
|
auto stats_map = stats_iter->GetStatsMap();
|
2020-08-14 20:11:35 -07:00
|
|
|
ASSERT_EQ(stats_iter->GetStatsTime(), kPeriodSec * i - 1);
|
2019-06-17 15:17:43 -07:00
|
|
|
for (const auto& stat : stats_map) {
|
|
|
|
if (sample.compare(stat.first) == 0) {
|
|
|
|
recovered_value += stat.second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ASSERT_EQ(recovered_value, stats_map_after[sample]);
|
|
|
|
|
|
|
|
// test stats value retains after recovery
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
2021-01-25 22:07:26 -08:00
|
|
|
ASSERT_OK(
|
|
|
|
db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
|
2019-06-17 15:17:43 -07:00
|
|
|
ASSERT_TRUE(stats_iter != nullptr);
|
|
|
|
uint64_t new_recovered_value = 0;
|
2020-08-14 20:11:35 -07:00
|
|
|
for (int i = 2; stats_iter->Valid(); stats_iter->Next(), i++) {
|
2019-06-17 15:17:43 -07:00
|
|
|
auto stats_map = stats_iter->GetStatsMap();
|
2020-08-14 20:11:35 -07:00
|
|
|
ASSERT_EQ(stats_iter->GetStatsTime(), kPeriodSec * i - 1);
|
2019-06-17 15:17:43 -07:00
|
|
|
for (const auto& stat : stats_map) {
|
|
|
|
if (sample.compare(stat.first) == 0) {
|
|
|
|
new_recovered_value += stat.second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ASSERT_EQ(recovered_value, new_recovered_value);
|
|
|
|
|
|
|
|
// TODO(Zhongyi): also add test to read raw values from disk and verify
|
|
|
|
// correctness
|
|
|
|
Close();
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO(Zhongyi): add test for different format versions
|
|
|
|
|
|
|
|
// Exercises the persistent stats column family alongside user column
// families: user CFs can be created and reopened, the stats CF is counted
// among the open CFs even when persist_stats_to_disk is false, a user cannot
// create a CF with the reserved stats CF name, and the recorded
// "rocksdb.write.wal" total is unchanged by those failed attempts.
TEST_F(StatsHistoryTest, PersistentStatsCreateColumnFamilies) {
  constexpr int kPeriodSec = 5;
  Options options;
  options.create_if_missing = true;
  options.stats_persist_period_sec = kPeriodSec;
  options.statistics = CreateDBStatistics();
  options.persist_stats_to_disk = true;
  options.env = mock_env_.get();
  ASSERT_OK(TryReopen(options));
  CreateColumnFamilies({"one", "two", "three"}, options);
  ASSERT_OK(Put(1, "foo", "bar"));
  ReopenWithColumnFamilies({"default", "one", "two", "three"}, options);
  ASSERT_EQ(Get(2, "foo"), "bar");
  CreateColumnFamilies({"four"}, options);
  ReopenWithColumnFamilies({"default", "one", "two", "three", "four"}, options);
  ASSERT_EQ(Get(2, "foo"), "bar");

  // make sure the first stats persist to finish
  dbfull()->TEST_WaitForStatsDumpRun(
      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });

  dbfull()->TEST_WaitForStatsDumpRun(
      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
  int key_count = 0;
  {
    // Scoped owner so the iterator is released before any later reopen.
    std::unique_ptr<Iterator> iter(db_->NewIterator(
        ReadOptions(), dbfull()->PersistentStatsColumnFamily()));
    key_count = countkeys(iter.get());
  }
  // NOTE(review): countkeys() never returns a negative value, so this check
  // cannot fail; ASSERT_GT may have been intended -- confirm before
  // tightening.
  ASSERT_GE(key_count, 0);

  const std::string sample = "rocksdb.write.wal";
  // Sums the `sample` ticker over every remaining slice of `it`, leaving the
  // iterator exhausted.
  auto sum_sample = [&sample](StatsHistoryIterator* it) {
    uint64_t total = 0;
    for (; it->Valid(); it->Next()) {
      const auto& stats_map = it->GetStatsMap();
      const auto found = stats_map.find(sample);
      if (found != stats_map.end()) {
        total += found->second;
      }
    }
    return total;
  };

  std::unique_ptr<StatsHistoryIterator> stats_iter;
  ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter));
  ASSERT_TRUE(stats_iter != nullptr);
  uint64_t num_write_wal = sum_sample(stats_iter.get());
  // Release the history iterator before the DB is reopened below.
  stats_iter.reset();
  ASSERT_EQ(num_write_wal, 1);

  options.persist_stats_to_disk = false;
  ReopenWithColumnFamilies({"default", "one", "two", "three", "four"}, options);
  int cf_count = 0;
  for (auto cfd : *dbfull()->versions_->GetColumnFamilySet()) {
    (void)cfd;
    cf_count++;
  }
  // persistent stats cf will be implicitly opened even if
  // persist_stats_to_disk is false
  ASSERT_EQ(cf_count, 6);
  ASSERT_EQ(Get(2, "foo"), "bar");

  // attempt to create column family using same name, should fail
  ColumnFamilyOptions cf_opts(options);
  // Initialized so the pointer is never indeterminate after the expected
  // failures below.
  ColumnFamilyHandle* handle = nullptr;
  ASSERT_NOK(db_->CreateColumnFamily(cf_opts, kPersistentStatsColumnFamilyName,
                                     &handle));

  options.persist_stats_to_disk = true;
  ReopenWithColumnFamilies({"default", "one", "two", "three", "four"}, options);
  ASSERT_NOK(db_->CreateColumnFamily(cf_opts, kPersistentStatsColumnFamilyName,
                                     &handle));
  // verify stats is not affected by prior failed CF creation
  ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter));
  ASSERT_TRUE(stats_iter != nullptr);
  num_write_wal = sum_sample(stats_iter.get());
  ASSERT_EQ(num_write_wal, 1);

  Close();
  Destroy(options);
}
|
|
|
|
|
|
|
|
// Checks that a DB can be opened read-only with persist_stats_to_disk
// enabled, both before and after a regular open that flushes the memtable.
TEST_F(StatsHistoryTest, PersistentStatsReadOnly) {
  ASSERT_OK(Put("bar", "v2"));
  Close();

  auto options = CurrentOptions();
  options.stats_persist_period_sec = 5;
  options.persist_stats_to_disk = true;
  // This test relies on the fixture's own env. Checked with a gtest assertion
  // so the precondition is still enforced when NDEBUG disables assert().
  ASSERT_TRUE(options.env == env_);
  ASSERT_OK(ReadOnlyReopen(options));
  ASSERT_EQ("v2", Get("bar"));
  Close();

  // Reopen and flush memtable.
  ASSERT_OK(TryReopen(options));
  ASSERT_OK(Flush());
  Close();
  // Now check keys in read only mode.
  ASSERT_OK(ReadOnlyReopen(options));
}
|
|
|
|
|
2019-07-01 11:53:25 -07:00
|
|
|
TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
|
2020-08-14 20:11:35 -07:00
|
|
|
constexpr int kPeriodSec = 5;
|
2019-07-01 11:53:25 -07:00
|
|
|
Options options;
|
|
|
|
options.create_if_missing = true;
|
|
|
|
options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb
|
2020-08-14 20:11:35 -07:00
|
|
|
options.stats_persist_period_sec = kPeriodSec;
|
Fix+clean up handling of mock sleeps (#7101)
Summary:
We have a number of tests hanging on MacOS and windows due to
mishandling of code for mock sleeps. In addition, the code was in
terrible shape because the same variable (addon_time_) would sometimes
refer to microseconds and sometimes to seconds. One test even assumed it
was nanoseconds but was written to pass anyway.
This has been cleaned up so that DB tests generally use a SpecialEnv
function to mock sleep, for either some number of microseconds or seconds
depending on the function called. But to call one of these, the test must first
call SetMockSleep (precondition enforced with assertion), which also turns
sleeps in RocksDB into mock sleeps. To also removes accounting for actual
clock time, call SetTimeElapseOnlySleepOnReopen, which implies
SetMockSleep (on DB re-open). This latter setting only works by applying
on DB re-open, otherwise havoc can ensue if Env goes back in time with
DB open.
More specifics:
Removed some unused test classes, and updated comments on the general
problem.
Fixed DBSSTTest.GetTotalSstFilesSize using a sync point callback instead
of mock time. For this we have the only modification to production code,
inserting a sync point callback in flush_job.cc, which is not a change to
production behavior.
Removed unnecessary resetting of mock times to 0 in many tests. RocksDB
deals in relative time. Any behaviors relying on absolute date/time are likely
a bug. (The above test DBSSTTest.GetTotalSstFilesSize was the only one
clearly injecting a specific absolute time for actual testing convenience.) Just
in case I misunderstood some test, I put this note in each replacement:
// NOTE: Presumed unnecessary and removed: resetting mock time in env
Strengthened some tests like MergeTestTime, MergeCompactionTimeTest, and
FilterCompactionTimeTest in db_test.cc
stats_history_test and blob_db_test are each their own beast, rather deeply
dependent on MockTimeEnv. Each gets its own variant of a work-around for
TimedWait in a mock time environment. (Reduces redundancy and
inconsistency in stats_history_test.)
Intended follow-up:
Remove TimedWait from the public API of InstrumentedCondVar, and only
make that accessible through Env by passing in an InstrumentedCondVar and
a deadline. Then the Env implementations mocking time can fix this problem
without using sync points. (Test infrastructure using sync points interferes
with individual tests' control over sync points.)
With that change, we can simplify/consolidate the scattered work-arounds.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7101
Test Plan: make check on Linux and MacOS
Reviewed By: zhichao-cao
Differential Revision: D23032815
Pulled By: pdillinger
fbshipit-source-id: 7f33967ada8b83011fb54e8279365c008bd6610b
2020-08-11 12:39:49 -07:00
|
|
|
options.statistics = CreateDBStatistics();
|
2019-07-01 11:53:25 -07:00
|
|
|
options.persist_stats_to_disk = true;
|
2020-08-14 20:11:35 -07:00
|
|
|
options.env = mock_env_.get();
|
2019-07-01 11:53:25 -07:00
|
|
|
CreateColumnFamilies({"pikachu"}, options);
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
2020-08-14 20:11:35 -07:00
|
|
|
|
|
|
|
// Wait for the first stats persist to finish, as the initial delay could be
|
|
|
|
// different.
|
|
|
|
dbfull()->TEST_WaitForStatsDumpRun(
|
2021-01-25 22:07:26 -08:00
|
|
|
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
|
2020-08-14 20:11:35 -07:00
|
|
|
|
2019-07-01 11:53:25 -07:00
|
|
|
ColumnFamilyData* cfd_default =
|
|
|
|
static_cast<ColumnFamilyHandleImpl*>(dbfull()->DefaultColumnFamily())
|
|
|
|
->cfd();
|
|
|
|
ColumnFamilyData* cfd_stats = static_cast<ColumnFamilyHandleImpl*>(
|
|
|
|
dbfull()->PersistentStatsColumnFamily())
|
|
|
|
->cfd();
|
|
|
|
ColumnFamilyData* cfd_test =
|
|
|
|
static_cast<ColumnFamilyHandleImpl*>(handles_[1])->cfd();
|
|
|
|
|
|
|
|
ASSERT_OK(Put("foo", "v0"));
|
|
|
|
ASSERT_OK(Put("bar", "v0"));
|
|
|
|
ASSERT_EQ("v0", Get("bar"));
|
|
|
|
ASSERT_EQ("v0", Get("foo"));
|
|
|
|
ASSERT_OK(Put(1, "Eevee", "v0"));
|
|
|
|
ASSERT_EQ("v0", Get(1, "Eevee"));
|
2020-08-14 20:11:35 -07:00
|
|
|
|
|
|
|
dbfull()->TEST_WaitForStatsDumpRun(
|
2021-01-25 22:07:26 -08:00
|
|
|
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
|
2019-07-01 11:53:25 -07:00
|
|
|
// writing to all three cf, flush default cf
|
Persist the new MANIFEST after successfully syncing the new WAL during recovery (#9922)
Summary:
In case of non-TransactionDB and avoid_flush_during_recovery = true, RocksDB won't
flush the data from WAL to L0 for all column families if possible. As a
result, not all column families can increase their log_numbers, and
min_log_number_to_keep won't change.
For transaction DB (.allow_2pc), even with the flush, there may be old WAL files that it must not delete because they can contain data of uncommitted transactions and min_log_number_to_keep won't change.
If we persist a new MANIFEST with
advanced log_numbers for some column families, then during a second
crash after persisting the MANIFEST, RocksDB will see some column
families' log_numbers larger than the corrupted wal, and the "column family inconsistency" error will be hit, causing recovery to fail.
As a solution, RocksDB will persist the new MANIFEST after successfully syncing the new WAL.
If a future recovery starts from the new MANIFEST, then it means the new WAL is successfully synced. Due to the sentinel empty write batch at the beginning, kPointInTimeRecovery of WAL is guaranteed to go after this point.
If future recovery starts from the old MANIFEST, it means the writing the new MANIFEST failed. We won't have the "SST ahead of WAL" error.
Currently, RocksDB DB::Open() may creates and writes to two new MANIFEST files even before recovery succeeds. This PR buffers the edits in a structure and writes to a new MANIFEST after recovery is successful
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9922
Test Plan:
1. Update unit tests to fail without this change
2. make crast_test -j
Branch with unit test and no fix https://github.com/facebook/rocksdb/pull/9942 to keep track of unit test (without fix)
Reviewed By: riversand963
Differential Revision: D36043701
Pulled By: akankshamahajan15
fbshipit-source-id: 5760970db0a0920fb73d3c054a4155733500acd9
2022-06-01 10:52:26 -07:00
|
|
|
// LogNumbers: default: 16, stats: 10, pikachu: 5
|
|
|
|
// Since in recovery process, cfd_stats column is created after WAL is
|
|
|
|
// created, synced and MANIFEST is persisted, its log number which depends on
|
|
|
|
// logfile_number_ will be different. Since "pikachu" is never flushed, thus
|
|
|
|
// its log_number should be the smallest of the three.
|
2019-07-01 11:53:25 -07:00
|
|
|
ASSERT_OK(Flush());
|
Persist the new MANIFEST after successfully syncing the new WAL during recovery (#9922)
Summary:
In case of non-TransactionDB and avoid_flush_during_recovery = true, RocksDB won't
flush the data from WAL to L0 for all column families if possible. As a
result, not all column families can increase their log_numbers, and
min_log_number_to_keep won't change.
For transaction DB (.allow_2pc), even with the flush, there may be old WAL files that it must not delete because they can contain data of uncommitted transactions and min_log_number_to_keep won't change.
If we persist a new MANIFEST with
advanced log_numbers for some column families, then during a second
crash after persisting the MANIFEST, RocksDB will see some column
families' log_numbers larger than the corrupted wal, and the "column family inconsistency" error will be hit, causing recovery to fail.
As a solution, RocksDB will persist the new MANIFEST after successfully syncing the new WAL.
If a future recovery starts from the new MANIFEST, then it means the new WAL is successfully synced. Due to the sentinel empty write batch at the beginning, kPointInTimeRecovery of WAL is guaranteed to go after this point.
If future recovery starts from the old MANIFEST, it means the writing the new MANIFEST failed. We won't have the "SST ahead of WAL" error.
Currently, RocksDB DB::Open() may creates and writes to two new MANIFEST files even before recovery succeeds. This PR buffers the edits in a structure and writes to a new MANIFEST after recovery is successful
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9922
Test Plan:
1. Update unit tests to fail without this change
2. make crast_test -j
Branch with unit test and no fix https://github.com/facebook/rocksdb/pull/9942 to keep track of unit test (without fix)
Reviewed By: riversand963
Differential Revision: D36043701
Pulled By: akankshamahajan15
fbshipit-source-id: 5760970db0a0920fb73d3c054a4155733500acd9
2022-06-01 10:52:26 -07:00
|
|
|
ASSERT_LT(cfd_test->GetLogNumber(), cfd_stats->GetLogNumber());
|
|
|
|
ASSERT_LT(cfd_test->GetLogNumber(), cfd_default->GetLogNumber());
|
2019-07-01 11:53:25 -07:00
|
|
|
|
|
|
|
ASSERT_OK(Put("foo1", "v1"));
|
|
|
|
ASSERT_OK(Put("bar1", "v1"));
|
|
|
|
ASSERT_EQ("v1", Get("bar1"));
|
|
|
|
ASSERT_EQ("v1", Get("foo1"));
|
|
|
|
ASSERT_OK(Put(1, "Vaporeon", "v1"));
|
|
|
|
ASSERT_EQ("v1", Get(1, "Vaporeon"));
|
|
|
|
// writing to default and test cf, flush test cf
|
|
|
|
// LogNumbers: default: 14, stats: 16, pikachu: 16
|
|
|
|
ASSERT_OK(Flush(1));
|
|
|
|
ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber());
|
|
|
|
ASSERT_GT(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber());
|
|
|
|
|
|
|
|
ASSERT_OK(Put("foo2", "v2"));
|
|
|
|
ASSERT_OK(Put("bar2", "v2"));
|
|
|
|
ASSERT_EQ("v2", Get("bar2"));
|
|
|
|
ASSERT_EQ("v2", Get("foo2"));
|
2020-08-14 20:11:35 -07:00
|
|
|
|
|
|
|
dbfull()->TEST_WaitForStatsDumpRun(
|
2021-01-25 22:07:26 -08:00
|
|
|
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
|
2019-07-01 11:53:25 -07:00
|
|
|
// writing to default and stats cf, flushing default cf
|
|
|
|
// LogNumbers: default: 19, stats: 19, pikachu: 19
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber());
|
|
|
|
ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber());
|
|
|
|
|
|
|
|
ASSERT_OK(Put("foo3", "v3"));
|
|
|
|
ASSERT_OK(Put("bar3", "v3"));
|
|
|
|
ASSERT_EQ("v3", Get("bar3"));
|
|
|
|
ASSERT_EQ("v3", Get("foo3"));
|
|
|
|
ASSERT_OK(Put(1, "Jolteon", "v3"));
|
|
|
|
ASSERT_EQ("v3", Get(1, "Jolteon"));
|
2020-08-14 20:11:35 -07:00
|
|
|
|
|
|
|
dbfull()->TEST_WaitForStatsDumpRun(
|
2021-01-25 22:07:26 -08:00
|
|
|
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
|
2019-07-01 11:53:25 -07:00
|
|
|
// writing to all three cf, flushing test cf
|
|
|
|
// LogNumbers: default: 19, stats: 19, pikachu: 22
|
|
|
|
ASSERT_OK(Flush(1));
|
|
|
|
ASSERT_LT(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber());
|
|
|
|
ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber());
|
|
|
|
Close();
|
|
|
|
}
|
|
|
|
|
2019-07-01 16:32:59 -07:00
|
|
|
#endif // !ROCKSDB_LITE
|
2020-02-20 12:07:53 -08:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2019-06-17 15:17:43 -07:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2020-02-20 12:07:53 -08:00
|
|
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
2019-06-17 15:17:43 -07:00
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|