2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-16 01:03:42 +02:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2015-01-15 19:28:10 +01:00
|
|
|
//
|
|
|
|
// Copyright 2014 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
// This test uses a custom Env to keep track of the state of a filesystem as of
|
|
|
|
// the last "sync". It then checks for data loss errors by purposely dropping
|
|
|
|
// file data (or entire files) not protected by a "sync".
|
|
|
|
|
2019-06-01 00:21:36 +02:00
|
|
|
#include "db/db_impl/db_impl.h"
|
2015-01-15 19:28:10 +01:00
|
|
|
#include "db/log_format.h"
|
|
|
|
#include "db/version_set.h"
|
2017-04-06 04:02:00 +02:00
|
|
|
#include "env/mock_env.h"
|
2019-05-30 05:44:08 +02:00
|
|
|
#include "file/filename.h"
|
2015-01-15 19:28:10 +01:00
|
|
|
#include "rocksdb/cache.h"
|
2021-11-08 20:04:01 +01:00
|
|
|
#include "rocksdb/convenience.h"
|
2015-01-15 19:28:10 +01:00
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/env.h"
|
|
|
|
#include "rocksdb/table.h"
|
|
|
|
#include "rocksdb/write_batch.h"
|
2019-05-30 20:21:38 +02:00
|
|
|
#include "test_util/sync_point.h"
|
|
|
|
#include "test_util/testharness.h"
|
|
|
|
#include "test_util/testutil.h"
|
2019-05-31 02:39:43 +02:00
|
|
|
#include "util/mutexlock.h"
|
2020-07-09 23:33:42 +02:00
|
|
|
#include "util/random.h"
|
|
|
|
#include "utilities/fault_injection_env.h"
|
Fix a bug causing duplicate trailing entries in WritableFile (buffered IO) (#9236)
Summary:
`db_stress` is a user of `FaultInjectionTestFS`. After injecting a write error, `db_stress` probabilistically determines
data drop (https://github.com/facebook/rocksdb/blob/6.27.fb/db_stress_tool/db_stress_test_base.cc#L2615:L2619).
In some of our recent runs of `db_stress`, we found duplicate trailing entries corresponding to file trivial move in
the MANIFEST, causing the recovery to fail, because the file move operation is not idempotent: you cannot delete a
file from a given level twice.
Investigation suggests that data buffering in both `WritableFileWriter` and `FaultInjectionTestFS` may be the root cause.
WritableFileWriter buffers data to write in a memory buffer, `WritableFileWriter::buf_`. After each
`WriteBuffered()`/`WriteBufferedWithChecksum()` succeeds, the `buf_` is cleared.
If the underlying file `WritableFileWriter::writable_file_` is opened in buffered IO mode, then `FaultInjectionTestFS`
buffers data written for each file until next file sync. After an injected error, user of `FaultInjectionFS` can
choose to drop some or none of previously buffered data. If `db_stress` does not drop any unsynced data, then
such data will still exist in the `FaultInjectionTestFS`'s buffer.
Existing implementation of `WritableFileWriter::WriteBuffered()` does not clear `buf_` if there is an error. This may lead
to the data being buffered two copies: one in `WritableFileWriter`, and another in `FaultInjectionTestFS`.
We also know that the `WritableFileWriter` of MANIFEST file will close upon an error. During `Close()`, it will flush the
content in `buf_`. If no write error is injected to `FaultInjectionTestFS` this time, then we end up with two copies of the
data appended to the file.
To fix, we clear the `WritableFileWriter::buf_` upon failure as well. We focus this PR on files opened in non-direct mode.
This PR includes a unit test to reproduce a case when write error injection
to `WritableFile` can cause duplicate trailing entries.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9236
Test Plan: make check
Reviewed By: zhichao-cao
Differential Revision: D33033984
Pulled By: riversand963
fbshipit-source-id: ebfa5a0db8cbf1ed73100528b34fcba543c5db31
2021-12-13 17:59:20 +01:00
|
|
|
#ifndef NDEBUG
|
|
|
|
#include "utilities/fault_injection_fs.h"
|
|
|
|
#endif
|
2015-01-15 19:28:10 +01:00
|
|
|
|
2020-02-20 21:07:53 +01:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2015-01-15 19:28:10 +01:00
|
|
|
|
|
|
|
// Length passed to Random::RandomString() for every generated value.
static constexpr int kValueSize = 1000;
// Exclusive upper bound handed to Random::Uniform() when a test picks how
// many records to write.
static constexpr int kMaxNumValues = 2000;
// Number of rounds FaultTest runs for each option configuration.
static constexpr size_t kNumIterations = 3;
|
|
|
|
|
2018-05-07 21:15:54 +02:00
|
|
|
// Option configurations the fixture can run under; see
// FaultInjectionTest::CurrentOptions() for what each one changes.
//
// The fixture stores the current configuration as an int and moves to the
// next one by incrementing it, so the enumerators must stay consecutive and
// kEnd must remain the last entry.
enum FaultInjectionOptionConfig {
  kDefault = 0,
  kDifferentDataDir = 1,
  kWalDir = 2,
  kSyncWal = 3,
  kWalDirSyncWal = 4,
  kMultiLevels = 5,
  kEnd = 6,
};
|
|
|
|
class FaultInjectionTest
|
|
|
|
: public testing::Test,
|
|
|
|
public testing::WithParamInterface<std::tuple<
|
|
|
|
bool, FaultInjectionOptionConfig, FaultInjectionOptionConfig>> {
|
2015-01-23 03:34:23 +01:00
|
|
|
protected:
|
|
|
|
int option_config_;
|
2018-05-07 21:15:54 +02:00
|
|
|
int non_inclusive_end_range_; // kEnd or equivalent to that
|
2015-01-23 03:34:23 +01:00
|
|
|
// When need to make sure data is persistent, sync WAL
|
2015-01-24 01:03:24 +01:00
|
|
|
bool sync_use_wal_;
|
2015-01-23 03:34:23 +01:00
|
|
|
// When need to make sure data is persistent, call DB::CompactRange()
|
2015-01-24 01:03:24 +01:00
|
|
|
bool sync_use_compact_;
|
2015-01-23 03:34:23 +01:00
|
|
|
|
2015-07-16 21:18:32 +02:00
|
|
|
bool sequential_order_;
|
|
|
|
|
2015-01-15 19:28:10 +01:00
|
|
|
public:
|
2015-01-23 03:34:23 +01:00
|
|
|
// How Verify() treats a read that does not succeed.
enum ExpectedVerifResult {
  // Any non-OK read status is reported as an error.
  kValExpectFound,
  // A NotFound status is tolerated; any other non-OK status is an error.
  kValExpectNoError,
};
// Which unsynced state ResetDBState() discards before the DB is reopened.
enum ResetMethod {
  // Calls env_->DropUnsyncedFileData().
  kResetDropUnsyncedData,
  // Calls env_->DropRandomUnsyncedFileData(rnd); requires a Random.
  kResetDropRandomUnsyncedData,
  // Calls env_->DeleteFilesCreatedAfterLastDirSync().
  kResetDeleteUnsyncedFiles,
  // Drops unsynced file data and then deletes files created after the last
  // directory sync.
  kResetDropAndDeleteUnsynced,
};
|
2015-01-15 19:28:10 +01:00
|
|
|
|
2015-01-27 23:44:19 +01:00
|
|
|
std::unique_ptr<Env> base_env_;
|
2015-01-15 19:28:10 +01:00
|
|
|
FaultInjectionTestEnv* env_;
|
|
|
|
std::string dbname_;
|
2018-11-09 20:17:34 +01:00
|
|
|
std::shared_ptr<Cache> tiny_cache_;
|
2015-01-15 19:28:10 +01:00
|
|
|
Options options_;
|
|
|
|
DB* db_;
|
|
|
|
|
2015-01-23 03:34:23 +01:00
|
|
|
FaultInjectionTest()
|
2018-05-07 21:15:54 +02:00
|
|
|
: option_config_(std::get<1>(GetParam())),
|
|
|
|
non_inclusive_end_range_(std::get<2>(GetParam())),
|
2015-01-24 01:03:24 +01:00
|
|
|
sync_use_wal_(false),
|
|
|
|
sync_use_compact_(true),
|
2015-01-27 23:44:19 +01:00
|
|
|
base_env_(nullptr),
|
2018-03-07 21:39:19 +01:00
|
|
|
env_(nullptr),
|
2021-11-08 20:04:01 +01:00
|
|
|
db_(nullptr) {
|
|
|
|
EXPECT_OK(
|
|
|
|
test::CreateEnvFromSystem(ConfigOptions(), &system_env_, &env_guard_));
|
|
|
|
EXPECT_NE(system_env_, nullptr);
|
|
|
|
}
|
2015-01-15 19:28:10 +01:00
|
|
|
|
2019-02-14 22:52:47 +01:00
|
|
|
// Leaves the process-wide SyncPoint instance disabled and without callbacks
// so nothing installed by a test carries over to the next one.
~FaultInjectionTest() override {
  auto* const sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
}
|
|
|
|
|
2015-01-23 03:34:23 +01:00
|
|
|
bool ChangeOptions() {
|
|
|
|
option_config_++;
|
2018-05-07 21:15:54 +02:00
|
|
|
if (option_config_ >= non_inclusive_end_range_) {
|
2015-01-23 03:34:23 +01:00
|
|
|
return false;
|
|
|
|
} else {
|
2015-01-27 23:44:19 +01:00
|
|
|
if (option_config_ == kMultiLevels) {
|
2021-11-08 20:04:01 +01:00
|
|
|
base_env_.reset(MockEnv::Create(system_env_));
|
2015-01-27 23:44:19 +01:00
|
|
|
}
|
2015-01-23 03:34:23 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the current option configuration.
|
|
|
|
Options CurrentOptions() {
|
2015-01-24 01:03:24 +01:00
|
|
|
sync_use_wal_ = false;
|
|
|
|
sync_use_compact_ = true;
|
2015-01-23 03:34:23 +01:00
|
|
|
Options options;
|
|
|
|
switch (option_config_) {
|
|
|
|
case kWalDir:
|
2018-07-14 02:18:39 +02:00
|
|
|
options.wal_dir = test::PerThreadDBPath(env_, "fault_test_wal");
|
2015-01-23 03:34:23 +01:00
|
|
|
break;
|
|
|
|
case kDifferentDataDir:
|
2018-07-14 02:18:39 +02:00
|
|
|
options.db_paths.emplace_back(
|
|
|
|
test::PerThreadDBPath(env_, "fault_test_data"), 1000000U);
|
2015-01-23 03:34:23 +01:00
|
|
|
break;
|
|
|
|
case kSyncWal:
|
2015-01-24 01:03:24 +01:00
|
|
|
sync_use_wal_ = true;
|
|
|
|
sync_use_compact_ = false;
|
2015-01-23 03:34:23 +01:00
|
|
|
break;
|
|
|
|
case kWalDirSyncWal:
|
2018-07-14 02:18:39 +02:00
|
|
|
options.wal_dir = test::PerThreadDBPath(env_, "/fault_test_wal");
|
2015-01-24 01:03:24 +01:00
|
|
|
sync_use_wal_ = true;
|
|
|
|
sync_use_compact_ = false;
|
2015-01-23 03:34:23 +01:00
|
|
|
break;
|
2015-01-27 23:44:19 +01:00
|
|
|
case kMultiLevels:
|
|
|
|
options.write_buffer_size = 64 * 1024;
|
|
|
|
options.target_file_size_base = 64 * 1024;
|
|
|
|
options.level0_file_num_compaction_trigger = 2;
|
|
|
|
options.level0_slowdown_writes_trigger = 2;
|
|
|
|
options.level0_stop_writes_trigger = 4;
|
|
|
|
options.max_bytes_for_level_base = 128 * 1024;
|
|
|
|
options.max_write_buffer_number = 2;
|
|
|
|
options.max_background_compactions = 8;
|
|
|
|
options.max_background_flushes = 8;
|
|
|
|
sync_use_wal_ = true;
|
|
|
|
sync_use_compact_ = false;
|
|
|
|
break;
|
2015-01-23 03:34:23 +01:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return options;
|
|
|
|
}
|
|
|
|
|
2015-01-15 19:28:10 +01:00
|
|
|
Status NewDB() {
|
2018-03-07 21:39:19 +01:00
|
|
|
assert(db_ == nullptr);
|
2015-01-15 19:28:10 +01:00
|
|
|
assert(tiny_cache_ == nullptr);
|
2018-03-07 21:39:19 +01:00
|
|
|
assert(env_ == nullptr);
|
2015-01-15 19:28:10 +01:00
|
|
|
|
2021-11-08 20:04:01 +01:00
|
|
|
env_ = new FaultInjectionTestEnv(base_env_ ? base_env_.get() : system_env_);
|
2015-01-15 19:28:10 +01:00
|
|
|
|
2015-01-23 03:34:23 +01:00
|
|
|
options_ = CurrentOptions();
|
2015-01-15 19:28:10 +01:00
|
|
|
options_.env = env_;
|
|
|
|
options_.paranoid_checks = true;
|
|
|
|
|
|
|
|
BlockBasedTableOptions table_options;
|
|
|
|
tiny_cache_ = NewLRUCache(100);
|
|
|
|
table_options.block_cache = tiny_cache_;
|
|
|
|
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
|
|
|
2018-07-14 02:18:39 +02:00
|
|
|
dbname_ = test::PerThreadDBPath("fault_test");
|
2015-01-15 19:28:10 +01:00
|
|
|
|
rocksdb: Replace ASSERT* with EXPECT* in functions that does not return void value
Summary:
gtest does not use exceptions to fail a unit test by design, and `ASSERT*`s are implemented using `return`. As a consequence we cannot use `ASSERT*` in a function that does not return `void` value ([[ https://code.google.com/p/googletest/wiki/AdvancedGuide#Assertion_Placement | 1]]), and have to fix our existing code. This diff does this in a generic way, with no manual changes.
In order to detect all existing `ASSERT*` that are used in functions that doesn't return void value, I change the code to generate compile errors for such cases.
In `util/testharness.h` I defined `EXPECT*` assertions, the same way as `ASSERT*`, and redefined `ASSERT*` to return `void`. Then executed:
```lang=bash
% USE_CLANG=1 make all -j55 -k 2> build.log
% perl -naF: -e 'print "-- -number=".$F[1]." ".$F[0]."\n" if /: error:/' \
build.log | xargs -L 1 perl -spi -e 's/ASSERT/EXPECT/g if $. == $number'
% make format
```
After that I reverted back change to `ASSERT*` in `util/testharness.h`. But preserved introduced `EXPECT*`, which is the same as `ASSERT*`. This will be deleted once switched to gtest.
This diff is independent and contains manual changes only in `util/testharness.h`.
Test Plan:
Make sure all tests are passing.
```lang=bash
% USE_CLANG=1 make check
```
Reviewers: igor, lgalanis, sdong, yufei.zhu, rven, meyering
Reviewed By: meyering
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D33333
2015-03-17 04:52:32 +01:00
|
|
|
EXPECT_OK(DestroyDB(dbname_, options_));
|
2015-01-23 03:34:23 +01:00
|
|
|
|
2015-01-15 19:28:10 +01:00
|
|
|
options_.create_if_missing = true;
|
|
|
|
Status s = OpenDB();
|
|
|
|
options_.create_if_missing = false;
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2015-07-16 21:18:32 +02:00
|
|
|
void SetUp() override {
|
2018-05-07 21:15:54 +02:00
|
|
|
sequential_order_ = std::get<0>(GetParam());
|
2015-07-16 21:18:32 +02:00
|
|
|
ASSERT_OK(NewDB());
|
|
|
|
}
|
2015-01-15 19:28:10 +01:00
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
void TearDown() override {
|
2015-01-15 19:28:10 +01:00
|
|
|
CloseDB();
|
|
|
|
|
2015-01-23 03:34:23 +01:00
|
|
|
Status s = DestroyDB(dbname_, options_);
|
2015-01-15 19:28:10 +01:00
|
|
|
|
|
|
|
delete env_;
|
2018-03-07 21:39:19 +01:00
|
|
|
env_ = nullptr;
|
2015-01-15 19:28:10 +01:00
|
|
|
|
|
|
|
tiny_cache_.reset();
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
ASSERT_OK(s);
|
2015-01-15 19:28:10 +01:00
|
|
|
}
|
|
|
|
|
2015-07-16 21:18:32 +02:00
|
|
|
void Build(const WriteOptions& write_options, int start_idx, int num_vals) {
|
2015-01-15 19:28:10 +01:00
|
|
|
std::string key_space, value_space;
|
|
|
|
WriteBatch batch;
|
|
|
|
for (int i = start_idx; i < start_idx + num_vals; i++) {
|
2015-07-16 21:18:32 +02:00
|
|
|
Slice key = Key(i, &key_space);
|
2015-01-15 19:28:10 +01:00
|
|
|
batch.Clear();
|
2020-12-23 08:44:44 +01:00
|
|
|
ASSERT_OK(batch.Put(key, Value(i, &value_space)));
|
2015-01-23 03:34:23 +01:00
|
|
|
ASSERT_OK(db_->Write(write_options, &batch));
|
2015-01-15 19:28:10 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-16 21:18:32 +02:00
|
|
|
// Reads the value stored under the i-th key into *val.
//
// Returns the Status of DB::Get() with default ReadOptions. No comparison
// against the expected value is made here (callers do that), so the
// expected value is deliberately not generated.
Status ReadValue(int i, std::string* val) const {
  std::string key_space;
  return db_->Get(ReadOptions(), Key(i, &key_space), val);
}
|
|
|
|
|
2015-07-16 21:18:32 +02:00
|
|
|
Status Verify(int start_idx, int num_vals,
|
|
|
|
ExpectedVerifResult expected) const {
|
2015-01-15 19:28:10 +01:00
|
|
|
std::string val;
|
|
|
|
std::string value_space;
|
|
|
|
Status s;
|
|
|
|
for (int i = start_idx; i < start_idx + num_vals && s.ok(); i++) {
|
|
|
|
Value(i, &value_space);
|
2015-07-16 21:18:32 +02:00
|
|
|
s = ReadValue(i, &val);
|
2015-01-23 03:34:23 +01:00
|
|
|
if (s.ok()) {
|
rocksdb: Replace ASSERT* with EXPECT* in functions that does not return void value
Summary:
gtest does not use exceptions to fail a unit test by design, and `ASSERT*`s are implemented using `return`. As a consequence we cannot use `ASSERT*` in a function that does not return `void` value ([[ https://code.google.com/p/googletest/wiki/AdvancedGuide#Assertion_Placement | 1]]), and have to fix our existing code. This diff does this in a generic way, with no manual changes.
In order to detect all existing `ASSERT*` that are used in functions that doesn't return void value, I change the code to generate compile errors for such cases.
In `util/testharness.h` I defined `EXPECT*` assertions, the same way as `ASSERT*`, and redefined `ASSERT*` to return `void`. Then executed:
```lang=bash
% USE_CLANG=1 make all -j55 -k 2> build.log
% perl -naF: -e 'print "-- -number=".$F[1]." ".$F[0]."\n" if /: error:/' \
build.log | xargs -L 1 perl -spi -e 's/ASSERT/EXPECT/g if $. == $number'
% make format
```
After that I reverted back change to `ASSERT*` in `util/testharness.h`. But preserved introduced `EXPECT*`, which is the same as `ASSERT*`. This will be deleted once switched to gtest.
This diff is independent and contains manual changes only in `util/testharness.h`.
Test Plan:
Make sure all tests are passing.
```lang=bash
% USE_CLANG=1 make check
```
Reviewers: igor, lgalanis, sdong, yufei.zhu, rven, meyering
Reviewed By: meyering
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D33333
2015-03-17 04:52:32 +01:00
|
|
|
EXPECT_EQ(value_space, val);
|
2015-01-23 03:34:23 +01:00
|
|
|
}
|
|
|
|
if (expected == kValExpectFound) {
|
|
|
|
if (!s.ok()) {
|
|
|
|
fprintf(stderr, "Error when read %dth record (expect found): %s\n", i,
|
|
|
|
s.ToString().c_str());
|
|
|
|
return s;
|
2015-01-15 19:28:10 +01:00
|
|
|
}
|
2015-01-23 03:34:23 +01:00
|
|
|
} else if (!s.ok() && !s.IsNotFound()) {
|
|
|
|
fprintf(stderr, "Error when read %dth record: %s\n", i,
|
|
|
|
s.ToString().c_str());
|
|
|
|
return s;
|
2015-01-15 19:28:10 +01:00
|
|
|
}
|
|
|
|
}
|
2015-01-23 03:34:23 +01:00
|
|
|
return Status::OK();
|
2015-01-15 19:28:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Return the ith key
|
2015-07-16 21:18:32 +02:00
|
|
|
Slice Key(int i, std::string* storage) const {
|
2018-02-13 23:07:48 +01:00
|
|
|
unsigned long long num = i;
|
2015-07-16 21:18:32 +02:00
|
|
|
if (!sequential_order_) {
|
2015-07-16 04:58:28 +02:00
|
|
|
// random transfer
|
|
|
|
const int m = 0x5bd1e995;
|
|
|
|
num *= m;
|
|
|
|
num ^= num << 24;
|
|
|
|
}
|
2015-01-15 19:28:10 +01:00
|
|
|
char buf[100];
|
2018-02-13 23:07:48 +01:00
|
|
|
snprintf(buf, sizeof(buf), "%016d", static_cast<int>(num));
|
2015-01-15 19:28:10 +01:00
|
|
|
storage->assign(buf, strlen(buf));
|
|
|
|
return Slice(*storage);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the value to associate with the specified key
|
|
|
|
Slice Value(int k, std::string* storage) const {
|
|
|
|
Random r(k);
|
2020-07-09 23:33:42 +02:00
|
|
|
*storage = r.RandomString(kValueSize);
|
|
|
|
return Slice(*storage);
|
2015-01-15 19:28:10 +01:00
|
|
|
}
|
|
|
|
|
2015-12-16 00:26:20 +01:00
|
|
|
void CloseDB() {
|
2015-01-15 19:28:10 +01:00
|
|
|
delete db_;
|
2017-05-19 19:43:11 +02:00
|
|
|
db_ = nullptr;
|
2015-01-15 19:28:10 +01:00
|
|
|
}
|
|
|
|
|
2015-12-16 00:26:20 +01:00
|
|
|
// Closes any open DB, resets the fault-injection Env's state and opens the
// DB at dbname_ with options_. Returns the Status of DB::Open().
//
// NOTE(review): in builds with assertions enabled, a failed open that
// leaves db_ null trips the assert below before the Status reaches the
// caller.
Status OpenDB() {
  CloseDB();
  env_->ResetState();
  Status open_status = DB::Open(options_, dbname_, &db_);
  assert(db_ != nullptr);
  return open_status;
}
|
|
|
|
|
|
|
|
void DeleteAllData() {
|
|
|
|
Iterator* iter = db_->NewIterator(ReadOptions());
|
|
|
|
WriteOptions options;
|
|
|
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
|
|
ASSERT_OK(db_->Delete(WriteOptions(), iter->key()));
|
|
|
|
}
|
2020-12-23 08:44:44 +01:00
|
|
|
ASSERT_OK(iter->status());
|
2015-01-15 19:28:10 +01:00
|
|
|
delete iter;
|
2015-01-23 03:34:23 +01:00
|
|
|
|
|
|
|
FlushOptions flush_options;
|
|
|
|
flush_options.wait = true;
|
2020-12-23 08:44:44 +01:00
|
|
|
ASSERT_OK(db_->Flush(flush_options));
|
2015-01-15 19:28:10 +01:00
|
|
|
}
|
|
|
|
|
2015-01-27 00:22:18 +01:00
|
|
|
// rnd cannot be null for kResetDropRandomUnsyncedData
|
|
|
|
void ResetDBState(ResetMethod reset_method, Random* rnd = nullptr) {
|
2015-01-28 01:34:16 +01:00
|
|
|
env_->AssertNoOpenFile();
|
2015-01-15 19:28:10 +01:00
|
|
|
switch (reset_method) {
|
2015-01-23 03:34:23 +01:00
|
|
|
case kResetDropUnsyncedData:
|
2015-01-15 19:28:10 +01:00
|
|
|
ASSERT_OK(env_->DropUnsyncedFileData());
|
|
|
|
break;
|
2015-01-27 00:22:18 +01:00
|
|
|
case kResetDropRandomUnsyncedData:
|
|
|
|
ASSERT_OK(env_->DropRandomUnsyncedFileData(rnd));
|
|
|
|
break;
|
2015-01-23 03:34:23 +01:00
|
|
|
case kResetDeleteUnsyncedFiles:
|
|
|
|
ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync());
|
|
|
|
break;
|
|
|
|
case kResetDropAndDeleteUnsynced:
|
|
|
|
ASSERT_OK(env_->DropUnsyncedFileData());
|
2015-01-15 19:28:10 +01:00
|
|
|
ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync());
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void PartialCompactTestPreFault(int num_pre_sync, int num_post_sync) {
|
|
|
|
DeleteAllData();
|
2015-01-23 03:34:23 +01:00
|
|
|
|
|
|
|
WriteOptions write_options;
|
2015-01-24 01:03:24 +01:00
|
|
|
write_options.sync = sync_use_wal_;
|
2015-01-23 03:34:23 +01:00
|
|
|
|
|
|
|
Build(write_options, 0, num_pre_sync);
|
2015-01-24 01:03:24 +01:00
|
|
|
if (sync_use_compact_) {
|
2020-12-23 08:44:44 +01:00
|
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
2015-01-23 03:34:23 +01:00
|
|
|
}
|
|
|
|
write_options.sync = false;
|
|
|
|
Build(write_options, num_pre_sync, num_post_sync);
|
2015-01-15 19:28:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Deactivates the filesystem, closes the DB, discards unsynced state per
// reset_method, reopens, and verifies the data both before and after
// background compaction has finished.
//
// The first num_pre_sync records must all be found; the following
// num_post_sync records may be missing but must not produce other errors.
// rnd is forwarded to ResetDBState().
void PartialCompactTestReopenWithFault(ResetMethod reset_method,
                                       int num_pre_sync, int num_post_sync,
                                       Random* rnd = nullptr) {
  env_->SetFilesystemActive(false);
  CloseDB();
  ResetDBState(reset_method, rnd);
  ASSERT_OK(OpenDB());

  // Right after reopening.
  ASSERT_OK(Verify(0, num_pre_sync, kValExpectFound));
  ASSERT_OK(Verify(num_pre_sync, num_post_sync, kValExpectNoError));

  // And again once compaction has finished.
  WaitCompactionFinish();
  ASSERT_OK(Verify(0, num_pre_sync, kValExpectFound));
  ASSERT_OK(Verify(num_pre_sync, num_post_sync, kValExpectNoError));
}
|
|
|
|
|
|
|
|
// Intentionally empty: the "no write" scenario does nothing before the
// fault. Kept so call sites mirror the PartialCompactTest* pair.
void NoWriteTestPreFault() {}
|
|
|
|
|
|
|
|
// Closes the DB, discards unsynced state per reset_method (without a
// Random) and checks that the DB can be reopened.
void NoWriteTestReopenWithFault(ResetMethod reset_method) {
  CloseDB();
  ResetDBState(reset_method);
  ASSERT_OK(OpenDB());
}
|
2015-07-16 21:18:32 +02:00
|
|
|
|
|
|
|
void WaitCompactionFinish() {
|
2020-12-23 08:44:44 +01:00
|
|
|
ASSERT_OK(static_cast<DBImpl*>(db_->GetRootDB())->TEST_WaitForCompact());
|
2015-07-16 21:18:32 +02:00
|
|
|
ASSERT_OK(db_->Put(WriteOptions(), "", ""));
|
|
|
|
}
|
2021-11-08 20:04:01 +01:00
|
|
|
|
|
|
|
private:
|
|
|
|
Env* system_env_;
|
|
|
|
std::shared_ptr<Env> env_guard_;
|
2015-01-15 19:28:10 +01:00
|
|
|
};
|
|
|
|
|
2018-05-07 21:15:54 +02:00
|
|
|
// Same fixture under a second name, so tests declared against it (FaultTest)
// form their own parameterized suite, separate from FaultInjectionTest's.
class FaultInjectionTestSplitted : public FaultInjectionTest {
};
|
|
|
|
|
|
|
|
// For each option configuration in the parameterized range, runs
// kNumIterations rounds. Each round picks random record counts and runs the
// write / fault / reopen cycle once per reset method.
TEST_P(FaultInjectionTestSplitted, FaultTest) {
  do {
    Random rnd(301);

    for (size_t round = 0; round < kNumIterations; ++round) {
      const int num_pre_sync = rnd.Uniform(kMaxNumValues);
      const int num_post_sync = rnd.Uniform(kMaxNumValues);

      // One cycle: write, crash with `fault_reset`, reopen and verify; then
      // crash again with `idle_reset` without writing in between.
      auto run_cycle = [&](ResetMethod fault_reset, Random* fault_rnd,
                           ResetMethod idle_reset) {
        PartialCompactTestPreFault(num_pre_sync, num_post_sync);
        PartialCompactTestReopenWithFault(fault_reset, num_pre_sync,
                                          num_post_sync, fault_rnd);
        NoWriteTestPreFault();
        NoWriteTestReopenWithFault(idle_reset);
      };

      run_cycle(kResetDropUnsyncedData, nullptr, kResetDropUnsyncedData);

      // The no-write step has no Random to pass along, so it uses the
      // non-random drop.
      run_cycle(kResetDropRandomUnsyncedData, &rnd, kResetDropUnsyncedData);

      // Setting a separate data path won't pass the test as we don't sync
      // it after creating new files.
      run_cycle(kResetDropAndDeleteUnsynced, nullptr,
                kResetDropAndDeleteUnsynced);

      // No new files created so we expect all values since no files will be
      // dropped.
      run_cycle(kResetDeleteUnsyncedFiles, nullptr, kResetDeleteUnsyncedFiles);
    }
  } while (ChangeOptions());
}
|
|
|
|
|
2015-04-09 06:18:05 +02:00
|
|
|
// Previous log file is not fsynced if sync is forced after log rolling.
|
[wal changes 2/3] write with sync=true syncs previous unsynced wals to prevent illegal data loss
Summary:
I'll just copy internal task summary here:
"
This sequence will cause data loss in the middle after an sync write:
non-sync write key 1
flush triggered, not yet scheduled
sync write key 2
system crash
After rebooting, users might see key 2 but not key 1, which violates the API of sync write.
This can be reproduced using unit test FaultInjectionTest::DISABLED_WriteOptionSyncTest.
One way to fix it is for a sync write, if there are outstanding unsynced log files, we need to sync them too.
"
This diff should be considered together with the next diff D40905; in isolation this fix probably could be a little simpler.
Test Plan: `make check`; added a test for that (DBTest.SyncingPreviousLogs) before noticing FaultInjectionTest.WriteOptionSyncTest (keeping both since mine asserts a bit more); both tests fail without this diff; for D40905 stacked on top of this diff, ran tests with ASAN, TSAN and valgrind
Reviewers: rven, yhchiang, IslamAbdelRahman, anthony, kradhakrishnan, igor, sdong
Reviewed By: sdong
Subscribers: dhruba
Differential Revision: https://reviews.facebook.net/D40899
2015-07-22 12:28:08 +02:00
|
|
|
// Writes key 1 unsynced, requests a flush that cannot run, writes key 2
// with sync, then crashes. Both keys must be readable after reopening.
TEST_P(FaultInjectionTest, WriteOptionSyncTest) {
  test::SleepingBackgroundTask flush_blocker;
  env_->SetBackgroundThreads(1, Env::HIGH);
  // Block the job queue to prevent flush job from running.
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &flush_blocker,
                 Env::Priority::HIGH);
  flush_blocker.WaitUntilSleeping();

  std::string key_space;
  std::string value_space;

  // Key 1: unsynced write, followed by a flush request that is not waited
  // for.
  WriteOptions unsynced_write;
  unsynced_write.sync = false;
  ASSERT_OK(
      db_->Put(unsynced_write, Key(1, &key_space), Value(1, &value_space)));
  FlushOptions no_wait_flush;
  no_wait_flush.wait = false;
  ASSERT_OK(db_->Flush(no_wait_flush));

  // Key 2: synced write.
  WriteOptions synced_write;
  synced_write.sync = true;
  ASSERT_OK(
      db_->Put(synced_write, Key(2, &key_space), Value(2, &value_space)));
  ASSERT_OK(db_->FlushWAL(false));

  // Crash: drop unsynced data and delete unsynced files, then reopen.
  env_->SetFilesystemActive(false);
  NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);
  flush_blocker.WakeUp();
  flush_blocker.WaitUntilDone();

  ASSERT_OK(OpenDB());

  // Check key 2 first, then key 1.
  std::string val;
  const int keys_to_check[] = {2, 1};
  for (const int k : keys_to_check) {
    Value(k, &value_space);
    ASSERT_OK(ReadValue(k, &val));
    ASSERT_EQ(value_space, val);
  }
}
|
|
|
|
|
2015-07-16 21:18:32 +02:00
|
|
|
// Crashes the DB after a compaction job has run but before its result is
// processed further, then checks that all keys are still readable after
// reopening and that no background compaction starts before DB::Open has
// reached its "Opened" sync point.
TEST_P(FaultInjectionTest, UninstalledCompaction) {
  options_.target_file_size_base = 32 * 1024;
  options_.write_buffer_size = 100 << 10;  // 100KB
  options_.level0_file_num_compaction_trigger = 6;
  options_.level0_stop_writes_trigger = 1 << 10;
  options_.level0_slowdown_writes_trigger = 1 << 10;
  options_.max_background_compactions = 1;
  // Reopen with the options above. A failure here must stop the test rather
  // than be silently dropped, as at every other OpenDB() call site.
  ASSERT_OK(OpenDB());

  if (!sequential_order_) {
    // Order the background compaction relative to the three sync points
    // placed in this test body below.
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
        {"FaultInjectionTest::FaultTest:0", "DBImpl::BGWorkCompaction"},
        {"CompactionJob::Run():End", "FaultInjectionTest::FaultTest:1"},
        {"FaultInjectionTest::FaultTest:2",
         "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"},
    });
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  const int kNumKeys = 1000;
  Build(WriteOptions(), 0, kNumKeys);
  FlushOptions flush_options;
  flush_options.wait = true;
  ASSERT_OK(db_->Flush(flush_options));
  ASSERT_OK(db_->Put(WriteOptions(), "", ""));
  TEST_SYNC_POINT("FaultInjectionTest::FaultTest:0");
  TEST_SYNC_POINT("FaultInjectionTest::FaultTest:1");
  // Deactivate the filesystem before letting the compaction proceed.
  env_->SetFilesystemActive(false);
  TEST_SYNC_POINT("FaultInjectionTest::FaultTest:2");
  CloseDB();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ResetDBState(kResetDropUnsyncedData);

  // After the crash: a background compaction must not start before the
  // "DBImpl::Open:Opened" sync point has been hit.
  std::atomic<bool> opened(false);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::Open:Opened", [&](void* /*arg*/) { opened.store(true); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BGWorkCompaction",
      [&](void* /*arg*/) { ASSERT_TRUE(opened.load()); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(OpenDB());
  ASSERT_OK(Verify(0, kNumKeys, FaultInjectionTest::kValExpectFound));
  WaitCompactionFinish();
  ASSERT_OK(Verify(0, kNumKeys, FaultInjectionTest::kValExpectFound));
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}
|
|
|
|
|
[wal changes 3/3] method in DB to sync WAL without blocking writers
Summary:
Subj. We really need this feature.
Previous diff D40899 has most of the changes to make this possible, this diff just adds the method.
Test Plan: `make check`, the new test fails without this diff; ran with ASAN, TSAN and valgrind.
Reviewers: igor, rven, IslamAbdelRahman, anthony, kradhakrishnan, tnovak, yhchiang, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, maykov, hermanlee4, yoshinorim, tnovak, dhruba
Differential Revision: https://reviews.facebook.net/D40905
2015-08-05 15:06:39 +02:00
|
|
|
// Writes two keys without per-write sync, requests a non-waiting flush in
// between, then calls FlushWAL(true). After the filesystem is deactivated and
// the DB is reopened through the fault-injection path, both keys must still
// be readable.
TEST_P(FaultInjectionTest, ManualLogSyncTest) {
  test::SleepingBackgroundTask flush_blocker;
  env_->SetBackgroundThreads(1, Env::HIGH);
  // Block the job queue to prevent flush job from running: the single HIGH
  // priority thread is occupied by the sleeping task until it is woken up.
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &flush_blocker,
                 Env::Priority::HIGH);
  flush_blocker.WaitUntilSleeping();

  WriteOptions unsynced_write;
  unsynced_write.sync = false;

  std::string key_buf;
  std::string value_buf;
  ASSERT_OK(db_->Put(unsynced_write, Key(1, &key_buf), Value(1, &value_buf)));

  // Request a flush but do not wait for it; the blocked job queue keeps it
  // from running.
  FlushOptions nonblocking_flush;
  nonblocking_flush.wait = false;
  ASSERT_OK(db_->Flush(nonblocking_flush));

  ASSERT_OK(db_->Put(unsynced_write, Key(2, &key_buf), Value(2, &value_buf)));
  // NOTE(review): the `true` argument presumably requests a WAL sync --
  // confirm against the DB::FlushWAL declaration.
  ASSERT_OK(db_->FlushWAL(true));

  // Simulate the fault, then release the background thread before reopening.
  env_->SetFilesystemActive(false);
  NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);
  flush_blocker.WakeUp();
  flush_blocker.WaitUntilDone();
  ASSERT_OK(OpenDB());

  // Both keys must have survived; key 2 is checked first, then key 1.
  std::string read_back;
  for (const int k : {2, 1}) {
    Value(k, &value_buf);  // regenerate the expected value into value_buf
    ASSERT_OK(ReadValue(k, &read_back));
    ASSERT_EQ(value_buf, read_back);
  }
}
|
|
|
|
|
Add facility to write only a portion of WriteBatch to WAL
Summary:
When constructing a write batch a client may now call MarkWalTerminationPoint() on that batch. No batch operations after this call will be written to the WAL but will still be inserted into the Memtable. This facility is used to remove one of the three WriteImpl calls in 2PC transactions. This produces a ~1% perf improvement.
```
RocksDB - unoptimized 2pc, sync_binlog=1, disable_2pc=off
INFO 2016-08-31 14:30:38,814 [main]: REQUEST PHASE COMPLETED. 75000000 requests done in 2619 seconds. Requests/second = 28628
RocksDB - optimized 2pc , sync_binlog=1, disable_2pc=off
INFO 2016-08-31 16:26:59,442 [main]: REQUEST PHASE COMPLETED. 75000000 requests done in 2581 seconds. Requests/second = 29054
```
Test Plan: Two unit tests added.
Reviewers: sdong, yiwu, IslamAbdelRahman
Reviewed By: yiwu
Subscribers: hermanlee4, dhruba, andrewkr
Differential Revision: https://reviews.facebook.net/D64599
2016-10-07 20:31:26 +02:00
|
|
|
// Builds a batch with one Put before MarkWalTerminationPoint() and one after
// it, writes the batch with sync enabled, then reopens the DB through the
// fault-injection path. The entry added before the termination point must be
// found; the entry added after it must be reported as NotFound.
TEST_P(FaultInjectionTest, WriteBatchWalTerminationTest) {
  ReadOptions ro;
  Options options = CurrentOptions();
  options.env = env_;

  WriteOptions synced_write;
  synced_write.sync = true;
  synced_write.disableWAL = false;

  WriteBatch wal_batch;
  ASSERT_OK(wal_batch.Put("cats", "dogs"));
  wal_batch.MarkWalTerminationPoint();
  // Added after the termination point.
  ASSERT_OK(wal_batch.Put("boys", "girls"));
  ASSERT_OK(db_->Write(synced_write, &wal_batch));

  // Simulate the fault and reopen.
  env_->SetFilesystemActive(false);
  NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced);
  ASSERT_OK(OpenDB());

  std::string fetched;
  ASSERT_OK(db_->Get(ro, "cats", &fetched));
  ASSERT_EQ("dogs", fetched);
  ASSERT_EQ(db_->Get(ro, "boys", &fetched), Status::NotFound());
}
|
|
|
|
|
Fix a bug causing duplicate trailing entries in WritableFile (buffered IO) (#9236)
Summary:
`db_stress` is a user of `FaultInjectionTestFS`. After injecting a write error, `db_stress` probabilistically determines
data drop (https://github.com/facebook/rocksdb/blob/6.27.fb/db_stress_tool/db_stress_test_base.cc#L2615:L2619).
In some of our recent runs of `db_stress`, we found duplicate trailing entries corresponding to file trivial move in
the MANIFEST, causing the recovery to fail, because the file move operation is not idempotent: you cannot delete a
file from a given level twice.
Investigation suggests that data buffering in both `WritableFileWriter` and `FaultInjectionTestFS` may be the root cause.
WritableFileWriter buffers data to write in a memory buffer, `WritableFileWriter::buf_`. After each
`WriteBuffered()`/`WriteBufferedWithChecksum()` succeeds, the `buf_` is cleared.
If the underlying file `WritableFileWriter::writable_file_` is opened in buffered IO mode, then `FaultInjectionTestFS`
buffers data written for each file until next file sync. After an injected error, user of `FaultInjectionFS` can
choose to drop some or none of previously buffered data. If `db_stress` does not drop any unsynced data, then
such data will still exist in the `FaultInjectionTestFS`'s buffer.
Existing implementation of `WritableFileWriter::WriteBuffered()` does not clear `buf_` if there is an error. This may lead
to the data being buffered in two copies: one in `WritableFileWriter`, and another in `FaultInjectionTestFS`.
We also know that the `WritableFileWriter` of MANIFEST file will close upon an error. During `Close()`, it will flush the
content in `buf_`. If no write error is injected to `FaultInjectionTestFS` this time, then we end up with two copies of the
data appended to the file.
To fix, we clear the `WritableFileWriter::buf_` upon failure as well. We focus this PR on files opened in non-direct mode.
This PR includes a unit test to reproduce a case when write error injection
to `WritableFile` can cause duplicate trailing entries.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9236
Test Plan: make check
Reviewed By: zhichao-cao
Differential Revision: D33033984
Pulled By: riversand963
fbshipit-source-id: ebfa5a0db8cbf1ed73100528b34fcba543c5db31
2021-12-13 17:59:20 +01:00
|
|
|
// Reproduces the "duplicate trailing entries" scenario: a record is appended
// to a log file while write errors are injected (so AddRecord fails), error
// injection is then disabled, and the writer is closed. Reading the file back
// must yield exactly one version edit.
TEST_P(FaultInjectionTest, NoDuplicateTrailingEntries) {
  auto fault_fs = std::make_shared<FaultInjectionTestFS>(FileSystem::Default());
  fault_fs->EnableWriteErrorInjection();
  fault_fs->SetFilesystemDirectWritable(false);
  const std::string file_name = NormalizePath(dbname_ + "/test_file");
  std::unique_ptr<log::Writer> log_writer = nullptr;
  constexpr uint64_t log_number = 0;
  {
    // Create the file through the fault-injecting filesystem and wrap it in a
    // log writer.
    std::unique_ptr<FSWritableFile> file;
    const Status s =
        fault_fs->NewWritableFile(file_name, FileOptions(), &file, nullptr);
    ASSERT_OK(s);
    std::unique_ptr<WritableFileWriter> fwriter(
        new WritableFileWriter(std::move(file), file_name, FileOptions()));
    log_writer.reset(new log::Writer(std::move(fwriter), log_number,
                                     /*recycle_log_files=*/false));
  }

  // one_in=1: configure an injected IOError for every write.
  fault_fs->SetRandomWriteError(
      0xdeadbeef, /*one_in=*/1, IOStatus::IOError("Injected IOError"),
      /*inject_for_all_file_types=*/true, /*types=*/{});

  {
    VersionEdit edit;
    edit.SetColumnFamily(0);
    std::string buf;
    // Use a gtest assertion rather than assert(): assert() is compiled out
    // under NDEBUG, which would silently skip the encoding in release builds.
    ASSERT_TRUE(edit.EncodeTo(&buf));
    const Status s = log_writer->AddRecord(buf);
    ASSERT_NOK(s);
  }

  fault_fs->DisableWriteErrorInjection();

  // Closing the log writer will cause WritableFileWriter::Close() and flush
  // remaining data from its buffer to underlying file.
  log_writer.reset();

  {
    std::unique_ptr<FSSequentialFile> file;
    Status s =
        fault_fs->NewSequentialFile(file_name, FileOptions(), &file, nullptr);
    ASSERT_OK(s);
    std::unique_ptr<SequentialFileReader> freader(
        new SequentialFileReader(std::move(file), file_name));
    Status log_read_s;
    // Records the first corruption reported by the log reader into the
    // Status it was constructed with.
    class LogReporter : public log::Reader::Reporter {
     public:
      Status* status_;
      explicit LogReporter(Status* _s) : status_(_s) {}
      void Corruption(size_t /*bytes*/, const Status& _s) override {
        if (status_->ok()) {
          *status_ = _s;
        }
      }
    } reporter(&log_read_s);
    std::unique_ptr<log::Reader> log_reader(new log::Reader(
        nullptr, std::move(freader), &reporter, /*checksum=*/true, log_number));
    Slice record;
    std::string data;  // scratch space for ReadRecord
    size_t count = 0;
    while (log_reader->ReadRecord(&record, &data) && log_read_s.ok()) {
      VersionEdit edit;
      // Decode the returned record, not the scratch buffer: the scratch is
      // only temporary storage and is not guaranteed to hold the record.
      ASSERT_OK(edit.DecodeFrom(record));
      ++count;
    }
    // Verify that only one version edit exists in the file.
    ASSERT_EQ(size_t{1}, count);
  }
}
|
|
|
|
|
2020-06-04 00:53:09 +02:00
|
|
|
// Instantiates FaultInjectionTest with two parameter tuples that differ only
// in the leading bool.
// NOTE(review): the tuple layout is presumably (flag, first option config,
// end option config) -- confirm against the fixture's parameter handling.
INSTANTIATE_TEST_CASE_P(
    FaultTest, FaultInjectionTest,
    ::testing::Values(std::make_tuple(false, kDefault, kEnd),
                      std::make_tuple(true, kDefault, kEnd)));
|
|
|
|
|
2020-06-04 00:53:09 +02:00
|
|
|
// Instantiates FaultInjectionTestSplitted with four parameter tuples: the
// [kDefault, kSyncWal) and [kSyncWal, kEnd) pairs, each with the leading bool
// set to false and to true.
// NOTE(review): reading the two enum values as a half-open range is an
// assumption -- confirm against the fixture's parameter handling.
INSTANTIATE_TEST_CASE_P(
    FaultTest, FaultInjectionTestSplitted,
    ::testing::Values(std::make_tuple(false, kDefault, kSyncWal),
                      std::make_tuple(true, kDefault, kSyncWal),
                      std::make_tuple(false, kSyncWal, kEnd),
                      std::make_tuple(true, kSyncWal, kEnd)));
|
2015-07-16 21:18:32 +02:00
|
|
|
|
2020-02-20 21:07:53 +01:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2015-01-15 19:28:10 +01:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2015-03-17 22:08:00 +01:00
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
2021-11-08 20:04:01 +01:00
|
|
|
RegisterCustomObjects(argc, argv);
|
2015-03-17 22:08:00 +01:00
|
|
|
return RUN_ALL_TESTS();
|
2015-01-15 19:28:10 +01:00
|
|
|
}
|