Should flush and sync WAL when writing it in DB::Open() (#6417)
Summary: A recent fix related to 2pc https://github.com/facebook/rocksdb/pull/6313/ writes something to WAL, but does not flush or sync. This causes assertion failure "impl->TEST_WALBufferIsEmpty()" if manual_wal_flush = true. We should fsync the entry to make sure a second power reset can recover. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6417 Test Plan: Add manual_wal_flush=true case in TransactionTest.DoubleCrashInRecovery and fix a bug in the test so that the bug can be reproduced. It passes with the fix. Differential Revision: D19894537 fbshipit-source-id: f1e84e49e2269f583c6019743118292cd8b6598e
This commit is contained in:
parent
46516778dd
commit
ac8e89a443
@ -1494,6 +1494,13 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
|
||||
uint64_t log_used, log_size;
|
||||
log::Writer* log_writer = impl->logs_.back().writer;
|
||||
s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size);
|
||||
if (s.ok()) {
|
||||
// Need to fsync, otherwise it might get lost after a power reset.
|
||||
s = impl->FlushWAL(false);
|
||||
if (s.ok()) {
|
||||
s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6120,84 +6120,87 @@ TEST_P(TransactionTest, ReseekOptimization) {
|
||||
// there. The new log files should be still read succesfully during recovery of
|
||||
// the 2nd crash.
|
||||
TEST_P(TransactionTest, DoubleCrashInRecovery) {
|
||||
for (const bool write_after_recovery : {false, true}) {
|
||||
options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
|
||||
ReOpen();
|
||||
std::string cf_name = "two";
|
||||
ColumnFamilyOptions cf_options;
|
||||
ColumnFamilyHandle* cf_handle = nullptr;
|
||||
ASSERT_OK(db->CreateColumnFamily(cf_options, cf_name, &cf_handle));
|
||||
for (const bool manual_wal_flush : {false, true}) {
|
||||
for (const bool write_after_recovery : {false, true}) {
|
||||
options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
|
||||
options.manual_wal_flush = manual_wal_flush;
|
||||
ReOpen();
|
||||
std::string cf_name = "two";
|
||||
ColumnFamilyOptions cf_options;
|
||||
ColumnFamilyHandle* cf_handle = nullptr;
|
||||
ASSERT_OK(db->CreateColumnFamily(cf_options, cf_name, &cf_handle));
|
||||
|
||||
// Add a prepare entry to prevent the older logs from being deleted.
|
||||
WriteOptions write_options;
|
||||
TransactionOptions txn_options;
|
||||
Transaction* txn = db->BeginTransaction(write_options, txn_options);
|
||||
ASSERT_OK(txn->SetName("xid"));
|
||||
ASSERT_OK(txn->Put(Slice("foo-prepare"), Slice("bar-prepare")));
|
||||
ASSERT_OK(txn->Prepare());
|
||||
// Add a prepare entry to prevent the older logs from being deleted.
|
||||
WriteOptions write_options;
|
||||
TransactionOptions txn_options;
|
||||
Transaction* txn = db->BeginTransaction(write_options, txn_options);
|
||||
ASSERT_OK(txn->SetName("xid"));
|
||||
ASSERT_OK(txn->Put(Slice("foo-prepare"), Slice("bar-prepare")));
|
||||
ASSERT_OK(txn->Prepare());
|
||||
|
||||
FlushOptions flush_ops;
|
||||
db->Flush(flush_ops);
|
||||
// Now we have a log that cannot be deleted
|
||||
FlushOptions flush_ops;
|
||||
db->Flush(flush_ops);
|
||||
// Now we have a log that cannot be deleted
|
||||
|
||||
ASSERT_OK(db->Put(write_options, cf_handle, "foo1", "bar1"));
|
||||
// Flush only the 2nd cf
|
||||
db->Flush(flush_ops, cf_handle);
|
||||
ASSERT_OK(db->Put(write_options, cf_handle, "foo1", "bar1"));
|
||||
// Flush only the 2nd cf
|
||||
db->Flush(flush_ops, cf_handle);
|
||||
|
||||
// The value is large enough to be touched by the corruption we ingest
|
||||
// below.
|
||||
std::string large_value(400, ' ');
|
||||
// key/value not touched by corruption
|
||||
ASSERT_OK(db->Put(write_options, "foo2", "bar2"));
|
||||
// key/value touched by corruption
|
||||
ASSERT_OK(db->Put(write_options, "foo3", large_value));
|
||||
// key/value not touched by corruption
|
||||
ASSERT_OK(db->Put(write_options, "foo4", "bar4"));
|
||||
// The value is large enough to be touched by the corruption we ingest
|
||||
// below.
|
||||
std::string large_value(400, ' ');
|
||||
// key/value not touched by corruption
|
||||
ASSERT_OK(db->Put(write_options, "foo2", "bar2"));
|
||||
// key/value touched by corruption
|
||||
ASSERT_OK(db->Put(write_options, "foo3", large_value));
|
||||
// key/value not touched by corruption
|
||||
ASSERT_OK(db->Put(write_options, "foo4", "bar4"));
|
||||
|
||||
db->FlushWAL(true);
|
||||
DBImpl* db_impl = reinterpret_cast<DBImpl*>(db->GetRootDB());
|
||||
uint64_t wal_file_id = db_impl->TEST_LogfileNumber();
|
||||
std::string fname = LogFileName(dbname, wal_file_id);
|
||||
reinterpret_cast<PessimisticTransactionDB*>(db)->TEST_Crash();
|
||||
delete txn;
|
||||
delete cf_handle;
|
||||
delete db;
|
||||
db = nullptr;
|
||||
db->FlushWAL(true);
|
||||
DBImpl* db_impl = reinterpret_cast<DBImpl*>(db->GetRootDB());
|
||||
uint64_t wal_file_id = db_impl->TEST_LogfileNumber();
|
||||
std::string fname = LogFileName(dbname, wal_file_id);
|
||||
reinterpret_cast<PessimisticTransactionDB*>(db)->TEST_Crash();
|
||||
delete txn;
|
||||
delete cf_handle;
|
||||
delete db;
|
||||
db = nullptr;
|
||||
|
||||
// Corrupt the last log file in the middle, so that it is not corrupted
|
||||
// in the tail.
|
||||
std::string file_content;
|
||||
ASSERT_OK(ReadFileToString(env, fname, &file_content));
|
||||
file_content[400] = 'h';
|
||||
file_content[401] = 'a';
|
||||
ASSERT_OK(env->DeleteFile(fname));
|
||||
ASSERT_OK(WriteStringToFile(env, file_content, fname));
|
||||
// Corrupt the last log file in the middle, so that it is not corrupted
|
||||
// in the tail.
|
||||
std::string file_content;
|
||||
ASSERT_OK(ReadFileToString(env, fname, &file_content));
|
||||
file_content[400] = 'h';
|
||||
file_content[401] = 'a';
|
||||
ASSERT_OK(env->DeleteFile(fname));
|
||||
ASSERT_OK(WriteStringToFile(env, file_content, fname, true));
|
||||
|
||||
// Recover from corruption
|
||||
std::vector<ColumnFamilyHandle*> handles;
|
||||
std::vector<ColumnFamilyDescriptor> column_families;
|
||||
column_families.push_back(ColumnFamilyDescriptor(kDefaultColumnFamilyName,
|
||||
ColumnFamilyOptions()));
|
||||
column_families.push_back(
|
||||
ColumnFamilyDescriptor("two", ColumnFamilyOptions()));
|
||||
ASSERT_OK(ReOpenNoDelete(column_families, &handles));
|
||||
// Recover from corruption
|
||||
std::vector<ColumnFamilyHandle*> handles;
|
||||
std::vector<ColumnFamilyDescriptor> column_families;
|
||||
column_families.push_back(ColumnFamilyDescriptor(kDefaultColumnFamilyName,
|
||||
ColumnFamilyOptions()));
|
||||
column_families.push_back(
|
||||
ColumnFamilyDescriptor("two", ColumnFamilyOptions()));
|
||||
ASSERT_OK(ReOpenNoDelete(column_families, &handles));
|
||||
|
||||
if (write_after_recovery) {
|
||||
// Write data to the log right after the corrupted log
|
||||
ASSERT_OK(db->Put(write_options, "foo5", large_value));
|
||||
}
|
||||
if (write_after_recovery) {
|
||||
// Write data to the log right after the corrupted log
|
||||
ASSERT_OK(db->Put(write_options, "foo5", large_value));
|
||||
}
|
||||
|
||||
// Persist data written to WAL during recovery or by the last Put
|
||||
db->FlushWAL(true);
|
||||
// 2nd crash to recover while having a valid log after the corrupted one.
|
||||
ASSERT_OK(ReOpenNoDelete(column_families, &handles));
|
||||
assert(db != nullptr);
|
||||
txn = db->GetTransactionByName("xid");
|
||||
ASSERT_TRUE(txn != nullptr);
|
||||
ASSERT_OK(txn->Commit());
|
||||
delete txn;
|
||||
for (auto handle : handles) {
|
||||
delete handle;
|
||||
// Persist data written to WAL during recovery or by the last Put
|
||||
db->FlushWAL(true);
|
||||
// 2nd crash to recover while having a valid log after the corrupted one.
|
||||
ASSERT_OK(ReOpenNoDelete(column_families, &handles));
|
||||
assert(db != nullptr);
|
||||
txn = db->GetTransactionByName("xid");
|
||||
ASSERT_TRUE(txn != nullptr);
|
||||
ASSERT_OK(txn->Commit());
|
||||
delete txn;
|
||||
for (auto handle : handles) {
|
||||
delete handle;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user