Summary: Fix WAL log data corruption when using DBOptions.manual_wal_flush(true) and WriteOptions.sync(true) together (https://github.com/facebook/rocksdb/issues/8723)

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8746
Reviewed By: ajkr
Differential Revision: D30758468
Pulled By: riversand963
fbshipit-source-id: 07c20899d5f2447dc77861b4845efc68a59aa4e8
parent a5566d508b
commit 0b6be7eb68
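For context, here is a minimal sketch (not taken from this commit) of the option combination the fix targets, using the public RocksDB C++ API; the database path and key names are illustrative. One thread issues sync writes while another calls FlushWAL, mirroring the regression test added to db_test further down.

#include <string>
#include <thread>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.manual_wal_flush = true;   // WAL is only flushed when FlushWAL() is called

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/manual_wal_flush_sync_demo", &db);
  if (!s.ok()) {
    return 1;
  }

  rocksdb::WriteOptions wopts;
  wopts.sync = true;                 // every write also syncs the WAL file

  // Writer thread: sync writes append records to the WAL and then sync it.
  std::thread writer([&] {
    for (int i = 0; i < 100; i++) {
      db->Put(wopts, "key_" + std::to_string(i), "value_" + std::to_string(i));
    }
  });
  // Flusher thread: manual WAL flushes run concurrently with the sync writes.
  std::thread flusher([&] {
    for (int i = 0; i < 100; i++) {
      db->FlushWAL(/*sync=*/false);
    }
  });

  writer.join();
  flusher.join();
  delete db;
  return 0;
}

Before this change, the writer's WAL sync and a concurrent FlushWAL could touch the same log file without mutual exclusion when manual_wal_flush is on, which is the corruption described in issue 8723.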
@@ -9,6 +9,7 @@
 * Fix a race in BackupEngine if RateLimiter is reconfigured during concurrent Restore operations.
 * Fix a bug on POSIX in which failure to create a lock file (e.g. out of space) can prevent future LockFile attempts in the same process on the same file from succeeding.
 * Fix a bug that backup_rate_limiter and restore_rate_limiter in BackupEngine could not limit read rates.
+* Fix WAL log data corruption when using DBOptions.manual_wal_flush(true) and WriteOptions.sync(true) together. The sync WAL should work with locked log_write_mutex_.
 
 ### New Features
 * RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions.
@@ -1124,6 +1124,18 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
     //    writer thread, so no one will push to logs_,
     //  - as long as other threads don't modify it, it's safe to read
     //    from std::deque from multiple threads concurrently.
+    //
+    // Sync operation should work with locked log_write_mutex_, because:
+    //   when DBOptions.manual_wal_flush_ is set,
+    //   FlushWAL function will be invoked by another thread.
+    //   if without locked log_write_mutex_, the log file may get data
+    //   corruption
+
+    const bool needs_locking = manual_wal_flush_ && !two_write_queues_;
+    if (UNLIKELY(needs_locking)) {
+      log_write_mutex_.Lock();
+    }
+
     for (auto& log : logs_) {
       io_s = log.writer->file()->Sync(immutable_db_options_.use_fsync);
       if (!io_s.ok()) {
@@ -1131,6 +1143,10 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
       }
     }
 
+    if (UNLIKELY(needs_locking)) {
+      log_write_mutex_.Unlock();
+    }
+
     if (io_s.ok() && need_log_dir_sync) {
       // We only sync WAL directory the first time WAL syncing is
       // requested, so that in case users never turn on WAL sync,
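To make the intent of the needs_locking guard easier to see outside of DBImpl, here is a simplified, self-contained sketch of the same pattern; MiniDb, WalFile, and their members are illustrative stand-ins, not RocksDB types. The sync path takes the same mutex that the manual flush path holds, but only when manual_wal_flush_ is set and the two-write-queues path is not in use, matching the condition in the diff above.

#include <mutex>
#include <string>
#include <vector>

// Simplified stand-in for a WAL writer; this is NOT RocksDB's log::Writer.
class WalFile {
 public:
  void Append(const std::string& rec) { buffer_ += rec; }  // manual flush path
  void Sync(bool /*use_fsync*/) { synced_ = buffer_; }     // sync-write path
 private:
  std::string buffer_;
  std::string synced_;
};

class MiniDb {
 public:
  MiniDb(bool manual_wal_flush, bool two_write_queues)
      : manual_wal_flush_(manual_wal_flush), two_write_queues_(two_write_queues) {}

  // Analogue of FlushWAL(): appends buffered records while holding log_write_mutex_.
  void FlushWal(const std::string& rec) {
    std::lock_guard<std::mutex> guard(log_write_mutex_);
    for (auto& log : logs_) {
      log.Append(rec);
    }
  }

  // Analogue of the patched sync loop in WriteToWAL(): take log_write_mutex_
  // only when a concurrent FlushWal() from another thread is possible.
  void SyncWal(bool use_fsync) {
    const bool needs_locking = manual_wal_flush_ && !two_write_queues_;
    if (needs_locking) {
      log_write_mutex_.lock();
    }
    for (auto& log : logs_) {
      log.Sync(use_fsync);
    }
    if (needs_locking) {
      log_write_mutex_.unlock();
    }
  }

 private:
  const bool manual_wal_flush_;
  const bool two_write_queues_;
  std::mutex log_write_mutex_;
  std::vector<WalFile> logs_ = std::vector<WalFile>(1);
};

Limiting the lock to the manual_wal_flush_ case keeps the common sync path unchanged, which appears to be why the diff gates the lock on needs_locking rather than locking unconditionally.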
@@ -4098,6 +4098,39 @@ TEST_F(DBTest, ConcurrentFlushWAL) {
   }
 }
 
+// This test failure will be caught with a probability
+TEST_F(DBTest, ManualFlushWalAndWriteRace) {
+  Options options;
+  options.env = env_;
+  options.manual_wal_flush = true;
+  options.create_if_missing = true;
+
+  DestroyAndReopen(options);
+
+  WriteOptions wopts;
+  wopts.sync = true;
+
+  port::Thread writeThread([&]() {
+    for (int i = 0; i < 100; i++) {
+      auto istr = ToString(i);
+      dbfull()->Put(wopts, "key_" + istr, "value_" + istr);
+    }
+  });
+  port::Thread flushThread([&]() {
+    for (int i = 0; i < 100; i++) {
+      ASSERT_OK(dbfull()->FlushWAL(false));
+    }
+  });
+
+  writeThread.join();
+  flushThread.join();
+  ASSERT_OK(dbfull()->Put(wopts, "foo1", "value1"));
+  ASSERT_OK(dbfull()->Put(wopts, "foo2", "value2"));
+  Reopen(options);
+  ASSERT_EQ("value1", Get("foo1"));
+  ASSERT_EQ("value2", Get("foo2"));
+}
+
 #ifndef ROCKSDB_LITE
 TEST_F(DBTest, DynamicMemtableOptions) {
   const uint64_t k64KB = 1 << 16;