Expose DB methods to lock and unlock the WAL (#5146)

Summary:
Expose DB methods to lock and unlock the WAL.

These methods are intended to be used by MyRocks in order to obtain WAL
coordinates in a consistent way.

The usage scenario is as follows:

MySQL has performance_schema.log_status which provides information that
enables a backup tool to copy the required log files without locking for
the duration of the copy. To populate this table MySQL does the following:

1. Lock the binary log. Transactions are not allowed to commit now
2. Save the binary log coordinates
3. Walk through the storage engines and lock writes on each engine. For
   InnoDB, redo log is locked. For MyRocks, WAL should be locked.
4. Ask storage engines for their coordinates. InnoDB reports its current
   LSN and checkpoint LSN. MyRocks should report active WAL files names
   and sizes.
5. Release storage engine's locks
6. Unlock binary log

The backup tool will then use this information to copy the InnoDB, RocksDB
and MySQL binary logs up to the specified positions, ending up with a
consistent DB state after restore.

Currently, RocksDB allows obtaining the list of WAL files. The only missing
bit is a method to lock the writes to the WAL files.

The LockWAL method must flush the WAL in order for the reported size to be
accurate (GetSortedWALFiles uses a file system stat call to return the
file size). Also, since the backup tool is going to copy the WAL, it is
better for it to be flushed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5146

Differential Revision: D14815447

Pulled By: maysamyabandeh

fbshipit-source-id: eec9535a6025229ed471119f19fe7b3d8ae888a3
This commit is contained in:
Sergei Glushchenko 2019-04-06 06:36:42 -07:00 committed by Facebook Github Bot
parent 479c566771
commit 39c6c5fc1b
6 changed files with 64 additions and 4 deletions

View File

@ -1112,6 +1112,25 @@ Status DBImpl::SyncWAL() {
return status;
}
// Acquires log_write_mutex_ and then flushes the buffer of the most recent
// log writer (logs_.back()) via WriteBuffer().
//
// Returns the status of that flush. A non-OK status is logged and handed to
// WriteStatusCheck() so that the failure is recorded DB-wide.
//
// NOTE: log_write_mutex_ is not released in this function, neither on success
// nor on failure. It stays held until UnlockWAL() is called, so every call to
// LockWAL() has to be followed by a call to UnlockWAL(), even when the
// returned status is not OK.
Status DBImpl::LockWAL() {
  log_write_mutex_.Lock();
  log::Writer* active_writer = logs_.back().writer;
  auto flush_status = active_writer->WriteBuffer();
  if (flush_status.ok()) {
    return flush_status;
  }
  ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s",
                  flush_status.ToString().c_str());
  // A file system error has to be set globally so that future writes are
  // prevented.
  WriteStatusCheck(flush_status);
  return flush_status;
}
// Releases log_write_mutex_, which LockWAL() acquired and left held.
// Performs no check that the mutex is actually held, so it must only be
// called after a preceding LockWAL(). Always returns Status::OK().
Status DBImpl::UnlockWAL() {
log_write_mutex_.Unlock();
return Status::OK();
}
void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir,
const Status& status) {
mutex_.AssertHeld();

View File

@ -234,8 +234,10 @@ class DBImpl : public DB {
const FlushOptions& options,
const std::vector<ColumnFamilyHandle*>& column_families) override;
virtual Status FlushWAL(bool sync) override;
bool TEST_WALBufferIsEmpty();
bool TEST_WALBufferIsEmpty(bool lock = true);
virtual Status SyncWAL() override;
virtual Status LockWAL() override;
virtual Status UnlockWAL() override;
virtual SequenceNumber GetLatestSequenceNumber() const override;
virtual SequenceNumber GetLastPublishedSequence() const {

View File

@ -26,10 +26,16 @@ void DBImpl::TEST_SwitchWAL() {
SwitchWAL(&write_context);
}
bool DBImpl::TEST_WALBufferIsEmpty() {
InstrumentedMutexLock wl(&log_write_mutex_);
// Test-only helper: reports whether the buffer of the most recent log writer
// (logs_.back()) is empty. When `lock` is true, log_write_mutex_ is acquired
// around the check; pass false when the caller already holds that mutex,
// e.g. between LockWAL() and UnlockWAL().
bool DBImpl::TEST_WALBufferIsEmpty(bool lock) {
if (lock) {
log_write_mutex_.Lock();
}
log::Writer* cur_log_writer = logs_.back().writer;
return cur_log_writer->TEST_BufferIsEmpty();
auto res = cur_log_writer->TEST_BufferIsEmpty();
if (lock) {
log_write_mutex_.Unlock();
}
return res;
}
int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes(

View File

@ -166,6 +166,25 @@ TEST_P(DBWriteTest, IOErrorOnSwitchMemtable) {
Close();
}
// Verifies that db->LockWAL() leaves the WAL buffer empty, i.e. that the WAL
// is flushed as part of locking. The check is run twice: first against the
// WAL created while opening the DB, then against the WAL created by
// TEST_SwitchWAL().
TEST_P(DBWriteTest, LockWalInEffect) {
  Options options = GetOptions();
  Reopen(options);
  for (int round = 0; round < 2; ++round) {
    if (round == 1) {
      // Second round: move on to a freshly created WAL.
      dbfull()->TEST_SwitchWAL();
    }
    ASSERT_OK(Put("key" + ToString(0), "value"));
    // Before locking, the buffer is expected to be non-empty exactly when
    // manual_wal_flush is enabled.
    ASSERT_TRUE(options.manual_wal_flush !=
                dbfull()->TEST_WALBufferIsEmpty());
    ASSERT_OK(dbfull()->LockWAL());
    // log_write_mutex_ is held by LockWAL() here, so the helper must not try
    // to take it again.
    ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty(false));
    ASSERT_OK(dbfull()->UnlockWAL());
  }
}
INSTANTIATE_TEST_CASE_P(DBWriteTestInstance, DBWriteTest,
testing::Values(DBTestBase::kDefault,
DBTestBase::kConcurrentWALWrites,

View File

@ -986,6 +986,16 @@ class DB {
// Currently only works if allow_mmap_writes = false in Options.
virtual Status SyncWAL() = 0;
// Lock the WAL. Also flushes the WAL after locking.
// Intended for callers that need a consistent view of the WAL files (names
// and sizes), e.g. a backup tool; flushing makes the file sizes reported by
// the file system accurate.
// Each call is meant to be paired with a later call to UnlockWAL().
// This default implementation does not lock anything and returns
// Status::NotSupported; DB implementations that support it override it.
virtual Status LockWAL() {
return Status::NotSupported("LockWAL not implemented");
}
// Unlock the WAL. Counterpart of LockWAL(): call it only after LockWAL().
// This default implementation does nothing and returns
// Status::NotSupported; DB implementations that support it override it.
virtual Status UnlockWAL() {
return Status::NotSupported("UnlockWAL not implemented");
}
// The sequence number of the most recent transaction.
virtual SequenceNumber GetLatestSequenceNumber() const = 0;

View File

@ -281,6 +281,10 @@ class StackableDB : public DB {
virtual Status FlushWAL(bool sync) override { return db_->FlushWAL(sync); }
// Forwards to the wrapped DB's LockWAL() and returns its status.
virtual Status LockWAL() override { return db_->LockWAL(); }
// Forwards to the wrapped DB's UnlockWAL() and returns its status.
virtual Status UnlockWAL() override { return db_->UnlockWAL(); }
#ifndef ROCKSDB_LITE
virtual Status DisableFileDeletions() override {