From a5fafd4f46248e8b89d5ec7c63d3f25aaa347fd1 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Fri, 14 Mar 2014 14:01:45 -0700 Subject: [PATCH 1/3] Correct the logic of MemTable::ShouldFlushNow(). Summary: Memtable will now be forced to flush if the one of the following conditions is met: 1. Already allocated more than write_buffer_size + 60% arena block size. (the overflowing condition) 2. Unable to safely allocate one more arena block without hitting the overflowing condition AND the unused allocated memory < 25% arena block size. Test Plan: make all check Reviewers: sdong, haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D16893 --- db/memtable.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/db/memtable.cc b/db/memtable.cc index f9fa66eec..b787ec24e 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -78,21 +78,23 @@ bool MemTable::ShouldFlushNow() const { auto allocated_memory = table_->ApproximateMemoryUsage() + arena_.MemoryAllocatedBytes(); - if (allocated_memory + kArenaBlockSize * kAllowOverAllocationRatio < - kWriteBufferSize) { + // if we can still allocate one more block without exceeding the + // over-allocation ratio, then we should not flush. + if (allocated_memory + kArenaBlockSize < + kWriteBufferSize + kArenaBlockSize * kAllowOverAllocationRatio) { return false; } // if user keeps adding entries that exceeds kWriteBufferSize, we need to - // flush - // earlier even though we still have much available memory left. - if (allocated_memory > kWriteBufferSize * (1 + kAllowOverAllocationRatio)) { + // flush earlier even though we still have much available memory left. + if (allocated_memory > + kWriteBufferSize + kArenaBlockSize * kAllowOverAllocationRatio) { return true; } // In this code path, Arena has already allocated its "last block", which // means the total allocatedmemory size is either: - // (1) "moderately" over allocated the memory (no more than `0.4 * arena + // (1) "moderately" over allocated the memory (no more than `0.6 * arena // block size`. Or, // (2) the allocated memory is less than write buffer size, but we'll stop // here since if we allocate a new arena block, we'll over allocate too much From 9caeff516e32b52fea12a66694987d7c36e73bd5 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Mon, 17 Mar 2014 15:39:23 -0700 Subject: [PATCH 2/3] keep_log_files option in BackupableDB Summary: Added an option to BackupableDB implementation that allows users to persist in-memory databases. When the restore happens with keep_log_files = true, it will *) Not delete existing log files in wal_dir *) Move log files from archive directory to wal_dir, so that DB can replay them if necessary Test Plan: Added an unit test Reviewers: dhruba, ljin Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D16941 --- include/utilities/backupable_db.h | 43 +++++++-- utilities/backupable/backupable_db.cc | 105 +++++++++++++++------ utilities/backupable/backupable_db_test.cc | 31 +++++- 3 files changed, 136 insertions(+), 43 deletions(-) diff --git a/include/utilities/backupable_db.h b/include/utilities/backupable_db.h index 22a75ac34..1ec9e8966 100644 --- a/include/utilities/backupable_db.h +++ b/include/utilities/backupable_db.h @@ -55,19 +55,39 @@ struct BackupableDBOptions { // Default: false bool destroy_old_data; + // If false, we won't backup log files. This option can be useful for backing + // up in-memory databases where log file are persisted, but table files are in + // memory. + // Default: true + bool backup_log_files; + void Dump(Logger* logger) const; explicit BackupableDBOptions(const std::string& _backup_dir, Env* _backup_env = nullptr, bool _share_table_files = true, Logger* _info_log = nullptr, bool _sync = true, - bool _destroy_old_data = false) + bool _destroy_old_data = false, + bool _backup_log_files = true) : backup_dir(_backup_dir), backup_env(_backup_env), share_table_files(_share_table_files), info_log(_info_log), sync(_sync), - destroy_old_data(_destroy_old_data) {} + destroy_old_data(_destroy_old_data), + backup_log_files(_backup_log_files) {} +}; + +struct RestoreOptions { + // If true, restore won't overwrite the existing log files in wal_dir. It will + // also move all log files from archive directory to wal_dir. Use this option + // in combination with BackupableDBOptions::backup_log_files = false for + // persisting in-memory databases. + // Default: false + bool keep_log_files; + + explicit RestoreOptions(bool _keep_log_files = false) + : keep_log_files(_keep_log_files) {} }; typedef uint32_t BackupID; @@ -96,11 +116,12 @@ class BackupEngine { virtual void StopBackup() = 0; virtual void GetBackupInfo(std::vector* backup_info) = 0; - virtual Status RestoreDBFromBackup(BackupID backup_id, - const std::string& db_dir, - const std::string& wal_dir) = 0; - virtual Status RestoreDBFromLatestBackup(const std::string& db_dir, - const std::string& wal_dir) = 0; + virtual Status RestoreDBFromBackup( + BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, + const RestoreOptions& restore_options = RestoreOptions()) = 0; + virtual Status RestoreDBFromLatestBackup( + const std::string& db_dir, const std::string& wal_dir, + const RestoreOptions& restore_options = RestoreOptions()) = 0; }; // Stack your DB with BackupableDB to be able to backup the DB @@ -156,11 +177,15 @@ class RestoreBackupableDB { // If you want to create new backup, you will first have to delete backups 4 // and 5. Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir, - const std::string& wal_dir); + const std::string& wal_dir, + const RestoreOptions& restore_options = + RestoreOptions()); // restore from the latest backup Status RestoreDBFromLatestBackup(const std::string& db_dir, - const std::string& wal_dir); + const std::string& wal_dir, + const RestoreOptions& restore_options = + RestoreOptions()); // deletes old backups, keeping latest num_backups_to_keep alive Status PurgeOldBackups(uint32_t num_backups_to_keep); // deletes a specific backup diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index 63731b69a..a78427533 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -35,6 +35,8 @@ void BackupableDBOptions::Dump(Logger* logger) const { Log(logger, " Options.sync: %d", static_cast(sync)); Log(logger, " Options.destroy_old_data: %d", static_cast(destroy_old_data)); + Log(logger, " Options.backup_log_files: %d", + static_cast(backup_log_files)); } // -------- BackupEngineImpl class --------- @@ -50,14 +52,21 @@ class BackupEngineImpl : public BackupEngine { } void GetBackupInfo(std::vector* backup_info); - Status RestoreDBFromBackup(BackupID backup_id, const std::string &db_dir, - const std::string &wal_dir); - Status RestoreDBFromLatestBackup(const std::string &db_dir, - const std::string &wal_dir) { - return RestoreDBFromBackup(latest_backup_id_, db_dir, wal_dir); + Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir, + const std::string& wal_dir, + const RestoreOptions& restore_options = + RestoreOptions()); + Status RestoreDBFromLatestBackup(const std::string& db_dir, + const std::string& wal_dir, + const RestoreOptions& restore_options = + RestoreOptions()) { + return RestoreDBFromBackup(latest_backup_id_, db_dir, wal_dir, + restore_options); } private: + void DeleteChildren(const std::string& dir, uint32_t file_type_filter = 0); + struct FileInfo { FileInfo(const std::string& fname, uint64_t sz, uint32_t checksum) : refs(0), filename(fname), size(sz), checksum_value(checksum) {} @@ -315,7 +324,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) { s = db->GetLiveFiles(live_files, &manifest_file_size, flush_before_backup); } // if we didn't flush before backup, we need to also get WAL files - if (s.ok() && !flush_before_backup) { + if (s.ok() && !flush_before_backup && options_.backup_log_files) { // returns file names prefixed with "/" s = db->GetSortedWalFiles(live_wal_files); } @@ -469,9 +478,9 @@ void BackupEngineImpl::GetBackupInfo(std::vector* backup_info) { } } -Status BackupEngineImpl::RestoreDBFromBackup(BackupID backup_id, - const std::string& db_dir, - const std::string& wal_dir) { +Status BackupEngineImpl::RestoreDBFromBackup( + BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, + const RestoreOptions& restore_options) { auto backup_itr = backups_.find(backup_id); if (backup_itr == backups_.end()) { return Status::NotFound("Backup not found"); @@ -482,25 +491,40 @@ Status BackupEngineImpl::RestoreDBFromBackup(BackupID backup_id, } Log(options_.info_log, "Restoring backup id %u\n", backup_id); + Log(options_.info_log, "keep_log_files: %d\n", + static_cast(restore_options.keep_log_files)); // just in case. Ignore errors db_env_->CreateDirIfMissing(db_dir); db_env_->CreateDirIfMissing(wal_dir); - // delete log files that might have been already in wal_dir. - // This is important since they might get replayed to the restored DB, - // which will then differ from the backuped DB - std::vector delete_children; - db_env_->GetChildren(wal_dir, &delete_children); // ignore errors - for (auto f : delete_children) { - db_env_->DeleteFile(wal_dir + "/" + f); // ignore errors - } - // Also delete all the db_dir children. This is not so important - // because obsolete files will be deleted by DBImpl::PurgeObsoleteFiles() - delete_children.clear(); - db_env_->GetChildren(db_dir, &delete_children); // ignore errors - for (auto f : delete_children) { - db_env_->DeleteFile(db_dir + "/" + f); // ignore errors + if (restore_options.keep_log_files) { + // delete files in db_dir, but keep all the log files + DeleteChildren(db_dir, 1 << kLogFile); + // move all the files from archive dir to wal_dir + std::string archive_dir = ArchivalDirectory(wal_dir); + std::vector archive_files; + db_env_->GetChildren(archive_dir, &archive_files); // ignore errors + for (const auto& f : archive_files) { + uint64_t number; + FileType type; + bool ok = ParseFileName(f, &number, &type); + if (ok && type == kLogFile) { + Log(options_.info_log, "Moving log file from archive/ to wal_dir: %s", + f.c_str()); + Status s = + db_env_->RenameFile(archive_dir + "/" + f, wal_dir + "/" + f); + if (!s.ok()) { + // if we can't move log file from archive_dir to wal_dir, + // we should fail, since it might mean data loss + return s; + } + } + } + } else { + DeleteChildren(wal_dir); + DeleteChildren(ArchivalDirectory(wal_dir)); + DeleteChildren(db_dir); } Status s; @@ -761,6 +785,23 @@ Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env, return s; } +void BackupEngineImpl::DeleteChildren(const std::string& dir, + uint32_t file_type_filter) { + std::vector children; + db_env_->GetChildren(dir, &children); // ignore errors + + for (const auto& f : children) { + uint64_t number; + FileType type; + bool ok = ParseFileName(f, &number, &type); + if (ok && (file_type_filter & (1 << type))) { + // don't delete this file + continue; + } + db_env_->DeleteFile(dir + "/" + f); // ignore errors + } +} + void BackupEngineImpl::GarbageCollection(bool full_scan) { Log(options_.info_log, "Starting garbage collection"); std::vector to_delete; @@ -1043,16 +1084,18 @@ RestoreBackupableDB::GetBackupInfo(std::vector* backup_info) { backup_engine_->GetBackupInfo(backup_info); } -Status RestoreBackupableDB::RestoreDBFromBackup(BackupID backup_id, - const std::string& db_dir, - const std::string& wal_dir) { - return backup_engine_->RestoreDBFromBackup(backup_id, db_dir, wal_dir); +Status RestoreBackupableDB::RestoreDBFromBackup( + BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, + const RestoreOptions& restore_options) { + return backup_engine_->RestoreDBFromBackup(backup_id, db_dir, wal_dir, + restore_options); } -Status -RestoreBackupableDB::RestoreDBFromLatestBackup(const std::string& db_dir, - const std::string& wal_dir) { - return backup_engine_->RestoreDBFromLatestBackup(db_dir, wal_dir); +Status RestoreBackupableDB::RestoreDBFromLatestBackup( + const std::string& db_dir, const std::string& wal_dir, + const RestoreOptions& restore_options) { + return backup_engine_->RestoreDBFromLatestBackup(db_dir, wal_dir, + restore_options); } Status RestoreBackupableDB::PurgeOldBackups(uint32_t num_backups_to_keep) { diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index a5f146e05..c5ee445a6 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -396,16 +396,20 @@ class BackupableDBTest { // if backup_id == 0, it means restore from latest // if end == 0, don't check AssertEmpty void AssertBackupConsistency(BackupID backup_id, uint32_t start_exist, - uint32_t end_exist, uint32_t end = 0) { + uint32_t end_exist, uint32_t end = 0, + bool keep_log_files = false) { + RestoreOptions restore_options(keep_log_files); bool opened_restore = false; if (restore_db_.get() == nullptr) { opened_restore = true; OpenRestoreDB(); } if (backup_id > 0) { - ASSERT_OK(restore_db_->RestoreDBFromBackup(backup_id, dbname_, dbname_)); + ASSERT_OK(restore_db_->RestoreDBFromBackup(backup_id, dbname_, dbname_, + restore_options)); } else { - ASSERT_OK(restore_db_->RestoreDBFromLatestBackup(dbname_, dbname_)); + ASSERT_OK(restore_db_->RestoreDBFromLatestBackup(dbname_, dbname_, + restore_options)); } DB* db = OpenDB(); AssertExists(db, start_exist, end_exist); @@ -775,6 +779,27 @@ TEST(BackupableDBTest, DeleteTmpFiles) { ASSERT_EQ(false, file_manager_->FileExists(private_tmp_dir)); } +TEST(BackupableDBTest, KeepLogFiles) { + // basically infinite + backupable_options_->backup_log_files = false; + options_.WAL_ttl_seconds = 24 * 60 * 60; + OpenBackupableDB(true); + FillDB(db_.get(), 0, 100); + ASSERT_OK(db_->Flush(FlushOptions())); + FillDB(db_.get(), 100, 200); + ASSERT_OK(db_->CreateNewBackup(false)); + FillDB(db_.get(), 200, 300); + ASSERT_OK(db_->Flush(FlushOptions())); + FillDB(db_.get(), 300, 400); + ASSERT_OK(db_->Flush(FlushOptions())); + FillDB(db_.get(), 400, 500); + ASSERT_OK(db_->Flush(FlushOptions())); + CloseBackupableDB(); + + // all data should be there if we call with keep_log_files = true + AssertBackupConsistency(0, 0, 500, 600, true); +} + } // anon namespace } // namespace rocksdb From 5601bc4619402676900539c4ef828cedfa998a32 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Mon, 17 Mar 2014 17:02:34 -0700 Subject: [PATCH 3/3] Check starts_with(prefix) in MultiPrefixIterate Summary: We switched to prefix_seek method of seeking. This means that anytime we check Valid(), we also need to check starts_with(prefix) Test Plan: ran db_stress Reviewers: ljin Reviewed By: ljin CC: leveldb Differential Revision: https://reviews.facebook.net/D16953 --- tools/db_stress.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/db_stress.cc b/tools/db_stress.cc index a47a03933..32404c65d 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -959,13 +959,14 @@ class StressTest { } int count = 0; - while (iters[0]->Valid()) { + while (iters[0]->Valid() && iters[0]->key().starts_with(prefix_slices[0])) { count++; std::string values[10]; // get list of all values for this iteration for (int i = 0; i < 10; i++) { // no iterator should finish before the first one - assert(iters[i]->Valid()); + assert(iters[i]->Valid() && + iters[i]->key().starts_with(prefix_slices[i])); values[i] = iters[i]->value().ToString(); char expected_first = (prefixes[i])[0]; @@ -993,7 +994,8 @@ class StressTest { // cleanup iterators and snapshot for (int i = 0; i < 10; i++) { // if the first iterator finished, they should have all finished - assert(!iters[i]->Valid()); + assert(!iters[i]->Valid() || + !iters[i]->key().starts_with(prefix_slices[i])); assert(iters[i]->status().ok()); delete iters[i]; }