[CF] Adaptation of GetLiveFiles for CF

Summary: Even if the user flushes the memtables before getting live files, we still can't guarantee that new data didn't come in afterwards (into already-flushed memtables). If we want backups to provide a consistent view of the database, we still need to get the WAL files.

Test Plan: backupable_db_test

Reviewers: dhruba

CC: leveldb

Differential Revision: https://reviews.facebook.net/D16299
This commit is contained in:
Igor Canadi 2014-02-25 13:16:59 -08:00
parent 5a91746277
commit dc277f0ab7
3 changed files with 28 additions and 8 deletions

View File

@ -60,18 +60,35 @@ Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
*manifest_file_size = 0; *manifest_file_size = 0;
mutex_.Lock();
if (flush_memtable) { if (flush_memtable) {
// flush all dirty data to disk. // flush all dirty data to disk.
Status status = Flush(FlushOptions()); autovector<ColumnFamilyData*> to_delete;
Status status;
for (auto cfd : *versions_->GetColumnFamilySet()) {
cfd->Ref();
mutex_.Unlock();
status = FlushMemTable(cfd, FlushOptions());
mutex_.Lock();
if (cfd->Unref()) {
to_delete.push_back(cfd);
}
if (!status.ok()) {
break;
}
}
for (auto cfd : to_delete) {
delete cfd;
}
if (!status.ok()) { if (!status.ok()) {
mutex_.Unlock();
Log(options_.info_log, "Cannot Flush data %s\n", Log(options_.info_log, "Cannot Flush data %s\n",
status.ToString().c_str()); status.ToString().c_str());
return status; return status;
} }
} }
MutexLock l(&mutex_);
// Make a set of all of the live *.sst files // Make a set of all of the live *.sst files
std::set<uint64_t> live; std::set<uint64_t> live;
for (auto cfd : *versions_->GetColumnFamilySet()) { for (auto cfd : *versions_->GetColumnFamilySet()) {
@ -93,6 +110,7 @@ Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
// find length of manifest file while holding the mutex lock // find length of manifest file while holding the mutex lock
*manifest_file_size = versions_->ManifestFileSize(); *manifest_file_size = versions_->ManifestFileSize();
mutex_.Unlock();
return Status::OK(); return Status::OK();
} }

View File

@ -394,9 +394,12 @@ class DB {
// Setting flush_memtable to true does Flush before recording the live files. // Setting flush_memtable to true does Flush before recording the live files.
// Setting flush_memtable to false is useful when we don't want to wait for // Setting flush_memtable to false is useful when we don't want to wait for
// flush which may have to wait for compaction to complete taking an // flush which may have to wait for compaction to complete taking an
// indeterminate time. But this will have to use GetSortedWalFiles after // indeterminate time.
// GetLiveFiles to compensate for memtables missed in this snapshot due to the //
// absence of Flush, by WAL files to recover the database consistently later // In case you have multiple column families, even if flush_memtable is true,
// you still need to call GetSortedWalFiles after GetLiveFiles to compensate
// for new data that arrived to already-flushed column families while other
// column families were flushing
virtual Status GetLiveFiles(std::vector<std::string>&, virtual Status GetLiveFiles(std::vector<std::string>&,
uint64_t* manifest_file_size, uint64_t* manifest_file_size,
bool flush_memtable = true) = 0; bool flush_memtable = true) = 0;

View File

@ -311,8 +311,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
// this will return live_files prefixed with "/" // this will return live_files prefixed with "/"
s = db->GetLiveFiles(live_files, &manifest_file_size, flush_before_backup); s = db->GetLiveFiles(live_files, &manifest_file_size, flush_before_backup);
} }
// if we didn't flush before backup, we need to also get WAL files if (s.ok()) {
if (s.ok() && !flush_before_backup) {
// returns file names prefixed with "/" // returns file names prefixed with "/"
s = db->GetSortedWalFiles(live_wal_files); s = db->GetSortedWalFiles(live_wal_files);
} }