Provide openable snapshots
Summary: Store links to live files in directory on same disk Test Plan: Take snapshot and open it. Added a test GetSnapshotLink in db_test. Reviewers: sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D28713
This commit is contained in:
parent
9be338cf9d
commit
6c1b040cc9
@ -25,6 +25,7 @@
|
||||
#include "port/port.h"
|
||||
#include "util/mutexlock.h"
|
||||
#include "util/sync_point.h"
|
||||
#include "util/file_util.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
@ -134,6 +135,114 @@ Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
|
||||
Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
|
||||
return wal_manager_.GetSortedWalFiles(files);
|
||||
}
|
||||
|
||||
// Builds an openable snapshot of RocksDB
|
||||
Status DBImpl::CreateCheckpoint(const std::string& snapshot_dir) {
|
||||
Status s;
|
||||
std::vector<std::string> live_files;
|
||||
uint64_t manifest_file_size = 0;
|
||||
uint64_t sequence_number = GetLatestSequenceNumber();
|
||||
bool same_fs = true;
|
||||
|
||||
if (env_->FileExists(snapshot_dir)) {
|
||||
return Status::InvalidArgument("Directory exists");
|
||||
}
|
||||
|
||||
s = DisableFileDeletions();
|
||||
if (s.ok()) {
|
||||
// this will return live_files prefixed with "/"
|
||||
s = GetLiveFiles(live_files, &manifest_file_size, true);
|
||||
}
|
||||
if (!s.ok()) {
|
||||
EnableFileDeletions(false);
|
||||
return s;
|
||||
}
|
||||
|
||||
Log(db_options_.info_log,
|
||||
"Started the snapshot process -- creating snapshot in directory %s",
|
||||
snapshot_dir.c_str());
|
||||
|
||||
std::string full_private_path = snapshot_dir + ".tmp";
|
||||
|
||||
// create snapshot directory
|
||||
s = env_->CreateDir(full_private_path);
|
||||
|
||||
// copy/hard link live_files
|
||||
for (size_t i = 0; s.ok() && i < live_files.size(); ++i) {
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
bool ok = ParseFileName(live_files[i], &number, &type);
|
||||
if (!ok) {
|
||||
s = Status::Corruption("Can't parse file name. This is very bad");
|
||||
break;
|
||||
}
|
||||
// we should only get sst, manifest and current files here
|
||||
assert(type == kTableFile || type == kDescriptorFile ||
|
||||
type == kCurrentFile);
|
||||
assert(live_files[i].size() > 0 && live_files[i][0] == '/');
|
||||
std::string src_fname = live_files[i];
|
||||
|
||||
// rules:
|
||||
// * if it's kTableFile, then it's shared
|
||||
// * if it's kDescriptorFile, limit the size to manifest_file_size
|
||||
// * always copy if cross-device link
|
||||
if ((type == kTableFile) && same_fs) {
|
||||
Log(db_options_.info_log, "Hard Linking %s", src_fname.c_str());
|
||||
s = env_->LinkFile(GetName() + src_fname, full_private_path + src_fname);
|
||||
if (s.IsNotSupported()) {
|
||||
same_fs = false;
|
||||
s = Status::OK();
|
||||
}
|
||||
}
|
||||
if ((type != kTableFile) || (!same_fs)) {
|
||||
Log(db_options_.info_log, "Copying %s", src_fname.c_str());
|
||||
s = CopyFile(env_, GetName() + src_fname, full_private_path + src_fname,
|
||||
(type == kDescriptorFile) ? manifest_file_size : 0);
|
||||
}
|
||||
}
|
||||
|
||||
// we copied all the files, enable file deletions
|
||||
EnableFileDeletions(false);
|
||||
|
||||
if (s.ok()) {
|
||||
// move tmp private backup to real snapshot directory
|
||||
s = env_->RenameFile(full_private_path, snapshot_dir);
|
||||
}
|
||||
if (s.ok()) {
|
||||
unique_ptr<Directory> snapshot_directory;
|
||||
env_->NewDirectory(snapshot_dir, &snapshot_directory);
|
||||
if (snapshot_directory != nullptr) {
|
||||
s = snapshot_directory->Fsync();
|
||||
}
|
||||
}
|
||||
|
||||
if (!s.ok()) {
|
||||
// clean all the files we might have created
|
||||
Log(db_options_.info_log, "Snapshot failed -- %s", s.ToString().c_str());
|
||||
// we have to delete the dir and all its children
|
||||
std::vector<std::string> subchildren;
|
||||
env_->GetChildren(full_private_path, &subchildren);
|
||||
for (auto& subchild : subchildren) {
|
||||
Status s1 = env_->DeleteFile(full_private_path + subchild);
|
||||
if (s1.ok()) {
|
||||
Log(db_options_.info_log, "Deleted %s",
|
||||
(full_private_path + subchild).c_str());
|
||||
}
|
||||
}
|
||||
// finally delete the private dir
|
||||
Status s1 = env_->DeleteDir(full_private_path);
|
||||
Log(db_options_.info_log, "Deleted dir %s -- %s", full_private_path.c_str(),
|
||||
s1.ToString().c_str());
|
||||
return s;
|
||||
}
|
||||
|
||||
// here we know that we succeeded and installed the new snapshot
|
||||
Log(db_options_.info_log, "Snapshot DONE. All is good");
|
||||
Log(db_options_.info_log, "Snapshot sequence number: %" PRIu64,
|
||||
sequence_number);
|
||||
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // ROCKSDB_LITE
|
||||
|
@ -169,6 +169,11 @@ class DBImpl : public DB {
|
||||
ColumnFamilyHandle* column_family,
|
||||
ColumnFamilyMetaData* metadata) override;
|
||||
|
||||
// Builds an openable snapshot of RocksDB on the same disk, which
|
||||
// accepts an output directory on the same disk, and under the directory
|
||||
// (1) hard-linked SST files pointing to existing live SST files
|
||||
// (2) a copied manifest files and other files
|
||||
virtual Status CreateCheckpoint(const std::string& snapshot_dir);
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
// checks if all live files exist on file system and that their file sizes
|
||||
|
@ -1606,6 +1606,60 @@ TEST(DBTest, GetSnapshot) {
|
||||
} while (ChangeOptions(kSkipHashCuckoo));
|
||||
}
|
||||
|
||||
TEST(DBTest, GetSnapshotLink) {
|
||||
do {
|
||||
Options options;
|
||||
const std::string snapshot_name = test::TmpDir(env_) + "/snapshot";
|
||||
DB* snapshotDB;
|
||||
ReadOptions roptions;
|
||||
std::string result;
|
||||
|
||||
options = CurrentOptions(options);
|
||||
delete db_;
|
||||
db_ = nullptr;
|
||||
ASSERT_OK(DestroyDB(dbname_, options));
|
||||
ASSERT_OK(DestroyDB(snapshot_name, options));
|
||||
env_->DeleteDir(snapshot_name);
|
||||
|
||||
// Create a database
|
||||
Status s;
|
||||
options.create_if_missing = true;
|
||||
ASSERT_OK(DB::Open(options, dbname_, &db_));
|
||||
std::string key = std::string("foo");
|
||||
ASSERT_OK(Put(key, "v1"));
|
||||
// Take a snapshot
|
||||
ASSERT_OK(db_->CreateCheckpoint(snapshot_name));
|
||||
ASSERT_OK(Put(key, "v2"));
|
||||
ASSERT_EQ("v2", Get(key));
|
||||
ASSERT_OK(Flush());
|
||||
ASSERT_EQ("v2", Get(key));
|
||||
// Open snapshot and verify contents while DB is running
|
||||
options.create_if_missing = false;
|
||||
ASSERT_OK(DB::Open(options, snapshot_name, &snapshotDB));
|
||||
ASSERT_OK(snapshotDB->Get(roptions, key, &result));
|
||||
ASSERT_EQ("v1", result);
|
||||
delete snapshotDB;
|
||||
snapshotDB = nullptr;
|
||||
delete db_;
|
||||
db_ = nullptr;
|
||||
|
||||
// Destroy original DB
|
||||
ASSERT_OK(DestroyDB(dbname_, options));
|
||||
|
||||
// Open snapshot and verify contents
|
||||
options.create_if_missing = false;
|
||||
dbname_ = snapshot_name;
|
||||
ASSERT_OK(DB::Open(options, dbname_, &db_));
|
||||
ASSERT_EQ("v1", Get(key));
|
||||
delete db_;
|
||||
db_ = nullptr;
|
||||
ASSERT_OK(DestroyDB(dbname_, options));
|
||||
|
||||
// Restore DB name
|
||||
dbname_ = test::TmpDir(env_) + "/db_test";
|
||||
} while (ChangeOptions());
|
||||
}
|
||||
|
||||
TEST(DBTest, GetLevel0Ordering) {
|
||||
do {
|
||||
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
|
||||
@ -7468,6 +7522,10 @@ class ModelDB: public DB {
|
||||
ColumnFamilyHandle* column_family,
|
||||
ColumnFamilyMetaData* metadata) {}
|
||||
|
||||
virtual Status CreateCheckpoint(const std::string& snapshot_dir) {
|
||||
return Status::NotSupported("Not supported in Model DB");
|
||||
}
|
||||
|
||||
private:
|
||||
class ModelIter: public Iterator {
|
||||
public:
|
||||
|
@ -93,6 +93,8 @@ class HdfsEnv : public Env {
|
||||
|
||||
virtual Status RenameFile(const std::string& src, const std::string& target);
|
||||
|
||||
virtual Status LinkFile(const std::string& src, const std::string& target);
|
||||
|
||||
virtual Status LockFile(const std::string& fname, FileLock** lock);
|
||||
|
||||
virtual Status UnlockFile(FileLock* lock);
|
||||
@ -291,6 +293,10 @@ class HdfsEnv : public Env {
|
||||
|
||||
virtual Status RenameFile(const std::string& src, const std::string& target){return notsup;}
|
||||
|
||||
virtual Status LinkFile(const std::string& src, const std::string& target) {
|
||||
return notsup;
|
||||
}
|
||||
|
||||
virtual Status LockFile(const std::string& fname, FileLock** lock){return notsup;}
|
||||
|
||||
virtual Status UnlockFile(FileLock* lock){return notsup;}
|
||||
|
@ -521,6 +521,12 @@ class DB {
|
||||
virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
|
||||
return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
|
||||
}
|
||||
|
||||
// Builds an openable snapshot of RocksDB on the same disk, which
|
||||
// accepts an output directory on the same disk, and under the directory
|
||||
// (1) hard-linked SST files pointing to existing live SST files
|
||||
// (2) a copied manifest files and other files
|
||||
virtual Status CreateCheckpoint(const std::string& snapshot_dir) = 0;
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
private:
|
||||
|
@ -178,6 +178,10 @@ class Env {
|
||||
virtual Status RenameFile(const std::string& src,
|
||||
const std::string& target) = 0;
|
||||
|
||||
// Hard Link file src to target.
|
||||
virtual Status LinkFile(const std::string& src,
|
||||
const std::string& target) = 0;
|
||||
|
||||
// Lock the specified file. Used to prevent concurrent access to
|
||||
// the same db by multiple processes. On failure, stores nullptr in
|
||||
// *lock and returns non-OK.
|
||||
@ -747,6 +751,11 @@ class EnvWrapper : public Env {
|
||||
Status RenameFile(const std::string& s, const std::string& t) {
|
||||
return target_->RenameFile(s, t);
|
||||
}
|
||||
|
||||
Status LinkFile(const std::string& s, const std::string& t) {
|
||||
return target_->LinkFile(s, t);
|
||||
}
|
||||
|
||||
Status LockFile(const std::string& f, FileLock** l) {
|
||||
return target_->LockFile(f, l);
|
||||
}
|
||||
|
@ -247,6 +247,10 @@ class StackableDB : public DB {
|
||||
return db_->DefaultColumnFamily();
|
||||
}
|
||||
|
||||
virtual Status CreateCheckpoint(const std::string& snapshot_dir) override {
|
||||
return db_->CreateCheckpoint(snapshot_dir);
|
||||
}
|
||||
|
||||
protected:
|
||||
DB* db_;
|
||||
};
|
||||
|
@ -1288,6 +1288,17 @@ class PosixEnv : public Env {
|
||||
return result;
|
||||
}
|
||||
|
||||
virtual Status LinkFile(const std::string& src, const std::string& target) {
|
||||
Status result;
|
||||
if (link(src.c_str(), target.c_str()) != 0) {
|
||||
if (errno == EXDEV) {
|
||||
return Status::NotSupported("No cross FS links allowed");
|
||||
}
|
||||
result = IOError(src, errno);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
virtual Status LockFile(const std::string& fname, FileLock** lock) {
|
||||
*lock = nullptr;
|
||||
Status result;
|
||||
|
59
util/file_util.cc
Normal file
59
util/file_util.cc
Normal file
@ -0,0 +1,59 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "util/file_util.h"
|
||||
#include "rocksdb/env.h"
|
||||
#include "db/filename.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
// Utility function to copy a file up to a specified length
|
||||
Status CopyFile(Env* env, const std::string& source,
|
||||
const std::string& destination, uint64_t size) {
|
||||
const EnvOptions soptions;
|
||||
unique_ptr<SequentialFile> srcfile;
|
||||
Status s;
|
||||
s = env->NewSequentialFile(source, &srcfile, soptions);
|
||||
unique_ptr<WritableFile> destfile;
|
||||
if (s.ok()) {
|
||||
s = env->NewWritableFile(destination, &destfile, soptions);
|
||||
} else {
|
||||
return s;
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
// default argument means copy everything
|
||||
if (s.ok()) {
|
||||
s = env->GetFileSize(source, &size);
|
||||
} else {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
char buffer[4096];
|
||||
Slice slice;
|
||||
while (size > 0) {
|
||||
uint64_t bytes_to_read =
|
||||
std::min(static_cast<uint64_t>(sizeof(buffer)), size);
|
||||
if (s.ok()) {
|
||||
s = srcfile->Read(bytes_to_read, &slice, buffer);
|
||||
}
|
||||
if (s.ok()) {
|
||||
if (slice.size() == 0) {
|
||||
return Status::Corruption("file too small");
|
||||
}
|
||||
s = destfile->Append(slice);
|
||||
}
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
size -= slice.size();
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
18
util/file_util.h
Normal file
18
util/file_util.h
Normal file
@ -0,0 +1,18 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
#include <string>
|
||||
|
||||
#pragma once
|
||||
#include "rocksdb/status.h"
|
||||
#include "rocksdb/types.h"
|
||||
#include "rocksdb/env.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
extern Status CopyFile(Env* env, const std::string& source,
|
||||
const std::string& destination, uint64_t size = 0);
|
||||
|
||||
} // namespace rocksdb
|
@ -559,6 +559,19 @@ Status MockEnv::RenameFile(const std::string& src, const std::string& dest) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status MockEnv::LinkFile(const std::string& src, const std::string& dest) {
|
||||
auto s = NormalizePath(src);
|
||||
auto t = NormalizePath(dest);
|
||||
MutexLock lock(&mutex_);
|
||||
if (file_map_.find(s) == file_map_.end()) {
|
||||
return Status::IOError(s, "File not found");
|
||||
}
|
||||
|
||||
DeleteFileInternal(t);
|
||||
file_map_[t] = file_map_[s];
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status MockEnv::NewLogger(const std::string& fname,
|
||||
shared_ptr<Logger>* result) {
|
||||
auto fn = NormalizePath(fname);
|
||||
|
@ -69,6 +69,8 @@ class MockEnv : public EnvWrapper {
|
||||
virtual Status RenameFile(const std::string& src,
|
||||
const std::string& target);
|
||||
|
||||
virtual Status LinkFile(const std::string& src, const std::string& target);
|
||||
|
||||
virtual Status NewLogger(const std::string& fname,
|
||||
shared_ptr<Logger>* result);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user