From 004f416b7750d4b044f3d8f7cfcd360ec24e9387 Mon Sep 17 00:00:00 2001 From: Venkatesh Radhakrishnan Date: Thu, 20 Nov 2014 15:54:47 -0800 Subject: [PATCH] Moved checkpoint to utilities Summary: Moved checkpoint to utilities. Addressed comments by Igor, Siying, Dhruba Test Plan: db_test/SnapshotLink Reviewers: dhruba, igor, sdong Reviewed By: igor Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D29079 --- HISTORY.md | 6 + db/db_filesnapshot.cc | 107 --------------- db/db_impl.h | 5 - db/db_test.cc | 9 +- include/rocksdb/db.h | 6 - include/rocksdb/utilities/checkpoint.h | 34 +++++ include/rocksdb/utilities/stackable_db.h | 4 - utilities/checkpoint/checkpoint.cc | 168 +++++++++++++++++++++++ 8 files changed, 212 insertions(+), 127 deletions(-) create mode 100644 include/rocksdb/utilities/checkpoint.h create mode 100644 utilities/checkpoint/checkpoint.cc diff --git a/HISTORY.md b/HISTORY.md index 78973adec..93170fa6f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,12 @@ ### Unreleased Features * Add rocksdb::GetThreadList(), which returns the current status of all rocksdb-related threads. +### Public API changes +* New API to create a checkpoint added. Given a directory name, creates a new + database which is an image of the existing database. +* New API LinkFile added to Env. If you implement your own Env class, an + implementation of the API LinkFile will have to be provided. 
+ ## 3.8.0 (11/14/2014) ### Public API changes diff --git a/db/db_filesnapshot.cc b/db/db_filesnapshot.cc index a442c68b2..ce009a976 100644 --- a/db/db_filesnapshot.cc +++ b/db/db_filesnapshot.cc @@ -138,113 +138,6 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { return wal_manager_.GetSortedWalFiles(files); } -// Builds an openable snapshot of RocksDB -Status DBImpl::CreateCheckpoint(const std::string& snapshot_dir) { - Status s; - std::vector live_files; - uint64_t manifest_file_size = 0; - uint64_t sequence_number = GetLatestSequenceNumber(); - bool same_fs = true; - - if (env_->FileExists(snapshot_dir)) { - return Status::InvalidArgument("Directory exists"); - } - - s = DisableFileDeletions(); - if (s.ok()) { - // this will return live_files prefixed with "/" - s = GetLiveFiles(live_files, &manifest_file_size, true); - } - if (!s.ok()) { - EnableFileDeletions(false); - return s; - } - - Log(db_options_.info_log, - "Started the snapshot process -- creating snapshot in directory %s", - snapshot_dir.c_str()); - - std::string full_private_path = snapshot_dir + ".tmp"; - - // create snapshot directory - s = env_->CreateDir(full_private_path); - - // copy/hard link live_files - for (size_t i = 0; s.ok() && i < live_files.size(); ++i) { - uint64_t number; - FileType type; - bool ok = ParseFileName(live_files[i], &number, &type); - if (!ok) { - s = Status::Corruption("Can't parse file name. 
This is very bad"); - break; - } - // we should only get sst, manifest and current files here - assert(type == kTableFile || type == kDescriptorFile || - type == kCurrentFile); - assert(live_files[i].size() > 0 && live_files[i][0] == '/'); - std::string src_fname = live_files[i]; - - // rules: - // * if it's kTableFile, then it's shared - // * if it's kDescriptorFile, limit the size to manifest_file_size - // * always copy if cross-device link - if ((type == kTableFile) && same_fs) { - Log(db_options_.info_log, "Hard Linking %s", src_fname.c_str()); - s = env_->LinkFile(GetName() + src_fname, full_private_path + src_fname); - if (s.IsNotSupported()) { - same_fs = false; - s = Status::OK(); - } - } - if ((type != kTableFile) || (!same_fs)) { - Log(db_options_.info_log, "Copying %s", src_fname.c_str()); - s = CopyFile(env_, GetName() + src_fname, full_private_path + src_fname, - (type == kDescriptorFile) ? manifest_file_size : 0); - } - } - - // we copied all the files, enable file deletions - EnableFileDeletions(false); - - if (s.ok()) { - // move tmp private backup to real snapshot directory - s = env_->RenameFile(full_private_path, snapshot_dir); - } - if (s.ok()) { - unique_ptr snapshot_directory; - env_->NewDirectory(snapshot_dir, &snapshot_directory); - if (snapshot_directory != nullptr) { - s = snapshot_directory->Fsync(); - } - } - - if (!s.ok()) { - // clean all the files we might have created - Log(db_options_.info_log, "Snapshot failed -- %s", s.ToString().c_str()); - // we have to delete the dir and all its children - std::vector subchildren; - env_->GetChildren(full_private_path, &subchildren); - for (auto& subchild : subchildren) { - Status s1 = env_->DeleteFile(full_private_path + subchild); - if (s1.ok()) { - Log(db_options_.info_log, "Deleted %s", - (full_private_path + subchild).c_str()); - } - } - // finally delete the private dir - Status s1 = env_->DeleteDir(full_private_path); - Log(db_options_.info_log, "Deleted dir %s -- %s", 
full_private_path.c_str(), - s1.ToString().c_str()); - return s; - } - - // here we know that we succeeded and installed the new snapshot - Log(db_options_.info_log, "Snapshot DONE. All is good"); - Log(db_options_.info_log, "Snapshot sequence number: %" PRIu64, - sequence_number); - - return s; -} } #endif // ROCKSDB_LITE diff --git a/db/db_impl.h b/db/db_impl.h index 283796120..1217610b5 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -170,11 +170,6 @@ class DBImpl : public DB { ColumnFamilyHandle* column_family, ColumnFamilyMetaData* metadata) override; - // Builds an openable snapshot of RocksDB on the same disk, which - // accepts an output directory on the same disk, and under the directory - // (1) hard-linked SST files pointing to existing live SST files - // (2) a copied manifest files and other files - virtual Status CreateCheckpoint(const std::string& snapshot_dir); #endif // ROCKSDB_LITE // checks if all live files exist on file system and that their file sizes diff --git a/db/db_test.cc b/db/db_test.cc index 870fc7268..b8c04495b 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -35,6 +35,7 @@ #include "rocksdb/table_properties.h" #include "rocksdb/thread_status.h" #include "rocksdb/utilities/write_batch_with_index.h" +#include "rocksdb/utilities/checkpoint.h" #include "table/block_based_table_factory.h" #include "table/plain_table_factory.h" #include "util/hash.h" @@ -1616,6 +1617,7 @@ TEST(DBTest, GetSnapshotLink) { DB* snapshotDB; ReadOptions roptions; std::string result; + Checkpoint* checkpoint; options = CurrentOptions(options); delete db_; @@ -1631,7 +1633,8 @@ TEST(DBTest, GetSnapshotLink) { std::string key = std::string("foo"); ASSERT_OK(Put(key, "v1")); // Take a snapshot - ASSERT_OK(db_->CreateCheckpoint(snapshot_name)); + ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); + ASSERT_OK(checkpoint->CreateCheckpoint(snapshot_name)); ASSERT_OK(Put(key, "v2")); ASSERT_EQ("v2", Get(key)); ASSERT_OK(Flush()); @@ -7525,10 +7528,6 @@ class ModelDB: 
public DB { ColumnFamilyHandle* column_family, ColumnFamilyMetaData* metadata) {} - virtual Status CreateCheckpoint(const std::string& snapshot_dir) { - return Status::NotSupported("Not supported in Model DB"); - } - private: class ModelIter: public Iterator { public: diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index ad3745c5e..326989418 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -522,12 +522,6 @@ class DB { virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) { return GetPropertiesOfAllTables(DefaultColumnFamily(), props); } - - // Builds an openable snapshot of RocksDB on the same disk, which - // accepts an output directory on the same disk, and under the directory - // (1) hard-linked SST files pointing to existing live SST files - // (2) a copied manifest files and other files - virtual Status CreateCheckpoint(const std::string& snapshot_dir) = 0; #endif // ROCKSDB_LITE private: diff --git a/include/rocksdb/utilities/checkpoint.h b/include/rocksdb/utilities/checkpoint.h new file mode 100644 index 000000000..b60f4ebc6 --- /dev/null +++ b/include/rocksdb/utilities/checkpoint.h @@ -0,0 +1,34 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// A checkpoint is an openable snapshot of a database at a point in time. 
+ +#pragma once + +#include <string> +#include "rocksdb/status.h" + +namespace rocksdb { + +class DB; + +class Checkpoint { + public: + // Creates a Checkpoint object to be used for creating openable snapshots + static Status Create(DB* db, Checkpoint** checkpoint_ptr); + + // Builds an openable snapshot of RocksDB on the same disk, which + // accepts an output directory on the same disk, and under the directory + // (1) hard-linked SST files pointing to existing live SST files + // SST files will be copied if output directory is on a different filesystem + // (2) copied manifest files and other files + // The directory should not already exist and will be created by this API. + // The directory will be an absolute path + virtual Status CreateCheckpoint(const std::string& checkpoint_dir); + + virtual ~Checkpoint() {} +}; + +} // namespace rocksdb diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h index 9366bd84f..7bdf9928e 100644 --- a/include/rocksdb/utilities/stackable_db.h +++ b/include/rocksdb/utilities/stackable_db.h @@ -247,10 +247,6 @@ class StackableDB : public DB { return db_->DefaultColumnFamily(); } - virtual Status CreateCheckpoint(const std::string& snapshot_dir) override { - return db_->CreateCheckpoint(snapshot_dir); - } - protected: DB* db_; }; diff --git a/utilities/checkpoint/checkpoint.cc b/utilities/checkpoint/checkpoint.cc new file mode 100644 index 000000000..b180bbd38 --- /dev/null +++ b/utilities/checkpoint/checkpoint.cc @@ -0,0 +1,168 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +// Copyright (c) 2012 Facebook. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef ROCKSDB_LITE + +#include "rocksdb/utilities/checkpoint.h" + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#include <inttypes.h> +#include <string> +#include <vector> +#include "db/filename.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "util/file_util.h" + +namespace rocksdb { + +class CheckpointImpl : public Checkpoint { + public: + // Creates a Checkpoint object to be used for creating openable snapshots + explicit CheckpointImpl(DB* db) : db_(db) {} + + // Builds an openable snapshot of RocksDB on the same disk, which + // accepts an output directory on the same disk, and under the directory + // (1) hard-linked SST files pointing to existing live SST files + // SST files will be copied if output directory is on a different filesystem + // (2) copied manifest files and other files + // The directory should not already exist and will be created by this API. + // The directory will be an absolute path + using Checkpoint::CreateCheckpoint; + virtual Status CreateCheckpoint(const std::string& checkpoint_dir) override; + + private: + DB* db_; +}; + +Status Checkpoint::Create(DB* db, Checkpoint** checkpoint_ptr) { + *checkpoint_ptr = new CheckpointImpl(db); + return Status::OK(); +} + +Status Checkpoint::CreateCheckpoint(const std::string& checkpoint_dir) { + return Status::NotSupported(""); +} + +// Builds an openable snapshot of RocksDB +Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir) { + Status s; + std::vector<std::string> live_files; + uint64_t manifest_file_size = 0; + uint64_t sequence_number = db_->GetLatestSequenceNumber(); + bool same_fs = true; + + if (db_->GetEnv()->FileExists(checkpoint_dir)) { + return Status::InvalidArgument("Directory exists"); + } + + s = db_->DisableFileDeletions(); + if (s.ok()) { + // this will return live_files prefixed with "/" + s = db_->GetLiveFiles(live_files, &manifest_file_size, true); + } + if (!s.ok()) { + db_->EnableFileDeletions(false); + return s; + } + + Log(db_->GetOptions().info_log, + 
"Started the snapshot process -- creating snapshot in directory %s", + checkpoint_dir.c_str()); + + std::string full_private_path = checkpoint_dir + ".tmp"; + + // create snapshot directory + s = db_->GetEnv()->CreateDir(full_private_path); + + // copy/hard link live_files + for (size_t i = 0; s.ok() && i < live_files.size(); ++i) { + uint64_t number; + FileType type; + bool ok = ParseFileName(live_files[i], &number, &type); + if (!ok) { + s = Status::Corruption("Can't parse file name. This is very bad"); + break; + } + // we should only get sst, manifest and current files here + assert(type == kTableFile || type == kDescriptorFile || + type == kCurrentFile); + assert(live_files[i].size() > 0 && live_files[i][0] == '/'); + std::string src_fname = live_files[i]; + + // rules: + // * if it's kTableFile, then it's shared + // * if it's kDescriptorFile, limit the size to manifest_file_size + // * always copy if cross-device link + if ((type == kTableFile) && same_fs) { + Log(db_->GetOptions().info_log, "Hard Linking %s", src_fname.c_str()); + s = db_->GetEnv()->LinkFile(db_->GetName() + src_fname, + full_private_path + src_fname); + if (s.IsNotSupported()) { + same_fs = false; + s = Status::OK(); + } + } + if ((type != kTableFile) || (!same_fs)) { + Log(db_->GetOptions().info_log, "Copying %s", src_fname.c_str()); + s = CopyFile(db_->GetEnv(), db_->GetName() + src_fname, + full_private_path + src_fname, + (type == kDescriptorFile) ? 
manifest_file_size : 0); + } + } + + // we copied all the files, enable file deletions + db_->EnableFileDeletions(false); + + if (s.ok()) { + // move tmp private backup to real snapshot directory + s = db_->GetEnv()->RenameFile(full_private_path, checkpoint_dir); + } + if (s.ok()) { + unique_ptr<Directory> checkpoint_directory; + db_->GetEnv()->NewDirectory(checkpoint_dir, &checkpoint_directory); + if (checkpoint_directory != nullptr) { + s = checkpoint_directory->Fsync(); + } + } + + if (!s.ok()) { + // clean all the files we might have created + Log(db_->GetOptions().info_log, "Snapshot failed -- %s", + s.ToString().c_str()); + // delete the dir's children (GetChildren yields bare names), then the dir + std::vector<std::string> subchildren; + db_->GetEnv()->GetChildren(full_private_path, &subchildren); + for (auto& subchild : subchildren) { + std::string subchild_path = full_private_path + "/" + subchild; + Status s1 = db_->GetEnv()->DeleteFile(subchild_path); + if (s1.ok()) { + Log(db_->GetOptions().info_log, "Deleted %s", subchild_path.c_str()); + } + } + // finally delete the private dir + Status s1 = db_->GetEnv()->DeleteDir(full_private_path); + Log(db_->GetOptions().info_log, "Deleted dir %s -- %s", + full_private_path.c_str(), s1.ToString().c_str()); + return s; + } + + // here we know that we succeeded and installed the new snapshot + Log(db_->GetOptions().info_log, "Snapshot DONE. All is good"); + Log(db_->GetOptions().info_log, "Snapshot sequence number: %" PRIu64, + sequence_number); + + return s; +} +} // namespace rocksdb + +#endif // ROCKSDB_LITE