Postponing custom checksum support in BackupEngine (#7411)

Summary:
This change reverts BackupEngine to 6.12 state to accommodate a
higher-priority fix that does not easily merge with this custom checksum
support. We intend to reinstate this support soon, by merging a revert
of this change.

For backupable_db_test, I've removed the tests depending on this
feature.

I've also removed the relevant HISTORY.md entry.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7411

Test Plan: unit tests

Reviewed By: ajkr

Differential Revision: D23793835

Pulled By: pdillinger

fbshipit-source-id: 7e861436539584799b13d1a8ae559b81b6d08052
This commit is contained in:
Peter Dillinger 2020-09-18 15:25:34 -07:00 committed by Facebook GitHub Bot
parent 6efae4b00d
commit b475a83f9d
5 changed files with 103 additions and 894 deletions

View File

@ -26,7 +26,6 @@
* Fix a bug in which bottommost compaction continues to advance the underlying InternalIterator to skip tombstones even after shutdown. * Fix a bug in which bottommost compaction continues to advance the underlying InternalIterator to skip tombstones even after shutdown.
### New Features ### New Features
* A new option `std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory` is added to `BackupableDBOptions`. The default value for this option is `nullptr`. If this option is null, the default backup engine checksum function (crc32c) will be used for creating, verifying, or restoring backups. If it is not null and is set to the DB custom checksum factory, the custom checksum function used in DB will also be used for creating, verifying, or restoring backups, in addition to the default checksum function (crc32c). If it is not null and is set to a custom checksum factory different than the DB custom checksum factory (which may be null), BackupEngine will return `Status::InvalidArgument()`.
* A new field `std::string requested_checksum_func_name` is added to `FileChecksumGenContext`, which enables the checksum factory to create generators for a suite of different functions. * A new field `std::string requested_checksum_func_name` is added to `FileChecksumGenContext`, which enables the checksum factory to create generators for a suite of different functions.
* Added a new subcommand, `ldb unsafe_remove_sst_file`, which removes a lost or corrupt SST file from a DB's metadata. This command involves data loss and must not be used on a live DB. * Added a new subcommand, `ldb unsafe_remove_sst_file`, which removes a lost or corrupt SST file from a DB's metadata. This command involves data loss and must not be used on a live DB.

View File

@ -1223,9 +1223,6 @@ Status StressTest::TestBackupRestore(
// For debugging, get info_log from live options // For debugging, get info_log from live options
backup_opts.info_log = db_->GetDBOptions().info_log.get(); backup_opts.info_log = db_->GetDBOptions().info_log.get();
assert(backup_opts.info_log); assert(backup_opts.info_log);
if (thread->rand.OneIn(2)) {
backup_opts.file_checksum_gen_factory = options_.file_checksum_gen_factory;
}
if (thread->rand.OneIn(10)) { if (thread->rand.OneIn(10)) {
backup_opts.share_table_files = false; backup_opts.share_table_files = false;
} else { } else {

View File

@ -24,8 +24,10 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
// The default DB file checksum function name.
constexpr char kDbFileChecksumFuncName[] = "FileChecksumCrc32c";
// The default BackupEngine file checksum function name. // The default BackupEngine file checksum function name.
constexpr char kDefaultBackupFileChecksumFuncName[] = "crc32c"; constexpr char kBackupFileChecksumFuncName[] = "crc32c";
struct BackupableDBOptions { struct BackupableDBOptions {
// Where to keep the backup files. Has to be different than dbname_ // Where to keep the backup files. Has to be different than dbname_
@ -194,33 +196,6 @@ struct BackupableDBOptions {
// and share_table_files are true. // and share_table_files are true.
ShareFilesNaming share_files_with_checksum_naming; ShareFilesNaming share_files_with_checksum_naming;
// Option for custom checksum functions.
// When this option is nullptr, BackupEngine will use its default crc32c as
// the checksum function.
//
// When it is not nullptr, BackupEngine will try to find in the factory the
// checksum function that DB used to calculate the file checksums. If such a
// function is found, BackupEngine will use it to create, verify, or restore
// backups, in addition to the default crc32c checksum function. If such a
// function is not found, BackupEngine will return Status::InvalidArgument().
// Therefore, this option comes into effect only if DB has a custom checksum
// factory and this option is set to the same factory.
//
//
// Note: If share_files_with_checksum and share_table_files are true,
// the <checksum> appeared in the table filenames will be the custom checksum
// value if db session ids are available (namely, table file naming options
// is kOptionalChecksumAndDbSessionId and the db session ids obtained from
// the table files are nonempty).
//
// Note: We do not require the same setting to this option for backup
// restoration or verification as was set during backup creation but we
// strongly recommend setting it to the same as the DB file checksum function
// for all BackupEngine interactions when practical.
//
// Default: nullptr
std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory;
void Dump(Logger* logger) const; void Dump(Logger* logger) const;
explicit BackupableDBOptions( explicit BackupableDBOptions(
@ -233,9 +208,7 @@ struct BackupableDBOptions {
int _max_valid_backups_to_open = INT_MAX, int _max_valid_backups_to_open = INT_MAX,
ShareFilesNaming _share_files_with_checksum_naming = ShareFilesNaming _share_files_with_checksum_naming =
static_cast<ShareFilesNaming>(kUseDbSessionId | kFlagIncludeFileSize | static_cast<ShareFilesNaming>(kUseDbSessionId | kFlagIncludeFileSize |
kFlagMatchInterimNaming), kFlagMatchInterimNaming))
std::shared_ptr<FileChecksumGenFactory> _file_checksum_gen_factory =
nullptr)
: backup_dir(_backup_dir), : backup_dir(_backup_dir),
backup_env(_backup_env), backup_env(_backup_env),
share_table_files(_share_table_files), share_table_files(_share_table_files),
@ -249,8 +222,7 @@ struct BackupableDBOptions {
max_background_operations(_max_background_operations), max_background_operations(_max_background_operations),
callback_trigger_interval_size(_callback_trigger_interval_size), callback_trigger_interval_size(_callback_trigger_interval_size),
max_valid_backups_to_open(_max_valid_backups_to_open), max_valid_backups_to_open(_max_valid_backups_to_open),
share_files_with_checksum_naming(_share_files_with_checksum_naming), share_files_with_checksum_naming(_share_files_with_checksum_naming) {
file_checksum_gen_factory(_file_checksum_gen_factory) {
assert(share_table_files || !share_files_with_checksum); assert(share_table_files || !share_files_with_checksum);
assert((share_files_with_checksum_naming & kMaskNoNamingFlags) != 0); assert((share_files_with_checksum_naming & kMaskNoNamingFlags) != 0);
} }
@ -407,18 +379,16 @@ class BackupEngineReadOnly {
} }
// If verify_with_checksum is true, this function // If verify_with_checksum is true, this function
// inspects the default crc32c checksums and file sizes of backup files to // inspects the current checksums and file sizes of backup files to see if
// see if they match our expectation. This function further inspects the // they match our expectation.
// custom checksums if BackupableDBOptions::file_checksum_gen_factory is
// the same as DBOptions::file_checksum_gen_factory.
// //
// If verify_with_checksum is false, this function // If verify_with_checksum is false, this function
// checks that each file exists and that the size of the file matches our // checks that each file exists and that the size of the file matches our
// expectation. It does not check file checksum. // expectation. It does not check file checksum.
// //
// If this BackupEngine created the backup, it compares the files' current // If this BackupEngine created the backup, it compares the files' current
// sizes (and current checksums) against the number of bytes written to // sizes (and current checksum) against the number of bytes written to
// them (and the checksums calculated) during creation. // them (and the checksum calculated) during creation.
// Otherwise, it compares the files' current sizes (and checksums) against // Otherwise, it compares the files' current sizes (and checksums) against
// their sizes (and checksums) when the BackupEngine was opened. // their sizes (and checksums) when the BackupEngine was opened.
// //
@ -538,9 +508,7 @@ class BackupEngine {
// If verify_with_checksum is true, this function // If verify_with_checksum is true, this function
// inspects the current checksums and file sizes of backup files to see if // inspects the current checksums and file sizes of backup files to see if
// they match our expectation. It further inspects the custom checksums // they match our expectation.
// if BackupableDBOptions::file_checksum_gen_factory is the same as
// DBOptions::file_checksum_gen_factory.
// //
// If verify_with_checksum is false, this function // If verify_with_checksum is false, this function
// checks that each file exists and that the size of the file matches our // checks that each file exists and that the size of the file matches our

View File

@ -28,7 +28,6 @@
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "db/log_reader.h"
#include "env/composite_env_wrapper.h" #include "env/composite_env_wrapper.h"
#include "file/filename.h" #include "file/filename.h"
#include "file/sequence_file_reader.h" #include "file/sequence_file_reader.h"
@ -42,7 +41,6 @@
#include "util/channel.h" #include "util/channel.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/crc32c.h" #include "util/crc32c.h"
#include "util/file_checksum_helper.h"
#include "util/string_util.h" #include "util/string_util.h"
#include "utilities/checkpoint/checkpoint_impl.h" #include "utilities/checkpoint/checkpoint_impl.h"
@ -64,22 +62,6 @@ inline std::string ChecksumInt32ToHex(const uint32_t& checksum_value) {
PutFixed32(&checksum_str, EndianSwapValue(checksum_value)); PutFixed32(&checksum_str, EndianSwapValue(checksum_value));
return ChecksumStrToHex(checksum_str); return ChecksumStrToHex(checksum_str);
} }
// Checks if the checksum function names are the same. Note that both the
// backup default checksum function and the db standard checksum function are
// crc32c although they have different names. So we treat the db standard
// checksum function name and the backup default checksum function name as
// the same name.
inline bool IsSameChecksumFunc(const std::string& dst_checksum_func_name,
const std::string& src_checksum_func_name) {
return (dst_checksum_func_name == src_checksum_func_name) ||
((dst_checksum_func_name == kDefaultBackupFileChecksumFuncName) &&
(src_checksum_func_name == kStandardDbFileChecksumFuncName)) ||
((src_checksum_func_name == kDefaultBackupFileChecksumFuncName) &&
(dst_checksum_func_name == kStandardDbFileChecksumFuncName));
}
inline bool IsSstFile(const std::string& fname) {
return fname.length() > 4 && fname.rfind(".sst") == fname.length() - 4;
}
} // namespace } // namespace
void BackupStatistics::IncrementNumberSuccessBackup() { void BackupStatistics::IncrementNumberSuccessBackup() {
@ -190,15 +172,11 @@ class BackupEngineImpl : public BackupEngine {
struct FileInfo { struct FileInfo {
FileInfo(const std::string& fname, uint64_t sz, const std::string& checksum, FileInfo(const std::string& fname, uint64_t sz, const std::string& checksum,
const std::string& custom_checksum, const std::string& id = "", const std::string& sid = "")
const std::string& checksum_name, const std::string& id = "",
const std::string& sid = "")
: refs(0), : refs(0),
filename(fname), filename(fname),
size(sz), size(sz),
checksum_hex(checksum), checksum_hex(checksum),
custom_checksum_hex(custom_checksum),
checksum_func_name(checksum_name),
db_id(id), db_id(id),
db_session_id(sid) {} db_session_id(sid) {}
@ -209,8 +187,6 @@ class BackupEngineImpl : public BackupEngine {
const std::string filename; const std::string filename;
const uint64_t size; const uint64_t size;
const std::string checksum_hex; const std::string checksum_hex;
const std::string custom_checksum_hex;
const std::string checksum_func_name;
// DB identities // DB identities
// db_id is obtained for potential usage in the future but not used // db_id is obtained for potential usage in the future but not used
// currently // currently
@ -399,78 +375,6 @@ class BackupEngineImpl : public BackupEngine {
return GetBackupMetaDir() + "/" + (tmp ? "." : "") + return GetBackupMetaDir() + "/" + (tmp ? "." : "") +
ROCKSDB_NAMESPACE::ToString(backup_id) + (tmp ? ".tmp" : ""); ROCKSDB_NAMESPACE::ToString(backup_id) + (tmp ? ".tmp" : "");
} }
inline Status GetFileNameInfo(const std::string& file,
std::string& local_name, uint64_t& number,
FileType& type) const {
// 1. extract the filename
size_t last_slash = file.find_last_of('/');
// file will either be shared/<file>, shared_checksum/<file_crc32c_size>,
// shared_checksum/<file_session>, shared_checksum/<file_crc32c_session>,
// or private/<number>/<file>
assert(last_slash != std::string::npos);
local_name = file.substr(last_slash + 1);
// if the file was in shared_checksum, extract the real file name
// in this case the file is <number>_<checksum>_<size>.<type>,
// <number>_<session>.<type>, or <number>_<checksum>_<session>.<type>
if (file.substr(0, last_slash) == GetSharedChecksumDirRel()) {
local_name = GetFileFromChecksumFile(local_name);
}
// 2. find the filetype
bool ok = ParseFileName(local_name, &number, &type);
if (!ok) {
return Status::Corruption("Backup corrupted: Fail to parse filename " +
local_name);
}
return Status::OK();
}
inline bool HasCustomChecksumGenFactory() const {
return options_.file_checksum_gen_factory != nullptr;
}
// Returns nullptr if file_checksum_gen_factory is not set or
// file_checksum_gen_factory is not able to create a generator with
// name being requested_checksum_func_name
inline std::unique_ptr<FileChecksumGenerator> GetCustomChecksumGenerator(
const std::string& requested_checksum_func_name = "") const {
std::shared_ptr<FileChecksumGenFactory> checksum_factory =
options_.file_checksum_gen_factory;
if (checksum_factory == nullptr) {
return nullptr;
} else {
FileChecksumGenContext gen_context;
gen_context.requested_checksum_func_name = requested_checksum_func_name;
return checksum_factory->CreateFileChecksumGenerator(gen_context);
}
}
// Set the checksum generator by the requested checksum function name
inline Status SetChecksumGenerator(
const std::string& requested_checksum_func_name,
std::unique_ptr<FileChecksumGenerator>& checksum_func) {
if (requested_checksum_func_name != kDefaultBackupFileChecksumFuncName) {
if (!HasCustomChecksumGenFactory()) {
// No custom checksum factory indicates users would like to use the
// backup default checksum function and accept the degraded data
// integrity checking
return Status::OK();
} else {
checksum_func =
GetCustomChecksumGenerator(requested_checksum_func_name);
// we will use the default backup checksum function if the custom
// checksum function is the db standard checksum function but is not
// found in the checksum factory passed in; otherwise, we return
// Status::InvalidArgument()
if (checksum_func == nullptr &&
requested_checksum_func_name != kStandardDbFileChecksumFuncName) {
return Status::InvalidArgument("Checksum checksum function " +
requested_checksum_func_name +
" not found");
}
}
}
// The requested checksum function is the default backup checksum function
return Status::OK();
}
// If size_limit == 0, there is no size limit, copy everything. // If size_limit == 0, there is no size limit, copy everything.
// //
@ -478,48 +382,27 @@ class BackupEngineImpl : public BackupEngine {
// //
// @param src If non-empty, the file is copied from this pathname. // @param src If non-empty, the file is copied from this pathname.
// @param contents If non-empty, the file will be created with these contents. // @param contents If non-empty, the file will be created with these contents.
Status CopyOrCreateFile( Status CopyOrCreateFile(const std::string& src, const std::string& dst,
const std::string& src, const std::string& dst, const std::string& contents, Env* src_env,
const std::string& contents, Env* src_env, Env* dst_env, Env* dst_env, const EnvOptions& src_env_options,
const EnvOptions& src_env_options, bool sync, RateLimiter* rate_limiter, bool sync, RateLimiter* rate_limiter,
const std::string& backup_checksum_func_name, uint64_t* size = nullptr, uint64_t* size = nullptr,
std::string* checksum_hex = nullptr, std::string* checksum_hex = nullptr,
std::string* custom_checksum_hex = nullptr, uint64_t size_limit = 0, uint64_t size_limit = 0,
std::function<void()> progress_callback = []() {}); std::function<void()> progress_callback = []() {});
Status CalculateChecksum( Status CalculateChecksum(const std::string& src, Env* src_env,
const std::string& src, Env* src_env, const EnvOptions& src_env_options, const EnvOptions& src_env_options,
uint64_t size_limit, std::string* checksum_hex, uint64_t size_limit, std::string* checksum_hex);
const std::unique_ptr<FileChecksumGenerator>& checksum_func = nullptr,
std::string* custom_checksum_hex = nullptr);
// Obtain db_id and db_session_id from the table properties of file_path // Obtain db_id and db_session_id from the table properties of file_path
Status GetFileDbIdentities(Env* src_env, const EnvOptions& src_env_options, Status GetFileDbIdentities(Env* src_env, const EnvOptions& src_env_options,
const std::string& file_path, std::string* db_id, const std::string& file_path, std::string* db_id,
std::string* db_session_id); std::string* db_session_id);
Status GetFileChecksumsFromManifestInBackup(Env* src_env,
const BackupID& backup_id,
const BackupMeta* backup,
FileChecksumList* checksum_list);
Status VerifyFileWithCrc32c(Env* src_env, const BackupMeta* backup,
const std::string& rel_path);
struct LogReporter : public log::Reader::Reporter {
Status* status;
virtual void Corruption(size_t /*bytes*/, const Status& s) override {
if (status->ok()) {
*status = s;
}
}
};
struct CopyOrCreateResult { struct CopyOrCreateResult {
uint64_t size; uint64_t size;
std::string checksum_hex; std::string checksum_hex;
std::string custom_checksum_hex;
std::string checksum_func_name;
std::string db_id; std::string db_id;
std::string db_session_id; std::string db_session_id;
Status status; Status status;
@ -543,7 +426,6 @@ class BackupEngineImpl : public BackupEngine {
bool verify_checksum_after_work; bool verify_checksum_after_work;
std::string src_checksum_func_name; std::string src_checksum_func_name;
std::string src_checksum_hex; std::string src_checksum_hex;
std::string backup_checksum_func_name;
std::string db_id; std::string db_id;
std::string db_session_id; std::string db_session_id;
@ -560,7 +442,6 @@ class BackupEngineImpl : public BackupEngine {
verify_checksum_after_work(false), verify_checksum_after_work(false),
src_checksum_func_name(kUnknownFileChecksumFuncName), src_checksum_func_name(kUnknownFileChecksumFuncName),
src_checksum_hex(""), src_checksum_hex(""),
backup_checksum_func_name(kUnknownFileChecksumFuncName),
db_id(""), db_id(""),
db_session_id("") {} db_session_id("") {}
@ -586,7 +467,6 @@ class BackupEngineImpl : public BackupEngine {
verify_checksum_after_work = o.verify_checksum_after_work; verify_checksum_after_work = o.verify_checksum_after_work;
src_checksum_func_name = std::move(o.src_checksum_func_name); src_checksum_func_name = std::move(o.src_checksum_func_name);
src_checksum_hex = std::move(o.src_checksum_hex); src_checksum_hex = std::move(o.src_checksum_hex);
backup_checksum_func_name = std::move(o.backup_checksum_func_name);
db_id = std::move(o.db_id); db_id = std::move(o.db_id);
db_session_id = std::move(o.db_session_id); db_session_id = std::move(o.db_session_id);
return *this; return *this;
@ -601,8 +481,6 @@ class BackupEngineImpl : public BackupEngine {
const std::string& _src_checksum_func_name = const std::string& _src_checksum_func_name =
kUnknownFileChecksumFuncName, kUnknownFileChecksumFuncName,
const std::string& _src_checksum_hex = "", const std::string& _src_checksum_hex = "",
const std::string& _backup_checksum_func_name =
kUnknownFileChecksumFuncName,
const std::string& _db_id = "", const std::string& _db_session_id = "") const std::string& _db_id = "", const std::string& _db_session_id = "")
: src_path(std::move(_src_path)), : src_path(std::move(_src_path)),
dst_path(std::move(_dst_path)), dst_path(std::move(_dst_path)),
@ -617,7 +495,6 @@ class BackupEngineImpl : public BackupEngine {
verify_checksum_after_work(_verify_checksum_after_work), verify_checksum_after_work(_verify_checksum_after_work),
src_checksum_func_name(_src_checksum_func_name), src_checksum_func_name(_src_checksum_func_name),
src_checksum_hex(_src_checksum_hex), src_checksum_hex(_src_checksum_hex),
backup_checksum_func_name(_backup_checksum_func_name),
db_id(_db_id), db_id(_db_id),
db_session_id(_db_session_id) {} db_session_id(_db_session_id) {}
}; };
@ -999,49 +876,33 @@ Status BackupEngineImpl::Initialize() {
result.status = CopyOrCreateFile( result.status = CopyOrCreateFile(
work_item.src_path, work_item.dst_path, work_item.contents, work_item.src_path, work_item.dst_path, work_item.contents,
work_item.src_env, work_item.dst_env, work_item.src_env_options, work_item.src_env, work_item.dst_env, work_item.src_env_options,
work_item.sync, work_item.rate_limiter, work_item.sync, work_item.rate_limiter, &result.size,
work_item.backup_checksum_func_name, &result.size, &result.checksum_hex, work_item.size_limit,
&result.checksum_hex, &result.custom_checksum_hex, work_item.progress_callback);
work_item.size_limit, work_item.progress_callback);
result.checksum_func_name = work_item.backup_checksum_func_name;
result.db_id = work_item.db_id; result.db_id = work_item.db_id;
result.db_session_id = work_item.db_session_id; result.db_session_id = work_item.db_session_id;
if (result.status.ok() && work_item.verify_checksum_after_work) { if (result.status.ok() && work_item.verify_checksum_after_work) {
// work_item.verify_checksum_after_work being true means backup engine // unknown checksum function name implies no db table file checksum in
// has obtained its crc32c and/or custom checksum for the table file. // db manifest; work_item.verify_checksum_after_work being true means
// Therefore, we can try to compare the checksums if possible. // backup engine has calculated its crc32c checksum for the table
// file; therefore, we are able to compare the checksums.
if (work_item.src_checksum_func_name == if (work_item.src_checksum_func_name ==
kUnknownFileChecksumFuncName || kUnknownFileChecksumFuncName ||
IsSameChecksumFunc(result.checksum_func_name, work_item.src_checksum_func_name == kDbFileChecksumFuncName) {
work_item.src_checksum_func_name)) { if (work_item.src_checksum_hex != result.checksum_hex) {
std::string checksum_to_compare;
std::string checksum_func_name_used;
if (work_item.src_checksum_func_name ==
kUnknownFileChecksumFuncName ||
work_item.src_checksum_func_name ==
kStandardDbFileChecksumFuncName) {
// kUnknownFileChecksumFuncName implies no table file checksums in
// db manifest, but we can compare using the crc32c checksum
checksum_to_compare = result.checksum_hex;
checksum_func_name_used = kStandardDbFileChecksumFuncName;
} else {
checksum_to_compare = result.custom_checksum_hex;
checksum_func_name_used = work_item.src_checksum_func_name;
}
if (work_item.src_checksum_hex != checksum_to_compare) {
std::string checksum_info( std::string checksum_info(
"Expected checksum is " + work_item.src_checksum_hex + "Expected checksum is " + work_item.src_checksum_hex +
" while computed checksum is " + checksum_to_compare); " while computed checksum is " + result.checksum_hex);
result.status = Status::Corruption( result.status =
checksum_func_name_used + " mismatch after copying to " + Status::Corruption("Checksum mismatch after copying to " +
work_item.dst_path + ": " + checksum_info); work_item.dst_path + ": " + checksum_info);
} }
} else { } else {
std::string checksum_function_info( std::string checksum_function_info(
"Existing checksum function is " + "Existing checksum function is " +
work_item.src_checksum_func_name + work_item.src_checksum_func_name +
" while provided checksum function is " + " while provided checksum function is " +
result.checksum_func_name); kBackupFileChecksumFuncName);
ROCKS_LOG_INFO( ROCKS_LOG_INFO(
options_.info_log, options_.info_log,
"Unable to verify checksum after copying to %s: %s\n", "Unable to verify checksum after copying to %s: %s\n",
@ -1129,6 +990,15 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
CheckpointImpl checkpoint(db); CheckpointImpl checkpoint(db);
uint64_t sequence_number = 0; uint64_t sequence_number = 0;
DBOptions db_options = db->GetDBOptions(); DBOptions db_options = db->GetDBOptions();
FileChecksumGenFactory* db_checksum_factory =
db_options.file_checksum_gen_factory.get();
const std::string kFileChecksumGenFactoryName =
"FileChecksumGenCrc32cFactory";
bool compare_checksum =
db_checksum_factory != nullptr &&
db_checksum_factory->Name() == kFileChecksumGenFactoryName
? true
: false;
EnvOptions src_raw_env_options(db_options); EnvOptions src_raw_env_options(db_options);
s = checkpoint.CreateCustomCheckpoint( s = checkpoint.CreateCustomCheckpoint(
db_options, db_options,
@ -1194,14 +1064,12 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
options.progress_callback, contents); options.progress_callback, contents);
} /* create_file_cb */, } /* create_file_cb */,
&sequence_number, options.flush_before_backup ? 0 : port::kMaxUint64, &sequence_number, options.flush_before_backup ? 0 : port::kMaxUint64,
db_options.file_checksum_gen_factory == nullptr ? false : true); compare_checksum);
if (s.ok()) { if (s.ok()) {
new_backup->SetSequenceNumber(sequence_number); new_backup->SetSequenceNumber(sequence_number);
} }
} }
ROCKS_LOG_INFO(options_.info_log, ROCKS_LOG_INFO(options_.info_log, "add files for backup done, wait finish.");
"add files for backup done (%s), wait finish.",
s.ok() ? "OK" : "not OK");
Status item_status; Status item_status;
for (auto& item : backup_items_to_finish) { for (auto& item : backup_items_to_finish) {
item.result.wait(); item.result.wait();
@ -1213,11 +1081,10 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
} }
if (item_status.ok()) { if (item_status.ok()) {
item_status = new_backup.get()->AddFile(std::make_shared<FileInfo>( item_status = new_backup.get()->AddFile(std::make_shared<FileInfo>(
item.dst_relative, result.size, result.checksum_hex, item.dst_relative, result.size, result.checksum_hex, result.db_id,
result.custom_checksum_hex, result.checksum_func_name, result.db_id,
result.db_session_id)); result.db_session_id));
} }
if (s.ok() && !item_status.ok()) { if (!item_status.ok()) {
s = item_status; s = item_status;
} }
} }
@ -1475,74 +1342,50 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
DeleteChildren(db_dir); DeleteChildren(db_dir);
} }
Status s;
// Try to obtain checksum info from backuped DB MANIFEST
// The checksum info will be used for validating the checksums of the table
// files after restoration, in addition to the default backup engine crc32c
// checksums.
std::unique_ptr<FileChecksumList> checksum_list(NewFileChecksumList());
s = GetFileChecksumsFromManifestInBackup(backup_env_, backup_id, backup.get(),
checksum_list.get());
if (!s.ok()) {
return s;
}
RateLimiter* rate_limiter = options_.restore_rate_limiter.get(); RateLimiter* rate_limiter = options_.restore_rate_limiter.get();
if (rate_limiter) { if (rate_limiter) {
copy_file_buffer_size_ = copy_file_buffer_size_ =
static_cast<size_t>(rate_limiter->GetSingleBurstBytes()); static_cast<size_t>(rate_limiter->GetSingleBurstBytes());
} }
Status s;
std::vector<RestoreAfterCopyOrCreateWorkItem> restore_items_to_finish; std::vector<RestoreAfterCopyOrCreateWorkItem> restore_items_to_finish;
for (const auto& file_info : backup->GetFiles()) { for (const auto& file_info : backup->GetFiles()) {
const std::string& file = file_info->filename; const std::string& file = file_info->filename;
std::string dst; std::string dst;
// 1. extract the filename
size_t slash = file.find_last_of('/');
// file will either be shared/<file>, shared_checksum/<file_crc32c_size>,
// shared_checksum/<file_session>, shared_checksum/<file_crc32c_session>,
// or private/<number>/<file>
assert(slash != std::string::npos);
dst = file.substr(slash + 1);
// if the file was in shared_checksum, extract the real file name
// in this case the file is <number>_<checksum>_<size>.<type>,
// <number>_<session>.<type>, or <number>_<checksum>_<session>.<type>
if (file.substr(0, slash) == GetSharedChecksumDirRel()) {
dst = GetFileFromChecksumFile(dst);
}
// 2. find the filetype
uint64_t number; uint64_t number;
FileType type; FileType type;
s = GetFileNameInfo(file, dst, number, type); bool ok = ParseFileName(dst, &number, &type);
if (!s.ok()) { if (!ok) {
return s; return Status::Corruption("Backup corrupted: Fail to parse filename " +
dst);
} }
// 3. Construct the final path
std::string src_checksum_func_name = kUnknownFileChecksumFuncName;
std::string src_checksum_str = kUnknownFileChecksum;
std::string src_checksum_hex;
bool has_manifest_checksum = false;
if (type == kTableFile) {
Status file_checksum_status = checksum_list->SearchOneFileChecksum(
number, &src_checksum_str, &src_checksum_func_name);
if (file_checksum_status.ok() &&
src_checksum_str != kUnknownFileChecksum &&
src_checksum_func_name != kUnknownFileChecksumFuncName) {
src_checksum_hex = ChecksumStrToHex(src_checksum_str);
has_manifest_checksum = true;
}
}
// Construct the final path
// kLogFile lives in wal_dir and all the rest live in db_dir // kLogFile lives in wal_dir and all the rest live in db_dir
dst = ((type == kLogFile) ? wal_dir : db_dir) + dst = ((type == kLogFile) ? wal_dir : db_dir) +
"/" + dst; "/" + dst;
ROCKS_LOG_INFO(options_.info_log, "Restoring %s to %s\n", file.c_str(), ROCKS_LOG_INFO(options_.info_log, "Restoring %s to %s\n", file.c_str(),
dst.c_str()); dst.c_str());
std::string backup_checksum_func_name = file_info->checksum_func_name;
std::unique_ptr<FileChecksumGenerator> checksum_func;
if (src_checksum_func_name != kUnknownFileChecksumFuncName) {
s = SetChecksumGenerator(src_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
if (checksum_func != nullptr) {
backup_checksum_func_name = checksum_func->Name();
}
}
CopyOrCreateWorkItem copy_or_create_work_item( CopyOrCreateWorkItem copy_or_create_work_item(
GetAbsolutePath(file), dst, "" /* contents */, backup_env_, db_env_, GetAbsolutePath(file), dst, "" /* contents */, backup_env_, db_env_,
EnvOptions() /* src_env_options */, false, rate_limiter, EnvOptions() /* src_env_options */, false, rate_limiter,
0 /* size_limit */, []() {} /* progress_callback */, 0 /* size_limit */);
has_manifest_checksum, src_checksum_func_name, src_checksum_hex,
backup_checksum_func_name);
RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item( RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
copy_or_create_work_item.result.get_future(), file_info->checksum_hex); copy_or_create_work_item.result.get_future(), file_info->checksum_hex);
files_to_copy_or_create_.write(std::move(copy_or_create_work_item)); files_to_copy_or_create_.write(std::move(copy_or_create_work_item));
@ -1560,11 +1403,7 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
s = item_status; s = item_status;
break; break;
} else if (item.checksum_hex != result.checksum_hex) { } else if (item.checksum_hex != result.checksum_hex) {
// Compare crc32c checksums (especially for non-table files) s = Status::Corruption("Checksum check failed");
std::string checksum_info("Expected checksum is " + item.checksum_hex +
" while computed checksum is " +
result.checksum_hex);
s = Status::Corruption("Crc32c checksum check failed: " + checksum_info);
break; break;
} }
} }
@ -1603,17 +1442,6 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id,
InsertPathnameToSizeBytes(abs_dir, backup_env_, &curr_abs_path_to_size); InsertPathnameToSizeBytes(abs_dir, backup_env_, &curr_abs_path_to_size);
} }
Status s;
std::unique_ptr<FileChecksumList> checksum_list(NewFileChecksumList());
if (verify_with_checksum) {
// Try to obtain checksum info from backuped DB MANIFEST
s = GetFileChecksumsFromManifestInBackup(backup_env_, backup_id,
backup.get(), checksum_list.get());
if (!s.ok()) {
return s;
}
}
// For all files registered in backup // For all files registered in backup
for (const auto& file_info : backup->GetFiles()) { for (const auto& file_info : backup->GetFiles()) {
const auto abs_path = GetAbsolutePath(file_info->filename); const auto abs_path = GetAbsolutePath(file_info->filename);
@ -1632,68 +1460,27 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id,
} }
if (verify_with_checksum) { if (verify_with_checksum) {
// verify file checksum // verify file checksum
// try setting checksum_func std::string checksum_hex;
std::unique_ptr<FileChecksumGenerator> checksum_func;
std::string src_checksum_func_name = kUnknownFileChecksumFuncName;
std::string src_checksum_str = kUnknownFileChecksum;
std::string src_checksum_hex;
if (IsSstFile(file_info->filename)) {
const std::string& file = file_info->filename;
std::string local_name;
uint64_t number;
FileType type;
s = GetFileNameInfo(file, local_name, number, type);
if (!s.ok()) {
return s;
}
assert(type == kTableFile);
// Try to get checksum for the table file
Status file_checksum_status = checksum_list->SearchOneFileChecksum(
number, &src_checksum_str, &src_checksum_func_name);
if (file_checksum_status.ok() &&
src_checksum_str != kUnknownFileChecksum &&
src_checksum_func_name != kUnknownFileChecksumFuncName) {
s = SetChecksumGenerator(src_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
src_checksum_hex = ChecksumStrToHex(src_checksum_str);
}
}
ROCKS_LOG_INFO(options_.info_log, "Verifying %s checksum...\n", ROCKS_LOG_INFO(options_.info_log, "Verifying %s checksum...\n",
abs_path.c_str()); abs_path.c_str());
std::string checksum_hex;
std::string custom_checksum_hex;
CalculateChecksum(abs_path, backup_env_, EnvOptions(), 0 /* size_limit */, CalculateChecksum(abs_path, backup_env_, EnvOptions(), 0 /* size_limit */,
&checksum_hex, checksum_func, &custom_checksum_hex); &checksum_hex);
if (file_info->checksum_hex != checksum_hex) { if (file_info->checksum_hex != checksum_hex) {
std::string checksum_info( std::string checksum_info(
"Expected checksum is " + file_info->checksum_hex + "Expected checksum is " + file_info->checksum_hex +
" while computed checksum is " + checksum_hex); " while computed checksum is " + checksum_hex);
return Status::Corruption("File corrupted: crc32c mismatch for " + return Status::Corruption("File corrupted: Checksum mismatch for " +
abs_path + ": " + checksum_info); abs_path + ": " + checksum_info);
} }
if (checksum_func != nullptr && src_checksum_hex != custom_checksum_hex) {
std::string checksum_info("Expected checksum is " + src_checksum_hex +
" while computed checksum is " +
custom_checksum_hex);
return Status::Corruption("File corrupted: " + src_checksum_func_name +
" mismatch for " + abs_path + ": " +
checksum_info);
}
} }
} }
return Status::OK(); return Status::OK();
} }
Status BackupEngineImpl::CopyOrCreateFile( Status BackupEngineImpl::CopyOrCreateFile(
const std::string& src, const std::string& dst, const std::string& contents, const std::string& src, const std::string& dst, const std::string& contents,
Env* src_env, Env* dst_env, const EnvOptions& src_env_options, bool sync, Env* src_env, Env* dst_env, const EnvOptions& src_env_options, bool sync,
RateLimiter* rate_limiter, const std::string& backup_checksum_func_name, RateLimiter* rate_limiter, uint64_t* size, std::string* checksum_hex,
uint64_t* size, std::string* checksum_hex, std::string* custom_checksum_hex,
uint64_t size_limit, std::function<void()> progress_callback) { uint64_t size_limit, std::function<void()> progress_callback) {
assert(src.empty() != contents.empty()); assert(src.empty() != contents.empty());
Status s; Status s;
@ -1707,13 +1494,6 @@ Status BackupEngineImpl::CopyOrCreateFile(
} }
uint32_t checksum_value = 0; uint32_t checksum_value = 0;
// Get custom checksum function
std::unique_ptr<FileChecksumGenerator> checksum_func;
s = SetChecksumGenerator(backup_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
// Check if size limit is set. if not, set it to very big number // Check if size limit is set. if not, set it to very big number
if (size_limit == 0) { if (size_limit == 0) {
size_limit = std::numeric_limits<uint64_t>::max(); size_limit = std::numeric_limits<uint64_t>::max();
@ -1768,10 +1548,6 @@ Status BackupEngineImpl::CopyOrCreateFile(
if (checksum_hex != nullptr) { if (checksum_hex != nullptr) {
checksum_value = crc32c::Extend(checksum_value, data.data(), data.size()); checksum_value = crc32c::Extend(checksum_value, data.data(), data.size());
} }
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Update(data.data(), data.size());
}
s = dest_writer->Append(data); s = dest_writer->Append(data);
if (rate_limiter != nullptr) { if (rate_limiter != nullptr) {
rate_limiter->Request(data.size(), Env::IO_LOW, nullptr /* stats */, rate_limiter->Request(data.size(), Env::IO_LOW, nullptr /* stats */,
@ -1784,14 +1560,10 @@ Status BackupEngineImpl::CopyOrCreateFile(
} }
} while (s.ok() && contents.empty() && data.size() > 0 && size_limit > 0); } while (s.ok() && contents.empty() && data.size() > 0 && size_limit > 0);
// Convert uint32_t checksum to hex checksum
if (checksum_hex != nullptr) { if (checksum_hex != nullptr) {
// Convert uint32_t checksum to hex checksum
checksum_hex->assign(ChecksumInt32ToHex(checksum_value)); checksum_hex->assign(ChecksumInt32ToHex(checksum_value));
} }
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Finalize();
custom_checksum_hex->assign(ChecksumStrToHex(checksum_func->GetChecksum()));
}
if (s.ok() && sync) { if (s.ok() && sync) {
s = dest_writer->Sync(false); s = dest_writer->Sync(false);
@ -1819,50 +1591,27 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
std::string dst_relative_tmp; std::string dst_relative_tmp;
Status s; Status s;
std::string checksum_hex; std::string checksum_hex;
std::string custom_checksum_hex;
// The function name of backup checksum function.
std::string backup_checksum_func_name = kDefaultBackupFileChecksumFuncName;
std::string db_id; std::string db_id;
std::string db_session_id; std::string db_session_id;
// whether a default or custom checksum for a table file is available // whether the checksum for a table file is available
bool has_checksum = false; bool has_checksum = false;
// Set up the custom checksum function. // Whenever a default checksum function name is passed in, we will compares
// A nullptr checksum_func indicates the default backup checksum function // the corresponding checksum values after copying. Note that only table files
// will be used. If checksum_func is not nullptr, then both the default // may have a known checksum function name passed in.
// backup checksum function and checksum_func will be used.
std::unique_ptr<FileChecksumGenerator> checksum_func;
if (src_checksum_func_name != kUnknownFileChecksumFuncName) {
// DB files have checksum functions
s = SetChecksumGenerator(src_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
if (checksum_func != nullptr) {
backup_checksum_func_name = checksum_func->Name();
}
}
// Whenever the db checksum function name matches the backup engine custom
// checksum function name, we will compare the checksum values after copying.
// Note that only table files may have a known checksum name passed in.
// //
// If the checksum function names do not match and db session id is not // If no default checksum function name is passed in and db session id is not
// available, we will calculate the checksum *before* copying in two cases // available, we will calculate the checksum *before* copying in two cases
// (we always calcuate checksums when copying or creating for any file types): // (we always calcuate checksums when copying or creating for any file types):
// a) share_files_with_checksum is true and file type is table; // a) share_files_with_checksum is true and file type is table;
// b) share_table_files is true and the file exists already. // b) share_table_files is true and the file exists already.
// //
// Step 0: Check if a known checksum function name is passed in // Step 0: Check if default checksum function name is passed in
if (IsSameChecksumFunc(backup_checksum_func_name, src_checksum_func_name)) { if (kDbFileChecksumFuncName == src_checksum_func_name) {
if (src_checksum_str == kUnknownFileChecksum) { if (src_checksum_str == kUnknownFileChecksum) {
return Status::Aborted("Unknown checksum value for " + fname); return Status::Aborted("Unknown checksum value for " + fname);
} }
if (checksum_func == nullptr) { checksum_hex = ChecksumStrToHex(src_checksum_str);
checksum_hex = ChecksumStrToHex(src_checksum_str);
} else {
custom_checksum_hex = ChecksumStrToHex(src_checksum_str);
}
has_checksum = true; has_checksum = true;
} }
@ -1881,8 +1630,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// the shared_checksum directory. // the shared_checksum directory.
if (!has_checksum && db_session_id.empty()) { if (!has_checksum && db_session_id.empty()) {
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
size_limit, &checksum_hex, checksum_func, size_limit, &checksum_hex);
&custom_checksum_hex);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -1901,14 +1649,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// shared_checksum/<file_number>_<db_session_id>.sst // shared_checksum/<file_number>_<db_session_id>.sst
// Otherwise, dst_relative is of the form // Otherwise, dst_relative is of the form
// shared_checksum/<file_number>_<checksum>_<size>.sst // shared_checksum/<file_number>_<checksum>_<size>.sst
//
// Also, we display custom checksums in the name if possible.
dst_relative = GetSharedFileWithChecksum( dst_relative = GetSharedFileWithChecksum(
dst_relative, has_checksum, dst_relative, has_checksum, checksum_hex, size_bytes, db_session_id);
checksum_func == nullptr || UseLegacyNaming(db_session_id)
? checksum_hex
: custom_checksum_hex,
size_bytes, db_session_id);
dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true); dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true);
dst_relative = GetSharedFileWithChecksumRel(dst_relative, false); dst_relative = GetSharedFileWithChecksumRel(dst_relative, false);
} else if (shared) { } else if (shared) {
@ -1973,14 +1715,10 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
backup_env_->DeleteFile(final_dest_path); backup_env_->DeleteFile(final_dest_path);
} else { } else {
// file exists and referenced // file exists and referenced
if (!has_checksum || checksum_hex.empty()) { if (!has_checksum) {
// Either both checksum_hex and custom_checksum_hex need recalculating
// or only checksum_hex needs recalculating
// FIXME(peterd): extra I/O // FIXME(peterd): extra I/O
s = CalculateChecksum( s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
src_dir + fname, db_env_, src_env_options, size_limit, size_limit, &checksum_hex);
&checksum_hex, checksum_func,
checksum_hex.empty() ? nullptr : &custom_checksum_hex);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -1999,11 +1737,6 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
fname.c_str(), checksum_hex.c_str(), size_bytes); fname.c_str(), checksum_hex.c_str(), size_bytes);
} }
} }
if (checksum_func != nullptr) {
ROCKS_LOG_INFO(options_.info_log, "%s checksum is %s",
backup_checksum_func_name.c_str(),
custom_checksum_hex.c_str());
}
} else if (backuped_file_infos_.find(dst_relative) == } else if (backuped_file_infos_.find(dst_relative) ==
backuped_file_infos_.end() && backuped_file_infos_.end() &&
!same_path) { !same_path) {
@ -2020,14 +1753,10 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// the file is present and referenced by a backup // the file is present and referenced by a backup
ROCKS_LOG_INFO(options_.info_log, ROCKS_LOG_INFO(options_.info_log,
"%s already present, calculate checksum", fname.c_str()); "%s already present, calculate checksum", fname.c_str());
if (!has_checksum || checksum_hex.empty()) { if (!has_checksum) {
// Either both checksum_hex and custom_checksum_hex need recalculating
// or only checksum_hex needs recalculating
// FIXME(peterd): extra I/O // FIXME(peterd): extra I/O
s = CalculateChecksum( s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
src_dir + fname, db_env_, src_env_options, size_limit, size_limit, &checksum_hex);
&checksum_hex, checksum_func,
checksum_hex.empty() ? nullptr : &custom_checksum_hex);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -2045,8 +1774,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
src_dir.empty() ? "" : src_dir + fname, *copy_dest_path, contents, src_dir.empty() ? "" : src_dir + fname, *copy_dest_path, contents,
db_env_, backup_env_, src_env_options, options_.sync, rate_limiter, db_env_, backup_env_, src_env_options, options_.sync, rate_limiter,
size_limit, progress_callback, has_checksum, src_checksum_func_name, size_limit, progress_callback, has_checksum, src_checksum_func_name,
checksum_func == nullptr ? checksum_hex : custom_checksum_hex, checksum_hex, db_id, db_session_id);
backup_checksum_func_name, db_id, db_session_id);
BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item( BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
copy_or_create_work_item.result.get_future(), shared, need_to_copy, copy_or_create_work_item.result.get_future(), shared, need_to_copy,
backup_env_, temp_dest_path, final_dest_path, dst_relative); backup_env_, temp_dest_path, final_dest_path, dst_relative);
@ -2062,8 +1790,6 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
result.status = s; result.status = s;
result.size = size_bytes; result.size = size_bytes;
result.checksum_hex = std::move(checksum_hex); result.checksum_hex = std::move(checksum_hex);
result.custom_checksum_hex = std::move(custom_checksum_hex);
result.checksum_func_name = std::move(backup_checksum_func_name);
result.db_id = std::move(db_id); result.db_id = std::move(db_id);
result.db_session_id = std::move(db_session_id); result.db_session_id = std::move(db_session_id);
promise_result.set_value(std::move(result)); promise_result.set_value(std::move(result));
@ -2071,16 +1797,14 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
return s; return s;
} }
Status BackupEngineImpl::CalculateChecksum( Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env,
const std::string& src, Env* src_env, const EnvOptions& src_env_options, const EnvOptions& src_env_options,
uint64_t size_limit, std::string* checksum_hex, uint64_t size_limit,
const std::unique_ptr<FileChecksumGenerator>& checksum_func, std::string* checksum_hex) {
std::string* custom_checksum_hex) {
if (checksum_hex == nullptr) { if (checksum_hex == nullptr) {
return Status::InvalidArgument("Checksum pointer is null"); return Status::Aborted("Checksum pointer is null");
} }
uint32_t checksum_value = 0; uint32_t checksum_value = 0;
if (size_limit == 0) { if (size_limit == 0) {
size_limit = std::numeric_limits<uint64_t>::max(); size_limit = std::numeric_limits<uint64_t>::max();
} }
@ -2109,18 +1833,10 @@ Status BackupEngineImpl::CalculateChecksum(
} }
size_limit -= data.size(); size_limit -= data.size();
checksum_value = crc32c::Extend(checksum_value, data.data(), data.size()); checksum_value = crc32c::Extend(checksum_value, data.data(), data.size());
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Update(data.data(), data.size());
}
} while (data.size() > 0 && size_limit > 0); } while (data.size() > 0 && size_limit > 0);
checksum_hex->assign(ChecksumInt32ToHex(checksum_value)); checksum_hex->assign(ChecksumInt32ToHex(checksum_value));
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Finalize();
custom_checksum_hex->assign(ChecksumStrToHex(checksum_func->GetChecksum()));
}
return s; return s;
} }
@ -2179,77 +1895,6 @@ Status BackupEngineImpl::GetFileDbIdentities(Env* src_env,
} }
} }
// Fills `checksum_list` with the file checksum records read from the MANIFEST
// of backup `backup_id`.
//
// The MANIFEST is located through the backup's CURRENT file. Both CURRENT and
// the MANIFEST are checked with VerifyFileWithCrc32c() before being read.
// Returns InvalidArgument when `checksum_list` is null; otherwise returns the
// first non-OK status produced by any step, or the status of
// GetFileChecksumsFromManifest().
Status BackupEngineImpl::GetFileChecksumsFromManifestInBackup(
    Env* src_env, const BackupID& backup_id, const BackupMeta* backup,
    FileChecksumList* checksum_list) {
  if (checksum_list == nullptr) {
    return Status::InvalidArgument("checksum_list is nullptr");
  }
  checksum_list->reset();

  // CURRENT holds the name of the latest DB MANIFEST in this backup.
  const std::string current_rel =
      GetPrivateFileRel(backup_id, false /* tmp */, "CURRENT");
  Status status = VerifyFileWithCrc32c(src_env, backup, current_rel);
  if (!status.ok()) {
    return status;
  }

  std::string manifest_name;
  status = ReadFileToString(src_env, GetAbsolutePath(current_rel),
                            &manifest_name);
  if (!status.ok()) {
    return status;
  }
  // Drop every trailing '\n'. When the string is empty or all newlines,
  // find_last_not_of() yields npos and npos + 1 wraps to 0, clearing it.
  manifest_name.erase(manifest_name.find_last_not_of('\n') + 1);

  const std::string manifest_rel =
      GetPrivateFileRel(backup_id, false /* tmp */, manifest_name);
  status = VerifyFileWithCrc32c(src_env, backup, manifest_rel);
  if (!status.ok()) {
    return status;
  }

  // Read the whole MANIFEST file in the backup.
  return GetFileChecksumsFromManifest(
      src_env, GetAbsolutePath(manifest_rel),
      std::numeric_limits<uint64_t>::max() /*manifest_file_size*/,
      checksum_list);
}
// Recomputes the checksum of the backup file at `rel_path` with
// CalculateChecksum() and compares it with the checksum_hex recorded for that
// file in `backup`.
//
// Returns Corruption when `backup` has no entry for `rel_path` or when the
// two checksums differ, and propagates any failure from CalculateChecksum().
Status BackupEngineImpl::VerifyFileWithCrc32c(Env* src_env,
                                              const BackupMeta* backup,
                                              const std::string& rel_path) {
  const std::shared_ptr<FileInfo> recorded = backup->GetFile(rel_path);
  if (recorded == nullptr) {
    return Status::Corruption(rel_path + " is missing");
  }

  std::string computed_hex;
  Status status =
      CalculateChecksum(GetAbsolutePath(rel_path), src_env, EnvOptions(),
                        0 /* size_limit */, &computed_hex);
  if (!status.ok()) {
    return status;
  }

  const std::string& recorded_hex = recorded->checksum_hex;
  if (computed_hex == recorded_hex) {
    return status;
  }
  std::string checksum_info("Expected checksum is " + recorded_hex +
                            " while computed checksum is " + computed_hex);
  return Status::Corruption("crc32c mismatch for " + rel_path + ": " +
                            checksum_info);
}
void BackupEngineImpl::DeleteChildren(const std::string& dir, void BackupEngineImpl::DeleteChildren(const std::string& dir,
uint32_t file_type_filter) { uint32_t file_type_filter) {
std::vector<std::string> children; std::vector<std::string> children;
@ -2423,14 +2068,6 @@ Status BackupEngineImpl::BackupMeta::AddFile(
return Status::Corruption( return Status::Corruption(
"Checksum mismatch for existing backup file. Delete old backups and " "Checksum mismatch for existing backup file. Delete old backups and "
"try again."); "try again.");
} else if (IsSameChecksumFunc(itr->second->checksum_func_name,
file_info->checksum_func_name) &&
!itr->second->custom_checksum_hex.empty() &&
itr->second->custom_checksum_hex !=
file_info->custom_checksum_hex) {
return Status::Corruption(
"Custom checksum mismatch for existing backup file. Delete old "
"backups and try again.");
} }
++itr->second->refs; // increase refcount if already present ++itr->second->refs; // increase refcount if already present
} }
@ -2544,14 +2181,12 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
} }
uint32_t checksum_value = 0; uint32_t checksum_value = 0;
std::string checksum_func_name = kUnknownFileChecksumFuncName;
if (line.starts_with(checksum_prefix)) { if (line.starts_with(checksum_prefix)) {
line.remove_prefix(checksum_prefix.size()); line.remove_prefix(checksum_prefix.size());
checksum_func_name = kDefaultBackupFileChecksumFuncName;
checksum_value = static_cast<uint32_t>(strtoul(line.data(), nullptr, 10)); checksum_value = static_cast<uint32_t>(strtoul(line.data(), nullptr, 10));
if (line != ROCKSDB_NAMESPACE::ToString(checksum_value)) { if (line != ROCKSDB_NAMESPACE::ToString(checksum_value)) {
return Status::Corruption("Invalid crc32c checksum value for " + return Status::Corruption("Invalid checksum value for " + filename +
filename + " in " + meta_filename_); " in " + meta_filename_);
} }
} else { } else {
return Status::Corruption("Unknown checksum type for " + filename + return Status::Corruption("Unknown checksum type for " + filename +
@ -2559,8 +2194,7 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
} }
files.emplace_back( files.emplace_back(
new FileInfo(filename, size, ChecksumInt32ToHex(checksum_value), new FileInfo(filename, size, ChecksumInt32ToHex(checksum_value)));
"" /* custom_checksum_hex */, checksum_func_name));
} }
if (s.ok() && data.size() > 0) { if (s.ok() && data.size() > 0) {

View File

@ -48,148 +48,6 @@ const auto kFlagMatchInterimNaming =
const auto kNamingDefault = const auto kNamingDefault =
kUseDbSessionId | kFlagIncludeFileSize | kFlagMatchInterimNaming; kUseDbSessionId | kFlagIncludeFileSize | kFlagMatchInterimNaming;
// Test-only checksum generator that ignores the file data entirely. Its
// checksum is fixed at construction: 0 when `state` is true, 1 otherwise.
class DummyFileChecksumGen : public FileChecksumGenerator {
 public:
  explicit DummyFileChecksumGen(const FileChecksumGenContext& /* context */,
                                bool state)
      : checksum_(state ? 0 : 1) {}

  // File contents do not influence the checksum.
  void Update(const char* /* data */, size_t /* n */) override {}

  // Encodes the fixed value into checksum_str_; asserts it has not been
  // encoded already.
  void Finalize() override {
    assert(checksum_str_.empty());
    // Store as big endian raw bytes
    PutFixed32(&checksum_str_, EndianSwapValue(checksum_));
  }

  // Returns the encoded checksum; asserts that Finalize() has produced one.
  std::string GetChecksum() const override {
    assert(!checksum_str_.empty());
    return checksum_str_;
  }

  const char* Name() const override { return "DummyFileChecksum"; }

 private:
  uint32_t checksum_;
  std::string checksum_str_;
};
// Factory for DummyFileChecksumGen. `state` is forwarded to every generator
// it creates.
class DummyFileChecksumGenFactory : public FileChecksumGenFactory {
 public:
  explicit DummyFileChecksumGenFactory(bool state = false) : state_(state) {}

  // Creates a generator when no function name is requested or when
  // "DummyFileChecksum" is requested; returns nullptr for any other name.
  std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
      const FileChecksumGenContext& context) override {
    const std::string& requested = context.requested_checksum_func_name;
    if (!requested.empty() && requested != "DummyFileChecksum") {
      return nullptr;
    }
    return std::unique_ptr<FileChecksumGenerator>(
        new DummyFileChecksumGen(context, state_));
  }

  const char* Name() const override { return "DummyFileChecksumGenFactory"; }

 private:
  bool state_;
};
// Test-only checksum generator that buffers everything passed to Update()
// and, on Finalize(), stores Hash() of the buffered bytes (seed 1) as a
// 32-bit checksum.
class FileHash32Gen : public FileChecksumGenerator {
 public:
  explicit FileHash32Gen(const FileChecksumGenContext& /*context*/) {
    checksum_ = 0;
  }

  // Appends the next `n` bytes of file data to the in-memory buffer.
  void Update(const char* data, size_t n) override { content_.append(data, n); }

  // Hashes the buffered data and encodes the result into checksum_str_.
  // Asserts that no checksum has been encoded yet.
  void Finalize() override {
    assert(checksum_str_.empty());
    // Hash the whole buffer by its explicit length. Measuring it with
    // strlen() stops at the first NUL byte, which would leave the rest of a
    // binary file out of the checksum.
    checksum_ = Hash(content_.data(), content_.size(), 1);
    // Store as big endian raw bytes
    PutFixed32(&checksum_str_, EndianSwapValue(checksum_));
  }

  // Returns the encoded checksum; asserts that Finalize() has produced one.
  std::string GetChecksum() const override {
    assert(!checksum_str_.empty());
    return checksum_str_;
  }

  const char* Name() const override { return "FileHash32"; }

 private:
  std::string content_;
  uint32_t checksum_;
  std::string checksum_str_;
};
// Test-only checksum generator that buffers everything passed to Update()
// and, on Finalize(), stores Hash64() of the buffered bytes (seed 1) as a
// 64-bit checksum.
class FileHash64Gen : public FileChecksumGenerator {
 public:
  explicit FileHash64Gen(const FileChecksumGenContext& /*context*/) {
    checksum_ = 0;
  }

  // Appends the next `n` bytes of file data to the in-memory buffer.
  void Update(const char* data, size_t n) override { content_.append(data, n); }

  // Hashes the buffered data and encodes the result into checksum_str_.
  // Asserts that no checksum has been encoded yet.
  void Finalize() override {
    assert(checksum_str_.empty());
    // Hash the whole buffer by its explicit length. Measuring it with
    // strlen() stops at the first NUL byte, which would leave the rest of a
    // binary file out of the checksum.
    checksum_ = Hash64(content_.data(), content_.size(), 1);
    // Store as big endian raw bytes
    PutFixed64(&checksum_str_, EndianSwapValue(checksum_));
  }

  // Returns the encoded checksum; asserts that Finalize() has produced one.
  std::string GetChecksum() const override {
    assert(!checksum_str_.empty());
    return checksum_str_;
  }

  const char* Name() const override { return "FileHash64"; }

 private:
  std::string content_;
  uint64_t checksum_;
  std::string checksum_str_;
};
// Factory that only knows the "FileHash32" checksum function.
class FileHash32GenFactory : public FileChecksumGenFactory {
 public:
  // Creates a FileHash32Gen when no function name is requested or when
  // "FileHash32" is requested; returns nullptr for any other name.
  std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
      const FileChecksumGenContext& context) override {
    const std::string& requested = context.requested_checksum_func_name;
    if (!requested.empty() && requested != "FileHash32") {
      return nullptr;
    }
    return std::unique_ptr<FileChecksumGenerator>(new FileHash32Gen(context));
  }

  const char* Name() const override { return "FileHash32GenFactory"; }
};
class FileHashGenFactory : public FileChecksumGenFactory {
public:
std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
const FileChecksumGenContext& context) override {
if (context.requested_checksum_func_name.empty() ||
context.requested_checksum_func_name == "FileHash64") {
return std::unique_ptr<FileChecksumGenerator>(new FileHash64Gen(context));
} else if (context.requested_checksum_func_name == "FileHash32") {
return std::unique_ptr<FileChecksumGenerator>(new FileHash32Gen(context));
} else {
return nullptr;
}
}
const char* Name() const override { return "FileHashGenFactory"; }
};
class DummyDB : public StackableDB { class DummyDB : public StackableDB {
public: public:
/* implicit */ /* implicit */
@ -992,253 +850,6 @@ class BackupableDBTestWithParam : public BackupableDBTest,
} }
}; };
// Creates, verifies and restores backups for every entry of kAllShareOptions
// under several pairings of the DB's and the backup engine's
// file_checksum_gen_factory: crc32c on the DB only, crc32c on both, a shared
// FileHashGenFactory, and finally two dummy factories whose generators share
// a name but are built with different `state` values.
TEST_F(BackupableDBTest, DbAndBackupSameCustomChecksum) {
const int keys_iteration = 5000;
options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
// backup uses its default crc32c
for (const auto& sopt : kAllShareOptions) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
// verify both without and with checksum verification
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1);
// delete old data
DestroyDB(dbname_, options_);
}
// backup uses db crc32c
backupable_options_->file_checksum_gen_factory =
GetFileChecksumGenCrc32cFactory();
for (const auto& sopt : kAllShareOptions) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1);
// delete old data
DestroyDB(dbname_, options_);
}
// db and backup share the same FileHashGenFactory instance
std::shared_ptr<FileChecksumGenFactory> hash_factory =
std::make_shared<FileHashGenFactory>();
options_.file_checksum_gen_factory = hash_factory;
backupable_options_->file_checksum_gen_factory = hash_factory;
for (const auto& sopt : kAllShareOptions) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1);
// delete old data
DestroyDB(dbname_, options_);
}
// Mimic a checksum mismatch for custom checksum function by using a dummy
// checksum function with a state
std::shared_ptr<FileChecksumGenFactory> dummy_factory_0 =
std::make_shared<DummyFileChecksumGenFactory>(false);
std::shared_ptr<FileChecksumGenFactory> dummy_factory_1 =
std::make_shared<DummyFileChecksumGenFactory>(true);
FileChecksumGenContext context;
// Both factories have the same generator name
std::string dummy_checksum_function_name =
dummy_factory_0->CreateFileChecksumGenerator(context)->Name();
// the db keeps dummy_factory_0 for the rest of the test; only the backup
// engine's factory is switched below
options_.file_checksum_gen_factory = dummy_factory_0;
for (const auto& sopt : kAllShareOptions) {
backupable_options_->file_checksum_gen_factory = dummy_factory_1;
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
// DB and backup engine do not have the same custom checksum function
// "state"
Status s = backup_engine_->CreateNewBackup(db_.get());
ASSERT_NOK(s);
// the failure must be reported as a mismatch of the dummy checksum function
ASSERT_TRUE(
s.ToString().find("Corruption: " + dummy_checksum_function_name +
" mismatch") != std::string::npos);
CloseBackupEngine();
// Change custom checksum function and try again
backupable_options_->file_checksum_gen_factory = dummy_factory_0;
OpenBackupEngine(true /* destroy_old_data */);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
ASSERT_OK(backup_engine_->RestoreDBFromBackup(1, dbname_, dbname_));
CloseBackupEngine();
// Try verifying or restoring a backup using a different custom checksum
// function "state"
backupable_options_->file_checksum_gen_factory = dummy_factory_1;
OpenBackupEngine(false /* destroy_old_data */);
ASSERT_NOK(backup_engine_->VerifyBackup(1, true));
ASSERT_NOK(backup_engine_->RestoreDBFromBackup(1, dbname_, dbname_));
CloseDBAndBackupEngine();
// delete old data
DestroyDB(dbname_, options_);
}
}
// For every entry of kAllShareOptions, takes a series of backups while the
// DB's and the backup engine's file_checksum_gen_factory are changed between
// steps, and checks which CreateNewBackup / VerifyBackup / RestoreDBFromBackup
// calls are expected to succeed or fail. `i` tracks the backup ids used in
// the assertions.
TEST_F(BackupableDBTest, CustomChecksumTransition) {
const int keys_iteration = 5000;
std::shared_ptr<FileChecksumGenFactory> hash32_factory =
std::make_shared<FileHash32GenFactory>();
std::shared_ptr<FileChecksumGenFactory> hash_factory =
std::make_shared<FileHashGenFactory>();
for (const auto& sopt : kAllShareOptions) {
// 1) with one custom checksum function (FileHash32GenFactory) for both
// db and backup
int i = 0;
options_.file_checksum_gen_factory = hash32_factory;
backupable_options_->file_checksum_gen_factory = hash32_factory;
// open with old backup
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration * (i + 1));
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
// verify the backup with checksum
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
keys_iteration * (i + 2));
// 2) with two custom checksum functions (FileHashGenFactory) for db
// but one custom checksum function (FileHash32GenFactory) for backup
++i;
options_.file_checksum_gen_factory = hash_factory;
backupable_options_->file_checksum_gen_factory = hash32_factory;
// open with old backup
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
FillDB(db_.get(), 0, keys_iteration * (i + 1));
// note that the checksum factory for backup does not know the custom
// checksum function used in the db
ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
// but it knows the custom checksum function for the older backup
ASSERT_OK(backup_engine_->VerifyBackup(i, true));
// reset the factory to nullptr and try again
CloseBackupEngine();
backupable_options_->file_checksum_gen_factory = nullptr;
OpenBackupEngine();
// deleting backup i + 1 is expected to fail at this point
ASSERT_NOK(backup_engine_->DeleteBackup(i + 1));
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(i, 0, keys_iteration * i, keys_iteration * (i + 1));
AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
keys_iteration * (i + 2));
// Now set the factory to the same as the one used in the db
backupable_options_->file_checksum_gen_factory = hash_factory;
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
CloseBackupEngine();
++i;
// Say, we accidentally change the factory
backupable_options_->file_checksum_gen_factory = hash32_factory;
OpenBackupEngine();
// Unable to verify the latest backup.
ASSERT_NOK(backup_engine_->VerifyBackup(i + 1, true));
// Unable to restore the latest backup.
ASSERT_NOK(backup_engine_->RestoreDBFromBackup(i + 1, dbname_, dbname_));
CloseBackupEngine();
// Reset the factory to the same as the one used in the db.
backupable_options_->file_checksum_gen_factory = hash_factory;
OpenBackupEngine();
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
ASSERT_OK(backup_engine_->RestoreDBFromBackup(i + 1, dbname_, dbname_));
ASSERT_OK(backup_engine_->DeleteBackup(i + 1));
// undo the ++i above now that backup i + 1 has been deleted
--i;
CloseDBAndBackupEngine();
// 3) with one custom checksum function (FileHash32GenFactory) for db
// but two custom checksum functions (FileHashGenFactory) for backup
// note that the checksum factory for backup does know the checksum
// function in the db
++i;
options_.file_checksum_gen_factory = hash32_factory;
backupable_options_->file_checksum_gen_factory = hash_factory;
// open with old backup
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
FillDB(db_.get(), 0, keys_iteration * (i + 1));
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
// the two earlier backups and the new one must all verify with checksums
ASSERT_OK(backup_engine_->VerifyBackup(i - 1, true));
ASSERT_OK(backup_engine_->VerifyBackup(i, true));
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(i - 1, 0, keys_iteration * (i - 1),
keys_iteration * i);
AssertBackupConsistency(i, 0, keys_iteration * i, keys_iteration * (i + 1));
AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
keys_iteration * (i + 2));
// 4) no custom checksums
++i;
options_.file_checksum_gen_factory = nullptr;
backupable_options_->file_checksum_gen_factory = nullptr;
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
FillDB(db_.get(), 0, keys_iteration * (i + 1));
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
// every backup taken so far (ids 1 .. i + 1) must verify with checksums
for (int j = 0; j <= i; ++j) {
ASSERT_OK(backup_engine_->VerifyBackup(j + 1, true));
}
CloseDBAndBackupEngine();
for (int j = 0; j <= i; ++j) {
AssertBackupConsistency(j + 1, 0, keys_iteration * (j + 1),
keys_iteration * (j + 2));
}
// delete old data
DestroyDB(dbname_, options_);
}
}
// Exercises repeated backups when no new table files have been written since
// the previous backup. For every share option in kAllShareOptions and every
// checksum factory below (including nullptr, i.e. no custom factory), the DB
// and the backup engine are configured with the same factory, then:
//   1. backups 1 and 2 are created back to back and verified with checksums;
//   2. the DB and backup engine are reopened without destroying old data and
//      backup 3 is created and verified the same way.
TEST_F(BackupableDBTest, CustomChecksumNoNewDbTables) {
  const int keys_iteration = 5000;
  std::vector<std::shared_ptr<FileChecksumGenFactory>> checksum_factories{
      nullptr, GetFileChecksumGenCrc32cFactory(),
      std::make_shared<FileHash32GenFactory>(),
      std::make_shared<FileHashGenFactory>()};
  for (const auto& sopt : kAllShareOptions) {
    for (const auto& f : checksum_factories) {
      // Use the same checksum factory on the DB side and the backup side.
      options_.file_checksum_gen_factory = f;
      backupable_options_->file_checksum_gen_factory = f;
      OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
                            sopt);
      FillDB(db_.get(), 0, keys_iteration);
      ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
      ASSERT_OK(backup_engine_->VerifyBackup(1, true));
      // No new table files have been created since the last backup.
      ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
      ASSERT_OK(backup_engine_->VerifyBackup(2, true));
      CloseDBAndBackupEngine();
      // Both backups were taken of the same DB contents, so they are checked
      // against the same key ranges.
      AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * 2);
      AssertBackupConsistency(2, 0, keys_iteration, keys_iteration * 2);
      OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
                            sopt);
      // No new table files have been created since the last backup and backup
      // engine opening
      ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
      ASSERT_OK(backup_engine_->VerifyBackup(3, true));
      CloseDBAndBackupEngine();
      AssertBackupConsistency(3, 0, keys_iteration, keys_iteration * 2);
      // delete old data; check the returned Status instead of dropping it so
      // that a failed cleanup is reported by the test.
      ASSERT_OK(DestroyDB(dbname_, options_));
    }
  }
}
TEST_F(BackupableDBTest, FileCollision) { TEST_F(BackupableDBTest, FileCollision) {
const int keys_iteration = 5000; const int keys_iteration = 5000;
for (const auto& sopt : kAllShareOptions) { for (const auto& sopt : kAllShareOptions) {