From 225abffd8fc8b85beced2ff6089879dbc1b89ddf Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 4 Jan 2021 11:50:29 -0800 Subject: [PATCH] Verify file checksum generator name (#7824) Summary: Previously we only had a debug assertion to check the right generator was being used for verification. However a user hit a problem in production where their factory was creating the wrong generator for some files, leading to checksum mismatches. It would have been easier to debug if we verified in optimized builds that the generator with the proper name is used. This PR adds such verification. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7824 Reviewed By: zhichao-cao Differential Revision: D25740254 Pulled By: ajkr fbshipit-source-id: a6231521747605021bad3231484b5d4f99f4044f --- HISTORY.md | 4 ++++ db/db_basic_test.cc | 24 ++++++++++++++++++++++++ file/file_util.cc | 26 ++++++++++++++++++-------- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 543ad6048..e873a20c2 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,8 @@ # Rocksdb Change Log +## Unreleased +### Behavior Changes +* When verifying full file checksum with `DB::VerifyFileChecksums()`, we now fail with `Status::InvalidArgument` if the name of the checksum generator used for verification does not match the name of the checksum generator used for protecting the file when it was created. + ## 6.16.0 (12/18/2020) ### Behavior Changes * Attempting to write a merge operand without explicitly configuring `merge_operator` now fails immediately, causing the DB to enter read-only mode. Previously, failure was deferred until the `merge_operator` was needed by a user read or a background operation. diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 257ce625c..5acc99868 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -21,6 +21,7 @@ #if !defined(ROCKSDB_LITE) #include "test_util/sync_point.h" #endif +#include "util/file_checksum_helper.h" #include "util/random.h" #include "utilities/fault_injection_env.h" #include "utilities/merge_operators.h" @@ -3580,6 +3581,29 @@ TEST_F(DBBasicTest, VerifyFileChecksums) { ASSERT_OK(Flush()); ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); + + // Does the right thing but with the wrong name -- using it should lead to an + // error. + class MisnamedFileChecksumGenerator : public FileChecksumGenCrc32c { + public: + MisnamedFileChecksumGenerator(const FileChecksumGenContext& context) + : FileChecksumGenCrc32c(context) {} + + const char* Name() const override { return "sha1"; } + }; + + class MisnamedFileChecksumGenFactory : public FileChecksumGenCrc32cFactory { + public: + std::unique_ptr CreateFileChecksumGenerator( + const FileChecksumGenContext& context) override { + return std::unique_ptr( + new MisnamedFileChecksumGenerator(context)); + } + }; + + options.file_checksum_gen_factory.reset(new MisnamedFileChecksumGenFactory()); + Reopen(options); + ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsInvalidArgument()); } #endif // !ROCKSDB_LITE diff --git a/file/file_util.cc b/file/file_util.cc index 4e92673d6..7da27f3c1 100644 --- a/file/file_util.cc +++ b/file/file_util.cc @@ -124,8 +124,10 @@ bool IsWalDirSameAsDBPath(const ImmutableDBOptions* db_options) { } // requested_checksum_func_name brings the function name of the checksum -// generator in checksum_factory. Checksum factories may use or ignore -// requested_checksum_func_name. +// generator in checksum_factory. Empty string is permitted, in which case the +// name of the generator created by the factory is unchecked. When +// `requested_checksum_func_name` is non-empty, however, the created generator's +// name must match it, otherwise an `InvalidArgument` error is returned. IOStatus GenerateOneFileChecksum( FileSystem* fs, const std::string& file_path, FileChecksumGenFactory* checksum_factory, @@ -151,14 +153,22 @@ IOStatus GenerateOneFileChecksum( requested_checksum_func_name + " from checksum factory: " + checksum_factory->Name(); return IOStatus::InvalidArgument(msg); + } else { + // For backward compatibility and use in file ingestion clients where there + // is no stored checksum function name, `requested_checksum_func_name` can + // be empty. If we give the requested checksum function name, we expect it + // is the same name of the checksum generator. + if (!requested_checksum_func_name.empty() && + checksum_generator->Name() != requested_checksum_func_name) { + std::string msg = "Expected file checksum generator named '" + + requested_checksum_func_name + + "', while the factory created one " + "named '" + + checksum_generator->Name() + "'"; + return IOStatus::InvalidArgument(msg); + } } - // For backward compatable, requested_checksum_func_name can be empty. - // If we give the requested checksum function name, we expect it is the - // same name of the checksum generator. - assert(!checksum_generator || requested_checksum_func_name.empty() || - requested_checksum_func_name == checksum_generator->Name()); - uint64_t size; IOStatus io_s; std::unique_ptr reader;