Add API to verify whole sst file checksum (#7578)

Summary:
The existing API `VerifyChecksum()` allows applications to verify the block checksums of sst files.
Since the whole-file, user-specified checksum is tracked in the MANIFEST, we can expose a new
API to verify the whole-file checksums of sst files.

```
// Compute table file checksums if applicable and compare with MANIFEST.
// Returns OK if no file has mismatching whole-file checksum.
Status DB::VerifyFileChecksums(const ReadOptions& /*read_options*/);
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7578

Test Plan: make check

Reviewed By: pdillinger

Differential Revision: D24436783

Pulled By: riversand963

fbshipit-source-id: 52b51519b842f2b3c4e3351998a97c86cbec85b3
This commit is contained in:
Yanqin Jin 2020-11-03 20:33:45 -08:00 committed by Facebook GitHub Bot
parent 06a92fcf5c
commit fde0cd7ced
7 changed files with 162 additions and 6 deletions

View File

@ -14,6 +14,7 @@
### Public API Change ### Public API Change
* Deprecate `BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache` and `BlockBasedTableOptions::pin_top_level_index_and_filter`. These options still take effect until users migrate to the replacement APIs in `BlockBasedTableOptions::metadata_cache_options`. Migration guidance can be found in the API comments on the deprecated options. * Deprecate `BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache` and `BlockBasedTableOptions::pin_top_level_index_and_filter`. These options still take effect until users migrate to the replacement APIs in `BlockBasedTableOptions::metadata_cache_options`. Migration guidance can be found in the API comments on the deprecated options.
* Add new API `DB::VerifyFileChecksums` to verify whole-file SST checksums against the corresponding entries in the MANIFEST, if present. The current implementation requires scanning the files and recomputing their checksums.
### Behavior Changes ### Behavior Changes
* The dictionary compression settings specified in `ColumnFamilyOptions::compression_opts` now additionally affect files generated by flush and compaction to non-bottommost level. Previously those settings at most affected files generated by compaction to bottommost level, depending on whether `ColumnFamilyOptions::bottommost_compression_opts` overrode them. Users who relied on dictionary compression settings in `ColumnFamilyOptions::compression_opts` affecting only the bottommost level can keep the behavior by moving their dictionary settings to `ColumnFamilyOptions::bottommost_compression_opts` and setting its `enabled` flag. * The dictionary compression settings specified in `ColumnFamilyOptions::compression_opts` now additionally affect files generated by flush and compaction to non-bottommost level. Previously those settings at most affected files generated by compaction to bottommost level, depending on whether `ColumnFamilyOptions::bottommost_compression_opts` overrode them. Users who relied on dictionary compression settings in `ColumnFamilyOptions::compression_opts` affecting only the bottommost level can keep the behavior by moving their dictionary settings to `ColumnFamilyOptions::bottommost_compression_opts` and setting its `enabled` flag.

View File

@ -37,7 +37,7 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
static const int kValueSize = 1000; static constexpr int kValueSize = 1000;
class CorruptionTest : public testing::Test { class CorruptionTest : public testing::Test {
public: public:
@ -68,9 +68,16 @@ class CorruptionTest : public testing::Test {
} }
~CorruptionTest() override { ~CorruptionTest() override {
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->LoadDependency({});
SyncPoint::GetInstance()->ClearAllCallBacks();
delete db_; delete db_;
db_ = nullptr; db_ = nullptr;
DestroyDB(dbname_, Options()); if (getenv("KEEP_DB")) {
fprintf(stdout, "db is still at %s\n", dbname_.c_str());
} else {
EXPECT_OK(DestroyDB(dbname_, Options()));
}
} }
void CloseDb() { void CloseDb() {
@ -825,6 +832,41 @@ TEST_F(CorruptionTest, DisableKeyOrderCheck) {
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
} }
// Checks that DB::VerifyFileChecksums() succeeds on an intact DB and reports
// Corruption after the table files have been corrupted on disk.
TEST_F(CorruptionTest, VerifyWholeTableChecksum) {
  // Start from a fresh DB that has a file checksum generator configured.
  CloseDb();
  Options options;
  options.env = &env_;
  ASSERT_OK(DestroyDB(dbname_, options));
  options.create_if_missing = true;
  options.file_checksum_gen_factory =
      ROCKSDB_NAMESPACE::GetFileChecksumGenCrc32cFactory();
  Reopen(&options);

  Build(10, 5);

  // Before any corruption, whole-file verification must pass.
  ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
  CloseDb();

  // Corrupt the first byte of each table file, this must be data block.
  Corrupt(kTableFile, 0, 1);

  ASSERT_OK(TryReopen(&options));

  // Install a callback on the mismatch sync point so the test can observe
  // both how often it fires and the status that is passed to it.
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  int count{0};
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::VerifySstFileChecksum:mismatch", [&](void* arg) {
        // The sync point hands over a Status* as void*; static_cast is the
        // appropriate named cast for that conversion.
        auto* s = static_cast<Status*>(arg);
        // Use a gtest assertion rather than assert() so that the null check
        // is still performed when NDEBUG is defined.
        ASSERT_TRUE(s != nullptr);
        ++count;
        ASSERT_NOK(*s);
      });
  SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsCorruption());
  // The mismatch sync point must have fired exactly once.
  ASSERT_EQ(1, count);
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) { int main(int argc, char** argv) {

View File

@ -3304,6 +3304,28 @@ TEST_F(DBBasicTest, ManifestWriteFailure) {
Reopen(options); Reopen(options);
} }
#ifndef ROCKSDB_LITE
// Exercises DB::VerifyFileChecksums() before and after a file checksum
// generator factory is configured on the DB options.
TEST_F(DBBasicTest, VerifyFileChecksums) {
  Options opts = GetDefaultOptions();
  opts.env = env_;
  opts.create_if_missing = true;
  DestroyAndReopen(opts);

  ASSERT_OK(Put("a", "value"));
  ASSERT_OK(Flush());

  const ReadOptions read_opts{};

  // This test has not set file_checksum_gen_factory yet; the call is expected
  // to be rejected with InvalidArgument.
  {
    const Status without_factory = db_->VerifyFileChecksums(read_opts);
    ASSERT_TRUE(without_factory.IsInvalidArgument());
  }

  // Once a factory is configured, verification is expected to succeed even
  // though the existing file was flushed before the factory was set.
  opts.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  Reopen(opts);
  ASSERT_OK(db_->VerifyFileChecksums(read_opts));

  // Write an L0 with checksum computed.
  ASSERT_OK(Put("b", "value"));
  ASSERT_OK(Flush());

  ASSERT_OK(db_->VerifyFileChecksums(read_opts));
}
#endif // !ROCKSDB_LITE
// A test class for intercepting random reads and injecting artificial // A test class for intercepting random reads and injecting artificial
// delays. Used for testing the deadline/timeout feature // delays. Used for testing the deadline/timeout feature
class DBBasicTestDeadline class DBBasicTestDeadline

View File

@ -18,6 +18,7 @@
#include <cstdio> #include <cstdio>
#include <map> #include <map>
#include <set> #include <set>
#include <sstream>
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
@ -4727,8 +4728,29 @@ Status DBImpl::CreateColumnFamilyWithImport(
return status; return status;
} }
// Verifies the whole-file checksums of table files by delegating to
// VerifyChecksumInternal() with use_file_checksum set to true.
Status DBImpl::VerifyFileChecksums(const ReadOptions& read_options) {
return VerifyChecksumInternal(read_options, /*use_file_checksum=*/true);
}
Status DBImpl::VerifyChecksum(const ReadOptions& read_options) { Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
return VerifyChecksumInternal(read_options, /*use_file_checksum=*/false);
}
Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
bool use_file_checksum) {
Status s; Status s;
if (use_file_checksum) {
FileChecksumGenFactory* const file_checksum_gen_factory =
immutable_db_options_.file_checksum_gen_factory.get();
if (!file_checksum_gen_factory) {
s = Status::InvalidArgument(
"Cannot verify file checksum if options.file_checksum_gen_factory is "
"null");
return s;
}
}
std::vector<ColumnFamilyData*> cfd_list; std::vector<ColumnFamilyData*> cfd_list;
{ {
InstrumentedMutexLock l(&mutex_); InstrumentedMutexLock l(&mutex_);
@ -4743,11 +4765,12 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
for (auto cfd : cfd_list) { for (auto cfd : cfd_list) {
sv_list.push_back(cfd->GetReferencedSuperVersion(this)); sv_list.push_back(cfd->GetReferencedSuperVersion(this));
} }
for (auto& sv : sv_list) { for (auto& sv : sv_list) {
VersionStorageInfo* vstorage = sv->current->storage_info(); VersionStorageInfo* vstorage = sv->current->storage_info();
ColumnFamilyData* cfd = sv->current->cfd(); ColumnFamilyData* cfd = sv->current->cfd();
Options opts; Options opts;
{ if (!use_file_checksum) {
InstrumentedMutexLock l(&mutex_); InstrumentedMutexLock l(&mutex_);
opts = Options(BuildDBOptions(immutable_db_options_, mutable_db_options_), opts = Options(BuildDBOptions(immutable_db_options_, mutable_db_options_),
cfd->GetLatestCFOptions()); cfd->GetLatestCFOptions());
@ -4755,11 +4778,18 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
for (int i = 0; i < vstorage->num_non_empty_levels() && s.ok(); i++) { for (int i = 0; i < vstorage->num_non_empty_levels() && s.ok(); i++) {
for (size_t j = 0; j < vstorage->LevelFilesBrief(i).num_files && s.ok(); for (size_t j = 0; j < vstorage->LevelFilesBrief(i).num_files && s.ok();
j++) { j++) {
const auto& fd = vstorage->LevelFilesBrief(i).files[j].fd; const auto& fd_with_krange = vstorage->LevelFilesBrief(i).files[j];
const auto& fd = fd_with_krange.fd;
const FileMetaData* fmeta = fd_with_krange.file_metadata;
assert(fmeta);
std::string fname = TableFileName(cfd->ioptions()->cf_paths, std::string fname = TableFileName(cfd->ioptions()->cf_paths,
fd.GetNumber(), fd.GetPathId()); fd.GetNumber(), fd.GetPathId());
s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_, if (use_file_checksum) {
read_options, fname); s = VerifySstFileChecksum(*fmeta, fname, read_options);
} else {
s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_,
read_options, fname);
}
} }
} }
if (!s.ok()) { if (!s.ok()) {
@ -4790,6 +4820,34 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
return s; return s;
} }
// Recomputes the whole-file checksum of the table file `fname` and compares
// it with the checksum recorded in `fmeta`.
//
// fmeta: metadata of the file; supplies the expected checksum and the name of
//        the checksum function to use.
// fname: path of the table file to read.
// read_options: only readahead_size is consulted here.
//
// Returns: OK if the file has no recorded checksum (kUnknownFileChecksum) or
// the recomputed checksum matches; Corruption on a mismatch; otherwise the
// status reported by GenerateOneFileChecksum().
Status DBImpl::VerifySstFileChecksum(const FileMetaData& fmeta,
                                     const std::string& fname,
                                     const ReadOptions& read_options) {
  // Nothing to compare against when no checksum was recorded for this file.
  if (fmeta.file_checksum == kUnknownFileChecksum) {
    return Status();
  }

  std::string actual_checksum;
  std::string actual_func_name;
  Status status = ROCKSDB_NAMESPACE::GenerateOneFileChecksum(
      fs_.get(), fname, immutable_db_options_.file_checksum_gen_factory.get(),
      fmeta.file_checksum_func_name, &actual_checksum, &actual_func_name,
      read_options.readahead_size, immutable_db_options_.allow_mmap_reads,
      io_tracer_);
  if (!status.ok()) {
    return status;
  }

  assert(fmeta.file_checksum_func_name == actual_func_name);
  if (actual_checksum == fmeta.file_checksum) {
    return status;
  }

  // Report both checksums in hex so the mismatch can be diagnosed.
  std::ostringstream msg;
  msg << fname << " file checksum mismatch, "
      << "expecting " << Slice(fmeta.file_checksum).ToString(/*hex=*/true)
      << ", but actual " << Slice(actual_checksum).ToString(/*hex=*/true);
  status = Status::Corruption(msg.str());
  TEST_SYNC_POINT_CALLBACK("DBImpl::VerifySstFileChecksum:mismatch", &status);
  return status;
}
void DBImpl::NotifyOnExternalFileIngested( void DBImpl::NotifyOnExternalFileIngested(
ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job) { ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job) {
if (immutable_db_options_.listeners.empty()) { if (immutable_db_options_.listeners.empty()) {

View File

@ -431,8 +431,28 @@ class DBImpl : public DB {
const ExportImportFilesMetaData& metadata, const ExportImportFilesMetaData& metadata,
ColumnFamilyHandle** handle) override; ColumnFamilyHandle** handle) override;
// Verify the whole-file checksums of the table files in db. See
// VerifyChecksumInternal() for details.
using DB::VerifyFileChecksums;
Status VerifyFileChecksums(const ReadOptions& read_options) override;
using DB::VerifyChecksum; using DB::VerifyChecksum;
virtual Status VerifyChecksum(const ReadOptions& /*read_options*/) override; virtual Status VerifyChecksum(const ReadOptions& /*read_options*/) override;
// Verify the checksums of files in db. Currently only tables are checked.
//
// read_options: controls file I/O behavior, e.g. read ahead size while
// reading all the live table files.
//
// use_file_checksum: if false, verify the block checksums of all live table
// files in db. Otherwise, obtain the file checksums and
// compare with the MANIFEST. Currently, file checksums are
// recomputed by reading all table files.
//
// Returns: OK if there is no file whose file or block checksum mismatches.
Status VerifyChecksumInternal(const ReadOptions& read_options,
bool use_file_checksum);
// Recompute the whole-file checksum of one table file and compare it with the
// checksum recorded in its metadata.
//
// fmeta: metadata of the table file, holding the expected checksum and the
//        name of the checksum function.
// fname: path of the table file to read.
// read_options: controls file I/O behavior, e.g. read ahead size.
//
// Returns: OK if the checksums match or no checksum was recorded for the
// file; Corruption if they differ.
Status VerifySstFileChecksum(const FileMetaData& fmeta,
                             const std::string& fname,
                             const ReadOptions& read_options);
using DB::StartTrace; using DB::StartTrace;
virtual Status StartTrace( virtual Status StartTrace(

View File

@ -1443,6 +1443,14 @@ class DB {
const ExportImportFilesMetaData& metadata, const ExportImportFilesMetaData& metadata,
ColumnFamilyHandle** handle) = 0; ColumnFamilyHandle** handle) = 0;
// Verify the checksums of files in db. Currently the whole-file checksums of
// table files are checked.
// This default implementation returns NotSupported.
virtual Status VerifyFileChecksums(const ReadOptions& /*read_options*/) {
return Status::NotSupported("File verification not supported");
}
// Verify the block checksums of files in db. The block checksums of table
// files are checked.
virtual Status VerifyChecksum(const ReadOptions& read_options) = 0; virtual Status VerifyChecksum(const ReadOptions& read_options) = 0;
virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); } virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); }

View File

@ -141,6 +141,11 @@ class StackableDB : public DB {
import_options, metadata, handle); import_options, metadata, handle);
} }
// Forwards whole-file checksum verification to the wrapped db_.
using DB::VerifyFileChecksums;
Status VerifyFileChecksums(const ReadOptions& read_opts) override {
return db_->VerifyFileChecksums(read_opts);
}
virtual Status VerifyChecksum() override { return db_->VerifyChecksum(); } virtual Status VerifyChecksum() override { return db_->VerifyChecksum(); }
virtual Status VerifyChecksum(const ReadOptions& options) override { virtual Status VerifyChecksum(const ReadOptions& options) override {