Add API to verify whole sst file checksum (#7578)
Summary:
Existing API `VerifyChecksum()` allows applications to verify SST files' block checksums. Since the whole-file, user-specified checksum is tracked in the MANIFEST, we can expose a new API to verify SST files' whole-file checksums.

```
// Compute table file checksums if applicable and compare with MANIFEST.
// Returns OK if no file has mismatching whole-file checksum.
Status DB::VerifyFileChecksums(const ReadOptions& /*read_options*/);
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7578

Test Plan: make check

Reviewed By: pdillinger

Differential Revision: D24436783

Pulled By: riversand963

fbshipit-source-id: 52b51519b842f2b3c4e3351998a97c86cbec85b3
This commit is contained in:
parent
06a92fcf5c
commit
fde0cd7ced
@ -14,6 +14,7 @@
|
|||||||
|
|
||||||
### Public API Change
|
### Public API Change
|
||||||
* Deprecate `BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache` and `BlockBasedTableOptions::pin_top_level_index_and_filter`. These options still take effect until users migrate to the replacement APIs in `BlockBasedTableOptions::metadata_cache_options`. Migration guidance can be found in the API comments on the deprecated options.
|
* Deprecate `BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache` and `BlockBasedTableOptions::pin_top_level_index_and_filter`. These options still take effect until users migrate to the replacement APIs in `BlockBasedTableOptions::metadata_cache_options`. Migration guidance can be found in the API comments on the deprecated options.
|
||||||
|
* Add new API `DB::VerifyFileChecksums` to verify SST file checksums against the corresponding entries in the MANIFEST, if present. The current implementation requires scanning the files and recomputing their checksums.
|
||||||
|
|
||||||
### Behavior Changes
|
### Behavior Changes
|
||||||
* The dictionary compression settings specified in `ColumnFamilyOptions::compression_opts` now additionally affect files generated by flush and compaction to non-bottommost level. Previously those settings at most affected files generated by compaction to bottommost level, depending on whether `ColumnFamilyOptions::bottommost_compression_opts` overrode them. Users who relied on dictionary compression settings in `ColumnFamilyOptions::compression_opts` affecting only the bottommost level can keep the behavior by moving their dictionary settings to `ColumnFamilyOptions::bottommost_compression_opts` and setting its `enabled` flag.
|
* The dictionary compression settings specified in `ColumnFamilyOptions::compression_opts` now additionally affect files generated by flush and compaction to non-bottommost level. Previously those settings at most affected files generated by compaction to bottommost level, depending on whether `ColumnFamilyOptions::bottommost_compression_opts` overrode them. Users who relied on dictionary compression settings in `ColumnFamilyOptions::compression_opts` affecting only the bottommost level can keep the behavior by moving their dictionary settings to `ColumnFamilyOptions::bottommost_compression_opts` and setting its `enabled` flag.
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
|
|
||||||
namespace ROCKSDB_NAMESPACE {
|
namespace ROCKSDB_NAMESPACE {
|
||||||
|
|
||||||
static const int kValueSize = 1000;
|
static constexpr int kValueSize = 1000;
|
||||||
|
|
||||||
class CorruptionTest : public testing::Test {
|
class CorruptionTest : public testing::Test {
|
||||||
public:
|
public:
|
||||||
@ -68,9 +68,16 @@ class CorruptionTest : public testing::Test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
~CorruptionTest() override {
|
~CorruptionTest() override {
|
||||||
|
SyncPoint::GetInstance()->DisableProcessing();
|
||||||
|
SyncPoint::GetInstance()->LoadDependency({});
|
||||||
|
SyncPoint::GetInstance()->ClearAllCallBacks();
|
||||||
delete db_;
|
delete db_;
|
||||||
db_ = nullptr;
|
db_ = nullptr;
|
||||||
DestroyDB(dbname_, Options());
|
if (getenv("KEEP_DB")) {
|
||||||
|
fprintf(stdout, "db is still at %s\n", dbname_.c_str());
|
||||||
|
} else {
|
||||||
|
EXPECT_OK(DestroyDB(dbname_, Options()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CloseDb() {
|
void CloseDb() {
|
||||||
@ -825,6 +832,41 @@ TEST_F(CorruptionTest, DisableKeyOrderCheck) {
|
|||||||
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that DB::VerifyFileChecksums() accepts an intact table file and,
// once the file has been corrupted on disk, returns Corruption while hitting
// the "DBImpl::VerifySstFileChecksum:mismatch" sync point exactly once.
TEST_F(CorruptionTest, VerifyWholeTableChecksum) {
  CloseDb();
  Options options;
  options.env = &env_;
  ASSERT_OK(DestroyDB(dbname_, options));
  options.create_if_missing = true;
  // VerifyFileChecksums() needs a file checksum generator factory to be set.
  options.file_checksum_gen_factory =
      ROCKSDB_NAMESPACE::GetFileChecksumGenCrc32cFactory();
  Reopen(&options);

  Build(10, 5);

  // Sanity check: verification passes before anything is corrupted.
  ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
  CloseDb();

  // Corrupt the first byte of each table file; this must be in a data block.
  Corrupt(kTableFile, 0, 1);

  ASSERT_OK(TryReopen(&options));

  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  int count{0};
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::VerifySstFileChecksum:mismatch", [&](void* arg) {
        // The sync point hands over a pointer to the Status being returned.
        auto* s = static_cast<Status*>(arg);
        // Use a gtest assertion rather than assert() so the null check is
        // still performed when the test is built with NDEBUG.
        ASSERT_NE(s, nullptr);
        ++count;
        ASSERT_NOK(*s);
      });
  SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsCorruption());
  ASSERT_EQ(1, count);

  // Unregister the callback before `count`, which it captures by reference,
  // goes out of scope.
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}
|
||||||
|
|
||||||
} // namespace ROCKSDB_NAMESPACE
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
@ -3304,6 +3304,28 @@ TEST_F(DBBasicTest, ManifestWriteFailure) {
|
|||||||
Reopen(options);
|
Reopen(options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef ROCKSDB_LITE
|
||||||
|
// Exercises DB::VerifyFileChecksums() first without and then with a file
// checksum generator factory configured.
TEST_F(DBBasicTest, VerifyFileChecksums) {
  const ReadOptions read_opts;

  Options options = GetDefaultOptions();
  options.env = env_;
  options.create_if_missing = true;
  DestroyAndReopen(options);

  ASSERT_OK(Put("a", "value"));
  ASSERT_OK(Flush());
  // Presumably GetDefaultOptions() leaves file_checksum_gen_factory unset, in
  // which case the request is expected to be rejected as InvalidArgument.
  ASSERT_TRUE(db_->VerifyFileChecksums(read_opts).IsInvalidArgument());

  // With a factory configured, verification is expected to succeed even
  // though the file flushed above was written before the factory was set.
  options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  Reopen(options);
  ASSERT_OK(db_->VerifyFileChecksums(read_opts));

  // Write an L0 with checksum computed.
  ASSERT_OK(Put("b", "value"));
  ASSERT_OK(Flush());

  ASSERT_OK(db_->VerifyFileChecksums(read_opts));
}
|
||||||
|
#endif // !ROCKSDB_LITE
|
||||||
|
|
||||||
// A test class for intercepting random reads and injecting artificial
|
// A test class for intercepting random reads and injecting artificial
|
||||||
// delays. Used for testing the deadline/timeout feature
|
// delays. Used for testing the deadline/timeout feature
|
||||||
class DBBasicTestDeadline
|
class DBBasicTestDeadline
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
#include <sstream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
@ -4727,8 +4728,29 @@ Status DBImpl::CreateColumnFamilyWithImport(
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Public entry point for whole-file checksum verification. All of the work is
// delegated to VerifyChecksumInternal() with file-checksum mode switched on.
Status DBImpl::VerifyFileChecksums(const ReadOptions& read_options) {
  constexpr bool kUseFileChecksum = true;
  return VerifyChecksumInternal(read_options, kUseFileChecksum);
}
|
||||||
|
|
||||||
Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
|
// Public entry point for block checksum verification. All of the work is
// delegated to VerifyChecksumInternal() with file-checksum mode switched off.
Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
  constexpr bool kUseFileChecksum = false;
  return VerifyChecksumInternal(read_options, kUseFileChecksum);
}
|
||||||
|
|
||||||
|
Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
|
||||||
|
bool use_file_checksum) {
|
||||||
Status s;
|
Status s;
|
||||||
|
|
||||||
|
if (use_file_checksum) {
|
||||||
|
FileChecksumGenFactory* const file_checksum_gen_factory =
|
||||||
|
immutable_db_options_.file_checksum_gen_factory.get();
|
||||||
|
if (!file_checksum_gen_factory) {
|
||||||
|
s = Status::InvalidArgument(
|
||||||
|
"Cannot verify file checksum if options.file_checksum_gen_factory is "
|
||||||
|
"null");
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<ColumnFamilyData*> cfd_list;
|
std::vector<ColumnFamilyData*> cfd_list;
|
||||||
{
|
{
|
||||||
InstrumentedMutexLock l(&mutex_);
|
InstrumentedMutexLock l(&mutex_);
|
||||||
@ -4743,11 +4765,12 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
|
|||||||
for (auto cfd : cfd_list) {
|
for (auto cfd : cfd_list) {
|
||||||
sv_list.push_back(cfd->GetReferencedSuperVersion(this));
|
sv_list.push_back(cfd->GetReferencedSuperVersion(this));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto& sv : sv_list) {
|
for (auto& sv : sv_list) {
|
||||||
VersionStorageInfo* vstorage = sv->current->storage_info();
|
VersionStorageInfo* vstorage = sv->current->storage_info();
|
||||||
ColumnFamilyData* cfd = sv->current->cfd();
|
ColumnFamilyData* cfd = sv->current->cfd();
|
||||||
Options opts;
|
Options opts;
|
||||||
{
|
if (!use_file_checksum) {
|
||||||
InstrumentedMutexLock l(&mutex_);
|
InstrumentedMutexLock l(&mutex_);
|
||||||
opts = Options(BuildDBOptions(immutable_db_options_, mutable_db_options_),
|
opts = Options(BuildDBOptions(immutable_db_options_, mutable_db_options_),
|
||||||
cfd->GetLatestCFOptions());
|
cfd->GetLatestCFOptions());
|
||||||
@ -4755,11 +4778,18 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
|
|||||||
for (int i = 0; i < vstorage->num_non_empty_levels() && s.ok(); i++) {
|
for (int i = 0; i < vstorage->num_non_empty_levels() && s.ok(); i++) {
|
||||||
for (size_t j = 0; j < vstorage->LevelFilesBrief(i).num_files && s.ok();
|
for (size_t j = 0; j < vstorage->LevelFilesBrief(i).num_files && s.ok();
|
||||||
j++) {
|
j++) {
|
||||||
const auto& fd = vstorage->LevelFilesBrief(i).files[j].fd;
|
const auto& fd_with_krange = vstorage->LevelFilesBrief(i).files[j];
|
||||||
|
const auto& fd = fd_with_krange.fd;
|
||||||
|
const FileMetaData* fmeta = fd_with_krange.file_metadata;
|
||||||
|
assert(fmeta);
|
||||||
std::string fname = TableFileName(cfd->ioptions()->cf_paths,
|
std::string fname = TableFileName(cfd->ioptions()->cf_paths,
|
||||||
fd.GetNumber(), fd.GetPathId());
|
fd.GetNumber(), fd.GetPathId());
|
||||||
s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_,
|
if (use_file_checksum) {
|
||||||
read_options, fname);
|
s = VerifySstFileChecksum(*fmeta, fname, read_options);
|
||||||
|
} else {
|
||||||
|
s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_,
|
||||||
|
read_options, fname);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
@ -4790,6 +4820,34 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Recomputes the whole-file checksum of the table file at `fname` and
// compares it with the checksum recorded in `fmeta`.
//
// fmeta:        metadata of the file; supplies the expected checksum and the
//               name of the checksum function to use.
// fname:        path of the file to read.
// read_options: only readahead_size is consulted here.
//
// Returns OK if the checksums match, or if `fmeta` carries
// kUnknownFileChecksum (such a file is skipped without being read). Returns
// Corruption on a mismatch; otherwise returns the status reported by
// GenerateOneFileChecksum().
Status DBImpl::VerifySstFileChecksum(const FileMetaData& fmeta,
                                     const std::string& fname,
                                     const ReadOptions& read_options) {
  const auto& expected_checksum = fmeta.file_checksum;
  if (expected_checksum == kUnknownFileChecksum) {
    // Nothing was recorded for this file, so there is nothing to compare.
    return Status::OK();
  }

  std::string actual_checksum;
  std::string actual_func_name;
  Status s = ROCKSDB_NAMESPACE::GenerateOneFileChecksum(
      fs_.get(), fname, immutable_db_options_.file_checksum_gen_factory.get(),
      fmeta.file_checksum_func_name, &actual_checksum, &actual_func_name,
      read_options.readahead_size, immutable_db_options_.allow_mmap_reads,
      io_tracer_);
  if (!s.ok()) {
    return s;
  }

  assert(fmeta.file_checksum_func_name == actual_func_name);
  if (actual_checksum == expected_checksum) {
    return s;
  }

  // Mismatch: report both checksums in hex so they are printable.
  std::ostringstream msg;
  msg << fname << " file checksum mismatch, "
      << "expecting " << Slice(expected_checksum).ToString(/*hex=*/true)
      << ", but actual " << Slice(actual_checksum).ToString(/*hex=*/true);
  s = Status::Corruption(msg.str());
  TEST_SYNC_POINT_CALLBACK("DBImpl::VerifySstFileChecksum:mismatch", &s);
  return s;
}
|
||||||
|
|
||||||
void DBImpl::NotifyOnExternalFileIngested(
|
void DBImpl::NotifyOnExternalFileIngested(
|
||||||
ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job) {
|
ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job) {
|
||||||
if (immutable_db_options_.listeners.empty()) {
|
if (immutable_db_options_.listeners.empty()) {
|
||||||
|
@ -431,8 +431,28 @@ class DBImpl : public DB {
|
|||||||
const ExportImportFilesMetaData& metadata,
|
const ExportImportFilesMetaData& metadata,
|
||||||
ColumnFamilyHandle** handle) override;
|
ColumnFamilyHandle** handle) override;
|
||||||
|
|
||||||
|
using DB::VerifyFileChecksums;
|
||||||
|
Status VerifyFileChecksums(const ReadOptions& read_options) override;
|
||||||
|
|
||||||
using DB::VerifyChecksum;
|
using DB::VerifyChecksum;
|
||||||
virtual Status VerifyChecksum(const ReadOptions& /*read_options*/) override;
|
virtual Status VerifyChecksum(const ReadOptions& /*read_options*/) override;
|
||||||
|
// Verify the checksums of files in db. Currently only tables are checked.
|
||||||
|
//
|
||||||
|
// read_options: controls file I/O behavior, e.g. read ahead size while
|
||||||
|
// reading all the live table files.
|
||||||
|
//
|
||||||
|
// use_file_checksum: if false, verify the block checksums of all live tables
|
||||||
|
// in db. Otherwise, obtain the file checksums and compare
|
||||||
|
// with the MANIFEST. Currently, file checksums are
|
||||||
|
// recomputed by reading all table files.
|
||||||
|
//
|
||||||
|
// Returns: OK if there is no file whose file or block checksum mismatches.
|
||||||
|
Status VerifyChecksumInternal(const ReadOptions& read_options,
|
||||||
|
bool use_file_checksum);
|
||||||
|
|
||||||
|
Status VerifySstFileChecksum(const FileMetaData& fmeta,
|
||||||
|
const std::string& fpath,
|
||||||
|
const ReadOptions& read_options);
|
||||||
|
|
||||||
using DB::StartTrace;
|
using DB::StartTrace;
|
||||||
virtual Status StartTrace(
|
virtual Status StartTrace(
|
||||||
|
@ -1443,6 +1443,14 @@ class DB {
|
|||||||
const ExportImportFilesMetaData& metadata,
|
const ExportImportFilesMetaData& metadata,
|
||||||
ColumnFamilyHandle** handle) = 0;
|
ColumnFamilyHandle** handle) = 0;
|
||||||
|
|
||||||
|
// Verify the checksums of files in db. Currently the whole-file checksum of
|
||||||
|
// table files are checked.
|
||||||
|
virtual Status VerifyFileChecksums(const ReadOptions& /*read_options*/) {
|
||||||
|
return Status::NotSupported("File verification not supported");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify the block checksums of files in db. The block checksums of table
|
||||||
|
// files are checked.
|
||||||
virtual Status VerifyChecksum(const ReadOptions& read_options) = 0;
|
virtual Status VerifyChecksum(const ReadOptions& read_options) = 0;
|
||||||
|
|
||||||
virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); }
|
virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); }
|
||||||
|
@ -141,6 +141,11 @@ class StackableDB : public DB {
|
|||||||
import_options, metadata, handle);
|
import_options, metadata, handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using DB::VerifyFileChecksums;
|
||||||
|
// Pass-through: forwards the request, with the caller's ReadOptions
// unchanged, to the wrapped db_ and returns its result.
Status VerifyFileChecksums(const ReadOptions& read_opts) override {
  Status forwarded = db_->VerifyFileChecksums(read_opts);
  return forwarded;
}
|
||||||
|
|
||||||
virtual Status VerifyChecksum() override { return db_->VerifyChecksum(); }
|
virtual Status VerifyChecksum() override { return db_->VerifyChecksum(); }
|
||||||
|
|
||||||
virtual Status VerifyChecksum(const ReadOptions& options) override {
|
virtual Status VerifyChecksum(const ReadOptions& options) override {
|
||||||
|
Loading…
Reference in New Issue
Block a user