Extend file_checksum_dump ldb command and DB::GetLiveFilesChecksumInfo to blob files (#8179)

Summary:
Extend the DB::GetLiveFilesChecksumInfo API to blob files.
This API is also used by the file_checksum_dump ldb command, which previously
dumped only the checksums of SST files and now dumps blob file checksums as well.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8179

Test Plan: Add new unit test

Reviewed By: zhichao-cao

Differential Revision: D27714965

Pulled By: akankshamahajan15

fbshipit-source-id: d8b7343ea845a64c83800336d88cced7152a8c92
This commit is contained in:
Akanksha Mahajan 2021-04-15 09:36:57 -07:00 committed by Facebook GitHub Bot
parent b1f62be10e
commit 296b47db25
4 changed files with 186 additions and 7 deletions

View File

@ -21,6 +21,7 @@
* Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead.
* Update DB::StartIOTrace and remove the Env object from its arguments, as it is redundant: DB already has an Env object that is passed down to IOTracer::StartIOTrace.
* Added `FlushReason::kWalFull`, which is reported when a memtable is flushed due to the WAL reaching its size limit; those flushes were previously reported as `FlushReason::kWriteBufferManager`. Also, changed the reason for flushes triggered by the write buffer manager to `FlushReason::kWriteBufferManager`; they were previously reported as `FlushReason::kWriteBufferFull`.
* Extend the file_checksum_dump ldb command and the DB::GetLiveFilesChecksumInfo API to the integrated BlobDB: both now report the checksums of blob files along with those of SST files.
### New Features
* Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true.

View File

@ -4937,7 +4937,7 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname,
}
// Get the checksum information including the checksum and checksum function
// name of all SST files in VersionSet. Store the information in
// name of all SST and blob files in VersionSet. Store the information in
// FileChecksumList which contains a map from file number to its checksum info.
// If DB is not running, make sure call VersionSet::Recover() to load the file
// metadata from Manifest to VersionSet before calling this function.
@ -4954,6 +4954,7 @@ Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) {
if (cfd->IsDropped() || !cfd->initialized()) {
continue;
}
/* SST files */
for (int level = 0; level < cfd->NumberLevels(); level++) {
for (const auto& file :
cfd->current()->storage_info()->LevelFiles(level)) {
@ -4961,17 +4962,36 @@ Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) {
file->file_checksum,
file->file_checksum_func_name);
if (!s.ok()) {
break;
return s;
}
}
}
/* Blob files */
const auto& blob_files = cfd->current()->storage_info()->GetBlobFiles();
for (const auto& pair : blob_files) {
const uint64_t blob_file_number = pair.first;
const auto& meta = pair.second;
assert(meta);
assert(blob_file_number == meta->GetBlobFileNumber());
std::string checksum_value = meta->GetChecksumValue();
std::string checksum_method = meta->GetChecksumMethod();
assert(checksum_value.empty() == checksum_method.empty());
if (meta->GetChecksumMethod().empty()) {
checksum_value = kUnknownFileChecksum;
checksum_method = kUnknownFileChecksumFuncName;
}
s = checksum_list->InsertOneFileChecksum(blob_file_number, checksum_value,
checksum_method);
if (!s.ok()) {
break;
return s;
}
}
if (!s.ok()) {
break;
}
}
return s;
}

View File

@ -1368,7 +1368,7 @@ class DB {
virtual void GetLiveFilesMetaData(
std::vector<LiveFileMetaData>* /*metadata*/) {}
// Return a list of all table file checksum info.
// Return a list of all table and blob files checksum info.
// Note: This function might be of limited use because it cannot be
// synchronized with GetLiveFiles.
virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;

View File

@ -349,6 +349,85 @@ TEST_F(LdbCmdTest, DumpFileChecksumNoChecksum) {
ASSERT_OK(fct_helper_ac.VerifyChecksumInManifest(live_files));
}
// Runs the `file_checksum_dump` ldb command against a DB opened with
// `enable_blob_files = true` and with `file_checksum_gen_factory` left at its
// default. The command and the per-file checksum verification are run both
// before and after a manual compaction over the whole written key range.
TEST_F(LdbCmdTest, BlobDBDumpFileChecksumNoChecksum) {
  Env* base_env = TryLoadCustomOrDefaultEnv();
  std::unique_ptr<Env> env(NewMemEnv(base_env));

  Options opts;
  opts.env = env.get();
  opts.create_if_missing = true;
  opts.enable_blob_files = true;

  DB* db = nullptr;
  std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test");
  ASSERT_OK(DB::Open(opts, dbname, &db));

  WriteOptions wopts;
  FlushOptions fopts;
  fopts.wait = true;
  Random rnd(test::RandomSeed());

  // Keys are the integer index rendered as an 8-character, zero-padded string.
  const auto make_key = [](int index) {
    std::ostringstream oss;
    oss << std::setfill('0') << std::setw(8) << std::fixed << index;
    return oss.str();
  };

  // Each row is a half-open [first, last) key range; every range is written
  // and then flushed. The ranges overlap on purpose, so later batches
  // overwrite keys from earlier ones.
  const int kKeyRanges[][2] = {{0, 200}, {100, 300}, {200, 400}, {300, 400}};
  for (const auto& range : kKeyRanges) {
    for (int i = range[0]; i < range[1]; i++) {
      const std::string key = make_key(i);
      const std::string value = rnd.RandomString(100);
      ASSERT_OK(db->Put(wopts, key, value));
    }
    ASSERT_OK(db->Flush(fopts));
  }

  // Command line: ./ldb --db=<dbname> file_checksum_dump
  char ldb_binary[] = "./ldb";
  std::string db_arg = "--db=" + dbname;
  char ldb_command[] = "file_checksum_dump";
  char* argv[] = {ldb_binary, const_cast<char*>(db_arg.c_str()), ldb_command};

  ASSERT_EQ(0,
            LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr));

  // Verify each sst and blob file checksum value and checksum name
  FileChecksumTestHelper fct_helper(opts, db, dbname);
  ASSERT_OK(fct_helper.VerifyEachFileChecksum());

  // Manually trigger compaction over the first through the last written key.
  // The Slices only reference the strings, so the strings must outlive them.
  std::string b_buf = make_key(0);
  std::string e_buf = make_key(399);
  Slice begin(b_buf);
  Slice end(e_buf);
  CompactRangeOptions options;
  ASSERT_OK(db->CompactRange(options, &begin, &end));

  // Verify each file checksum again after compaction
  FileChecksumTestHelper fct_helper_ac(opts, db, dbname);
  ASSERT_OK(fct_helper_ac.VerifyEachFileChecksum());

  ASSERT_EQ(0,
            LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr));

  delete db;
}
TEST_F(LdbCmdTest, DumpFileChecksumCRC32) {
Env* base_env = TryLoadCustomOrDefaultEnv();
std::unique_ptr<Env> env(NewMemEnv(base_env));
@ -430,6 +509,85 @@ TEST_F(LdbCmdTest, DumpFileChecksumCRC32) {
ASSERT_OK(fct_helper_ac.VerifyChecksumInManifest(live_files));
}
// Runs the `file_checksum_dump` ldb command against a DB opened with
// `enable_blob_files = true` and with the factory returned by
// GetFileChecksumGenCrc32cFactory() set as `file_checksum_gen_factory`. The
// command and the per-file checksum verification are run both before and
// after a manual compaction over the whole written key range.
TEST_F(LdbCmdTest, BlobDBDumpFileChecksumCRC32) {
  Env* base_env = TryLoadCustomOrDefaultEnv();
  std::unique_ptr<Env> env(NewMemEnv(base_env));

  Options opts;
  opts.env = env.get();
  opts.create_if_missing = true;
  opts.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  opts.enable_blob_files = true;

  DB* db = nullptr;
  std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test");
  ASSERT_OK(DB::Open(opts, dbname, &db));

  WriteOptions wopts;
  FlushOptions fopts;
  fopts.wait = true;
  Random rnd(test::RandomSeed());

  // Keys are the integer index rendered as an 8-character, zero-padded string.
  const auto make_key = [](int index) {
    std::ostringstream oss;
    oss << std::setfill('0') << std::setw(8) << std::fixed << index;
    return oss.str();
  };

  // Each row is a half-open [first, last) key range; every range is written
  // and then flushed. The ranges overlap on purpose, so later batches
  // overwrite keys from earlier ones.
  const int kKeyRanges[][2] = {{0, 100}, {50, 150}, {100, 200}, {150, 250}};
  for (const auto& range : kKeyRanges) {
    for (int i = range[0]; i < range[1]; i++) {
      const std::string key = make_key(i);
      const std::string value = rnd.RandomString(100);
      ASSERT_OK(db->Put(wopts, key, value));
    }
    ASSERT_OK(db->Flush(fopts));
  }

  // Command line: ./ldb --db=<dbname> file_checksum_dump
  char ldb_binary[] = "./ldb";
  std::string db_arg = "--db=" + dbname;
  char ldb_command[] = "file_checksum_dump";
  char* argv[] = {ldb_binary, const_cast<char*>(db_arg.c_str()), ldb_command};

  ASSERT_EQ(0,
            LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr));

  // Verify each sst and blob file checksum value and checksum name
  FileChecksumTestHelper fct_helper(opts, db, dbname);
  ASSERT_OK(fct_helper.VerifyEachFileChecksum());

  // Manually trigger compaction over the first through the last written key.
  // The Slices only reference the strings, so the strings must outlive them.
  std::string b_buf = make_key(0);
  std::string e_buf = make_key(249);
  Slice begin(b_buf);
  Slice end(e_buf);
  CompactRangeOptions options;
  ASSERT_OK(db->CompactRange(options, &begin, &end));

  // Verify each file checksum again after compaction
  FileChecksumTestHelper fct_helper_ac(opts, db, dbname);
  ASSERT_OK(fct_helper_ac.VerifyEachFileChecksum());

  ASSERT_EQ(0,
            LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr));

  delete db;
}
TEST_F(LdbCmdTest, OptionParsing) {
// test parsing flags
Options opts;