From be219089ad280a8317086b428615c0a1c965eeff Mon Sep 17 00:00:00 2001 From: mrambacher Date: Mon, 28 Jun 2021 08:12:32 -0700 Subject: [PATCH] Add BlobMetaData retrieval methods (#8273) Summary: Added BlobMetaData to ColumnFamilyMetaData and LiveBlobMetaData and DB API GetLiveBlobMetaData to retrieve it. First pass at struct. More tests and maybe fields to come... Pull Request resolved: https://github.com/facebook/rocksdb/pull/8273 Reviewed By: ltamasi Differential Revision: D29102400 Pulled By: mrambacher fbshipit-source-id: 8a2383a4446328be6b91dced9841fdd3dfc80b73 --- HISTORY.md | 6 ++ db/blob/blob_file_meta.cc | 4 + db/blob/blob_file_meta.h | 6 ++ db/db_impl/db_impl.cc | 11 +++ db/db_impl/db_impl.h | 11 ++- db/db_impl/db_impl_debug.cc | 9 ++- db/db_test.cc | 155 +++++++++++++++++++++++++++++------- db/version_set.cc | 15 ++++ include/rocksdb/db.h | 6 ++ include/rocksdb/metadata.h | 46 +++++++++++ 10 files changed, 234 insertions(+), 35 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 638d8ee63..58ab48335 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -29,6 +29,12 @@ * Add BlockBasedTableOptions.prepopulate_block_cache. If enabled, it prepopulate warm/hot data blocks which are already in memory into block cache at the time of flush. On a flush, the data block that is in memory (in memtables) get flushed to the device. If using Direct IO, additional IO is incurred to read this data back into memory again, which is avoided by enabling this option and it also helps with Distributed FileSystem. More details in include/rocksdb/table.h. * Added a `cancel` field to `CompactRangeOptions`, allowing individual in-process manual range compactions to be cancelled. +### New Features +* Added BlobMetaData to the ColumnFamilyMetaData to return information about blob files + +### Public API change +* Added GetAllColumnFamilyMetaData API to retrieve the ColumnFamilyMetaData about all column families. 
+ ## 6.21.0 (2021-05-21) ### Bug Fixes * Fixed a bug in handling file rename error in distributed/network file systems when the server succeeds but client returns error. The bug can cause CURRENT file to point to non-existing MANIFEST file, thus DB cannot be opened. diff --git a/db/blob/blob_file_meta.cc b/db/blob/blob_file_meta.cc index 799f8ef21..4913137e5 100644 --- a/db/blob/blob_file_meta.cc +++ b/db/blob/blob_file_meta.cc @@ -8,9 +8,13 @@ #include #include +#include "db/blob/blob_log_format.h" #include "rocksdb/slice.h" namespace ROCKSDB_NAMESPACE { +uint64_t SharedBlobFileMetaData::GetBlobFileSize() const { + return BlobLogHeader::kSize + total_blob_bytes_ + BlobLogFooter::kSize; +} std::string SharedBlobFileMetaData::DebugString() const { std::ostringstream oss; diff --git a/db/blob/blob_file_meta.h b/db/blob/blob_file_meta.h index bd9aa6d90..d7c8a1243 100644 --- a/db/blob/blob_file_meta.h +++ b/db/blob/blob_file_meta.h @@ -51,6 +51,7 @@ class SharedBlobFileMetaData { SharedBlobFileMetaData(SharedBlobFileMetaData&&) = delete; SharedBlobFileMetaData& operator=(SharedBlobFileMetaData&&) = delete; + uint64_t GetBlobFileSize() const; uint64_t GetBlobFileNumber() const { return blob_file_number_; } uint64_t GetTotalBlobCount() const { return total_blob_count_; } uint64_t GetTotalBlobBytes() const { return total_blob_bytes_; } @@ -112,6 +113,11 @@ class BlobFileMetaData { return shared_meta_; } + uint64_t GetBlobFileSize() const { + assert(shared_meta_); + return shared_meta_->GetBlobFileSize(); + } + uint64_t GetBlobFileNumber() const { assert(shared_meta_); return shared_meta_->GetBlobFileNumber(); diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 9b4dc3005..1e0663bc7 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -3774,6 +3774,17 @@ void DBImpl::GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, ReturnAndCleanupSuperVersion(cfd, sv); } +void DBImpl::GetAllColumnFamilyMetaData( + std::vector* metadata) { + 
InstrumentedMutexLock l(&mutex_); + for (auto cfd : *(versions_->GetColumnFamilySet())) { + { + metadata->emplace_back(); + cfd->current()->GetColumnFamilyMetaData(&metadata->back()); + } + } +} + #endif // ROCKSDB_LITE Status DBImpl::CheckConsistency() { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index d787f66f4..3d4d0a2e7 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -400,12 +400,13 @@ class DBImpl : public DB { FileChecksumList* checksum_list) override; // Obtains the meta data of the specified column family of the DB. - // Status::NotFound() will be returned if the current DB does not have - // any column family match the specified name. // TODO(yhchiang): output parameter is placed in the end in this codebase. virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, ColumnFamilyMetaData* metadata) override; + void GetAllColumnFamilyMetaData( + std::vector* metadata) override; + Status SuggestCompactRange(ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) override; @@ -974,8 +975,10 @@ class DBImpl : public DB { // get total level0 file size. Only for testing. 
uint64_t TEST_GetLevel0TotalSize(); - void TEST_GetFilesMetaData(ColumnFamilyHandle* column_family, - std::vector>* metadata); + void TEST_GetFilesMetaData( + ColumnFamilyHandle* column_family, + std::vector>* metadata, + std::vector>* blob_metadata = nullptr); void TEST_LockMutex(); diff --git a/db/db_impl/db_impl_debug.cc b/db/db_impl/db_impl_debug.cc index e590607c6..a9f22458e 100644 --- a/db/db_impl/db_impl_debug.cc +++ b/db/db_impl/db_impl_debug.cc @@ -58,7 +58,8 @@ int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes( void DBImpl::TEST_GetFilesMetaData( ColumnFamilyHandle* column_family, - std::vector>* metadata) { + std::vector>* metadata, + std::vector>* blob_metadata) { auto cfh = static_cast_with_check(column_family); auto cfd = cfh->cfd(); InstrumentedMutexLock l(&mutex_); @@ -72,6 +73,12 @@ void DBImpl::TEST_GetFilesMetaData( (*metadata)[level].push_back(*f); } } + if (blob_metadata != nullptr) { + blob_metadata->clear(); + for (const auto& blob : cfd->current()->storage_info()->GetBlobFiles()) { + blob_metadata->push_back(blob.second); + } + } } uint64_t DBImpl::TEST_Current_Manifest_FileNo() { diff --git a/db/db_test.cc b/db/db_test.cc index f1932d61b..ea9e10920 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -25,6 +25,7 @@ #include "cache/lru_cache.h" #include "db/blob/blob_index.h" +#include "db/blob/blob_log_format.h" #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "db/dbformat.h" @@ -1027,10 +1028,10 @@ TEST_F(DBTest, FailMoreDbPaths) { } void CheckColumnFamilyMeta( - const ColumnFamilyMetaData& cf_meta, + const ColumnFamilyMetaData& cf_meta, const std::string& cf_name, const std::vector>& files_by_level, uint64_t start_time, uint64_t end_time) { - ASSERT_EQ(cf_meta.name, kDefaultColumnFamilyName); + ASSERT_EQ(cf_meta.name, cf_name); ASSERT_EQ(cf_meta.levels.size(), files_by_level.size()); uint64_t cf_size = 0; @@ -1124,6 +1125,53 @@ void CheckLiveFilesMeta( } #ifndef ROCKSDB_LITE +void AddBlobFile(const ColumnFamilyHandle* 
cfh, uint64_t blob_file_number, + uint64_t total_blob_count, uint64_t total_blob_bytes, + const std::string& checksum_method, + const std::string& checksum_value, + uint64_t garbage_blob_count = 0, + uint64_t garbage_blob_bytes = 0) { + ColumnFamilyData* cfd = + (static_cast(cfh))->cfd(); + assert(cfd); + + Version* const version = cfd->current(); + assert(version); + + VersionStorageInfo* const storage_info = version->storage_info(); + assert(storage_info); + + // Add a live blob file. + + auto shared_meta = SharedBlobFileMetaData::Create( + blob_file_number, total_blob_count, total_blob_bytes, checksum_method, + checksum_value); + + auto meta = BlobFileMetaData::Create(std::move(shared_meta), + BlobFileMetaData::LinkedSsts(), + garbage_blob_count, garbage_blob_bytes); + + storage_info->AddBlobFile(std::move(meta)); +} + +static void CheckBlobMetaData( + const BlobMetaData& bmd, uint64_t blob_file_number, + uint64_t total_blob_count, uint64_t total_blob_bytes, + const std::string& checksum_method, const std::string& checksum_value, + uint64_t garbage_blob_count = 0, uint64_t garbage_blob_bytes = 0) { + ASSERT_EQ(bmd.blob_file_number, blob_file_number); + ASSERT_EQ(bmd.blob_file_name, BlobFileName("", blob_file_number)); + ASSERT_EQ(bmd.blob_file_size, + total_blob_bytes + BlobLogHeader::kSize + BlobLogFooter::kSize); + + ASSERT_EQ(bmd.total_blob_count, total_blob_count); + ASSERT_EQ(bmd.total_blob_bytes, total_blob_bytes); + ASSERT_EQ(bmd.garbage_blob_count, garbage_blob_count); + ASSERT_EQ(bmd.garbage_blob_bytes, garbage_blob_bytes); + ASSERT_EQ(bmd.checksum_method, checksum_method); + ASSERT_EQ(bmd.checksum_value, checksum_value); +} + TEST_F(DBTest, MetaDataTest) { Options options = CurrentOptions(); options.create_if_missing = true; @@ -1164,13 +1212,69 @@ TEST_F(DBTest, MetaDataTest) { ColumnFamilyMetaData cf_meta; db_->GetColumnFamilyMetaData(&cf_meta); - CheckColumnFamilyMeta(cf_meta, files_by_level, start_time, end_time); - + CheckColumnFamilyMeta(cf_meta, 
kDefaultColumnFamilyName, files_by_level, + start_time, end_time); std::vector live_file_meta; db_->GetLiveFilesMetaData(&live_file_meta); CheckLiveFilesMeta(live_file_meta, files_by_level); } +TEST_F(DBTest, AllMetaDataTest) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.disable_auto_compactions = true; + DestroyAndReopen(options); + CreateAndReopenWithCF({"pikachu"}, options); + + constexpr uint64_t blob_file_number = 234; + constexpr uint64_t total_blob_count = 555; + constexpr uint64_t total_blob_bytes = 66666; + constexpr char checksum_method[] = "CRC32"; + constexpr char checksum_value[] = "\x3d\x87\xff\x57"; + + int64_t temp_time = 0; + options.env->GetCurrentTime(&temp_time).PermitUncheckedError(); + uint64_t start_time = static_cast(temp_time); + + Random rnd(301); + for (int cf = 0; cf < 2; cf++) { + AddBlobFile(handles_[cf], blob_file_number * (cf + 1), + total_blob_count * (cf + 1), total_blob_bytes * (cf + 1), + checksum_method, checksum_value); + } + + std::vector all_meta; + db_->GetAllColumnFamilyMetaData(&all_meta); + + std::vector> default_files_by_level; + std::vector> pikachu_files_by_level; + dbfull()->TEST_GetFilesMetaData(handles_[0], &default_files_by_level); + dbfull()->TEST_GetFilesMetaData(handles_[1], &pikachu_files_by_level); + + options.env->GetCurrentTime(&temp_time).PermitUncheckedError(); + uint64_t end_time = static_cast(temp_time); + + ASSERT_EQ(all_meta.size(), 2); + for (int cf = 0; cf < 2; cf++) { + const auto& cfmd = all_meta[cf]; + if (cf == 0) { + CheckColumnFamilyMeta(cfmd, "default", default_files_by_level, start_time, + end_time); + } else { + CheckColumnFamilyMeta(cfmd, "pikachu", pikachu_files_by_level, start_time, + end_time); + } + ASSERT_EQ(cfmd.blob_files.size(), 1U); + const auto& bmd = cfmd.blob_files[0]; + ASSERT_EQ(cfmd.blob_file_count, 1U); + ASSERT_EQ(cfmd.blob_file_size, bmd.blob_file_size); + ASSERT_EQ(NormalizePath(bmd.blob_file_path), NormalizePath(dbname_)); + 
CheckBlobMetaData(bmd, blob_file_number * (cf + 1), + total_blob_count * (cf + 1), total_blob_bytes * (cf + 1), + checksum_method, checksum_value); + } +} + namespace { void MinLevelHelper(DBTest* self, Options& options) { Random rnd(301); @@ -2344,41 +2448,19 @@ TEST_F(DBTest, GetLiveBlobFiles) { Options options = CurrentOptions(); options.stats_dump_period_sec = 0; - Reopen(options); - - VersionSet* const versions = dbfull()->TEST_GetVersionSet(); - assert(versions); - assert(versions->GetColumnFamilySet()); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - assert(cfd); - - Version* const version = cfd->current(); - assert(version); - - VersionStorageInfo* const storage_info = version->storage_info(); - assert(storage_info); - - // Add a live blob file. constexpr uint64_t blob_file_number = 234; constexpr uint64_t total_blob_count = 555; constexpr uint64_t total_blob_bytes = 66666; constexpr char checksum_method[] = "CRC32"; constexpr char checksum_value[] = "\x3d\x87\xff\x57"; - - auto shared_meta = SharedBlobFileMetaData::Create( - blob_file_number, total_blob_count, total_blob_bytes, checksum_method, - checksum_value); - constexpr uint64_t garbage_blob_count = 0; constexpr uint64_t garbage_blob_bytes = 0; - auto meta = BlobFileMetaData::Create(std::move(shared_meta), - BlobFileMetaData::LinkedSsts(), - garbage_blob_count, garbage_blob_bytes); - - storage_info->AddBlobFile(std::move(meta)); + Reopen(options); + AddBlobFile(db_->DefaultColumnFamily(), blob_file_number, total_blob_count, + total_blob_bytes, checksum_method, checksum_value, + garbage_blob_count, garbage_blob_bytes); // Make sure it appears in the results returned by GetLiveFiles. 
uint64_t manifest_size = 0; std::vector files; @@ -2386,6 +2468,19 @@ TEST_F(DBTest, GetLiveBlobFiles) { ASSERT_FALSE(files.empty()); ASSERT_EQ(files[0], BlobFileName("", blob_file_number)); + + ColumnFamilyMetaData cfmd; + + db_->GetColumnFamilyMetaData(&cfmd); + ASSERT_EQ(cfmd.blob_files.size(), 1); + const BlobMetaData& bmd = cfmd.blob_files[0]; + + CheckBlobMetaData(bmd, blob_file_number, total_blob_count, total_blob_bytes, + checksum_method, checksum_value, garbage_blob_count, + garbage_blob_bytes); + ASSERT_EQ(NormalizePath(bmd.blob_file_path), NormalizePath(dbname_)); + ASSERT_EQ(cfmd.blob_file_count, 1U); + ASSERT_EQ(cfmd.blob_file_size, bmd.blob_file_size); } #endif diff --git a/db/version_set.cc b/db/version_set.cc index b99a8b39c..cd79cc205 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1469,6 +1469,10 @@ void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) { cf_meta->file_count = 0; cf_meta->levels.clear(); + cf_meta->blob_file_size = 0; + cf_meta->blob_file_count = 0; + cf_meta->blob_files.clear(); + auto* ioptions = cfd_->ioptions(); auto* vstorage = storage_info(); @@ -1504,6 +1508,17 @@ void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) { level, level_size, std::move(files)); cf_meta->size += level_size; } + for (const auto& iter : vstorage->GetBlobFiles()) { + const auto meta = iter.second.get(); + cf_meta->blob_files.emplace_back( + meta->GetBlobFileNumber(), BlobFileName("", meta->GetBlobFileNumber()), + ioptions->cf_paths.front().path, meta->GetBlobFileSize(), + meta->GetTotalBlobCount(), meta->GetTotalBlobBytes(), + meta->GetGarbageBlobCount(), meta->GetGarbageBlobBytes(), + meta->GetChecksumMethod(), meta->GetChecksumValue()); + cf_meta->blob_file_count++; + cf_meta->blob_file_size += meta->GetBlobFileSize(); + } } uint64_t Version::GetSstFilesSize() { diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index a8c3bbd05..645ecfb45 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ 
-1423,6 +1423,12 @@ class DB { GetColumnFamilyMetaData(DefaultColumnFamily(), metadata); } + // Obtains the meta data of all column families for the DB. + // The output vector receives one entry per column family; the name of each + // column family is available in the entry's `name` field. + virtual void GetAllColumnFamilyMetaData( + std::vector* /*metadata*/) {} + // IngestExternalFile() will load a list of external SST files (1) into the DB // Two primary modes are supported: // - Duplicate keys in the new files will overwrite exiting keys (default) diff --git a/include/rocksdb/metadata.h b/include/rocksdb/metadata.h index b515c51a1..25fcdde3e 100644 --- a/include/rocksdb/metadata.h +++ b/include/rocksdb/metadata.h @@ -15,6 +15,7 @@ #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { +struct BlobMetaData; struct ColumnFamilyMetaData; struct LevelMetaData; struct SstFileMetaData; @@ -35,6 +36,13 @@ struct ColumnFamilyMetaData { std::string name; // The metadata of all levels in this column family. std::vector levels; + + // The total size of all blob files + uint64_t blob_file_size = 0; + // The number of blob files in this column family. + size_t blob_file_count = 0; + // The metadata of the blobs in this column family + std::vector blob_files; }; // The metadata that describes a level. 
@@ -153,6 +161,44 @@ struct LiveFileMetaData : SstFileMetaData { LiveFileMetaData() : column_family_name(), level(0) {} }; +// The MetaData that describes a Blob file +struct BlobMetaData { + BlobMetaData() + : blob_file_number(0), + blob_file_size(0), + total_blob_count(0), + total_blob_bytes(0), + garbage_blob_count(0), + garbage_blob_bytes(0) {} + + BlobMetaData(uint64_t _file_number, const std::string& _file_name, + const std::string& _file_path, uint64_t _file_size, + uint64_t _total_blob_count, uint64_t _total_blob_bytes, + uint64_t _garbage_blob_count, uint64_t _garbage_blob_bytes, + const std::string& _file_checksum, + const std::string& _file_checksum_func_name) + : blob_file_number(_file_number), + blob_file_name(_file_name), + blob_file_path(_file_path), + blob_file_size(_file_size), + total_blob_count(_total_blob_count), + total_blob_bytes(_total_blob_bytes), + garbage_blob_count(_garbage_blob_count), + garbage_blob_bytes(_garbage_blob_bytes), + checksum_method(_file_checksum), + checksum_value(_file_checksum_func_name) {} + uint64_t blob_file_number; + std::string blob_file_name; + std::string blob_file_path; + uint64_t blob_file_size; + uint64_t total_blob_count; + uint64_t total_blob_bytes; + uint64_t garbage_blob_count; + uint64_t garbage_blob_bytes; + std::string checksum_method; + std::string checksum_value; +}; + // Metadata returned as output from ExportColumnFamily() and used as input to // CreateColumnFamiliesWithImport(). struct ExportImportFilesMetaData {