Mark GetLiveFilesStorageInfo ready for production use (#9868)

Summary:
... by filling out remaining testing hole: handling of
db_paths+cf_paths. (Note that while GetLiveFilesStorageInfo works
with db_paths / cf_paths, Checkpoint and BackupEngine do not and
are marked appropriately.)

Also improved comments for "live files" APIs, and grouped them
together in db.h.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9868

Test Plan: Adding to existing unit tests

Reviewed By: jay-zhuang

Differential Revision: D35752254

Pulled By: pdillinger

fbshipit-source-id: c70eb67748fad61826e2f554b674638700abefb2
This commit is contained in:
Peter Dillinger 2022-04-20 16:09:34 -07:00 committed by Facebook GitHub Bot
parent 2ea4205a69
commit 1bac873fcf
4 changed files with 74 additions and 46 deletions

View File

@ -1,5 +1,7 @@
# Rocksdb Change Log # Rocksdb Change Log
## Unreleased ## Unreleased
### New Features
* DB::GetLiveFilesStorageInfo is ready for production use.
## 7.2.0 (04/15/2022) ## 7.2.0 (04/15/2022)
### Bug Fixes ### Bug Fixes

View File

@ -2409,6 +2409,30 @@ TEST_P(DBCompactionTestWithParam, LevelCompactionCFPathUse) {
check_getvalues(); check_getvalues();
{ // Also verify GetLiveFilesStorageInfo with db_paths / cf_paths
std::vector<LiveFileStorageInfo> new_infos;
LiveFilesStorageInfoOptions lfsio;
lfsio.wal_size_for_flush = UINT64_MAX; // no flush
ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsio, &new_infos));
std::unordered_map<std::string, int> live_sst_by_dir;
for (auto& info : new_infos) {
if (info.file_type == kTableFile) {
live_sst_by_dir[info.directory]++;
// Verify file on disk (no directory confusion)
uint64_t size;
ASSERT_OK(env_->GetFileSize(
info.directory + "/" + info.relative_filename, &size));
ASSERT_EQ(info.size, size);
}
}
ASSERT_EQ(3U * 3U, live_sst_by_dir.size());
for (auto& paths : {options.db_paths, cf_opt1.cf_paths, cf_opt2.cf_paths}) {
ASSERT_EQ(1, live_sst_by_dir[paths[0].path]);
ASSERT_EQ(4, live_sst_by_dir[paths[1].path]);
ASSERT_EQ(2, live_sst_by_dir[paths[2].path]);
}
}
ReopenWithColumnFamilies({"default", "one", "two"}, option_vector); ReopenWithColumnFamilies({"default", "one", "two"}, option_vector);
check_getvalues(); check_getvalues();

View File

@ -2427,7 +2427,7 @@ TEST_F(DBTest, SnapshotFiles) {
// Also test GetLiveFilesStorageInfo // Also test GetLiveFilesStorageInfo
std::vector<LiveFileStorageInfo> new_infos; std::vector<LiveFileStorageInfo> new_infos;
ASSERT_OK(dbfull()->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(), ASSERT_OK(db_->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(),
&new_infos)); &new_infos));
// Close DB (while deletions disabled) // Close DB (while deletions disabled)

View File

@ -1440,39 +1440,6 @@ class DB {
virtual Status EnableFileDeletions(bool force = true) = 0; virtual Status EnableFileDeletions(bool force = true) = 0;
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
// GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
// Retrieve the list of all files in the database. The files are
// relative to the dbname and are not absolute paths. Despite being relative
// paths, the file names begin with "/". The valid size of the manifest file
// is returned in manifest_file_size. The manifest file is an ever growing
// file, but only the portion specified by manifest_file_size is valid for
// this snapshot. Setting flush_memtable to true does Flush before recording
// the live files. Setting flush_memtable to false is useful when we don't
// want to wait for flush which may have to wait for compaction to complete
// taking an indeterminate time.
//
// In case you have multiple column families, even if flush_memtable is true,
// you still need to call GetSortedWalFiles after GetLiveFiles to compensate
// for new data that arrived to already-flushed column families while other
// column families were flushing
virtual Status GetLiveFiles(std::vector<std::string>&,
uint64_t* manifest_file_size,
bool flush_memtable = true) = 0;
// Retrieve the sorted list of all wal files with earliest file first
virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
// Retrieve information about the current wal file
//
// Note that the log might have rolled after this call in which case
// the current_log_file would not point to the current log file.
//
// Additionally, for the sake of optimization current_log_file->StartSequence
// would always be set to 0
virtual Status GetCurrentWalFile(
std::unique_ptr<LogFile>* current_log_file) = 0;
// Retrieves the creation time of the oldest file in the DB. // Retrieves the creation time of the oldest file in the DB.
// This API only works if max_open_files = -1, if it is not then // This API only works if max_open_files = -1, if it is not then
// Status returned is Status::NotSupported() // Status returned is Status::NotSupported()
@ -1517,26 +1484,30 @@ class DB {
// path relative to the db directory. eg. 000001.sst, /archive/000003.log // path relative to the db directory. eg. 000001.sst, /archive/000003.log
virtual Status DeleteFile(std::string name) = 0; virtual Status DeleteFile(std::string name) = 0;
// Returns a list of all table files with their level, start key // Obtains a list of all live table (SST) files and how they fit into the
// and end key // LSM-trees, such as column family, level, key range, etc.
// This builds a de-normalized form of GetAllColumnFamilyMetaData().
// For information about all files in a DB, use GetLiveFilesStorageInfo().
virtual void GetLiveFilesMetaData( virtual void GetLiveFilesMetaData(
std::vector<LiveFileMetaData>* /*metadata*/) {} std::vector<LiveFileMetaData>* /*metadata*/) {}
// Return a list of all table and blob files checksum info. // Return a list of all table (SST) and blob files checksum info.
// Note: This function might be of limited use because it cannot be // Note: This function might be of limited use because it cannot be
// synchronized with GetLiveFiles. // synchronized with other "live files" APIs. GetLiveFilesStorageInfo()
// is recommended instead.
virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0; virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;
// EXPERIMENTAL: This function is not yet feature-complete.
// Get information about all live files that make up a DB, for making // Get information about all live files that make up a DB, for making
// live copies (Checkpoint, backups, etc.) or other storage-related purposes. // live copies (Checkpoint, backups, etc.) or other storage-related purposes.
// Use DisableFileDeletions() before and EnableFileDeletions() after to // If creating a live copy, use DisableFileDeletions() before and
// preserve the files for live copy. // EnableFileDeletions() after to prevent deletions.
// For LSM-tree metadata, use Get*MetaData() functions instead.
virtual Status GetLiveFilesStorageInfo( virtual Status GetLiveFilesStorageInfo(
const LiveFilesStorageInfoOptions& opts, const LiveFilesStorageInfoOptions& opts,
std::vector<LiveFileStorageInfo>* files) = 0; std::vector<LiveFileStorageInfo>* files) = 0;
// Obtains the meta data of the specified column family of the DB. // Obtains the LSM-tree meta data of the specified column family of the DB,
// including metadata for each live table (SST) file in that column family.
virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/, virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
ColumnFamilyMetaData* /*metadata*/) {} ColumnFamilyMetaData* /*metadata*/) {}
@ -1545,12 +1516,43 @@ class DB {
GetColumnFamilyMetaData(DefaultColumnFamily(), metadata); GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
} }
// Obtains the meta data of all column families for the DB. // Obtains the LSM-tree meta data of all column families of the DB,
// The returned map contains one entry for each column family indexed by the // including metadata for each live table (SST) file in the DB.
// name of the column family.
virtual void GetAllColumnFamilyMetaData( virtual void GetAllColumnFamilyMetaData(
std::vector<ColumnFamilyMetaData>* /*metadata*/) {} std::vector<ColumnFamilyMetaData>* /*metadata*/) {}
// Retrieve the list of all files in the database except WAL files. The files
// are relative to the dbname (or db_paths/cf_paths), not absolute paths.
// (Not recommended with db_paths/cf_paths because that information is not
// returned.) Despite being relative paths, the file names begin with "/".
// The valid size of the manifest file is returned in manifest_file_size.
// The manifest file is an ever growing file, but only the portion specified
// by manifest_file_size is valid for this snapshot. Setting flush_memtable
// to true does Flush before recording the live files. Setting flush_memtable
// to false is useful when we don't want to wait for flush which may have to
// wait for compaction to complete taking an indeterminate time.
//
// NOTE: Although GetLiveFiles() followed by GetSortedWalFiles() can generate
// a lossless backup, GetLiveFilesStorageInfo() is strongly recommended
// instead, because it ensures a single consistent view of all files is
// captured in one call.
virtual Status GetLiveFiles(std::vector<std::string>&,
uint64_t* manifest_file_size,
bool flush_memtable = true) = 0;
// Retrieve the sorted list of all wal files with earliest file first
virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
// Retrieve information about the current wal file
//
// Note that the log might have rolled after this call in which case
// the current_log_file would not point to the current log file.
//
// Additionally, for the sake of optimization current_log_file->StartSequence
// would always be set to 0
virtual Status GetCurrentWalFile(
std::unique_ptr<LogFile>* current_log_file) = 0;
// IngestExternalFile() will load a list of external SST files (1) into the DB // IngestExternalFile() will load a list of external SST files (1) into the DB
// Two primary modes are supported: // Two primary modes are supported:
// - Duplicate keys in the new files will overwrite existing keys (default) // - Duplicate keys in the new files will overwrite existing keys (default)