Mark GetLiveFilesStorageInfo ready for production use (#9868)
Summary: ... by filling out remaining testing hole: handling of db_paths+cf_paths. (Note that while GetLiveFilesStorageInfo works with db_paths / cf_paths, Checkpoint and BackupEngine do not and are marked appropriately.) Also improved comments for "live files" APIs, and grouped them together in db.h. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9868 Test Plan: Adding to existing unit tests Reviewed By: jay-zhuang Differential Revision: D35752254 Pulled By: pdillinger fbshipit-source-id: c70eb67748fad61826e2f554b674638700abefb2
This commit is contained in:
parent
2ea4205a69
commit
1bac873fcf
@ -1,5 +1,7 @@
|
|||||||
# Rocksdb Change Log
|
# Rocksdb Change Log
|
||||||
## Unreleased
|
## Unreleased
|
||||||
|
### New Features
|
||||||
|
* DB::GetLiveFilesStorageInfo is ready for production use.
|
||||||
|
|
||||||
## 7.2.0 (04/15/2022)
|
## 7.2.0 (04/15/2022)
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
@ -2409,6 +2409,30 @@ TEST_P(DBCompactionTestWithParam, LevelCompactionCFPathUse) {
|
|||||||
|
|
||||||
check_getvalues();
|
check_getvalues();
|
||||||
|
|
||||||
|
{ // Also verify GetLiveFilesStorageInfo with db_paths / cf_paths
|
||||||
|
std::vector<LiveFileStorageInfo> new_infos;
|
||||||
|
LiveFilesStorageInfoOptions lfsio;
|
||||||
|
lfsio.wal_size_for_flush = UINT64_MAX; // no flush
|
||||||
|
ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsio, &new_infos));
|
||||||
|
std::unordered_map<std::string, int> live_sst_by_dir;
|
||||||
|
for (auto& info : new_infos) {
|
||||||
|
if (info.file_type == kTableFile) {
|
||||||
|
live_sst_by_dir[info.directory]++;
|
||||||
|
// Verify file on disk (no directory confusion)
|
||||||
|
uint64_t size;
|
||||||
|
ASSERT_OK(env_->GetFileSize(
|
||||||
|
info.directory + "/" + info.relative_filename, &size));
|
||||||
|
ASSERT_EQ(info.size, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ASSERT_EQ(3U * 3U, live_sst_by_dir.size());
|
||||||
|
for (auto& paths : {options.db_paths, cf_opt1.cf_paths, cf_opt2.cf_paths}) {
|
||||||
|
ASSERT_EQ(1, live_sst_by_dir[paths[0].path]);
|
||||||
|
ASSERT_EQ(4, live_sst_by_dir[paths[1].path]);
|
||||||
|
ASSERT_EQ(2, live_sst_by_dir[paths[2].path]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ReopenWithColumnFamilies({"default", "one", "two"}, option_vector);
|
ReopenWithColumnFamilies({"default", "one", "two"}, option_vector);
|
||||||
|
|
||||||
check_getvalues();
|
check_getvalues();
|
||||||
|
@ -2427,8 +2427,8 @@ TEST_F(DBTest, SnapshotFiles) {
|
|||||||
|
|
||||||
// Also test GetLiveFilesStorageInfo
|
// Also test GetLiveFilesStorageInfo
|
||||||
std::vector<LiveFileStorageInfo> new_infos;
|
std::vector<LiveFileStorageInfo> new_infos;
|
||||||
ASSERT_OK(dbfull()->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(),
|
ASSERT_OK(db_->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(),
|
||||||
&new_infos));
|
&new_infos));
|
||||||
|
|
||||||
// Close DB (while deletions disabled)
|
// Close DB (while deletions disabled)
|
||||||
Close();
|
Close();
|
||||||
|
@ -1440,39 +1440,6 @@ class DB {
|
|||||||
virtual Status EnableFileDeletions(bool force = true) = 0;
|
virtual Status EnableFileDeletions(bool force = true) = 0;
|
||||||
|
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
// GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
|
|
||||||
|
|
||||||
// Retrieve the list of all files in the database. The files are
|
|
||||||
// relative to the dbname and are not absolute paths. Despite being relative
|
|
||||||
// paths, the file names begin with "/". The valid size of the manifest file
|
|
||||||
// is returned in manifest_file_size. The manifest file is an ever growing
|
|
||||||
// file, but only the portion specified by manifest_file_size is valid for
|
|
||||||
// this snapshot. Setting flush_memtable to true does Flush before recording
|
|
||||||
// the live files. Setting flush_memtable to false is useful when we don't
|
|
||||||
// want to wait for flush which may have to wait for compaction to complete
|
|
||||||
// taking an indeterminate time.
|
|
||||||
//
|
|
||||||
// In case you have multiple column families, even if flush_memtable is true,
|
|
||||||
// you still need to call GetSortedWalFiles after GetLiveFiles to compensate
|
|
||||||
// for new data that arrived to already-flushed column families while other
|
|
||||||
// column families were flushing
|
|
||||||
virtual Status GetLiveFiles(std::vector<std::string>&,
|
|
||||||
uint64_t* manifest_file_size,
|
|
||||||
bool flush_memtable = true) = 0;
|
|
||||||
|
|
||||||
// Retrieve the sorted list of all wal files with earliest file first
|
|
||||||
virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
|
|
||||||
|
|
||||||
// Retrieve information about the current wal file
|
|
||||||
//
|
|
||||||
// Note that the log might have rolled after this call in which case
|
|
||||||
// the current_log_file would not point to the current log file.
|
|
||||||
//
|
|
||||||
// Additionally, for the sake of optimization current_log_file->StartSequence
|
|
||||||
// would always be set to 0
|
|
||||||
virtual Status GetCurrentWalFile(
|
|
||||||
std::unique_ptr<LogFile>* current_log_file) = 0;
|
|
||||||
|
|
||||||
// Retrieves the creation time of the oldest file in the DB.
|
// Retrieves the creation time of the oldest file in the DB.
|
||||||
// This API only works if max_open_files = -1, if it is not then
|
// This API only works if max_open_files = -1, if it is not then
|
||||||
// Status returned is Status::NotSupported()
|
// Status returned is Status::NotSupported()
|
||||||
@ -1517,26 +1484,30 @@ class DB {
|
|||||||
// path relative to the db directory. eg. 000001.sst, /archive/000003.log
|
// path relative to the db directory. eg. 000001.sst, /archive/000003.log
|
||||||
virtual Status DeleteFile(std::string name) = 0;
|
virtual Status DeleteFile(std::string name) = 0;
|
||||||
|
|
||||||
// Returns a list of all table files with their level, start key
|
// Obtains a list of all live table (SST) files and how they fit into the
|
||||||
// and end key
|
// LSM-trees, such as column family, level, key range, etc.
|
||||||
|
// This builds a de-normalized form of GetAllColumnFamilyMetaData().
|
||||||
|
// For information about all files in a DB, use GetLiveFilesStorageInfo().
|
||||||
virtual void GetLiveFilesMetaData(
|
virtual void GetLiveFilesMetaData(
|
||||||
std::vector<LiveFileMetaData>* /*metadata*/) {}
|
std::vector<LiveFileMetaData>* /*metadata*/) {}
|
||||||
|
|
||||||
// Return a list of all table and blob files checksum info.
|
// Return a list of all table (SST) and blob files checksum info.
|
||||||
// Note: This function might be of limited use because it cannot be
|
// Note: This function might be of limited use because it cannot be
|
||||||
// synchronized with GetLiveFiles.
|
// synchronized with other "live files" APIs. GetLiveFilesStorageInfo()
|
||||||
|
// is recommended instead.
|
||||||
virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;
|
virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;
|
||||||
|
|
||||||
// EXPERIMENTAL: This function is not yet feature-complete.
|
|
||||||
// Get information about all live files that make up a DB, for making
|
// Get information about all live files that make up a DB, for making
|
||||||
// live copies (Checkpoint, backups, etc.) or other storage-related purposes.
|
// live copies (Checkpoint, backups, etc.) or other storage-related purposes.
|
||||||
// Use DisableFileDeletions() before and EnableFileDeletions() after to
|
// If creating a live copy, use DisableFileDeletions() before and
|
||||||
// preserve the files for live copy.
|
// EnableFileDeletions() after to prevent deletions.
|
||||||
|
// For LSM-tree metadata, use Get*MetaData() functions instead.
|
||||||
virtual Status GetLiveFilesStorageInfo(
|
virtual Status GetLiveFilesStorageInfo(
|
||||||
const LiveFilesStorageInfoOptions& opts,
|
const LiveFilesStorageInfoOptions& opts,
|
||||||
std::vector<LiveFileStorageInfo>* files) = 0;
|
std::vector<LiveFileStorageInfo>* files) = 0;
|
||||||
|
|
||||||
// Obtains the meta data of the specified column family of the DB.
|
// Obtains the LSM-tree meta data of the specified column family of the DB,
|
||||||
|
// including metadata for each live table (SST) file in that column family.
|
||||||
virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
|
virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
|
||||||
ColumnFamilyMetaData* /*metadata*/) {}
|
ColumnFamilyMetaData* /*metadata*/) {}
|
||||||
|
|
||||||
@ -1545,12 +1516,43 @@ class DB {
|
|||||||
GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
|
GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Obtains the meta data of all column families for the DB.
|
// Obtains the LSM-tree meta data of all column families of the DB,
|
||||||
// The returned map contains one entry for each column family indexed by the
|
// including metadata for each live table (SST) file in the DB.
|
||||||
// name of the column family.
|
|
||||||
virtual void GetAllColumnFamilyMetaData(
|
virtual void GetAllColumnFamilyMetaData(
|
||||||
std::vector<ColumnFamilyMetaData>* /*metadata*/) {}
|
std::vector<ColumnFamilyMetaData>* /*metadata*/) {}
|
||||||
|
|
||||||
|
// Retrieve the list of all files in the database except WAL files. The files
|
||||||
|
// are relative to the dbname (or db_paths/cf_paths), not absolute paths.
|
||||||
|
// (Not recommended with db_paths/cf_paths because that information is not
|
||||||
|
// returned.) Despite being relative paths, the file names begin with "/".
|
||||||
|
// The valid size of the manifest file is returned in manifest_file_size.
|
||||||
|
// The manifest file is an ever growing file, but only the portion specified
|
||||||
|
// by manifest_file_size is valid for this snapshot. Setting flush_memtable
|
||||||
|
// to true does Flush before recording the live files. Setting flush_memtable
|
||||||
|
// to false is useful when we don't want to wait for flush which may have to
|
||||||
|
// wait for compaction to complete taking an indeterminate time.
|
||||||
|
//
|
||||||
|
// NOTE: Although GetLiveFiles() followed by GetSortedWalFiles() can generate
|
||||||
|
// a lossless backup, GetLiveFilesStorageInfo() is strongly recommended
|
||||||
|
// instead, because it ensures a single consistent view of all files is
|
||||||
|
// captured in one call.
|
||||||
|
virtual Status GetLiveFiles(std::vector<std::string>&,
|
||||||
|
uint64_t* manifest_file_size,
|
||||||
|
bool flush_memtable = true) = 0;
|
||||||
|
|
||||||
|
// Retrieve the sorted list of all wal files with earliest file first
|
||||||
|
virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
|
||||||
|
|
||||||
|
// Retrieve information about the current wal file
|
||||||
|
//
|
||||||
|
// Note that the log might have rolled after this call in which case
|
||||||
|
// the current_log_file would not point to the current log file.
|
||||||
|
//
|
||||||
|
// Additionally, for the sake of optimization current_log_file->StartSequence
|
||||||
|
// would always be set to 0
|
||||||
|
virtual Status GetCurrentWalFile(
|
||||||
|
std::unique_ptr<LogFile>* current_log_file) = 0;
|
||||||
|
|
||||||
// IngestExternalFile() will load a list of external SST files (1) into the DB
|
// IngestExternalFile() will load a list of external SST files (1) into the DB
|
||||||
// Two primary modes are supported:
|
// Two primary modes are supported:
|
||||||
// - Duplicate keys in the new files will overwrite existing keys (default)
|
// - Duplicate keys in the new files will overwrite existing keys (default)
|
||||||
|
Loading…
Reference in New Issue
Block a user