Ingest external SST files with Temperature hints (#8949)
Summary: Add the file temperature to `IngestExternalFileArg` such that when SST files are ingested, user is able to assign the temperature to each SST file. If the temperature vector is empty or its size does not match the file name vector size, all ingested SST files will be assigned with `Temperature::unKnown`. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8949 Test Plan: add the new test and make check Reviewed By: siying Differential Revision: D31127852 Pulled By: zhichao-cao fbshipit-source-id: 141a81f0f7b473d88f4ab0cb2a21a114cbc6f83c
This commit is contained in:
parent
2f1296ef48
commit
bcd049cd2d
@ -15,6 +15,7 @@
|
||||
* Made SystemClock extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
|
||||
* Made SliceTransform extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. The Capped and Prefixed transform classes return a short name (no length); use GetId for the fully qualified name.
|
||||
* Made FileChecksumGenFactory, SstPartitionerFactory, TablePropertiesCollectorFactory, and WalFilter extend the Customizable class and added a CreateFromString method.
|
||||
* Add `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of the this batch of files.
|
||||
|
||||
## 6.25.0 (2021-09-20)
|
||||
### Bug Fixes
|
||||
|
@ -4558,7 +4558,8 @@ Status DBImpl::IngestExternalFiles(
|
||||
SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
|
||||
Status es = ingestion_jobs[i].Prepare(
|
||||
args[i].external_files, args[i].files_checksums,
|
||||
args[i].files_checksum_func_names, start_file_number, super_version);
|
||||
args[i].files_checksum_func_names, args[i].file_temperature,
|
||||
start_file_number, super_version);
|
||||
// capture first error only
|
||||
if (!es.ok() && status.ok()) {
|
||||
status = es;
|
||||
@ -4573,7 +4574,8 @@ Status DBImpl::IngestExternalFiles(
|
||||
SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
|
||||
Status es = ingestion_jobs[0].Prepare(
|
||||
args[0].external_files, args[0].files_checksums,
|
||||
args[0].files_checksum_func_names, next_file_number, super_version);
|
||||
args[0].files_checksum_func_names, args[0].file_temperature,
|
||||
next_file_number, super_version);
|
||||
if (!es.ok()) {
|
||||
status = es;
|
||||
}
|
||||
|
@ -37,11 +37,10 @@ class DBTest2 : public DBTestBase {
|
||||
#ifndef ROCKSDB_LITE
|
||||
uint64_t GetSstSizeHelper(Temperature temperature) {
|
||||
std::string prop;
|
||||
bool s =
|
||||
EXPECT_TRUE(
|
||||
dbfull()->GetProperty(DB::Properties::kLiveSstFilesSizeAtTemperature +
|
||||
ToString(static_cast<uint8_t>(temperature)),
|
||||
&prop);
|
||||
assert(s);
|
||||
&prop));
|
||||
return static_cast<uint64_t>(std::atoi(prop.c_str()));
|
||||
}
|
||||
#endif // ROCKSDB_LITE
|
||||
|
@ -187,6 +187,16 @@ class ExternalSSTFileBasicTest
|
||||
std::string sst_files_dir_;
|
||||
std::unique_ptr<FaultInjectionTestEnv> fault_injection_test_env_;
|
||||
bool random_rwfile_supported_;
|
||||
#ifndef ROCKSDB_LITE
|
||||
uint64_t GetSstSizeHelper(Temperature temperature) {
|
||||
std::string prop;
|
||||
EXPECT_TRUE(
|
||||
dbfull()->GetProperty(DB::Properties::kLiveSstFilesSizeAtTemperature +
|
||||
ToString(static_cast<uint8_t>(temperature)),
|
||||
&prop));
|
||||
return static_cast<uint64_t>(std::atoi(prop.c_str()));
|
||||
}
|
||||
#endif // ROCKSDB_LITE
|
||||
};
|
||||
|
||||
TEST_F(ExternalSSTFileBasicTest, Basic) {
|
||||
@ -478,6 +488,20 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileWithFileChecksum) {
|
||||
ASSERT_EQ(f.file_checksum_func_name, kUnknownFileChecksumFuncName);
|
||||
}
|
||||
|
||||
// check the temperature of the file being ingested
|
||||
ColumnFamilyMetaData metadata;
|
||||
db_->GetColumnFamilyMetaData(&metadata);
|
||||
ASSERT_EQ(1, metadata.file_count);
|
||||
ASSERT_EQ(Temperature::kUnknown, metadata.levels[6].files[0].temperature);
|
||||
auto size = GetSstSizeHelper(Temperature::kUnknown);
|
||||
ASSERT_GT(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kWarm);
|
||||
ASSERT_EQ(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kHot);
|
||||
ASSERT_EQ(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kCold);
|
||||
ASSERT_EQ(size, 0);
|
||||
|
||||
// Reopen Db with checksum enabled
|
||||
Reopen(options);
|
||||
// Enable verify_file_checksum option
|
||||
@ -598,6 +622,15 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileWithFileChecksum) {
|
||||
}
|
||||
ASSERT_OK(s) << s.ToString();
|
||||
ASSERT_OK(env_->FileExists(file6));
|
||||
db_->GetColumnFamilyMetaData(&metadata);
|
||||
size = GetSstSizeHelper(Temperature::kUnknown);
|
||||
ASSERT_GT(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kWarm);
|
||||
ASSERT_EQ(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kHot);
|
||||
ASSERT_EQ(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kCold);
|
||||
ASSERT_EQ(size, 0);
|
||||
}
|
||||
|
||||
TEST_F(ExternalSSTFileBasicTest, NoCopy) {
|
||||
@ -1666,6 +1699,103 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileAfterDBPut) {
|
||||
ASSERT_EQ(Get("k"), "b");
|
||||
}
|
||||
|
||||
TEST_F(ExternalSSTFileBasicTest, IngestWithTemperature) {
|
||||
Options options = CurrentOptions();
|
||||
const ImmutableCFOptions ioptions(options);
|
||||
options.bottommost_temperature = Temperature::kWarm;
|
||||
SstFileWriter sst_file_writer(EnvOptions(), options);
|
||||
options.level0_file_num_compaction_trigger = 2;
|
||||
Reopen(options);
|
||||
|
||||
auto size = GetSstSizeHelper(Temperature::kUnknown);
|
||||
ASSERT_EQ(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kWarm);
|
||||
ASSERT_EQ(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kHot);
|
||||
ASSERT_EQ(size, 0);
|
||||
|
||||
// create file01.sst (1000 => 1099) and ingest it
|
||||
std::string file1 = sst_files_dir_ + "file01.sst";
|
||||
ASSERT_OK(sst_file_writer.Open(file1));
|
||||
for (int k = 1000; k < 1100; k++) {
|
||||
ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
|
||||
}
|
||||
ExternalSstFileInfo file1_info;
|
||||
Status s = sst_file_writer.Finish(&file1_info);
|
||||
ASSERT_OK(s);
|
||||
ASSERT_EQ(file1_info.file_path, file1);
|
||||
ASSERT_EQ(file1_info.num_entries, 100);
|
||||
ASSERT_EQ(file1_info.smallest_key, Key(1000));
|
||||
ASSERT_EQ(file1_info.largest_key, Key(1099));
|
||||
|
||||
std::vector<std::string> files;
|
||||
std::vector<std::string> files_checksums;
|
||||
std::vector<std::string> files_checksum_func_names;
|
||||
Temperature file_temperature = Temperature::kWarm;
|
||||
|
||||
files.push_back(file1);
|
||||
IngestExternalFileOptions in_opts;
|
||||
in_opts.move_files = false;
|
||||
in_opts.snapshot_consistency = true;
|
||||
in_opts.allow_global_seqno = false;
|
||||
in_opts.allow_blocking_flush = false;
|
||||
in_opts.write_global_seqno = true;
|
||||
in_opts.verify_file_checksum = false;
|
||||
IngestExternalFileArg arg;
|
||||
arg.column_family = db_->DefaultColumnFamily();
|
||||
arg.external_files = files;
|
||||
arg.options = in_opts;
|
||||
arg.files_checksums = files_checksums;
|
||||
arg.files_checksum_func_names = files_checksum_func_names;
|
||||
arg.file_temperature = file_temperature;
|
||||
s = db_->IngestExternalFiles({arg});
|
||||
ASSERT_OK(s);
|
||||
|
||||
// check the temperature of the file being ingested
|
||||
ColumnFamilyMetaData metadata;
|
||||
db_->GetColumnFamilyMetaData(&metadata);
|
||||
ASSERT_EQ(1, metadata.file_count);
|
||||
ASSERT_EQ(Temperature::kWarm, metadata.levels[6].files[0].temperature);
|
||||
size = GetSstSizeHelper(Temperature::kUnknown);
|
||||
ASSERT_EQ(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kWarm);
|
||||
ASSERT_GT(size, 1);
|
||||
|
||||
// non-bottommost file still has unknown temperature
|
||||
ASSERT_OK(Put("foo", "bar"));
|
||||
ASSERT_OK(Put("bar", "bar"));
|
||||
ASSERT_OK(Flush());
|
||||
db_->GetColumnFamilyMetaData(&metadata);
|
||||
ASSERT_EQ(2, metadata.file_count);
|
||||
ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
|
||||
size = GetSstSizeHelper(Temperature::kUnknown);
|
||||
ASSERT_GT(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kWarm);
|
||||
ASSERT_GT(size, 0);
|
||||
|
||||
// reopen and check the information is persisted
|
||||
Reopen(options);
|
||||
db_->GetColumnFamilyMetaData(&metadata);
|
||||
ASSERT_EQ(2, metadata.file_count);
|
||||
ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
|
||||
ASSERT_EQ(Temperature::kWarm, metadata.levels[6].files[0].temperature);
|
||||
size = GetSstSizeHelper(Temperature::kUnknown);
|
||||
ASSERT_GT(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kWarm);
|
||||
ASSERT_GT(size, 0);
|
||||
|
||||
// check other non-exist temperatures
|
||||
size = GetSstSizeHelper(Temperature::kHot);
|
||||
ASSERT_EQ(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kCold);
|
||||
ASSERT_EQ(size, 0);
|
||||
std::string prop;
|
||||
ASSERT_TRUE(dbfull()->GetProperty(
|
||||
DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22),
|
||||
&prop));
|
||||
ASSERT_EQ(std::atoi(prop.c_str()), 0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest,
|
||||
testing::Values(std::make_tuple(true, true),
|
||||
std::make_tuple(true, false),
|
||||
|
@ -31,7 +31,8 @@ Status ExternalSstFileIngestionJob::Prepare(
|
||||
const std::vector<std::string>& external_files_paths,
|
||||
const std::vector<std::string>& files_checksums,
|
||||
const std::vector<std::string>& files_checksum_func_names,
|
||||
uint64_t next_file_number, SuperVersion* sv) {
|
||||
const Temperature& file_temperature, uint64_t next_file_number,
|
||||
SuperVersion* sv) {
|
||||
Status status;
|
||||
|
||||
// Read the information of files we are ingesting
|
||||
@ -89,6 +90,11 @@ Status ExternalSstFileIngestionJob::Prepare(
|
||||
}
|
||||
}
|
||||
|
||||
// Hanlde the file temperature
|
||||
for (size_t i = 0; i < num_files; i++) {
|
||||
files_to_ingest_[i].file_temperature = file_temperature;
|
||||
}
|
||||
|
||||
if (ingestion_options_.ingest_behind && files_overlap_) {
|
||||
return Status::NotSupported("Files have overlapping ranges");
|
||||
}
|
||||
@ -429,12 +435,13 @@ Status ExternalSstFileIngestionJob::Run() {
|
||||
current_time = oldest_ancester_time =
|
||||
static_cast<uint64_t>(temp_current_time);
|
||||
}
|
||||
|
||||
edit_.AddFile(f.picked_level, f.fd.GetNumber(), f.fd.GetPathId(),
|
||||
f.fd.GetFileSize(), f.smallest_internal_key,
|
||||
f.largest_internal_key, f.assigned_seqno, f.assigned_seqno,
|
||||
false, kInvalidBlobFileNumber, oldest_ancester_time,
|
||||
FileMetaData f_metadata(
|
||||
f.fd.GetNumber(), f.fd.GetPathId(), f.fd.GetFileSize(),
|
||||
f.smallest_internal_key, f.largest_internal_key, f.assigned_seqno,
|
||||
f.assigned_seqno, false, kInvalidBlobFileNumber, oldest_ancester_time,
|
||||
current_time, f.file_checksum, f.file_checksum_func_name);
|
||||
f_metadata.temperature = f.file_temperature;
|
||||
edit_.AddFile(f.picked_level, f_metadata);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
@ -68,6 +68,8 @@ struct IngestedFileInfo {
|
||||
std::string file_checksum;
|
||||
// The name of checksum function that generate the checksum
|
||||
std::string file_checksum_func_name;
|
||||
// The temperature of the file to be ingested
|
||||
Temperature file_temperature = Temperature::kUnknown;
|
||||
};
|
||||
|
||||
class ExternalSstFileIngestionJob {
|
||||
@ -99,7 +101,8 @@ class ExternalSstFileIngestionJob {
|
||||
Status Prepare(const std::vector<std::string>& external_files_paths,
|
||||
const std::vector<std::string>& files_checksums,
|
||||
const std::vector<std::string>& files_checksum_func_names,
|
||||
uint64_t next_file_number, SuperVersion* sv);
|
||||
const Temperature& file_temperature, uint64_t next_file_number,
|
||||
SuperVersion* sv);
|
||||
|
||||
// Check if we need to flush the memtable before running the ingestion job
|
||||
// This will be true if the files we are ingesting are overlapping with any
|
||||
|
@ -121,12 +121,15 @@ struct RangePtr {
|
||||
// empty (no checksum information is provided for ingestion). Otherwise,
|
||||
// their sizes should be the same as external_files. The file order should
|
||||
// be the same in three vectors and guaranteed by the caller.
|
||||
// Note that, we assume the temperatures of this batch of files to be
|
||||
// ingested are the same.
|
||||
struct IngestExternalFileArg {
|
||||
ColumnFamilyHandle* column_family = nullptr;
|
||||
std::vector<std::string> external_files;
|
||||
IngestExternalFileOptions options;
|
||||
std::vector<std::string> files_checksums;
|
||||
std::vector<std::string> files_checksum_func_names;
|
||||
Temperature file_temperature = Temperature::kUnknown;
|
||||
};
|
||||
|
||||
struct GetMergeOperandsOptions {
|
||||
|
Loading…
Reference in New Issue
Block a user