Rework VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC a bit (#9548)
Summary: We had a bug in `VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC` related to the edge case where all blob files are part of the "oldest batch", i.e. where only the very oldest file has any linked SSTs. (See https://github.com/facebook/rocksdb/issues/9542) This PR tries to make the logic in this method clearer and also adds a unit test for the problematic case. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9548 Test Plan: `make check` Reviewed By: akankshamahajan15 Differential Revision: D34158959 Pulled By: ltamasi fbshipit-source-id: fbab6d749c569728382aa04f7b7c60c92cca7650
This commit is contained in:
parent
fe9d495112
commit
a1203edca4
@ -2982,22 +2982,31 @@ void VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC(
|
||||
uint64_t sum_total_blob_bytes = oldest_meta->GetTotalBlobBytes();
|
||||
uint64_t sum_garbage_blob_bytes = oldest_meta->GetGarbageBlobBytes();
|
||||
|
||||
while (count < blob_files_.size()) {
|
||||
assert(cutoff_count <= blob_files_.size());
|
||||
|
||||
for (; count < cutoff_count; ++count) {
|
||||
const auto& meta = blob_files_[count];
|
||||
assert(meta);
|
||||
|
||||
if (!meta->GetLinkedSsts().empty()) {
|
||||
// Found the beginning of the next batch of blob files
|
||||
break;
|
||||
}
|
||||
|
||||
if (++count > cutoff_count) {
|
||||
return;
|
||||
}
|
||||
|
||||
sum_total_blob_bytes += meta->GetTotalBlobBytes();
|
||||
sum_garbage_blob_bytes += meta->GetGarbageBlobBytes();
|
||||
}
|
||||
|
||||
if (count < blob_files_.size()) {
|
||||
const auto& meta = blob_files_[count];
|
||||
assert(meta);
|
||||
|
||||
if (meta->GetLinkedSsts().empty()) {
|
||||
// Some files in the oldest batch are not eligible for GC
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (sum_garbage_blob_bytes <
|
||||
blob_garbage_collection_force_threshold * sum_total_blob_bytes) {
|
||||
return;
|
||||
|
@ -540,7 +540,131 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCEmpty) {
|
||||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
TEST_F(VersionStorageInfoTest, ForcedBlobGC) {
|
||||
TEST_F(VersionStorageInfoTest, ForcedBlobGCSingleBatch) {
|
||||
// Test the edge case when all blob files are part of the oldest batch.
|
||||
// We have one L0 SST file #1, and four blob files #10, #11, #12, and #13.
|
||||
// The oldest blob file used by SST #1 is blob file #10.
|
||||
|
||||
constexpr int level = 0;
|
||||
|
||||
constexpr uint64_t sst = 1;
|
||||
|
||||
constexpr uint64_t first_blob = 10;
|
||||
constexpr uint64_t second_blob = 11;
|
||||
constexpr uint64_t third_blob = 12;
|
||||
constexpr uint64_t fourth_blob = 13;
|
||||
|
||||
{
|
||||
constexpr char smallest[] = "bar1";
|
||||
constexpr char largest[] = "foo1";
|
||||
constexpr uint64_t file_size = 1000;
|
||||
|
||||
Add(level, sst, smallest, largest, file_size, first_blob);
|
||||
}
|
||||
|
||||
{
|
||||
constexpr uint64_t total_blob_count = 10;
|
||||
constexpr uint64_t total_blob_bytes = 100000;
|
||||
constexpr uint64_t garbage_blob_count = 2;
|
||||
constexpr uint64_t garbage_blob_bytes = 15000;
|
||||
|
||||
AddBlob(first_blob, total_blob_count, total_blob_bytes,
|
||||
BlobFileMetaData::LinkedSsts{sst}, garbage_blob_count,
|
||||
garbage_blob_bytes);
|
||||
}
|
||||
|
||||
{
|
||||
constexpr uint64_t total_blob_count = 4;
|
||||
constexpr uint64_t total_blob_bytes = 400000;
|
||||
constexpr uint64_t garbage_blob_count = 3;
|
||||
constexpr uint64_t garbage_blob_bytes = 235000;
|
||||
|
||||
AddBlob(second_blob, total_blob_count, total_blob_bytes,
|
||||
BlobFileMetaData::LinkedSsts{}, garbage_blob_count,
|
||||
garbage_blob_bytes);
|
||||
}
|
||||
|
||||
{
|
||||
constexpr uint64_t total_blob_count = 20;
|
||||
constexpr uint64_t total_blob_bytes = 1000000;
|
||||
constexpr uint64_t garbage_blob_count = 8;
|
||||
constexpr uint64_t garbage_blob_bytes = 400000;
|
||||
|
||||
AddBlob(third_blob, total_blob_count, total_blob_bytes,
|
||||
BlobFileMetaData::LinkedSsts{}, garbage_blob_count,
|
||||
garbage_blob_bytes);
|
||||
}
|
||||
|
||||
{
|
||||
constexpr uint64_t total_blob_count = 128;
|
||||
constexpr uint64_t total_blob_bytes = 1000000;
|
||||
constexpr uint64_t garbage_blob_count = 67;
|
||||
constexpr uint64_t garbage_blob_bytes = 600000;
|
||||
|
||||
AddBlob(fourth_blob, total_blob_count, total_blob_bytes,
|
||||
BlobFileMetaData::LinkedSsts{}, garbage_blob_count,
|
||||
garbage_blob_bytes);
|
||||
}
|
||||
|
||||
UpdateVersionStorageInfo();
|
||||
|
||||
assert(vstorage_.num_levels() > 0);
|
||||
const auto& level_files = vstorage_.LevelFiles(level);
|
||||
|
||||
assert(level_files.size() == 1);
|
||||
assert(level_files[0] && level_files[0]->fd.GetNumber() == sst);
|
||||
|
||||
// No blob files eligible for GC due to the age cutoff
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 0.1;
|
||||
constexpr double force_threshold = 0.0;
|
||||
vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
|
||||
|
||||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Part of the oldest batch of blob files (specifically, #12 and #13) is
|
||||
// ineligible for GC due to the age cutoff
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 0.5;
|
||||
constexpr double force_threshold = 0.0;
|
||||
vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
|
||||
|
||||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Oldest batch is eligible based on age cutoff but its overall garbage ratio
|
||||
// is below threshold
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 1.0;
|
||||
constexpr double force_threshold = 0.6;
|
||||
vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
|
||||
|
||||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Oldest batch is eligible based on age cutoff and its overall garbage ratio
|
||||
// meets threshold
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 1.0;
|
||||
constexpr double force_threshold = 0.5;
|
||||
vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
|
||||
|
||||
auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC();
|
||||
ASSERT_EQ(ssts_to_be_compacted.size(), 1);
|
||||
|
||||
const autovector<std::pair<int, FileMetaData*>>
|
||||
expected_ssts_to_be_compacted{{level, level_files[0]}};
|
||||
|
||||
ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) {
|
||||
// Add three L0 SSTs (1, 2, and 3) and four blob files (10, 11, 12, and 13).
|
||||
// The first two SSTs have the same oldest blob file, namely, the very oldest
|
||||
// one (10), while the third SST's oldest blob file reference points to the
|
||||
@ -695,6 +819,46 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGC) {
|
||||
ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]);
|
||||
ASSERT_EQ(ssts_to_be_compacted[1], expected_ssts_to_be_compacted[1]);
|
||||
}
|
||||
|
||||
// Now try the last two cases again with a greater than necessary age cutoff
|
||||
|
||||
// Oldest batch is eligible based on age cutoff but its overall garbage ratio
|
||||
// is below threshold
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 0.75;
|
||||
constexpr double force_threshold = 0.6;
|
||||
vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
|
||||
|
||||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Oldest batch is eligible based on age cutoff and its overall garbage ratio
|
||||
// meets threshold
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 0.75;
|
||||
constexpr double force_threshold = 0.5;
|
||||
vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold);
|
||||
|
||||
auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC();
|
||||
ASSERT_EQ(ssts_to_be_compacted.size(), 2);
|
||||
|
||||
std::sort(ssts_to_be_compacted.begin(), ssts_to_be_compacted.end(),
|
||||
[](const std::pair<int, FileMetaData*>& lhs,
|
||||
const std::pair<int, FileMetaData*>& rhs) {
|
||||
assert(lhs.second);
|
||||
assert(rhs.second);
|
||||
return lhs.second->fd.GetNumber() < rhs.second->fd.GetNumber();
|
||||
});
|
||||
|
||||
const autovector<std::pair<int, FileMetaData*>>
|
||||
expected_ssts_to_be_compacted{{level, level_files[0]},
|
||||
{level, level_files[1]}};
|
||||
|
||||
ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]);
|
||||
ASSERT_EQ(ssts_to_be_compacted[1], expected_ssts_to_be_compacted[1]);
|
||||
}
|
||||
}
|
||||
|
||||
class VersionStorageInfoTimestampTest : public VersionStorageInfoTestBase {
|
||||
|
Loading…
Reference in New Issue
Block a user