diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index bcbd73d3e..4f2d376e8 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -733,7 +733,8 @@ struct AdvancedColumnFamilyOptions { // only pointers to them are stored in SST files. This can reduce write // amplification for large-value use cases at the cost of introducing a level // of indirection for reads. See also the options min_blob_size, - // blob_file_size, and blob_compression_type below. + // blob_file_size, blob_compression_type, enable_blob_garbage_collection, + // and blob_garbage_collection_age_cutoff below. // // Default: false // @@ -773,6 +774,30 @@ struct AdvancedColumnFamilyOptions { // Dynamically changeable through the SetOptions() API CompressionType blob_compression_type = kNoCompression; + // UNDER CONSTRUCTION -- DO NOT USE + // Enables garbage collection of blobs. Blob GC is performed as part of + // compaction. Valid blobs residing in blob files older than a cutoff get + // relocated to new files as they are encountered during compaction, which + // makes it possible to clean up blob files once they contain nothing but + // obsolete/garbage blobs. See also blob_garbage_collection_age_cutoff below. + // + // Default: false + // + // Dynamically changeable through the SetOptions() API + bool enable_blob_garbage_collection = false; + + // UNDER CONSTRUCTION -- DO NOT USE + // The cutoff in terms of blob file age for garbage collection. Blobs in + // the oldest N blob files will be relocated when encountered during + // compaction, where N = blob_garbage_collection_age_cutoff * + // number_of_blob_files. Note that enable_blob_garbage_collection has to be + // set in order for this option to have any effect. 
+ // + // Default: 0.25 + // + // Dynamically changeable through the SetOptions() API + double blob_garbage_collection_age_cutoff = 0.25; + // Create ColumnFamilyOptions with default values for all fields AdvancedColumnFamilyOptions(); // Create ColumnFamilyOptions from Options diff --git a/options/cf_options.cc b/options/cf_options.cc index 5ddd8fa81..c436dd312 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -408,6 +408,14 @@ static std::unordered_map {offsetof(struct MutableCFOptions, blob_compression_type), OptionType::kCompressionType, OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"enable_blob_garbage_collection", + {offsetof(struct MutableCFOptions, enable_blob_garbage_collection), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"blob_garbage_collection_age_cutoff", + {offsetof(struct MutableCFOptions, blob_garbage_collection_age_cutoff), + OptionType::kDouble, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, {"sample_for_compression", {offsetof(struct MutableCFOptions, sample_for_compression), OptionType::kUInt64T, OptionVerificationType::kNormal, @@ -1006,6 +1014,10 @@ void MutableCFOptions::Dump(Logger* log) const { blob_file_size); ROCKS_LOG_INFO(log, " blob_compression_type: %s", CompressionTypeToString(blob_compression_type).c_str()); + ROCKS_LOG_INFO(log, " enable_blob_garbage_collection: %s", + enable_blob_garbage_collection ? 
"true" : "false"); + ROCKS_LOG_INFO(log, " blob_garbage_collection_age_cutoff: %f", + blob_garbage_collection_age_cutoff); } MutableCFOptions::MutableCFOptions(const Options& options) diff --git a/options/cf_options.h b/options/cf_options.h index b74fbf1a7..c9e8f068f 100644 --- a/options/cf_options.h +++ b/options/cf_options.h @@ -165,6 +165,9 @@ struct MutableCFOptions { min_blob_size(options.min_blob_size), blob_file_size(options.blob_file_size), blob_compression_type(options.blob_compression_type), + enable_blob_garbage_collection(options.enable_blob_garbage_collection), + blob_garbage_collection_age_cutoff( + options.blob_garbage_collection_age_cutoff), max_sequential_skip_in_iterations( options.max_sequential_skip_in_iterations), check_flush_compaction_key_order( @@ -208,6 +211,8 @@ struct MutableCFOptions { min_blob_size(0), blob_file_size(0), blob_compression_type(kNoCompression), + enable_blob_garbage_collection(false), + blob_garbage_collection_age_cutoff(0.0), max_sequential_skip_in_iterations(0), check_flush_compaction_key_order(true), paranoid_file_checks(false), @@ -269,6 +274,8 @@ struct MutableCFOptions { uint64_t min_blob_size; uint64_t blob_file_size; CompressionType blob_compression_type; + bool enable_blob_garbage_collection; + double blob_garbage_collection_age_cutoff; // Misc options uint64_t max_sequential_skip_in_iterations; diff --git a/options/options.cc b/options/options.cc index cf00059b7..d76a15441 100644 --- a/options/options.cc +++ b/options/options.cc @@ -92,7 +92,10 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options) enable_blob_files(options.enable_blob_files), min_blob_size(options.min_blob_size), blob_file_size(options.blob_file_size), - blob_compression_type(options.blob_compression_type) { + blob_compression_type(options.blob_compression_type), + enable_blob_garbage_collection(options.enable_blob_garbage_collection), + blob_garbage_collection_age_cutoff( + 
options.blob_garbage_collection_age_cutoff) { assert(memtable_factory.get() != nullptr); if (max_bytes_for_level_multiplier_additional.size() < static_cast(num_levels)) { @@ -383,6 +386,10 @@ void ColumnFamilyOptions::Dump(Logger* log) const { blob_file_size); ROCKS_LOG_HEADER(log, " Options.blob_compression_type: %s", CompressionTypeToString(blob_compression_type).c_str()); + ROCKS_LOG_HEADER(log, " Options.enable_blob_garbage_collection: %s", + enable_blob_garbage_collection ? "true" : "false"); + ROCKS_LOG_HEADER(log, " Options.blob_garbage_collection_age_cutoff: %f", + blob_garbage_collection_age_cutoff); } // ColumnFamilyOptions::Dump void Options::Dump(Logger* log) const { diff --git a/options/options_helper.cc b/options/options_helper.cc index a0a3ba1d6..47bf74ef0 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -229,6 +229,10 @@ ColumnFamilyOptions BuildColumnFamilyOptions( cf_opts.min_blob_size = mutable_cf_options.min_blob_size; cf_opts.blob_file_size = mutable_cf_options.blob_file_size; cf_opts.blob_compression_type = mutable_cf_options.blob_compression_type; + cf_opts.enable_blob_garbage_collection = + mutable_cf_options.enable_blob_garbage_collection; + cf_opts.blob_garbage_collection_age_cutoff = + mutable_cf_options.blob_garbage_collection_age_cutoff; // Misc options cf_opts.max_sequential_skip_in_iterations = diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 21bb76152..630aa302f 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -504,6 +504,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "min_blob_size=256;" "blob_file_size=1000000;" "blob_compression_type=kBZip2Compression;" + "enable_blob_garbage_collection=true;" + "blob_garbage_collection_age_cutoff=0.5;" "compaction_options_fifo={max_table_files_size=3;allow_" "compaction=false;};", new_options)); diff --git a/options/options_test.cc b/options/options_test.cc index 
ffa91f2cd..5aa035fd2 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -102,6 +102,8 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"min_blob_size", "1K"}, {"blob_file_size", "1G"}, {"blob_compression_type", "kZSTD"}, + {"enable_blob_garbage_collection", "true"}, + {"blob_garbage_collection_age_cutoff", "0.5"}, }; std::unordered_map db_options_map = { @@ -231,6 +233,8 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.min_blob_size, 1ULL << 10); ASSERT_EQ(new_cf_opt.blob_file_size, 1ULL << 30); ASSERT_EQ(new_cf_opt.blob_compression_type, kZSTD); + ASSERT_EQ(new_cf_opt.enable_blob_garbage_collection, true); + ASSERT_EQ(new_cf_opt.blob_garbage_collection_age_cutoff, 0.5); cf_options_map["write_buffer_size"] = "hello"; ASSERT_NOK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, @@ -1649,6 +1653,8 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { {"min_blob_size", "1K"}, {"blob_file_size", "1G"}, {"blob_compression_type", "kZSTD"}, + {"enable_blob_garbage_collection", "true"}, + {"blob_garbage_collection_age_cutoff", "0.5"}, }; std::unordered_map db_options_map = { @@ -1770,6 +1776,8 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.min_blob_size, 1ULL << 10); ASSERT_EQ(new_cf_opt.blob_file_size, 1ULL << 30); ASSERT_EQ(new_cf_opt.blob_compression_type, kZSTD); + ASSERT_EQ(new_cf_opt.enable_blob_garbage_collection, true); + ASSERT_EQ(new_cf_opt.blob_garbage_collection_age_cutoff, 0.5); cf_options_map["write_buffer_size"] = "hello"; ASSERT_NOK(GetColumnFamilyOptionsFromMap( diff --git a/test_util/testutil.cc b/test_util/testutil.cc index 276629dd7..86c67a182 100644 --- a/test_util/testutil.cc +++ b/test_util/testutil.cc @@ -381,12 +381,14 @@ void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, DBOptions& db_options, cf_opt->compaction_options_fifo.allow_compaction = rnd->Uniform(2); cf_opt->memtable_whole_key_filtering = rnd->Uniform(2); cf_opt->enable_blob_files = rnd->Uniform(2); + 
cf_opt->enable_blob_garbage_collection = rnd->Uniform(2); // double options cf_opt->hard_rate_limit = static_cast(rnd->Uniform(10000)) / 13; cf_opt->soft_rate_limit = static_cast(rnd->Uniform(10000)) / 13; cf_opt->memtable_prefix_bloom_size_ratio = static_cast(rnd->Uniform(10000)) / 20000.0; + cf_opt->blob_garbage_collection_age_cutoff = rnd->Uniform(10000) / 10000.0; // int options cf_opt->level0_file_num_compaction_trigger = rnd->Uniform(100);