diff --git a/db/column_family.cc b/db/column_family.cc index 14551d652..745f5de59 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -148,6 +149,16 @@ Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) { "should be nonzero if we're using zstd's dictionary generator."); } } + + if (!CompressionTypeSupported(cf_options.blob_compression_type)) { + std::ostringstream oss; + oss << "The specified blob compression type " + << CompressionTypeToString(cf_options.blob_compression_type) + << " is not available."; + + return Status::InvalidArgument(oss.str()); + } + return Status::OK(); } diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 2a4ae6b5d..68b79855f 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -717,6 +717,51 @@ struct AdvancedColumnFamilyOptions { // data is left uncompressed (unless compression is also requested). uint64_t sample_for_compression = 0; + // UNDER CONSTRUCTION -- DO NOT USE + // When set, large values (blobs) are written to separate blob files, and + // only pointers to them are stored in SST files. This can reduce write + // amplification for large-value use cases at the cost of introducing a level + // of indirection for reads. See also the options min_blob_size, + // blob_file_size, and blob_compression_type below. + // + // Default: false + // + // Dynamically changeable through the SetOptions() API + bool enable_blob_files = false; + + // UNDER CONSTRUCTION -- DO NOT USE + // The size of the smallest value to be stored separately in a blob file. + // Values which have an uncompressed size smaller than this threshold are + // stored alongside the keys in SST files in the usual fashion. A value of + // zero for this option means that all values are stored in blob files. Note + // that enable_blob_files has to be set in order for this option to have any + // effect. + // + // Default: 0 + // + // Dynamically changeable through the SetOptions() API + uint64_t min_blob_size = 0; + + // UNDER CONSTRUCTION -- DO NOT USE + // The size limit for blob files. When writing blob files, a new file is + // opened once this limit is reached. Note that enable_blob_files has to be + // set in order for this option to have any effect. + // + // Default: 256 MB + // + // Dynamically changeable through the SetOptions() API + uint64_t blob_file_size = 1ULL << 28; + + // UNDER CONSTRUCTION -- DO NOT USE + // The compression algorithm to use for large values stored in blob files. + // Note that enable_blob_files has to be set in order for this option to have + // any effect. + // + // Default: no compression + // + // Dynamically changeable through the SetOptions() API + CompressionType blob_compression_type = kNoCompression; + // Create ColumnFamilyOptions with default values for all fields AdvancedColumnFamilyOptions(); // Create ColumnFamilyOptions from Options diff --git a/options/cf_options.cc b/options/cf_options.cc index c39b420aa..bfee67f73 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -631,6 +631,24 @@ std::unordered_map OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, sample_for_compression)}}, + {"enable_blob_files", + {offset_of(&ColumnFamilyOptions::enable_blob_files), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable, + offsetof(struct MutableCFOptions, enable_blob_files)}}, + {"min_blob_size", + {offset_of(&ColumnFamilyOptions::min_blob_size), OptionType::kUInt64T, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable, + offsetof(struct MutableCFOptions, min_blob_size)}}, + {"blob_file_size", + {offset_of(&ColumnFamilyOptions::blob_file_size), OptionType::kUInt64T, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable, + offsetof(struct MutableCFOptions, blob_file_size)}}, + {"blob_compression_type", + {offset_of(&ColumnFamilyOptions::blob_compression_type), + OptionType::kCompressionType, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable, + offsetof(struct MutableCFOptions, blob_compression_type)}}, // The following properties were handled as special cases in ParseOption // This means that the properties could be read from the options file // but never written to the file or compared to each other. @@ -920,6 +938,16 @@ void MutableCFOptions::Dump(Logger* log) const { compaction_options_fifo.max_table_files_size); ROCKS_LOG_INFO(log, "compaction_options_fifo.allow_compaction : %d", compaction_options_fifo.allow_compaction); + + // Blob file related options + ROCKS_LOG_INFO(log, " enable_blob_files: %s", + enable_blob_files ? "true" : "false"); + ROCKS_LOG_INFO(log, " min_blob_size: %" PRIu64, + min_blob_size); + ROCKS_LOG_INFO(log, " blob_file_size: %" PRIu64, + blob_file_size); + ROCKS_LOG_INFO(log, " blob_compression_type: %s", + CompressionTypeToString(blob_compression_type).c_str()); } MutableCFOptions::MutableCFOptions(const Options& options) diff --git a/options/cf_options.h b/options/cf_options.h index 4aab909ea..a2fbf2240 100644 --- a/options/cf_options.h +++ b/options/cf_options.h @@ -155,6 +155,10 @@ struct MutableCFOptions { options.max_bytes_for_level_multiplier_additional), compaction_options_fifo(options.compaction_options_fifo), compaction_options_universal(options.compaction_options_universal), + enable_blob_files(options.enable_blob_files), + min_blob_size(options.min_blob_size), + blob_file_size(options.blob_file_size), + blob_compression_type(options.blob_compression_type), max_sequential_skip_in_iterations( options.max_sequential_skip_in_iterations), paranoid_file_checks(options.paranoid_file_checks), @@ -192,6 +196,10 @@ struct MutableCFOptions { ttl(0), periodic_compaction_seconds(0), compaction_options_fifo(), + enable_blob_files(false), + min_blob_size(0), + blob_file_size(0), + blob_compression_type(kNoCompression), max_sequential_skip_in_iterations(0), paranoid_file_checks(false), report_bg_io_stats(false), @@ -247,6 +255,12 @@ struct MutableCFOptions { CompactionOptionsFIFO compaction_options_fifo; CompactionOptionsUniversal compaction_options_universal; + // Blob file related options + bool enable_blob_files; + uint64_t min_blob_size; + uint64_t blob_file_size; + CompressionType blob_compression_type; + // Misc options uint64_t max_sequential_skip_in_iterations; bool paranoid_file_checks; diff --git a/options/options.cc b/options/options.cc index d2b8d5d4c..a1848219a 100644 --- a/options/options.cc +++ b/options/options.cc @@ -88,7 +88,11 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options) report_bg_io_stats(options.report_bg_io_stats), ttl(options.ttl), periodic_compaction_seconds(options.periodic_compaction_seconds), - sample_for_compression(options.sample_for_compression) { + sample_for_compression(options.sample_for_compression), + enable_blob_files(options.enable_blob_files), + min_blob_size(options.min_blob_size), + blob_file_size(options.blob_file_size), + blob_compression_type(options.blob_compression_type) { assert(memtable_factory.get() != nullptr); if (max_bytes_for_level_multiplier_additional.size() < static_cast(num_levels)) { @@ -369,6 +373,16 @@ void ColumnFamilyOptions::Dump(Logger* log) const { ROCKS_LOG_HEADER(log, " Options.periodic_compaction_seconds: %" PRIu64, periodic_compaction_seconds); + ROCKS_LOG_HEADER(log, " Options.enable_blob_files: %s", + enable_blob_files ? "true" : "false"); + ROCKS_LOG_HEADER(log, + " Options.min_blob_size: %" PRIu64, + min_blob_size); + ROCKS_LOG_HEADER(log, + " Options.blob_file_size: %" PRIu64, + blob_file_size); + ROCKS_LOG_HEADER(log, " Options.blob_compression_type: %s", + CompressionTypeToString(blob_compression_type).c_str()); } // ColumnFamilyOptions::Dump void Options::Dump(Logger* log) const { diff --git a/options/options_helper.cc b/options/options_helper.cc index 62bcb9ee4..dddefeebb 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -207,6 +207,12 @@ ColumnFamilyOptions BuildColumnFamilyOptions( cf_opts.compaction_options_universal = mutable_cf_options.compaction_options_universal; + // Blob file related options + cf_opts.enable_blob_files = mutable_cf_options.enable_blob_files; + cf_opts.min_blob_size = mutable_cf_options.min_blob_size; + cf_opts.blob_file_size = mutable_cf_options.blob_file_size; + cf_opts.blob_compression_type = mutable_cf_options.blob_compression_type; + // Misc options cf_opts.max_sequential_skip_in_iterations = mutable_cf_options.max_sequential_skip_in_iterations; diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index ff945694f..70e0bdb51 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -490,6 +490,10 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "ttl=60;" "periodic_compaction_seconds=3600;" "sample_for_compression=0;" + "enable_blob_files=true;" + "min_blob_size=256;" + "blob_file_size=1000000;" + "blob_compression_type=kBZip2Compression;" "compaction_options_fifo={max_table_files_size=3;allow_" "compaction=false;};", new_options)); diff --git a/options/options_test.cc b/options/options_test.cc index c068843d3..8a981c182 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -97,6 +97,10 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"min_partial_merge_operands", "31"}, {"prefix_extractor", "fixed:31"}, {"optimize_filters_for_hits", "true"}, + {"enable_blob_files", "true"}, + {"min_blob_size", "1K"}, + {"blob_file_size", "1G"}, + {"blob_compression_type", "kZSTD"}, }; std::unordered_map db_options_map = { @@ -221,6 +225,10 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true); ASSERT_EQ(std::string(new_cf_opt.prefix_extractor->Name()), "rocksdb.FixedPrefix.31"); + ASSERT_EQ(new_cf_opt.enable_blob_files, true); + ASSERT_EQ(new_cf_opt.min_blob_size, 1ULL << 10); + ASSERT_EQ(new_cf_opt.blob_file_size, 1ULL << 30); + ASSERT_EQ(new_cf_opt.blob_compression_type, kZSTD); cf_options_map["write_buffer_size"] = "hello"; ASSERT_NOK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, @@ -1445,7 +1453,7 @@ TEST_F(OptionsTest, ConvertOptionsTest) { // This test suite tests the old APIs into the Configure options methods. // Once those APIs are officially deprecated, this test suite can be deleted. class OptionsOldApiTest : public testing::Test {}; - + TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { std::unordered_map cf_options_map = { {"write_buffer_size", "1"}, @@ -1500,6 +1508,10 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { {"min_partial_merge_operands", "31"}, {"prefix_extractor", "fixed:31"}, {"optimize_filters_for_hits", "true"}, + {"enable_blob_files", "true"}, + {"min_blob_size", "1K"}, + {"blob_file_size", "1G"}, + {"blob_compression_type", "kZSTD"}, }; std::unordered_map db_options_map = { @@ -1616,6 +1628,10 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true); ASSERT_EQ(std::string(new_cf_opt.prefix_extractor->Name()), "rocksdb.FixedPrefix.31"); + ASSERT_EQ(new_cf_opt.enable_blob_files, true); + ASSERT_EQ(new_cf_opt.min_blob_size, 1ULL << 10); + ASSERT_EQ(new_cf_opt.blob_file_size, 1ULL << 30); + ASSERT_EQ(new_cf_opt.blob_compression_type, kZSTD); cf_options_map["write_buffer_size"] = "hello"; ASSERT_NOK(GetColumnFamilyOptionsFromMap( @@ -1910,7 +1926,7 @@ TEST_F(OptionsOldApiTest, GetColumnFamilyOptionsFromStringTest) { ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr); ASSERT_EQ(std::string(new_cf_opt.memtable_factory->Name()), "SkipListFactory"); } - + TEST_F(OptionsOldApiTest, GetBlockBasedTableOptionsFromString) { BlockBasedTableOptions table_opt; BlockBasedTableOptions new_opt; @@ -2085,7 +2101,7 @@ TEST_F(OptionsOldApiTest, GetBlockBasedTableOptionsFromString) { ->GetHighPriPoolRatio(), 0.5); } - + TEST_F(OptionsOldApiTest, GetPlainTableOptionsFromString) { PlainTableOptions table_opt; PlainTableOptions new_opt; @@ -2116,7 +2132,7 @@ TEST_F(OptionsOldApiTest, GetPlainTableOptionsFromString) { "encoding_type=kPrefixXX", &new_opt)); } - + TEST_F(OptionsOldApiTest, GetOptionsFromStringTest) { Options base_options, new_options; base_options.write_buffer_size = 20; @@ -2674,7 +2690,7 @@ TEST_F(OptionsParserTest, Readahead) { uint64_t file_size = 0; ASSERT_OK(env_->GetFileSize(kOptionsFileName, &file_size)); assert(file_size > 0); - + RocksDBOptionsParser parser; env_->num_seq_file_read_ = 0; diff --git a/test_util/testutil.cc b/test_util/testutil.cc index 234c76d81..d643b1d8b 100644 --- a/test_util/testutil.cc +++ b/test_util/testutil.cc @@ -368,6 +368,7 @@ void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, DBOptions& db_options, cf_opt->force_consistency_checks = rnd->Uniform(2); cf_opt->compaction_options_fifo.allow_compaction = rnd->Uniform(2); cf_opt->memtable_whole_key_filtering = rnd->Uniform(2); + cf_opt->enable_blob_files = rnd->Uniform(2); // double options cf_opt->hard_rate_limit = static_cast(rnd->Uniform(10000)) / 13; @@ -417,6 +418,8 @@ void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, DBOptions& db_options, cf_opt->target_file_size_base * rnd->Uniform(100); cf_opt->compaction_options_fifo.max_table_files_size = uint_max + rnd->Uniform(10000); + cf_opt->min_blob_size = uint_max + rnd->Uniform(10000); + cf_opt->blob_file_size = uint_max + rnd->Uniform(10000); // unsigned int options cf_opt->rate_limit_delay_max_milliseconds = rnd->Uniform(10000); @@ -435,6 +438,7 @@ void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, DBOptions& db_options, cf_opt->compression = RandomCompressionType(rnd); RandomCompressionTypeVector(cf_opt->num_levels, &cf_opt->compression_per_level, rnd); + cf_opt->blob_compression_type = RandomCompressionType(rnd); } bool IsDirectIOSupported(Env* env, const std::string& dir) {