diff --git a/HISTORY.md b/HISTORY.md index ffa718c9d..197687401 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,7 @@ * options.memtable_prefix_bloom_bits changes to options.memtable_prefix_bloom_bits_ratio and deprecate options.memtable_prefix_bloom_probes * enum type CompressionType and PerfLevel changes from char to unsigned char. Value of all PerfLevel shift by one. * Deprecate options.filter_deletes. +* Rename options.memtable_prefix_bloom_huge_page_tlb_size to options.memtable_huge_page_size. When it is set, RocksDB will try to allocate memory from huge pages for the memtable too, rather than just for the memtable bloom filter. ### New Features * Add avoid_flush_during_recovery option. diff --git a/db/c.cc b/db/c.cc index d722bb1f3..05802aa21 100644 --- a/db/c.cc +++ b/db/c.cc @@ -1797,9 +1797,9 @@ void rocksdb_options_set_memtable_prefix_bloom_size_ratio( opt->rep.memtable_prefix_bloom_size_ratio = v; } -void rocksdb_options_set_memtable_prefix_bloom_huge_page_tlb_size( - rocksdb_options_t* opt, size_t v) { - opt->rep.memtable_prefix_bloom_huge_page_tlb_size = v; +void rocksdb_options_set_memtable_huge_page_size(rocksdb_options_t* opt, + size_t v) { + opt->rep.memtable_huge_page_size = v; } void rocksdb_options_set_hash_skip_list_rep( diff --git a/db/memtable.cc b/db/memtable.cc index f263601c7..d8e35a289 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -44,8 +44,7 @@ MemTableOptions::MemTableOptions(const ImmutableCFOptions& ioptions, static_cast(mutable_cf_options.write_buffer_size) * mutable_cf_options.memtable_prefix_bloom_size_ratio) * 8u), - memtable_prefix_bloom_huge_page_tlb_size( - mutable_cf_options.memtable_prefix_bloom_huge_page_tlb_size), + memtable_huge_page_size(mutable_cf_options.memtable_huge_page_size), inplace_update_support(ioptions.inplace_update_support), inplace_update_num_locks(mutable_cf_options.inplace_update_num_locks), inplace_callback(ioptions.inplace_callback), @@ -63,7 +62,8 @@ MemTable::MemTable(const InternalKeyComparator& cmp, moptions_(ioptions, 
mutable_cf_options), refs_(0), kArenaBlockSize(OptimizeBlockSize(moptions_.arena_block_size)), - arena_(moptions_.arena_block_size, 0), + arena_(moptions_.arena_block_size, + mutable_cf_options.memtable_huge_page_size), allocator_(&arena_, write_buffer_manager), table_(ioptions.memtable_factory->CreateMemTableRep( comparator_, &allocator_, ioptions.prefix_extractor, @@ -92,7 +92,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp, prefix_bloom_.reset(new DynamicBloom( &allocator_, moptions_.memtable_prefix_bloom_bits, ioptions.bloom_locality, 6 /* hard coded 6 probes */, nullptr, - moptions_.memtable_prefix_bloom_huge_page_tlb_size, ioptions.info_log)); + moptions_.memtable_huge_page_size, ioptions.info_log)); } } diff --git a/db/memtable.h b/db/memtable.h index 40a02ea41..62931613e 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -41,7 +41,7 @@ struct MemTableOptions { size_t write_buffer_size; size_t arena_block_size; uint32_t memtable_prefix_bloom_bits; - size_t memtable_prefix_bloom_huge_page_tlb_size; + size_t memtable_huge_page_size; bool inplace_update_support; size_t inplace_update_num_locks; UpdateStatus (*inplace_callback)(char* existing_value, diff --git a/db/prefix_test.cc b/db/prefix_test.cc index 4a24a3b00..2af3897e4 100644 --- a/db/prefix_test.cc +++ b/db/prefix_test.cc @@ -44,7 +44,7 @@ DEFINE_int32(max_write_buffer_number, 2, ""); DEFINE_int32(min_write_buffer_number_to_merge, 1, ""); DEFINE_int32(skiplist_height, 4, ""); DEFINE_double(memtable_prefix_bloom_size_ratio, 0.1, ""); -DEFINE_int32(memtable_prefix_bloom_huge_page_tlb_size, 2 * 1024 * 1024, ""); +DEFINE_int32(memtable_huge_page_size, 2 * 1024 * 1024, ""); DEFINE_int32(value_size, 40, ""); // Path to the database on file system @@ -161,8 +161,7 @@ class PrefixTest : public testing::Test { options.memtable_prefix_bloom_size_ratio = FLAGS_memtable_prefix_bloom_size_ratio; - options.memtable_prefix_bloom_huge_page_tlb_size = - FLAGS_memtable_prefix_bloom_huge_page_tlb_size; + 
options.memtable_huge_page_size = FLAGS_memtable_huge_page_size; options.prefix_extractor.reset(NewFixedPrefixTransform(8)); BlockBasedTableOptions bbto; diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 18d44c058..e269aa9b4 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -678,8 +678,8 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_prefix_bloom_bits( rocksdb_options_t*, uint32_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_prefix_bloom_probes(rocksdb_options_t*, uint32_t); -extern ROCKSDB_LIBRARY_API void -rocksdb_options_set_memtable_prefix_bloom_huge_page_tlb_size(rocksdb_options_t*, size_t); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_huge_page_size( + rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_successive_merges( rocksdb_options_t*, size_t); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 1e3f58252..ac14fa570 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -746,14 +746,17 @@ struct ColumnFamilyOptions { // Dynamically changeable through SetOptions() API double memtable_prefix_bloom_size_ratio; - // Page size for huge page TLB for bloom in memtable. If <=0, not allocate - // from huge page TLB but from malloc. - // Need to reserve huge pages for it to be allocated. For example: + // Page size of the huge pages used by the memtable's arena. If 0, it + // won't allocate from huge pages but from malloc. + // Users are responsible for reserving huge pages for it to be allocated. For + // example: // sysctl -w vm.nr_hugepages=20 // See linux doc Documentation/vm/hugetlbpage.txt + // If there aren't enough free huge pages available, it will fall back to + // malloc. // // Dynamically changeable through SetOptions() API - size_t memtable_prefix_bloom_huge_page_tlb_size; + size_t memtable_huge_page_size; // Control locality of bloom filter probes to improve cache miss rate. 
// This option only applies to memtable prefix bloom and plaintable diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 485dc651d..4e0cb6f45 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -402,6 +402,8 @@ DEFINE_int32(bloom_bits, -1, "Bloom filter bits per key. Negative means" DEFINE_double(memtable_bloom_size_ratio, 0, "Ratio of memtable size used for bloom filter. 0 means no bloom " "filter."); +DEFINE_bool(memtable_use_huge_page, false, + "Try to use huge page in memtables."); DEFINE_bool(use_existing_db, false, "If true, do not destroy the existing" " database. If you set this flag and also specify a benchmark that" @@ -2464,6 +2466,7 @@ class Benchmark { exit(1); } } + options.memtable_huge_page_size = FLAGS_memtable_use_huge_page ? 2 * 1024 * 1024 : 0; options.memtable_prefix_bloom_size_ratio = FLAGS_memtable_bloom_size_ratio; options.bloom_locality = FLAGS_bloom_locality; options.max_file_opening_threads = FLAGS_file_opening_threads; diff --git a/tools/db_bench_tool_test.cc b/tools/db_bench_tool_test.cc index 4088483f4..978334174 100644 --- a/tools/db_bench_tool_test.cc +++ b/tools/db_bench_tool_test.cc @@ -229,7 +229,7 @@ const std::string options_file_content = R"OPTIONS_FILE( max_bytes_for_level_base=104857600 bloom_locality=0 target_file_size_base=10485760 - memtable_prefix_bloom_huge_page_tlb_size=0 + memtable_huge_page_size=0 max_successive_merges=1000 max_sequential_skip_in_iterations=8 arena_block_size=52428800 diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 57be48ff2..ad1d71ebb 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -1045,8 +1045,7 @@ class StressTest { }}, {"memtable_prefix_bloom_bits", {"0", "8", "10"}}, {"memtable_prefix_bloom_probes", {"4", "5", "6"}}, - {"memtable_prefix_bloom_huge_page_tlb_size", - {"0", ToString(2 * 1024 * 1024)}}, + {"memtable_huge_page_size", {"0", ToString(2 * 1024 * 1024)}}, {"max_successive_merges", {"0", "2", "4"}}, {"inplace_update_num_locks", {"100", "200", 
"300"}}, // TODO(ljin): enable test for this option diff --git a/util/mutable_cf_options.cc b/util/mutable_cf_options.cc index dffcc8b93..1dd970eb2 100644 --- a/util/mutable_cf_options.cc +++ b/util/mutable_cf_options.cc @@ -72,8 +72,8 @@ void MutableCFOptions::Dump(Logger* log) const { arena_block_size); Log(log, " memtable_prefix_bloom_ratio: %f", memtable_prefix_bloom_size_ratio); - Log(log, " memtable_prefix_bloom_huge_page_tlb_size: %" ROCKSDB_PRIszt, - memtable_prefix_bloom_huge_page_tlb_size); + Log(log, " memtable_huge_page_size: %" ROCKSDB_PRIszt, + memtable_huge_page_size); Log(log, " max_successive_merges: %" ROCKSDB_PRIszt, max_successive_merges); Log(log, " disable_auto_compactions: %d", diff --git a/util/mutable_cf_options.h b/util/mutable_cf_options.h index 1dbbfe76e..cd484c1e2 100644 --- a/util/mutable_cf_options.h +++ b/util/mutable_cf_options.h @@ -19,8 +19,7 @@ struct MutableCFOptions { arena_block_size(options.arena_block_size), memtable_prefix_bloom_size_ratio( options.memtable_prefix_bloom_size_ratio), - memtable_prefix_bloom_huge_page_tlb_size( - options.memtable_prefix_bloom_huge_page_tlb_size), + memtable_huge_page_size(options.memtable_huge_page_size), max_successive_merges(options.max_successive_merges), inplace_update_num_locks(options.inplace_update_num_locks), disable_auto_compactions(options.disable_auto_compactions), @@ -58,7 +57,7 @@ struct MutableCFOptions { max_write_buffer_number(0), arena_block_size(0), memtable_prefix_bloom_size_ratio(0), - memtable_prefix_bloom_huge_page_tlb_size(0), + memtable_huge_page_size(0), max_successive_merges(0), inplace_update_num_locks(0), disable_auto_compactions(false), @@ -108,7 +107,7 @@ struct MutableCFOptions { int max_write_buffer_number; size_t arena_block_size; double memtable_prefix_bloom_size_ratio; - size_t memtable_prefix_bloom_huge_page_tlb_size; + size_t memtable_huge_page_size; size_t max_successive_merges; size_t inplace_update_num_locks; diff --git a/util/options.cc 
b/util/options.cc index 16c7a7fa7..3614f3cbe 100644 --- a/util/options.cc +++ b/util/options.cc @@ -121,7 +121,7 @@ ColumnFamilyOptions::ColumnFamilyOptions() inplace_update_num_locks(10000), inplace_callback(nullptr), memtable_prefix_bloom_size_ratio(0.0), - memtable_prefix_bloom_huge_page_tlb_size(0), + memtable_huge_page_size(0), bloom_locality(0), max_successive_merges(0), min_partial_merge_operands(2), @@ -189,8 +189,7 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options) inplace_callback(options.inplace_callback), memtable_prefix_bloom_size_ratio( options.memtable_prefix_bloom_size_ratio), - memtable_prefix_bloom_huge_page_tlb_size( - options.memtable_prefix_bloom_huge_page_tlb_size), + memtable_huge_page_size(options.memtable_huge_page_size), bloom_locality(options.bloom_locality), max_successive_merges(options.max_successive_merges), min_partial_merge_operands(options.min_partial_merge_operands), @@ -599,9 +598,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const { Header(log, " Options.memtable_prefix_bloom_size_ratio: %f", memtable_prefix_bloom_size_ratio); - Header(log, - " Options.memtable_prefix_bloom_huge_page_tlb_size: %" ROCKSDB_PRIszt, - memtable_prefix_bloom_huge_page_tlb_size); + Header(log, " Options.memtable_huge_page_size: %" ROCKSDB_PRIszt, + memtable_huge_page_size); Header(log, " Options.bloom_locality: %d", bloom_locality); diff --git a/util/options_helper.cc b/util/options_helper.cc index 5d13c1aa4..259c27c81 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -543,8 +543,9 @@ bool ParseMemtableOptions(const std::string& name, const std::string& value, } else if (name == "memtable_prefix_bloom_probes") { // Deprecated } else if (name == "memtable_prefix_bloom_huge_page_tlb_size") { - new_options->memtable_prefix_bloom_huge_page_tlb_size = - ParseSizeT(value); + // Deprecated + } else if (name == "memtable_huge_page_size") { + new_options->memtable_huge_page_size = ParseSizeT(value); } else if (name == 
"max_successive_merges") { new_options->max_successive_merges = ParseSizeT(value); } else if (name == "filter_deletes") { @@ -1443,8 +1444,7 @@ ColumnFamilyOptions BuildColumnFamilyOptions( cf_opts.arena_block_size = mutable_cf_options.arena_block_size; cf_opts.memtable_prefix_bloom_size_ratio = mutable_cf_options.memtable_prefix_bloom_size_ratio; - cf_opts.memtable_prefix_bloom_huge_page_tlb_size = - mutable_cf_options.memtable_prefix_bloom_huge_page_tlb_size; + cf_opts.memtable_huge_page_size = mutable_cf_options.memtable_huge_page_size; cf_opts.max_successive_merges = mutable_cf_options.max_successive_merges; cf_opts.inplace_update_num_locks = mutable_cf_options.inplace_update_num_locks; diff --git a/util/options_helper.h b/util/options_helper.h index 074e121ea..b30cb4b90 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -428,10 +428,11 @@ static std::unordered_map cf_options_type_info = { {"max_successive_merges", {offsetof(struct ColumnFamilyOptions, max_successive_merges), OptionType::kSizeT, OptionVerificationType::kNormal}}, - {"memtable_prefix_bloom_huge_page_tlb_size", - {offsetof(struct ColumnFamilyOptions, - memtable_prefix_bloom_huge_page_tlb_size), + {"memtable_huge_page_size", + {offsetof(struct ColumnFamilyOptions, memtable_huge_page_size), OptionType::kSizeT, OptionVerificationType::kNormal}}, + {"memtable_prefix_bloom_huge_page_tlb_size", + {0, OptionType::kSizeT, OptionVerificationType::kDeprecated}}, {"write_buffer_size", {offsetof(struct ColumnFamilyOptions, write_buffer_size), OptionType::kSizeT, OptionVerificationType::kNormal}}, diff --git a/util/options_settable_test.cc b/util/options_settable_test.cc index b23e270e0..2d5e238c0 100644 --- a/util/options_settable_test.cc +++ b/util/options_settable_test.cc @@ -391,7 +391,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "max_bytes_for_level_base=986;" "bloom_locality=8016;" "target_file_size_base=4294976376;" - 
"memtable_prefix_bloom_huge_page_tlb_size=2557;" + "memtable_huge_page_size=2557;" "max_successive_merges=5497;" "max_sequential_skip_in_iterations=4294971408;" "arena_block_size=1893;" diff --git a/util/options_test.cc b/util/options_test.cc index a36fb3443..cb6841428 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -86,7 +86,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"compaction_measure_io_stats", "false"}, {"inplace_update_num_locks", "25"}, {"memtable_prefix_bloom_size_ratio", "0.26"}, - {"memtable_prefix_bloom_huge_page_tlb_size", "28"}, + {"memtable_huge_page_size", "28"}, {"bloom_locality", "29"}, {"max_successive_merges", "30"}, {"min_partial_merge_operands", "31"}, @@ -185,7 +185,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.inplace_update_support, true); ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 25U); ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_size_ratio, 0.26); - ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_huge_page_tlb_size, 28U); + ASSERT_EQ(new_cf_opt.memtable_huge_page_size, 28U); ASSERT_EQ(new_cf_opt.bloom_locality, 29U); ASSERT_EQ(new_cf_opt.max_successive_merges, 30U); ASSERT_EQ(new_cf_opt.min_partial_merge_operands, 31U); diff --git a/util/testutil.cc b/util/testutil.cc index bcaa279f4..ca209db88 100644 --- a/util/testutil.cc +++ b/util/testutil.cc @@ -328,7 +328,7 @@ void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, Random* rnd) { cf_opt->arena_block_size = rnd->Uniform(10000); cf_opt->inplace_update_num_locks = rnd->Uniform(10000); cf_opt->max_successive_merges = rnd->Uniform(10000); - cf_opt->memtable_prefix_bloom_huge_page_tlb_size = rnd->Uniform(10000); + cf_opt->memtable_huge_page_size = rnd->Uniform(10000); cf_opt->write_buffer_size = rnd->Uniform(10000); // uint32_t options