Rename option memtable_prefix_bloom_huge_page_tlb_size => memtable_huge_page_size and allocate the memtable itself from huge pages too

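Summary: Extend the option memtable_prefix_bloom_huge_page_tlb_size so that, in addition to the memtable bloom filter, the memtable itself is also allocated from huge pages. The option is renamed to memtable_huge_page_size to reflect the wider scope.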

Test Plan: Run all existing tests.

Reviewers: IslamAbdelRahman, yhchiang, andrewkr

Reviewed By: andrewkr

Subscribers: leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D60513
This commit is contained in:
sdong 2016-07-26 18:05:30 -07:00
parent 0ce258f9b3
commit e5b5f12b81
18 changed files with 46 additions and 43 deletions

View File

@ -4,6 +4,7 @@
* options.memtable_prefix_bloom_bits changes to options.memtable_prefix_bloom_bits_ratio and deprecate options.memtable_prefix_bloom_probes
* enum type CompressionType and PerfLevel changes from char to unsigned char. Value of all PerfLevel shift by one.
* Deprecate options.filter_deletes.
* options.memtable_prefix_bloom_huge_page_tlb_size => memtable_huge_page_size. When it is set, RocksDB will try to allocate memory from huge pages for the memtable too, rather than just for the memtable bloom filter.
### New Features
* Add avoid_flush_during_recovery option.

View File

@ -1797,9 +1797,9 @@ void rocksdb_options_set_memtable_prefix_bloom_size_ratio(
opt->rep.memtable_prefix_bloom_size_ratio = v;
}
void rocksdb_options_set_memtable_prefix_bloom_huge_page_tlb_size(
rocksdb_options_t* opt, size_t v) {
opt->rep.memtable_prefix_bloom_huge_page_tlb_size = v;
void rocksdb_options_set_memtable_huge_page_size(rocksdb_options_t* opt,
size_t v) {
opt->rep.memtable_huge_page_size = v;
}
void rocksdb_options_set_hash_skip_list_rep(

View File

@ -44,8 +44,7 @@ MemTableOptions::MemTableOptions(const ImmutableCFOptions& ioptions,
static_cast<double>(mutable_cf_options.write_buffer_size) *
mutable_cf_options.memtable_prefix_bloom_size_ratio) *
8u),
memtable_prefix_bloom_huge_page_tlb_size(
mutable_cf_options.memtable_prefix_bloom_huge_page_tlb_size),
memtable_huge_page_size(mutable_cf_options.memtable_huge_page_size),
inplace_update_support(ioptions.inplace_update_support),
inplace_update_num_locks(mutable_cf_options.inplace_update_num_locks),
inplace_callback(ioptions.inplace_callback),
@ -63,7 +62,8 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
moptions_(ioptions, mutable_cf_options),
refs_(0),
kArenaBlockSize(OptimizeBlockSize(moptions_.arena_block_size)),
arena_(moptions_.arena_block_size, 0),
arena_(moptions_.arena_block_size,
mutable_cf_options.memtable_huge_page_size),
allocator_(&arena_, write_buffer_manager),
table_(ioptions.memtable_factory->CreateMemTableRep(
comparator_, &allocator_, ioptions.prefix_extractor,
@ -92,7 +92,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
prefix_bloom_.reset(new DynamicBloom(
&allocator_, moptions_.memtable_prefix_bloom_bits,
ioptions.bloom_locality, 6 /* hard coded 6 probes */, nullptr,
moptions_.memtable_prefix_bloom_huge_page_tlb_size, ioptions.info_log));
moptions_.memtable_huge_page_size, ioptions.info_log));
}
}

View File

@ -41,7 +41,7 @@ struct MemTableOptions {
size_t write_buffer_size;
size_t arena_block_size;
uint32_t memtable_prefix_bloom_bits;
size_t memtable_prefix_bloom_huge_page_tlb_size;
size_t memtable_huge_page_size;
bool inplace_update_support;
size_t inplace_update_num_locks;
UpdateStatus (*inplace_callback)(char* existing_value,

View File

@ -44,7 +44,7 @@ DEFINE_int32(max_write_buffer_number, 2, "");
DEFINE_int32(min_write_buffer_number_to_merge, 1, "");
DEFINE_int32(skiplist_height, 4, "");
DEFINE_double(memtable_prefix_bloom_size_ratio, 0.1, "");
DEFINE_int32(memtable_prefix_bloom_huge_page_tlb_size, 2 * 1024 * 1024, "");
DEFINE_int32(memtable_huge_page_size, 2 * 1024 * 1024, "");
DEFINE_int32(value_size, 40, "");
// Path to the database on file system
@ -161,8 +161,7 @@ class PrefixTest : public testing::Test {
options.memtable_prefix_bloom_size_ratio =
FLAGS_memtable_prefix_bloom_size_ratio;
options.memtable_prefix_bloom_huge_page_tlb_size =
FLAGS_memtable_prefix_bloom_huge_page_tlb_size;
options.memtable_huge_page_size = FLAGS_memtable_huge_page_size;
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
BlockBasedTableOptions bbto;

View File

@ -678,8 +678,8 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_prefix_bloom_bits(
rocksdb_options_t*, uint32_t);
extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_memtable_prefix_bloom_probes(rocksdb_options_t*, uint32_t);
extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_memtable_prefix_bloom_huge_page_tlb_size(rocksdb_options_t*, size_t);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_huge_page_size(
rocksdb_options_t*, size_t);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_successive_merges(
rocksdb_options_t*, size_t);

View File

@ -746,14 +746,17 @@ struct ColumnFamilyOptions {
// Dynamically changeable through SetOptions() API
double memtable_prefix_bloom_size_ratio;
// Page size for huge page TLB for bloom in memtable. If <=0, not allocate
// from huge page TLB but from malloc.
// Need to reserve huge pages for it to be allocated. For example:
// Page size for huge page for the arena used by the memtable. If <=0, it
// won't allocate from huge page but from malloc.
// Users are responsible for reserving huge pages for it to be allocated. For
// example:
// sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt
// If there isn't enough free huge page available, it will fall back to
// malloc.
//
// Dynamically changeable through SetOptions() API
size_t memtable_prefix_bloom_huge_page_tlb_size;
size_t memtable_huge_page_size;
// Control locality of bloom filter probes to improve cache miss rate.
// This option only applies to memtable prefix bloom and plaintable

View File

@ -402,6 +402,8 @@ DEFINE_int32(bloom_bits, -1, "Bloom filter bits per key. Negative means"
DEFINE_double(memtable_bloom_size_ratio, 0,
"Ratio of memtable size used for bloom filter. 0 means no bloom "
"filter.");
DEFINE_bool(memtable_use_huge_page, false,
"Try to use huge page in memtables.");
DEFINE_bool(use_existing_db, false, "If true, do not destroy the existing"
" database. If you set this flag and also specify a benchmark that"
@ -2464,6 +2466,7 @@ class Benchmark {
exit(1);
}
}
options.memtable_huge_page_size = FLAGS_memtable_use_huge_page ? 2048 : 0;
options.memtable_prefix_bloom_size_ratio = FLAGS_memtable_bloom_size_ratio;
options.bloom_locality = FLAGS_bloom_locality;
options.max_file_opening_threads = FLAGS_file_opening_threads;

View File

@ -229,7 +229,7 @@ const std::string options_file_content = R"OPTIONS_FILE(
max_bytes_for_level_base=104857600
bloom_locality=0
target_file_size_base=10485760
memtable_prefix_bloom_huge_page_tlb_size=0
memtable_huge_page_size=0
max_successive_merges=1000
max_sequential_skip_in_iterations=8
arena_block_size=52428800

View File

@ -1045,8 +1045,7 @@ class StressTest {
}},
{"memtable_prefix_bloom_bits", {"0", "8", "10"}},
{"memtable_prefix_bloom_probes", {"4", "5", "6"}},
{"memtable_prefix_bloom_huge_page_tlb_size",
{"0", ToString(2 * 1024 * 1024)}},
{"memtable_huge_page_size", {"0", ToString(2 * 1024 * 1024)}},
{"max_successive_merges", {"0", "2", "4"}},
{"inplace_update_num_locks", {"100", "200", "300"}},
// TODO(ljin): enable test for this option

View File

@ -72,8 +72,8 @@ void MutableCFOptions::Dump(Logger* log) const {
arena_block_size);
Log(log, " memtable_prefix_bloom_ratio: %f",
memtable_prefix_bloom_size_ratio);
Log(log, " memtable_prefix_bloom_huge_page_tlb_size: %" ROCKSDB_PRIszt,
memtable_prefix_bloom_huge_page_tlb_size);
Log(log, " memtable_huge_page_size: %" ROCKSDB_PRIszt,
memtable_huge_page_size);
Log(log, " max_successive_merges: %" ROCKSDB_PRIszt,
max_successive_merges);
Log(log, " disable_auto_compactions: %d",

View File

@ -19,8 +19,7 @@ struct MutableCFOptions {
arena_block_size(options.arena_block_size),
memtable_prefix_bloom_size_ratio(
options.memtable_prefix_bloom_size_ratio),
memtable_prefix_bloom_huge_page_tlb_size(
options.memtable_prefix_bloom_huge_page_tlb_size),
memtable_huge_page_size(options.memtable_huge_page_size),
max_successive_merges(options.max_successive_merges),
inplace_update_num_locks(options.inplace_update_num_locks),
disable_auto_compactions(options.disable_auto_compactions),
@ -58,7 +57,7 @@ struct MutableCFOptions {
max_write_buffer_number(0),
arena_block_size(0),
memtable_prefix_bloom_size_ratio(0),
memtable_prefix_bloom_huge_page_tlb_size(0),
memtable_huge_page_size(0),
max_successive_merges(0),
inplace_update_num_locks(0),
disable_auto_compactions(false),
@ -108,7 +107,7 @@ struct MutableCFOptions {
int max_write_buffer_number;
size_t arena_block_size;
double memtable_prefix_bloom_size_ratio;
size_t memtable_prefix_bloom_huge_page_tlb_size;
size_t memtable_huge_page_size;
size_t max_successive_merges;
size_t inplace_update_num_locks;

View File

@ -121,7 +121,7 @@ ColumnFamilyOptions::ColumnFamilyOptions()
inplace_update_num_locks(10000),
inplace_callback(nullptr),
memtable_prefix_bloom_size_ratio(0.0),
memtable_prefix_bloom_huge_page_tlb_size(0),
memtable_huge_page_size(0),
bloom_locality(0),
max_successive_merges(0),
min_partial_merge_operands(2),
@ -189,8 +189,7 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
inplace_callback(options.inplace_callback),
memtable_prefix_bloom_size_ratio(
options.memtable_prefix_bloom_size_ratio),
memtable_prefix_bloom_huge_page_tlb_size(
options.memtable_prefix_bloom_huge_page_tlb_size),
memtable_huge_page_size(options.memtable_huge_page_size),
bloom_locality(options.bloom_locality),
max_successive_merges(options.max_successive_merges),
min_partial_merge_operands(options.min_partial_merge_operands),
@ -599,9 +598,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
Header(log, " Options.memtable_prefix_bloom_size_ratio: %f",
memtable_prefix_bloom_size_ratio);
Header(log,
" Options.memtable_prefix_bloom_huge_page_tlb_size: %" ROCKSDB_PRIszt,
memtable_prefix_bloom_huge_page_tlb_size);
Header(log, " Options.memtable_huge_page_size: %" ROCKSDB_PRIszt,
memtable_huge_page_size);
Header(log, " Options.bloom_locality: %d",
bloom_locality);

View File

@ -543,8 +543,9 @@ bool ParseMemtableOptions(const std::string& name, const std::string& value,
} else if (name == "memtable_prefix_bloom_probes") {
// Deprecated
} else if (name == "memtable_prefix_bloom_huge_page_tlb_size") {
new_options->memtable_prefix_bloom_huge_page_tlb_size =
ParseSizeT(value);
// Deprecated
} else if (name == "memtable_huge_page_size") {
new_options->memtable_huge_page_size = ParseSizeT(value);
} else if (name == "max_successive_merges") {
new_options->max_successive_merges = ParseSizeT(value);
} else if (name == "filter_deletes") {
@ -1443,8 +1444,7 @@ ColumnFamilyOptions BuildColumnFamilyOptions(
cf_opts.arena_block_size = mutable_cf_options.arena_block_size;
cf_opts.memtable_prefix_bloom_size_ratio =
mutable_cf_options.memtable_prefix_bloom_size_ratio;
cf_opts.memtable_prefix_bloom_huge_page_tlb_size =
mutable_cf_options.memtable_prefix_bloom_huge_page_tlb_size;
cf_opts.memtable_huge_page_size = mutable_cf_options.memtable_huge_page_size;
cf_opts.max_successive_merges = mutable_cf_options.max_successive_merges;
cf_opts.inplace_update_num_locks =
mutable_cf_options.inplace_update_num_locks;

View File

@ -428,10 +428,11 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
{"max_successive_merges",
{offsetof(struct ColumnFamilyOptions, max_successive_merges),
OptionType::kSizeT, OptionVerificationType::kNormal}},
{"memtable_prefix_bloom_huge_page_tlb_size",
{offsetof(struct ColumnFamilyOptions,
memtable_prefix_bloom_huge_page_tlb_size),
{"memtable_huge_page_size",
{offsetof(struct ColumnFamilyOptions, memtable_huge_page_size),
OptionType::kSizeT, OptionVerificationType::kNormal}},
{"memtable_prefix_bloom_huge_page_tlb_size",
{0, OptionType::kSizeT, OptionVerificationType::kDeprecated}},
{"write_buffer_size",
{offsetof(struct ColumnFamilyOptions, write_buffer_size),
OptionType::kSizeT, OptionVerificationType::kNormal}},

View File

@ -391,7 +391,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
"max_bytes_for_level_base=986;"
"bloom_locality=8016;"
"target_file_size_base=4294976376;"
"memtable_prefix_bloom_huge_page_tlb_size=2557;"
"memtable_huge_page_size=2557;"
"max_successive_merges=5497;"
"max_sequential_skip_in_iterations=4294971408;"
"arena_block_size=1893;"

View File

@ -86,7 +86,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
{"compaction_measure_io_stats", "false"},
{"inplace_update_num_locks", "25"},
{"memtable_prefix_bloom_size_ratio", "0.26"},
{"memtable_prefix_bloom_huge_page_tlb_size", "28"},
{"memtable_huge_page_size", "28"},
{"bloom_locality", "29"},
{"max_successive_merges", "30"},
{"min_partial_merge_operands", "31"},
@ -185,7 +185,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
ASSERT_EQ(new_cf_opt.inplace_update_support, true);
ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 25U);
ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_size_ratio, 0.26);
ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_huge_page_tlb_size, 28U);
ASSERT_EQ(new_cf_opt.memtable_huge_page_size, 28U);
ASSERT_EQ(new_cf_opt.bloom_locality, 29U);
ASSERT_EQ(new_cf_opt.max_successive_merges, 30U);
ASSERT_EQ(new_cf_opt.min_partial_merge_operands, 31U);

View File

@ -328,7 +328,7 @@ void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, Random* rnd) {
cf_opt->arena_block_size = rnd->Uniform(10000);
cf_opt->inplace_update_num_locks = rnd->Uniform(10000);
cf_opt->max_successive_merges = rnd->Uniform(10000);
cf_opt->memtable_prefix_bloom_huge_page_tlb_size = rnd->Uniform(10000);
cf_opt->memtable_huge_page_size = rnd->Uniform(10000);
cf_opt->write_buffer_size = rnd->Uniform(10000);
// uint32_t options