Add columnfamily option optimize_filters_for_hits to optimize for key hits only

Summary:
    Summary:
    Added a new option to ColumnFamilyOptions - optimize_filters_for_hits. This option can be used in the case where most
    accesses to the store are key hits and we don't need to optimize performance for key misses.
    This is useful when you have a very large database and most of your lookups succeed.  The option allows the store to
     not store and use filters in the last level (the largest level which contains data). These filters can take a large amount of
     space for large databases (in memory and on-disk). For the last level, these filters are only useful for key misses and not
     for key hits. If we are not optimizing for key misses, we can choose to not store these filters for that level.

    This option is only provided for BlockBasedTable. We skip the filters when we are compacting

Test Plan:
1. Modified db_test to also run tests with an additional option (skip_filters_on_last_level)
 2. Added another unit test to db_test which specifically tests that filters are being skipped

Reviewers: rven, igor, sdong

Reviewed By: sdong

Subscribers: lgalanis, yoshinorim, MarkCallaghan, rven, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D33717
This commit is contained in:
Sameet Agarwal 2015-02-17 08:03:45 -08:00
parent ba9d1737a8
commit e7c434c364
22 changed files with 194 additions and 69 deletions

View File

@ -30,9 +30,12 @@ TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) {
return ioptions.table_factory->NewTableBuilder(
ioptions, internal_comparator, file, compression_type, compression_opts);
const CompressionOptions& compression_opts,
const bool skip_filters) {
return ioptions.table_factory->NewTableBuilder(ioptions, internal_comparator,
file, compression_type,
compression_opts,
skip_filters);
}
Status BuildTable(const std::string& dbname, Env* env,

View File

@ -26,11 +26,12 @@ class VersionEdit;
class TableBuilder;
class WritableFile;
extern TableBuilder* NewTableBuilder(
const ImmutableCFOptions& options,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts);
TableBuilder* NewTableBuilder(const ImmutableCFOptions& options,
const InternalKeyComparator& internal_comparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skip_filters = false);
// Build a Table file from the contents of *iter. The generated file
// will be named according to number specified in meta. On success, the rest of

View File

@ -1062,10 +1062,20 @@ Status CompactionJob::OpenCompactionOutputFile() {
compact_->compaction->OutputFilePreallocationSize(mutable_cf_options_)));
ColumnFamilyData* cfd = compact_->compaction->column_family_data();
bool skip_filters = false;
// If the Column family flag is to only optimize filters for hits,
// we can skip creating filters if this is the bottommost_level where
// data is going to be found
//
if (cfd->ioptions()->optimize_filters_for_hits && bottommost_level_) {
skip_filters = true;
}
compact_->builder.reset(NewTableBuilder(
*cfd->ioptions(), cfd->internal_comparator(), compact_->outfile.get(),
compact_->compaction->OutputCompressionType(),
cfd->ioptions()->compression_opts));
cfd->ioptions()->compression_opts, skip_filters));
LogFlush(db_options_.info_log);
return s;
}

View File

@ -419,7 +419,8 @@ class DBTest {
kInfiniteMaxOpenFiles = 23,
kxxHashChecksum = 24,
kFIFOCompaction = 25,
kEnd = 26
kOptimizeFiltersForHits = 26,
kEnd = 27
};
int option_config_;
@ -682,6 +683,12 @@ class DBTest {
options.prefix_extractor.reset(NewNoopTransform());
break;
}
case kOptimizeFiltersForHits: {
options.optimize_filters_for_hits = true;
set_block_based_table_factory = true;
break;
}
default:
break;
}
@ -10797,6 +10804,58 @@ TEST(DBTest, DeleteMovedFileAfterCompaction) {
}
}
TEST(DBTest, OptimizeFiltersForHits) {
Options options = CurrentOptions();
options.write_buffer_size = 32 * 1024;
options.target_file_size_base = 32 * 1024;
options.level0_file_num_compaction_trigger = 2;
options.level0_slowdown_writes_trigger = 2;
options.level0_stop_writes_trigger = 4;
options.max_bytes_for_level_base = 64 * 1024;
options.max_write_buffer_number = 2;
options.max_background_compactions = 8;
options.max_background_flushes = 8;
options.compaction_style = kCompactionStyleLevel;
BlockBasedTableOptions bbto;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, true));
bbto.whole_key_filtering = true;
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
options.optimize_filters_for_hits = true;
options.statistics = rocksdb::CreateDBStatistics();
CreateAndReopenWithCF({"mypikachu"}, options);
int numkeys = 200000;
for (int i = 0; i < 20; i += 2) {
for (int j = i; j < numkeys; j += 20) {
ASSERT_OK(Put(1, Key(j), "val"));
}
}
ASSERT_OK(Flush(1));
dbfull()->TEST_WaitForCompact();
for (int i = 1; i < numkeys; i += 2) {
ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND");
}
ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
// When the skip_filters_on_last_level is ON, the last level which has
// most of the keys does not use bloom filters. We end up using
// bloom filters in a very small number of cases. Without the flag.
// this number would be close to 150000 (all the key at the last level) +
// some use in the upper levels
//
ASSERT_GT(90000, TestGetTickerCount(options, BLOOM_FILTER_USEFUL));
for (int i = 0; i < numkeys; i += 2) {
ASSERT_EQ(Get(1, Key(i)), "val");
}
}
TEST(DBTest, L0L1L2AndUpHitCounter) {
Options options = CurrentOptions();
options.write_buffer_size = 32 * 1024;

View File

@ -91,6 +91,8 @@ struct ImmutableCFOptions {
int num_levels;
bool optimize_filters_for_hits;
#ifndef ROCKSDB_LITE
// A vector of EventListeners which call-back functions will be called
// when specific RocksDB event happens.

View File

@ -610,6 +610,22 @@ struct ColumnFamilyOptions {
// Default: 2
uint32_t min_partial_merge_operands;
// This flag specifies that the implementation should optimize the filters
// mainly for cases where keys are found rather than also optimize for keys
// missed. This would be used in cases where the application knows that
// there are very few misses or the performance in the case of misses is not
// important.
//
// For now, this flag allows us to not store filters for the last level i.e
// the largest level which contains data of the LSM store. For keys which
// are hits, the filters in this level are not useful because we will search
// for the data anyway. NOTE: the filters in other levels are still useful
// even for key hit because they tell us whether to look in that level or go
// to the higher level.
//
// Default: false
bool optimize_filters_for_hits;
#ifndef ROCKSDB_LITE
// A vector of EventListeners which call-back functions will be called
// when specific RocksDB event happens.

View File

@ -371,9 +371,10 @@ class TableFactory {
// to use in this table.
virtual TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const = 0;
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skipFilters = false) const = 0;
// Sanitizes the specified DB Options and ColumnFamilyOptions.
//

View File

@ -65,11 +65,13 @@ Status AdaptiveTableFactory::NewTableReader(
TableBuilder* AdaptiveTableFactory::NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const {
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skip_filters = false) const {
return table_factory_to_write_->NewTableBuilder(
ioptions, internal_comparator, file, compression_type, compression_opts);
ioptions, internal_comparator, file, compression_type, compression_opts,
skip_filters);
}
std::string AdaptiveTableFactory::GetPrintableTableOptions() const {

View File

@ -39,12 +39,12 @@ class AdaptiveTableFactory : public TableFactory {
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& icomparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) const override;
TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions,
const InternalKeyComparator& icomparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skip_filters) const override;
// Sanitizes the specified DB Options.
Status SanitizeOptions(const DBOptions& db_opts,

View File

@ -462,7 +462,7 @@ struct BlockBasedTableBuilder::Rep {
const BlockBasedTableOptions& table_opt,
const InternalKeyComparator& icomparator, WritableFile* f,
const CompressionType _compression_type,
const CompressionOptions& _compression_opts)
const CompressionOptions& _compression_opts, const bool skip_filters)
: ioptions(_ioptions),
table_options(table_opt),
internal_comparator(icomparator),
@ -474,7 +474,8 @@ struct BlockBasedTableBuilder::Rep {
&this->internal_prefix_transform)),
compression_type(_compression_type),
compression_opts(_compression_opts),
filter_block(CreateFilterBlockBuilder(_ioptions, table_options)),
filter_block(skip_filters ? nullptr : CreateFilterBlockBuilder(
_ioptions, table_options)),
flush_block_policy(
table_options.flush_block_policy_factory->NewFlushBlockPolicy(
table_options, data_block)) {
@ -495,7 +496,7 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) {
const CompressionOptions& compression_opts, const bool skip_filters) {
BlockBasedTableOptions sanitized_table_options(table_options);
if (sanitized_table_options.format_version == 0 &&
sanitized_table_options.checksum != kCRC32c) {
@ -508,7 +509,8 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
}
rep_ = new Rep(ioptions, sanitized_table_options, internal_comparator, file,
compression_type, compression_opts);
compression_type, compression_opts, skip_filters);
if (rep_->filter_block != nullptr) {
rep_->filter_block->StartBlock(0);
}

View File

@ -33,7 +33,8 @@ class BlockBasedTableBuilder : public TableBuilder {
const InternalKeyComparator& internal_comparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts);
const CompressionOptions& compression_opts,
const bool skip_filters);
// REQUIRES: Either Finish() or Abandon() has been called.
~BlockBasedTableBuilder();

View File

@ -53,13 +53,12 @@ Status BlockBasedTableFactory::NewTableReader(
TableBuilder* BlockBasedTableFactory::NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const {
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts, const bool skip_filters) const {
auto table_builder = new BlockBasedTableBuilder(
ioptions, table_options_, internal_comparator, file,
compression_type, compression_opts);
ioptions, table_options_, internal_comparator, file, compression_type,
compression_opts, skip_filters);
return table_builder;
}

View File

@ -54,9 +54,10 @@ class BlockBasedTableFactory : public TableFactory {
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const override;
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skip_filters = false) const override;
// Sanitizes the specified DB Options.
Status SanitizeOptions(const DBOptions& db_opts,

View File

@ -27,9 +27,12 @@ Status CuckooTableFactory::NewTableReader(const ImmutableCFOptions& ioptions,
TableBuilder* CuckooTableFactory::NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType,
const CompressionOptions&) const {
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType, const CompressionOptions&,
const bool skip_filters) const {
// Ignore the skipFIlters flag. Does not apply to this file format
//
// TODO: change builder to take the option struct
return new CuckooTableBuilder(file, table_options_.hash_table_ratio, 64,
table_options_.max_search_depth, internal_comparator.user_comparator(),

View File

@ -60,8 +60,10 @@ class CuckooTableFactory : public TableFactory {
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(const ImmutableCFOptions& options,
const InternalKeyComparator& icomparator, WritableFile* file,
const CompressionType, const CompressionOptions&) const override;
const InternalKeyComparator& icomparator,
WritableFile* file, const CompressionType,
const CompressionOptions&,
const bool skip_filters = false) const override;
// Sanitizes the specified DB Options.
Status SanitizeOptions(const DBOptions& db_opts,

View File

@ -65,7 +65,7 @@ TableBuilder* MockTableFactory::NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_key, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) const {
const CompressionOptions& compression_opts, const bool skip_filters) const {
uint32_t id = GetAndWriteNextID(file);
return new MockTableBuilder(id, &file_system_);

View File

@ -136,16 +136,16 @@ class MockTableFactory : public TableFactory {
MockTableFactory();
const char* Name() const override { return "MockTable"; }
Status NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const InternalKeyComparator& internal_key,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override;
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_key, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) const override;
const EnvOptions& env_options,
const InternalKeyComparator& internal_key,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override;
TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_key,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skip_filters = false) const override;
// This function will directly create mock table instead of going through
// MockTableBuilder. MockFileContents has to have a format of <internal_key,

View File

@ -28,9 +28,14 @@ Status PlainTableFactory::NewTableReader(const ImmutableCFOptions& ioptions,
TableBuilder* PlainTableFactory::NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType,
const CompressionOptions&) const {
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType, const CompressionOptions&,
const bool skip_filters) const {
// Ignore the skip_filters flag. PlainTable format is optimized for small
// in-memory dbs. The skip_filters optimization is not useful for plain
// tables
//
return new PlainTableBuilder(ioptions, file, user_key_len_, encoding_type_,
index_sparseness_, bloom_bits_per_key_, 6,
huge_page_tlb_size_, hash_table_ratio_,

View File

@ -158,12 +158,11 @@ class PlainTableFactory : public TableFactory {
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& options,
const InternalKeyComparator& icomparator,
WritableFile* file,
const CompressionType,
const CompressionOptions&) const override;
TableBuilder* NewTableBuilder(const ImmutableCFOptions& options,
const InternalKeyComparator& icomparator,
WritableFile* file, const CompressionType,
const CompressionOptions&,
const bool skip_filters = false) const override;
std::string GetPrintableTableOptions() const override;

View File

@ -66,7 +66,8 @@ ImmutableCFOptions::ImmutableCFOptions(const Options& options)
compression_per_level(options.compression_per_level),
compression_opts(options.compression_opts),
access_hint_on_compaction_start(options.access_hint_on_compaction_start),
num_levels(options.num_levels)
num_levels(options.num_levels),
optimize_filters_for_hits(options.optimize_filters_for_hits)
#ifndef ROCKSDB_LITE
, listeners(options.listeners) {}
#else // ROCKSDB_LITE
@ -119,7 +120,8 @@ ColumnFamilyOptions::ColumnFamilyOptions()
memtable_prefix_bloom_huge_page_tlb_size(0),
bloom_locality(0),
max_successive_merges(0),
min_partial_merge_operands(2)
min_partial_merge_operands(2),
optimize_filters_for_hits(false)
#ifndef ROCKSDB_LITE
, listeners() {
#else // ROCKSDB_LITE
@ -184,7 +186,8 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
options.memtable_prefix_bloom_huge_page_tlb_size),
bloom_locality(options.bloom_locality),
max_successive_merges(options.max_successive_merges),
min_partial_merge_operands(options.min_partial_merge_operands)
min_partial_merge_operands(options.min_partial_merge_operands),
optimize_filters_for_hits(options.optimize_filters_for_hits)
#ifndef ROCKSDB_LITE
, listeners(options.listeners) {
#else // ROCKSDB_LITE
@ -240,7 +243,8 @@ DBOptions::DBOptions()
access_hint_on_compaction_start(NORMAL),
use_adaptive_mutex(false),
bytes_per_sync(0),
enable_thread_tracking(false) {}
enable_thread_tracking(false) {
}
DBOptions::DBOptions(const Options& options)
: create_if_missing(options.create_if_missing),
@ -342,7 +346,7 @@ void DBOptions::Dump(Logger* log) const {
stats_dump_period_sec);
Log(log, " Options.advise_random_on_open: %d",
advise_random_on_open);
Log(log, " Options.db_write_buffer_size: %zd",
Log(log, " Options.db_write_buffer_size: %zd",
db_write_buffer_size);
Log(log, " Options.access_hint_on_compaction_start: %s",
access_hints[access_hint_on_compaction_start]);
@ -352,7 +356,7 @@ void DBOptions::Dump(Logger* log) const {
rate_limiter.get());
Log(log, " Options.bytes_per_sync: %" PRIu64,
bytes_per_sync);
Log(log, " enable_thread_tracking: %d",
Log(log, " Options.enable_thread_tracking: %d",
enable_thread_tracking);
} // DBOptions::Dump
@ -477,6 +481,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
bloom_locality);
Log(log, " Options.max_successive_merges: %zd",
max_successive_merges);
Log(log, " Options.optimize_fllters_for_hits: %d",
optimize_filters_for_hits);
} // ColumnFamilyOptions::Dump
void Options::Dump(Logger* log) const {

View File

@ -447,6 +447,8 @@ bool ParseColumnFamilyOption(const std::string& name, const std::string& value,
} else {
return false;
}
} else if (name == "optimize_filters_for_hits") {
new_options->optimize_filters_for_hits = ParseBoolean(name, value);
} else {
return false;
}

View File

@ -134,7 +134,8 @@ TEST(OptionsTest, GetOptionsFromMapTest) {
{"bloom_locality", "29"},
{"max_successive_merges", "30"},
{"min_partial_merge_operands", "31"},
{"prefix_extractor", "fixed:31"}
{"prefix_extractor", "fixed:31"},
{"optimize_filters_for_hits", "true"},
};
std::unordered_map<std::string, std::string> db_options_map = {
@ -226,6 +227,7 @@ TEST(OptionsTest, GetOptionsFromMapTest) {
ASSERT_EQ(new_cf_opt.max_successive_merges, 30U);
ASSERT_EQ(new_cf_opt.min_partial_merge_operands, 31U);
ASSERT_TRUE(new_cf_opt.prefix_extractor != nullptr);
ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true);
ASSERT_EQ(std::string(new_cf_opt.prefix_extractor->Name()),
"rocksdb.FixedPrefix.31");
@ -395,6 +397,15 @@ TEST(OptionsTest, GetColumnFamilyOptionsFromStringTest) {
"write_buffer_size=10;max_write_buffer_number=16;"
"block_based_table_factory={xx_block_size=4;}",
&new_cf_opt));
ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt,
"optimize_filters_for_hits=true",
&new_cf_opt));
ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt,
"optimize_filters_for_hits=false",
&new_cf_opt));
ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt,
"optimize_filters_for_hits=junk",
&new_cf_opt));
}
#endif // !ROCKSDB_LITE