Skip filters for last L0 file if hit-optimized

Summary:
Following up on D53493, we can still enable the filter-skipping
optimization for last file in L0. It's correct to assume the key will be present
in the last L0 file when we're hit-optimized and L0 is deepest.

The FilePicker encapsulates the state for traversing each level's files, so I
needed to make it expose whether the returned file is last in its level.

Test Plan:
verified below test fails before this patch and passes afterwards.
The change to how the test memtable is populated is needed so file 1 has keys
(0, 30, 60), file 2 has keys (10, 40, 70), etc.

  $ ./db_universal_compaction_test --gtest_filter=UniversalCompactionNumLevels/DBTestUniversalCompaction.OptimizeFiltersForHits/*

Reviewers: sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D53583
This commit is contained in:
Andrew Kryczka 2016-02-01 14:58:46 -08:00
parent aa5e3b7c04
commit fdd70d1495
3 changed files with 31 additions and 16 deletions

View File

@ -187,14 +187,16 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) {
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
Env::Priority::LOW);
Put("", "");
for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
Put(Key(num * 10), "val");
if (num) {
dbfull()->TEST_WaitForFlushMemTable();
}
Put(Key(30 + num * 10), "val");
Put(Key(60 + num * 10), "val");
dbfull()->TEST_WaitForFlushMemTable();
}
Put("", "");
dbfull()->TEST_WaitForFlushMemTable();
// Query set of non existing keys
for (int i = 5; i < 90; i += 10) {
@ -205,6 +207,13 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) {
ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
auto prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL);
// Make sure bloom filter is used for all but the last L0 file when looking
// up a non-existent key that's in the range of all L0 files.
ASSERT_EQ(Get(Key(35)), "NOT_FOUND");
ASSERT_EQ(prev_counter + NumTableFilesAtLevel(0) - 1,
TestGetTickerCount(options, BLOOM_FILTER_USEFUL));
prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL);
// Unblock compaction and wait it for happening.
sleeping_task_low.WakeUp();
dbfull()->TEST_WaitForCompact();

View File

@ -84,15 +84,11 @@ int FindFileInRange(const InternalKeyComparator& icmp,
// are MergeInProgress).
class FilePicker {
public:
FilePicker(
std::vector<FileMetaData*>* files,
const Slice& user_key,
const Slice& ikey,
autovector<LevelFilesBrief>* file_levels,
unsigned int num_levels,
FileIndexer* file_indexer,
const Comparator* user_comparator,
const InternalKeyComparator* internal_comparator)
FilePicker(std::vector<FileMetaData*>* files, const Slice& user_key,
const Slice& ikey, autovector<LevelFilesBrief>* file_levels,
unsigned int num_levels, FileIndexer* file_indexer,
const Comparator* user_comparator,
const InternalKeyComparator* internal_comparator)
: num_levels_(num_levels),
curr_level_(-1),
hit_file_level_(-1),
@ -102,6 +98,7 @@ class FilePicker {
files_(files),
#endif
level_files_brief_(file_levels),
is_hit_file_last_in_level_(false),
user_key_(user_key),
ikey_(ikey),
file_indexer_(file_indexer),
@ -126,6 +123,8 @@ class FilePicker {
// Loops over all files in current level.
FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_];
hit_file_level_ = curr_level_;
is_hit_file_last_in_level_ =
curr_index_in_curr_level_ == curr_file_level_->num_files - 1;
int cmp_largest = -1;
// Do key range filtering of files or/and fractional cascading if:
@ -209,6 +208,10 @@ class FilePicker {
// for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts
unsigned int GetHitFileLevel() { return hit_file_level_; }
// Returns true if the most recent "hit file" (i.e., one returned by
// GetNextFile()) is at the last index in its level.
bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; }
private:
unsigned int num_levels_;
unsigned int curr_level_;
@ -220,6 +223,7 @@ class FilePicker {
#endif
autovector<LevelFilesBrief>* level_files_brief_;
bool search_ended_;
bool is_hit_file_last_in_level_;
LevelFilesBrief* curr_file_level_;
unsigned int curr_index_in_curr_level_;
unsigned int start_index_in_curr_level_;
@ -903,7 +907,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
*status = table_cache_->Get(
read_options, *internal_comparator(), f->fd, ikey, &get_context,
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel())));
IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
fp.IsHitFileLastInLevel()));
// TODO: examine the behavior for corrupted key
if (!status->ok()) {
return;
@ -960,10 +965,11 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
}
}
bool Version::IsFilterSkipped(int level) {
bool Version::IsFilterSkipped(int level, bool is_file_last_in_level) {
// Reaching the bottom level implies misses at all upper levels, so we'll
// skip checking the filters when we predict a hit.
return cfd_->ioptions()->optimize_filters_for_hits && level > 0 &&
return cfd_->ioptions()->optimize_filters_for_hits &&
(level > 0 || is_file_last_in_level) &&
level == storage_info_.num_non_empty_levels() - 1;
}

View File

@ -530,7 +530,7 @@ class Version {
// checked during read operations. In certain cases (trivial move or preload),
// the filter block may already be cached, but we still do not access it such
// that it eventually expires from the cache.
bool IsFilterSkipped(int level);
bool IsFilterSkipped(int level, bool is_file_last_in_level = false);
// The helper function of UpdateAccumulatedStats, which may fill the missing
// fields of file_mata from its associated TableProperties.