From 2e9a9f04d7a48e98d1139af5239342212b14b628 Mon Sep 17 00:00:00 2001 From: anand76 Date: Thu, 17 Feb 2022 16:30:40 -0800 Subject: [PATCH] Fix some MultiGet batching stats (#9583) Summary: The NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, NUM_DATA_BLOCKS_READ_PER_LEVEL, and NUM_SST_READ_PER_LEVEL stats were being recorded only when the last file in a level happened to have hits. They are supposed to be updated for every level. Also, there was some overcounting of GetContextStats. This PR fixes both the problems. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9583 Test Plan: Update the unit test in db_basic_test Reviewed By: akankshamahajan15 Differential Revision: D34308044 Pulled By: anand1976 fbshipit-source-id: b3b36020fda26ba91bc6e0e47d52d58f4d7f656e --- HISTORY.md | 1 + db/db_basic_test.cc | 20 ++++++++++--------- db/version_set.cc | 47 ++++++++++++++++++++++++++++++--------------- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index e90737243..3d8df7da7 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ * Fix a race condition when cancel manual compaction with `DisableManualCompaction`. Also DB close can cancel the manual compaction thread. * Fixed a data race on `versions_` between `DBImpl::ResumeImpl()` and threads waiting for recovery to complete (#9496) * Fixed a read-after-free bug in `DB::GetMergeOperands()`. +* Fixed NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, NUM_DATA_BLOCKS_READ_PER_LEVEL, and NUM_SST_READ_PER_LEVEL stats to be reported once per MultiGet batch per level. ## 6.29.3 (02/17/2022) ### Bug Fixes diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 96687ba74..9f07f7577 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -1938,8 +1938,9 @@ TEST_F(DBBasicTest, MultiGetStats) { int total_keys = 2000; std::vector keys_str(total_keys); std::vector keys(total_keys); - std::vector values(total_keys); - std::vector s(total_keys); + static size_t kMultiGetBatchSize = 100; + std::vector values(kMultiGetBatchSize); + std::vector s(kMultiGetBatchSize); ReadOptions read_opts; Random rnd(309); @@ -1976,15 +1977,16 @@ TEST_F(DBBasicTest, MultiGetStats) { } } ASSERT_OK(Flush(1)); + MoveFilesToLevel(1, 1); Close(); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_OK(options.statistics->Reset()); - db_->MultiGet(read_opts, handles_[1], total_keys, keys.data(), values.data(), - s.data(), false); + db_->MultiGet(read_opts, handles_[1], kMultiGetBatchSize, &keys[1250], + values.data(), s.data(), false); - ASSERT_EQ(values.size(), total_keys); + ASSERT_EQ(values.size(), kMultiGetBatchSize); HistogramData hist_data_blocks; HistogramData hist_index_and_filter_blocks; HistogramData hist_sst; @@ -1996,16 +1998,16 @@ TEST_F(DBBasicTest, MultiGetStats) { options.statistics->histogramData(NUM_SST_READ_PER_LEVEL, &hist_sst); // Maximum number of blocks read from a file system in a level. - ASSERT_GT(hist_data_blocks.max, 0); + ASSERT_EQ(hist_data_blocks.max, 32); ASSERT_GT(hist_index_and_filter_blocks.max, 0); // Maximum number of sst files read from file system in a level. - ASSERT_GT(hist_sst.max, 0); + ASSERT_EQ(hist_sst.max, 2); // Minimun number of blocks read in a level. - ASSERT_EQ(hist_data_blocks.min, 3); + ASSERT_EQ(hist_data_blocks.min, 4); ASSERT_GT(hist_index_and_filter_blocks.min, 0); // Minimun number of sst files read in a level. - ASSERT_GT(hist_sst.max, 0); + ASSERT_EQ(hist_sst.min, 1); } // Test class for batched MultiGet with prefix extractor diff --git a/db/version_set.cc b/db/version_set.cc index a542dd60c..5e13c9532 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2189,12 +2189,31 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end()); // blob_file => [[blob_idx, it], ...] std::unordered_map blob_rqs; + int level = -1; while (f != nullptr) { MultiGetRange file_range = fp.CurrentFileRange(); bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex && get_perf_context()->per_level_perf_context_enabled; + + // Report MultiGet stats per level. + if (level >= 0 && level != (int)fp.GetHitFileLevel()) { + // Dump the stats if the search has moved to the next level and + // reset for next level. + RecordInHistogram(db_statistics_, + NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, + num_index_read + num_filter_read); + RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL, + num_data_read); + RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read); + num_filter_read = 0; + num_index_read = 0; + num_data_read = 0; + num_sst_read = 0; + level = fp.GetHitFileLevel(); + } + StopWatchNano timer(clock_, timer_enabled /* auto_start */); s = table_cache_->MultiGet( read_options, *internal_comparator(), *f->file_metadata, &file_range, @@ -2239,6 +2258,11 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, num_filter_read += get_context.get_context_stats_.num_filter_read; num_data_read += get_context.get_context_stats_.num_data_read; num_sst_read += get_context.get_context_stats_.num_sst_read; + // Reset these stats since they're specific to a level + get_context.get_context_stats_.num_index_read = 0; + get_context.get_context_stats_.num_filter_read = 0; + get_context.get_context_stats_.num_data_read = 0; + get_context.get_context_stats_.num_sst_read = 0; // report the counters before returning if (get_context.State() != GetContext::kNotFound && @@ -2315,22 +2339,6 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, } } - // Report MultiGet stats per level. - if (fp.IsHitFileLastInLevel()) { - // Dump the stats if this is the last file of this level and reset for - // next level. - RecordInHistogram(db_statistics_, - NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, - num_index_read + num_filter_read); - RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL, - num_data_read); - RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read); - num_filter_read = 0; - num_index_read = 0; - num_data_read = 0; - num_sst_read = 0; - } - RecordInHistogram(db_statistics_, SST_BATCH_SIZE, batch_size); if (!s.ok() || file_picker_range.empty()) { break; @@ -2338,6 +2346,13 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, f = fp.GetNextFile(); } + // Dump stats for most recent level + RecordInHistogram(db_statistics_, NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, + num_index_read + num_filter_read); + RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL, + num_data_read); + RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read); + if (s.ok() && !blob_rqs.empty()) { MultiGetBlob(read_options, keys_with_blobs_range, blob_rqs); }