From bb2ab26b9f5505803a4bfbfe88cc4356a42e1fd6 Mon Sep 17 00:00:00 2001
From: Levi Tamasi
Date: Wed, 14 Aug 2019 18:13:14 -0700
Subject: [PATCH] Fix regression affecting partitioned indexes/filters when cache_index_and_filter_blocks is false (#5705)

Summary:
PR https://github.com/facebook/rocksdb/issues/5298 (and subsequent related patches) unintentionally changed the semantics of cache_index_and_filter_blocks: historically, this option only affected the main index/filter block; with the changes, it affects index/filter partitions as well. This can cause performance issues when cache_index_and_filter_blocks is false since in this case, partitions are neither cached nor preloaded (i.e. they are loaded on demand upon each access). The patch reverts to the earlier behavior, that is, partitions are cached similarly to data blocks regardless of the value of the above option.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5705

Test Plan:
make check
./db_bench -benchmarks=fillrandom --statistics --stats_interval_seconds=1 --duration=30 --num=500000000 --bloom_bits=20 --partition_index_and_filters=true --cache_index_and_filter_blocks=false
./db_bench -benchmarks=readrandom --use_existing_db --statistics --stats_interval_seconds=1 --duration=10 --num=500000000 --bloom_bits=20 --partition_index_and_filters=true --cache_index_and_filter_blocks=false --cache_size=8000000000

Relevant statistics from the readrandom benchmark with the old code:

rocksdb.block.cache.index.miss COUNT : 0
rocksdb.block.cache.index.hit COUNT : 0
rocksdb.block.cache.index.add COUNT : 0
rocksdb.block.cache.index.bytes.insert COUNT : 0
rocksdb.block.cache.index.bytes.evict COUNT : 0
rocksdb.block.cache.filter.miss COUNT : 0
rocksdb.block.cache.filter.hit COUNT : 0
rocksdb.block.cache.filter.add COUNT : 0
rocksdb.block.cache.filter.bytes.insert COUNT : 0
rocksdb.block.cache.filter.bytes.evict COUNT : 0

With the new code:

rocksdb.block.cache.index.miss COUNT : 2500
rocksdb.block.cache.index.hit COUNT : 42696 rocksdb.block.cache.index.add COUNT : 2500 rocksdb.block.cache.index.bytes.insert COUNT : 4050048 rocksdb.block.cache.index.bytes.evict COUNT : 0 rocksdb.block.cache.filter.miss COUNT : 2500 rocksdb.block.cache.filter.hit COUNT : 4550493 rocksdb.block.cache.filter.add COUNT : 2500 rocksdb.block.cache.filter.bytes.insert COUNT : 10331040 rocksdb.block.cache.filter.bytes.evict COUNT : 0 Differential Revision: D16817382 Pulled By: ltamasi fbshipit-source-id: 28a516b0da1f041a03313e0b70b28cf5cf205d00 --- table/block_based/block_based_table_reader.cc | 44 ++++++++++++------- table/block_based/block_based_table_reader.h | 3 +- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 664b0edca..64db9e423 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -183,7 +183,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader { protected: static Status ReadIndexBlock(const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, - const ReadOptions& read_options, + const ReadOptions& read_options, bool use_cache, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* index_block); @@ -217,6 +217,12 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader { return properties == nullptr || !properties->index_value_is_delta_encoded; } + bool cache_index_blocks() const { + assert(table_ != nullptr); + assert(table_->get_rep() != nullptr); + return table_->get_rep()->table_options.cache_index_and_filter_blocks; + } + Status GetOrReadIndexBlock(bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* index_block) const; @@ -235,7 +241,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader { Status 
BlockBasedTable::IndexReaderCommon::ReadIndexBlock( const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, - const ReadOptions& read_options, GetContext* get_context, + const ReadOptions& read_options, bool use_cache, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* index_block) { PERF_TIMER_GUARD(read_index_block_nanos); @@ -250,7 +256,7 @@ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock( const Status s = table->RetrieveBlock( prefetch_buffer, read_options, rep->footer.index_handle(), UncompressionDict::GetEmptyDict(), index_block, BlockType::kIndex, - get_context, lookup_context); + get_context, lookup_context, use_cache); return s; } @@ -272,7 +278,8 @@ Status BlockBasedTable::IndexReaderCommon::GetOrReadIndexBlock( } return ReadIndexBlock(table_, /*prefetch_buffer=*/nullptr, read_options, - get_context, lookup_context, index_block); + cache_index_blocks(), get_context, lookup_context, + index_block); } // Index that allows binary search lookup in a two-level index structure. 
@@ -294,7 +301,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { CachableEntry index_block; if (prefetch || !use_cache) { const Status s = - ReadIndexBlock(table, prefetch_buffer, ReadOptions(), + ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache, /*get_context=*/nullptr, lookup_context, &index_block); if (!s.ok()) { return s; @@ -482,7 +489,7 @@ class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon { CachableEntry index_block; if (prefetch || !use_cache) { const Status s = - ReadIndexBlock(table, prefetch_buffer, ReadOptions(), + ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache, /*get_context=*/nullptr, lookup_context, &index_block); if (!s.ok()) { return s; @@ -563,7 +570,7 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon { CachableEntry index_block; if (prefetch || !use_cache) { const Status s = - ReadIndexBlock(table, prefetch_buffer, ReadOptions(), + ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache, /*get_context=*/nullptr, lookup_context, &index_block); if (!s.ok()) { return s; @@ -2102,7 +2109,8 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( CachableEntry block; s = RetrieveBlock(prefetch_buffer, ro, handle, uncompression_dict, &block, - block_type, get_context, lookup_context); + block_type, get_context, lookup_context, + /* use_cache */ true); if (!s.ok()) { assert(block.IsEmpty()); @@ -2248,8 +2256,10 @@ Status BlockBasedTable::GetDataBlockFromCache( GetContext* get_context) const { BlockCacheLookupContext lookup_data_block_context( BlockCacheLookupCaller::kUserMGet); + assert(block_type == BlockType::kData); Status s = RetrieveBlock(nullptr, ro, handle, uncompression_dict, block, - block_type, get_context, &lookup_data_block_context); + block_type, get_context, &lookup_data_block_context, + /* use_cache */ true); if (s.IsIncomplete()) { s = Status::OK(); } @@ -2448,9 +2458,11 @@ void BlockBasedTable::MaybeLoadBlocksToCache( continue; } - 
(*statuses)[idx_in_batch] = RetrieveBlock(nullptr, options, handle, - uncompression_dict, &(*results)[idx_in_batch], BlockType::kData, - mget_iter->get_context, &lookup_data_block_context); + (*statuses)[idx_in_batch] = + RetrieveBlock(nullptr, options, handle, uncompression_dict, + &(*results)[idx_in_batch], BlockType::kData, + mget_iter->get_context, &lookup_data_block_context, + /* use_cache */ true); } return; } @@ -2575,15 +2587,13 @@ Status BlockBasedTable::RetrieveBlock( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, - GetContext* get_context, BlockCacheLookupContext* lookup_context) const { + GetContext* get_context, BlockCacheLookupContext* lookup_context, + bool use_cache) const { assert(block_entry); assert(block_entry->IsEmpty()); Status s; - if (rep_->table_options.cache_index_and_filter_blocks || - (block_type != BlockType::kFilter && - block_type != BlockType::kCompressionDictionary && - block_type != BlockType::kIndex)) { + if (use_cache) { s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle, uncompression_dict, block_entry, block_type, get_context, lookup_context, diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index d648ba4d3..3beda6b8c 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -290,7 +290,8 @@ class BlockBasedTable : public TableReader { const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, GetContext* get_context, - BlockCacheLookupContext* lookup_context) const; + BlockCacheLookupContext* lookup_context, + bool use_cache) const; Status GetDataBlockFromCache( const ReadOptions& ro, const BlockHandle& handle,