From 02193ce406d8f647a82a71920bdd467709a357c7 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 18 Dec 2019 10:59:21 -0800 Subject: [PATCH] Prevent file prefetch when mmap is enabled. (#6206) Summary: Right now, sometimes file prefetching is still on when mmap is enabled. This causes bug of reading wrong data. In this commit, we remove all those possible paths. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6206 Test Plan: make crash_test with compaction_readahead_size, which used to fail. RUn all existing tests. Differential Revision: D19149429 fbshipit-source-id: 9e18ea8c566e416aac9647bdd05afe596634791b --- file/file_prefetch_buffer.cc | 3 ++ table/block_based/block_based_table_reader.cc | 39 +++++++++++-------- table/block_based/block_based_table_reader.h | 7 ++++ 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index 89f32c6ff..d741189d9 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -24,6 +24,9 @@ namespace rocksdb { Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader, uint64_t offset, size_t n, bool for_compaction) { + if (!enable_ || reader == nullptr) { + return Status::OK(); + } size_t alignment = reader->file()->GetRequiredBufferAlignment(); size_t offset_ = static_cast(offset); uint64_t rounddown_offset = Rounddown(offset_, alignment); diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 1a50bd02b..f7cf43ee5 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -499,9 +499,8 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { uint64_t last_off = handle.offset() + block_size(handle); uint64_t prefetch_len = last_off - prefetch_off; std::unique_ptr prefetch_buffer; - auto& file = rep->file; - prefetch_buffer.reset(new FilePrefetchBuffer()); - s = prefetch_buffer->Prefetch(file.get(), prefetch_off, + rep->CreateFilePrefetchBuffer(0, 0, &prefetch_buffer); + s = prefetch_buffer->Prefetch(rep->file.get(), prefetch_off, static_cast(prefetch_len)); // After prefetch, read the partitions one by one @@ -1147,8 +1146,14 @@ Status BlockBasedTable::Open( const bool prefetch_all = prefetch_index_and_filter_in_cache || level == 0; const bool preload_all = !table_options.cache_index_and_filter_blocks; - s = PrefetchTail(file.get(), file_size, tail_prefetch_stats, prefetch_all, - preload_all, &prefetch_buffer); + if (!ioptions.allow_mmap_reads) { + s = PrefetchTail(file.get(), file_size, tail_prefetch_stats, prefetch_all, + preload_all, &prefetch_buffer); + } else { + // Should not prefetch for mmap mode. + prefetch_buffer.reset(new FilePrefetchBuffer( + nullptr, 0, 0, false /* enable */, true /* track_min_offset */)); + } // Read in the following order: // 1. Footer @@ -1274,10 +1279,12 @@ Status BlockBasedTable::PrefetchTail( Status s; // TODO should not have this special logic in the future. if (!file->use_direct_io()) { - prefetch_buffer->reset(new FilePrefetchBuffer(nullptr, 0, 0, false, true)); + prefetch_buffer->reset(new FilePrefetchBuffer( + nullptr, 0, 0, false /* enable */, true /* track_min_offset */)); s = file->Prefetch(prefetch_off, prefetch_len); } else { - prefetch_buffer->reset(new FilePrefetchBuffer(nullptr, 0, 0, true, true)); + prefetch_buffer->reset(new FilePrefetchBuffer( + nullptr, 0, 0, true /* enable */, true /* track_min_offset */)); s = (*prefetch_buffer)->Prefetch(file, prefetch_off, prefetch_len); } return s; @@ -3016,23 +3023,23 @@ void BlockBasedTableIterator::InitDataBlock() { } else if (rep->file->use_direct_io() && !prefetch_buffer_) { // Direct I/O // Let FilePrefetchBuffer take care of the readahead. - prefetch_buffer_.reset(new FilePrefetchBuffer( - rep->file.get(), BlockBasedTable::kInitAutoReadaheadSize, - BlockBasedTable::kMaxAutoReadaheadSize)); + rep->CreateFilePrefetchBuffer( + BlockBasedTable::kInitAutoReadaheadSize, + BlockBasedTable::kMaxAutoReadaheadSize, &prefetch_buffer_); } } } else if (!prefetch_buffer_) { // Explicit user requested readahead // The actual condition is: // if (read_options_.readahead_size != 0 && !prefetch_buffer_) - prefetch_buffer_.reset(new FilePrefetchBuffer( - rep->file.get(), read_options_.readahead_size, - read_options_.readahead_size)); + rep->CreateFilePrefetchBuffer(read_options_.readahead_size, + read_options_.readahead_size, + &prefetch_buffer_); } } else if (!prefetch_buffer_) { - prefetch_buffer_.reset( - new FilePrefetchBuffer(rep->file.get(), compaction_readahead_size_, - compaction_readahead_size_)); + rep->CreateFilePrefetchBuffer(compaction_readahead_size_, + compaction_readahead_size_, + &prefetch_buffer_); } Status s; diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 59bd1fdf6..96f440ba6 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -604,6 +604,13 @@ struct BlockBasedTable::Rep { uint64_t sst_number_for_tracing() const { return file ? TableFileNameToNumber(file->file_name()) : UINT64_MAX; } + void CreateFilePrefetchBuffer( + size_t readahead_size, size_t max_readahead_size, + std::unique_ptr* fpb) const { + fpb->reset(new FilePrefetchBuffer(file.get(), readahead_size, + max_readahead_size, + !ioptions.allow_mmap_reads /* enable */)); + } }; // Iterates over the contents of BlockBasedTable.