diff --git a/HISTORY.md b/HISTORY.md index f2db631bb..c633014f6 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,6 +9,7 @@ ### Bug Fixes * fix deadlock with enable_pipelined_write=true and max_successive_merges > 0 +* Fix corruption in non-iterator reads when mmap is used for file reads ## 5.14.0 (5/16/2018) ### Public API Change diff --git a/db/db_test2.cc b/db/db_test2.cc index 1550cf551..5043b38a9 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -2502,6 +2502,27 @@ TEST_F(DBTest2, LiveFilesOmitObsoleteFiles) { #endif // ROCKSDB_LITE +TEST_F(DBTest2, PinnableSliceAndMmapReads) { + Options options = CurrentOptions(); + options.allow_mmap_reads = true; + Reopen(options); + + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(Flush()); + + PinnableSlice pinned_value; + ASSERT_EQ(Get("foo", &pinned_value), Status::OK()); + ASSERT_EQ(pinned_value.ToString(), "bar"); + + dbfull()->TEST_CompactRange(0 /* level */, nullptr /* begin */, + nullptr /* end */, nullptr /* column_family */, + true /* disallow_trivial_move */); + + // Ensure pinned_value doesn't rely on memory munmap'd by the above + // compaction. + ASSERT_EQ(pinned_value.ToString(), "bar"); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/table/block.cc b/table/block.cc index 15c5cd609..57bb50057 100644 --- a/table/block.cc +++ b/table/block.cc @@ -464,7 +464,7 @@ BlockIter* Block::NewIterator(const Comparator* cmp, const Comparator* ucmp, total_order_seek ? nullptr : prefix_index_.get(); ret_iter->Initialize(cmp, ucmp, data_, restart_offset_, num_restarts_, prefix_index_ptr, global_seqno_, - read_amp_bitmap_.get(), key_includes_seq); + read_amp_bitmap_.get(), key_includes_seq, cachable()); if (read_amp_bitmap_) { if (read_amp_bitmap_->GetStatistics() != stats) { diff --git a/table/block.h b/table/block.h index e2f6e48d0..bac9afc81 100644 --- a/table/block.h +++ b/table/block.h @@ -220,22 +220,26 @@ class BlockIter final : public InternalIterator { key_includes_seq_(true), global_seqno_(kDisableGlobalSequenceNumber), read_amp_bitmap_(nullptr), - last_bitmap_offset_(0) {} + last_bitmap_offset_(0), + block_contents_pinned_(false) {} BlockIter(const Comparator* comparator, const Comparator* user_comparator, const char* data, uint32_t restarts, uint32_t num_restarts, BlockPrefixIndex* prefix_index, SequenceNumber global_seqno, - BlockReadAmpBitmap* read_amp_bitmap, bool key_includes_seq) + BlockReadAmpBitmap* read_amp_bitmap, bool key_includes_seq, + bool block_contents_pinned) : BlockIter() { Initialize(comparator, user_comparator, data, restarts, num_restarts, - prefix_index, global_seqno, read_amp_bitmap, key_includes_seq); + prefix_index, global_seqno, read_amp_bitmap, key_includes_seq, + block_contents_pinned); } void Initialize(const Comparator* comparator, const Comparator* user_comparator, const char* data, uint32_t restarts, uint32_t num_restarts, BlockPrefixIndex* prefix_index, SequenceNumber global_seqno, - BlockReadAmpBitmap* read_amp_bitmap, bool key_includes_seq) { + BlockReadAmpBitmap* read_amp_bitmap, bool key_includes_seq, + bool block_contents_pinned) { assert(data_ == nullptr); // Ensure it is called only once assert(num_restarts > 0); // Ensure the param is valid @@ -251,6 +255,7 @@ class BlockIter final : public InternalIterator { read_amp_bitmap_ = read_amp_bitmap; last_bitmap_offset_ = current_ + 1; key_includes_seq_ = key_includes_seq; + block_contents_pinned_ = block_contents_pinned; } // Makes Valid() return false, status() return `s`, and Seek()/Prev()/etc do @@ -312,9 +317,11 @@ class BlockIter final : public InternalIterator { PinnedIteratorsManager* pinned_iters_mgr_ = nullptr; #endif - virtual bool IsKeyPinned() const override { return key_pinned_; } + virtual bool IsKeyPinned() const override { + return block_contents_pinned_ && key_pinned_; + } - virtual bool IsValuePinned() const override { return true; } + virtual bool IsValuePinned() const override { return block_contents_pinned_; } size_t TEST_CurrentEntrySize() { return NextEntryOffset() - current_; } @@ -349,6 +356,8 @@ class BlockIter final : public InternalIterator { BlockReadAmpBitmap* read_amp_bitmap_; // last `current_` value we report to read-amp bitmp mutable uint32_t last_bitmap_offset_; + // whether the block data is guaranteed to outlive this iterator + bool block_contents_pinned_; struct CachedPrevEntry { explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr, diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index d37e60e77..2462b880f 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -2226,8 +2226,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, s = Status::Corruption(Slice()); } - if (!get_context->SaveValue(parsed_key, biter.value(), &matched, - &biter)) { + if (!get_context->SaveValue( + parsed_key, biter.value(), &matched, + biter.IsValuePinned() ? &biter : nullptr)) { done = true; break; }