diff --git a/HISTORY.md b/HISTORY.md index 7b51d37a0..804321151 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -7,6 +7,8 @@ ## 5.4.0 (04/11/2017) ### Public API Change +* random_access_max_buffer_size no longer has any effect +* Removed Env::EnableReadAhead(), Env::ShouldForwardRawRequest() * Support dynamically change `stats_dump_period_sec` option via SetDBOptions(). * Added ReadOptions::max_skippable_internal_keys to set a threshold to fail a request as incomplete when too many keys are being skipped when using iterators. * DB::Get in place of std::string accepts PinnableSlice, which avoids the extra memcpy of value to std::string in most of cases. diff --git a/db/compaction_iterator.cc b/db/compaction_iterator.cc index a69c62578..55579c3a5 100644 --- a/db/compaction_iterator.cc +++ b/db/compaction_iterator.cc @@ -6,6 +6,7 @@ // of patent rights can be found in the PATENTS file in the same directory. #include "db/compaction_iterator.h" +#include "rocksdb/listener.h" #include "table/internal_iterator.h" namespace rocksdb { diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index f1d6e666b..a2fde4733 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -510,16 +510,6 @@ class RandomAccessFile { return Status::OK(); } - // Used by the file_reader_writer to decide if the ReadAhead wrapper - // should simply forward the call and do not enact buffering or locking. - virtual bool ShouldForwardRawRequest() const { - return false; - } - - // For cases when read-ahead is implemented in the platform dependent - // layer - virtual void EnableReadAhead() {} - // Tries to get an unique ID for this file that will be the same each time // the file is opened (and will stay the same while the file is open). // Furthermore, it tries to make this ID at most "max_size" bytes. If such an @@ -751,17 +741,6 @@ class RandomRWFile { // aligned buffer for Direct I/O virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } - // Used by the file_reader_writer to decide if the ReadAhead wrapper - // should simply forward the call and do not enact read_ahead buffering or locking. - // The implementation below takes care of reading ahead - virtual bool ShouldForwardRawRequest() const { - return false; - } - - // For cases when read-ahead is implemented in the platform dependent - // layer. This is when ShouldForwardRawRequest() returns true. - virtual void EnableReadAhead() {} - // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. // Pass aligned buffer when use_direct_io() returns true. virtual Status Write(uint64_t offset, const Slice& data) = 0; diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 04060834f..1c2130397 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -45,7 +45,7 @@ namespace rocksdb { // settable through string on limited platforms as it depends on behavior of // compilers. #ifndef ROCKSDB_LITE -#ifdef OS_LINUX +#if defined OS_LINUX || defined OS_WIN #ifndef __clang__ class OptionsSettableTest : public testing::Test { @@ -54,7 +54,7 @@ class OptionsSettableTest : public testing::Test { }; const char kSpecialChar = 'z'; -typedef std::vector> OffsetGap; +typedef std::vector> OffsetGap; void FillWithSpecialChar(char* start_ptr, size_t total_size, const OffsetGap& blacklist) { @@ -446,7 +446,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { delete[] new_options_ptr; } #endif // !__clang__ -#endif // OS_LINUX +#endif // OS_LINUX || OS_WIN #endif // !ROCKSDB_LITE } // namespace rocksdb diff --git a/port/win/io_win.cc b/port/win/io_win.cc index 0dd5ed9d1..ac7c4ce90 100644 --- a/port/win/io_win.cc +++ b/port/win/io_win.cc @@ -638,71 +638,6 @@ Status WinSequentialFile::InvalidateCache(size_t offset, size_t length) { ////////////////////////////////////////////////////////////////////////////////////////////////// /// WinRandomAccessBase -// Helper -void CalculateReadParameters(size_t alignment, uint64_t offset, - size_t bytes_requested, - size_t& actual_bytes_toread, - uint64_t& first_page_start) { - - first_page_start = TruncateToPageBoundary(alignment, offset); - const uint64_t last_page_start = - TruncateToPageBoundary(alignment, offset + bytes_requested - 1); - actual_bytes_toread = (last_page_start - first_page_start) + alignment; -} - -SSIZE_T WinRandomAccessImpl::ReadIntoBuffer(uint64_t user_offset, - uint64_t first_page_start, - size_t bytes_to_read, size_t& left, - AlignedBuffer& buffer, char* dest) const { - assert(buffer.CurrentSize() == 0); - assert(buffer.Capacity() >= bytes_to_read); - - SSIZE_T read = - PositionedReadInternal(buffer.Destination(), bytes_to_read, - first_page_start); - - if (read > 0) { - buffer.Size(read); - - // Let's figure out how much we read from the users standpoint - if ((first_page_start + buffer.CurrentSize()) > user_offset) { - assert(first_page_start <= user_offset); - size_t buffer_offset = user_offset - first_page_start; - read = buffer.Read(dest, buffer_offset, left); - } else { - read = 0; - } - left -= read; - } - return read; -} - -SSIZE_T WinRandomAccessImpl::ReadIntoOneShotBuffer(uint64_t user_offset, - uint64_t first_page_start, - size_t bytes_to_read, size_t& left, - char* dest) const { - AlignedBuffer bigBuffer; - bigBuffer.Alignment(buffer_.Alignment()); - bigBuffer.AllocateNewBuffer(bytes_to_read); - - return ReadIntoBuffer(user_offset, first_page_start, bytes_to_read, left, - bigBuffer, dest); -} - -SSIZE_T WinRandomAccessImpl::ReadIntoInstanceBuffer(uint64_t user_offset, - uint64_t first_page_start, - size_t bytes_to_read, size_t& left, - char* dest) const { - SSIZE_T read = ReadIntoBuffer(user_offset, first_page_start, bytes_to_read, - left, buffer_, dest); - - if (read > 0) { - buffered_start_ = first_page_start; - } - - return read; -} - SSIZE_T WinRandomAccessImpl::PositionedReadInternal(char* src, size_t numBytes, uint64_t offset) const { @@ -714,17 +649,9 @@ WinRandomAccessImpl::WinRandomAccessImpl(WinFileData* file_base, size_t alignment, const EnvOptions& options) : file_base_(file_base), - read_ahead_(false), - compaction_readahead_size_(options.compaction_readahead_size), - random_access_max_buffer_size_(options.random_access_max_buffer_size), - buffer_(), - buffered_start_(0) { + alignment_(alignment) { assert(!options.use_mmap_reads); - - // Do not allocate the buffer either until the first request or - // until there is a call to allocate a read-ahead buffer - buffer_.Alignment(alignment); } inline @@ -732,94 +659,26 @@ Status WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, Slice* result, char* scratch) const { Status s; - SSIZE_T r = -1; - size_t left = n; - char* dest = scratch; + + // Check buffer alignment + if (file_base_->use_direct_io()) { + if (!IsAligned(alignment_, scratch)) { + return Status::InvalidArgument("WinRandomAccessImpl::ReadImpl: scratch is not properly aligned"); + } + } if (n == 0) { *result = Slice(scratch, 0); return s; } - // When in direct I/O mode we need to do the following changes: - // - use our own aligned buffer - // - always read at the offset of that is a multiple of alignment - if (file_base_->use_direct_io()) { - uint64_t first_page_start = 0; - size_t actual_bytes_toread = 0; - size_t bytes_requested = left; + size_t left = n; + char* dest = scratch; - if (!read_ahead_ && random_access_max_buffer_size_ == 0) { - CalculateReadParameters(buffer_.Alignment(), offset, bytes_requested, - actual_bytes_toread, - first_page_start); - - assert(actual_bytes_toread > 0); - - r = ReadIntoOneShotBuffer(offset, first_page_start, - actual_bytes_toread, left, dest); - } else { - - std::unique_lock lock(buffer_mut_); - - // Let's see if at least some of the requested data is already - // in the buffer - if (offset >= buffered_start_ && - offset < (buffered_start_ + buffer_.CurrentSize())) { - size_t buffer_offset = offset - buffered_start_; - r = buffer_.Read(dest, buffer_offset, left); - assert(r >= 0); - - left -= size_t(r); - offset += r; - dest += r; - } - - // Still some left or none was buffered - if (left > 0) { - // Figure out the start/end offset for reading and amount to read - bytes_requested = left; - - if (read_ahead_ && bytes_requested < compaction_readahead_size_) { - bytes_requested = compaction_readahead_size_; - } - - CalculateReadParameters(buffer_.Alignment(), offset, bytes_requested, - actual_bytes_toread, - first_page_start); - - assert(actual_bytes_toread > 0); - - if (buffer_.Capacity() < actual_bytes_toread) { - // If we are in read-ahead mode or the requested size - // exceeds max buffer size then use one-shot - // big buffer otherwise reallocate main buffer - if (read_ahead_ || - (actual_bytes_toread > random_access_max_buffer_size_)) { - // Unlock the mutex since we are not using instance buffer - lock.unlock(); - r = ReadIntoOneShotBuffer(offset, first_page_start, - actual_bytes_toread, left, dest); - } else { - buffer_.AllocateNewBuffer(actual_bytes_toread); - r = ReadIntoInstanceBuffer(offset, first_page_start, - actual_bytes_toread, left, dest); - } - } else { - buffer_.Clear(); - r = ReadIntoInstanceBuffer(offset, first_page_start, - actual_bytes_toread, left, dest); - } - } - } - } else { - r = PositionedReadInternal(scratch, left, offset); - if (r > 0) { - left -= r; - } - } - - if (r < 0) { + SSIZE_T r = PositionedReadInternal(scratch, left, offset); + if (r > 0) { + left -= r; + } else if (r < 0) { auto lastError = GetLastError(); // Posix impl wants to treat reads from beyond // of the file as OK. @@ -833,23 +692,6 @@ Status WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, Slice* result, return s; } -inline -void WinRandomAccessImpl::HintImpl(RandomAccessFile::AccessPattern pattern) { - if (pattern == RandomAccessFile::SEQUENTIAL && file_base_->use_direct_io() && - compaction_readahead_size_ > 0) { - std::lock_guard lg(buffer_mut_); - if (!read_ahead_) { - read_ahead_ = true; - // This would allocate read-ahead size + 2 alignments - // - one for memory alignment which added implicitly by AlignedBuffer - // - We add one more alignment because we will read one alignment more - // from disk - buffer_.AllocateNewBuffer(compaction_readahead_size_ + - buffer_.Alignment()); - } - } -} - /////////////////////////////////////////////////////////////////////////////////////////////////// /// WinRandomAccessFile @@ -867,18 +709,6 @@ Status WinRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result, return ReadImpl(offset, n, result, scratch); } -void WinRandomAccessFile::EnableReadAhead() { - HintImpl(SEQUENTIAL); -} - -bool WinRandomAccessFile::ShouldForwardRawRequest() const { - return true; -} - -void WinRandomAccessFile::Hint(AccessPattern pattern) { - HintImpl(pattern); -} - Status WinRandomAccessFile::InvalidateCache(size_t offset, size_t length) { return Status::OK(); } @@ -1136,14 +966,6 @@ size_t WinRandomRWFile::GetRequiredBufferAlignment() const { return GetAlignement(); } -bool WinRandomRWFile::ShouldForwardRawRequest() const { - return true; -} - -void WinRandomRWFile::EnableReadAhead() { - HintImpl(RandomAccessFile::SEQUENTIAL); -} - Status WinRandomRWFile::Write(uint64_t offset, const Slice & data) { return PositionedAppendImpl(data, offset); } diff --git a/port/win/io_win.h b/port/win/io_win.h index a434ab775..95e9df5fd 100644 --- a/port/win/io_win.h +++ b/port/win/io_win.h @@ -237,54 +237,12 @@ class WinMmapFile : private WinFileData, public WritableFile { class WinRandomAccessImpl { protected: WinFileData* file_base_; - bool read_ahead_; - const size_t compaction_readahead_size_; - const size_t random_access_max_buffer_size_; - mutable std::mutex buffer_mut_; - mutable AlignedBuffer buffer_; - mutable uint64_t - buffered_start_; // file offset set that is currently buffered + size_t alignment_; // Override for behavior change when creating a custom env virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes, uint64_t offset) const; - /* - * The function reads a requested amount of bytes into the specified aligned - * buffer Upon success the function sets the length of the buffer to the - * amount of bytes actually read even though it might be less than actually - * requested. It then copies the amount of bytes requested by the user (left) - * to the user supplied buffer (dest) and reduces left by the amount of bytes - * copied to the user buffer - * - * @user_offset [in] - offset on disk where the read was requested by the user - * @first_page_start [in] - actual page aligned disk offset that we want to - * read from - * @bytes_to_read [in] - total amount of bytes that will be read from disk - * which is generally greater or equal to the amount - * that the user has requested due to the - * either alignment requirements or read_ahead in - * effect. - * @left [in/out] total amount of bytes that needs to be copied to the user - * buffer. It is reduced by the amount of bytes that actually - * copied - * @buffer - buffer to use - * @dest - user supplied buffer - */ - - SSIZE_T ReadIntoBuffer(uint64_t user_offset, uint64_t first_page_start, - size_t bytes_to_read, size_t& left, - AlignedBuffer& buffer, char* dest) const; - - SSIZE_T ReadIntoOneShotBuffer(uint64_t user_offset, uint64_t first_page_start, - size_t bytes_to_read, size_t& left, - char* dest) const; - - SSIZE_T ReadIntoInstanceBuffer(uint64_t user_offset, - uint64_t first_page_start, - size_t bytes_to_read, size_t& left, - char* dest) const; - WinRandomAccessImpl(WinFileData* file_base, size_t alignment, const EnvOptions& options); @@ -293,9 +251,7 @@ class WinRandomAccessImpl { Status ReadImpl(uint64_t offset, size_t n, Slice* result, char* scratch) const; - void HintImpl(RandomAccessFile::AccessPattern pattern); - - size_t GetAlignment() const { return buffer_.Alignment(); } + size_t GetAlignment() const { return alignment_; } public: @@ -318,14 +274,8 @@ class WinRandomAccessFile virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override; - virtual bool ShouldForwardRawRequest() const override; - - virtual void EnableReadAhead() override; - virtual size_t GetUniqueId(char* id, size_t max_size) const override; - virtual void Hint(AccessPattern pattern) override; - virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); } virtual Status InvalidateCache(size_t offset, size_t length) override; @@ -449,16 +399,6 @@ class WinRandomRWFile : private WinFileData, // buffer for Write() when use_direct_io() returns true virtual size_t GetRequiredBufferAlignment() const override; - // Used by the file_reader_writer to decide if the ReadAhead wrapper - // should simply forward the call and do not enact read_ahead buffering or - // locking. - // The implementation below takes care of reading ahead - virtual bool ShouldForwardRawRequest() const override; - - // For cases when read-ahead is implemented in the platform dependent - // layer. This is when ShouldForwardRawRequest() returns true. - virtual void EnableReadAhead() override; - // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. // Pass aligned buffer when use_direct_io() returns true. virtual Status Write(uint64_t offset, const Slice& data) override; diff --git a/util/aligned_buffer.h b/util/aligned_buffer.h index 3eeec5bad..dbe57e9da 100644 --- a/util/aligned_buffer.h +++ b/util/aligned_buffer.h @@ -125,7 +125,11 @@ public: size_t Read(char* dest, size_t offset, size_t read_size) const { assert(offset < cursize_); - size_t to_read = std::min(cursize_ - offset, read_size); + + size_t to_read = 0; + if(offset < cursize_) { + to_read = std::min(cursize_ - offset, read_size); + } if (to_read > 0) { memcpy(dest, bufstart_ + offset, to_read); } diff --git a/util/file_reader_writer.cc b/util/file_reader_writer.cc index 1088ae3f4..f1fa5fcbb 100644 --- a/util/file_reader_writer.cc +++ b/util/file_reader_writer.cc @@ -435,16 +435,12 @@ class ReadaheadRandomAccessFile : public RandomAccessFile { : file_(std::move(file)), alignment_(file_->GetRequiredBufferAlignment()), readahead_size_(Roundup(readahead_size, alignment_)), - forward_calls_(file_->ShouldForwardRawRequest()), buffer_(), buffer_offset_(0), buffer_len_(0) { - if (!forward_calls_) { - buffer_.Alignment(alignment_); - buffer_.AllocateNewBuffer(readahead_size_); - } else if (readahead_size_ > 0) { - file_->EnableReadAhead(); - } + + buffer_.Alignment(alignment_); + buffer_.AllocateNewBuffer(readahead_size_); } ReadaheadRandomAccessFile(const ReadaheadRandomAccessFile&) = delete; @@ -453,15 +449,8 @@ class ReadaheadRandomAccessFile : public RandomAccessFile { virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { - if (n + alignment_ >= readahead_size_) { - return file_->Read(offset, n, result, scratch); - } - // On Windows in unbuffered mode this will lead to double buffering - // and double locking so we avoid that. - // In normal mode Windows caches so much data from disk that we do - // not need readahead. - if (forward_calls_) { + if (n + alignment_ >= readahead_size_) { return file_->Read(offset, n, result, scratch); } @@ -469,7 +458,7 @@ class ReadaheadRandomAccessFile : public RandomAccessFile { size_t cached_len = 0; // Check if there is a cache hit, means that [offset, offset + n) is either - // complitely or partially in the buffer + // completely or partially in the buffer // If it's completely cached, including end of file case when offset + n is // greater than EOF, return if (TryReadFromCache(offset, n, &cached_len, scratch) && @@ -558,7 +547,6 @@ class ReadaheadRandomAccessFile : public RandomAccessFile { std::unique_ptr file_; const size_t alignment_; size_t readahead_size_; - const bool forward_calls_; mutable std::mutex lock_; mutable AlignedBuffer buffer_; diff --git a/utilities/env_librados.cc b/utilities/env_librados.cc index db1c74006..4a0b26273 100644 --- a/utilities/env_librados.cc +++ b/utilities/env_librados.cc @@ -219,16 +219,6 @@ public: return s; } - // Used by the file_reader_writer to decide if the ReadAhead wrapper - // should simply forward the call and do not enact buffering or locking. - bool ShouldForwardRawRequest() const { - return false; - } - - // For cases when read-ahead is implemented in the platform dependent - // layer - void EnableReadAhead() {} - /** * @brief [brief description] * @details Get unique id for each file and guarantee this id is different for each file diff --git a/utilities/env_mirror.cc b/utilities/env_mirror.cc index b7a56e55f..304a3c60b 100644 --- a/utilities/env_mirror.cc +++ b/utilities/env_mirror.cc @@ -86,11 +86,6 @@ class RandomAccessFileMirror : public RandomAccessFile { return as; } - bool ShouldForwardRawRequest() const { - // NOTE: not verified - return a_->ShouldForwardRawRequest(); - } - size_t GetUniqueId(char* id, size_t max_size) const { // NOTE: not verified return a_->GetUniqueId(id, max_size);