Adjust pread/pwrite to return Status

Summary:
Returning bytes_read causes the caller to call GetLastError()
  to report failure but the lasterror may be overwritten by then
  so we lose the error code.
  Fix up CMake file to include xpress source code only when needed.
  Fix warning for the uninitialized var.
Closes https://github.com/facebook/rocksdb/pull/3795

Differential Revision: D7832935

Pulled By: anand1976

fbshipit-source-id: 4be21affb9b85d361b96244f4ef459f492b7cb2b
This commit is contained in:
Dmitri Smirnov 2018-05-01 13:38:36 -07:00 committed by Facebook Github Bot
parent 19fde54841
commit acb61b7a52
4 changed files with 159 additions and 144 deletions

View File

@ -659,8 +659,12 @@ if(WIN32)
port/win/env_default.cc port/win/env_default.cc
port/win/port_win.cc port/win/port_win.cc
port/win/win_logger.cc port/win/win_logger.cc
port/win/win_thread.cc port/win/win_thread.cc)
if(WITH_XPRESS)
list(APPEND SOURCES
port/win/xpress_win.cc) port/win/xpress_win.cc)
endif()
if(WITH_JEMALLOC) if(WITH_JEMALLOC)
list(APPEND SOURCES list(APPEND SOURCES

View File

@ -67,9 +67,20 @@ std::string GetWindowsErrSz(DWORD err) {
// Because all the reads/writes happen by the specified offset, the caller in // Because all the reads/writes happen by the specified offset, the caller in
// theory should not // theory should not
// rely on the current file offset. // rely on the current file offset.
SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes, Status pwrite(const WinFileData* file_data, const Slice& data,
uint64_t offset) { uint64_t offset, size_t& bytes_written) {
assert(numBytes <= std::numeric_limits<DWORD>::max());
Status s;
bytes_written = 0;
size_t num_bytes = data.size();
if (num_bytes > std::numeric_limits<DWORD>::max()) {
// May happen in 64-bit builds where size_t is 64-bits but
// long is still 32-bit, but that's the API here at the moment
return Status::InvalidArgument("num_bytes is too large for a single write: " +
file_data->GetName());
}
OVERLAPPED overlapped = { 0 }; OVERLAPPED overlapped = { 0 };
ULARGE_INTEGER offsetUnion; ULARGE_INTEGER offsetUnion;
offsetUnion.QuadPart = offset; offsetUnion.QuadPart = offset;
@ -77,23 +88,32 @@ SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes,
overlapped.Offset = offsetUnion.LowPart; overlapped.Offset = offsetUnion.LowPart;
overlapped.OffsetHigh = offsetUnion.HighPart; overlapped.OffsetHigh = offsetUnion.HighPart;
SSIZE_T result = 0; DWORD bytesWritten = 0;
unsigned long bytesWritten = 0; if (FALSE == WriteFile(file_data->GetFileHandle(), data.data(), static_cast<DWORD>(num_bytes),
&bytesWritten, &overlapped)) {
if (FALSE == WriteFile(hFile, src, static_cast<DWORD>(numBytes), &bytesWritten, auto lastError = GetLastError();
&overlapped)) { s = IOErrorFromWindowsError("WriteFile failed: " + file_data->GetName(),
result = -1; lastError);
} else { } else {
result = bytesWritten; bytes_written = bytesWritten;
} }
return result; return s;
} }
// See comments for pwrite above // See comments for pwrite above
SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset) { Status pread(const WinFileData* file_data, char* src, size_t num_bytes,
assert(numBytes <= std::numeric_limits<DWORD>::max()); uint64_t offset, size_t& bytes_read) {
Status s;
bytes_read = 0;
if (num_bytes > std::numeric_limits<DWORD>::max()) {
return Status::InvalidArgument("num_bytes is too large for a single read: " +
file_data->GetName());
}
OVERLAPPED overlapped = { 0 }; OVERLAPPED overlapped = { 0 };
ULARGE_INTEGER offsetUnion; ULARGE_INTEGER offsetUnion;
offsetUnion.QuadPart = offset; offsetUnion.QuadPart = offset;
@ -101,18 +121,21 @@ SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset) {
overlapped.Offset = offsetUnion.LowPart; overlapped.Offset = offsetUnion.LowPart;
overlapped.OffsetHigh = offsetUnion.HighPart; overlapped.OffsetHigh = offsetUnion.HighPart;
SSIZE_T result = 0; DWORD bytesRead = 0;
unsigned long bytesRead = 0; if (FALSE == ReadFile(file_data->GetFileHandle(), src, static_cast<DWORD>(num_bytes),
&bytesRead, &overlapped)) {
if (FALSE == ReadFile(hFile, src, static_cast<DWORD>(numBytes), &bytesRead, auto lastError = GetLastError();
&overlapped)) { // EOF is OK with zero bytes read
return -1; if (lastError != ERROR_HANDLE_EOF) {
s = IOErrorFromWindowsError("ReadFile failed: " + file_data->GetName(),
lastError);
}
} else { } else {
result = bytesRead; bytes_read = bytesRead;
} }
return result; return s;
} }
// SetFileInformationByHandle() is capable of fast pre-allocates. // SetFileInformationByHandle() is capable of fast pre-allocates.
@ -586,34 +609,42 @@ WinSequentialFile::~WinSequentialFile() {
} }
Status WinSequentialFile::Read(size_t n, Slice* result, char* scratch) { Status WinSequentialFile::Read(size_t n, Slice* result, char* scratch) {
assert(result != nullptr && !WinFileData::use_direct_io());
Status s; Status s;
size_t r = 0; size_t r = 0;
assert(result != nullptr);
if (WinFileData::use_direct_io()) {
return Status::NotSupported("Read() does not support direct_io");
}
// Windows ReadFile API accepts a DWORD. // Windows ReadFile API accepts a DWORD.
// While it is possible to read in a loop if n is > UINT_MAX // While it is possible to read in a loop if n is too big
// it is a highly unlikely case. // it is an unlikely case.
if (n > UINT_MAX) { if (n > std::numeric_limits<DWORD>::max()) {
return IOErrorFromWindowsError(filename_, ERROR_INVALID_PARAMETER); return Status::InvalidArgument("n is too big for a single ReadFile: "
+ filename_);
} }
DWORD bytesToRead = static_cast<DWORD>(n); //cast is safe due to the check above DWORD bytesToRead = static_cast<DWORD>(n); //cast is safe due to the check above
DWORD bytesRead = 0; DWORD bytesRead = 0;
BOOL ret = ReadFile(hFile_, scratch, bytesToRead, &bytesRead, NULL); BOOL ret = ReadFile(hFile_, scratch, bytesToRead, &bytesRead, NULL);
if (ret == TRUE) { if (ret != FALSE) {
r = bytesRead; r = bytesRead;
} else { } else {
return IOErrorFromWindowsError(filename_, GetLastError()); auto lastError = GetLastError();
if (lastError != ERROR_HANDLE_EOF) {
s = IOErrorFromWindowsError("ReadFile failed: " + filename_,
lastError);
}
} }
*result = Slice(scratch, r); *result = Slice(scratch, r);
return s; return s;
} }
SSIZE_T WinSequentialFile::PositionedReadInternal(char* src, size_t numBytes, Status WinSequentialFile::PositionedReadInternal(char* src, size_t numBytes,
uint64_t offset) const { uint64_t offset, size_t& bytes_read) const {
return pread(GetFileHandle(), src, numBytes, offset); return pread(this, src, numBytes, offset, bytes_read);
} }
Status WinSequentialFile::PositionedRead(uint64_t offset, size_t n, Slice* result, Status WinSequentialFile::PositionedRead(uint64_t offset, size_t n, Slice* result,
@ -621,27 +652,19 @@ Status WinSequentialFile::PositionedRead(uint64_t offset, size_t n, Slice* resul
Status s; Status s;
assert(WinFileData::use_direct_io()); if (!WinFileData::use_direct_io()) {
return Status::NotSupported("This function is only used for direct_io");
// Windows ReadFile API accepts a DWORD.
// While it is possible to read in a loop if n is > UINT_MAX
// it is a highly unlikely case.
if (n > UINT_MAX) {
return IOErrorFromWindowsError(GetName(), ERROR_INVALID_PARAMETER);
} }
auto r = PositionedReadInternal(scratch, n, offset); if (!IsSectorAligned(offset) ||
!IsSectorAligned(n)) {
if (r < 0) { return Status::InvalidArgument(
auto lastError = GetLastError(); "WinSequentialFile::PositionedRead: offset is not properly aligned");
// Posix impl wants to treat reads from beyond
// of the file as OK.
if (lastError != ERROR_HANDLE_EOF) {
s = IOErrorFromWindowsError(GetName(), lastError);
}
} }
*result = Slice(scratch, (r < 0) ? 0 : size_t(r)); size_t bytes_read = 0; // out param
s = PositionedReadInternal(scratch, n, offset, bytes_read);
*result = Slice(scratch, bytes_read);
return s; return s;
} }
@ -649,15 +672,18 @@ Status WinSequentialFile::PositionedRead(uint64_t offset, size_t n, Slice* resul
Status WinSequentialFile::Skip(uint64_t n) { Status WinSequentialFile::Skip(uint64_t n) {
// Can't handle more than signed max as SetFilePointerEx accepts a signed 64-bit // Can't handle more than signed max as SetFilePointerEx accepts a signed 64-bit
// integer. As such it is a highly unlikley case to have n so large. // integer. As such it is a highly unlikley case to have n so large.
if (n > _I64_MAX) { if (n > static_cast<uint64_t>(std::numeric_limits<LONGLONG>::max())) {
return IOErrorFromWindowsError(filename_, ERROR_INVALID_PARAMETER); return Status::InvalidArgument("n is too large for a single SetFilePointerEx() call" +
filename_);
} }
LARGE_INTEGER li; LARGE_INTEGER li;
li.QuadPart = static_cast<int64_t>(n); //cast is safe due to the check above li.QuadPart = static_cast<LONGLONG>(n); //cast is safe due to the check above
BOOL ret = SetFilePointerEx(hFile_, li, NULL, FILE_CURRENT); BOOL ret = SetFilePointerEx(hFile_, li, NULL, FILE_CURRENT);
if (ret == FALSE) { if (ret == FALSE) {
return IOErrorFromWindowsError(filename_, GetLastError()); auto lastError = GetLastError();
return IOErrorFromWindowsError("Skip SetFilePointerEx():" + filename_,
lastError);
} }
return Status::OK(); return Status::OK();
} }
@ -670,10 +696,11 @@ Status WinSequentialFile::InvalidateCache(size_t offset, size_t length) {
/// WinRandomAccessBase /// WinRandomAccessBase
inline inline
SSIZE_T WinRandomAccessImpl::PositionedReadInternal(char* src, Status WinRandomAccessImpl::PositionedReadInternal(char* src,
size_t numBytes, size_t numBytes,
uint64_t offset) const { uint64_t offset,
return pread(file_base_->GetFileHandle(), src, numBytes, offset); size_t& bytes_read) const {
return pread(file_base_, src, numBytes, offset, bytes_read);
} }
inline inline
@ -694,8 +721,10 @@ Status WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, Slice* result,
// Check buffer alignment // Check buffer alignment
if (file_base_->use_direct_io()) { if (file_base_->use_direct_io()) {
if (!IsAligned(alignment_, scratch)) { if (!IsSectorAligned(offset) ||
return Status::InvalidArgument("WinRandomAccessImpl::ReadImpl: scratch is not properly aligned"); !IsAligned(alignment_, scratch)) {
return Status::InvalidArgument(
"WinRandomAccessImpl::ReadImpl: offset or scratch is not properly aligned");
} }
} }
@ -704,22 +733,9 @@ Status WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, Slice* result,
return s; return s;
} }
size_t left = n; size_t bytes_read = 0;
s = PositionedReadInternal(scratch, n, offset, bytes_read);
SSIZE_T r = PositionedReadInternal(scratch, left, offset); *result = Slice(scratch, bytes_read);
if (r > 0) {
left -= r;
} else if (r < 0) {
auto lastError = GetLastError();
// Posix impl wants to treat reads from beyond
// of the file as OK.
if(lastError != ERROR_HANDLE_EOF) {
s = IOErrorFromWindowsError(file_base_->GetName(), lastError);
}
}
*result = Slice(scratch, (r < 0) ? 0 : n - left);
return s; return s;
} }
@ -778,7 +794,7 @@ WinWritableImpl::WinWritableImpl(WinFileData* file_data, size_t alignment)
BOOL ret = SetFilePointerEx(file_data_->GetFileHandle(), zero_move, &pos, BOOL ret = SetFilePointerEx(file_data_->GetFileHandle(), zero_move, &pos,
FILE_CURRENT); FILE_CURRENT);
// Querying no supped to fail // Querying no supped to fail
if (ret) { if (ret != 0) {
next_write_offset_ = pos.QuadPart; next_write_offset_ = pos.QuadPart;
} else { } else {
assert(false); assert(false);
@ -790,31 +806,24 @@ Status WinWritableImpl::AppendImpl(const Slice& data) {
Status s; Status s;
assert(data.size() < std::numeric_limits<DWORD>::max()); if (data.size() > std::numeric_limits<DWORD>::max()) {
return Status::InvalidArgument("data is too long for a single write" +
file_data_->GetName());
}
uint64_t written = 0; size_t bytes_written = 0; // out param
(void)written;
if (file_data_->use_direct_io()) { if (file_data_->use_direct_io()) {
// With no offset specified we are appending // With no offset specified we are appending
// to the end of the file // to the end of the file
assert(IsSectorAligned(next_write_offset_)); assert(IsSectorAligned(next_write_offset_));
assert(IsSectorAligned(data.size())); if (!IsSectorAligned(data.size()) ||
assert(IsAligned(GetAlignement(), data.data())); !IsAligned(GetAlignement(), data.data())) {
s = Status::InvalidArgument(
SSIZE_T ret = pwrite(file_data_->GetFileHandle(), data.data(), "WriteData must be page aligned, size must be sector aligned");
data.size(), next_write_offset_);
if (ret < 0) {
auto lastError = GetLastError();
s = IOErrorFromWindowsError(
"Failed to pwrite for: " + file_data_->GetName(), lastError);
} else { } else {
written = ret; s = pwrite(file_data_, data, next_write_offset_, bytes_written);
} }
} else { } else {
DWORD bytesWritten = 0; DWORD bytesWritten = 0;
@ -824,15 +833,21 @@ Status WinWritableImpl::AppendImpl(const Slice& data) {
s = IOErrorFromWindowsError( s = IOErrorFromWindowsError(
"Failed to WriteFile: " + file_data_->GetName(), "Failed to WriteFile: " + file_data_->GetName(),
lastError); lastError);
} } else {
else { bytes_written = bytesWritten;
written = bytesWritten;
} }
} }
if(s.ok()) { if(s.ok()) {
assert(written == data.size()); if (bytes_written == data.size()) {
next_write_offset_ += data.size(); // This matters for direct_io cases where
// we rely on the fact that next_write_offset_
// is sector aligned
next_write_offset_ += bytes_written;
} else {
s = Status::IOError("Failed to write all bytes: " +
file_data_->GetName());
}
} }
return s; return s;
@ -842,38 +857,44 @@ inline
Status WinWritableImpl::PositionedAppendImpl(const Slice& data, uint64_t offset) { Status WinWritableImpl::PositionedAppendImpl(const Slice& data, uint64_t offset) {
if(file_data_->use_direct_io()) { if(file_data_->use_direct_io()) {
assert(IsSectorAligned(offset)); if (!IsSectorAligned(offset) ||
assert(IsSectorAligned(data.size())); !IsSectorAligned(data.size()) ||
assert(IsAligned(GetAlignement(), data.data())); !IsAligned(GetAlignement(), data.data())) {
return Status::InvalidArgument(
"Data and offset must be page aligned, size must be sector aligned");
}
} }
Status s; size_t bytes_written = 0;
Status s = pwrite(file_data_, data, offset, bytes_written);
SSIZE_T ret = pwrite(file_data_->GetFileHandle(), data.data(), data.size(), offset); if(s.ok()) {
if (bytes_written == data.size()) {
// Error break // For sequential write this would be simple
if (ret < 0) { // size extension by data.size()
auto lastError = GetLastError(); uint64_t write_end = offset + bytes_written;
s = IOErrorFromWindowsError( if (write_end >= next_write_offset_) {
"Failed to pwrite for: " + file_data_->GetName(), lastError); next_write_offset_ = write_end;
} else { }
assert(size_t(ret) == data.size()); } else {
// For sequential write this would be simple s = Status::IOError("Failed to write all of the requested data: " +
// size extension by data.size() file_data_->GetName());
uint64_t write_end = offset + data.size();
if (write_end >= next_write_offset_) {
next_write_offset_ = write_end;
} }
} }
return s; return s;
} }
// Need to implement this so the file is truncated correctly
// when buffered and unbuffered mode
inline inline
Status WinWritableImpl::TruncateImpl(uint64_t size) { Status WinWritableImpl::TruncateImpl(uint64_t size) {
// It is tempting to check for the size for sector alignment
// but truncation may come at the end and there is not a requirement
// for this to be sector aligned so long as we do not attempt to write
// after that. The interface docs state that the behavior is undefined
// in that case.
Status s = ftruncate(file_data_->GetName(), file_data_->GetFileHandle(), Status s = ftruncate(file_data_->GetName(), file_data_->GetFileHandle(),
size); size);
if (s.ok()) { if (s.ok()) {
next_write_offset_ = size; next_write_offset_ = size;
} }
@ -888,14 +909,14 @@ Status WinWritableImpl::CloseImpl() {
auto hFile = file_data_->GetFileHandle(); auto hFile = file_data_->GetFileHandle();
assert(INVALID_HANDLE_VALUE != hFile); assert(INVALID_HANDLE_VALUE != hFile);
if (fsync(hFile) < 0) { if (!::FlushFileBuffers(hFile)) {
auto lastError = GetLastError(); auto lastError = GetLastError();
s = IOErrorFromWindowsError("fsync failed at Close() for: " + s = IOErrorFromWindowsError("FlushFileBuffers failed at Close() for: " +
file_data_->GetName(), file_data_->GetName(),
lastError); lastError);
} }
if(!file_data_->CloseFile()) { if(!file_data_->CloseFile() && s.ok()) {
auto lastError = GetLastError(); auto lastError = GetLastError();
s = IOErrorFromWindowsError("CloseHandle failed for: " + file_data_->GetName(), s = IOErrorFromWindowsError("CloseHandle failed for: " + file_data_->GetName(),
lastError); lastError);
@ -906,11 +927,10 @@ Status WinWritableImpl::CloseImpl() {
inline inline
Status WinWritableImpl::SyncImpl() { Status WinWritableImpl::SyncImpl() {
Status s; Status s;
// Calls flush buffers if (!::FlushFileBuffers (file_data_->GetFileHandle())) {
if (!file_data_->use_direct_io() && fsync(file_data_->GetFileHandle()) < 0) {
auto lastError = GetLastError(); auto lastError = GetLastError();
s = IOErrorFromWindowsError( s = IOErrorFromWindowsError(
"fsync failed at Sync() for: " + file_data_->GetName(), lastError); "FlushFileBuffers failed at Sync() for: " + file_data_->GetName(), lastError);
} }
return s; return s;
} }

View File

@ -40,22 +40,13 @@ inline Status IOError(const std::string& context, int err_number) {
: Status::IOError(context, strerror(err_number)); : Status::IOError(context, strerror(err_number));
} }
// Note the below two do not set errno because they are used only here in this class WinFileData;
// file
// on a Windows handle and, therefore, not necessary. Translating GetLastError()
// to errno
// is a sad business
inline int fsync(HANDLE hFile) {
if (!FlushFileBuffers(hFile)) {
return -1;
}
return 0; Status pwrite(const WinFileData* file_data, const Slice& data,
} uint64_t offset, size_t& bytes_written);
SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes, uint64_t offset); Status pread(const WinFileData* file_data, char* src, size_t num_bytes,
uint64_t offset, size_t& bytes_read);
SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset);
Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size); Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
@ -104,8 +95,8 @@ class WinFileData {
class WinSequentialFile : protected WinFileData, public SequentialFile { class WinSequentialFile : protected WinFileData, public SequentialFile {
// Override for behavior change when creating a custom env // Override for behavior change when creating a custom env
virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes, virtual Status PositionedReadInternal(char* src, size_t numBytes,
uint64_t offset) const; uint64_t offset, size_t& bytes_read) const;
public: public:
WinSequentialFile(const std::string& fname, HANDLE f, WinSequentialFile(const std::string& fname, HANDLE f,
@ -240,8 +231,8 @@ class WinRandomAccessImpl {
size_t alignment_; size_t alignment_;
// Override for behavior change when creating a custom env // Override for behavior change when creating a custom env
virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes, virtual Status PositionedReadInternal(char* src, size_t numBytes,
uint64_t offset) const; uint64_t offset, size_t& bytes_read) const;
WinRandomAccessImpl(WinFileData* file_base, size_t alignment, WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
const EnvOptions& options); const EnvOptions& options);

View File

@ -857,7 +857,7 @@ class SharedState {
"Cannot use --expected_values_path on when " "Cannot use --expected_values_path on when "
"--clear_column_family_one_in is greater than zero."); "--clear_column_family_one_in is greater than zero.");
} }
size_t size; size_t size = 0;
if (status.ok()) { if (status.ok()) {
status = FLAGS_env->GetFileSize(FLAGS_expected_values_path, &size); status = FLAGS_env->GetFileSize(FLAGS_expected_values_path, &size);
} }