Implement WinRandomRW file and improve code reuse (#1388)

This commit is contained in:
Dmitri Smirnov 2016-10-13 16:36:34 -07:00 committed by GitHub
parent a249a0b75b
commit b9311aa65c
5 changed files with 569 additions and 200 deletions

View File

@ -504,15 +504,16 @@ class WritableFile {
virtual ~WritableFile();
// Indicates if the class makes use of unbuffered I/O
// If false you must pass aligned buffer to Write()
virtual bool UseOSBuffer() const {
return true;
}
const size_t c_DefaultPageSize = 4 * 1024;
// This is needed when you want to allocate
// AlignedBuffer for use with file I/O classes
// Used for unbuffered file I/O when UseOSBuffer() returns false
// Use the returned alignment value to allocate
// aligned buffer for Write() when UseOSBuffer()
// returns false
virtual size_t GetRequiredBufferAlignment() const {
return c_DefaultPageSize;
}
@ -664,7 +665,34 @@ class RandomRWFile {
RandomRWFile() {}
virtual ~RandomRWFile() {}
// Indicates whether the class relies on the OS buffer (cache) for its I/O.
// If this returns false (unbuffered I/O) you must pass an aligned buffer to Write()
virtual bool UseOSBuffer() const {
return true;
}
const size_t c_DefaultPageSize = 4 * 1024;
// Use the returned alignment value to allocate
// aligned buffer for Write() when UseOSBuffer()
// returns false
virtual size_t GetRequiredBufferAlignment() const {
return c_DefaultPageSize;
}
// Used by the file_reader_writer to decide whether the ReadAhead wrapper
// should simply forward the call without doing its own read-ahead buffering
// or locking. Return true when the implementation itself takes care of
// reading ahead.
virtual bool ShouldForwardRawRequest() const {
return false;
}
// For cases when read-ahead is implemented in the platform dependent
// layer. This is when ShouldForwardRawRequest() returns true.
virtual void EnableReadAhead() {}
// Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
// Pass aligned buffer when UseOSBuffer() returns false.
virtual Status Write(uint64_t offset, const Slice& data) = 0;
// Read up to `n` bytes starting from offset `offset` and store them in
@ -681,7 +709,6 @@ class RandomRWFile {
virtual Status Close() = 0;
private:
// No copying allowed
RandomRWFile(const RandomRWFile&) = delete;
RandomRWFile& operator=(const RandomRWFile&) = delete;

View File

@ -293,6 +293,50 @@ Status WinEnvIO::NewWritableFile(const std::string& fname,
return s;
}
// Opens `fname` for random read/write access, creating the file when it does
// not exist yet, and stores a new WinRandomRWFile wrapping the handle in
// `*result`.
//
// Returns an IO error built from GetLastError() when CreateFileA fails;
// otherwise returns a default-constructed Status.
Status WinEnvIO::NewRandomRWFile(const std::string & fname,
    unique_ptr<RandomRWFile>* result, const EnvOptions & options) {
  // The file is both read and written through this handle.
  const DWORD access_mask = GENERIC_READ | GENERIC_WRITE;

  // Shared access is necessary for corruption test to pass;
  // almost all tests would work with a possible exception of fault_injection.
  const DWORD share_mask =
      FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;

  // Create if necessary or open existing.
  const DWORD disposition = OPEN_ALWAYS;

  // Random access is requested to disable read-ahead, as the system reads
  // too much data. Without the OS buffer the file is additionally opened
  // with FILE_FLAG_NO_BUFFERING.
  const DWORD flags_and_attributes = options.use_os_buffer
      ? FILE_FLAG_RANDOM_ACCESS
      : (FILE_FLAG_RANDOM_ACCESS | FILE_FLAG_NO_BUFFERING);

  HANDLE file_handle = 0;
  {
    // Keep the timer scoped to the open call only.
    IOSTATS_TIMER_GUARD(open_nanos);
    file_handle = CreateFileA(fname.c_str(),
                              access_mask,
                              share_mask,
                              NULL,  // Security attributes
                              disposition,
                              flags_and_attributes,
                              NULL);
  }

  if (INVALID_HANDLE_VALUE == file_handle) {
    const auto win_error = GetLastError();
    return IOErrorFromWindowsError(
        "NewRandomRWFile failed to Create/Open: " + fname, win_error);
  }

  // The guard holds the handle while the file object is being constructed;
  // it is released afterwards because the file object then owns the handle.
  // NOTE(review): presumably the guard closes the handle if `new` throws --
  // confirm against UniqueCloseHandlePtr / CloseHandleFunc.
  UniqueCloseHandlePtr handle_guard(file_handle, CloseHandleFunc);
  result->reset(new WinRandomRWFile(fname, file_handle, page_size_, options));
  handle_guard.release();

  return Status();
}
Status WinEnvIO::NewDirectory(const std::string& name,
std::unique_ptr<Directory>* result) {
Status s;
@ -868,6 +912,11 @@ Status WinEnv::NewWritableFile(const std::string& fname,
return winenv_io_.NewWritableFile(fname, result, options);
}
// Env-level entry point for opening a random read/write file: the request is
// handed unchanged to the WinEnvIO member and its Status is returned as is.
Status WinEnv::NewRandomRWFile(const std::string & fname,
    unique_ptr<RandomRWFile>* result, const EnvOptions & options) {
  Status status = winenv_io_.NewRandomRWFile(fname, result, options);
  return status;
}
Status WinEnv::NewDirectory(const std::string& name,
std::unique_ptr<Directory>* result) {
return winenv_io_.NewDirectory(name, result);

View File

@ -92,6 +92,11 @@ public:
std::unique_ptr<WritableFile>* result,
const EnvOptions& options);
// The returned file will only be accessed by one thread at a time.
virtual Status NewRandomRWFile(const std::string& fname,
unique_ptr<RandomRWFile>* result,
const EnvOptions& options);
virtual Status NewDirectory(const std::string& name,
std::unique_ptr<Directory>* result);
@ -188,6 +193,11 @@ public:
std::unique_ptr<WritableFile>* result,
const EnvOptions& options) override;
// The returned file will only be accessed by one thread at a time.
Status NewRandomRWFile(const std::string& fname,
unique_ptr<RandomRWFile>* result,
const EnvOptions& options) override;
Status NewDirectory(const std::string& name,
std::unique_ptr<Directory>* result) override;

View File

@ -155,10 +155,12 @@ size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size) {
return static_cast<size_t>(rid - id);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// WinMmapReadableFile
WinMmapReadableFile::WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
const void* mapped_region, size_t length)
: fileName_(fileName),
hFile_(hFile),
: WinFileData(fileName, hFile, false),
hMap_(hMap),
mapped_region_(mapped_region),
length_(length) {}
@ -169,9 +171,6 @@ WinMmapReadableFile::~WinMmapReadableFile() {
ret = ::CloseHandle(hMap_);
assert(ret);
ret = ::CloseHandle(hFile_);
assert(ret);
}
Status WinMmapReadableFile::Read(uint64_t offset, size_t n, Slice* result,
@ -180,7 +179,7 @@ Status WinMmapReadableFile::Read(uint64_t offset, size_t n, Slice* result,
if (offset > length_) {
*result = Slice();
return IOError(fileName_, EINVAL);
return IOError(filename_, EINVAL);
} else if (offset + n > length_) {
n = length_ - offset;
}
@ -197,6 +196,10 @@ size_t WinMmapReadableFile::GetUniqueId(char* id, size_t max_size) const {
return GetUniqueIdFromFile(hFile_, id, max_size);
}
///////////////////////////////////////////////////////////////////////////////
/// WinMmapFile
// Can only truncate or reserve to a sector size aligned if
// used on files that are opened with Unbuffered I/O
Status WinMmapFile::TruncateFile(uint64_t toSize) {
@ -302,8 +305,7 @@ Status WinMmapFile::PreallocateInternal(uint64_t spaceToReserve) {
WinMmapFile::WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
size_t allocation_granularity, const EnvOptions& options)
: filename_(fname),
hFile_(hFile),
: WinFileData(fname, hFile, false),
hMap_(NULL),
page_size_(page_size),
allocation_granularity_(allocation_granularity),
@ -515,16 +517,16 @@ size_t WinMmapFile::GetUniqueId(char* id, size_t max_size) const {
return GetUniqueIdFromFile(hFile_, id, max_size);
}
//////////////////////////////////////////////////////////////////////////////////
// WinSequentialFile
WinSequentialFile::WinSequentialFile(const std::string& fname, HANDLE f,
const EnvOptions& options)
: filename_(fname),
file_(f),
use_os_buffer_(options.use_os_buffer)
: WinFileData(fname, f, options.use_os_buffer)
{}
WinSequentialFile::~WinSequentialFile() {
assert(file_ != INVALID_HANDLE_VALUE);
CloseHandle(file_);
assert(hFile_ != INVALID_HANDLE_VALUE);
}
Status WinSequentialFile::Read(size_t n, Slice* result, char* scratch) {
@ -540,7 +542,7 @@ Status WinSequentialFile::Read(size_t n, Slice* result, char* scratch) {
DWORD bytesToRead = static_cast<DWORD>(n); //cast is safe due to the check above
DWORD bytesRead = 0;
BOOL ret = ReadFile(file_, scratch, bytesToRead, &bytesRead, NULL);
BOOL ret = ReadFile(hFile_, scratch, bytesToRead, &bytesRead, NULL);
if (ret == TRUE) {
r = bytesRead;
} else {
@ -561,7 +563,7 @@ Status WinSequentialFile::Skip(uint64_t n) {
LARGE_INTEGER li;
li.QuadPart = static_cast<int64_t>(n); //cast is safe due to the check above
BOOL ret = SetFilePointerEx(file_, li, NULL, FILE_CURRENT);
BOOL ret = SetFilePointerEx(hFile_, li, NULL, FILE_CURRENT);
if (ret == FALSE) {
return IOErrorFromWindowsError(filename_, GetLastError());
}
@ -572,14 +574,31 @@ Status WinSequentialFile::InvalidateCache(size_t offset, size_t length) {
return Status::OK();
}
SSIZE_T WinRandomAccessFile::ReadIntoBuffer(uint64_t user_offset, uint64_t first_page_start,
//////////////////////////////////////////////////////////////////////////////////////////////////
/// WinRandomAccessBase
// Helper
// Computes the aligned read window that covers a user request.
//
//   alignment            - page/sector size the window is aligned to
//   offset               - file offset the user asked to read from
//   bytes_requested      - number of bytes the user asked for
//   actual_bytes_toread  - [out] size of the window: distance between the
//                          first and last page starts plus one full page
//   first_page_start     - [out] `offset` truncated to a page boundary
//
// NOTE(review): with bytes_requested == 0 the expression
// `offset + bytes_requested - 1` steps back one byte (and wraps when offset
// is 0). Callers assert actual_bytes_toread > 0 afterwards -- confirm that
// zero-length requests are filtered out before this helper is reached.
void CalculateReadParameters(size_t alignment, uint64_t offset,
                             size_t bytes_requested,
                             size_t& actual_bytes_toread,
                             uint64_t& first_page_start) {
  // Offset of the last byte covered by the request.
  const uint64_t last_byte_offset = offset + bytes_requested - 1;

  const uint64_t window_begin = TruncateToPageBoundary(alignment, offset);
  const uint64_t last_page_begin =
      TruncateToPageBoundary(alignment, last_byte_offset);

  first_page_start = window_begin;
  // The last page is read in full, hence the extra `alignment` bytes.
  actual_bytes_toread = (last_page_begin - window_begin) + alignment;
}
SSIZE_T WinRandomAccessImpl::ReadIntoBuffer(uint64_t user_offset,
uint64_t first_page_start,
size_t bytes_to_read, size_t& left,
AlignedBuffer& buffer, char* dest) const {
assert(buffer.CurrentSize() == 0);
assert(buffer.Capacity() >= bytes_to_read);
SSIZE_T read =
PositionedReadInternal(buffer.Destination(), bytes_to_read, first_page_start);
PositionedReadInternal(buffer.Destination(), bytes_to_read,
first_page_start);
if (read > 0) {
buffer.Size(read);
@ -597,7 +616,8 @@ SSIZE_T WinRandomAccessFile::ReadIntoBuffer(uint64_t user_offset, uint64_t first
return read;
}
SSIZE_T WinRandomAccessFile::ReadIntoOneShotBuffer(uint64_t user_offset, uint64_t first_page_start,
SSIZE_T WinRandomAccessImpl::ReadIntoOneShotBuffer(uint64_t user_offset,
uint64_t first_page_start,
size_t bytes_to_read, size_t& left,
char* dest) const {
AlignedBuffer bigBuffer;
@ -608,7 +628,7 @@ SSIZE_T WinRandomAccessFile::ReadIntoOneShotBuffer(uint64_t user_offset, uint64_
bigBuffer, dest);
}
SSIZE_T WinRandomAccessFile::ReadIntoInstanceBuffer(uint64_t user_offset,
SSIZE_T WinRandomAccessImpl::ReadIntoInstanceBuffer(uint64_t user_offset,
uint64_t first_page_start,
size_t bytes_to_read, size_t& left,
char* dest) const {
@ -622,52 +642,35 @@ SSIZE_T WinRandomAccessFile::ReadIntoInstanceBuffer(uint64_t user_offset,
return read;
}
void WinRandomAccessFile::CalculateReadParameters(uint64_t offset, size_t bytes_requested,
size_t& actual_bytes_toread,
uint64_t& first_page_start) const {
const size_t alignment = buffer_.Alignment();
first_page_start = TruncateToPageBoundary(alignment, offset);
const uint64_t last_page_start =
TruncateToPageBoundary(alignment, offset + bytes_requested - 1);
actual_bytes_toread = (last_page_start - first_page_start) + alignment;
}
SSIZE_T WinRandomAccessFile::PositionedReadInternal(char* src, size_t numBytes,
SSIZE_T WinRandomAccessImpl::PositionedReadInternal(char* src,
size_t numBytes,
uint64_t offset) const {
return pread(hFile_, src, numBytes, offset);
return pread(file_base_->GetFileHandle(), src, numBytes, offset);
}
WinRandomAccessFile::WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
const EnvOptions& options)
: filename_(fname),
hFile_(hFile),
use_os_buffer_(options.use_os_buffer),
inline
WinRandomAccessImpl::WinRandomAccessImpl(WinFileData* file_base,
size_t alignment,
const EnvOptions& options) :
file_base_(file_base),
read_ahead_(false),
compaction_readahead_size_(options.compaction_readahead_size),
random_access_max_buffer_size_(options.random_access_max_buffer_size),
buffer_(),
buffered_start_(0) {
assert(!options.use_mmap_reads);
// Unbuffered access, use internal buffer for reads
if (!use_os_buffer_) {
if (!file_base_->UseOSBuffer()) {
// Do not allocate the buffer either until the first request or
// until there is a call to allocate a read-ahead buffer
buffer_.Alignment(alignment);
}
}
WinRandomAccessFile::~WinRandomAccessFile() {
if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
::CloseHandle(hFile_);
}
}
void WinRandomAccessFile::EnableReadAhead() { this->Hint(SEQUENTIAL); }
Status WinRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result,
inline
Status WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
Status s;
@ -683,14 +686,15 @@ Status WinRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result,
// When in unbuffered mode we need to do the following changes:
// - use our own aligned buffer
// - always read at the offset of that is a multiple of alignment
if (!use_os_buffer_) {
if (!file_base_->UseOSBuffer()) {
uint64_t first_page_start = 0;
size_t actual_bytes_toread = 0;
size_t bytes_requested = left;
if (!read_ahead_ && random_access_max_buffer_size_ == 0) {
CalculateReadParameters(offset, bytes_requested, actual_bytes_toread,
CalculateReadParameters(buffer_.Alignment(), offset, bytes_requested,
actual_bytes_toread,
first_page_start);
assert(actual_bytes_toread > 0);
@ -723,7 +727,8 @@ Status WinRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result,
bytes_requested = compaction_readahead_size_;
}
CalculateReadParameters(offset, bytes_requested, actual_bytes_toread,
CalculateReadParameters(buffer_.Alignment(), offset, bytes_requested,
actual_bytes_toread,
first_page_start);
assert(actual_bytes_toread > 0);
@ -757,20 +762,25 @@ Status WinRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result,
}
}
if (r < 0) {
auto lastError = GetLastError();
// Posix impl wants to treat reads from beyond
// of the file as OK.
if(lastError != ERROR_HANDLE_EOF) {
s = IOErrorFromWindowsError(file_base_->GetName(), lastError);
}
}
*result = Slice(scratch, (r < 0) ? 0 : n - left);
if (r < 0) {
s = IOErrorFromLastWindowsError(filename_);
}
return s;
}
bool WinRandomAccessFile::ShouldForwardRawRequest() const {
return true;
}
inline
void WinRandomAccessImpl::HintImpl(RandomAccessFile::AccessPattern pattern) {
void WinRandomAccessFile::Hint(AccessPattern pattern) {
if (pattern == SEQUENTIAL && !use_os_buffer_ &&
if (pattern == RandomAccessFile::SEQUENTIAL &&
!file_base_->UseOSBuffer() &&
compaction_readahead_size_ > 0) {
std::lock_guard<std::mutex> lg(buffer_mut_);
if (!read_ahead_) {
@ -785,60 +795,76 @@ void WinRandomAccessFile::Hint(AccessPattern pattern) {
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/// WinRandomAccessFile
WinRandomAccessFile::WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
const EnvOptions& options) :
WinFileData(fname, hFile, options.use_os_buffer),
WinRandomAccessImpl(this, alignment, options) {
}
WinRandomAccessFile::~WinRandomAccessFile() {
}
Status WinRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
return ReadImpl(offset, n, result, scratch);
}
void WinRandomAccessFile::EnableReadAhead() {
HintImpl(SEQUENTIAL);
}
bool WinRandomAccessFile::ShouldForwardRawRequest() const {
return true;
}
void WinRandomAccessFile::Hint(AccessPattern pattern) {
HintImpl(pattern);
}
Status WinRandomAccessFile::InvalidateCache(size_t offset, size_t length) {
return Status::OK();
}
size_t WinRandomAccessFile::GetUniqueId(char* id, size_t max_size) const {
return GetUniqueIdFromFile(hFile_, id, max_size);
return GetUniqueIdFromFile(GetFileHandle(), id, max_size);
}
Status WinWritableFile::PreallocateInternal(uint64_t spaceToReserve) {
return fallocate(filename_, hFile_, spaceToReserve);
/////////////////////////////////////////////////////////////////////////////
// WinWritableImpl
//
inline
Status WinWritableImpl::PreallocateInternal(uint64_t spaceToReserve) {
return fallocate(file_data_->GetName(), file_data_->GetFileHandle(), spaceToReserve);
}
WinWritableFile::WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
size_t capacity, const EnvOptions& options)
: filename_(fname),
hFile_(hFile),
use_os_buffer_(options.use_os_buffer),
alignment_(alignment),
filesize_(0),
reservedsize_(0) {
assert(!options.use_mmap_writes);
WinWritableImpl::WinWritableImpl(WinFileData* file_data, size_t alignment)
: file_data_(file_data),
alignment_(alignment),
filesize_(0),
reservedsize_(0) {
}
WinWritableFile::~WinWritableFile() {
if (NULL != hFile_ && INVALID_HANDLE_VALUE != hFile_) {
WinWritableFile::Close();
}
}
// Indicates if the class makes use of unbuffered I/O
bool WinWritableFile::UseOSBuffer() const {
return use_os_buffer_;
}
size_t WinWritableFile::GetRequiredBufferAlignment() const {
return alignment_;
}
Status WinWritableFile::Append(const Slice& data) {
Status WinWritableImpl::AppendImpl(const Slice& data) {
// Used for buffered access ONLY
assert(use_os_buffer_);
assert(file_data_->UseOSBuffer());
assert(data.size() < std::numeric_limits<DWORD>::max());
Status s;
DWORD bytesWritten = 0;
if (!WriteFile(hFile_, data.data(),
if (!WriteFile(file_data_->GetFileHandle(), data.data(),
static_cast<DWORD>(data.size()), &bytesWritten, NULL)) {
auto lastError = GetLastError();
s = IOErrorFromWindowsError(
"Failed to WriteFile: " + filename_,
"Failed to WriteFile: " + file_data_->GetName(),
lastError);
} else {
}
else {
assert(size_t(bytesWritten) == data.size());
filesize_ += data.size();
}
@ -846,86 +872,77 @@ Status WinWritableFile::Append(const Slice& data) {
return s;
}
Status WinWritableFile::PositionedAppend(const Slice& data, uint64_t offset) {
Status WinWritableImpl::PositionedAppendImpl(const Slice& data, uint64_t offset) {
Status s;
SSIZE_T ret = pwrite(hFile_, data.data(), data.size(), offset);
SSIZE_T ret = pwrite(file_data_->GetFileHandle(), data.data(), data.size(), offset);
// Error break
if (ret < 0) {
auto lastError = GetLastError();
s = IOErrorFromWindowsError(
"Failed to pwrite for: " + filename_, lastError);
} else {
// With positional write it is not clear at all
// if this actually extends the filesize
"Failed to pwrite for: " + file_data_->GetName(), lastError);
}
else {
assert(size_t(ret) == data.size());
filesize_ += data.size();
// For sequential write this would be simple
// size extension by data.size()
uint64_t write_end = offset + data.size();
if (write_end >= filesize_) {
filesize_ = write_end;
}
}
return s;
}
// Need to implement this so the file is truncated correctly
// when buffered and unbuffered mode
Status WinWritableFile::Truncate(uint64_t size) {
Status s = ftruncate(filename_, hFile_, size);
// Need to implement this so the file is truncated correctly
// when buffered and unbuffered mode
inline
Status WinWritableImpl::TruncateImpl(uint64_t size) {
Status s = ftruncate(file_data_->GetName(), file_data_->GetFileHandle(),
size);
if (s.ok()) {
filesize_ = size;
}
return s;
}
Status WinWritableFile::Close() {
Status WinWritableImpl::CloseImpl() {
Status s;
assert(INVALID_HANDLE_VALUE != hFile_);
auto hFile = file_data_->GetFileHandle();
assert(INVALID_HANDLE_VALUE != hFile);
if (fsync(hFile_) < 0) {
if (fsync(hFile) < 0) {
auto lastError = GetLastError();
s = IOErrorFromWindowsError("fsync failed at Close() for: " + filename_,
s = IOErrorFromWindowsError("fsync failed at Close() for: " +
file_data_->GetName(),
lastError);
}
if (FALSE == ::CloseHandle(hFile_)) {
if(!file_data_->CloseFile()) {
auto lastError = GetLastError();
s = IOErrorFromWindowsError("CloseHandle failed for: " + filename_,
s = IOErrorFromWindowsError("CloseHandle failed for: " + file_data_->GetName(),
lastError);
}
hFile_ = INVALID_HANDLE_VALUE;
return s;
}
// write out the cached data to the OS cache
// This is now taken care of the WritableFileWriter
Status WinWritableFile::Flush() {
return Status::OK();
}
Status WinWritableFile::Sync() {
Status WinWritableImpl::SyncImpl() {
Status s;
// Calls flush buffers
if (fsync(hFile_) < 0) {
if (fsync(file_data_->GetFileHandle()) < 0) {
auto lastError = GetLastError();
s = IOErrorFromWindowsError("fsync failed at Sync() for: " + filename_,
s = IOErrorFromWindowsError("fsync failed at Sync() for: " +
file_data_->GetName(),
lastError);
}
return s;
}
Status WinWritableFile::Fsync() { return Sync(); }
uint64_t WinWritableFile::GetFileSize() {
// Double accounting now here with WritableFileWriter
// and this size will be wrong when unbuffered access is used
// but tests implement their own writable files and do not use WritableFileWrapper
// so we need to squeeze a square peg through
// a round hole here.
return filesize_;
}
Status WinWritableFile::Allocate(uint64_t offset, uint64_t len) {
Status WinWritableImpl::AllocateImpl(uint64_t offset, uint64_t len) {
Status status;
TEST_KILL_RANDOM("WinWritableFile::Allocate", rocksdb_kill_odds);
@ -946,18 +963,135 @@ Status WinWritableFile::Allocate(uint64_t offset, uint64_t len) {
return status;
}
size_t WinWritableFile::GetUniqueId(char* id, size_t max_size) const {
return GetUniqueIdFromFile(hFile_, id, max_size);
////////////////////////////////////////////////////////////////////////////////
/// WinWritableFile
WinWritableFile::WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
size_t /* capacity */, const EnvOptions& options)
: WinFileData(fname, hFile, options.use_os_buffer),
WinWritableImpl(this, alignment) {
assert(!options.use_mmap_writes);
}
WinWritableFile::~WinWritableFile() {
}
// Indicates if the class makes use of unbuffered I/O
bool WinWritableFile::UseOSBuffer() const {
return WinFileData::UseOSBuffer();
}
// Returns the alignment this file was constructed with; callers use it to
// allocate aligned buffers for writes.
size_t WinWritableFile::GetRequiredBufferAlignment() const {
  // GetAlignement() returns uint64_t while this interface returns size_t;
  // spell the narrowing out instead of relying on an implicit conversion.
  return static_cast<size_t>(GetAlignement());
}
Status WinWritableFile::Append(const Slice& data) {
return AppendImpl(data);
}
Status WinWritableFile::PositionedAppend(const Slice& data, uint64_t offset) {
return PositionedAppendImpl(data, offset);
}
// Need to implement this so the file is truncated correctly
// when buffered and unbuffered mode
Status WinWritableFile::Truncate(uint64_t size) {
return TruncateImpl(size);
}
Status WinWritableFile::Close() {
return CloseImpl();
}
// write out the cached data to the OS cache
// This is now taken care of the WritableFileWriter
Status WinWritableFile::Flush() {
return Status::OK();
}
Status WinWritableFile::Sync() {
return SyncImpl();
}
Status WinWritableFile::Fsync() {
return SyncImpl();
}
uint64_t WinWritableFile::GetFileSize() {
return GetFileSizeImpl();
}
Status WinWritableFile::Allocate(uint64_t offset, uint64_t len) {
return AllocateImpl(offset, len);
}
size_t WinWritableFile::GetUniqueId(char* id, size_t max_size) const {
return GetUniqueIdFromFile(GetFileHandle(), id, max_size);
}
/////////////////////////////////////////////////////////////////////////
/// WinRandomRWFile
WinRandomRWFile::WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
const EnvOptions& options) :
WinFileData(fname, hFile, options.use_os_buffer),
WinRandomAccessImpl(this, alignment, options),
WinWritableImpl(this, alignment) {
}
bool WinRandomRWFile::UseOSBuffer() const {
return WinFileData::UseOSBuffer();
}
// Returns the alignment this file was constructed with; use it to allocate
// an aligned buffer for Write() when UseOSBuffer() returns false.
size_t WinRandomRWFile::GetRequiredBufferAlignment() const {
  // GetAlignement() returns uint64_t while this interface returns size_t;
  // spell the narrowing out instead of relying on an implicit conversion.
  return static_cast<size_t>(GetAlignement());
}
bool WinRandomRWFile::ShouldForwardRawRequest() const {
return true;
}
void WinRandomRWFile::EnableReadAhead() {
HintImpl(RandomAccessFile::SEQUENTIAL);
}
Status WinRandomRWFile::Write(uint64_t offset, const Slice & data) {
return PositionedAppendImpl(data, offset);
}
Status WinRandomRWFile::Read(uint64_t offset, size_t n, Slice * result,
char * scratch) const {
return ReadImpl(offset, n, result, scratch);
}
Status WinRandomRWFile::Flush() {
return Status::OK();
}
Status WinRandomRWFile::Sync() {
return SyncImpl();
}
Status WinRandomRWFile::Close() {
return CloseImpl();
}
//////////////////////////////////////////////////////////////////////////
/// WinDirectory
Status WinDirectory::Fsync() { return Status::OK(); }
//////////////////////////////////////////////////////////////////////////
/// WinFileLock
WinFileLock::~WinFileLock() {
BOOL ret = ::CloseHandle(hFile_);
assert(ret);
}
}
}

View File

@ -68,10 +68,58 @@ Status ftruncate(const std::string& filename, HANDLE hFile,
size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
// mmap() based random-access
class WinMmapReadableFile : public RandomAccessFile {
const std::string fileName_;
class WinFileData {
protected:
const std::string filename_;
HANDLE hFile_;
// There is no equivalent of advising away buffered pages as in posix.
// To implement this flag we would need to do unbuffered reads which
// will need to be aligned (not sure there is a guarantee that the buffer
// passed in is aligned).
// Hence we currently ignore this flag. It is used only in a few cases
// which should not be perf critical.
// If perf evaluation finds this to be a problem, we can look into
// implementing this.
const bool use_os_buffer_;
public:
// We want this class be usable both for inheritance (prive
// or protected) and for containment so __ctor and __dtor public
WinFileData(const std::string& filename, HANDLE hFile, bool use_os_buffer) :
filename_(filename), hFile_(hFile), use_os_buffer_(use_os_buffer)
{}
virtual ~WinFileData() {
this->CloseFile();
}
bool CloseFile() {
bool result = true;
if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
result = ::CloseHandle(hFile_);
assert(result);
hFile_ = NULL;
}
return result;
}
const std::string& GetName() const { return filename_; }
HANDLE GetFileHandle() const { return hFile_; }
bool UseOSBuffer() const { return use_os_buffer_; }
WinFileData(const WinFileData&) = delete;
WinFileData& operator=(const WinFileData&) = delete;
};
// mmap() based random-access
class WinMmapReadableFile : private WinFileData, public RandomAccessFile {
HANDLE hMap_;
const void* mapped_region_;
@ -84,6 +132,9 @@ public:
~WinMmapReadableFile();
WinMmapReadableFile(const WinMmapReadableFile&) = delete;
WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override;
@ -96,10 +147,8 @@ public:
// data to the file. This is safe since we either properly close the
// file before reading from it, or for log files, the reading code
// knows enough to skip zero suffixes.
class WinMmapFile : public WritableFile {
class WinMmapFile : private WinFileData, public WritableFile {
private:
const std::string filename_;
HANDLE hFile_;
HANDLE hMap_;
const size_t page_size_; // We flush the mapping view in page_size
@ -142,6 +191,9 @@ public:
~WinMmapFile();
WinMmapFile(const WinMmapFile&) = delete;
WinMmapFile& operator=(const WinMmapFile&) = delete;
virtual Status Append(const Slice& data) override;
// Means Close() will properly take care of truncate
@ -174,27 +226,16 @@ public:
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
};
class WinSequentialFile : public SequentialFile {
private:
const std::string filename_;
HANDLE file_;
// There is no equivalent of advising away buffered pages as in posix.
// To implement this flag we would need to do unbuffered reads which
// will need to be aligned (not sure there is a guarantee that the buffer
// passed in is aligned).
// Hence we currently ignore this flag. It is used only in a few cases
// which should not be perf critical.
// If perf evaluation finds this to be a problem, we can look into
// implementing this.
bool use_os_buffer_;
class WinSequentialFile : private WinFileData, public SequentialFile {
public:
WinSequentialFile(const std::string& fname, HANDLE f,
const EnvOptions& options);
~WinSequentialFile();
WinSequentialFile(const WinSequentialFile&) = delete;
WinSequentialFile& operator=(const WinSequentialFile&) = delete;
virtual Status Read(size_t n, Slice* result, char* scratch) override;
virtual Status Skip(uint64_t n) override;
@ -202,45 +243,49 @@ public:
virtual Status InvalidateCache(size_t offset, size_t length) override;
};
// pread() based random-access
class WinRandomAccessFile : public RandomAccessFile {
const std::string filename_;
HANDLE hFile_;
const bool use_os_buffer_;
bool read_ahead_;
class WinRandomAccessImpl {
protected:
WinFileData* file_base_;
bool read_ahead_;
const size_t compaction_readahead_size_;
const size_t random_access_max_buffer_size_;
mutable std::mutex buffer_mut_;
mutable std::mutex buffer_mut_;
mutable AlignedBuffer buffer_;
mutable uint64_t
buffered_start_; // file offset set that is currently buffered
/*
* The function reads a requested amount of bytes into the specified aligned
* buffer Upon success the function sets the length of the buffer to the
* amount of bytes actually read even though it might be less than actually
* requested. It then copies the amount of bytes requested by the user (left)
* to the user supplied buffer (dest) and reduces left by the amount of bytes
* copied to the user buffer
*
* @user_offset [in] - offset on disk where the read was requested by the user
* @first_page_start [in] - actual page aligned disk offset that we want to
* read from
* @bytes_to_read [in] - total amount of bytes that will be read from disk
* which is generally greater or equal to the amount
* that the user has requested due to the
* either alignment requirements or read_ahead in
* effect.
* @left [in/out] total amount of bytes that needs to be copied to the user
* buffer. It is reduced by the amount of bytes that actually
* copied
* @buffer - buffer to use
* @dest - user supplied buffer
*/
// Override for behavior change when creating a custom env
virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes,
uint64_t offset) const;
/*
* The function reads a requested amount of bytes into the specified aligned
* buffer Upon success the function sets the length of the buffer to the
* amount of bytes actually read even though it might be less than actually
* requested. It then copies the amount of bytes requested by the user (left)
* to the user supplied buffer (dest) and reduces left by the amount of bytes
* copied to the user buffer
*
* @user_offset [in] - offset on disk where the read was requested by the user
* @first_page_start [in] - actual page aligned disk offset that we want to
* read from
* @bytes_to_read [in] - total amount of bytes that will be read from disk
* which is generally greater or equal to the amount
* that the user has requested due to the
* either alignment requirements or read_ahead in
* effect.
* @left [in/out] total amount of bytes that needs to be copied to the user
* buffer. It is reduced by the amount of bytes that actually
* copied
* @buffer - buffer to use
* @dest - user supplied buffer
*/
SSIZE_T ReadIntoBuffer(uint64_t user_offset, uint64_t first_page_start,
size_t bytes_to_read, size_t& left,
AlignedBuffer& buffer, char* dest) const;
SSIZE_T ReadIntoOneShotBuffer(uint64_t user_offset, uint64_t first_page_start,
size_t bytes_to_read, size_t& left,
char* dest) const;
@ -250,13 +295,27 @@ class WinRandomAccessFile : public RandomAccessFile {
size_t bytes_to_read, size_t& left,
char* dest) const;
void CalculateReadParameters(uint64_t offset, size_t bytes_requested,
size_t& actual_bytes_toread,
uint64_t& first_page_start) const;
WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
const EnvOptions& options);
// Override for behavior change
virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes,
uint64_t offset) const;
virtual ~WinRandomAccessImpl() {}
public:
WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
Status ReadImpl(uint64_t offset, size_t n, Slice* result,
char* scratch) const;
void HintImpl(RandomAccessFile::AccessPattern pattern);
};
// pread() based random-access
class WinRandomAccessFile : private WinFileData,
protected WinRandomAccessImpl, // Want to be able to override PositionedReadInternal
public RandomAccessFile {
public:
WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
@ -291,18 +350,55 @@ public:
// the tail for the next write OR for Close() at which point we pad with zeros.
// No padding is required for
// buffered access.
class WinWritableFile : public WritableFile {
private:
const std::string filename_;
HANDLE hFile_;
const bool use_os_buffer_; // Used to indicate unbuffered access, the file
// Write-side implementation shared by the Windows writable file classes.
// It operates on a WinFileData supplied by the owning class (the concrete
// file classes pass `this`) and keeps track of the written and reserved sizes.
class WinWritableImpl {
protected:
// File name/handle/buffering state; not owned by this class.
WinFileData* file_data_;
// Alignment passed to the constructor; reported through GetAlignement()
const uint64_t alignment_;
// Size bookkeeping
uint64_t filesize_; // How much data is actually written to disk
uint64_t reservedsize_; // how far we have reserved space
// Reserves space for the file; virtual so a derived class can override it
virtual Status PreallocateInternal(uint64_t spaceToReserve);
WinWritableImpl(WinFileData* file_data, size_t alignment);
// Protected and non-virtual: instances are destroyed through the derived class
~WinWritableImpl() {}
uint64_t GetAlignement() const { return alignment_; }
// Used for buffered access only (asserts file_data_->UseOSBuffer())
Status AppendImpl(const Slice& data);
// Requires that the data is aligned as specified by GetRequiredBufferAlignment()
Status PositionedAppendImpl(const Slice& data, uint64_t offset);
// Truncates the file and, on success, sets filesize_ to `size`
Status TruncateImpl(uint64_t size);
// Syncs the file and then closes the handle through file_data_->CloseFile()
Status CloseImpl();
Status SyncImpl();
uint64_t GetFileSizeImpl() {
// Double accounting now here with WritableFileWriter
// and this size will be wrong when unbuffered access is used
// but tests implement their own writable files and do not use WritableFileWrapper
// so we need to squeeze a square peg through
// a round hole here.
return filesize_;
}
Status AllocateImpl(uint64_t offset, uint64_t len);
public:
// No copying allowed
WinWritableImpl(const WinWritableImpl&) = delete;
WinWritableImpl& operator=(const WinWritableImpl&) = delete;
};
class WinWritableFile : private WinFileData,
protected WinWritableImpl,
public WritableFile {
public:
WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
size_t capacity, const EnvOptions& options);
@ -310,12 +406,14 @@ public:
~WinWritableFile();
// Indicates if the class makes use of unbuffered I/O
// Use PositionedAppend
virtual bool UseOSBuffer() const override;
virtual size_t GetRequiredBufferAlignment() const override;
virtual Status Append(const Slice& data) override;
// Requires that the data is aligned as specified by GetRequiredBufferAlignment()
virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
// Need to implement this so the file is truncated correctly
@ -339,6 +437,57 @@ public:
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
};
// Random read/write file for Windows. It combines the shared file state
// (WinFileData) with the read implementation (WinRandomAccessImpl) and the
// write implementation (WinWritableImpl) and exposes them through the
// RandomRWFile interface.
class WinRandomRWFile : private WinFileData,
                        protected WinRandomAccessImpl,
                        protected WinWritableImpl,
                        public RandomRWFile {
 public:
  WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
                  const EnvOptions& options);

  ~WinRandomRWFile() {}

  // Indicates whether the class relies on the OS buffer for its I/O.
  // If false you must pass aligned buffer to Write()
  virtual bool UseOSBuffer() const override;

  // Use the returned alignment value to allocate
  // aligned buffer for Write() when UseOSBuffer()
  // returns false
  virtual size_t GetRequiredBufferAlignment() const override;

  // Used by the file_reader_writer to decide whether the ReadAhead wrapper
  // should simply forward the call without doing its own read-ahead
  // buffering or locking. This class returns true: reading ahead is handled
  // by its own read implementation.
  virtual bool ShouldForwardRawRequest() const override;

  // For cases when read-ahead is implemented in the platform dependent
  // layer. This is when ShouldForwardRawRequest() returns true.
  virtual void EnableReadAhead() override;

  // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
  // Pass aligned buffer when UseOSBuffer() returns false.
  virtual Status Write(uint64_t offset, const Slice& data) override;

  // Read up to `n` bytes starting from offset `offset` and store them in
  // result, provided `scratch` size should be at least `n`.
  // Returns Status::OK() on success.
  virtual Status Read(uint64_t offset, size_t n, Slice* result,
                      char* scratch) const override;

  virtual Status Flush() override;

  virtual Status Sync() override;

  // Same as Sync(). Marked `override` like every other virtual in this
  // class so that a signature mismatch with the base is a compile error.
  virtual Status Fsync() override { return Sync(); }

  virtual Status Close() override;
};
class WinDirectory : public Directory {
public:
WinDirectory() {}