Split WinEnv into separate classes. (#1128)
For ease of reuse and customization as a library without wrapping. WinEnvThreads is a class for replacement. WintEnvIO is a class for reuse and behavior override. Added private virtual functions for custom override of fallocate pread for io classes.
This commit is contained in:
parent
bb98ca3c80
commit
26adaad438
@ -153,7 +153,9 @@ set(SOURCES
|
||||
memtable/skiplistrep.cc
|
||||
memtable/vectorrep.cc
|
||||
port/stack_trace.cc
|
||||
port/win/io_win.cc
|
||||
port/win/env_win.cc
|
||||
port/win/env_default.cc
|
||||
port/win/port_win.cc
|
||||
port/win/win_logger.cc
|
||||
port/win/xpress_win.cc
|
||||
|
42
port/win/env_default.cc
Normal file
42
port/win/env_default.cc
Normal file
@ -0,0 +1,42 @@
|
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include <rocksdb/env.h>
|
||||
#include "port/win/env_win.h"
|
||||
|
||||
namespace rocksdb {
|
||||
namespace port {
|
||||
|
||||
// We choose to create this on the heap and using std::once for the following
|
||||
// reasons
|
||||
// 1) Currently available MS compiler does not implement atomic C++11
|
||||
// initialization of
|
||||
// function local statics
|
||||
// 2) We choose not to destroy the env because joining the threads from the
|
||||
// system loader
|
||||
// which destroys the statics (same as from DLLMain) creates a system loader
|
||||
// dead-lock.
|
||||
// in this manner any remaining threads are terminated OK.
|
||||
namespace {
|
||||
std::once_flag winenv_once_flag;
|
||||
Env* envptr;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
Env* Env::Default() {
|
||||
using namespace port;
|
||||
std::call_once(winenv_once_flag, []() { envptr = new WinEnv(); });
|
||||
return envptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
2802
port/win/env_win.cc
2802
port/win/env_win.cc
File diff suppressed because it is too large
Load Diff
276
port/win/env_win.h
Normal file
276
port/win/env_win.h
Normal file
@ -0,0 +1,276 @@
|
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// An Env is an interface used by the rocksdb implementation to access
|
||||
// operating system functionality like the filesystem etc. Callers
|
||||
// may wish to provide a custom Env object when opening a database to
|
||||
// get fine gain control; e.g., to rate limit file system operations.
|
||||
//
|
||||
// All Env implementations are safe for concurrent access from
|
||||
// multiple threads without any external synchronization.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocksdb/env.h>
|
||||
#include "util/threadpool.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
namespace rocksdb {
|
||||
namespace port {
|
||||
|
||||
// Currently not designed for inheritance but rather a replacement
|
||||
class WinEnvThreads {
|
||||
public:
|
||||
|
||||
explicit WinEnvThreads(Env* hosted_env);
|
||||
|
||||
~WinEnvThreads();
|
||||
|
||||
WinEnvThreads(const WinEnvThreads&) = delete;
|
||||
WinEnvThreads& operator=(const WinEnvThreads&) = delete;
|
||||
|
||||
void Schedule(void(*function)(void*), void* arg, Env::Priority pri,
|
||||
void* tag,
|
||||
void(*unschedFunction)(void* arg));
|
||||
|
||||
int UnSchedule(void* arg, Env::Priority pri);
|
||||
|
||||
void StartThread(void(*function)(void* arg), void* arg);
|
||||
|
||||
void WaitForJoin();
|
||||
|
||||
unsigned int GetThreadPoolQueueLen(Env::Priority pri) const;
|
||||
|
||||
static uint64_t gettid();
|
||||
|
||||
uint64_t GetThreadID() const;
|
||||
|
||||
void SleepForMicroseconds(int micros);
|
||||
|
||||
// Allow increasing the number of worker threads.
|
||||
void SetBackgroundThreads(int num, Env::Priority pri);
|
||||
|
||||
void IncBackgroundThreadsIfNeeded(int num, Env::Priority pri);
|
||||
|
||||
private:
|
||||
|
||||
Env* hosted_env_;
|
||||
mutable std::mutex mu_;
|
||||
std::vector<ThreadPool> thread_pools_;
|
||||
std::vector<std::thread> threads_to_join_;
|
||||
|
||||
};
|
||||
|
||||
// Designed for inheritance so can be re-used
|
||||
// but certain parts replaced
|
||||
class WinEnvIO {
|
||||
public:
|
||||
explicit WinEnvIO(Env* hosted_env);
|
||||
|
||||
virtual ~WinEnvIO();
|
||||
|
||||
virtual Status DeleteFile(const std::string& fname);
|
||||
|
||||
virtual Status GetCurrentTime(int64_t* unix_time);
|
||||
|
||||
virtual Status NewSequentialFile(const std::string& fname,
|
||||
std::unique_ptr<SequentialFile>* result,
|
||||
const EnvOptions& options);
|
||||
|
||||
virtual Status NewRandomAccessFile(const std::string& fname,
|
||||
std::unique_ptr<RandomAccessFile>* result,
|
||||
const EnvOptions& options);
|
||||
|
||||
virtual Status NewWritableFile(const std::string& fname,
|
||||
std::unique_ptr<WritableFile>* result,
|
||||
const EnvOptions& options);
|
||||
|
||||
virtual Status NewDirectory(const std::string& name,
|
||||
std::unique_ptr<Directory>* result);
|
||||
|
||||
virtual Status FileExists(const std::string& fname);
|
||||
|
||||
virtual Status GetChildren(const std::string& dir,
|
||||
std::vector<std::string>* result);
|
||||
|
||||
virtual Status CreateDir(const std::string& name);
|
||||
|
||||
virtual Status CreateDirIfMissing(const std::string& name);
|
||||
|
||||
virtual Status DeleteDir(const std::string& name);
|
||||
|
||||
virtual Status GetFileSize(const std::string& fname,
|
||||
uint64_t* size);
|
||||
|
||||
static uint64_t FileTimeToUnixTime(const FILETIME& ftTime);
|
||||
|
||||
virtual Status GetFileModificationTime(const std::string& fname,
|
||||
uint64_t* file_mtime);
|
||||
|
||||
virtual Status RenameFile(const std::string& src,
|
||||
const std::string& target);
|
||||
|
||||
virtual Status LinkFile(const std::string& src,
|
||||
const std::string& target);
|
||||
|
||||
virtual Status LockFile(const std::string& lockFname,
|
||||
FileLock** lock);
|
||||
|
||||
virtual Status UnlockFile(FileLock* lock);
|
||||
|
||||
virtual Status GetTestDirectory(std::string* result);
|
||||
|
||||
virtual Status NewLogger(const std::string& fname,
|
||||
std::shared_ptr<Logger>* result);
|
||||
|
||||
virtual uint64_t NowMicros();
|
||||
|
||||
virtual uint64_t NowNanos();
|
||||
|
||||
virtual Status GetHostName(char* name, uint64_t len);
|
||||
|
||||
virtual Status GetAbsolutePath(const std::string& db_path,
|
||||
std::string* output_path);
|
||||
|
||||
virtual std::string TimeToString(uint64_t secondsSince1970);
|
||||
|
||||
virtual EnvOptions OptimizeForLogWrite(const EnvOptions& env_options,
|
||||
const DBOptions& db_options) const;
|
||||
|
||||
virtual EnvOptions OptimizeForManifestWrite(
|
||||
const EnvOptions& env_options) const;
|
||||
|
||||
size_t GetPageSize() const { return page_size_; }
|
||||
|
||||
size_t GetAllocationGranularity() const { return allocation_granularity_; }
|
||||
|
||||
uint64_t GetPerfCounterFrequency() const { return perf_counter_frequency_; }
|
||||
|
||||
private:
|
||||
// Returns true iff the named directory exists and is a directory.
|
||||
virtual bool DirExists(const std::string& dname);
|
||||
|
||||
typedef VOID(WINAPI * FnGetSystemTimePreciseAsFileTime)(LPFILETIME);
|
||||
|
||||
Env* hosted_env_;
|
||||
size_t page_size_;
|
||||
size_t allocation_granularity_;
|
||||
uint64_t perf_counter_frequency_;
|
||||
FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_;
|
||||
};
|
||||
|
||||
class WinEnv : public Env {
|
||||
public:
|
||||
WinEnv();
|
||||
|
||||
~WinEnv();
|
||||
|
||||
Status DeleteFile(const std::string& fname) override;
|
||||
|
||||
Status GetCurrentTime(int64_t* unix_time) override;
|
||||
|
||||
Status NewSequentialFile(const std::string& fname,
|
||||
std::unique_ptr<SequentialFile>* result,
|
||||
const EnvOptions& options) override;
|
||||
|
||||
Status NewRandomAccessFile(const std::string& fname,
|
||||
std::unique_ptr<RandomAccessFile>* result,
|
||||
const EnvOptions& options) override;
|
||||
|
||||
Status NewWritableFile(const std::string& fname,
|
||||
std::unique_ptr<WritableFile>* result,
|
||||
const EnvOptions& options) override;
|
||||
|
||||
Status NewDirectory(const std::string& name,
|
||||
std::unique_ptr<Directory>* result) override;
|
||||
|
||||
Status FileExists(const std::string& fname) override;
|
||||
|
||||
Status GetChildren(const std::string& dir,
|
||||
std::vector<std::string>* result) override;
|
||||
|
||||
Status CreateDir(const std::string& name) override;
|
||||
|
||||
Status CreateDirIfMissing(const std::string& name) override;
|
||||
|
||||
Status DeleteDir(const std::string& name) override;
|
||||
|
||||
Status GetFileSize(const std::string& fname,
|
||||
uint64_t* size) override;
|
||||
|
||||
Status GetFileModificationTime(const std::string& fname,
|
||||
uint64_t* file_mtime) override;
|
||||
|
||||
Status RenameFile(const std::string& src,
|
||||
const std::string& target) override;
|
||||
|
||||
Status LinkFile(const std::string& src,
|
||||
const std::string& target) override;
|
||||
|
||||
Status LockFile(const std::string& lockFname,
|
||||
FileLock** lock) override;
|
||||
|
||||
Status UnlockFile(FileLock* lock) override;
|
||||
|
||||
Status GetTestDirectory(std::string* result) override;
|
||||
|
||||
Status NewLogger(const std::string& fname,
|
||||
std::shared_ptr<Logger>* result) override;
|
||||
|
||||
uint64_t NowMicros() override;
|
||||
|
||||
uint64_t NowNanos() override;
|
||||
|
||||
Status GetHostName(char* name, uint64_t len) override;
|
||||
|
||||
Status GetAbsolutePath(const std::string& db_path,
|
||||
std::string* output_path) override;
|
||||
|
||||
std::string TimeToString(uint64_t secondsSince1970) override;
|
||||
|
||||
Status GetThreadList(
|
||||
std::vector<ThreadStatus>* thread_list) override;
|
||||
|
||||
void Schedule(void(*function)(void*), void* arg, Env::Priority pri,
|
||||
void* tag,
|
||||
void(*unschedFunction)(void* arg)) override;
|
||||
|
||||
int UnSchedule(void* arg, Env::Priority pri) override;
|
||||
|
||||
void StartThread(void(*function)(void* arg), void* arg) override;
|
||||
|
||||
void WaitForJoin();
|
||||
|
||||
unsigned int GetThreadPoolQueueLen(Env::Priority pri) const override;
|
||||
|
||||
uint64_t GetThreadID() const override;
|
||||
|
||||
void SleepForMicroseconds(int micros) override;
|
||||
|
||||
// Allow increasing the number of worker threads.
|
||||
void SetBackgroundThreads(int num, Env::Priority pri) override;
|
||||
|
||||
void IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) override;
|
||||
|
||||
EnvOptions OptimizeForLogWrite(const EnvOptions& env_options,
|
||||
const DBOptions& db_options) const override;
|
||||
|
||||
EnvOptions OptimizeForManifestWrite(
|
||||
const EnvOptions& env_options) const override;
|
||||
|
||||
private:
|
||||
|
||||
WinEnvIO winenv_io_;
|
||||
WinEnvThreads winenv_threads_;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
963
port/win/io_win.cc
Normal file
963
port/win/io_win.cc
Normal file
@ -0,0 +1,963 @@
|
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "port/win/io_win.h"
|
||||
|
||||
#include "util/sync_point.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/iostats_context_imp.h"
|
||||
#include "util/sync_point.h"
|
||||
#include "util/aligned_buffer.h"
|
||||
|
||||
|
||||
namespace rocksdb {
|
||||
namespace port {
|
||||
|
||||
std::string GetWindowsErrSz(DWORD err) {
|
||||
LPSTR lpMsgBuf;
|
||||
FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
|
||||
FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||
NULL, err,
|
||||
0, // Default language
|
||||
reinterpret_cast<LPSTR>(&lpMsgBuf), 0, NULL);
|
||||
|
||||
std::string Err = lpMsgBuf;
|
||||
LocalFree(lpMsgBuf);
|
||||
return Err;
|
||||
}
|
||||
|
||||
// We preserve the original name of this interface to denote the original idea
|
||||
// behind it.
|
||||
// All reads happen by a specified offset and pwrite interface does not change
|
||||
// the position of the file pointer. Judging from the man page and errno it does
|
||||
// execute
|
||||
// lseek atomically to return the position of the file back where it was.
|
||||
// WriteFile() does not
|
||||
// have this capability. Therefore, for both pread and pwrite the pointer is
|
||||
// advanced to the next position
|
||||
// which is fine for writes because they are (should be) sequential.
|
||||
// Because all the reads/writes happen by the specified offset, the caller in
|
||||
// theory should not
|
||||
// rely on the current file offset.
|
||||
SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes,
|
||||
uint64_t offset) {
|
||||
assert(numBytes <= std::numeric_limits<DWORD>::max());
|
||||
OVERLAPPED overlapped = { 0 };
|
||||
ULARGE_INTEGER offsetUnion;
|
||||
offsetUnion.QuadPart = offset;
|
||||
|
||||
overlapped.Offset = offsetUnion.LowPart;
|
||||
overlapped.OffsetHigh = offsetUnion.HighPart;
|
||||
|
||||
SSIZE_T result = 0;
|
||||
|
||||
unsigned long bytesWritten = 0;
|
||||
|
||||
if (FALSE == WriteFile(hFile, src, static_cast<DWORD>(numBytes), &bytesWritten,
|
||||
&overlapped)) {
|
||||
result = -1;
|
||||
} else {
|
||||
result = bytesWritten;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// See comments for pwrite above
|
||||
SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset) {
|
||||
assert(numBytes <= std::numeric_limits<DWORD>::max());
|
||||
OVERLAPPED overlapped = { 0 };
|
||||
ULARGE_INTEGER offsetUnion;
|
||||
offsetUnion.QuadPart = offset;
|
||||
|
||||
overlapped.Offset = offsetUnion.LowPart;
|
||||
overlapped.OffsetHigh = offsetUnion.HighPart;
|
||||
|
||||
SSIZE_T result = 0;
|
||||
|
||||
unsigned long bytesRead = 0;
|
||||
|
||||
if (FALSE == ReadFile(hFile, src, static_cast<DWORD>(numBytes), &bytesRead,
|
||||
&overlapped)) {
|
||||
return -1;
|
||||
} else {
|
||||
result = bytesRead;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// SetFileInformationByHandle() is capable of fast pre-allocates.
|
||||
// However, this does not change the file end position unless the file is
|
||||
// truncated and the pre-allocated space is not considered filled with zeros.
|
||||
Status fallocate(const std::string& filename, HANDLE hFile,
|
||||
uint64_t to_size) {
|
||||
Status status;
|
||||
|
||||
FILE_ALLOCATION_INFO alloc_info;
|
||||
alloc_info.AllocationSize.QuadPart = to_size;
|
||||
|
||||
if (!SetFileInformationByHandle(hFile, FileAllocationInfo, &alloc_info,
|
||||
sizeof(FILE_ALLOCATION_INFO))) {
|
||||
auto lastError = GetLastError();
|
||||
status = IOErrorFromWindowsError(
|
||||
"Failed to pre-allocate space: " + filename, lastError);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
Status ftruncate(const std::string& filename, HANDLE hFile,
|
||||
uint64_t toSize) {
|
||||
Status status;
|
||||
|
||||
FILE_END_OF_FILE_INFO end_of_file;
|
||||
end_of_file.EndOfFile.QuadPart = toSize;
|
||||
|
||||
if (!SetFileInformationByHandle(hFile, FileEndOfFileInfo, &end_of_file,
|
||||
sizeof(FILE_END_OF_FILE_INFO))) {
|
||||
auto lastError = GetLastError();
|
||||
status = IOErrorFromWindowsError("Failed to Set end of file: " + filename,
|
||||
lastError);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size) {
|
||||
|
||||
if (max_size < kMaxVarint64Length * 3) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
BY_HANDLE_FILE_INFORMATION FileInfo;
|
||||
|
||||
BOOL result = GetFileInformationByHandle(hFile, &FileInfo);
|
||||
|
||||
TEST_SYNC_POINT_CALLBACK("GetUniqueIdFromFile:FS_IOC_GETVERSION", &result);
|
||||
|
||||
if (!result) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
char* rid = id;
|
||||
rid = EncodeVarint64(rid, uint64_t(FileInfo.dwVolumeSerialNumber));
|
||||
rid = EncodeVarint64(rid, uint64_t(FileInfo.nFileIndexHigh));
|
||||
rid = EncodeVarint64(rid, uint64_t(FileInfo.nFileIndexLow));
|
||||
|
||||
assert(rid >= id);
|
||||
return static_cast<size_t>(rid - id);
|
||||
}
|
||||
|
||||
WinMmapReadableFile::WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
|
||||
const void* mapped_region, size_t length)
|
||||
: fileName_(fileName),
|
||||
hFile_(hFile),
|
||||
hMap_(hMap),
|
||||
mapped_region_(mapped_region),
|
||||
length_(length) {}
|
||||
|
||||
WinMmapReadableFile::~WinMmapReadableFile() {
|
||||
BOOL ret = ::UnmapViewOfFile(mapped_region_);
|
||||
assert(ret);
|
||||
|
||||
ret = ::CloseHandle(hMap_);
|
||||
assert(ret);
|
||||
|
||||
ret = ::CloseHandle(hFile_);
|
||||
assert(ret);
|
||||
}
|
||||
|
||||
Status WinMmapReadableFile::Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const {
|
||||
Status s;
|
||||
|
||||
if (offset > length_) {
|
||||
*result = Slice();
|
||||
return IOError(fileName_, EINVAL);
|
||||
} else if (offset + n > length_) {
|
||||
n = length_ - offset;
|
||||
}
|
||||
*result =
|
||||
Slice(reinterpret_cast<const char*>(mapped_region_)+offset, n);
|
||||
return s;
|
||||
}
|
||||
|
||||
Status WinMmapReadableFile::InvalidateCache(size_t offset, size_t length) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t WinMmapReadableFile::GetUniqueId(char* id, size_t max_size) const {
|
||||
return GetUniqueIdFromFile(hFile_, id, max_size);
|
||||
}
|
||||
|
||||
// Can only truncate or reserve to a sector size aligned if
|
||||
// used on files that are opened with Unbuffered I/O
|
||||
Status WinMmapFile::TruncateFile(uint64_t toSize) {
|
||||
return ftruncate(filename_, hFile_, toSize);
|
||||
}
|
||||
|
||||
Status WinMmapFile::UnmapCurrentRegion() {
|
||||
Status status;
|
||||
|
||||
if (mapped_begin_ != nullptr) {
|
||||
if (!::UnmapViewOfFile(mapped_begin_)) {
|
||||
status = IOErrorFromWindowsError(
|
||||
"Failed to unmap file view: " + filename_, GetLastError());
|
||||
}
|
||||
|
||||
// Move on to the next portion of the file
|
||||
file_offset_ += view_size_;
|
||||
|
||||
// UnmapView automatically sends data to disk but not the metadata
|
||||
// which is good and provides some equivalent of fdatasync() on Linux
|
||||
// therefore, we donot need separate flag for metadata
|
||||
mapped_begin_ = nullptr;
|
||||
mapped_end_ = nullptr;
|
||||
dst_ = nullptr;
|
||||
|
||||
last_sync_ = nullptr;
|
||||
pending_sync_ = false;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
Status WinMmapFile::MapNewRegion() {
|
||||
|
||||
Status status;
|
||||
|
||||
assert(mapped_begin_ == nullptr);
|
||||
|
||||
size_t minDiskSize = file_offset_ + view_size_;
|
||||
|
||||
if (minDiskSize > reserved_size_) {
|
||||
status = Allocate(file_offset_, view_size_);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
// Need to remap
|
||||
if (hMap_ == NULL || reserved_size_ > mapping_size_) {
|
||||
|
||||
if (hMap_ != NULL) {
|
||||
// Unmap the previous one
|
||||
BOOL ret = ::CloseHandle(hMap_);
|
||||
assert(ret);
|
||||
hMap_ = NULL;
|
||||
}
|
||||
|
||||
ULARGE_INTEGER mappingSize;
|
||||
mappingSize.QuadPart = reserved_size_;
|
||||
|
||||
hMap_ = CreateFileMappingA(
|
||||
hFile_,
|
||||
NULL, // Security attributes
|
||||
PAGE_READWRITE, // There is not a write only mode for mapping
|
||||
mappingSize.HighPart, // Enable mapping the whole file but the actual
|
||||
// amount mapped is determined by MapViewOfFile
|
||||
mappingSize.LowPart,
|
||||
NULL); // Mapping name
|
||||
|
||||
if (NULL == hMap_) {
|
||||
return IOErrorFromWindowsError(
|
||||
"WindowsMmapFile failed to create file mapping for: " + filename_,
|
||||
GetLastError());
|
||||
}
|
||||
|
||||
mapping_size_ = reserved_size_;
|
||||
}
|
||||
|
||||
ULARGE_INTEGER offset;
|
||||
offset.QuadPart = file_offset_;
|
||||
|
||||
// View must begin at the granularity aligned offset
|
||||
mapped_begin_ = reinterpret_cast<char*>(
|
||||
MapViewOfFileEx(hMap_, FILE_MAP_WRITE, offset.HighPart, offset.LowPart,
|
||||
view_size_, NULL));
|
||||
|
||||
if (!mapped_begin_) {
|
||||
status = IOErrorFromWindowsError(
|
||||
"WindowsMmapFile failed to map file view: " + filename_,
|
||||
GetLastError());
|
||||
} else {
|
||||
mapped_end_ = mapped_begin_ + view_size_;
|
||||
dst_ = mapped_begin_;
|
||||
last_sync_ = mapped_begin_;
|
||||
pending_sync_ = false;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status WinMmapFile::PreallocateInternal(uint64_t spaceToReserve) {
|
||||
return fallocate(filename_, hFile_, spaceToReserve);
|
||||
}
|
||||
|
||||
WinMmapFile::WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
|
||||
size_t allocation_granularity, const EnvOptions& options)
|
||||
: filename_(fname),
|
||||
hFile_(hFile),
|
||||
hMap_(NULL),
|
||||
page_size_(page_size),
|
||||
allocation_granularity_(allocation_granularity),
|
||||
reserved_size_(0),
|
||||
mapping_size_(0),
|
||||
view_size_(0),
|
||||
mapped_begin_(nullptr),
|
||||
mapped_end_(nullptr),
|
||||
dst_(nullptr),
|
||||
last_sync_(nullptr),
|
||||
file_offset_(0),
|
||||
pending_sync_(false) {
|
||||
// Allocation granularity must be obtained from GetSystemInfo() and must be
|
||||
// a power of two.
|
||||
assert(allocation_granularity > 0);
|
||||
assert((allocation_granularity & (allocation_granularity - 1)) == 0);
|
||||
|
||||
assert(page_size > 0);
|
||||
assert((page_size & (page_size - 1)) == 0);
|
||||
|
||||
// Only for memory mapped writes
|
||||
assert(options.use_mmap_writes);
|
||||
|
||||
// View size must be both the multiple of allocation_granularity AND the
|
||||
// page size and the granularity is usually a multiple of a page size.
|
||||
const size_t viewSize = 32 * 1024; // 32Kb similar to the Windows File Cache in buffered mode
|
||||
view_size_ = Roundup(viewSize, allocation_granularity_);
|
||||
}
|
||||
|
||||
WinMmapFile::~WinMmapFile() {
|
||||
if (hFile_) {
|
||||
this->Close();
|
||||
}
|
||||
}
|
||||
|
||||
Status WinMmapFile::Append(const Slice& data) {
|
||||
const char* src = data.data();
|
||||
size_t left = data.size();
|
||||
|
||||
while (left > 0) {
|
||||
assert(mapped_begin_ <= dst_);
|
||||
size_t avail = mapped_end_ - dst_;
|
||||
|
||||
if (avail == 0) {
|
||||
Status s = UnmapCurrentRegion();
|
||||
if (s.ok()) {
|
||||
s = MapNewRegion();
|
||||
}
|
||||
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
} else {
|
||||
size_t n = std::min(left, avail);
|
||||
memcpy(dst_, src, n);
|
||||
dst_ += n;
|
||||
src += n;
|
||||
left -= n;
|
||||
pending_sync_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Now make sure that the last partial page is padded with zeros if needed
|
||||
size_t bytesToPad = Roundup(size_t(dst_), page_size_) - size_t(dst_);
|
||||
if (bytesToPad > 0) {
|
||||
memset(dst_, 0, bytesToPad);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Means Close() will properly take care of truncate
|
||||
// and it does not need any additional information
|
||||
Status WinMmapFile::Truncate(uint64_t size) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status WinMmapFile::Close() {
|
||||
Status s;
|
||||
|
||||
assert(NULL != hFile_);
|
||||
|
||||
// We truncate to the precise size so no
|
||||
// uninitialized data at the end. SetEndOfFile
|
||||
// which we use does not write zeros and it is good.
|
||||
uint64_t targetSize = GetFileSize();
|
||||
|
||||
if (mapped_begin_ != nullptr) {
|
||||
// Sync before unmapping to make sure everything
|
||||
// is on disk and there is not a lazy writing
|
||||
// so we are deterministic with the tests
|
||||
Sync();
|
||||
s = UnmapCurrentRegion();
|
||||
}
|
||||
|
||||
if (NULL != hMap_) {
|
||||
BOOL ret = ::CloseHandle(hMap_);
|
||||
if (!ret && s.ok()) {
|
||||
auto lastError = GetLastError();
|
||||
s = IOErrorFromWindowsError(
|
||||
"Failed to Close mapping for file: " + filename_, lastError);
|
||||
}
|
||||
|
||||
hMap_ = NULL;
|
||||
}
|
||||
|
||||
if (hFile_ != NULL) {
|
||||
|
||||
TruncateFile(targetSize);
|
||||
|
||||
BOOL ret = ::CloseHandle(hFile_);
|
||||
hFile_ = NULL;
|
||||
|
||||
if (!ret && s.ok()) {
|
||||
auto lastError = GetLastError();
|
||||
s = IOErrorFromWindowsError(
|
||||
"Failed to close file map handle: " + filename_, lastError);
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
Status WinMmapFile::Flush() { return Status::OK(); }
|
||||
|
||||
// Flush only data
|
||||
Status WinMmapFile::Sync() {
|
||||
Status s;
|
||||
|
||||
// Some writes occurred since last sync
|
||||
if (dst_ > last_sync_) {
|
||||
assert(mapped_begin_);
|
||||
assert(dst_);
|
||||
assert(dst_ > mapped_begin_);
|
||||
assert(dst_ < mapped_end_);
|
||||
|
||||
size_t page_begin =
|
||||
TruncateToPageBoundary(page_size_, last_sync_ - mapped_begin_);
|
||||
size_t page_end =
|
||||
TruncateToPageBoundary(page_size_, dst_ - mapped_begin_ - 1);
|
||||
|
||||
// Flush only the amount of that is a multiple of pages
|
||||
if (!::FlushViewOfFile(mapped_begin_ + page_begin,
|
||||
(page_end - page_begin) + page_size_)) {
|
||||
s = IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_,
|
||||
GetLastError());
|
||||
} else {
|
||||
last_sync_ = dst_;
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Flush data as well as metadata to stable storage.
|
||||
*/
|
||||
Status WinMmapFile::Fsync() {
|
||||
Status s = Sync();
|
||||
|
||||
// Flush metadata
|
||||
if (s.ok() && pending_sync_) {
|
||||
if (!::FlushFileBuffers(hFile_)) {
|
||||
s = IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_,
|
||||
GetLastError());
|
||||
}
|
||||
pending_sync_ = false;
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the size of valid data in the file. This will not match the
|
||||
* size that is returned from the filesystem because we use mmap
|
||||
* to extend file by map_size every time.
|
||||
*/
|
||||
uint64_t WinMmapFile::GetFileSize() {
|
||||
size_t used = dst_ - mapped_begin_;
|
||||
return file_offset_ + used;
|
||||
}
|
||||
|
||||
Status WinMmapFile::InvalidateCache(size_t offset, size_t length) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status WinMmapFile::Allocate(uint64_t offset, uint64_t len) {
|
||||
Status status;
|
||||
TEST_KILL_RANDOM("WinMmapFile::Allocate", rocksdb_kill_odds);
|
||||
|
||||
// Make sure that we reserve an aligned amount of space
|
||||
// since the reservation block size is driven outside so we want
|
||||
// to check if we are ok with reservation here
|
||||
size_t spaceToReserve = Roundup(offset + len, view_size_);
|
||||
// Nothing to do
|
||||
if (spaceToReserve <= reserved_size_) {
|
||||
return status;
|
||||
}
|
||||
|
||||
IOSTATS_TIMER_GUARD(allocate_nanos);
|
||||
status = PreallocateInternal(spaceToReserve);
|
||||
if (status.ok()) {
|
||||
reserved_size_ = spaceToReserve;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
size_t WinMmapFile::GetUniqueId(char* id, size_t max_size) const {
|
||||
return GetUniqueIdFromFile(hFile_, id, max_size);
|
||||
}
|
||||
|
||||
WinSequentialFile::WinSequentialFile(const std::string& fname, HANDLE f,
|
||||
const EnvOptions& options)
|
||||
: filename_(fname),
|
||||
file_(f),
|
||||
use_os_buffer_(options.use_os_buffer)
|
||||
{}
|
||||
|
||||
WinSequentialFile::~WinSequentialFile() {
|
||||
assert(file_ != INVALID_HANDLE_VALUE);
|
||||
CloseHandle(file_);
|
||||
}
|
||||
|
||||
Status WinSequentialFile::Read(size_t n, Slice* result, char* scratch) {
|
||||
Status s;
|
||||
size_t r = 0;
|
||||
|
||||
// Windows ReadFile API accepts a DWORD.
|
||||
// While it is possible to read in a loop if n is > UINT_MAX
|
||||
// it is a highly unlikely case.
|
||||
if (n > UINT_MAX) {
|
||||
return IOErrorFromWindowsError(filename_, ERROR_INVALID_PARAMETER);
|
||||
}
|
||||
|
||||
DWORD bytesToRead = static_cast<DWORD>(n); //cast is safe due to the check above
|
||||
DWORD bytesRead = 0;
|
||||
BOOL ret = ReadFile(file_, scratch, bytesToRead, &bytesRead, NULL);
|
||||
if (ret == TRUE) {
|
||||
r = bytesRead;
|
||||
} else {
|
||||
return IOErrorFromWindowsError(filename_, GetLastError());
|
||||
}
|
||||
|
||||
*result = Slice(scratch, r);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
Status WinSequentialFile::Skip(uint64_t n) {
|
||||
// Can't handle more than signed max as SetFilePointerEx accepts a signed 64-bit
|
||||
// integer. As such it is a highly unlikley case to have n so large.
|
||||
if (n > _I64_MAX) {
|
||||
return IOErrorFromWindowsError(filename_, ERROR_INVALID_PARAMETER);
|
||||
}
|
||||
|
||||
LARGE_INTEGER li;
|
||||
li.QuadPart = static_cast<int64_t>(n); //cast is safe due to the check above
|
||||
BOOL ret = SetFilePointerEx(file_, li, NULL, FILE_CURRENT);
|
||||
if (ret == FALSE) {
|
||||
return IOErrorFromWindowsError(filename_, GetLastError());
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status WinSequentialFile::InvalidateCache(size_t offset, size_t length) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
SSIZE_T WinRandomAccessFile::ReadIntoBuffer(uint64_t user_offset, uint64_t first_page_start,
|
||||
size_t bytes_to_read, size_t& left,
|
||||
AlignedBuffer& buffer, char* dest) const {
|
||||
assert(buffer.CurrentSize() == 0);
|
||||
assert(buffer.Capacity() >= bytes_to_read);
|
||||
|
||||
SSIZE_T read =
|
||||
PositionedReadInternal(buffer.Destination(), bytes_to_read, first_page_start);
|
||||
|
||||
if (read > 0) {
|
||||
buffer.Size(read);
|
||||
|
||||
// Let's figure out how much we read from the users standpoint
|
||||
if ((first_page_start + buffer.CurrentSize()) > user_offset) {
|
||||
assert(first_page_start <= user_offset);
|
||||
size_t buffer_offset = user_offset - first_page_start;
|
||||
read = buffer.Read(dest, buffer_offset, left);
|
||||
} else {
|
||||
read = 0;
|
||||
}
|
||||
left -= read;
|
||||
}
|
||||
return read;
|
||||
}
|
||||
|
||||
SSIZE_T WinRandomAccessFile::ReadIntoOneShotBuffer(uint64_t user_offset, uint64_t first_page_start,
|
||||
size_t bytes_to_read, size_t& left,
|
||||
char* dest) const {
|
||||
AlignedBuffer bigBuffer;
|
||||
bigBuffer.Alignment(buffer_.Alignment());
|
||||
bigBuffer.AllocateNewBuffer(bytes_to_read);
|
||||
|
||||
return ReadIntoBuffer(user_offset, first_page_start, bytes_to_read, left,
|
||||
bigBuffer, dest);
|
||||
}
|
||||
|
||||
SSIZE_T WinRandomAccessFile::ReadIntoInstanceBuffer(uint64_t user_offset,
|
||||
uint64_t first_page_start,
|
||||
size_t bytes_to_read, size_t& left,
|
||||
char* dest) const {
|
||||
SSIZE_T read = ReadIntoBuffer(user_offset, first_page_start, bytes_to_read,
|
||||
left, buffer_, dest);
|
||||
|
||||
if (read > 0) {
|
||||
buffered_start_ = first_page_start;
|
||||
}
|
||||
|
||||
return read;
|
||||
}
|
||||
|
||||
void WinRandomAccessFile::CalculateReadParameters(uint64_t offset, size_t bytes_requested,
|
||||
size_t& actual_bytes_toread,
|
||||
uint64_t& first_page_start) const {
|
||||
|
||||
const size_t alignment = buffer_.Alignment();
|
||||
|
||||
first_page_start = TruncateToPageBoundary(alignment, offset);
|
||||
const uint64_t last_page_start =
|
||||
TruncateToPageBoundary(alignment, offset + bytes_requested - 1);
|
||||
actual_bytes_toread = (last_page_start - first_page_start) + alignment;
|
||||
}
|
||||
|
||||
SSIZE_T WinRandomAccessFile::PositionedReadInternal(char* src, size_t numBytes,
|
||||
uint64_t offset) const {
|
||||
return pread(hFile_, src, numBytes, offset);
|
||||
}
|
||||
|
||||
WinRandomAccessFile::WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
|
||||
const EnvOptions& options)
|
||||
: filename_(fname),
|
||||
hFile_(hFile),
|
||||
use_os_buffer_(options.use_os_buffer),
|
||||
read_ahead_(false),
|
||||
compaction_readahead_size_(options.compaction_readahead_size),
|
||||
random_access_max_buffer_size_(options.random_access_max_buffer_size),
|
||||
buffer_(),
|
||||
buffered_start_(0) {
|
||||
assert(!options.use_mmap_reads);
|
||||
|
||||
// Unbuffered access, use internal buffer for reads
|
||||
if (!use_os_buffer_) {
|
||||
// Do not allocate the buffer either until the first request or
|
||||
// until there is a call to allocate a read-ahead buffer
|
||||
buffer_.Alignment(alignment);
|
||||
}
|
||||
}
|
||||
|
||||
WinRandomAccessFile::~WinRandomAccessFile() {
|
||||
if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
|
||||
::CloseHandle(hFile_);
|
||||
}
|
||||
}
|
||||
|
||||
void WinRandomAccessFile::EnableReadAhead() { this->Hint(SEQUENTIAL); }
|
||||
|
||||
Status WinRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const {
|
||||
|
||||
Status s;
|
||||
SSIZE_T r = -1;
|
||||
size_t left = n;
|
||||
char* dest = scratch;
|
||||
|
||||
if (n == 0) {
|
||||
*result = Slice(scratch, 0);
|
||||
return s;
|
||||
}
|
||||
|
||||
// When in unbuffered mode we need to do the following changes:
|
||||
// - use our own aligned buffer
|
||||
// - always read at the offset of that is a multiple of alignment
|
||||
if (!use_os_buffer_) {
|
||||
|
||||
uint64_t first_page_start = 0;
|
||||
size_t actual_bytes_toread = 0;
|
||||
size_t bytes_requested = left;
|
||||
|
||||
if (!read_ahead_ && random_access_max_buffer_size_ == 0) {
|
||||
CalculateReadParameters(offset, bytes_requested, actual_bytes_toread,
|
||||
first_page_start);
|
||||
|
||||
assert(actual_bytes_toread > 0);
|
||||
|
||||
r = ReadIntoOneShotBuffer(offset, first_page_start,
|
||||
actual_bytes_toread, left, dest);
|
||||
} else {
|
||||
|
||||
std::unique_lock<std::mutex> lock(buffer_mut_);
|
||||
|
||||
// Let's see if at least some of the requested data is already
|
||||
// in the buffer
|
||||
if (offset >= buffered_start_ &&
|
||||
offset < (buffered_start_ + buffer_.CurrentSize())) {
|
||||
size_t buffer_offset = offset - buffered_start_;
|
||||
r = buffer_.Read(dest, buffer_offset, left);
|
||||
assert(r >= 0);
|
||||
|
||||
left -= size_t(r);
|
||||
offset += r;
|
||||
dest += r;
|
||||
}
|
||||
|
||||
// Still some left or none was buffered
|
||||
if (left > 0) {
|
||||
// Figure out the start/end offset for reading and amount to read
|
||||
bytes_requested = left;
|
||||
|
||||
if (read_ahead_ && bytes_requested < compaction_readahead_size_) {
|
||||
bytes_requested = compaction_readahead_size_;
|
||||
}
|
||||
|
||||
CalculateReadParameters(offset, bytes_requested, actual_bytes_toread,
|
||||
first_page_start);
|
||||
|
||||
assert(actual_bytes_toread > 0);
|
||||
|
||||
if (buffer_.Capacity() < actual_bytes_toread) {
|
||||
// If we are in read-ahead mode or the requested size
|
||||
// exceeds max buffer size then use one-shot
|
||||
// big buffer otherwise reallocate main buffer
|
||||
if (read_ahead_ ||
|
||||
(actual_bytes_toread > random_access_max_buffer_size_)) {
|
||||
// Unlock the mutex since we are not using instance buffer
|
||||
lock.unlock();
|
||||
r = ReadIntoOneShotBuffer(offset, first_page_start,
|
||||
actual_bytes_toread, left, dest);
|
||||
} else {
|
||||
buffer_.AllocateNewBuffer(actual_bytes_toread);
|
||||
r = ReadIntoInstanceBuffer(offset, first_page_start,
|
||||
actual_bytes_toread, left, dest);
|
||||
}
|
||||
} else {
|
||||
buffer_.Clear();
|
||||
r = ReadIntoInstanceBuffer(offset, first_page_start,
|
||||
actual_bytes_toread, left, dest);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
r = PositionedReadInternal(scratch, left, offset);
|
||||
if (r > 0) {
|
||||
left -= r;
|
||||
}
|
||||
}
|
||||
|
||||
*result = Slice(scratch, (r < 0) ? 0 : n - left);
|
||||
|
||||
if (r < 0) {
|
||||
s = IOErrorFromLastWindowsError(filename_);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
bool WinRandomAccessFile::ShouldForwardRawRequest() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
void WinRandomAccessFile::Hint(AccessPattern pattern) {
|
||||
if (pattern == SEQUENTIAL && !use_os_buffer_ &&
|
||||
compaction_readahead_size_ > 0) {
|
||||
std::lock_guard<std::mutex> lg(buffer_mut_);
|
||||
if (!read_ahead_) {
|
||||
read_ahead_ = true;
|
||||
// This would allocate read-ahead size + 2 alignments
|
||||
// - one for memory alignment which added implicitly by AlignedBuffer
|
||||
// - We add one more alignment because we will read one alignment more
|
||||
// from disk
|
||||
buffer_.AllocateNewBuffer(compaction_readahead_size_ +
|
||||
buffer_.Alignment());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Status WinRandomAccessFile::InvalidateCache(size_t offset, size_t length) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t WinRandomAccessFile::GetUniqueId(char* id, size_t max_size) const {
|
||||
return GetUniqueIdFromFile(hFile_, id, max_size);
|
||||
}
|
||||
|
||||
Status WinWritableFile::PreallocateInternal(uint64_t spaceToReserve) {
|
||||
return fallocate(filename_, hFile_, spaceToReserve);
|
||||
}
|
||||
|
||||
WinWritableFile::WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
|
||||
size_t capacity, const EnvOptions& options)
|
||||
: filename_(fname),
|
||||
hFile_(hFile),
|
||||
use_os_buffer_(options.use_os_buffer),
|
||||
alignment_(alignment),
|
||||
filesize_(0),
|
||||
reservedsize_(0) {
|
||||
assert(!options.use_mmap_writes);
|
||||
}
|
||||
|
||||
WinWritableFile::~WinWritableFile() {
|
||||
if (NULL != hFile_ && INVALID_HANDLE_VALUE != hFile_) {
|
||||
WinWritableFile::Close();
|
||||
}
|
||||
}
|
||||
|
||||
// Indicates if the class makes use of unbuffered I/O
|
||||
bool WinWritableFile::UseOSBuffer() const {
|
||||
return use_os_buffer_;
|
||||
}
|
||||
|
||||
size_t WinWritableFile::GetRequiredBufferAlignment() const {
|
||||
return alignment_;
|
||||
}
|
||||
|
||||
Status WinWritableFile::Append(const Slice& data) {
|
||||
|
||||
// Used for buffered access ONLY
|
||||
assert(use_os_buffer_);
|
||||
assert(data.size() < std::numeric_limits<DWORD>::max());
|
||||
|
||||
Status s;
|
||||
|
||||
DWORD bytesWritten = 0;
|
||||
if (!WriteFile(hFile_, data.data(),
|
||||
static_cast<DWORD>(data.size()), &bytesWritten, NULL)) {
|
||||
auto lastError = GetLastError();
|
||||
s = IOErrorFromWindowsError(
|
||||
"Failed to WriteFile: " + filename_,
|
||||
lastError);
|
||||
} else {
|
||||
assert(size_t(bytesWritten) == data.size());
|
||||
filesize_ += data.size();
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
Status WinWritableFile::PositionedAppend(const Slice& data, uint64_t offset) {
|
||||
Status s;
|
||||
|
||||
SSIZE_T ret = pwrite(hFile_, data.data(), data.size(), offset);
|
||||
|
||||
// Error break
|
||||
if (ret < 0) {
|
||||
auto lastError = GetLastError();
|
||||
s = IOErrorFromWindowsError(
|
||||
"Failed to pwrite for: " + filename_, lastError);
|
||||
} else {
|
||||
// With positional write it is not clear at all
|
||||
// if this actually extends the filesize
|
||||
assert(size_t(ret) == data.size());
|
||||
filesize_ += data.size();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// Need to implement this so the file is truncated correctly
|
||||
// when buffered and unbuffered mode
|
||||
Status WinWritableFile::Truncate(uint64_t size) {
|
||||
Status s = ftruncate(filename_, hFile_, size);
|
||||
if (s.ok()) {
|
||||
filesize_ = size;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
Status WinWritableFile::Close() {
|
||||
|
||||
Status s;
|
||||
|
||||
assert(INVALID_HANDLE_VALUE != hFile_);
|
||||
|
||||
if (fsync(hFile_) < 0) {
|
||||
auto lastError = GetLastError();
|
||||
s = IOErrorFromWindowsError("fsync failed at Close() for: " + filename_,
|
||||
lastError);
|
||||
}
|
||||
|
||||
if (FALSE == ::CloseHandle(hFile_)) {
|
||||
auto lastError = GetLastError();
|
||||
s = IOErrorFromWindowsError("CloseHandle failed for: " + filename_,
|
||||
lastError);
|
||||
}
|
||||
|
||||
hFile_ = INVALID_HANDLE_VALUE;
|
||||
return s;
|
||||
}
|
||||
|
||||
// write out the cached data to the OS cache
|
||||
// This is now taken care of the WritableFileWriter
|
||||
Status WinWritableFile::Flush() {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status WinWritableFile::Sync() {
|
||||
Status s;
|
||||
// Calls flush buffers
|
||||
if (fsync(hFile_) < 0) {
|
||||
auto lastError = GetLastError();
|
||||
s = IOErrorFromWindowsError("fsync failed at Sync() for: " + filename_,
|
||||
lastError);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
Status WinWritableFile::Fsync() { return Sync(); }
|
||||
|
||||
uint64_t WinWritableFile::GetFileSize() {
|
||||
// Double accounting now here with WritableFileWriter
|
||||
// and this size will be wrong when unbuffered access is used
|
||||
// but tests implement their own writable files and do not use WritableFileWrapper
|
||||
// so we need to squeeze a square peg through
|
||||
// a round hole here.
|
||||
return filesize_;
|
||||
}
|
||||
|
||||
Status WinWritableFile::Allocate(uint64_t offset, uint64_t len) {
|
||||
Status status;
|
||||
TEST_KILL_RANDOM("WinWritableFile::Allocate", rocksdb_kill_odds);
|
||||
|
||||
// Make sure that we reserve an aligned amount of space
|
||||
// since the reservation block size is driven outside so we want
|
||||
// to check if we are ok with reservation here
|
||||
size_t spaceToReserve = Roundup(offset + len, alignment_);
|
||||
// Nothing to do
|
||||
if (spaceToReserve <= reservedsize_) {
|
||||
return status;
|
||||
}
|
||||
|
||||
IOSTATS_TIMER_GUARD(allocate_nanos);
|
||||
status = PreallocateInternal(spaceToReserve);
|
||||
if (status.ok()) {
|
||||
reservedsize_ = spaceToReserve;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
size_t WinWritableFile::GetUniqueId(char* id, size_t max_size) const {
|
||||
return GetUniqueIdFromFile(hFile_, id, max_size);
|
||||
}
|
||||
|
||||
Status WinDirectory::Fsync() { return Status::OK(); }
|
||||
|
||||
WinFileLock::~WinFileLock() {
|
||||
BOOL ret = ::CloseHandle(hFile_);
|
||||
assert(ret);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
359
port/win/io_win.h
Normal file
359
port/win/io_win.h
Normal file
@ -0,0 +1,359 @@
|
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
#pragma once
|
||||
|
||||
#include <rocksdb/Status.h>
|
||||
#include <rocksdb/env.h>
|
||||
|
||||
#include "util/aligned_buffer.h"
|
||||
|
||||
#include <string>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <Windows.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace rocksdb {
|
||||
namespace port {
|
||||
|
||||
std::string GetWindowsErrSz(DWORD err);
|
||||
|
||||
inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) {
|
||||
return Status::IOError(context, GetWindowsErrSz(err));
|
||||
}
|
||||
|
||||
inline Status IOErrorFromLastWindowsError(const std::string& context) {
|
||||
return IOErrorFromWindowsError(context, GetLastError());
|
||||
}
|
||||
|
||||
inline Status IOError(const std::string& context, int err_number) {
|
||||
return Status::IOError(context, strerror(err_number));
|
||||
}
|
||||
|
||||
// Note the below two do not set errno because they are used only here in this
|
||||
// file
|
||||
// on a Windows handle and, therefore, not necessary. Translating GetLastError()
|
||||
// to errno
|
||||
// is a sad business
|
||||
inline int fsync(HANDLE hFile) {
|
||||
if (!FlushFileBuffers(hFile)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes,
|
||||
uint64_t offset);
|
||||
|
||||
SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset);
|
||||
|
||||
Status fallocate(const std::string& filename, HANDLE hFile,
|
||||
uint64_t to_size);
|
||||
|
||||
Status ftruncate(const std::string& filename, HANDLE hFile,
|
||||
uint64_t toSize);
|
||||
|
||||
|
||||
size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
|
||||
|
||||
// mmap() based random-access
|
||||
class WinMmapReadableFile : public RandomAccessFile {
|
||||
const std::string fileName_;
|
||||
HANDLE hFile_;
|
||||
HANDLE hMap_;
|
||||
|
||||
const void* mapped_region_;
|
||||
const size_t length_;
|
||||
|
||||
public:
|
||||
// mapped_region_[0,length-1] contains the mmapped contents of the file.
|
||||
WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
|
||||
const void* mapped_region, size_t length);
|
||||
|
||||
~WinMmapReadableFile();
|
||||
|
||||
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const override;
|
||||
|
||||
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
||||
|
||||
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
|
||||
};
|
||||
|
||||
// We preallocate and use memcpy to append new
|
||||
// data to the file. This is safe since we either properly close the
|
||||
// file before reading from it, or for log files, the reading code
|
||||
// knows enough to skip zero suffixes.
|
||||
class WinMmapFile : public WritableFile {
|
||||
private:
|
||||
const std::string filename_;
|
||||
HANDLE hFile_;
|
||||
HANDLE hMap_;
|
||||
|
||||
const size_t page_size_; // We flush the mapping view in page_size
|
||||
// increments. We may decide if this is a memory
|
||||
// page size or SSD page size
|
||||
const size_t
|
||||
allocation_granularity_; // View must start at such a granularity
|
||||
|
||||
size_t reserved_size_; // Preallocated size
|
||||
|
||||
size_t mapping_size_; // The max size of the mapping object
|
||||
// we want to guess the final file size to minimize the remapping
|
||||
size_t view_size_; // How much memory to map into a view at a time
|
||||
|
||||
char* mapped_begin_; // Must begin at the file offset that is aligned with
|
||||
// allocation_granularity_
|
||||
char* mapped_end_;
|
||||
char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_])
|
||||
char* last_sync_; // Where have we synced up to
|
||||
|
||||
uint64_t file_offset_; // Offset of mapped_begin_ in file
|
||||
|
||||
// Do we have unsynced writes?
|
||||
bool pending_sync_;
|
||||
|
||||
// Can only truncate or reserve to a sector size aligned if
|
||||
// used on files that are opened with Unbuffered I/O
|
||||
Status TruncateFile(uint64_t toSize);
|
||||
|
||||
Status UnmapCurrentRegion();
|
||||
|
||||
Status MapNewRegion();
|
||||
|
||||
virtual Status PreallocateInternal(uint64_t spaceToReserve);
|
||||
|
||||
public:
|
||||
|
||||
WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
|
||||
size_t allocation_granularity, const EnvOptions& options);
|
||||
|
||||
~WinMmapFile();
|
||||
|
||||
virtual Status Append(const Slice& data) override;
|
||||
|
||||
// Means Close() will properly take care of truncate
|
||||
// and it does not need any additional information
|
||||
virtual Status Truncate(uint64_t size) override;
|
||||
|
||||
virtual Status Close() override;
|
||||
|
||||
virtual Status Flush() override;
|
||||
|
||||
// Flush only data
|
||||
virtual Status Sync() override;
|
||||
|
||||
/**
|
||||
* Flush data as well as metadata to stable storage.
|
||||
*/
|
||||
virtual Status Fsync() override;
|
||||
|
||||
/**
|
||||
* Get the size of valid data in the file. This will not match the
|
||||
* size that is returned from the filesystem because we use mmap
|
||||
* to extend file by map_size every time.
|
||||
*/
|
||||
virtual uint64_t GetFileSize() override;
|
||||
|
||||
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
||||
|
||||
virtual Status Allocate(uint64_t offset, uint64_t len) override;
|
||||
|
||||
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
|
||||
};
|
||||
|
||||
class WinSequentialFile : public SequentialFile {
|
||||
private:
|
||||
const std::string filename_;
|
||||
HANDLE file_;
|
||||
|
||||
// There is no equivalent of advising away buffered pages as in posix.
|
||||
// To implement this flag we would need to do unbuffered reads which
|
||||
// will need to be aligned (not sure there is a guarantee that the buffer
|
||||
// passed in is aligned).
|
||||
// Hence we currently ignore this flag. It is used only in a few cases
|
||||
// which should not be perf critical.
|
||||
// If perf evaluation finds this to be a problem, we can look into
|
||||
// implementing this.
|
||||
bool use_os_buffer_;
|
||||
|
||||
public:
|
||||
WinSequentialFile(const std::string& fname, HANDLE f,
|
||||
const EnvOptions& options);
|
||||
|
||||
~WinSequentialFile();
|
||||
|
||||
virtual Status Read(size_t n, Slice* result, char* scratch) override;
|
||||
|
||||
virtual Status Skip(uint64_t n) override;
|
||||
|
||||
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
||||
};
|
||||
|
||||
// pread() based random-access
|
||||
class WinRandomAccessFile : public RandomAccessFile {
|
||||
const std::string filename_;
|
||||
HANDLE hFile_;
|
||||
const bool use_os_buffer_;
|
||||
bool read_ahead_;
|
||||
const size_t compaction_readahead_size_;
|
||||
const size_t random_access_max_buffer_size_;
|
||||
mutable std::mutex buffer_mut_;
|
||||
mutable AlignedBuffer buffer_;
|
||||
mutable uint64_t
|
||||
buffered_start_; // file offset set that is currently buffered
|
||||
|
||||
/*
|
||||
* The function reads a requested amount of bytes into the specified aligned
|
||||
* buffer Upon success the function sets the length of the buffer to the
|
||||
* amount of bytes actually read even though it might be less than actually
|
||||
* requested. It then copies the amount of bytes requested by the user (left)
|
||||
* to the user supplied buffer (dest) and reduces left by the amount of bytes
|
||||
* copied to the user buffer
|
||||
*
|
||||
* @user_offset [in] - offset on disk where the read was requested by the user
|
||||
* @first_page_start [in] - actual page aligned disk offset that we want to
|
||||
* read from
|
||||
* @bytes_to_read [in] - total amount of bytes that will be read from disk
|
||||
* which is generally greater or equal to the amount
|
||||
* that the user has requested due to the
|
||||
* either alignment requirements or read_ahead in
|
||||
* effect.
|
||||
* @left [in/out] total amount of bytes that needs to be copied to the user
|
||||
* buffer. It is reduced by the amount of bytes that actually
|
||||
* copied
|
||||
* @buffer - buffer to use
|
||||
* @dest - user supplied buffer
|
||||
*/
|
||||
SSIZE_T ReadIntoBuffer(uint64_t user_offset, uint64_t first_page_start,
|
||||
size_t bytes_to_read, size_t& left,
|
||||
AlignedBuffer& buffer, char* dest) const;
|
||||
|
||||
SSIZE_T ReadIntoOneShotBuffer(uint64_t user_offset, uint64_t first_page_start,
|
||||
size_t bytes_to_read, size_t& left,
|
||||
char* dest) const;
|
||||
|
||||
SSIZE_T ReadIntoInstanceBuffer(uint64_t user_offset,
|
||||
uint64_t first_page_start,
|
||||
size_t bytes_to_read, size_t& left,
|
||||
char* dest) const;
|
||||
|
||||
void CalculateReadParameters(uint64_t offset, size_t bytes_requested,
|
||||
size_t& actual_bytes_toread,
|
||||
uint64_t& first_page_start) const;
|
||||
|
||||
// Override for behavior change
|
||||
virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes,
|
||||
uint64_t offset) const;
|
||||
|
||||
public:
|
||||
WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
|
||||
const EnvOptions& options);
|
||||
|
||||
~WinRandomAccessFile();
|
||||
|
||||
virtual void EnableReadAhead() override;
|
||||
|
||||
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const override;
|
||||
|
||||
virtual bool ShouldForwardRawRequest() const override;
|
||||
|
||||
virtual void Hint(AccessPattern pattern) override;
|
||||
|
||||
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
||||
|
||||
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
|
||||
};
|
||||
|
||||
|
||||
// This is a sequential write class. It has been mimicked (as others) after
|
||||
// the original Posix class. We add support for unbuffered I/O on windows as
|
||||
// well
|
||||
// we utilize the original buffer as an alignment buffer to write directly to
|
||||
// file with no buffering.
|
||||
// No buffering requires that the provided buffer is aligned to the physical
|
||||
// sector size (SSD page size) and
|
||||
// that all SetFilePointer() operations to occur with such an alignment.
|
||||
// We thus always write in sector/page size increments to the drive and leave
|
||||
// the tail for the next write OR for Close() at which point we pad with zeros.
|
||||
// No padding is required for
|
||||
// buffered access.
|
||||
class WinWritableFile : public WritableFile {
|
||||
private:
|
||||
const std::string filename_;
|
||||
HANDLE hFile_;
|
||||
const bool use_os_buffer_; // Used to indicate unbuffered access, the file
|
||||
const uint64_t alignment_;
|
||||
// must be opened as unbuffered if false
|
||||
uint64_t filesize_; // How much data is actually written disk
|
||||
uint64_t reservedsize_; // how far we have reserved space
|
||||
|
||||
virtual Status PreallocateInternal(uint64_t spaceToReserve);
|
||||
|
||||
public:
|
||||
WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
|
||||
size_t capacity, const EnvOptions& options);
|
||||
|
||||
~WinWritableFile();
|
||||
|
||||
// Indicates if the class makes use of unbuffered I/O
|
||||
virtual bool UseOSBuffer() const override;
|
||||
|
||||
virtual size_t GetRequiredBufferAlignment() const override;
|
||||
|
||||
virtual Status Append(const Slice& data) override;
|
||||
|
||||
virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
|
||||
|
||||
// Need to implement this so the file is truncated correctly
|
||||
// when buffered and unbuffered mode
|
||||
virtual Status Truncate(uint64_t size) override;
|
||||
|
||||
virtual Status Close() override;
|
||||
|
||||
// write out the cached data to the OS cache
|
||||
// This is now taken care of the WritableFileWriter
|
||||
virtual Status Flush() override;
|
||||
|
||||
virtual Status Sync() override;
|
||||
|
||||
virtual Status Fsync() override;
|
||||
|
||||
virtual uint64_t GetFileSize() override;
|
||||
|
||||
virtual Status Allocate(uint64_t offset, uint64_t len) override;
|
||||
|
||||
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
|
||||
};
|
||||
|
||||
class WinDirectory : public Directory {
|
||||
public:
|
||||
WinDirectory() {}
|
||||
|
||||
virtual Status Fsync() override;
|
||||
};
|
||||
|
||||
class WinFileLock : public FileLock {
|
||||
public:
|
||||
explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
|
||||
assert(hFile != NULL);
|
||||
assert(hFile != INVALID_HANDLE_VALUE);
|
||||
}
|
||||
|
||||
~WinFileLock();
|
||||
|
||||
private:
|
||||
HANDLE hFile_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
@ -69,7 +69,6 @@ typedef SSIZE_T ssize_t;
|
||||
namespace rocksdb {
|
||||
|
||||
#define PREFETCH(addr, rw, locality)
|
||||
std::string GetWindowsErrSz(DWORD err);
|
||||
|
||||
namespace port {
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
// where enough posix functionality is available.
|
||||
|
||||
#include "port/win/win_logger.h"
|
||||
#include "port/win/io_win.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdio.h>
|
||||
@ -25,6 +26,8 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
namespace port {
|
||||
|
||||
WinLogger::WinLogger(uint64_t (*gettid)(), Env* env, HANDLE file,
|
||||
const InfoLogLevel log_level)
|
||||
: Logger(log_level),
|
||||
@ -152,4 +155,6 @@ void WinLogger::Logv(const char* format, va_list ap) {
|
||||
|
||||
size_t WinLogger::GetLogFileSize() const { return log_size_; }
|
||||
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -23,6 +23,8 @@ namespace rocksdb {
|
||||
|
||||
class Env;
|
||||
|
||||
namespace port {
|
||||
|
||||
class WinLogger : public rocksdb::Logger {
|
||||
public:
|
||||
WinLogger(uint64_t (*gettid)(), Env* env, HANDLE file,
|
||||
@ -55,4 +57,6 @@ class WinLogger : public rocksdb::Logger {
|
||||
const static uint64_t flush_every_seconds_ = 5;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
Loading…
Reference in New Issue
Block a user