13ded69484
Summary: Env::GenerateUniqueId() works fine on Windows and on POSIX where /proc/sys/kernel/random/uuid exists. Our other implementation is flawed and easily produces collision in a new multi-threaded test. As we rely more heavily on DB session ID uniqueness, this becomes a serious issue. This change combines several individually suitable entropy sources for reliable generation of random unique IDs, with goal of uniqueness and portability, not cryptographic strength nor maximum speed. Specifically: * Moves code for getting UUIDs from the OS to port::GenerateRfcUuid rather than in Env implementation details. Callers are now told whether the operation fails or succeeds. * Adds an internal API GenerateRawUniqueId for generating high-quality 128-bit unique identifiers, by combining entropy from three "tracks": * Lots of info from default Env like time, process id, and hostname. * std::random_device * port::GenerateRfcUuid (when working) * Built-in implementations of Env::GenerateUniqueId() will now always produce an RFC 4122 UUID string, either from platform-specific API or by converting the output of GenerateRawUniqueId. DB session IDs now use GenerateRawUniqueId while DB IDs (not as critical) try to use port::GenerateRfcUuid but fall back on GenerateRawUniqueId with conversion to an RFC 4122 UUID. GenerateRawUniqueId is declared and defined under env/ rather than util/ or even port/ because of the Env dependency. Likely follow-up: enhance GenerateRawUniqueId to be faster after the first call and to guarantee uniqueness within the lifetime of a single process (imparting the same property onto DB session IDs). Pull Request resolved: https://github.com/facebook/rocksdb/pull/8708 Test Plan: A new mini-stress test in env_test checks the various public and internal APIs for uniqueness, including each track of GenerateRawUniqueId individually. We can't hope to verify anywhere close to 128 bits of entropy, but it can at least detect flaws as bad as the old code. Serial execution of the new tests takes about 350 ms on my machine. Reviewed By: zhichao-cao, mrambacher Differential Revision: D30563780 Pulled By: pdillinger fbshipit-source-id: de4c9ff4b2f581cf784fcedb5f39f16e5185c364
1423 lines
44 KiB
C++
1423 lines
44 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#if defined(OS_WIN)
|
|
|
|
#include <direct.h> // _rmdir, _mkdir, _getcwd
|
|
#include <errno.h>
|
|
#include <io.h> // _access
|
|
#include <rpc.h> // for uuid generation
|
|
#include <shlwapi.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <windows.h>
|
|
|
|
#include <algorithm>
|
|
#include <ctime>
|
|
#include <thread>
|
|
|
|
#include "monitoring/iostats_context_imp.h"
|
|
#include "monitoring/thread_status_updater.h"
|
|
#include "monitoring/thread_status_util.h"
|
|
#include "port/port.h"
|
|
#include "port/port_dirent.h"
|
|
#include "port/win/env_win.h"
|
|
#include "port/win/io_win.h"
|
|
#include "port/win/win_logger.h"
|
|
#include "rocksdb/env.h"
|
|
#include "rocksdb/slice.h"
|
|
#include "strsafe.h"
|
|
#include "util/string_util.h"
|
|
|
|
// Undefine the functions windows might use (again)...
|
|
#undef GetCurrentTime
|
|
#undef DeleteFile
|
|
#undef LoadLibrary
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
ThreadStatusUpdater* CreateThreadStatusUpdater() {
|
|
return new ThreadStatusUpdater();
|
|
}
|
|
|
|
namespace {
|
|
|
|
// Sector size used when physical sector size cannot be obtained from device.
|
|
static const size_t kSectorSize = 512;
|
|
|
|
// RAII helpers for HANDLEs
|
|
const auto CloseHandleFunc = [](HANDLE h) { ::CloseHandle(h); };
|
|
typedef std::unique_ptr<void, decltype(CloseHandleFunc)> UniqueCloseHandlePtr;
|
|
|
|
const auto FindCloseFunc = [](HANDLE h) { ::FindClose(h); };
|
|
typedef std::unique_ptr<void, decltype(FindCloseFunc)> UniqueFindClosePtr;
|
|
|
|
void WinthreadCall(const char* label, std::error_code result) {
|
|
if (0 != result.value()) {
|
|
fprintf(stderr, "Winthread %s: %s\n", label,
|
|
errnoStr(result.value()).c_str());
|
|
abort();
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
|
|
namespace port {
|
|
WinClock::WinClock()
|
|
: perf_counter_frequency_(0),
|
|
nano_seconds_per_period_(0),
|
|
GetSystemTimePreciseAsFileTime_(NULL) {
|
|
{
|
|
LARGE_INTEGER qpf;
|
|
BOOL ret __attribute__((__unused__));
|
|
ret = QueryPerformanceFrequency(&qpf);
|
|
assert(ret == TRUE);
|
|
perf_counter_frequency_ = qpf.QuadPart;
|
|
|
|
if (std::nano::den % perf_counter_frequency_ == 0) {
|
|
nano_seconds_per_period_ = std::nano::den / perf_counter_frequency_;
|
|
}
|
|
}
|
|
|
|
HMODULE module = GetModuleHandle("kernel32.dll");
|
|
if (module != NULL) {
|
|
GetSystemTimePreciseAsFileTime_ = (FnGetSystemTimePreciseAsFileTime)(
|
|
void*)GetProcAddress(module, "GetSystemTimePreciseAsFileTime");
|
|
}
|
|
}
|
|
|
|
void WinClock::SleepForMicroseconds(int micros) {
|
|
std::this_thread::sleep_for(std::chrono::microseconds(micros));
|
|
}
|
|
|
|
std::string WinClock::TimeToString(uint64_t secondsSince1970) {
|
|
std::string result;
|
|
|
|
const time_t seconds = secondsSince1970;
|
|
const int maxsize = 64;
|
|
|
|
struct tm t;
|
|
errno_t ret = localtime_s(&t, &seconds);
|
|
|
|
if (ret) {
|
|
result = std::to_string(seconds);
|
|
} else {
|
|
result.resize(maxsize);
|
|
char* p = &result[0];
|
|
|
|
int len =
|
|
snprintf(p, maxsize, "%04d/%02d/%02d-%02d:%02d:%02d ", t.tm_year + 1900,
|
|
t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec);
|
|
assert(len > 0);
|
|
|
|
result.resize(len);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
uint64_t WinClock::NowMicros() {
|
|
if (GetSystemTimePreciseAsFileTime_ != NULL) {
|
|
// all std::chrono clocks on windows proved to return
|
|
// values that may repeat that is not good enough for some uses.
|
|
const int64_t c_UnixEpochStartTicks = 116444736000000000LL;
|
|
const int64_t c_FtToMicroSec = 10;
|
|
|
|
// This interface needs to return system time and not
|
|
// just any microseconds because it is often used as an argument
|
|
// to TimedWait() on condition variable
|
|
FILETIME ftSystemTime;
|
|
GetSystemTimePreciseAsFileTime_(&ftSystemTime);
|
|
|
|
LARGE_INTEGER li;
|
|
li.LowPart = ftSystemTime.dwLowDateTime;
|
|
li.HighPart = ftSystemTime.dwHighDateTime;
|
|
// Subtract unix epoch start
|
|
li.QuadPart -= c_UnixEpochStartTicks;
|
|
// Convert to microsecs
|
|
li.QuadPart /= c_FtToMicroSec;
|
|
return li.QuadPart;
|
|
}
|
|
using namespace std::chrono;
|
|
return duration_cast<microseconds>(system_clock::now().time_since_epoch())
|
|
.count();
|
|
}
|
|
|
|
uint64_t WinClock::NowNanos() {
|
|
if (nano_seconds_per_period_ != 0) {
|
|
// all std::chrono clocks on windows have the same resolution that is only
|
|
// good enough for microseconds but not nanoseconds
|
|
// On Windows 8 and Windows 2012 Server
|
|
// GetSystemTimePreciseAsFileTime(¤t_time) can be used
|
|
LARGE_INTEGER li;
|
|
QueryPerformanceCounter(&li);
|
|
// Convert performance counter to nanoseconds by precomputed ratio.
|
|
// Directly multiply nano::den with li.QuadPart causes overflow.
|
|
// Only do this when nano::den is divisible by perf_counter_frequency_,
|
|
// which most likely is the case in reality. If it's not, fall back to
|
|
// high_resolution_clock, which may be less precise under old compilers.
|
|
li.QuadPart *= nano_seconds_per_period_;
|
|
return li.QuadPart;
|
|
}
|
|
using namespace std::chrono;
|
|
return duration_cast<nanoseconds>(
|
|
high_resolution_clock::now().time_since_epoch())
|
|
.count();
|
|
}
|
|
|
|
Status WinClock::GetCurrentTime(int64_t* unix_time) {
|
|
time_t time = std::time(nullptr);
|
|
if (time == (time_t)(-1)) {
|
|
return Status::NotSupported("Failed to get time");
|
|
}
|
|
|
|
*unix_time = time;
|
|
return Status::OK();
|
|
}
|
|
|
|
WinFileSystem::WinFileSystem(const std::shared_ptr<SystemClock>& clock)
|
|
: clock_(clock), page_size_(4 * 1024), allocation_granularity_(page_size_) {
|
|
SYSTEM_INFO sinfo;
|
|
GetSystemInfo(&sinfo);
|
|
|
|
page_size_ = sinfo.dwPageSize;
|
|
allocation_granularity_ = sinfo.dwAllocationGranularity;
|
|
}
|
|
|
|
const std::shared_ptr<WinFileSystem>& WinFileSystem::Default() {
|
|
static std::shared_ptr<WinFileSystem> fs =
|
|
std::make_shared<WinFileSystem>(WinClock::Default());
|
|
return fs;
|
|
}
|
|
|
|
WinEnvIO::WinEnvIO(Env* hosted_env) : hosted_env_(hosted_env) {}
|
|
|
|
WinEnvIO::~WinEnvIO() {}
|
|
|
|
IOStatus WinFileSystem::DeleteFile(const std::string& fname,
|
|
const IOOptions& /*options*/,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus result;
|
|
|
|
BOOL ret = RX_DeleteFile(RX_FN(fname).c_str());
|
|
|
|
if (!ret) {
|
|
auto lastError = GetLastError();
|
|
result = IOErrorFromWindowsError("Failed to delete: " + fname, lastError);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
IOStatus WinFileSystem::Truncate(const std::string& fname, size_t size,
|
|
const IOOptions& /*options*/,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus s;
|
|
int result = ROCKSDB_NAMESPACE::port::Truncate(fname, size);
|
|
if (result != 0) {
|
|
s = IOError("Failed to truncate: " + fname, errno);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::NewSequentialFile(
|
|
const std::string& fname, const FileOptions& options,
|
|
std::unique_ptr<FSSequentialFile>* result, IODebugContext* /*dbg*/) {
|
|
IOStatus s;
|
|
|
|
result->reset();
|
|
|
|
// Corruption test needs to rename and delete files of these kind
|
|
// while they are still open with another handle. For that reason we
|
|
// allow share_write and delete(allows rename).
|
|
HANDLE hFile = INVALID_HANDLE_VALUE;
|
|
|
|
DWORD fileFlags = FILE_ATTRIBUTE_READONLY;
|
|
|
|
if (options.use_direct_reads && !options.use_mmap_reads) {
|
|
fileFlags |= FILE_FLAG_NO_BUFFERING;
|
|
}
|
|
|
|
{
|
|
IOSTATS_TIMER_GUARD(open_nanos);
|
|
hFile = RX_CreateFile(
|
|
RX_FN(fname).c_str(), GENERIC_READ,
|
|
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL,
|
|
OPEN_EXISTING, // Original fopen mode is "rb"
|
|
fileFlags, NULL);
|
|
}
|
|
|
|
if (INVALID_HANDLE_VALUE == hFile) {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("Failed to open NewSequentialFile" + fname,
|
|
lastError);
|
|
} else {
|
|
result->reset(new WinSequentialFile(fname, hFile, options));
|
|
}
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::NewRandomAccessFile(
|
|
const std::string& fname, const FileOptions& options,
|
|
std::unique_ptr<FSRandomAccessFile>* result, IODebugContext* dbg) {
|
|
result->reset();
|
|
IOStatus s;
|
|
|
|
// Open the file for read-only random access
|
|
// Random access is to disable read-ahead as the system reads too much data
|
|
DWORD fileFlags = FILE_ATTRIBUTE_READONLY;
|
|
|
|
if (options.use_direct_reads && !options.use_mmap_reads) {
|
|
fileFlags |= FILE_FLAG_NO_BUFFERING;
|
|
} else {
|
|
fileFlags |= FILE_FLAG_RANDOM_ACCESS;
|
|
}
|
|
|
|
/// Shared access is necessary for corruption test to pass
|
|
// almost all tests would work with a possible exception of fault_injection
|
|
HANDLE hFile = 0;
|
|
{
|
|
IOSTATS_TIMER_GUARD(open_nanos);
|
|
hFile =
|
|
RX_CreateFile(RX_FN(fname).c_str(), GENERIC_READ,
|
|
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
|
|
NULL, OPEN_EXISTING, fileFlags, NULL);
|
|
}
|
|
|
|
if (INVALID_HANDLE_VALUE == hFile) {
|
|
auto lastError = GetLastError();
|
|
return IOErrorFromWindowsError(
|
|
"NewRandomAccessFile failed to Create/Open: " + fname, lastError);
|
|
}
|
|
|
|
UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc);
|
|
|
|
// CAUTION! This will map the entire file into the process address space
|
|
if (options.use_mmap_reads && sizeof(void*) >= 8) {
|
|
// Use mmap when virtual address-space is plentiful.
|
|
uint64_t fileSize;
|
|
|
|
s = GetFileSize(fname, IOOptions(), &fileSize, dbg);
|
|
|
|
if (s.ok()) {
|
|
// Will not map empty files
|
|
if (fileSize == 0) {
|
|
return IOError("NewRandomAccessFile failed to map empty file: " + fname,
|
|
EINVAL);
|
|
}
|
|
|
|
HANDLE hMap = RX_CreateFileMapping(hFile, NULL, PAGE_READONLY,
|
|
0, // At its present length
|
|
0,
|
|
NULL); // Mapping name
|
|
|
|
if (!hMap) {
|
|
auto lastError = GetLastError();
|
|
return IOErrorFromWindowsError(
|
|
"Failed to create file mapping for NewRandomAccessFile: " + fname,
|
|
lastError);
|
|
}
|
|
|
|
UniqueCloseHandlePtr mapGuard(hMap, CloseHandleFunc);
|
|
|
|
const void* mapped_region =
|
|
MapViewOfFileEx(hMap, FILE_MAP_READ,
|
|
0, // High DWORD of access start
|
|
0, // Low DWORD
|
|
static_cast<SIZE_T>(fileSize),
|
|
NULL); // Let the OS choose the mapping
|
|
|
|
if (!mapped_region) {
|
|
auto lastError = GetLastError();
|
|
return IOErrorFromWindowsError(
|
|
"Failed to MapViewOfFile for NewRandomAccessFile: " + fname,
|
|
lastError);
|
|
}
|
|
|
|
result->reset(new WinMmapReadableFile(fname, hFile, hMap, mapped_region,
|
|
static_cast<size_t>(fileSize)));
|
|
|
|
mapGuard.release();
|
|
fileGuard.release();
|
|
}
|
|
} else {
|
|
result->reset(new WinRandomAccessFile(fname, hFile, page_size_, options));
|
|
fileGuard.release();
|
|
}
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::OpenWritableFile(
|
|
const std::string& fname, const FileOptions& options,
|
|
std::unique_ptr<FSWritableFile>* result, bool reopen) {
|
|
const size_t c_BufferCapacity = 64 * 1024;
|
|
|
|
EnvOptions local_options(options);
|
|
|
|
result->reset();
|
|
IOStatus s;
|
|
|
|
DWORD fileFlags = FILE_ATTRIBUTE_NORMAL;
|
|
|
|
if (local_options.use_direct_writes && !local_options.use_mmap_writes) {
|
|
fileFlags = FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
|
|
}
|
|
|
|
// Desired access. We are want to write only here but if we want to memory
|
|
// map
|
|
// the file then there is no write only mode so we have to create it
|
|
// Read/Write
|
|
// However, MapViewOfFile specifies only Write only
|
|
DWORD desired_access = GENERIC_WRITE;
|
|
DWORD shared_mode = FILE_SHARE_READ;
|
|
|
|
if (local_options.use_mmap_writes) {
|
|
desired_access |= GENERIC_READ;
|
|
} else {
|
|
// Adding this solely for tests to pass (fault_injection_test,
|
|
// wal_manager_test).
|
|
shared_mode |= (FILE_SHARE_WRITE | FILE_SHARE_DELETE);
|
|
}
|
|
|
|
// This will always truncate the file
|
|
DWORD creation_disposition = CREATE_ALWAYS;
|
|
if (reopen) {
|
|
creation_disposition = OPEN_ALWAYS;
|
|
}
|
|
|
|
HANDLE hFile = 0;
|
|
{
|
|
IOSTATS_TIMER_GUARD(open_nanos);
|
|
hFile = RX_CreateFile(
|
|
RX_FN(fname).c_str(),
|
|
desired_access, // Access desired
|
|
shared_mode,
|
|
NULL, // Security attributes
|
|
// Posix env says (reopen) ? (O_CREATE | O_APPEND) : O_CREAT | O_TRUNC
|
|
creation_disposition,
|
|
fileFlags, // Flags
|
|
NULL); // Template File
|
|
}
|
|
|
|
if (INVALID_HANDLE_VALUE == hFile) {
|
|
auto lastError = GetLastError();
|
|
return IOErrorFromWindowsError(
|
|
"Failed to create a NewWriteableFile: " + fname, lastError);
|
|
}
|
|
|
|
// We will start writing at the end, appending
|
|
if (reopen) {
|
|
LARGE_INTEGER zero_move;
|
|
zero_move.QuadPart = 0;
|
|
BOOL ret = SetFilePointerEx(hFile, zero_move, NULL, FILE_END);
|
|
if (!ret) {
|
|
auto lastError = GetLastError();
|
|
return IOErrorFromWindowsError(
|
|
"Failed to create a ReopenWritableFile move to the end: " + fname,
|
|
lastError);
|
|
}
|
|
}
|
|
|
|
if (options.use_mmap_writes) {
|
|
// We usually do not use mmmapping on SSD and thus we pass memory
|
|
// page_size
|
|
result->reset(new WinMmapFile(fname, hFile, page_size_,
|
|
allocation_granularity_, local_options));
|
|
} else {
|
|
// Here we want the buffer allocation to be aligned by the SSD page size
|
|
// and to be a multiple of it
|
|
result->reset(new WinWritableFile(fname, hFile, GetPageSize(),
|
|
c_BufferCapacity, local_options));
|
|
}
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::NewWritableFile(const std::string& fname,
|
|
const FileOptions& options,
|
|
std::unique_ptr<FSWritableFile>* result,
|
|
IODebugContext* /*dbg*/) {
|
|
return OpenWritableFile(fname, options, result, false);
|
|
}
|
|
|
|
IOStatus WinFileSystem::ReopenWritableFile(
|
|
const std::string& fname, const FileOptions& options,
|
|
std::unique_ptr<FSWritableFile>* result, IODebugContext* /*dbg*/) {
|
|
return OpenWritableFile(fname, options, result, true);
|
|
}
|
|
|
|
IOStatus WinFileSystem::NewRandomRWFile(const std::string& fname,
|
|
const FileOptions& options,
|
|
std::unique_ptr<FSRandomRWFile>* result,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus s;
|
|
|
|
// Open the file for read-only random access
|
|
// Random access is to disable read-ahead as the system reads too much data
|
|
DWORD desired_access = GENERIC_READ | GENERIC_WRITE;
|
|
DWORD shared_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
|
|
DWORD creation_disposition = OPEN_EXISTING; // Fail if file does not exist
|
|
DWORD file_flags = FILE_FLAG_RANDOM_ACCESS;
|
|
|
|
if (options.use_direct_reads && options.use_direct_writes) {
|
|
file_flags |= FILE_FLAG_NO_BUFFERING;
|
|
}
|
|
|
|
/// Shared access is necessary for corruption test to pass
|
|
// almost all tests would work with a possible exception of fault_injection
|
|
HANDLE hFile = 0;
|
|
{
|
|
IOSTATS_TIMER_GUARD(open_nanos);
|
|
hFile = RX_CreateFile(RX_FN(fname).c_str(), desired_access, shared_mode,
|
|
NULL, // Security attributes
|
|
creation_disposition, file_flags, NULL);
|
|
}
|
|
|
|
if (INVALID_HANDLE_VALUE == hFile) {
|
|
auto lastError = GetLastError();
|
|
return IOErrorFromWindowsError(
|
|
"NewRandomRWFile failed to Create/Open: " + fname, lastError);
|
|
}
|
|
|
|
UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc);
|
|
result->reset(new WinRandomRWFile(fname, hFile, GetPageSize(), options));
|
|
fileGuard.release();
|
|
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::NewMemoryMappedFileBuffer(
|
|
const std::string& fname, std::unique_ptr<MemoryMappedFileBuffer>* result) {
|
|
IOStatus s;
|
|
result->reset();
|
|
|
|
DWORD fileFlags = FILE_ATTRIBUTE_READONLY;
|
|
|
|
HANDLE hFile = INVALID_HANDLE_VALUE;
|
|
{
|
|
IOSTATS_TIMER_GUARD(open_nanos);
|
|
hFile = RX_CreateFile(
|
|
RX_FN(fname).c_str(), GENERIC_READ | GENERIC_WRITE,
|
|
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL,
|
|
OPEN_EXISTING, // Open only if it exists
|
|
fileFlags, NULL);
|
|
}
|
|
|
|
if (INVALID_HANDLE_VALUE == hFile) {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError(
|
|
"Failed to open NewMemoryMappedFileBuffer: " + fname, lastError);
|
|
return s;
|
|
}
|
|
UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc);
|
|
|
|
uint64_t fileSize = 0;
|
|
s = GetFileSize(fname, IOOptions(), &fileSize, nullptr);
|
|
if (!s.ok()) {
|
|
return s;
|
|
}
|
|
// Will not map empty files
|
|
if (fileSize == 0) {
|
|
return IOStatus::NotSupported(
|
|
"NewMemoryMappedFileBuffer can not map zero length files: " + fname);
|
|
}
|
|
|
|
// size_t is 32-bit with 32-bit builds
|
|
if (fileSize > std::numeric_limits<size_t>::max()) {
|
|
return IOStatus::NotSupported(
|
|
"The specified file size does not fit into 32-bit memory addressing: " +
|
|
fname);
|
|
}
|
|
|
|
HANDLE hMap = RX_CreateFileMapping(hFile, NULL, PAGE_READWRITE,
|
|
0, // Whole file at its present length
|
|
0,
|
|
NULL); // Mapping name
|
|
|
|
if (!hMap) {
|
|
auto lastError = GetLastError();
|
|
return IOErrorFromWindowsError(
|
|
"Failed to create file mapping for: " + fname, lastError);
|
|
}
|
|
UniqueCloseHandlePtr mapGuard(hMap, CloseHandleFunc);
|
|
|
|
void* base = MapViewOfFileEx(hMap, FILE_MAP_WRITE,
|
|
0, // High DWORD of access start
|
|
0, // Low DWORD
|
|
static_cast<SIZE_T>(fileSize),
|
|
NULL); // Let the OS choose the mapping
|
|
|
|
if (!base) {
|
|
auto lastError = GetLastError();
|
|
return IOErrorFromWindowsError(
|
|
"Failed to MapViewOfFile for NewMemoryMappedFileBuffer: " + fname,
|
|
lastError);
|
|
}
|
|
|
|
result->reset(new WinMemoryMappedBuffer(hFile, hMap, base,
|
|
static_cast<size_t>(fileSize)));
|
|
|
|
mapGuard.release();
|
|
fileGuard.release();
|
|
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::NewDirectory(const std::string& name,
|
|
const IOOptions& /*options*/,
|
|
std::unique_ptr<FSDirectory>* result,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus s;
|
|
// Must be nullptr on failure
|
|
result->reset();
|
|
|
|
if (!DirExists(name)) {
|
|
s = IOErrorFromWindowsError("open folder: " + name, ERROR_DIRECTORY);
|
|
return s;
|
|
}
|
|
|
|
HANDLE handle = INVALID_HANDLE_VALUE;
|
|
// 0 - for access means read metadata
|
|
{
|
|
IOSTATS_TIMER_GUARD(open_nanos);
|
|
handle = RX_CreateFile(
|
|
RX_FN(name).c_str(), 0,
|
|
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
|
|
OPEN_EXISTING,
|
|
FILE_FLAG_BACKUP_SEMANTICS, // make opening folders possible
|
|
NULL);
|
|
}
|
|
|
|
if (INVALID_HANDLE_VALUE == handle) {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("open folder: " + name, lastError);
|
|
return s;
|
|
}
|
|
|
|
result->reset(new WinDirectory(handle));
|
|
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::FileExists(const std::string& fname,
|
|
const IOOptions& /*opts*/,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus s;
|
|
// TODO: This does not follow symbolic links at this point
|
|
// which is consistent with _access() impl on windows
|
|
// but can be added
|
|
WIN32_FILE_ATTRIBUTE_DATA attrs;
|
|
if (FALSE == RX_GetFileAttributesEx(RX_FN(fname).c_str(),
|
|
GetFileExInfoStandard, &attrs)) {
|
|
auto lastError = GetLastError();
|
|
switch (lastError) {
|
|
case ERROR_ACCESS_DENIED:
|
|
case ERROR_NOT_FOUND:
|
|
case ERROR_FILE_NOT_FOUND:
|
|
case ERROR_PATH_NOT_FOUND:
|
|
s = IOStatus::NotFound();
|
|
break;
|
|
default:
|
|
s = IOErrorFromWindowsError("Unexpected error for: " + fname,
|
|
lastError);
|
|
break;
|
|
}
|
|
}
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::GetChildren(const std::string& dir,
|
|
const IOOptions& /*opts*/,
|
|
std::vector<std::string>* result,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus status;
|
|
result->clear();
|
|
|
|
RX_WIN32_FIND_DATA data;
|
|
memset(&data, 0, sizeof(data));
|
|
std::string pattern(dir);
|
|
pattern.append("\\").append("*");
|
|
|
|
HANDLE handle =
|
|
RX_FindFirstFileEx(RX_FN(pattern).c_str(),
|
|
// Do not want alternative name
|
|
FindExInfoBasic, &data, FindExSearchNameMatch,
|
|
NULL, // lpSearchFilter
|
|
0);
|
|
|
|
if (handle == INVALID_HANDLE_VALUE) {
|
|
auto lastError = GetLastError();
|
|
switch (lastError) {
|
|
case ERROR_NOT_FOUND:
|
|
case ERROR_ACCESS_DENIED:
|
|
case ERROR_FILE_NOT_FOUND:
|
|
case ERROR_PATH_NOT_FOUND:
|
|
status = IOStatus::NotFound();
|
|
break;
|
|
default:
|
|
status = IOErrorFromWindowsError("Failed to GetChhildren for: " + dir,
|
|
lastError);
|
|
}
|
|
return status;
|
|
}
|
|
|
|
UniqueFindClosePtr fc(handle, FindCloseFunc);
|
|
|
|
// For safety
|
|
data.cFileName[MAX_PATH - 1] = 0;
|
|
|
|
while (true) {
|
|
// filter out '.' and '..' directory entries
|
|
// which appear only on some platforms
|
|
const bool ignore =
|
|
((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) &&
|
|
(RX_FNCMP(data.cFileName, ".") == 0 ||
|
|
RX_FNCMP(data.cFileName, "..") == 0);
|
|
if (!ignore) {
|
|
auto x = RX_FILESTRING(data.cFileName, RX_FNLEN(data.cFileName));
|
|
result->push_back(FN_TO_RX(x));
|
|
}
|
|
|
|
BOOL ret = -RX_FindNextFile(handle, &data);
|
|
// If the function fails the return value is zero
|
|
// and non-zero otherwise. Not TRUE or FALSE.
|
|
if (ret == FALSE) {
|
|
// Posix does not care why we stopped
|
|
break;
|
|
}
|
|
data.cFileName[MAX_PATH - 1] = 0;
|
|
}
|
|
return status;
|
|
}
|
|
|
|
IOStatus WinFileSystem::CreateDir(const std::string& name,
|
|
const IOOptions& /*opts*/,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus result;
|
|
BOOL ret = RX_CreateDirectory(RX_FN(name).c_str(), NULL);
|
|
if (!ret) {
|
|
auto lastError = GetLastError();
|
|
result = IOErrorFromWindowsError("Failed to create a directory: " + name,
|
|
lastError);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
IOStatus WinFileSystem::CreateDirIfMissing(const std::string& name,
|
|
const IOOptions& /*opts*/,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus result;
|
|
|
|
if (DirExists(name)) {
|
|
return result;
|
|
}
|
|
|
|
BOOL ret = RX_CreateDirectory(RX_FN(name).c_str(), NULL);
|
|
if (!ret) {
|
|
auto lastError = GetLastError();
|
|
if (lastError != ERROR_ALREADY_EXISTS) {
|
|
result = IOErrorFromWindowsError("Failed to create a directory: " + name,
|
|
lastError);
|
|
} else {
|
|
result = IOStatus::IOError(name + ": exists but is not a directory");
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
IOStatus WinFileSystem::DeleteDir(const std::string& name,
|
|
const IOOptions& /*options*/,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus result;
|
|
BOOL ret = RX_RemoveDirectory(RX_FN(name).c_str());
|
|
if (!ret) {
|
|
auto lastError = GetLastError();
|
|
result =
|
|
IOErrorFromWindowsError("Failed to remove dir: " + name, lastError);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
IOStatus WinFileSystem::GetFileSize(const std::string& fname,
|
|
const IOOptions& /*opts*/, uint64_t* size,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus s;
|
|
|
|
WIN32_FILE_ATTRIBUTE_DATA attrs;
|
|
if (RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard,
|
|
&attrs)) {
|
|
ULARGE_INTEGER file_size;
|
|
file_size.HighPart = attrs.nFileSizeHigh;
|
|
file_size.LowPart = attrs.nFileSizeLow;
|
|
*size = file_size.QuadPart;
|
|
} else {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("Can not get size for: " + fname, lastError);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
uint64_t WinFileSystem::FileTimeToUnixTime(const FILETIME& ftTime) {
|
|
const uint64_t c_FileTimePerSecond = 10000000U;
|
|
// UNIX epoch starts on 1970-01-01T00:00:00Z
|
|
// Windows FILETIME starts on 1601-01-01T00:00:00Z
|
|
// Therefore, we need to subtract the below number of seconds from
|
|
// the seconds that we obtain from FILETIME with an obvious loss of
|
|
// precision
|
|
const uint64_t c_SecondBeforeUnixEpoch = 11644473600U;
|
|
|
|
ULARGE_INTEGER li;
|
|
li.HighPart = ftTime.dwHighDateTime;
|
|
li.LowPart = ftTime.dwLowDateTime;
|
|
|
|
uint64_t result =
|
|
(li.QuadPart / c_FileTimePerSecond) - c_SecondBeforeUnixEpoch;
|
|
return result;
|
|
}
|
|
|
|
IOStatus WinFileSystem::GetFileModificationTime(const std::string& fname,
|
|
const IOOptions& /*opts*/,
|
|
uint64_t* file_mtime,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus s;
|
|
|
|
WIN32_FILE_ATTRIBUTE_DATA attrs;
|
|
if (RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard,
|
|
&attrs)) {
|
|
*file_mtime = FileTimeToUnixTime(attrs.ftLastWriteTime);
|
|
} else {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError(
|
|
"Can not get file modification time for: " + fname, lastError);
|
|
*file_mtime = 0;
|
|
}
|
|
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::RenameFile(const std::string& src,
|
|
const std::string& target,
|
|
const IOOptions& /*opts*/,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus result;
|
|
|
|
// rename() is not capable of replacing the existing file as on Linux
|
|
// so use OS API directly
|
|
if (!RX_MoveFileEx(RX_FN(src).c_str(), RX_FN(target).c_str(),
|
|
MOVEFILE_REPLACE_EXISTING)) {
|
|
DWORD lastError = GetLastError();
|
|
|
|
std::string text("Failed to rename: ");
|
|
text.append(src).append(" to: ").append(target);
|
|
|
|
result = IOErrorFromWindowsError(text, lastError);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
IOStatus WinFileSystem::LinkFile(const std::string& src,
|
|
const std::string& target,
|
|
const IOOptions& /*opts*/,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus result;
|
|
|
|
if (!RX_CreateHardLink(RX_FN(target).c_str(), RX_FN(src).c_str(), NULL)) {
|
|
DWORD lastError = GetLastError();
|
|
if (lastError == ERROR_NOT_SAME_DEVICE) {
|
|
return IOStatus::NotSupported("No cross FS links allowed");
|
|
}
|
|
|
|
std::string text("Failed to link: ");
|
|
text.append(src).append(" to: ").append(target);
|
|
|
|
result = IOErrorFromWindowsError(text, lastError);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
IOStatus WinFileSystem::NumFileLinks(const std::string& fname,
|
|
const IOOptions& /*opts*/, uint64_t* count,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus s;
|
|
HANDLE handle =
|
|
RX_CreateFile(RX_FN(fname).c_str(), 0,
|
|
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
|
|
NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
|
|
|
|
if (INVALID_HANDLE_VALUE == handle) {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("NumFileLinks: " + fname, lastError);
|
|
return s;
|
|
}
|
|
UniqueCloseHandlePtr handle_guard(handle, CloseHandleFunc);
|
|
FILE_STANDARD_INFO standard_info;
|
|
if (0 != GetFileInformationByHandleEx(handle, FileStandardInfo,
|
|
&standard_info,
|
|
sizeof(standard_info))) {
|
|
*count = standard_info.NumberOfLinks;
|
|
} else {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("GetFileInformationByHandleEx: " + fname,
|
|
lastError);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::AreFilesSame(const std::string& first,
|
|
const std::string& second,
|
|
const IOOptions& /*opts*/, bool* res,
|
|
IODebugContext* /*dbg*/) {
|
|
// For MinGW builds
|
|
#if (_WIN32_WINNT == _WIN32_WINNT_VISTA)
|
|
IOStatus s = IOStatus::NotSupported();
|
|
#else
|
|
assert(res != nullptr);
|
|
IOStatus s;
|
|
if (res == nullptr) {
|
|
s = IOStatus::InvalidArgument("res");
|
|
return s;
|
|
}
|
|
|
|
// 0 - for access means read metadata
|
|
HANDLE file_1 = RX_CreateFile(
|
|
RX_FN(first).c_str(), 0,
|
|
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
|
|
OPEN_EXISTING,
|
|
FILE_FLAG_BACKUP_SEMANTICS, // make opening folders possible
|
|
NULL);
|
|
|
|
if (INVALID_HANDLE_VALUE == file_1) {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("open file: " + first, lastError);
|
|
return s;
|
|
}
|
|
UniqueCloseHandlePtr g_1(file_1, CloseHandleFunc);
|
|
|
|
HANDLE file_2 = RX_CreateFile(
|
|
RX_FN(second).c_str(), 0,
|
|
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
|
|
OPEN_EXISTING,
|
|
FILE_FLAG_BACKUP_SEMANTICS, // make opening folders possible
|
|
NULL);
|
|
|
|
if (INVALID_HANDLE_VALUE == file_2) {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("open file: " + second, lastError);
|
|
return s;
|
|
}
|
|
UniqueCloseHandlePtr g_2(file_2, CloseHandleFunc);
|
|
|
|
FILE_ID_INFO FileInfo_1;
|
|
BOOL result = GetFileInformationByHandleEx(file_1, FileIdInfo, &FileInfo_1,
|
|
sizeof(FileInfo_1));
|
|
|
|
if (!result) {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("stat file: " + first, lastError);
|
|
return s;
|
|
}
|
|
|
|
FILE_ID_INFO FileInfo_2;
|
|
result = GetFileInformationByHandleEx(file_2, FileIdInfo, &FileInfo_2,
|
|
sizeof(FileInfo_2));
|
|
|
|
if (!result) {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("stat file: " + second, lastError);
|
|
return s;
|
|
}
|
|
|
|
if (FileInfo_1.VolumeSerialNumber == FileInfo_2.VolumeSerialNumber) {
|
|
*res =
|
|
(0 == memcmp(FileInfo_1.FileId.Identifier, FileInfo_2.FileId.Identifier,
|
|
sizeof(FileInfo_1.FileId.Identifier)));
|
|
} else {
|
|
*res = false;
|
|
}
|
|
#endif
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::LockFile(const std::string& lockFname,
|
|
const IOOptions& /*opts*/, FileLock** lock,
|
|
IODebugContext* /*dbg*/) {
|
|
assert(lock != nullptr);
|
|
|
|
*lock = NULL;
|
|
IOStatus result;
|
|
|
|
// No-sharing, this is a LOCK file
|
|
const DWORD ExclusiveAccessON = 0;
|
|
|
|
// Obtain exclusive access to the LOCK file
|
|
// Previously, instead of NORMAL attr we set DELETE on close and that worked
|
|
// well except with fault_injection test that insists on deleting it.
|
|
HANDLE hFile = 0;
|
|
{
|
|
IOSTATS_TIMER_GUARD(open_nanos);
|
|
hFile = RX_CreateFile(RX_FN(lockFname).c_str(),
|
|
(GENERIC_READ | GENERIC_WRITE), ExclusiveAccessON,
|
|
NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
|
|
}
|
|
|
|
if (INVALID_HANDLE_VALUE == hFile) {
|
|
auto lastError = GetLastError();
|
|
result = IOErrorFromWindowsError("Failed to create lock file: " + lockFname,
|
|
lastError);
|
|
} else {
|
|
*lock = new WinFileLock(hFile);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
IOStatus WinFileSystem::UnlockFile(FileLock* lock, const IOOptions& /*opts*/,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus result;
|
|
|
|
assert(lock != nullptr);
|
|
|
|
delete lock;
|
|
|
|
return result;
|
|
}
|
|
|
|
IOStatus WinFileSystem::GetTestDirectory(const IOOptions& opts,
|
|
std::string* result,
|
|
IODebugContext* dbg) {
|
|
std::string output;
|
|
|
|
const char* env = getenv("TEST_TMPDIR");
|
|
if (env && env[0] != '\0') {
|
|
output = env;
|
|
} else {
|
|
env = getenv("TMP");
|
|
|
|
if (env && env[0] != '\0') {
|
|
output = env;
|
|
} else {
|
|
output = "c:\\tmp";
|
|
}
|
|
}
|
|
CreateDir(output, opts, dbg);
|
|
|
|
output.append("\\testrocksdb-");
|
|
output.append(std::to_string(GetCurrentProcessId()));
|
|
|
|
CreateDir(output, opts, dbg);
|
|
|
|
output.swap(*result);
|
|
|
|
return IOStatus::OK();
|
|
}
|
|
|
|
IOStatus WinFileSystem::NewLogger(const std::string& fname,
|
|
const IOOptions& /*opts*/,
|
|
std::shared_ptr<Logger>* result,
|
|
IODebugContext* /*dbg*/) {
|
|
IOStatus s;
|
|
|
|
result->reset();
|
|
|
|
HANDLE hFile = 0;
|
|
{
|
|
IOSTATS_TIMER_GUARD(open_nanos);
|
|
hFile = RX_CreateFile(
|
|
RX_FN(fname).c_str(), GENERIC_WRITE,
|
|
FILE_SHARE_READ | FILE_SHARE_DELETE, // In RocksDb log files are
|
|
// renamed and deleted before
|
|
// they are closed. This enables
|
|
// doing so.
|
|
NULL,
|
|
CREATE_ALWAYS, // Original fopen mode is "w"
|
|
FILE_ATTRIBUTE_NORMAL, NULL);
|
|
}
|
|
|
|
if (INVALID_HANDLE_VALUE == hFile) {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("Failed to open LogFile" + fname, lastError);
|
|
} else {
|
|
{
|
|
// With log files we want to set the true creation time as of now
|
|
// because the system
|
|
// for some reason caches the attributes of the previous file that just
|
|
// been renamed from
|
|
// this name so auto_roll_logger_test fails
|
|
FILETIME ft;
|
|
GetSystemTimeAsFileTime(&ft);
|
|
// Set creation, last access and last write time to the same value
|
|
SetFileTime(hFile, &ft, &ft, &ft);
|
|
}
|
|
result->reset(new WinLogger(&WinEnvThreads::gettid, clock_.get(), hFile));
|
|
}
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::IsDirectory(const std::string& path,
|
|
const IOOptions& /*opts*/, bool* is_dir,
|
|
IODebugContext* /*dbg*/) {
|
|
BOOL ret = RX_PathIsDirectory(RX_FN(path).c_str());
|
|
if (is_dir) {
|
|
*is_dir = ret ? true : false;
|
|
}
|
|
return IOStatus::OK();
|
|
}
|
|
|
|
Status WinEnvIO::GetHostName(char* name, uint64_t len) {
|
|
Status s;
|
|
DWORD nSize = static_cast<DWORD>(
|
|
std::min<uint64_t>(len, std::numeric_limits<DWORD>::max()));
|
|
|
|
if (!::GetComputerNameA(name, &nSize)) {
|
|
auto lastError = GetLastError();
|
|
s = IOErrorFromWindowsError("GetHostName", lastError);
|
|
} else {
|
|
name[nSize] = 0;
|
|
}
|
|
|
|
return s;
|
|
}
|
|
|
|
IOStatus WinFileSystem::GetAbsolutePath(const std::string& db_path,
|
|
const IOOptions& /*options*/,
|
|
std::string* output_path,
|
|
IODebugContext* dbg) {
|
|
// Check if we already have an absolute path
|
|
// For test compatibility we will consider starting slash as an
|
|
// absolute path
|
|
if ((!db_path.empty() && (db_path[0] == '\\' || db_path[0] == '/')) ||
|
|
!RX_PathIsRelative(RX_FN(db_path).c_str())) {
|
|
*output_path = db_path;
|
|
return IOStatus::OK();
|
|
}
|
|
|
|
RX_FILESTRING result;
|
|
result.resize(MAX_PATH);
|
|
|
|
// Hopefully no changes the current directory while we do this
|
|
// however _getcwd also suffers from the same limitation
|
|
DWORD len = RX_GetCurrentDirectory(MAX_PATH, &result[0]);
|
|
if (len == 0) {
|
|
auto lastError = GetLastError();
|
|
return IOErrorFromWindowsError("Failed to get current working directory",
|
|
lastError);
|
|
}
|
|
|
|
result.resize(len);
|
|
std::string res = FN_TO_RX(result);
|
|
|
|
res.swap(*output_path);
|
|
return IOStatus::OK();
|
|
}
|
|
|
|
IOStatus WinFileSystem::GetFreeSpace(const std::string& path,
|
|
const IOOptions& /*options*/,
|
|
uint64_t* diskfree,
|
|
IODebugContext* /*dbg*/) {
|
|
assert(diskfree != nullptr);
|
|
ULARGE_INTEGER freeBytes;
|
|
BOOL f = RX_GetDiskFreeSpaceEx(RX_FN(path).c_str(), &freeBytes, NULL, NULL);
|
|
if (f) {
|
|
*diskfree = freeBytes.QuadPart;
|
|
return IOStatus::OK();
|
|
} else {
|
|
DWORD lastError = GetLastError();
|
|
return IOErrorFromWindowsError("Failed to get free space: " + path,
|
|
lastError);
|
|
}
|
|
}
|
|
|
|
FileOptions WinFileSystem::OptimizeForLogWrite(
|
|
const FileOptions& file_options, const DBOptions& db_options) const {
|
|
FileOptions optimized(file_options);
|
|
// These two the same as default optimizations
|
|
optimized.bytes_per_sync = db_options.wal_bytes_per_sync;
|
|
optimized.writable_file_max_buffer_size =
|
|
db_options.writable_file_max_buffer_size;
|
|
|
|
// This adversely affects %999 on windows
|
|
optimized.use_mmap_writes = false;
|
|
// Direct writes will produce a huge perf impact on
|
|
// Windows. Pre-allocate space for WAL.
|
|
optimized.use_direct_writes = false;
|
|
return optimized;
|
|
}
|
|
|
|
FileOptions WinFileSystem::OptimizeForManifestWrite(
|
|
const FileOptions& options) const {
|
|
FileOptions optimized(options);
|
|
optimized.use_mmap_writes = false;
|
|
optimized.use_direct_reads = false;
|
|
return optimized;
|
|
}
|
|
|
|
FileOptions WinFileSystem::OptimizeForManifestRead(
|
|
const FileOptions& file_options) const {
|
|
FileOptions optimized(file_options);
|
|
optimized.use_mmap_writes = false;
|
|
optimized.use_direct_reads = false;
|
|
return optimized;
|
|
}
|
|
|
|
// Returns true iff the named directory exists and is a directory.
|
|
bool WinFileSystem::DirExists(const std::string& dname) {
|
|
WIN32_FILE_ATTRIBUTE_DATA attrs;
|
|
if (RX_GetFileAttributesEx(RX_FN(dname).c_str(), GetFileExInfoStandard,
|
|
&attrs)) {
|
|
return 0 != (attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
size_t WinFileSystem::GetSectorSize(const std::string& fname) {
|
|
size_t sector_size = kSectorSize;
|
|
|
|
// obtain device handle
|
|
char devicename[7] = "\\\\.\\";
|
|
int erresult = 0;
|
|
if (RX_PathIsRelative(RX_FN(fname).c_str())) {
|
|
RX_FILESTRING rx_current_dir;
|
|
rx_current_dir.resize(MAX_PATH);
|
|
DWORD len = RX_GetCurrentDirectory(MAX_PATH, &rx_current_dir[0]);
|
|
if (len == 0) {
|
|
return sector_size;
|
|
}
|
|
rx_current_dir.resize(len);
|
|
std::string current_dir = FN_TO_RX(rx_current_dir);
|
|
erresult =
|
|
strncat_s(devicename, sizeof(devicename), current_dir.c_str(), 2);
|
|
} else {
|
|
erresult = strncat_s(devicename, sizeof(devicename), fname.c_str(), 2);
|
|
}
|
|
|
|
if (erresult) {
|
|
assert(false);
|
|
return sector_size;
|
|
}
|
|
|
|
HANDLE hDevice = CreateFile(devicename, 0, 0, nullptr, OPEN_EXISTING,
|
|
FILE_ATTRIBUTE_NORMAL, nullptr);
|
|
|
|
if (hDevice == INVALID_HANDLE_VALUE) {
|
|
return sector_size;
|
|
}
|
|
|
|
STORAGE_PROPERTY_QUERY spropertyquery;
|
|
spropertyquery.PropertyId = StorageAccessAlignmentProperty;
|
|
spropertyquery.QueryType = PropertyStandardQuery;
|
|
|
|
BYTE output_buffer[sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR)];
|
|
DWORD output_bytes = 0;
|
|
|
|
BOOL ret = DeviceIoControl(
|
|
hDevice, IOCTL_STORAGE_QUERY_PROPERTY, &spropertyquery,
|
|
sizeof(spropertyquery), output_buffer,
|
|
sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR), &output_bytes, nullptr);
|
|
|
|
if (ret) {
|
|
sector_size = ((STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR*)output_buffer)
|
|
->BytesPerLogicalSector;
|
|
} else {
|
|
// many devices do not support StorageProcessAlignmentProperty. Any failure
|
|
// here and we fall back to logical alignment
|
|
|
|
DISK_GEOMETRY_EX geometry = {0};
|
|
ret = DeviceIoControl(hDevice, IOCTL_DISK_GET_DRIVE_GEOMETRY, nullptr, 0,
|
|
&geometry, sizeof(geometry), &output_bytes, nullptr);
|
|
if (ret) {
|
|
sector_size = geometry.Geometry.BytesPerSector;
|
|
}
|
|
}
|
|
|
|
if (hDevice != INVALID_HANDLE_VALUE) {
|
|
CloseHandle(hDevice);
|
|
}
|
|
|
|
return sector_size;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
// WinEnvThreads
|
|
|
|
WinEnvThreads::WinEnvThreads(Env* hosted_env)
|
|
: hosted_env_(hosted_env), thread_pools_(Env::Priority::TOTAL) {
|
|
for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) {
|
|
thread_pools_[pool_id].SetThreadPriority(
|
|
static_cast<Env::Priority>(pool_id));
|
|
// This allows later initializing the thread-local-env of each thread.
|
|
thread_pools_[pool_id].SetHostEnv(hosted_env);
|
|
}
|
|
}
|
|
|
|
WinEnvThreads::~WinEnvThreads() {
|
|
WaitForJoin();
|
|
|
|
for (auto& thpool : thread_pools_) {
|
|
thpool.JoinAllThreads();
|
|
}
|
|
}
|
|
|
|
void WinEnvThreads::Schedule(void (*function)(void*), void* arg,
|
|
Env::Priority pri, void* tag,
|
|
void (*unschedFunction)(void* arg)) {
|
|
assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
|
|
thread_pools_[pri].Schedule(function, arg, tag, unschedFunction);
|
|
}
|
|
|
|
int WinEnvThreads::UnSchedule(void* arg, Env::Priority pri) {
|
|
return thread_pools_[pri].UnSchedule(arg);
|
|
}
|
|
|
|
namespace {
|
|
|
|
struct StartThreadState {
|
|
void (*user_function)(void*);
|
|
void* arg;
|
|
};
|
|
|
|
void* StartThreadWrapper(void* arg) {
|
|
std::unique_ptr<StartThreadState> state(
|
|
reinterpret_cast<StartThreadState*>(arg));
|
|
state->user_function(state->arg);
|
|
return nullptr;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
void WinEnvThreads::StartThread(void (*function)(void* arg), void* arg) {
|
|
std::unique_ptr<StartThreadState> state(new StartThreadState);
|
|
state->user_function = function;
|
|
state->arg = arg;
|
|
try {
|
|
Thread th(&StartThreadWrapper, state.get());
|
|
state.release();
|
|
|
|
std::lock_guard<std::mutex> lg(mu_);
|
|
threads_to_join_.push_back(std::move(th));
|
|
|
|
} catch (const std::system_error& ex) {
|
|
WinthreadCall("start thread", ex.code());
|
|
}
|
|
}
|
|
|
|
void WinEnvThreads::WaitForJoin() {
|
|
for (auto& th : threads_to_join_) {
|
|
th.join();
|
|
}
|
|
threads_to_join_.clear();
|
|
}
|
|
|
|
unsigned int WinEnvThreads::GetThreadPoolQueueLen(Env::Priority pri) const {
|
|
assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
|
|
return thread_pools_[pri].GetQueueLen();
|
|
}
|
|
|
|
uint64_t WinEnvThreads::gettid() {
|
|
uint64_t thread_id = GetCurrentThreadId();
|
|
return thread_id;
|
|
}
|
|
|
|
uint64_t WinEnvThreads::GetThreadID() const { return gettid(); }
|
|
|
|
void WinEnvThreads::SetBackgroundThreads(int num, Env::Priority pri) {
|
|
assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
|
|
thread_pools_[pri].SetBackgroundThreads(num);
|
|
}
|
|
|
|
int WinEnvThreads::GetBackgroundThreads(Env::Priority pri) {
|
|
assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
|
|
return thread_pools_[pri].GetBackgroundThreads();
|
|
}
|
|
|
|
void WinEnvThreads::IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) {
|
|
assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
|
|
thread_pools_[pri].IncBackgroundThreadsIfNeeded(num);
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
// WinEnv
|
|
|
|
WinEnv::WinEnv()
|
|
: CompositeEnv(WinFileSystem::Default(), WinClock::Default()),
|
|
winenv_io_(this),
|
|
winenv_threads_(this) {
|
|
// Protected member of the base class
|
|
thread_status_updater_ = CreateThreadStatusUpdater();
|
|
}
|
|
|
|
WinEnv::~WinEnv() {
|
|
// All threads must be joined before the deletion of
|
|
// thread_status_updater_.
|
|
delete thread_status_updater_;
|
|
}
|
|
|
|
Status WinEnv::GetThreadList(std::vector<ThreadStatus>* thread_list) {
|
|
assert(thread_status_updater_);
|
|
return thread_status_updater_->GetThreadList(thread_list);
|
|
}
|
|
|
|
Status WinEnv::GetHostName(char* name, uint64_t len) {
|
|
return winenv_io_.GetHostName(name, len);
|
|
}
|
|
|
|
void WinEnv::Schedule(void (*function)(void*), void* arg, Env::Priority pri,
|
|
void* tag, void (*unschedFunction)(void* arg)) {
|
|
return winenv_threads_.Schedule(function, arg, pri, tag, unschedFunction);
|
|
}
|
|
|
|
int WinEnv::UnSchedule(void* arg, Env::Priority pri) {
|
|
return winenv_threads_.UnSchedule(arg, pri);
|
|
}
|
|
|
|
void WinEnv::StartThread(void (*function)(void* arg), void* arg) {
|
|
return winenv_threads_.StartThread(function, arg);
|
|
}
|
|
|
|
void WinEnv::WaitForJoin() { return winenv_threads_.WaitForJoin(); }
|
|
|
|
unsigned int WinEnv::GetThreadPoolQueueLen(Env::Priority pri) const {
|
|
return winenv_threads_.GetThreadPoolQueueLen(pri);
|
|
}
|
|
|
|
uint64_t WinEnv::GetThreadID() const { return winenv_threads_.GetThreadID(); }
|
|
|
|
// Allow increasing the number of worker threads.
|
|
void WinEnv::SetBackgroundThreads(int num, Env::Priority pri) {
|
|
return winenv_threads_.SetBackgroundThreads(num, pri);
|
|
}
|
|
|
|
int WinEnv::GetBackgroundThreads(Env::Priority pri) {
|
|
return winenv_threads_.GetBackgroundThreads(pri);
|
|
}
|
|
|
|
void WinEnv::IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) {
|
|
return winenv_threads_.IncBackgroundThreadsIfNeeded(num, pri);
|
|
}
|
|
|
|
} // namespace port
|
|
|
|
std::shared_ptr<FileSystem> FileSystem::Default() {
|
|
return port::WinFileSystem::Default();
|
|
}
|
|
|
|
const std::shared_ptr<SystemClock>& SystemClock::Default() {
|
|
static std::shared_ptr<SystemClock> clock =
|
|
std::make_shared<port::WinClock>();
|
|
return clock;
|
|
}
|
|
|
|
std::unique_ptr<Env> NewCompositeEnv(const std::shared_ptr<FileSystem>& fs) {
|
|
return std::unique_ptr<Env>(new CompositeEnvWrapper(Env::Default(), fs));
|
|
}
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
#endif
|