Merge pull request #701 from PraveenSinghRao/usewinapi_notcruntime

Remove usage of C runtime API that has file handle limitation
This commit is contained in:
Igor Canadi 2015-08-27 17:13:55 -07:00
commit e2db15efde
4 changed files with 66 additions and 72 deletions

View File

@ -703,55 +703,69 @@ class AlignedBuffer {
class WinSequentialFile : public SequentialFile {
private:
const std::string filename_;
FILE* file_;
int fd_;
HANDLE file_;
// There is no equivalent of advising away buffered pages as in posix.
// To implement this flag we would need to do unbuffered reads which
// will need to be aligned (not sure there is a guarantee that the buffer
// passed in is aligned).
// Hence we currently ignore this flag. It is used only in a few cases
// which should not be perf critical.
// If perf evaluation finds this to be a problem, we can look into
// implementing this.
bool use_os_buffer_;
public:
WinSequentialFile(const std::string& fname, FILE* f,
WinSequentialFile(const std::string& fname, HANDLE f,
const EnvOptions& options)
: filename_(fname),
file_(f),
fd_(fileno(f)),
use_os_buffer_(options.use_os_buffer) {}
virtual ~WinSequentialFile() {
assert(file_ != nullptr);
fclose(file_);
assert(file_ != INVALID_HANDLE_VALUE);
CloseHandle(file_);
}
virtual Status Read(size_t n, Slice* result, char* scratch) override {
Status s;
size_t r = 0;
// read() and fread() as well as write/fwrite do not guarantee
// to fullfil the entire request in one call thus the loop.
do {
r = fread(scratch, 1, n, file_);
} while (r == 0 && ferror(file_));
// Windows ReadFile API accepts a DWORD.
// While it is possible to read in a loop if n is > UINT_MAX
// it is a highly unlikely case.
if (n > UINT_MAX) {
return IOErrorFromWindowsError(filename_, ERROR_INVALID_PARAMETER);
}
DWORD bytesToRead = static_cast<DWORD>(n); //cast is safe due to the check above
DWORD bytesRead = 0;
BOOL ret = ReadFile(file_, scratch, bytesToRead, &bytesRead, NULL);
if (ret == TRUE) {
r = bytesRead;
} else {
return IOErrorFromWindowsError(filename_, GetLastError());
}
IOSTATS_ADD(bytes_read, r);
*result = Slice(scratch, r);
if (r < n) {
if (feof(file_)) {
// We leave status as ok if we hit the end of the file
// We also clear the error so that the reads can continue
// if a new data is written to the file
clearerr(file_);
} else {
// A partial read with an error: return a non-ok status
s = Status::IOError(filename_, strerror(errno));
}
}
return s;
}
virtual Status Skip(uint64_t n) override {
if (fseek(file_, n, SEEK_CUR)) {
return IOError(filename_, errno);
// Can't handle more than signed max as SetFilePointerEx accepts a signed 64-bit
// integer. As such it is a highly unlikley case to have n so large.
if (n > _I64_MAX) {
return IOErrorFromWindowsError(filename_, ERROR_INVALID_PARAMETER);
}
LARGE_INTEGER li;
li.QuadPart = static_cast<int64_t>(n); //cast is safe due to the check above
BOOL ret = SetFilePointerEx(file_, li, NULL, FILE_CURRENT);
if (ret == FALSE) {
return IOErrorFromWindowsError(filename_, GetLastError());
}
return Status::OK();
}
@ -1314,7 +1328,7 @@ class WinEnv : public Env {
// Corruption test needs to rename and delete files of these kind
// while they are still open with another handle. For that reason we
// allow share_write and delete(allows rename).
HANDLE hFile = 0;
HANDLE hFile = INVALID_HANDLE_VALUE;
{
IOSTATS_TIMER_GUARD(open_nanos);
hFile = CreateFileA(
@ -1329,22 +1343,7 @@ class WinEnv : public Env {
s = IOErrorFromWindowsError("Failed to open NewSequentialFile" + fname,
lastError);
} else {
int fd = _open_osfhandle(reinterpret_cast<intptr_t>(hFile), 0);
if (fd == -1) {
auto code = errno;
CloseHandle(hFile);
s = IOError("Failed to _open_osfhandle for NewSequentialFile: " + fname,
code);
} else {
FILE* file = _fdopen(fd, "rb");
if (file == nullptr) {
auto code = errno;
_close(fd);
s = IOError("Failed to fdopen NewSequentialFile: " + fname, code);
} else {
result->reset(new WinSequentialFile(fname, file, options));
}
}
result->reset(new WinSequentialFile(fname, hFile, options));
}
return s;
}
@ -1811,22 +1810,7 @@ class WinEnv : public Env {
// Set creation, last access and last write time to the same value
SetFileTime(hFile, &ft, &ft, &ft);
}
int fd = _open_osfhandle(reinterpret_cast<intptr_t>(hFile), 0);
if (fd == -1) {
auto code = errno;
CloseHandle(hFile);
s = IOError("Failed to _open_osfhandle: " + fname, code);
} else {
FILE* file = _fdopen(fd, "w");
if (file == nullptr) {
auto code = errno;
_close(fd);
s = IOError("Failed to fdopen: " + fname, code);
} else {
result->reset(new WinLogger(&WinEnv::gettid, this, file));
}
}
result->reset(new WinLogger(&WinEnv::gettid, this, hFile));
}
return s;
}

View File

@ -92,6 +92,7 @@ typedef SSIZE_T ssize_t;
namespace rocksdb {
#define PREFETCH(addr, rw, locality)
std::string GetWindowsErrSz(DWORD err);
namespace port {

View File

@ -18,13 +18,16 @@
#include <atomic>
#include "rocksdb/env.h"
#include <Windows.h>
#include "port/win/win_logger.h"
#include "port/sys_time.h"
#include "util/iostats_context_imp.h"
namespace rocksdb {
WinLogger::WinLogger(uint64_t (*gettid)(), Env* env, FILE* file,
WinLogger::WinLogger(uint64_t (*gettid)(), Env* env, HANDLE file,
const InfoLogLevel log_level)
: Logger(log_level),
gettid_(gettid),
@ -35,20 +38,23 @@ WinLogger::WinLogger(uint64_t (*gettid)(), Env* env, FILE* file,
file_(file) {}
void WinLogger::DebugWriter(const char* str, int len) {
size_t sz = fwrite(str, 1, len, file_);
if (sz == 0) {
perror("fwrite .. [BAD]");
DWORD bytesWritten = 0;
BOOL ret = WriteFile(file_, str, len, &bytesWritten, NULL);
if (ret == FALSE) {
std::string errSz = GetWindowsErrSz(GetLastError());
fprintf(stderr, errSz.c_str());
}
}
WinLogger::~WinLogger() { close(); }
void WinLogger::close() { fclose(file_); }
void WinLogger::close() { CloseHandle(file_); }
void WinLogger::Flush() {
if (flush_pending_) {
flush_pending_ = false;
fflush(file_);
// With Windows API writes go to OS buffers directly so no fflush needed unlike
// with C runtime API. We don't flush all the way to disk for perf reasons.
}
last_flush_micros_ = env_->NowMicros();
@ -118,14 +124,16 @@ void WinLogger::Logv(const char* format, va_list ap) {
assert(p <= limit);
const size_t write_size = p - base;
size_t sz = fwrite(base, 1, write_size, file_);
if (sz == 0) {
perror("fwrite .. [BAD]");
DWORD bytesWritten = 0;
BOOL ret = WriteFile(file_, base, write_size, &bytesWritten, NULL);
if (ret == FALSE) {
std::string errSz = GetWindowsErrSz(GetLastError());
fprintf(stderr, errSz.c_str());
}
flush_pending_ = true;
assert(sz == write_size);
if (sz > 0) {
assert(bytesWritten == write_size);
if (bytesWritten > 0) {
log_size_ += write_size;
}
@ -133,7 +141,8 @@ void WinLogger::Logv(const char* format, va_list ap) {
static_cast<uint64_t>(now_tv.tv_sec) * 1000000 + now_tv.tv_usec;
if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) {
flush_pending_ = false;
fflush(file_);
// With Windows API writes go to OS buffers directly so no fflush needed unlike
// with C runtime API. We don't flush all the way to disk for perf reasons.
last_flush_micros_ = now_micros;
}
break;

View File

@ -24,7 +24,7 @@ const int kDebugLogChunkSize = 128 * 1024;
class WinLogger : public rocksdb::Logger {
public:
WinLogger(uint64_t (*gettid)(), Env* env, FILE* file,
WinLogger(uint64_t (*gettid)(), Env* env, HANDLE file,
const InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL);
virtual ~WinLogger();
@ -44,7 +44,7 @@ class WinLogger : public rocksdb::Logger {
void DebugWriter(const char* str, int len);
private:
FILE* file_;
HANDLE file_;
uint64_t (*gettid_)(); // Return the thread id for the current thread
std::atomic_size_t log_size_;
std::atomic_uint_fast64_t last_flush_micros_;