Use posix_fallocate as default.
Summary: ftruncate does not report an error when the disk is full. This causes a SIGBUS when the database issues a Put call on a full disk. Use posix_fallocate for allocation instead of ftruncate. Add a check to use mmapped files only on ext4, xfs and tmpfs, as posix_fallocate is very slow on ext3 and older. Test Plan: make all check Reviewers: dhruba, chip Reviewed By: dhruba CC: adsharma, leveldb Differential Revision: https://reviews.facebook.net/D9291
This commit is contained in:
parent
4e581c6ab4
commit
1ba5abca97
2
README
2
README
@ -1,7 +1,7 @@
|
|||||||
rocksdb: A persistent key-value store for flash storage
|
rocksdb: A persistent key-value store for flash storage
|
||||||
Authors: The Facebook Database Engineering Team
|
Authors: The Facebook Database Engineering Team
|
||||||
|
|
||||||
This code is a library that forms the core building block for a fast
|
This code is a library that forms the core building block for a fast
|
||||||
key value server, especially suited for storing data on flash drives.
|
key value server, especially suited for storing data on flash drives.
|
||||||
It has a Log-Structured-Merge-Database (LSM) design with flexible tradeoffs
|
It has a Log-Structured-Merge-Database (LSM) design with flexible tradeoffs
|
||||||
between Write-Amplification-Factor(WAF), Read-Amplification-Factor (RAF)
|
between Write-Amplification-Factor(WAF), Read-Amplification-Factor (RAF)
|
||||||
|
@ -14,8 +14,10 @@
|
|||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
#include <sys/statfs.h>
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
#include <sys/vfs.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#if defined(OS_LINUX)
|
#if defined(OS_LINUX)
|
||||||
@ -31,6 +33,16 @@
|
|||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/posix_logger.h"
|
#include "util/posix_logger.h"
|
||||||
|
|
||||||
|
#if !defined(TMPFS_MAGIC)
|
||||||
|
#define TMPFS_MAGIC 0x01021994
|
||||||
|
#endif
|
||||||
|
#if !defined(XFS_SUPER_MAGIC)
|
||||||
|
#define XFS_SUPER_MAGIC 0x58465342
|
||||||
|
#endif
|
||||||
|
#if !defined(EXT4_SUPER_MAGIC)
|
||||||
|
#define EXT4_SUPER_MAGIC 0xEF53
|
||||||
|
#endif
|
||||||
|
|
||||||
bool useOsBuffer = 1; // cache data in OS buffers
|
bool useOsBuffer = 1; // cache data in OS buffers
|
||||||
bool useFsReadAhead = 1; // allow filesystem to do readaheads
|
bool useFsReadAhead = 1; // allow filesystem to do readaheads
|
||||||
bool useMmapRead = 0; // do not use mmaps for reading files
|
bool useMmapRead = 0; // do not use mmaps for reading files
|
||||||
@ -224,21 +236,26 @@ class PosixMmapFile : public WritableFile {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// MapNewRegion: makes room for the next map_size_ bytes of the file starting
// at file_offset_ and mmaps that range read/write, then points base_, limit_,
// dst_ and last_sync_ at the fresh mapping. Requires base_ == nullptr on entry.
// This span shows both sides of the diff: the old version returned bool and
// grew the file with ftruncate; the new version returns a Status and reserves
// the range with posix_fallocate so that an allocation failure is reported
// here as an IOError.
bool MapNewRegion() {
|
Status MapNewRegion() {
|
||||||
assert(base_ == nullptr);
|
assert(base_ == nullptr);
|
||||||
if (ftruncate(fd_, file_offset_ + map_size_) < 0) {
|
|
||||||
return false;
|
// alloc_status is handed to strerror below, i.e. the return value itself is
// treated as the error number rather than errno.
int alloc_status = posix_fallocate(fd_, file_offset_, map_size_);
|
||||||
|
if (alloc_status != 0) {
|
||||||
|
// NOTE(review): filename_ is concatenated directly with "Error : ", so the
// resulting message has no separator between the file name and "Error".
return Status::IOError("Error allocating space to file : " + filename_ +
|
||||||
|
"Error : " + strerror(alloc_status));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void* ptr = mmap(nullptr, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED,
|
void* ptr = mmap(nullptr, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||||
fd_, file_offset_);
|
fd_, file_offset_);
|
||||||
if (ptr == MAP_FAILED) {
|
if (ptr == MAP_FAILED) {
|
||||||
return false;
|
return Status::IOError("MMap failed on " + filename_);
|
||||||
}
|
}
|
||||||
// The new mapping starts empty: the write cursor and the last-synced marker
// both begin at base_.
base_ = reinterpret_cast<char*>(ptr);
|
base_ = reinterpret_cast<char*>(ptr);
|
||||||
limit_ = base_ + map_size_;
|
limit_ = base_ + map_size_;
|
||||||
dst_ = base_;
|
dst_ = base_;
|
||||||
last_sync_ = base_;
|
last_sync_ = base_;
|
||||||
return true;
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -272,9 +289,11 @@ class PosixMmapFile : public WritableFile {
|
|||||||
assert(dst_ <= limit_);
|
assert(dst_ <= limit_);
|
||||||
size_t avail = limit_ - dst_;
|
size_t avail = limit_ - dst_;
|
||||||
if (avail == 0) {
|
if (avail == 0) {
|
||||||
if (!UnmapCurrentRegion() ||
|
if (UnmapCurrentRegion()) {
|
||||||
!MapNewRegion()) {
|
Status s = MapNewRegion();
|
||||||
return IOError(filename_, errno);
|
if (!s.ok()) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -614,6 +633,15 @@ class PosixEnv : public Env {
|
|||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
s = IOError(fname, errno);
|
s = IOError(fname, errno);
|
||||||
} else {
|
} else {
|
||||||
|
if (!checkedDiskForMmap_) {
|
||||||
|
// this will be executed once in the program's lifetime.
|
||||||
|
if (useMmapWrite) {
|
||||||
|
// Disable mmap writes unless the filesystem is ext4, xfs or tmpfs.
|
||||||
|
useMmapWrite = SupportsFastAllocate(fname);
|
||||||
|
}
|
||||||
|
checkedDiskForMmap_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (useMmapWrite) {
|
if (useMmapWrite) {
|
||||||
result->reset(new PosixMmapFile(fname, fd, page_size_));
|
result->reset(new PosixMmapFile(fname, fd, page_size_));
|
||||||
} else {
|
} else {
|
||||||
@ -851,6 +879,8 @@ class PosixEnv : public Env {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
bool checkedDiskForMmap_ = false;
|
||||||
|
|
||||||
void PthreadCall(const char* label, int result) {
|
void PthreadCall(const char* label, int result) {
|
||||||
if (result != 0) {
|
if (result != 0) {
|
||||||
fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
|
fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
|
||||||
@ -875,6 +905,23 @@ class PosixEnv : public Env {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true when the filesystem holding `path` reports one of the
// whitelisted f_type magic numbers (EXT4_SUPER_MAGIC, XFS_SUPER_MAGIC,
// TMPFS_MAGIC); returns false for any other type or when statfs fails.
bool SupportsFastAllocate(const std::string& path) {
|
||||||
|
struct statfs s;
|
||||||
|
// A non-zero return from statfs is treated as "not supported".
if (statfs(path.c_str(), &s)){
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
switch (s.f_type) {
|
||||||
|
// NOTE(review): EXT4_SUPER_MAGIC is 0xEF53, which appears to be the same
// magic value ext2 and ext3 report, so this case may not distinguish ext4
// from ext3 -- confirm against statfs(2).
case EXT4_SUPER_MAGIC:
|
||||||
|
return true;
|
||||||
|
case XFS_SUPER_MAGIC:
|
||||||
|
return true;
|
||||||
|
case TMPFS_MAGIC:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
size_t page_size_;
|
size_t page_size_;
|
||||||
pthread_mutex_t mu_;
|
pthread_mutex_t mu_;
|
||||||
pthread_cond_t bgsignal_;
|
pthread_cond_t bgsignal_;
|
||||||
|
Loading…
Reference in New Issue
Block a user