Added boolean variable to guard fallocate() calls

Summary:
Added boolean variable to guard fallocate() calls.
Set to false to prevent space leaks when tests fail.

Test Plan:
Compliles
Set to false and ran log device tests

Reviewers: sdong, lovro, igor

Reviewed By: igor

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D48027
This commit is contained in:
Lakshmi Narayanan 2015-10-07 10:04:05 -07:00
parent aadf49fe6b
commit 4049bcde39
6 changed files with 45 additions and 19 deletions

View File

@ -68,6 +68,9 @@ struct EnvOptions {
// If true, then use mmap to write data
bool use_mmap_writes = true;
// If false, fallocate() calls are bypassed
bool allow_fallocate = true;
// If true, set the FD_CLOEXEC on open fd.
bool set_fd_cloexec = true;

View File

@ -1006,6 +1006,9 @@ struct DBOptions {
// Default: false
bool allow_mmap_writes;
// If false, fallocate() calls are bypassed
bool allow_fallocate;
// Disable child process inherit open files. Default: true
bool is_fd_close_on_exec;
@ -1137,9 +1140,7 @@ struct DBOptions {
// Options to control the behavior of a database (passed to DB::Open)
struct Options : public DBOptions, public ColumnFamilyOptions {
// Create an Options object with default values for all fields.
Options() :
DBOptions(),
ColumnFamilyOptions() {}
Options() : DBOptions(), ColumnFamilyOptions() {}
Options(const DBOptions& db_options,
const ColumnFamilyOptions& column_family_options)

View File

@ -12,7 +12,6 @@
#include <thread>
#include "port/port.h"
#include "port/sys_time.h"
#include "port/port.h"
#include "rocksdb/options.h"
#include "util/arena.h"
@ -283,6 +282,7 @@ void AssignEnvOptions(EnvOptions* env_options, const DBOptions& options) {
env_options->set_fd_cloexec = options.is_fd_close_on_exec;
env_options->bytes_per_sync = options.bytes_per_sync;
env_options->rate_limiter = options.rate_limiter.get();
env_options->allow_fallocate = options.allow_fallocate;
}
}

View File

@ -351,6 +351,7 @@ class PosixMmapFile : public WritableFile {
char* dst_; // Where to write next (in range [base_,limit_])
char* last_sync_; // Where have we synced up to
uint64_t file_offset_; // Offset of base_ in file
bool allow_fallocate_; // If false, fallocate calls are bypassed
#ifdef ROCKSDB_FALLOCATE_PRESENT
bool fallocate_with_keep_size_;
#endif
@ -393,7 +394,7 @@ class PosixMmapFile : public WritableFile {
TEST_KILL_RANDOM(rocksdb_kill_odds);
// we can't fallocate with FALLOC_FL_KEEP_SIZE here
{
if (allow_fallocate_) {
IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status = fallocate(fd_, 0, file_offset_, map_size_);
if (alloc_status != 0) {
@ -451,7 +452,8 @@ class PosixMmapFile : public WritableFile {
limit_(nullptr),
dst_(nullptr),
last_sync_(nullptr),
file_offset_(0) {
file_offset_(0),
allow_fallocate_(options.allow_fallocate) {
#ifdef ROCKSDB_FALLOCATE_PRESENT
fallocate_with_keep_size_ = options.fallocate_with_keep_size;
#endif
@ -575,8 +577,12 @@ class PosixMmapFile : public WritableFile {
#ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override {
TEST_KILL_RANDOM(rocksdb_kill_odds);
int alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
int alloc_status = 0;
if (allow_fallocate_) {
alloc_status =
fallocate(fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
offset, len);
}
if (alloc_status == 0) {
return Status::OK();
} else {
@ -592,13 +598,17 @@ class PosixWritableFile : public WritableFile {
const std::string filename_;
int fd_;
uint64_t filesize_;
bool allow_fallocate_;
#ifdef ROCKSDB_FALLOCATE_PRESENT
bool fallocate_with_keep_size_;
#endif
public:
PosixWritableFile(const std::string& fname, int fd, const EnvOptions& options)
: filename_(fname), fd_(fd), filesize_(0) {
: filename_(fname),
fd_(fd),
filesize_(0),
allow_fallocate_(options.allow_fallocate) {
#ifdef ROCKSDB_FALLOCATE_PRESENT
fallocate_with_keep_size_ = options.fallocate_with_keep_size;
#endif
@ -660,8 +670,10 @@ class PosixWritableFile : public WritableFile {
// We ignore error since failure of this operation does not affect
// correctness.
IOSTATS_TIMER_GUARD(allocate_nanos);
fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
filesize_, block_size * last_allocated_block - filesize_);
if (allow_fallocate_) {
fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, filesize_,
block_size * last_allocated_block - filesize_);
}
#endif
}
@ -714,9 +726,12 @@ class PosixWritableFile : public WritableFile {
virtual Status Allocate(off_t offset, off_t len) override {
TEST_KILL_RANDOM(rocksdb_kill_odds);
IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status;
alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
int alloc_status = 0;
if (allow_fallocate_) {
alloc_status =
fallocate(fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
offset, len);
}
if (alloc_status == 0) {
return Status::OK();
} else {
@ -1146,7 +1161,7 @@ class PosixEnv : public Env {
} else {
int fd = fileno(f);
#ifdef ROCKSDB_FALLOCATE_PRESENT
fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4 * 1024 * 1024);
fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4 * 1024);
#endif
SetFD_CLOEXEC(fd, nullptr);
result->reset(new PosixLogger(f, &PosixEnv::gettid, this));
@ -1609,10 +1624,11 @@ class PosixEnv : public Env {
};
PosixEnv::PosixEnv() : checkedDiskForMmap_(false),
forceMmapOff(false),
page_size_(getpagesize()),
thread_pools_(Priority::TOTAL) {
PosixEnv::PosixEnv()
: checkedDiskForMmap_(false),
forceMmapOff(false),
page_size_(getpagesize()),
thread_pools_(Priority::TOTAL) {
PthreadCall("mutex_init", pthread_mutex_init(&mu_, nullptr));
for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) {
thread_pools_[pool_id].SetThreadPriority(

View File

@ -239,6 +239,7 @@ DBOptions::DBOptions()
allow_os_buffer(true),
allow_mmap_reads(false),
allow_mmap_writes(false),
allow_fallocate(true),
is_fd_close_on_exec(true),
skip_log_error_on_recovery(false),
stats_dump_period_sec(600),
@ -292,6 +293,7 @@ DBOptions::DBOptions(const Options& options)
allow_os_buffer(options.allow_os_buffer),
allow_mmap_reads(options.allow_mmap_reads),
allow_mmap_writes(options.allow_mmap_writes),
allow_fallocate(options.allow_fallocate),
is_fd_close_on_exec(options.is_fd_close_on_exec),
skip_log_error_on_recovery(options.skip_log_error_on_recovery),
stats_dump_period_sec(options.stats_dump_period_sec),
@ -338,6 +340,7 @@ void DBOptions::Dump(Logger* log) const {
keep_log_file_num);
Header(log, " Options.allow_os_buffer: %d", allow_os_buffer);
Header(log, " Options.allow_mmap_reads: %d", allow_mmap_reads);
Header(log, " Options.allow_fallocate: %d", allow_fallocate);
Header(log, " Options.allow_mmap_writes: %d", allow_mmap_writes);
Header(log, " Options.create_missing_column_families: %d",
create_missing_column_families);

View File

@ -122,6 +122,9 @@ static std::unordered_map<std::string, OptionTypeInfo> db_options_type_info = {
{"allow_mmap_reads",
{offsetof(struct DBOptions, allow_mmap_reads), OptionType::kBoolean,
OptionVerificationType::kNormal}},
{"allow_fallocate",
{offsetof(struct DBOptions, allow_fallocate), OptionType::kBoolean,
OptionVerificationType::kNormal}},
{"allow_mmap_writes",
{offsetof(struct DBOptions, allow_mmap_writes), OptionType::kBoolean,
OptionVerificationType::kNormal}},