From 4049bcde396842f802d123c44340128612e6d077 Mon Sep 17 00:00:00 2001 From: Lakshmi Narayanan Date: Wed, 7 Oct 2015 10:04:05 -0700 Subject: [PATCH] Added boolean variable to guard fallocate() calls Summary: Added boolean variable to guard fallocate() calls. Set to false to prevent space leaks when tests fail. Test Plan: Compliles Set to false and ran log device tests Reviewers: sdong, lovro, igor Reviewed By: igor Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D48027 --- include/rocksdb/env.h | 3 +++ include/rocksdb/options.h | 7 +++--- util/env.cc | 2 +- util/env_posix.cc | 46 ++++++++++++++++++++++++++------------- util/options.cc | 3 +++ util/options_helper.h | 3 +++ 6 files changed, 45 insertions(+), 19 deletions(-) diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 6300a4703..57c60f0c9 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -68,6 +68,9 @@ struct EnvOptions { // If true, then use mmap to write data bool use_mmap_writes = true; + // If false, fallocate() calls are bypassed + bool allow_fallocate = true; + // If true, set the FD_CLOEXEC on open fd. bool set_fd_cloexec = true; diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index b89d5d247..d4066cf20 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1006,6 +1006,9 @@ struct DBOptions { // Default: false bool allow_mmap_writes; + // If false, fallocate() calls are bypassed + bool allow_fallocate; + // Disable child process inherit open files. Default: true bool is_fd_close_on_exec; @@ -1137,9 +1140,7 @@ struct DBOptions { // Options to control the behavior of a database (passed to DB::Open) struct Options : public DBOptions, public ColumnFamilyOptions { // Create an Options object with default values for all fields. - Options() : - DBOptions(), - ColumnFamilyOptions() {} + Options() : DBOptions(), ColumnFamilyOptions() {} Options(const DBOptions& db_options, const ColumnFamilyOptions& column_family_options) diff --git a/util/env.cc b/util/env.cc index be1d9cd76..effa7f552 100644 --- a/util/env.cc +++ b/util/env.cc @@ -12,7 +12,6 @@ #include #include "port/port.h" #include "port/sys_time.h" -#include "port/port.h" #include "rocksdb/options.h" #include "util/arena.h" @@ -283,6 +282,7 @@ void AssignEnvOptions(EnvOptions* env_options, const DBOptions& options) { env_options->set_fd_cloexec = options.is_fd_close_on_exec; env_options->bytes_per_sync = options.bytes_per_sync; env_options->rate_limiter = options.rate_limiter.get(); + env_options->allow_fallocate = options.allow_fallocate; } } diff --git a/util/env_posix.cc b/util/env_posix.cc index 4f35f8ed4..4b2cb9a49 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -351,6 +351,7 @@ class PosixMmapFile : public WritableFile { char* dst_; // Where to write next (in range [base_,limit_]) char* last_sync_; // Where have we synced up to uint64_t file_offset_; // Offset of base_ in file + bool allow_fallocate_; // If false, fallocate calls are bypassed #ifdef ROCKSDB_FALLOCATE_PRESENT bool fallocate_with_keep_size_; #endif @@ -393,7 +394,7 @@ class PosixMmapFile : public WritableFile { TEST_KILL_RANDOM(rocksdb_kill_odds); // we can't fallocate with FALLOC_FL_KEEP_SIZE here - { + if (allow_fallocate_) { IOSTATS_TIMER_GUARD(allocate_nanos); int alloc_status = fallocate(fd_, 0, file_offset_, map_size_); if (alloc_status != 0) { @@ -451,7 +452,8 @@ class PosixMmapFile : public WritableFile { limit_(nullptr), dst_(nullptr), last_sync_(nullptr), - file_offset_(0) { + file_offset_(0), + allow_fallocate_(options.allow_fallocate) { #ifdef ROCKSDB_FALLOCATE_PRESENT fallocate_with_keep_size_ = options.fallocate_with_keep_size; #endif @@ -575,8 +577,12 @@ class PosixMmapFile : public WritableFile { #ifdef ROCKSDB_FALLOCATE_PRESENT virtual Status Allocate(off_t offset, off_t len) override { TEST_KILL_RANDOM(rocksdb_kill_odds); - int alloc_status = fallocate( - fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); + int alloc_status = 0; + if (allow_fallocate_) { + alloc_status = + fallocate(fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, + offset, len); + } if (alloc_status == 0) { return Status::OK(); } else { @@ -592,13 +598,17 @@ class PosixWritableFile : public WritableFile { const std::string filename_; int fd_; uint64_t filesize_; + bool allow_fallocate_; #ifdef ROCKSDB_FALLOCATE_PRESENT bool fallocate_with_keep_size_; #endif public: PosixWritableFile(const std::string& fname, int fd, const EnvOptions& options) - : filename_(fname), fd_(fd), filesize_(0) { + : filename_(fname), + fd_(fd), + filesize_(0), + allow_fallocate_(options.allow_fallocate) { #ifdef ROCKSDB_FALLOCATE_PRESENT fallocate_with_keep_size_ = options.fallocate_with_keep_size; #endif @@ -660,8 +670,10 @@ class PosixWritableFile : public WritableFile { // We ignore error since failure of this operation does not affect // correctness. IOSTATS_TIMER_GUARD(allocate_nanos); - fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, - filesize_, block_size * last_allocated_block - filesize_); + if (allow_fallocate_) { + fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, filesize_, + block_size * last_allocated_block - filesize_); + } #endif } @@ -714,9 +726,12 @@ class PosixWritableFile : public WritableFile { virtual Status Allocate(off_t offset, off_t len) override { TEST_KILL_RANDOM(rocksdb_kill_odds); IOSTATS_TIMER_GUARD(allocate_nanos); - int alloc_status; - alloc_status = fallocate( - fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); + int alloc_status = 0; + if (allow_fallocate_) { + alloc_status = + fallocate(fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, + offset, len); + } if (alloc_status == 0) { return Status::OK(); } else { @@ -1146,7 +1161,7 @@ class PosixEnv : public Env { } else { int fd = fileno(f); #ifdef ROCKSDB_FALLOCATE_PRESENT - fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4 * 1024 * 1024); + fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4 * 1024); #endif SetFD_CLOEXEC(fd, nullptr); result->reset(new PosixLogger(f, &PosixEnv::gettid, this)); @@ -1609,10 +1624,11 @@ class PosixEnv : public Env { }; -PosixEnv::PosixEnv() : checkedDiskForMmap_(false), - forceMmapOff(false), - page_size_(getpagesize()), - thread_pools_(Priority::TOTAL) { +PosixEnv::PosixEnv() + : checkedDiskForMmap_(false), + forceMmapOff(false), + page_size_(getpagesize()), + thread_pools_(Priority::TOTAL) { PthreadCall("mutex_init", pthread_mutex_init(&mu_, nullptr)); for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) { thread_pools_[pool_id].SetThreadPriority( diff --git a/util/options.cc b/util/options.cc index 8d8a1e23f..14b69e678 100644 --- a/util/options.cc +++ b/util/options.cc @@ -239,6 +239,7 @@ DBOptions::DBOptions() allow_os_buffer(true), allow_mmap_reads(false), allow_mmap_writes(false), + allow_fallocate(true), is_fd_close_on_exec(true), skip_log_error_on_recovery(false), stats_dump_period_sec(600), @@ -292,6 +293,7 @@ DBOptions::DBOptions(const Options& options) allow_os_buffer(options.allow_os_buffer), allow_mmap_reads(options.allow_mmap_reads), allow_mmap_writes(options.allow_mmap_writes), + allow_fallocate(options.allow_fallocate), is_fd_close_on_exec(options.is_fd_close_on_exec), skip_log_error_on_recovery(options.skip_log_error_on_recovery), stats_dump_period_sec(options.stats_dump_period_sec), @@ -338,6 +340,7 @@ void DBOptions::Dump(Logger* log) const { keep_log_file_num); Header(log, " Options.allow_os_buffer: %d", allow_os_buffer); Header(log, " Options.allow_mmap_reads: %d", allow_mmap_reads); + Header(log, " Options.allow_fallocate: %d", allow_fallocate); Header(log, " Options.allow_mmap_writes: %d", allow_mmap_writes); Header(log, " Options.create_missing_column_families: %d", create_missing_column_families); diff --git a/util/options_helper.h b/util/options_helper.h index d8bce3c61..d72a375f1 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -122,6 +122,9 @@ static std::unordered_map db_options_type_info = { {"allow_mmap_reads", {offsetof(struct DBOptions, allow_mmap_reads), OptionType::kBoolean, OptionVerificationType::kNormal}}, + {"allow_fallocate", + {offsetof(struct DBOptions, allow_fallocate), OptionType::kBoolean, + OptionVerificationType::kNormal}}, {"allow_mmap_writes", {offsetof(struct DBOptions, allow_mmap_writes), OptionType::kBoolean, OptionVerificationType::kNormal}},