From 6ef136a7c688236f6b9dc80d724ece504065dd48 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Wed, 29 Oct 2014 22:25:46 -0700 Subject: [PATCH] [3.6.fb] use fallocate(FALLOC_FL_PUNCH_HOLE) to release unused blocks at the end of file Summary: ftruncate does not always free preallocated unused space at the end of file. In some cases, we pin more disk space than we should. This is a fix previously reviewed at https://reviews.facebook.net/D25641 Test Plan: env_test Reviewers: igor, ljin, sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D27975 --- util/env_posix.cc | 24 +++++++++++++++++++----- util/env_test.cc | 4 ++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/util/env_posix.cc b/util/env_posix.cc index cf917e874..98d0c3df2 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -736,14 +736,28 @@ class PosixWritableFile : public WritableFile { GetPreallocationStatus(&block_size, &last_allocated_block); if (last_allocated_block > 0) { // trim the extra space preallocated at the end of the file - int dummy __attribute__((unused)); - dummy = ftruncate(fd_, filesize_); // ignore errors + // NOTE(ljin): we probably don't want to surface failure as an IOError, + // but it will be nice to log these errors. + ftruncate(fd_, filesize_); +#ifdef ROCKSDB_FALLOCATE_PRESENT + // in some file systems, ftruncate only trims trailing space if the + // new file size is smaller than the current size. Calling fallocate + // with FALLOC_FL_PUNCH_HOLE flag to explicitly release these unused + // blocks. FALLOC_FL_PUNCH_HOLE is supported on at least the following + // filesystems: + // XFS (since Linux 2.6.38) + // ext4 (since Linux 3.0) + // Btrfs (since Linux 3.7) + // tmpfs (since Linux 3.5) + // We ignore error since failure of this operation does not affect + // correctness. 
+ fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + filesize_, block_size * last_allocated_block - filesize_); +#endif } if (close(fd_) < 0) { - if (s.ok()) { - s = IOError(filename_, errno); - } + s = IOError(filename_, errno); } fd_ = -1; return s; diff --git a/util/env_test.cc b/util/env_test.cc index 1779f1aa0..3757c2c41 100644 --- a/util/env_test.cc +++ b/util/env_test.cc @@ -516,7 +516,7 @@ TEST(EnvPosixTest, AllocateTest) { // allocate 100 MB size_t kPreallocateSize = 100 * 1024 * 1024; size_t kBlockSize = 512; - std::string data = "test"; + std::string data(1024 * 1024, 'a'); wfile->SetPreallocationBlockSize(kPreallocateSize); ASSERT_OK(wfile->Append(Slice(data))); ASSERT_OK(wfile->Flush()); @@ -538,7 +538,7 @@ TEST(EnvPosixTest, AllocateTest) { stat(fname.c_str(), &f_stat); ASSERT_EQ((unsigned int)data.size(), f_stat.st_size); // verify that preallocated blocks were deallocated on file close - ASSERT_GT(st_blocks, f_stat.st_blocks); + ASSERT_EQ((f_stat.st_size + kBlockSize - 1) / kBlockSize, f_stat.st_blocks); } #endif // ROCKSDB_FALLOCATE_PRESENT