use fallocate(FALLOC_FL_PUNCH_HOLE) to release unused blocks at the end of file

Summary:
ftruncate does not always free preallocated unused space at the end of the file.
In some cases, we pin more disk space than we should.

Test Plan: env_test

Reviewers: sdong, rven, yhchiang, igor

Reviewed By: igor

Subscribers: nkg-, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D25641
This commit is contained in:
Lei Jin 2014-10-29 12:24:49 -07:00
parent 97451f837e
commit 44f0ff31c2
2 changed files with 21 additions and 7 deletions

View File

@ -737,15 +737,29 @@ class PosixWritableFile : public WritableFile {
GetPreallocationStatus(&block_size, &last_allocated_block); GetPreallocationStatus(&block_size, &last_allocated_block);
if (last_allocated_block > 0) { if (last_allocated_block > 0) {
// trim the extra space preallocated at the end of the file // trim the extra space preallocated at the end of the file
int dummy __attribute__((unused)); // NOTE(ljin): we probably don't want to surface failure as an IOError,
dummy = ftruncate(fd_, filesize_); // ignore errors // but it will be nice to log these errors.
ftruncate(fd_, filesize_);
#ifdef ROCKSDB_FALLOCATE_PRESENT
// in some file systems, ftruncate only trims trailing space if the
// new file size is smaller than the current size. Calling fallocate
// with FALLOC_FL_PUNCH_HOLE flag to explicitly release these unused
// blocks. FALLOC_FL_PUNCH_HOLE is supported on at least the following
// filesystems:
// XFS (since Linux 2.6.38)
// ext4 (since Linux 3.0)
// Btrfs (since Linux 3.7)
// tmpfs (since Linux 3.5)
// We ignore error since failure of this operation does not affect
// correctness.
fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
filesize_, block_size * last_allocated_block - filesize_);
#endif
} }
if (close(fd_) < 0) { if (close(fd_) < 0) {
if (s.ok()) {
s = IOError(filename_, errno); s = IOError(filename_, errno);
} }
}
fd_ = -1; fd_ = -1;
return s; return s;
} }

View File

@ -518,7 +518,7 @@ TEST(EnvPosixTest, AllocateTest) {
// allocate 100 MB // allocate 100 MB
size_t kPreallocateSize = 100 * 1024 * 1024; size_t kPreallocateSize = 100 * 1024 * 1024;
size_t kBlockSize = 512; size_t kBlockSize = 512;
std::string data = "test"; std::string data(1024 * 1024, 'a');
wfile->SetPreallocationBlockSize(kPreallocateSize); wfile->SetPreallocationBlockSize(kPreallocateSize);
ASSERT_OK(wfile->Append(Slice(data))); ASSERT_OK(wfile->Append(Slice(data)));
ASSERT_OK(wfile->Flush()); ASSERT_OK(wfile->Flush());
@ -540,7 +540,7 @@ TEST(EnvPosixTest, AllocateTest) {
stat(fname.c_str(), &f_stat); stat(fname.c_str(), &f_stat);
ASSERT_EQ((unsigned int)data.size(), f_stat.st_size); ASSERT_EQ((unsigned int)data.size(), f_stat.st_size);
// verify that preallocated blocks were deallocated on file close // verify that preallocated blocks were deallocated on file close
ASSERT_GT(st_blocks, f_stat.st_blocks); ASSERT_EQ((f_stat.st_size + kBlockSize - 1) / kBlockSize, f_stat.st_blocks);
} }
#endif // ROCKSDB_FALLOCATE_PRESENT #endif // ROCKSDB_FALLOCATE_PRESENT