[3.6.fb] use fallocate(FALLOC_FL_PUNCH_HOLE) to release unused blocks at the end of file

Summary:
ftruncate does not always free preallocated unused space at the end of file.
In some cases, we pin more disk space than we should.
This fix was previously reviewed at https://reviews.facebook.net/D25641

Test Plan: env_test

Reviewers: igor, ljin, sdong

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D27975
This commit is contained in:
Yueh-Hsuan Chiang 2014-10-29 22:25:46 -07:00
parent ca41f994dd
commit 6ef136a7c6
2 changed files with 21 additions and 7 deletions

View File

@ -736,14 +736,28 @@ class PosixWritableFile : public WritableFile {
GetPreallocationStatus(&block_size, &last_allocated_block); GetPreallocationStatus(&block_size, &last_allocated_block);
if (last_allocated_block > 0) { if (last_allocated_block > 0) {
// trim the extra space preallocated at the end of the file // trim the extra space preallocated at the end of the file
int dummy __attribute__((unused)); // NOTE(ljin): we probably don't want to surface failure as an IOError,
dummy = ftruncate(fd_, filesize_); // ignore errors // but it will be nice to log these errors.
ftruncate(fd_, filesize_);
#ifdef ROCKSDB_FALLOCATE_PRESENT
// in some file systems, ftruncate only trims trailing space if the
// new file size is smaller than the current size. Calling fallocate
// with FALLOC_FL_PUNCH_HOLE flag to explicitly release these unused
// blocks. FALLOC_FL_PUNCH_HOLE is supported on at least the following
// filesystems:
// XFS (since Linux 2.6.38)
// ext4 (since Linux 3.0)
// Btrfs (since Linux 3.7)
// tmpfs (since Linux 3.5)
// We ignore error since failure of this operation does not affect
// correctness.
fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
filesize_, block_size * last_allocated_block - filesize_);
#endif
} }
if (close(fd_) < 0) { if (close(fd_) < 0) {
if (s.ok()) { s = IOError(filename_, errno);
s = IOError(filename_, errno);
}
} }
fd_ = -1; fd_ = -1;
return s; return s;

View File

@ -516,7 +516,7 @@ TEST(EnvPosixTest, AllocateTest) {
// allocate 100 MB // allocate 100 MB
size_t kPreallocateSize = 100 * 1024 * 1024; size_t kPreallocateSize = 100 * 1024 * 1024;
size_t kBlockSize = 512; size_t kBlockSize = 512;
std::string data = "test"; std::string data(1024 * 1024, 'a');
wfile->SetPreallocationBlockSize(kPreallocateSize); wfile->SetPreallocationBlockSize(kPreallocateSize);
ASSERT_OK(wfile->Append(Slice(data))); ASSERT_OK(wfile->Append(Slice(data)));
ASSERT_OK(wfile->Flush()); ASSERT_OK(wfile->Flush());
@ -538,7 +538,7 @@ TEST(EnvPosixTest, AllocateTest) {
stat(fname.c_str(), &f_stat); stat(fname.c_str(), &f_stat);
ASSERT_EQ((unsigned int)data.size(), f_stat.st_size); ASSERT_EQ((unsigned int)data.size(), f_stat.st_size);
// verify that preallocated blocks were deallocated on file close // verify that preallocated blocks were deallocated on file close
ASSERT_GT(st_blocks, f_stat.st_blocks); ASSERT_EQ((f_stat.st_size + kBlockSize - 1) / kBlockSize, f_stat.st_blocks);
} }
#endif // ROCKSDB_FALLOCATE_PRESENT #endif // ROCKSDB_FALLOCATE_PRESENT