Avoid calling fallocate with UINT64_MAX

Summary:
When user doesn't set a limit on compaction output file size, let's use the sum of the input files' sizes. This will avoid passing UINT64_MAX as fallocate()'s length. Reported in #2249.

Test setup:
- command: `TEST_TMPDIR=/data/rocksdb-test/ strace -e fallocate ./db_compaction_test --gtest_filter=DBCompactionTest.ManualCompactionUnknownOutputSize`
- filesystem: xfs

before this diff:
`fallocate(10, 01, 0, 1844674407370955160) = -1 ENOSPC (No space left on device)`

after this diff:
`fallocate(10, 01, 0, 1977)              = 0`
Closes https://github.com/facebook/rocksdb/pull/2252

Differential Revision: D5007275

Pulled By: ajkr

fbshipit-source-id: 4491404a6ae8a41328aede2e2d6f4d9ac3e38880
This commit is contained in:
Andrew Kryczka 2017-05-04 17:40:29 -07:00 committed by Facebook Github Bot
parent a45e98a5b5
commit 7c1c8ce5ac
4 changed files with 47 additions and 10 deletions

View File

@ -415,14 +415,15 @@ void Compaction::Summary(char* output, int len) {
uint64_t Compaction::OutputFilePreallocationSize() const {
uint64_t preallocation_size = 0;
if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel ||
output_level() > 0) {
if (max_output_file_size_ != port::kMaxUint64 &&
(cfd_->ioptions()->compaction_style == kCompactionStyleLevel ||
output_level() > 0)) {
preallocation_size = max_output_file_size_;
} else {
// output_level() == 0
assert(num_input_levels() > 0);
for (const auto& f : inputs_[0].files) {
preallocation_size += f->fd.GetFileSize();
for (const auto& level_files : inputs_) {
for (const auto& file : level_files.files) {
preallocation_size += file->fd.GetFileSize();
}
}
}
// Over-estimate slightly so we don't end up just barely crossing

View File

@ -780,6 +780,41 @@ TEST_F(DBCompactionTest, ZeroSeqIdCompaction) {
ASSERT_OK(Put("", ""));
}
TEST_F(DBCompactionTest, ManualCompactionUnknownOutputSize) {
// github issue #2249
Options options = CurrentOptions();
options.compaction_style = kCompactionStyleLevel;
options.level0_file_num_compaction_trigger = 3;
DestroyAndReopen(options);
// create two files in l1 that we can compact
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < options.level0_file_num_compaction_trigger; j++) {
// make l0 files' ranges overlap to avoid trivial move
Put(std::to_string(2 * i), std::string(1, 'A'));
Put(std::to_string(2 * i + 1), std::string(1, 'A'));
Flush();
dbfull()->TEST_WaitForFlushMemTable();
}
dbfull()->TEST_WaitForCompact();
ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
ASSERT_EQ(NumTableFilesAtLevel(1, 0), i + 1);
}
ColumnFamilyMetaData cf_meta;
dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta);
ASSERT_EQ(2, cf_meta.levels[1].files.size());
std::vector<std::string> input_filenames;
for (const auto& sst_file : cf_meta.levels[1].files) {
input_filenames.push_back(sst_file.name);
}
// note CompactionOptions::output_file_size_limit is unset.
CompactionOptions compact_opt;
compact_opt.compression = kNoCompression;
dbfull()->CompactFiles(compact_opt, input_filenames, 1);
}
// Check that writes done during a memtable compaction are recovered
// if the database is shutdown during the memtable compaction.
TEST_F(DBCompactionTest, RecoverDuringMemtableCompaction) {

View File

@ -277,6 +277,9 @@ class SpecialEnv : public EnvWrapper {
bool use_direct_io() const override {
return base_->use_direct_io();
}
Status Allocate(uint64_t offset, uint64_t len) override {
return base_->Allocate(offset, len);
}
};
class ManifestFile : public WritableFile {
public:

View File

@ -703,14 +703,12 @@ class WritableFile {
}
}
protected:
/*
* Pre-allocate space for a file.
*/
// Pre-allocates space for a file.
virtual Status Allocate(uint64_t offset, uint64_t len) {
return Status::OK();
}
protected:
size_t preallocation_block_size() { return preallocation_block_size_; }
private: