From f2e68d2becfa9a8728f7088425049795e83aac1a Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Thu, 4 May 2017 17:40:29 -0700 Subject: [PATCH] Avoid calling fallocate with UINT64_MAX Summary: When user doesn't set a limit on compaction output file size, let's use the sum of the input files' sizes. This will avoid passing UINT64_MAX as fallocate()'s length. Reported in #2249. Test setup: - command: `TEST_TMPDIR=/data/rocksdb-test/ strace -e fallocate ./db_compaction_test --gtest_filter=DBCompactionTest.ManualCompactionUnknownOutputSize` - filesystem: xfs before this diff: `fallocate(10, 01, 0, 1844674407370955160) = -1 ENOSPC (No space left on device)` after this diff: `fallocate(10, 01, 0, 1977) = 0` Closes https://github.com/facebook/rocksdb/pull/2252 Differential Revision: D5007275 Pulled By: ajkr fbshipit-source-id: 4491404a6ae8a41328aede2e2d6f4d9ac3e38880 --- db/compaction.cc | 13 +++++++------ db/db_compaction_test.cc | 35 +++++++++++++++++++++++++++++++++++ db/db_test_util.h | 3 +++ include/rocksdb/env.h | 6 ++---- 4 files changed, 47 insertions(+), 10 deletions(-) diff --git a/db/compaction.cc b/db/compaction.cc index 315edaf8a..5577a06c9 100644 --- a/db/compaction.cc +++ b/db/compaction.cc @@ -413,14 +413,15 @@ void Compaction::Summary(char* output, int len) { uint64_t Compaction::OutputFilePreallocationSize() const { uint64_t preallocation_size = 0; - if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel || - output_level() > 0) { + if (max_output_file_size_ != port::kMaxUint64 && + (cfd_->ioptions()->compaction_style == kCompactionStyleLevel || + output_level() > 0)) { preallocation_size = max_output_file_size_; } else { - // output_level() == 0 - assert(num_input_levels() > 0); - for (const auto& f : inputs_[0].files) { - preallocation_size += f->fd.GetFileSize(); + for (const auto& level_files : inputs_) { + for (const auto& file : level_files.files) { + preallocation_size += file->fd.GetFileSize(); + } } } // Over-estimate slightly so we don't end up just barely crossing diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index f51a8a3ad..bc9a8d473 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -778,6 +778,41 @@ TEST_F(DBCompactionTest, ZeroSeqIdCompaction) { ASSERT_OK(Put("", "")); } +TEST_F(DBCompactionTest, ManualCompactionUnknownOutputSize) { + // github issue #2249 + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleLevel; + options.level0_file_num_compaction_trigger = 3; + DestroyAndReopen(options); + + // create two files in l1 that we can compact + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < options.level0_file_num_compaction_trigger; j++) { + // make l0 files' ranges overlap to avoid trivial move + Put(std::to_string(2 * i), std::string(1, 'A')); + Put(std::to_string(2 * i + 1), std::string(1, 'A')); + Flush(); + dbfull()->TEST_WaitForFlushMemTable(); + } + dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); + ASSERT_EQ(NumTableFilesAtLevel(1, 0), i + 1); + } + + ColumnFamilyMetaData cf_meta; + dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta); + ASSERT_EQ(2, cf_meta.levels[1].files.size()); + std::vector input_filenames; + for (const auto& sst_file : cf_meta.levels[1].files) { + input_filenames.push_back(sst_file.name); + } + + // note CompactionOptions::output_file_size_limit is unset. + CompactionOptions compact_opt; + compact_opt.compression = kNoCompression; + dbfull()->CompactFiles(compact_opt, input_filenames, 1); +} + // Check that writes done during a memtable compaction are recovered // if the database is shutdown during the memtable compaction. TEST_F(DBCompactionTest, RecoverDuringMemtableCompaction) { diff --git a/db/db_test_util.h b/db/db_test_util.h index 65e983a50..03cb4f245 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -277,6 +277,9 @@ class SpecialEnv : public EnvWrapper { bool use_direct_io() const override { return base_->use_direct_io(); } + Status Allocate(uint64_t offset, uint64_t len) override { + return base_->Allocate(offset, len); + } }; class ManifestFile : public WritableFile { public: diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index f1d6e666b..887ccd151 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -713,14 +713,12 @@ class WritableFile { } } - protected: - /* - * Pre-allocate space for a file. - */ + // Pre-allocates space for a file. virtual Status Allocate(uint64_t offset, uint64_t len) { return Status::OK(); } + protected: size_t preallocation_block_size() { return preallocation_block_size_; } private: