Consider more factors when determining preallocation size of WAL files

Summary: Currently the WAL file preallocation size is always 1.1 * write_buffer_size. This over-estimates the space a WAL file needs when options.db_write_buffer_size or options.max_total_wal_size is set to a much smaller value, so those limits are now taken into account as well.

Test Plan: Add a unit test.

Reviewers: andrewkr, yiwu

Reviewed By: yiwu

Subscribers: leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D63957
This commit is contained in:
sdong 2016-09-13 16:38:28 -07:00
parent 4c3f4496b5
commit b666f85445
5 changed files with 124 additions and 5 deletions

View File

@ -3682,6 +3682,24 @@ bool DBImpl::MCOverlap(ManualCompaction* m, ManualCompaction* m1) {
return true;
}
// Returns the block size to use when preallocating space for a new WAL file.
//
// The starting estimate is 110% of `write_buffer_size`. That estimate is then
// capped by each of the following limits that is actually configured:
//   - db_options_.max_total_wal_size    (when > 0)
//   - db_options_.db_write_buffer_size  (when > 0)
//   - the write buffer manager's buffer_size() (when a manager is set and
//     reports itself as enabled)
// The result is the smallest of the estimate and the applicable limits.
uint64_t DBImpl::GetWalPreallocateBlockSize(uint64_t write_buffer_size) const {
  uint64_t bsize = write_buffer_size / 10 + write_buffer_size;
  if (bsize < write_buffer_size) {
    // The unsigned addition wrapped around; saturate instead of returning a
    // value smaller than write_buffer_size.
    bsize = ~static_cast<uint64_t>(0);
  }
  // Some users might set very high write_buffer_size and rely on
  // max_total_wal_size or other parameters to control the WAL size.
  //
  // std::min is instantiated explicitly for uint64_t below: the limits are
  // not guaranteed to share one integer type with bsize (presumably some are
  // size_t -- confirm against the option declarations), and two-argument
  // deduction fails to compile when the argument types differ.
  if (db_options_.max_total_wal_size > 0) {
    bsize = std::min<uint64_t>(
        bsize, static_cast<uint64_t>(db_options_.max_total_wal_size));
  }
  if (db_options_.db_write_buffer_size > 0) {
    bsize = std::min<uint64_t>(
        bsize, static_cast<uint64_t>(db_options_.db_write_buffer_size));
  }
  if (db_options_.write_buffer_manager &&
      db_options_.write_buffer_manager->enabled()) {
    bsize = std::min<uint64_t>(
        bsize,
        static_cast<uint64_t>(db_options_.write_buffer_manager->buffer_size()));
  }
  return bsize;
}
namespace {
struct IterState {
IterState(DBImpl* _db, InstrumentedMutex* _mu, SuperVersion* _super_version,
@ -4995,8 +5013,7 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
// Our final size should be less than write_buffer_size
// (compression, etc) but err on the side of caution.
lfile->SetPreallocationBlockSize(
mutable_cf_options.write_buffer_size / 10 +
mutable_cf_options.write_buffer_size);
GetWalPreallocateBlockSize(mutable_cf_options.write_buffer_size));
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(lfile), opt_env_opt));
new_log = new log::Writer(std::move(file_writer), new_log_number,
@ -5747,7 +5764,8 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
LogFileName(impl->db_options_.wal_dir, new_log_number),
&lfile, opt_env_options);
if (s.ok()) {
lfile->SetPreallocationBlockSize((max_write_buffer_size / 10) + max_write_buffer_size);
lfile->SetPreallocationBlockSize(
impl->GetWalPreallocateBlockSize(max_write_buffer_size));
impl->logfile_number_ = new_log_number;
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(lfile), opt_env_options));

View File

@ -1067,6 +1067,8 @@ class DBImpl : public DB {
bool ShouldntRunManualCompaction(ManualCompaction* m);
bool HaveManualCompaction(ColumnFamilyData* cfd);
bool MCOverlap(ManualCompaction* m, ManualCompaction* m1);
// Returns the preallocation block size to use for a new WAL file: 110% of
// `write_buffer_size`, capped by max_total_wal_size, db_write_buffer_size and
// the write buffer manager's buffer size, for whichever of those are set.
uint64_t GetWalPreallocateBlockSize(uint64_t write_buffer_size) const;
};
// Sanitize db options. The caller should delete result.info_log if

View File

@ -2065,7 +2065,6 @@ TEST_F(DBTest2, ReadAmpBitmapLiveInCacheAfterDBClose) {
ASSERT_EQ(total_useful_bytes_iter1 + total_useful_bytes_iter2,
total_loaded_bytes_iter1 + total_loaded_bytes_iter2);
}
} // namespace rocksdb
int main(int argc, char** argv) {

View File

@ -310,7 +310,18 @@ class SpecialEnv : public EnvWrapper {
return s;
}
Status Truncate(uint64_t size) override { return base_->Truncate(size); }
Status Close() override { return base_->Close(); }
Status Close() override {
// SyncPoint is not supported in Released Windows Mode.
#if !(defined NDEBUG) || !defined(OS_WIN)
  // Expose the preallocation block size recorded on this wrapper to tests
  // through a sync point. The value has to be read here because it is never
  // forwarded to the wrapped base file.
  size_t reported_block_size = preallocation_block_size();
  TEST_SYNC_POINT_CALLBACK("DBTestWalFile.GetPreallocationStatus",
                           &reported_block_size);
#endif  // !(defined NDEBUG) || !defined(OS_WIN)
  // Closing itself is delegated to the wrapped file.
  return base_->Close();
}
Status Flush() override { return base_->Flush(); }
Status Sync() override {
++env_->sync_counter_;

View File

@ -291,6 +291,95 @@ TEST_F(DBWALTest, RecoveryWithEmptyLog) {
} while (ChangeOptions());
}
#if !(defined NDEBUG) || !defined(OS_WIN)
// Checks the preallocation block size observed when WAL files are closed.
// Four configurations are exercised in sequence, each one adding a tighter
// limit than the previous:
//   1. only write_buffer_size          -> 110% of write_buffer_size
//   2. + max_total_wal_size            -> max_total_wal_size
//   3. + db_write_buffer_size          -> db_write_buffer_size
//   4. + write_buffer_manager          -> the manager's buffer size
TEST_F(DBWALTest, PreallocateBlock) {
  Options options = CurrentOptions();
  options.write_buffer_size = 10 * 1000 * 1000;
  options.max_total_wal_size = 0;

  // Read by the sync point callback; updated before every phase.
  size_t expected_preallocation_size = static_cast<size_t>(
      options.write_buffer_size + options.write_buffer_size / 10);
  std::atomic<int> called(0);

  // Runs one phase against the currently open DB: installs the checking
  // callback, performs write / flush / write / close, and returns how many
  // times the callback fired. Every phase expects 2 (presumably one WAL file
  // is closed when Flush() switches logs and one when the DB is closed --
  // TODO confirm).
  auto count_checked_wal_closes = [&]() -> int {
    called.store(0);
    rocksdb::SyncPoint::GetInstance()->SetCallBack(
        "DBTestWalFile.GetPreallocationStatus", [&](void* arg) {
          ASSERT_TRUE(arg != nullptr);
          size_t preallocation_size = *(static_cast<size_t*>(arg));
          ASSERT_EQ(expected_preallocation_size, preallocation_size);
          called.fetch_add(1);
        });
    rocksdb::SyncPoint::GetInstance()->EnableProcessing();
    Put("", "");
    Flush();
    Put("", "");
    Close();
    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
    // The callback above captures this test's locals by reference, and the
    // SyncPoint instance it is stored in is obtained via GetInstance(), so it
    // may outlive this test. Overwrite it with a no-op so that no dangling
    // references remain registered once the phase is over.
    rocksdb::SyncPoint::GetInstance()->SetCallBack(
        "DBTestWalFile.GetPreallocationStatus", [](void* /*arg*/) {});
    return called.load();
  };

  // Phase 1: no extra limits, so 110% of write_buffer_size is expected.
  DestroyAndReopen(options);
  ASSERT_EQ(2, count_checked_wal_closes());

  // Phase 2: max_total_wal_size is smaller than the estimate and caps it.
  options.max_total_wal_size = 1000 * 1000;
  expected_preallocation_size = static_cast<size_t>(options.max_total_wal_size);
  Reopen(options);
  ASSERT_EQ(2, count_checked_wal_closes());

  // Phase 3: db_write_buffer_size is smaller still.
  options.db_write_buffer_size = 800 * 1000;
  expected_preallocation_size =
      static_cast<size_t>(options.db_write_buffer_size);
  Reopen(options);
  ASSERT_EQ(2, count_checked_wal_closes());

  // Phase 4: a write buffer manager with the smallest limit of all.
  const uint64_t kManagerBufferSize = 700 * 1000;
  expected_preallocation_size = static_cast<size_t>(kManagerBufferSize);
  std::shared_ptr<WriteBufferManager> write_buffer_manager =
      std::make_shared<WriteBufferManager>(kManagerBufferSize);
  options.write_buffer_manager = write_buffer_manager;
  Reopen(options);
  ASSERT_EQ(2, count_checked_wal_closes());
}
#endif // !(defined NDEBUG) || !defined(OS_WIN)
#ifndef ROCKSDB_LITE
TEST_F(DBWALTest, GetSortedWalFiles) {
do {