From d58ef52ba6113f3f122a4cad1268cd0c7fffc199 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Thu, 29 Dec 2016 14:51:52 -0800 Subject: [PATCH] Allow SstFileWriter to Fadvise the file away from page cache Summary: Add `fadvise_trigger` option to `SstFileWriter` If fadvise_trigger is passed with a non-zero value, SstFileWriter will invalidate the os page cache every `fadvise_trigger` bytes for the sst file Closes https://github.com/facebook/rocksdb/pull/1731 Differential Revision: D4371246 Pulled By: IslamAbdelRahman fbshipit-source-id: 91caff1 --- db/external_sst_file_test.cc | 41 +++++++++++++++++++++ include/rocksdb/sst_file_writer.h | 7 +++- table/sst_file_writer.cc | 59 ++++++++++++++++++++++++------- 3 files changed, 94 insertions(+), 13 deletions(-) diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc index f94ade569..6b3dea46c 100644 --- a/db/external_sst_file_test.cc +++ b/db/external_sst_file_test.cc @@ -1947,6 +1947,47 @@ TEST_F(ExternalSSTFileTest, SnapshotInconsistencyBug) { db_->ReleaseSnapshot(snap); } +TEST_F(ExternalSSTFileTest, FadviseTrigger) { + Options options = CurrentOptions(); + const int kNumKeys = 10000; + + size_t total_fadvised_bytes = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "SstFileWriter::InvalidatePageCache", [&](void* arg) { + size_t fadvise_size = *(reinterpret_cast(arg)); + total_fadvised_bytes += fadvise_size; + }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + std::unique_ptr sst_file_writer; + + std::string sst_file_path = sst_files_dir_ + "file_fadvise_disable.sst"; + sst_file_writer.reset(new SstFileWriter(EnvOptions(), options, + options.comparator, nullptr, false)); + ASSERT_OK(sst_file_writer->Open(sst_file_path)); + for (int i = 0; i < kNumKeys; i++) { + ASSERT_OK(sst_file_writer->Add(Key(i), Key(i))); + } + ASSERT_OK(sst_file_writer->Finish()); + // fadvise disabled + ASSERT_EQ(total_fadvised_bytes, 0); + + + sst_file_path = sst_files_dir_ + "file_fadvise_enable.sst"; + sst_file_writer.reset(new SstFileWriter(EnvOptions(), options, + options.comparator, nullptr, true)); + ASSERT_OK(sst_file_writer->Open(sst_file_path)); + for (int i = 0; i < kNumKeys; i++) { + ASSERT_OK(sst_file_writer->Add(Key(i), Key(i))); + } + ASSERT_OK(sst_file_writer->Finish()); + // fadvise enabled + ASSERT_EQ(total_fadvised_bytes, sst_file_writer->FileSize()); + ASSERT_GT(total_fadvised_bytes, 0); + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + #endif // ROCKSDB_LITE } // namespace rocksdb diff --git a/include/rocksdb/sst_file_writer.h b/include/rocksdb/sst_file_writer.h index d7ca81bc4..a323b5db2 100644 --- a/include/rocksdb/sst_file_writer.h +++ b/include/rocksdb/sst_file_writer.h @@ -47,9 +47,12 @@ class SstFileWriter { // User can pass `column_family` to specify that the the generated file will // be ingested into this column_family, note that passing nullptr means that // the column_family is unknown. + // If invalidate_page_cache is set to true, SstFileWriter will give the OS a + // hint that this file pages is not needed everytime we write 1MB to the file SstFileWriter(const EnvOptions& env_options, const Options& options, const Comparator* user_comparator, - ColumnFamilyHandle* column_family = nullptr); + ColumnFamilyHandle* column_family = nullptr, + bool invalidate_page_cache = true); ~SstFileWriter(); @@ -70,6 +73,8 @@ class SstFileWriter { uint64_t FileSize(); private: + void InvalidatePageCache(bool closing); + struct Rep; Rep* rep_; }; diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index 1d4f1c40a..2c3c63547 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -11,6 +11,7 @@ #include "table/block_based_table_builder.h" #include "table/sst_file_writer_collectors.h" #include "util/file_reader_writer.h" +#include "util/sync_point.h" namespace rocksdb { @@ -18,15 +19,19 @@ const std::string ExternalSstFilePropertyNames::kVersion = "rocksdb.external_sst_file.version"; const std::string ExternalSstFilePropertyNames::kGlobalSeqno = "rocksdb.external_sst_file.global_seqno"; +const size_t kFadviseTrigger = 1024 * 1024; // 1MB struct SstFileWriter::Rep { Rep(const EnvOptions& _env_options, const Options& options, - const Comparator* _user_comparator, ColumnFamilyHandle* _cfh) + const Comparator* _user_comparator, ColumnFamilyHandle* _cfh, + bool _invalidate_page_cache) : env_options(_env_options), ioptions(options), mutable_cf_options(options), internal_comparator(_user_comparator), - cfh(_cfh) {} + cfh(_cfh), + invalidate_page_cache(_invalidate_page_cache), + last_fadvise_size(0) {} std::unique_ptr file_writer; std::unique_ptr builder; @@ -38,15 +43,23 @@ struct SstFileWriter::Rep { InternalKey ikey; std::string column_family_name; ColumnFamilyHandle* cfh; + // If true, We will give the OS a hint that this file pages is not needed + // everytime we write 1MB to the file + bool invalidate_page_cache; + // the size of the file during the last time we called Fadvise to remove + // cached pages from page cache. + uint64_t last_fadvise_size; }; SstFileWriter::SstFileWriter(const EnvOptions& env_options, const Options& options, const Comparator* user_comparator, - ColumnFamilyHandle* column_family) - : rep_(new Rep(env_options, options, user_comparator, column_family)) { - rep_->file_info.file_size = 0; - } + ColumnFamilyHandle* column_family, + bool invalidate_page_cache) + : rep_(new Rep(env_options, options, user_comparator, column_family, + invalidate_page_cache)) { + rep_->file_info.file_size = 0; +} SstFileWriter::~SstFileWriter() { if (rep_->builder) { @@ -143,15 +156,17 @@ Status SstFileWriter::Add(const Slice& user_key, const Slice& value) { } } + // TODO(tec) : For external SST files we could omit the seqno and type. + r->ikey.Set(user_key, 0 /* Sequence Number */, + ValueType::kTypeValue /* Put */); + r->builder->Add(r->ikey.Encode(), value); + // update file info r->file_info.num_entries++; r->file_info.largest_key.assign(user_key.data(), user_key.size()); r->file_info.file_size = r->builder->FileSize(); - // TODO(tec) : For external SST files we could omit the seqno and type. - r->ikey.Set(user_key, 0 /* Sequence Number */, - ValueType::kTypeValue /* Put */); - r->builder->Add(r->ikey.Encode(), value); + InvalidatePageCache(false /* closing */); return Status::OK(); } @@ -166,10 +181,13 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) { } Status s = r->builder->Finish(); + r->file_info.file_size = r->builder->FileSize(); + if (s.ok()) { if (!r->ioptions.disable_data_sync) { s = r->file_writer->Sync(r->ioptions.use_fsync); } + InvalidatePageCache(true /* closing */); if (s.ok()) { s = r->file_writer->Close(); } @@ -181,8 +199,7 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) { r->ioptions.env->DeleteFile(r->file_info.file_path); } - if (s.ok() && file_info != nullptr) { - r->file_info.file_size = r->builder->FileSize(); + if (file_info != nullptr) { *file_info = r->file_info; } @@ -190,6 +207,24 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) { return s; } +void SstFileWriter::InvalidatePageCache(bool closing) { + Rep* r = rep_; + if (r->invalidate_page_cache == false) { + // Fadvise disabled + return; + } + + uint64_t bytes_since_last_fadvise = + r->builder->FileSize() - r->last_fadvise_size; + if (bytes_since_last_fadvise > kFadviseTrigger || closing) { + TEST_SYNC_POINT_CALLBACK("SstFileWriter::InvalidatePageCache", + &(bytes_since_last_fadvise)); + // Tell the OS that we dont need this file in page cache + r->file_writer->InvalidateCache(0, 0); + r->last_fadvise_size = r->builder->FileSize(); + } +} + uint64_t SstFileWriter::FileSize() { return rep_->file_info.file_size; }