Allow SstFileWriter to Fadvise the file away from page cache
Summary: Add `fadvise_trigger` option to `SstFileWriter` If fadvise_trigger is passed with a non-zero value, SstFileWriter will invalidate the os page cache every `fadvise_trigger` bytes for the sst file Closes https://github.com/facebook/rocksdb/pull/1731 Differential Revision: D4371246 Pulled By: IslamAbdelRahman fbshipit-source-id: 91caff1
This commit is contained in:
parent
17a4b75cc3
commit
d58ef52ba6
@ -1947,6 +1947,47 @@ TEST_F(ExternalSSTFileTest, SnapshotInconsistencyBug) {
|
|||||||
db_->ReleaseSnapshot(snap);
|
db_->ReleaseSnapshot(snap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(ExternalSSTFileTest, FadviseTrigger) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
const int kNumKeys = 10000;
|
||||||
|
|
||||||
|
size_t total_fadvised_bytes = 0;
|
||||||
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"SstFileWriter::InvalidatePageCache", [&](void* arg) {
|
||||||
|
size_t fadvise_size = *(reinterpret_cast<size_t*>(arg));
|
||||||
|
total_fadvised_bytes += fadvise_size;
|
||||||
|
});
|
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
|
||||||
|
std::unique_ptr<SstFileWriter> sst_file_writer;
|
||||||
|
|
||||||
|
std::string sst_file_path = sst_files_dir_ + "file_fadvise_disable.sst";
|
||||||
|
sst_file_writer.reset(new SstFileWriter(EnvOptions(), options,
|
||||||
|
options.comparator, nullptr, false));
|
||||||
|
ASSERT_OK(sst_file_writer->Open(sst_file_path));
|
||||||
|
for (int i = 0; i < kNumKeys; i++) {
|
||||||
|
ASSERT_OK(sst_file_writer->Add(Key(i), Key(i)));
|
||||||
|
}
|
||||||
|
ASSERT_OK(sst_file_writer->Finish());
|
||||||
|
// fadvise disabled
|
||||||
|
ASSERT_EQ(total_fadvised_bytes, 0);
|
||||||
|
|
||||||
|
|
||||||
|
sst_file_path = sst_files_dir_ + "file_fadvise_enable.sst";
|
||||||
|
sst_file_writer.reset(new SstFileWriter(EnvOptions(), options,
|
||||||
|
options.comparator, nullptr, true));
|
||||||
|
ASSERT_OK(sst_file_writer->Open(sst_file_path));
|
||||||
|
for (int i = 0; i < kNumKeys; i++) {
|
||||||
|
ASSERT_OK(sst_file_writer->Add(Key(i), Key(i)));
|
||||||
|
}
|
||||||
|
ASSERT_OK(sst_file_writer->Finish());
|
||||||
|
// fadvise enabled
|
||||||
|
ASSERT_EQ(total_fadvised_bytes, sst_file_writer->FileSize());
|
||||||
|
ASSERT_GT(total_fadvised_bytes, 0);
|
||||||
|
|
||||||
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
||||||
|
}
|
||||||
|
|
||||||
#endif // ROCKSDB_LITE
|
#endif // ROCKSDB_LITE
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -47,9 +47,12 @@ class SstFileWriter {
|
|||||||
// User can pass `column_family` to specify that the the generated file will
|
// User can pass `column_family` to specify that the the generated file will
|
||||||
// be ingested into this column_family, note that passing nullptr means that
|
// be ingested into this column_family, note that passing nullptr means that
|
||||||
// the column_family is unknown.
|
// the column_family is unknown.
|
||||||
|
// If invalidate_page_cache is set to true, SstFileWriter will give the OS a
|
||||||
|
// hint that this file pages is not needed everytime we write 1MB to the file
|
||||||
SstFileWriter(const EnvOptions& env_options, const Options& options,
|
SstFileWriter(const EnvOptions& env_options, const Options& options,
|
||||||
const Comparator* user_comparator,
|
const Comparator* user_comparator,
|
||||||
ColumnFamilyHandle* column_family = nullptr);
|
ColumnFamilyHandle* column_family = nullptr,
|
||||||
|
bool invalidate_page_cache = true);
|
||||||
|
|
||||||
~SstFileWriter();
|
~SstFileWriter();
|
||||||
|
|
||||||
@ -70,6 +73,8 @@ class SstFileWriter {
|
|||||||
uint64_t FileSize();
|
uint64_t FileSize();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void InvalidatePageCache(bool closing);
|
||||||
|
|
||||||
struct Rep;
|
struct Rep;
|
||||||
Rep* rep_;
|
Rep* rep_;
|
||||||
};
|
};
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include "table/block_based_table_builder.h"
|
#include "table/block_based_table_builder.h"
|
||||||
#include "table/sst_file_writer_collectors.h"
|
#include "table/sst_file_writer_collectors.h"
|
||||||
#include "util/file_reader_writer.h"
|
#include "util/file_reader_writer.h"
|
||||||
|
#include "util/sync_point.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
@ -18,15 +19,19 @@ const std::string ExternalSstFilePropertyNames::kVersion =
|
|||||||
"rocksdb.external_sst_file.version";
|
"rocksdb.external_sst_file.version";
|
||||||
const std::string ExternalSstFilePropertyNames::kGlobalSeqno =
|
const std::string ExternalSstFilePropertyNames::kGlobalSeqno =
|
||||||
"rocksdb.external_sst_file.global_seqno";
|
"rocksdb.external_sst_file.global_seqno";
|
||||||
|
const size_t kFadviseTrigger = 1024 * 1024; // 1MB
|
||||||
|
|
||||||
struct SstFileWriter::Rep {
|
struct SstFileWriter::Rep {
|
||||||
Rep(const EnvOptions& _env_options, const Options& options,
|
Rep(const EnvOptions& _env_options, const Options& options,
|
||||||
const Comparator* _user_comparator, ColumnFamilyHandle* _cfh)
|
const Comparator* _user_comparator, ColumnFamilyHandle* _cfh,
|
||||||
|
bool _invalidate_page_cache)
|
||||||
: env_options(_env_options),
|
: env_options(_env_options),
|
||||||
ioptions(options),
|
ioptions(options),
|
||||||
mutable_cf_options(options),
|
mutable_cf_options(options),
|
||||||
internal_comparator(_user_comparator),
|
internal_comparator(_user_comparator),
|
||||||
cfh(_cfh) {}
|
cfh(_cfh),
|
||||||
|
invalidate_page_cache(_invalidate_page_cache),
|
||||||
|
last_fadvise_size(0) {}
|
||||||
|
|
||||||
std::unique_ptr<WritableFileWriter> file_writer;
|
std::unique_ptr<WritableFileWriter> file_writer;
|
||||||
std::unique_ptr<TableBuilder> builder;
|
std::unique_ptr<TableBuilder> builder;
|
||||||
@ -38,15 +43,23 @@ struct SstFileWriter::Rep {
|
|||||||
InternalKey ikey;
|
InternalKey ikey;
|
||||||
std::string column_family_name;
|
std::string column_family_name;
|
||||||
ColumnFamilyHandle* cfh;
|
ColumnFamilyHandle* cfh;
|
||||||
|
// If true, We will give the OS a hint that this file pages is not needed
|
||||||
|
// everytime we write 1MB to the file
|
||||||
|
bool invalidate_page_cache;
|
||||||
|
// the size of the file during the last time we called Fadvise to remove
|
||||||
|
// cached pages from page cache.
|
||||||
|
uint64_t last_fadvise_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
SstFileWriter::SstFileWriter(const EnvOptions& env_options,
|
SstFileWriter::SstFileWriter(const EnvOptions& env_options,
|
||||||
const Options& options,
|
const Options& options,
|
||||||
const Comparator* user_comparator,
|
const Comparator* user_comparator,
|
||||||
ColumnFamilyHandle* column_family)
|
ColumnFamilyHandle* column_family,
|
||||||
: rep_(new Rep(env_options, options, user_comparator, column_family)) {
|
bool invalidate_page_cache)
|
||||||
rep_->file_info.file_size = 0;
|
: rep_(new Rep(env_options, options, user_comparator, column_family,
|
||||||
}
|
invalidate_page_cache)) {
|
||||||
|
rep_->file_info.file_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
SstFileWriter::~SstFileWriter() {
|
SstFileWriter::~SstFileWriter() {
|
||||||
if (rep_->builder) {
|
if (rep_->builder) {
|
||||||
@ -143,15 +156,17 @@ Status SstFileWriter::Add(const Slice& user_key, const Slice& value) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(tec) : For external SST files we could omit the seqno and type.
|
||||||
|
r->ikey.Set(user_key, 0 /* Sequence Number */,
|
||||||
|
ValueType::kTypeValue /* Put */);
|
||||||
|
r->builder->Add(r->ikey.Encode(), value);
|
||||||
|
|
||||||
// update file info
|
// update file info
|
||||||
r->file_info.num_entries++;
|
r->file_info.num_entries++;
|
||||||
r->file_info.largest_key.assign(user_key.data(), user_key.size());
|
r->file_info.largest_key.assign(user_key.data(), user_key.size());
|
||||||
r->file_info.file_size = r->builder->FileSize();
|
r->file_info.file_size = r->builder->FileSize();
|
||||||
|
|
||||||
// TODO(tec) : For external SST files we could omit the seqno and type.
|
InvalidatePageCache(false /* closing */);
|
||||||
r->ikey.Set(user_key, 0 /* Sequence Number */,
|
|
||||||
ValueType::kTypeValue /* Put */);
|
|
||||||
r->builder->Add(r->ikey.Encode(), value);
|
|
||||||
|
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
@ -166,10 +181,13 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status s = r->builder->Finish();
|
Status s = r->builder->Finish();
|
||||||
|
r->file_info.file_size = r->builder->FileSize();
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
if (!r->ioptions.disable_data_sync) {
|
if (!r->ioptions.disable_data_sync) {
|
||||||
s = r->file_writer->Sync(r->ioptions.use_fsync);
|
s = r->file_writer->Sync(r->ioptions.use_fsync);
|
||||||
}
|
}
|
||||||
|
InvalidatePageCache(true /* closing */);
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
s = r->file_writer->Close();
|
s = r->file_writer->Close();
|
||||||
}
|
}
|
||||||
@ -181,8 +199,7 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) {
|
|||||||
r->ioptions.env->DeleteFile(r->file_info.file_path);
|
r->ioptions.env->DeleteFile(r->file_info.file_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (s.ok() && file_info != nullptr) {
|
if (file_info != nullptr) {
|
||||||
r->file_info.file_size = r->builder->FileSize();
|
|
||||||
*file_info = r->file_info;
|
*file_info = r->file_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -190,6 +207,24 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SstFileWriter::InvalidatePageCache(bool closing) {
|
||||||
|
Rep* r = rep_;
|
||||||
|
if (r->invalidate_page_cache == false) {
|
||||||
|
// Fadvise disabled
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t bytes_since_last_fadvise =
|
||||||
|
r->builder->FileSize() - r->last_fadvise_size;
|
||||||
|
if (bytes_since_last_fadvise > kFadviseTrigger || closing) {
|
||||||
|
TEST_SYNC_POINT_CALLBACK("SstFileWriter::InvalidatePageCache",
|
||||||
|
&(bytes_since_last_fadvise));
|
||||||
|
// Tell the OS that we dont need this file in page cache
|
||||||
|
r->file_writer->InvalidateCache(0, 0);
|
||||||
|
r->last_fadvise_size = r->builder->FileSize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t SstFileWriter::FileSize() {
|
uint64_t SstFileWriter::FileSize() {
|
||||||
return rep_->file_info.file_size;
|
return rep_->file_info.file_size;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user