From c129c75fb7810959a3da548d03bd3cededcb0a8f Mon Sep 17 00:00:00 2001 From: Eli Pozniansky Date: Fri, 19 Jul 2019 11:54:38 -0700 Subject: [PATCH] Added log_readahead_size option to control prefetching for Log::Reader (#5592) Summary: Added log_readahead_size option to control prefetching for Log::Reader. This is mostly useful for reading a remotely located log, as it can save the number of round-trips when reading it. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5592 Differential Revision: D16362989 Pulled By: elipoz fbshipit-source-id: c5d4d5245a44008cd59879640efff70c091ad3e8 --- db/db_impl/db_impl_open.cc | 3 ++- db/db_impl/db_impl_secondary.cc | 3 ++- db/version_set.cc | 9 ++++++--- include/rocksdb/env.h | 4 ++-- include/rocksdb/options.h | 11 +++++++++++ options/db_options.cc | 6 +++++- options/db_options.h | 1 + options/options_helper.cc | 5 ++++- options/options_settable_test.cc | 3 ++- 9 files changed, 35 insertions(+), 10 deletions(-) diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 82e61a260..0e0fcfbf2 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -721,7 +721,8 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers, continue; } } - file_reader.reset(new SequentialFileReader(std::move(file), fname)); + file_reader.reset(new SequentialFileReader( + std::move(file), fname, immutable_db_options_.log_readahead_size)); } // Create the log reader. diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index e14e53e55..a73cd6ba2 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -150,7 +150,8 @@ Status DBImplSecondary::MaybeInitLogReader( *log_reader = nullptr; return status; } - file_reader.reset(new SequentialFileReader(std::move(file), fname)); + file_reader.reset(new SequentialFileReader( + std::move(file), fname, immutable_db_options_.log_readahead_size)); } // Create the log reader. 
diff --git a/db/version_set.cc b/db/version_set.cc index 0d3b9fb4e..559a4190f 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -4267,7 +4267,8 @@ Status VersionSet::Recover( return s; } manifest_file_reader.reset( - new SequentialFileReader(std::move(manifest_file), manifest_path)); + new SequentialFileReader(std::move(manifest_file), manifest_path, + db_options_->log_readahead_size)); } uint64_t current_manifest_file_size; s = env_->GetFileSize(manifest_path, &current_manifest_file_size); @@ -4597,7 +4598,8 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname, if (!s.ok()) { return s; } - file_reader.reset(new SequentialFileReader(std::move(file), dscname)); + file_reader.reset(new SequentialFileReader( + std::move(file), dscname, db_options_->log_readahead_size)); } bool have_prev_log_number = false; @@ -5721,7 +5723,8 @@ Status ReactiveVersionSet::MaybeSwitchManifest( std::unique_ptr<SequentialFileReader> manifest_file_reader; if (s.ok()) { manifest_file_reader.reset( - new SequentialFileReader(std::move(manifest_file), manifest_path)); + new SequentialFileReader(std::move(manifest_file), manifest_path, + db_options_->log_readahead_size)); manifest_reader->reset(new log::FragmentBufferedReader( nullptr, std::move(manifest_file_reader), reporter, true /* checksum */, 0 /* log_number */)); diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 67464cc5c..126f25747 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -118,10 +118,10 @@ struct EnvOptions { bool fallocate_with_keep_size = true; // See DBOptions doc - size_t compaction_readahead_size; + size_t compaction_readahead_size = 0; // See DBOptions doc - size_t random_access_max_buffer_size; + size_t random_access_max_buffer_size = 0; // See DBOptions doc size_t writable_file_max_buffer_size = 1024 * 1024; diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 09dc8e54c..234af6a31 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1087,6 
+1087,17 @@ struct DBOptions { // If set to true, takes precedence over // ReadOptions::background_purge_on_iterator_cleanup. bool avoid_unnecessary_blocking_io = false; + + // The number of bytes to prefetch when reading the log. This is mostly useful + // for reading a remotely located log, as it can save the number of + // round-trips. If 0, then the prefetching is disabled. + + // If non-zero, we perform bigger reads when reading the log. + // This is mostly useful for reading a remotely located log, as it can save + // the number of round-trips. If 0, then the prefetching is disabled. + // + // Default: 0 + size_t log_readahead_size = 0; }; // Options to control the behavior of a database (passed to DB::Open) diff --git a/options/db_options.cc b/options/db_options.cc index 490a37080..3756c555c 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -85,7 +85,8 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options) manual_wal_flush(options.manual_wal_flush), atomic_flush(options.atomic_flush), avoid_unnecessary_blocking_io(options.avoid_unnecessary_blocking_io), - persist_stats_to_disk(options.persist_stats_to_disk) { + persist_stats_to_disk(options.persist_stats_to_disk), + log_readahead_size(options.log_readahead_size) { } void ImmutableDBOptions::Dump(Logger* log) const { @@ -225,6 +226,9 @@ void ImmutableDBOptions::Dump(Logger* log) const { avoid_unnecessary_blocking_io); ROCKS_LOG_HEADER(log, " Options.persist_stats_to_disk: %u", persist_stats_to_disk); + ROCKS_LOG_HEADER( + log, " Options.log_readahead_size: %" ROCKSDB_PRIszt, + log_readahead_size); } MutableDBOptions::MutableDBOptions() diff --git a/options/db_options.h b/options/db_options.h index 92eea4ecf..e39e2903f 100644 --- a/options/db_options.h +++ b/options/db_options.h @@ -82,6 +82,7 @@ struct ImmutableDBOptions { bool atomic_flush; bool avoid_unnecessary_blocking_io; bool persist_stats_to_disk; + size_t log_readahead_size; }; struct MutableDBOptions { diff --git 
a/options/options_helper.cc b/options/options_helper.cc index 47aba7ad0..922ece3a8 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -138,7 +138,7 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options, options.atomic_flush = immutable_db_options.atomic_flush; options.avoid_unnecessary_blocking_io = immutable_db_options.avoid_unnecessary_blocking_io; - + options.log_readahead_size = immutable_db_options.log_readahead_size; return options; } @@ -1664,6 +1664,9 @@ std::unordered_map {offsetof(struct DBOptions, avoid_unnecessary_blocking_io), OptionType::kBoolean, OptionVerificationType::kNormal, false, offsetof(struct ImmutableDBOptions, avoid_unnecessary_blocking_io)}}, + {"log_readahead_size", + {offsetof(struct DBOptions, log_readahead_size), OptionType::kSizeT, + OptionVerificationType::kNormal, false, 0}}, }; std::unordered_map diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index f0b79e372..e60fd6f9e 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -295,7 +295,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { "manual_wal_flush=false;" "seq_per_batch=false;" "atomic_flush=false;" - "avoid_unnecessary_blocking_io=false", + "avoid_unnecessary_blocking_io=false;" + "log_readahead_size=0", new_options)); ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),