From d705755e52f243da632c695893c62051ae37dccd Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 17 Apr 2014 14:07:05 -0700 Subject: [PATCH] Minimize accessing multiple objects in Version::Get() Summary: One of our profilings shows that Version::Get() sometimes is slow when getting pointer of user comparators or other global objects. In this patch: (1) we keep pointers of immutable objects in Version to avoid accesses them though option objects or cfd objects (2) table_reader is directly cached in FileMetaData so that table cache don't have to go through handle first to fetch it (3) If level 0 has less than 3 files, skip the filtering logic based on SST tables' key range. Smallest and largest key are stored in separated memory locations, which has potential cache misses Test Plan: make all check Reviewers: haobo, ljin Reviewed By: haobo CC: igor, yhchiang, nkg-, leveldb Differential Revision: https://reviews.facebook.net/D17739 Conflicts: db/db_impl.cc db/db_impl_readonly.cc db/table_cache.cc db/version_edit.h db/version_set.cc db/version_set.h --- db/db_impl.cc | 4 ++-- db/db_impl_readonly.cc | 2 +- db/table_cache.cc | 38 +++++++++++++++++--------------- db/version_edit.h | 5 ++++- db/version_set.cc | 49 +++++++++++++++++++++++++----------------- db/version_set.h | 9 ++++++-- 6 files changed, 64 insertions(+), 43 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 24bc86789..72010eded 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3453,7 +3453,7 @@ Status DBImpl::GetImpl(const ReadOptions& options, StartPerfTimer(&from_files_timer); sv->current->Get(options, lkey, value, &s, &merge_context, &stats, - options_, value_found); + value_found); have_stat_update = true; BumpPerfTime(&perf_context.get_from_output_files_time, &from_files_timer); RecordTick(options_.statistics.get(), MEMTABLE_MISS); @@ -3557,7 +3557,7 @@ std::vector DBImpl::MultiGet(const ReadOptions& options, // Done } else { get_version->current->Get(options, lkey, value, &s, &merge_context, - &stats, options_); + &stats); have_stat_update = true; } diff --git a/db/db_impl_readonly.cc b/db/db_impl_readonly.cc index d130783ea..1b67d7793 100644 --- a/db/db_impl_readonly.cc +++ b/db/db_impl_readonly.cc @@ -64,7 +64,7 @@ Status DBImplReadOnly::Get(const ReadOptions& options, } else { Version::GetStats stats; super_version->current->Get(options, lkey, value, &s, &merge_context, - &stats, options_); + &stats); } return s; } diff --git a/db/table_cache.cc b/db/table_cache.cc index 7058221e0..e99e48b30 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -111,19 +111,20 @@ Iterator* TableCache::NewIterator(const ReadOptions& options, if (table_reader_ptr != nullptr) { *table_reader_ptr = nullptr; } - Cache::Handle* handle = file_meta.table_reader_handle; + TableReader* table_reader = file_meta.table_reader; + Cache::Handle* handle = nullptr; Status s; - if (!handle) { + if (table_reader == nullptr) { s = FindTable(toptions, icomparator, file_meta.number, file_meta.file_size, &handle, nullptr, options.read_tier == kBlockCacheTier); + table_reader = GetTableReaderFromHandle(handle); } if (!s.ok()) { return NewErrorIterator(s); } - TableReader* table_reader = GetTableReaderFromHandle(handle); Iterator* result = table_reader->NewIterator(options); - if (!file_meta.table_reader_handle) { + if (handle != nullptr) { result->RegisterCleanup(&UnrefEntry, cache_.get(), handle); } if (table_reader_ptr != nullptr) { @@ -143,17 +144,18 @@ Status TableCache::Get(const ReadOptions& options, bool (*saver)(void*, const ParsedInternalKey&, const Slice&, bool), bool* table_io, void (*mark_key_may_exist)(void*)) { - Cache::Handle* handle = file_meta.table_reader_handle; + TableReader* t = file_meta.table_reader; Status s; - if (!handle) { + Cache::Handle* handle = nullptr; + if (!t) { s = FindTable(storage_options_, internal_comparator, file_meta.number, file_meta.file_size, &handle, table_io, options.read_tier == kBlockCacheTier); + t = GetTableReaderFromHandle(handle); } if (s.ok()) { - TableReader* t = GetTableReaderFromHandle(handle); s = t->Get(options, k, arg, saver, mark_key_may_exist); - if (!file_meta.table_reader_handle) { + if (handle != nullptr) { ReleaseHandle(handle); } } else if (options.read_tier && s.IsIncomplete()) { @@ -169,15 +171,16 @@ Status TableCache::GetTableProperties( const FileMetaData& file_meta, std::shared_ptr* properties, bool no_io) { Status s; - auto table_handle = file_meta.table_reader_handle; + auto table_reader = file_meta.table_reader; // table already been pre-loaded? - if (table_handle) { - auto table = GetTableReaderFromHandle(table_handle); - *properties = table->GetTableProperties(); + if (table_reader) { + *properties = table_reader->GetTableProperties(); + return s; } bool table_io; + Cache::Handle* table_handle = nullptr; s = FindTable(toptions, internal_comparator, file_meta.number, file_meta.file_size, &table_handle, &table_io, no_io); if (!s.ok()) { @@ -195,20 +198,21 @@ bool TableCache::PrefixMayMatch(const ReadOptions& options, const FileMetaData& file_meta, const Slice& internal_prefix, bool* table_io) { bool may_match = true; - auto table_handle = file_meta.table_reader_handle; - if (table_handle == nullptr) { + auto table_reader = file_meta.table_reader; + Cache::Handle* table_handle = nullptr; + if (table_reader == nullptr) { // Need to get table handle from file number Status s = FindTable(storage_options_, icomparator, file_meta.number, file_meta.file_size, &table_handle, table_io); if (!s.ok()) { return may_match; } + table_reader = GetTableReaderFromHandle(table_handle); } - auto table = GetTableReaderFromHandle(table_handle); - may_match = table->PrefixMayMatch(internal_prefix); + may_match = table_reader->PrefixMayMatch(internal_prefix); - if (file_meta.table_reader_handle == nullptr) { + if (table_handle != nullptr) { // Need to release handle if it is generated from here. ReleaseHandle(table_handle); } diff --git a/db/version_edit.h b/db/version_edit.h index f54949fbf..2bbd07542 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -31,10 +31,13 @@ struct FileMetaData { // Needs to be disposed when refs becomes 0. Cache::Handle* table_reader_handle; + // Table reader in table_reader_handle + TableReader* table_reader; FileMetaData(uint64_t number, uint64_t file_size) : refs(0), allowed_seeks(1 << 30), number(number), file_size(file_size), - being_compacted(false), table_reader_handle(nullptr) { + being_compacted(false), table_reader_handle(nullptr), + table_reader(nullptr) { } FileMetaData() : FileMetaData(0, 0) { } }; diff --git a/db/version_set.cc b/db/version_set.cc index 2057d6dd4..4411dde9d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -478,6 +478,12 @@ bool BySmallestKey(FileMetaData* a, FileMetaData* b, Version::Version(VersionSet* vset, uint64_t version_number) : vset_(vset), + internal_comparator_(&(vset->icmp_)), + user_comparator_(internal_comparator_->user_comparator()), + table_cache_(vset->table_cache_), + merge_operator_(vset->options_->merge_operator.get()), + info_log_(vset->options_->info_log.get()), + db_statistics_(vset->options_->statistics.get()), next_(this), prev_(this), refs_(0), @@ -497,27 +503,22 @@ void Version::Get(const ReadOptions& options, Status* status, MergeContext* merge_context, GetStats* stats, - const Options& db_options, bool* value_found) { Slice ikey = k.internal_key(); Slice user_key = k.user_key(); - const Comparator* ucmp = vset_->icmp_.user_comparator(); - - auto merge_operator = db_options.merge_operator.get(); - auto logger = db_options.info_log.get(); assert(status->ok() || status->IsMergeInProgress()); Saver saver; saver.state = status->ok()? kNotFound : kMerge; - saver.ucmp = ucmp; + saver.ucmp = user_comparator_; saver.user_key = user_key; saver.value_found = value_found; saver.value = value; - saver.merge_operator = merge_operator; + saver.merge_operator = merge_operator_; saver.merge_context = merge_context; - saver.logger = logger; + saver.logger = info_log_; saver.didIO = false; - saver.statistics = db_options.statistics.get(); + saver.statistics = db_statistics_; stats->seek_file = nullptr; stats->seek_file_level = -1; @@ -548,7 +549,7 @@ void Version::Get(const ReadOptions& options, // On Level-n (n>=1), files are sorted. // Binary search to find earliest index whose largest key >= ikey. // We will also stop when the file no longer overlaps ikey - start_index = FindFile(vset_->icmp_, files_[level], ikey); + start_index = FindFile(*internal_comparator_, files_[level], ikey); } // Traverse each relevant file to find the desired key @@ -557,8 +558,10 @@ void Version::Get(const ReadOptions& options, #endif for (uint32_t i = start_index; i < num_files; ++i) { FileMetaData* f = files[i]; - if (ucmp->Compare(user_key, f->smallest.user_key()) < 0 || - ucmp->Compare(user_key, f->largest.user_key()) > 0) { + // Skip key range filtering for levle 0 if there are few level 0 files. + if ((level > 0 || num_files > 2) && + (user_comparator_->Compare(user_key, f->smallest.user_key()) < 0 || + user_comparator_->Compare(user_key, f->largest.user_key()) > 0)) { // Only process overlapping files. if (level > 0) { // If on Level-n (n>=1) then the files are sorted. @@ -574,7 +577,8 @@ void Version::Get(const ReadOptions& options, // Sanity check to make sure that the files are correctly sorted if (prev_file) { if (level != 0) { - int comp_sign = vset_->icmp_.Compare(prev_file->largest, f->smallest); + int comp_sign = + internal_comparator_->Compare(prev_file->largest, f->smallest); assert(comp_sign < 0); } else { // level == 0, the current file cannot be newer than the previous one. @@ -588,9 +592,8 @@ void Version::Get(const ReadOptions& options, prev_file = f; #endif bool tableIO = false; - *status = - vset_->table_cache_->Get(options, vset_->icmp_, *f, ikey, &saver, - SaveValue, &tableIO, MarkKeyMayExist); + *status = table_cache_->Get(options, *internal_comparator_, *f, ikey, + &saver, SaveValue, &tableIO, MarkKeyMayExist); // TODO: examine the behavior for corrupted key if (!status->ok()) { return; @@ -635,12 +638,12 @@ void Version::Get(const ReadOptions& options, if (kMerge == saver.state) { // merge_operands are in saver and we hit the beginning of the key history // do a final merge of nullptr and operands; - if (merge_operator->FullMerge(user_key, nullptr, - saver.merge_context->GetOperands(), - value, logger)) { + if (merge_operator_->FullMerge(user_key, nullptr, + saver.merge_context->GetOperands(), value, + info_log_)) { *status = Status::OK(); } else { - RecordTick(db_options.statistics.get(), NUMBER_MERGE_FAILURES); + RecordTick(db_statistics_, NUMBER_MERGE_FAILURES); *status = Status::Corruption("could not perform end-of-key merge for ", user_key); } @@ -1447,6 +1450,12 @@ class VersionSet::Builder { file_meta->number, file_meta->file_size, &file_meta->table_reader_handle, &table_io, false); + if (file_meta->table_reader_handle != nullptr) { + // Load table_reader + file_meta->table_reader = + vset_->table_cache_->GetTableReaderFromHandle( + file_meta->table_reader_handle); + } } } } diff --git a/db/version_set.h b/db/version_set.h index 7d7cdf4fc..38b4bada9 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -83,8 +83,7 @@ class Version { int seek_file_level; }; void Get(const ReadOptions&, const LookupKey& key, std::string* val, - Status* status, MergeContext* merge_context, - GetStats* stats, const Options& db_option, + Status* status, MergeContext* merge_context, GetStats* stats, bool* value_found = nullptr); // Adds "stats" into the current state. Returns true if a new @@ -224,6 +223,12 @@ class Version { void UpdateFilesBySize(); VersionSet* vset_; // VersionSet to which this Version belongs + const InternalKeyComparator* internal_comparator_; + const Comparator* user_comparator_; + TableCache* table_cache_; + const MergeOperator* merge_operator_; + Logger* info_log_; + Statistics* db_statistics_; Version* next_; // Next version in linked list Version* prev_; // Previous version in linked list int refs_; // Number of live refs to this version