From fc9d4071f0c504e107d7e1e11ed173175003a99b Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 21 Jan 2022 11:36:36 -0800 Subject: [PATCH] Fast path for detecting unchanged prefix_extractor (#9407) Summary: Fixes a major performance regression in 6.26, where extra CPU is spent in SliceTransform::AsString when reads involve a prefix_extractor (Get, MultiGet, Seek). Common case performance is now better than 6.25. This change creates a "fast path" for verifying that the current prefix extractor is unchanged and compatible with what was used to generate a table file. This fast path detects the common case by pointer comparison on the current prefix_extractor and a "known good" prefix extractor (if applicable) that is saved at the time the table reader is opened. The "known good" prefix extractor is saved as another shared_ptr copy (in an existing field, however) to ensure the pointer is not recycled. When the prefix_extractor has changed to a different instance but same compatible configuration (rare, odd), performance is still a regression compared to 6.25, but this is likely acceptable because of the oddity of such a case. The performance of incompatible prefix_extractor is essentially unchanged. Also fixed a minor case (ForwardIterator) where a prefix_extractor could be used via a raw pointer after being freed as a shared_ptr, if replaced via SetOptions. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9407 Test Plan: ## Performance Populate DB with `TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -benchmarks=fillrandom -num=10000000 -disable_wal=1 -write_buffer_size=10000000 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -prefix_size=12` Running head-to-head comparisons simultaneously with `TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -use_existing_db -readonly -benchmarks=seekrandom -num=10000000 -duration=20 -disable_wal=1 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -prefix_size=12` Below each is compared by ops/sec vs. baseline which is version 6.25 (multiple baseline runs because of variable machine load) v6.26: 4833 vs. 6698 (<- major regression!) v6.27: 4737 vs. 6397 (still) New: 6704 vs. 6461 (better than baseline in common case) Disabled fastpath: 4843 vs. 6389 (e.g. if prefix extractor instance changes but is still compatible) Changed prefix size (no usable filter) in new: 787 vs. 5927 Changed prefix size (no usable filter) in new & baseline: 773 vs. 784 Reviewed By: mrambacher Differential Revision: D33677812 Pulled By: pdillinger fbshipit-source-id: 571d9711c461fb97f957378a061b7e7dbc4d6a76 --- HISTORY.md | 4 + db/builder.cc | 4 +- db/compaction/compaction_job.cc | 4 +- db/convenience.cc | 2 +- db/external_sst_file_ingestion_job.cc | 2 +- db/forward_iterator.cc | 29 ++--- db/forward_iterator.h | 3 +- db/import_column_family_job.cc | 2 +- db/plain_table_db_test.cc | 2 +- db/repair.cc | 2 +- db/table_cache.cc | 56 +++++----- db/table_cache.h | 101 +++++++++--------- db/version_builder.cc | 12 +-- db/version_builder.h | 10 +- db/version_edit_handler.cc | 2 +- db/version_set.cc | 35 +++--- table/block_based/block_based_table_reader.cc | 80 ++++++++------ table/block_based/block_based_table_reader.h | 39 +++---- .../block_based/data_block_hash_index_test.cc | 2 +- table/plain/plain_table_factory.cc | 2 +- table/sst_file_dumper.cc | 2 +- table/sst_file_reader.cc | 2 +- table/table_builder.h | 43 ++++---- table/table_reader_bench.cc | 4 +- table/table_test.cc | 9 +- util/slice.cc | 2 +- 26 files changed, 240 insertions(+), 215 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 0cea930e5..b0ab026fb 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -14,9 +14,13 @@ ### New Features * Added `Options::DisableExtraChecks()` that can be used to improve peak write performance by disabling checks that should not be necessary in the absence of software logic errors or CPU+memory hardware errors. (Default options are slowly moving toward some performance overheads for extra correctness checking.) +### Performance Improvements +* Improved read performance when a prefix extractor is used (Seek, Get, MultiGet), even compared to version 6.25 baseline (see bug fix below), by optimizing the common case of prefix extractor compatible with table file and unchanging. + ### Bug Fixes * Fix a bug that FlushMemTable may return ok even flush not succeed. * Fixed a bug of Sync() and Fsync() not using `fcntl(F_FULLFSYNC)` on OS X and iOS. +* Fixed a significant performance regression in version 6.26 when a prefix extractor is used on the read path (Seek, Get, MultiGet). (Excessive time was spent in SliceTransform::AsString().) ## 6.28.0 (2021-12-17) ### New Features diff --git a/db/builder.cc b/db/builder.cc index 3dccb93c0..48f338546 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -328,8 +328,8 @@ Status BuildTable( ReadOptions read_options; std::unique_ptr it(table_cache->NewIterator( read_options, file_options, tboptions.internal_comparator, *meta, - nullptr /* range_del_agg */, - mutable_cf_options.prefix_extractor.get(), nullptr, + nullptr /* range_del_agg */, mutable_cf_options.prefix_extractor, + nullptr, (internal_stats == nullptr) ? nullptr : internal_stats->GetFileReadHist(0), TableReaderCaller::kFlush, /*arena=*/nullptr, diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 4958af3c9..2d6f6fc95 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -790,8 +790,8 @@ Status CompactionJob::Run() { } } ColumnFamilyData* cfd = compact_->compaction->column_family_data(); - auto prefix_extractor = - compact_->compaction->mutable_cf_options()->prefix_extractor.get(); + auto& prefix_extractor = + compact_->compaction->mutable_cf_options()->prefix_extractor; std::atomic next_file_idx(0); auto verify_table = [&](Status& output_status) { while (true) { diff --git a/db/convenience.cc b/db/convenience.cc index 5af6515c8..097d1fd47 100644 --- a/db/convenience.cc +++ b/db/convenience.cc @@ -59,7 +59,7 @@ Status VerifySstFileChecksum(const Options& options, new RandomAccessFileReader(std::move(file), file_path)); const bool kImmortal = true; s = ioptions.table_factory->NewTableReader( - TableReaderOptions(ioptions, options.prefix_extractor.get(), env_options, + TableReaderOptions(ioptions, options.prefix_extractor, env_options, internal_comparator, false /* skip_filters */, !kImmortal, false /* force_direct_prefetch */, -1 /* level */), diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 6e77b64dc..e1e873396 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -573,7 +573,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( status = cfd_->ioptions()->table_factory->NewTableReader( TableReaderOptions( - *cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor.get(), + *cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor, env_options_, cfd_->internal_comparator(), /*skip_filters*/ false, /*immortal*/ false, /*force_direct_prefetch*/ false, /*level*/ -1, diff --git a/db/forward_iterator.cc b/db/forward_iterator.cc index 80dd1bb9e..a38e1e4bb 100644 --- a/db/forward_iterator.cc +++ b/db/forward_iterator.cc @@ -33,11 +33,11 @@ namespace ROCKSDB_NAMESPACE { // iter.Next() class ForwardLevelIterator : public InternalIterator { public: - ForwardLevelIterator(const ColumnFamilyData* const cfd, - const ReadOptions& read_options, - const std::vector& files, - const SliceTransform* prefix_extractor, - bool allow_unprepared_value) + ForwardLevelIterator( + const ColumnFamilyData* const cfd, const ReadOptions& read_options, + const std::vector& files, + const std::shared_ptr& prefix_extractor, + bool allow_unprepared_value) : cfd_(cfd), read_options_(read_options), files_(files), @@ -211,7 +211,8 @@ class ForwardLevelIterator : public InternalIterator { Status status_; InternalIterator* file_iter_; PinnedIteratorsManager* pinned_iters_mgr_; - const SliceTransform* prefix_extractor_; + // Kept alive by ForwardIterator::sv_->mutable_cf_options + const std::shared_ptr& prefix_extractor_; const bool allow_unprepared_value_; }; @@ -692,7 +693,7 @@ void ForwardIterator::RebuildIterators(bool refresh_sv) { l0_iters_.push_back(cfd_->table_cache()->NewIterator( read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0, read_options_.ignore_range_deletions ? nullptr : &range_del_agg, - sv_->mutable_cf_options.prefix_extractor.get(), + sv_->mutable_cf_options.prefix_extractor, /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, TableReaderCaller::kUserIterator, /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1, @@ -700,7 +701,7 @@ void ForwardIterator::RebuildIterators(bool refresh_sv) { /*smallest_compaction_key=*/nullptr, /*largest_compaction_key=*/nullptr, allow_unprepared_value_)); } - BuildLevelIterators(vstorage); + BuildLevelIterators(vstorage, sv_); current_ = nullptr; is_prev_set_ = false; @@ -772,7 +773,7 @@ void ForwardIterator::RenewIterators() { read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0_files_new[inew], read_options_.ignore_range_deletions ? nullptr : &range_del_agg, - svnew->mutable_cf_options.prefix_extractor.get(), + svnew->mutable_cf_options.prefix_extractor, /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, TableReaderCaller::kUserIterator, /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1, @@ -791,7 +792,7 @@ void ForwardIterator::RenewIterators() { DeleteIterator(l); } level_iters_.clear(); - BuildLevelIterators(vstorage_new); + BuildLevelIterators(vstorage_new, svnew); current_ = nullptr; is_prev_set_ = false; SVCleanup(); @@ -805,7 +806,8 @@ void ForwardIterator::RenewIterators() { } } -void ForwardIterator::BuildLevelIterators(const VersionStorageInfo* vstorage) { +void ForwardIterator::BuildLevelIterators(const VersionStorageInfo* vstorage, + SuperVersion* sv) { level_iters_.reserve(vstorage->num_levels() - 1); for (int32_t level = 1; level < vstorage->num_levels(); ++level) { const auto& level_files = vstorage->LevelFiles(level); @@ -821,8 +823,7 @@ void ForwardIterator::BuildLevelIterators(const VersionStorageInfo* vstorage) { } else { level_iters_.push_back(new ForwardLevelIterator( cfd_, read_options_, level_files, - sv_->mutable_cf_options.prefix_extractor.get(), - allow_unprepared_value_)); + sv->mutable_cf_options.prefix_extractor, allow_unprepared_value_)); } } } @@ -838,7 +839,7 @@ void ForwardIterator::ResetIncompleteIterators() { l0_iters_[i] = cfd_->table_cache()->NewIterator( read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0_files[i], /*range_del_agg=*/nullptr, - sv_->mutable_cf_options.prefix_extractor.get(), + sv_->mutable_cf_options.prefix_extractor, /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, TableReaderCaller::kUserIterator, /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1, diff --git a/db/forward_iterator.h b/db/forward_iterator.h index 99434d858..fa6c77218 100644 --- a/db/forward_iterator.h +++ b/db/forward_iterator.h @@ -97,7 +97,8 @@ class ForwardIterator : public InternalIterator { void RebuildIterators(bool refresh_sv); void RenewIterators(); - void BuildLevelIterators(const VersionStorageInfo* vstorage); + void BuildLevelIterators(const VersionStorageInfo* vstorage, + SuperVersion* sv); void ResetIncompleteIterators(); void SeekInternal(const Slice& internal_key, bool seek_to_first); void UpdateCurrent(); diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index cffdfcd7e..c7c69d937 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -227,7 +227,7 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo( status = cfd_->ioptions()->table_factory->NewTableReader( TableReaderOptions( - *cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor.get(), + *cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor, env_options_, cfd_->internal_comparator(), /*skip_filters*/ false, /*immortal*/ false, /*force_direct_prefetch*/ false, /*level*/ -1, diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index 9932de039..9ac55e0fe 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -368,7 +368,7 @@ class TestPlainTableFactory : public PlainTableFactory { table_reader_options.internal_comparator, encoding_type, file_size, bloom_bits_per_key_, hash_table_ratio_, index_sparseness_, std::move(props), std::move(file), table_reader_options.ioptions, - table_reader_options.prefix_extractor, expect_bloom_not_match_, + table_reader_options.prefix_extractor.get(), expect_bloom_not_match_, store_index_in_file_, column_family_id_, column_family_name_)); *table = std::move(new_reader); diff --git a/db/repair.cc b/db/repair.cc index acec61bad..dbf4614d1 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -544,7 +544,7 @@ class Repairer { InternalIterator* iter = table_cache_->NewIterator( ropts, file_options_, cfd->internal_comparator(), t->meta, nullptr /* range_del_agg */, - cfd->GetLatestMutableCFOptions()->prefix_extractor.get(), + cfd->GetLatestMutableCFOptions()->prefix_extractor, /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, TableReaderCaller::kRepair, /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1, /*max_file_size_for_l0_meta_pin=*/0, diff --git a/db/table_cache.cc b/db/table_cache.cc index aea78769a..78e1eaad2 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -102,8 +102,8 @@ Status TableCache::GetTableReader( const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist, std::unique_ptr* table_reader, - const SliceTransform* prefix_extractor, bool skip_filters, int level, - bool prefetch_index_and_filter_in_cache, + const std::shared_ptr& prefix_extractor, + bool skip_filters, int level, bool prefetch_index_and_filter_in_cache, size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) { std::string fname = TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId()); @@ -160,7 +160,8 @@ void TableCache::EraseHandle(const FileDescriptor& fd, Cache::Handle* handle) { Status TableCache::FindTable( const ReadOptions& ro, const FileOptions& file_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, - Cache::Handle** handle, const SliceTransform* prefix_extractor, + Cache::Handle** handle, + const std::shared_ptr& prefix_extractor, const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist, bool skip_filters, int level, bool prefetch_index_and_filter_in_cache, size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) { @@ -209,7 +210,8 @@ Status TableCache::FindTable( InternalIterator* TableCache::NewIterator( const ReadOptions& options, const FileOptions& file_options, const InternalKeyComparator& icomparator, const FileMetaData& file_meta, - RangeDelAggregator* range_del_agg, const SliceTransform* prefix_extractor, + RangeDelAggregator* range_del_agg, + const std::shared_ptr& prefix_extractor, TableReader** table_reader_ptr, HistogramImpl* file_read_hist, TableReaderCaller caller, Arena* arena, bool skip_filters, int level, size_t max_file_size_for_l0_meta_pin, @@ -243,10 +245,9 @@ InternalIterator* TableCache::NewIterator( !options.table_filter(*table_reader->GetTableProperties())) { result = NewEmptyInternalIterator(arena); } else { - result = table_reader->NewIterator(options, prefix_extractor, arena, - skip_filters, caller, - file_options.compaction_readahead_size, - allow_unprepared_value); + result = table_reader->NewIterator( + options, prefix_extractor.get(), arena, skip_filters, caller, + file_options.compaction_readahead_size, allow_unprepared_value); } if (handle != nullptr) { result->RegisterCleanup(&UnrefEntry, cache_, handle); @@ -395,13 +396,13 @@ bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, } #endif // ROCKSDB_LITE -Status TableCache::Get(const ReadOptions& options, - const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, const Slice& k, - GetContext* get_context, - const SliceTransform* prefix_extractor, - HistogramImpl* file_read_hist, bool skip_filters, - int level, size_t max_file_size_for_l0_meta_pin) { +Status TableCache::Get( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + const std::shared_ptr& prefix_extractor, + HistogramImpl* file_read_hist, bool skip_filters, int level, + size_t max_file_size_for_l0_meta_pin) { auto& fd = file_meta.fd; std::string* row_cache_entry = nullptr; bool done = false; @@ -451,7 +452,7 @@ Status TableCache::Get(const ReadOptions& options, } if (s.ok()) { get_context->SetReplayLog(row_cache_entry); // nullptr if no cache. - s = t->Get(options, k, get_context, prefix_extractor, skip_filters); + s = t->Get(options, k, get_context, prefix_extractor.get(), skip_filters); get_context->SetReplayLog(nullptr); } else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) { // Couldn't find Table in cache but treat as kFound if no_io set @@ -482,13 +483,12 @@ Status TableCache::Get(const ReadOptions& options, } // Batched version of TableCache::MultiGet. -Status TableCache::MultiGet(const ReadOptions& options, - const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, - const MultiGetContext::Range* mget_range, - const SliceTransform* prefix_extractor, - HistogramImpl* file_read_hist, bool skip_filters, - int level) { +Status TableCache::MultiGet( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const MultiGetContext::Range* mget_range, + const std::shared_ptr& prefix_extractor, + HistogramImpl* file_read_hist, bool skip_filters, int level) { auto& fd = file_meta.fd; Status s; TableReader* t = fd.table_reader; @@ -559,7 +559,7 @@ Status TableCache::MultiGet(const ReadOptions& options, } } if (s.ok()) { - t->MultiGet(options, &table_range, prefix_extractor, skip_filters); + t->MultiGet(options, &table_range, prefix_extractor.get(), skip_filters); } else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) { for (auto iter = table_range.begin(); iter != table_range.end(); ++iter) { Status* status = iter->s; @@ -612,7 +612,7 @@ Status TableCache::GetTableProperties( const FileOptions& file_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, std::shared_ptr* properties, - const SliceTransform* prefix_extractor, bool no_io) { + const std::shared_ptr& prefix_extractor, bool no_io) { auto table_reader = fd.table_reader; // table already been pre-loaded? if (table_reader) { @@ -637,7 +637,7 @@ Status TableCache::GetTableProperties( size_t TableCache::GetMemoryUsageByTableReader( const FileOptions& file_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, - const SliceTransform* prefix_extractor) { + const std::shared_ptr& prefix_extractor) { auto table_reader = fd.table_reader; // table already been pre-loaded? if (table_reader) { @@ -674,7 +674,7 @@ void TableCache::Evict(Cache* cache, uint64_t file_number) { uint64_t TableCache::ApproximateOffsetOf( const Slice& key, const FileDescriptor& fd, TableReaderCaller caller, const InternalKeyComparator& internal_comparator, - const SliceTransform* prefix_extractor) { + const std::shared_ptr& prefix_extractor) { uint64_t result = 0; TableReader* table_reader = fd.table_reader; Cache::Handle* table_handle = nullptr; @@ -701,7 +701,7 @@ uint64_t TableCache::ApproximateOffsetOf( uint64_t TableCache::ApproximateSize( const Slice& start, const Slice& end, const FileDescriptor& fd, TableReaderCaller caller, const InternalKeyComparator& internal_comparator, - const SliceTransform* prefix_extractor) { + const std::shared_ptr& prefix_extractor) { uint64_t result = 0; TableReader* table_reader = fd.table_reader; Cache::Handle* table_handle = nullptr; diff --git a/db/table_cache.h b/db/table_cache.h index da172833b..fce50775b 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -73,9 +73,10 @@ class TableCache { const ReadOptions& options, const FileOptions& toptions, const InternalKeyComparator& internal_comparator, const FileMetaData& file_meta, RangeDelAggregator* range_del_agg, - const SliceTransform* prefix_extractor, TableReader** table_reader_ptr, - HistogramImpl* file_read_hist, TableReaderCaller caller, Arena* arena, - bool skip_filters, int level, size_t max_file_size_for_l0_meta_pin, + const std::shared_ptr& prefix_extractor, + TableReader** table_reader_ptr, HistogramImpl* file_read_hist, + TableReaderCaller caller, Arena* arena, bool skip_filters, int level, + size_t max_file_size_for_l0_meta_pin, const InternalKey* smallest_compaction_key, const InternalKey* largest_compaction_key, bool allow_unprepared_value); @@ -89,13 +90,13 @@ class TableCache { // recorded // @param skip_filters Disables loading/accessing the filter block // @param level The level this table is at, -1 for "not set / don't know" - Status Get(const ReadOptions& options, - const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, const Slice& k, - GetContext* get_context, - const SliceTransform* prefix_extractor = nullptr, - HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, - int level = -1, size_t max_file_size_for_l0_meta_pin = 0); + Status Get( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const Slice& k, GetContext* get_context, + const std::shared_ptr& prefix_extractor = nullptr, + HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, + int level = -1, size_t max_file_size_for_l0_meta_pin = 0); // Return the range delete tombstone iterator of the file specified by // `file_meta`. @@ -114,13 +115,13 @@ class TableCache { // in the embedded GetContext // @param skip_filters Disables loading/accessing the filter block // @param level The level this table is at, -1 for "not set / don't know" - Status MultiGet(const ReadOptions& options, - const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, - const MultiGetContext::Range* mget_range, - const SliceTransform* prefix_extractor = nullptr, - HistogramImpl* file_read_hist = nullptr, - bool skip_filters = false, int level = -1); + Status MultiGet( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, const MultiGetContext::Range* mget_range, + const std::shared_ptr& prefix_extractor = nullptr, + HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, + int level = -1); // Evict any entry for the specified file number static void Evict(Cache* cache, uint64_t file_number); @@ -135,16 +136,16 @@ class TableCache { // Find table reader // @param skip_filters Disables loading/accessing the filter block // @param level == -1 means not specified - Status FindTable(const ReadOptions& ro, const FileOptions& toptions, - const InternalKeyComparator& internal_comparator, - const FileDescriptor& file_fd, Cache::Handle**, - const SliceTransform* prefix_extractor = nullptr, - const bool no_io = false, bool record_read_stats = true, - HistogramImpl* file_read_hist = nullptr, - bool skip_filters = false, int level = -1, - bool prefetch_index_and_filter_in_cache = true, - size_t max_file_size_for_l0_meta_pin = 0, - Temperature file_temperature = Temperature::kUnknown); + Status FindTable( + const ReadOptions& ro, const FileOptions& toptions, + const InternalKeyComparator& internal_comparator, + const FileDescriptor& file_fd, Cache::Handle**, + const std::shared_ptr& prefix_extractor = nullptr, + const bool no_io = false, bool record_read_stats = true, + HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, + int level = -1, bool prefetch_index_and_filter_in_cache = true, + size_t max_file_size_for_l0_meta_pin = 0, + Temperature file_temperature = Temperature::kUnknown); // Get TableReader from a cache handle. TableReader* GetTableReaderFromHandle(Cache::Handle* handle); @@ -155,12 +156,13 @@ class TableCache { // @returns: `properties` will be reset on success. Please note that we will // return Status::Incomplete() if table is not present in cache and // we set `no_io` to be true. - Status GetTableProperties(const FileOptions& toptions, - const InternalKeyComparator& internal_comparator, - const FileDescriptor& file_meta, - std::shared_ptr* properties, - const SliceTransform* prefix_extractor = nullptr, - bool no_io = false); + Status GetTableProperties( + const FileOptions& toptions, + const InternalKeyComparator& internal_comparator, + const FileDescriptor& file_meta, + std::shared_ptr* properties, + const std::shared_ptr& prefix_extractor = nullptr, + bool no_io = false); // Return total memory usage of the table reader of the file. // 0 if table reader of the file is not loaded. @@ -168,20 +170,21 @@ class TableCache { const FileOptions& toptions, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, - const SliceTransform* prefix_extractor = nullptr); + const std::shared_ptr& prefix_extractor = nullptr); // Returns approximated offset of a key in a file represented by fd. uint64_t ApproximateOffsetOf( const Slice& key, const FileDescriptor& fd, TableReaderCaller caller, const InternalKeyComparator& internal_comparator, - const SliceTransform* prefix_extractor = nullptr); + const std::shared_ptr& prefix_extractor = nullptr); // Returns approximated data size between start and end keys in a file // represented by fd (the start key must not be greater than the end key). - uint64_t ApproximateSize(const Slice& start, const Slice& end, - const FileDescriptor& fd, TableReaderCaller caller, - const InternalKeyComparator& internal_comparator, - const SliceTransform* prefix_extractor = nullptr); + uint64_t ApproximateSize( + const Slice& start, const Slice& end, const FileDescriptor& fd, + TableReaderCaller caller, + const InternalKeyComparator& internal_comparator, + const std::shared_ptr& prefix_extractor = nullptr); // Release the handle from a cache void ReleaseHandle(Cache::Handle* handle); @@ -202,16 +205,16 @@ class TableCache { private: // Build a table reader - Status GetTableReader(const ReadOptions& ro, const FileOptions& file_options, - const InternalKeyComparator& internal_comparator, - const FileDescriptor& fd, bool sequential_mode, - bool record_read_stats, HistogramImpl* file_read_hist, - std::unique_ptr* table_reader, - const SliceTransform* prefix_extractor = nullptr, - bool skip_filters = false, int level = -1, - bool prefetch_index_and_filter_in_cache = true, - size_t max_file_size_for_l0_meta_pin = 0, - Temperature file_temperature = Temperature::kUnknown); + Status GetTableReader( + const ReadOptions& ro, const FileOptions& file_options, + const InternalKeyComparator& internal_comparator, + const FileDescriptor& fd, bool sequential_mode, bool record_read_stats, + HistogramImpl* file_read_hist, std::unique_ptr* table_reader, + const std::shared_ptr& prefix_extractor = nullptr, + bool skip_filters = false, int level = -1, + bool prefetch_index_and_filter_in_cache = true, + size_t max_file_size_for_l0_meta_pin = 0, + Temperature file_temperature = Temperature::kUnknown); // Create a key prefix for looking up the row cache. The prefix is of the // format row_cache_id + fd_number + seq_no. Later, the user key can be diff --git a/db/version_builder.cc b/db/version_builder.cc index ccb1dc511..10908e323 100644 --- a/db/version_builder.cc +++ b/db/version_builder.cc @@ -1143,11 +1143,11 @@ class VersionBuilder::Rep { return s; } - Status LoadTableHandlers(InternalStats* internal_stats, int max_threads, - bool prefetch_index_and_filter_in_cache, - bool is_initial_load, - const SliceTransform* prefix_extractor, - size_t max_file_size_for_l0_meta_pin) { + Status LoadTableHandlers( + InternalStats* internal_stats, int max_threads, + bool prefetch_index_and_filter_in_cache, bool is_initial_load, + const std::shared_ptr& prefix_extractor, + size_t max_file_size_for_l0_meta_pin) { assert(table_cache_ != nullptr); size_t table_cache_capacity = table_cache_->get_cache()->GetCapacity(); @@ -1272,7 +1272,7 @@ Status VersionBuilder::SaveTo(VersionStorageInfo* vstorage) const { Status VersionBuilder::LoadTableHandlers( InternalStats* internal_stats, int max_threads, bool prefetch_index_and_filter_in_cache, bool is_initial_load, - const SliceTransform* prefix_extractor, + const std::shared_ptr& prefix_extractor, size_t max_file_size_for_l0_meta_pin) { return rep_->LoadTableHandlers( internal_stats, max_threads, prefetch_index_and_filter_in_cache, diff --git a/db/version_builder.h b/db/version_builder.h index b56257a13..add1edac9 100644 --- a/db/version_builder.h +++ b/db/version_builder.h @@ -39,11 +39,11 @@ class VersionBuilder { bool CheckConsistencyForNumLevels(); Status Apply(const VersionEdit* edit); Status SaveTo(VersionStorageInfo* vstorage) const; - Status LoadTableHandlers(InternalStats* internal_stats, int max_threads, - bool prefetch_index_and_filter_in_cache, - bool is_initial_load, - const SliceTransform* prefix_extractor, - size_t max_file_size_for_l0_meta_pin); + Status LoadTableHandlers( + InternalStats* internal_stats, int max_threads, + bool prefetch_index_and_filter_in_cache, bool is_initial_load, + const std::shared_ptr& prefix_extractor, + size_t max_file_size_for_l0_meta_pin); uint64_t GetMinOldestBlobFileNumber() const; private: diff --git a/db/version_edit_handler.cc b/db/version_edit_handler.cc index 3fc25a010..4d44cb917 100644 --- a/db/version_edit_handler.cc +++ b/db/version_edit_handler.cc @@ -564,7 +564,7 @@ Status VersionEditHandler::LoadTables(ColumnFamilyData* cfd, cfd->internal_stats(), version_set_->db_options_->max_file_opening_threads, prefetch_index_and_filter_in_cache, is_initial_load, - cfd->GetLatestMutableCFOptions()->prefix_extractor.get(), + cfd->GetLatestMutableCFOptions()->prefix_extractor, MaxFileSizeForL0MetaPin(*cfd->GetLatestMutableCFOptions())); if ((s.IsPathNotFound() || s.IsCorruption()) && no_error_if_files_missing_) { s = Status::OK(); diff --git a/db/version_set.cc b/db/version_set.cc index 0f96848b1..342c7462f 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -858,9 +858,10 @@ class LevelIterator final : public InternalIterator { const FileOptions& file_options, const InternalKeyComparator& icomparator, const LevelFilesBrief* flevel, - const SliceTransform* prefix_extractor, bool should_sample, - HistogramImpl* file_read_hist, TableReaderCaller caller, - bool skip_filters, int level, RangeDelAggregator* range_del_agg, + const std::shared_ptr& prefix_extractor, + bool should_sample, HistogramImpl* file_read_hist, + TableReaderCaller caller, bool skip_filters, int level, + RangeDelAggregator* range_del_agg, const std::vector* compaction_boundaries = nullptr, bool allow_unprepared_value = false) @@ -1011,7 +1012,7 @@ class LevelIterator final : public InternalIterator { // `prefix_extractor_` may be non-null even for total order seek. Checking // this variable is not the right way to identify whether prefix iterator // is used. - const SliceTransform* prefix_extractor_; + const std::shared_ptr& prefix_extractor_; HistogramImpl* file_read_hist_; bool should_sample_; @@ -1243,7 +1244,7 @@ Status Version::GetTableProperties(std::shared_ptr* tp, auto ioptions = cfd_->ioptions(); Status s = table_cache->GetTableProperties( file_options_, cfd_->internal_comparator(), file_meta->fd, tp, - mutable_cf_options_.prefix_extractor.get(), true /* no io */); + mutable_cf_options_.prefix_extractor, true /* no io */); if (s.ok()) { return s; } @@ -1436,7 +1437,7 @@ size_t Version::GetMemoryUsageByTableReaders() { for (size_t i = 0; i < file_level.num_files; i++) { total_usage += cfd_->table_cache()->GetMemoryUsageByTableReader( file_options_, cfd_->internal_comparator(), file_level.files[i].fd, - mutable_cf_options_.prefix_extractor.get()); + mutable_cf_options_.prefix_extractor); } } return total_usage; @@ -1616,7 +1617,7 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options, merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator( read_options, soptions, cfd_->internal_comparator(), *file.file_metadata, range_del_agg, - mutable_cf_options_.prefix_extractor.get(), nullptr, + mutable_cf_options_.prefix_extractor, nullptr, cfd_->internal_stats()->GetFileReadHist(0), TableReaderCaller::kUserIterator, arena, /*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_, @@ -1640,7 +1641,7 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options, merge_iter_builder->AddIterator(new (mem) LevelIterator( cfd_->table_cache(), read_options, soptions, cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level), - mutable_cf_options_.prefix_extractor.get(), should_sample_file_read(), + mutable_cf_options_.prefix_extractor, should_sample_file_read(), cfd_->internal_stats()->GetFileReadHist(level), TableReaderCaller::kUserIterator, IsFilterSkipped(level), level, range_del_agg, @@ -1675,7 +1676,7 @@ Status Version::OverlapWithLevelIterator(const ReadOptions& read_options, ScopedArenaIterator iter(cfd_->table_cache()->NewIterator( read_options, file_options, cfd_->internal_comparator(), *file->file_metadata, &range_del_agg, - mutable_cf_options_.prefix_extractor.get(), nullptr, + mutable_cf_options_.prefix_extractor, nullptr, cfd_->internal_stats()->GetFileReadHist(0), TableReaderCaller::kUserIterator, &arena, /*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_, @@ -1693,7 +1694,7 @@ Status Version::OverlapWithLevelIterator(const ReadOptions& read_options, ScopedArenaIterator iter(new (mem) LevelIterator( cfd_->table_cache(), read_options, file_options, cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level), - mutable_cf_options_.prefix_extractor.get(), should_sample_file_read(), + mutable_cf_options_.prefix_extractor, should_sample_file_read(), cfd_->internal_stats()->GetFileReadHist(level), TableReaderCaller::kUserIterator, IsFilterSkipped(level), level, &range_del_agg)); @@ -2026,7 +2027,7 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, StopWatchNano timer(clock_, timer_enabled /* auto_start */); *status = table_cache_->Get( read_options, *internal_comparator(), *f->file_metadata, ikey, - &get_context, mutable_cf_options_.prefix_extractor.get(), + &get_context, mutable_cf_options_.prefix_extractor, cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()), IsFilterSkipped(static_cast(fp.GetHitFileLevel()), fp.IsHitFileLastInLevel()), @@ -2196,7 +2197,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, StopWatchNano timer(clock_, timer_enabled /* auto_start */); s = table_cache_->MultiGet( read_options, *internal_comparator(), *f->file_metadata, &file_range, - mutable_cf_options_.prefix_extractor.get(), + mutable_cf_options_.prefix_extractor, cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()), IsFilterSkipped(static_cast(fp.GetHitFileLevel()), fp.IsHitFileLastInLevel()), @@ -4374,7 +4375,7 @@ Status VersionSet::ProcessManifestWrites( cfd->internal_stats(), 1 /* max_threads */, true /* prefetch_index_and_filter_in_cache */, false /* is_initial_load */, - mutable_cf_options_ptrs[i]->prefix_extractor.get(), + mutable_cf_options_ptrs[i]->prefix_extractor, MaxFileSizeForL0MetaPin(*mutable_cf_options_ptrs[i])); if (!s.ok()) { if (db_options_->paranoid_checks) { @@ -5630,7 +5631,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const FdWithKeyRange& f, if (table_cache != nullptr) { result = table_cache->ApproximateOffsetOf( key, f.file_metadata->fd, caller, icmp, - v->GetMutableCFOptions().prefix_extractor.get()); + v->GetMutableCFOptions().prefix_extractor); } } return result; @@ -5670,7 +5671,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f, } return table_cache->ApproximateSize( start, end, f.file_metadata->fd, caller, icmp, - v->GetMutableCFOptions().prefix_extractor.get()); + v->GetMutableCFOptions().prefix_extractor); } void VersionSet::AddLiveFiles(std::vector* live_table_files, @@ -5762,7 +5763,7 @@ InternalIterator* VersionSet::MakeInputIterator( list[num++] = cfd->table_cache()->NewIterator( read_options, file_options_compactions, cfd->internal_comparator(), *flevel->files[i].file_metadata, - range_del_agg, c->mutable_cf_options()->prefix_extractor.get(), + range_del_agg, c->mutable_cf_options()->prefix_extractor, /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, TableReaderCaller::kCompaction, /*arena=*/nullptr, @@ -5778,7 +5779,7 @@ InternalIterator* VersionSet::MakeInputIterator( list[num++] = new LevelIterator( cfd->table_cache(), read_options, file_options_compactions, cfd->internal_comparator(), c->input_levels(which), - c->mutable_cf_options()->prefix_extractor.get(), + c->mutable_cf_options()->prefix_extractor, /*should_sample=*/false, /*no per level latency histogram=*/nullptr, TableReaderCaller::kCompaction, /*skip_filters=*/false, diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 8928e586a..97e85d7e1 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -125,8 +125,9 @@ void ReleaseCachedEntry(void* arg, void* h) { // For hash based index, return true if prefix_extractor and // prefix_extractor_block mismatch, false otherwise. This flag will be used // as total_order_seek via NewIndexIterator -bool PrefixExtractorChanged(const TableProperties* table_properties, - const SliceTransform* prefix_extractor) { +inline bool PrefixExtractorChangedHelper( + const TableProperties* table_properties, + const SliceTransform* prefix_extractor) { // BlockBasedTableOptions::kHashSearch requires prefix_extractor to be set. // Turn off hash index in prefix_extractor is not set; if prefix_extractor // is set but prefix_extractor_block is not set, also disable hash index @@ -550,7 +551,7 @@ Status BlockBasedTable::Open( const InternalKeyComparator& internal_comparator, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, - const SliceTransform* prefix_extractor, + const std::shared_ptr& prefix_extractor, const bool prefetch_index_and_filter_in_cache, const bool skip_filters, const int level, const bool immortal_table, const SequenceNumber largest_seqno, const bool force_direct_prefetch, @@ -630,7 +631,7 @@ Status BlockBasedTable::Open( // handle prefix correctly. if (prefix_extractor != nullptr) { rep->internal_prefix_transform.reset( - new InternalKeySliceTransform(prefix_extractor)); + new InternalKeySliceTransform(prefix_extractor.get())); } // For fully portable/stable cache keys, we need to read the properties @@ -661,6 +662,30 @@ Status BlockBasedTable::Open( if (!s.ok()) { return s; } + if (!PrefixExtractorChangedHelper(rep->table_properties.get(), + prefix_extractor.get())) { + // Establish fast path for unchanged prefix_extractor + rep->table_prefix_extractor = prefix_extractor; + } else { + // Current prefix_extractor doesn't match table +#ifndef ROCKSDB_LITE + if (rep->table_properties) { + //**TODO: If/When the DBOptions has a registry in it, the ConfigOptions + // will need to use it + ConfigOptions config_options; + Status st = SliceTransform::CreateFromString( + config_options, rep->table_properties->prefix_extractor_name, + &(rep->table_prefix_extractor)); + if (!st.ok()) { + //**TODO: Should this be error be returned or swallowed? + ROCKS_LOG_ERROR(rep->ioptions.logger, + "Failed to create prefix extractor[%s]: %s", + rep->table_properties->prefix_extractor_name.c_str(), + st.ToString().c_str()); + } + } +#endif // ROCKSDB_LITE + } // With properties loaded, we can set up portable/stable cache keys SetupBaseCacheKey(rep->table_properties.get(), cur_db_session_id, @@ -792,23 +817,6 @@ Status BlockBasedTable::ReadPropertiesBlock( ROCKS_LOG_ERROR(rep_->ioptions.logger, "Cannot find Properties block from file."); } -#ifndef ROCKSDB_LITE - if (rep_->table_properties) { - //**TODO: If/When the DBOptions has a registry in it, the ConfigOptions - // will need to use it - ConfigOptions config_options; - Status st = SliceTransform::CreateFromString( - config_options, rep_->table_properties->prefix_extractor_name, - &(rep_->table_prefix_extractor)); - if (!st.ok()) { - //**TODO: Should this be error be returned or swallowed? - ROCKS_LOG_ERROR(rep_->ioptions.logger, - "Failed to create prefix extractor[%s]: %s", - rep_->table_properties->prefix_extractor_name.c_str(), - st.ToString().c_str()); - } - } -#endif // ROCKSDB_LITE // Read the table properties, if provided. if (rep_->table_properties) { @@ -2148,6 +2156,17 @@ bool BlockBasedTable::PrefixMayMatch( return may_match; } +bool BlockBasedTable::PrefixExtractorChanged( + const SliceTransform* prefix_extractor) const { + if (prefix_extractor == nullptr) { + return true; + } else if (prefix_extractor == rep_->table_prefix_extractor.get()) { + return false; + } else { + return PrefixExtractorChangedHelper(rep_->table_properties.get(), + prefix_extractor); + } +} InternalIterator* BlockBasedTable::NewIterator( const ReadOptions& read_options, const SliceTransform* prefix_extractor, @@ -2155,8 +2174,7 @@ InternalIterator* BlockBasedTable::NewIterator( size_t compaction_readahead_size, bool allow_unprepared_value) { BlockCacheLookupContext lookup_context{caller}; bool need_upper_bound_check = - read_options.auto_prefix_mode || - PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor); + read_options.auto_prefix_mode || PrefixExtractorChanged(prefix_extractor); std::unique_ptr> index_iter(NewIndexIterator( read_options, need_upper_bound_check && @@ -2210,9 +2228,8 @@ bool BlockBasedTable::FullFilterKeyMayMatch( may_match = filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid, no_io, const_ikey_ptr, get_context, lookup_context); - } else if (!read_options.total_order_seek && prefix_extractor && - rep_->table_properties->prefix_extractor_name == - prefix_extractor->AsString() && + } else if (!read_options.total_order_seek && + !PrefixExtractorChanged(prefix_extractor) && prefix_extractor->InDomain(user_key_without_ts) && !filter->PrefixMayMatch( prefix_extractor->Transform(user_key_without_ts), @@ -2252,9 +2269,8 @@ void BlockBasedTable::FullFilterKeysMayMatch( PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, filtered_keys, rep_->level); } - } else if (!read_options.total_order_seek && prefix_extractor && - rep_->table_properties->prefix_extractor_name == - prefix_extractor->AsString()) { + } else if (!read_options.total_order_seek && + !PrefixExtractorChanged(prefix_extractor)) { filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false, lookup_context); RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys); @@ -2305,8 +2321,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, // BlockPrefixIndex. Only do this check when index_type is kHashSearch. bool need_upper_bound_check = false; if (rep_->index_type == BlockBasedTableOptions::kHashSearch) { - need_upper_bound_check = PrefixExtractorChanged( - rep_->table_properties.get(), prefix_extractor); + need_upper_bound_check = PrefixExtractorChanged(prefix_extractor); } auto iiter = NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack, @@ -2490,8 +2505,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options, // BlockPrefixIndex. Only do this check when index_type is kHashSearch. bool need_upper_bound_check = false; if (rep_->index_type == BlockBasedTableOptions::kHashSearch) { - need_upper_bound_check = PrefixExtractorChanged( - rep_->table_properties.get(), prefix_extractor); + need_upper_bound_check = PrefixExtractorChanged(prefix_extractor); } auto iiter = NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack, diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 3e5bd0e71..c5875c73c 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -14,6 +14,7 @@ #include "cache/cache_key.h" #include "db/range_tombstone_fragmenter.h" #include "file/filename.h" +#include "rocksdb/slice_transform.h" #include "rocksdb/table_properties.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_factory.h" @@ -90,24 +91,22 @@ class BlockBasedTable : public TableReader { // are set. // @param force_direct_prefetch if true, always prefetching to RocksDB // buffer, rather than calling RandomAccessFile::Prefetch(). - static Status Open(const ReadOptions& ro, const ImmutableOptions& ioptions, - const EnvOptions& env_options, - const BlockBasedTableOptions& table_options, - const InternalKeyComparator& internal_key_comparator, - std::unique_ptr&& file, - uint64_t file_size, - std::unique_ptr* table_reader, - const SliceTransform* prefix_extractor = nullptr, - bool prefetch_index_and_filter_in_cache = true, - bool skip_filters = false, int level = -1, - const bool immortal_table = false, - const SequenceNumber largest_seqno = 0, - bool force_direct_prefetch = false, - TailPrefetchStats* tail_prefetch_stats = nullptr, - BlockCacheTracer* const block_cache_tracer = nullptr, - size_t max_file_size_for_l0_meta_pin = 0, - const std::string& cur_db_session_id = "", - uint64_t cur_file_num = 0); + static Status Open( + const ReadOptions& ro, const ImmutableOptions& ioptions, + const EnvOptions& env_options, + const BlockBasedTableOptions& table_options, + const InternalKeyComparator& internal_key_comparator, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table_reader, + const std::shared_ptr& prefix_extractor = nullptr, + bool prefetch_index_and_filter_in_cache = true, bool skip_filters = false, + int level = -1, const bool immortal_table = false, + const SequenceNumber largest_seqno = 0, + bool force_direct_prefetch = false, + TailPrefetchStats* tail_prefetch_stats = nullptr, + BlockCacheTracer* const block_cache_tracer = nullptr, + size_t max_file_size_for_l0_meta_pin = 0, + const std::string& cur_db_session_id = "", uint64_t cur_file_num = 0); bool PrefixMayMatch(const Slice& internal_key, const ReadOptions& read_options, @@ -506,6 +505,10 @@ class BlockBasedTable : public TableReader { void DumpKeyValue(const Slice& key, const Slice& value, std::ostream& out_stream); + // Returns true if prefix_extractor is compatible with that used in building + // the table file. + bool PrefixExtractorChanged(const SliceTransform* prefix_extractor) const; + // A cumulative data block file read in MultiGet lower than this size will // use a stack buffer static constexpr size_t kMultiGetReadStackBufSize = 8192; diff --git a/table/block_based/data_block_hash_index_test.cc b/table/block_based/data_block_hash_index_test.cc index 121f78cef..3f1aed7b5 100644 --- a/table/block_based/data_block_hash_index_test.cc +++ b/table/block_based/data_block_hash_index_test.cc @@ -580,7 +580,7 @@ void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2, const bool kSkipFilters = true; const bool kImmortal = true; ASSERT_OK(ioptions.table_factory->NewTableReader( - TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions, + TableReaderOptions(ioptions, moptions.prefix_extractor, soptions, internal_comparator, !kSkipFilters, !kImmortal, level_), std::move(file_reader), sink->contents().size(), &table_reader)); diff --git a/table/plain/plain_table_factory.cc b/table/plain/plain_table_factory.cc index 3db484b05..0060a912c 100644 --- a/table/plain/plain_table_factory.cc +++ b/table/plain/plain_table_factory.cc @@ -67,7 +67,7 @@ Status PlainTableFactory::NewTableReader( table, table_options_.bloom_bits_per_key, table_options_.hash_table_ratio, table_options_.index_sparseness, table_options_.huge_page_tlb_size, table_options_.full_scan_mode, table_reader_options.immortal, - table_reader_options.prefix_extractor); + table_reader_options.prefix_extractor.get()); } TableBuilder* PlainTableFactory::NewTableBuilder( diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index 8d36717df..2fc47264b 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -150,7 +150,7 @@ Status SstFileDumper::NewTableReader( const InternalKeyComparator& /*internal_comparator*/, uint64_t file_size, std::unique_ptr* /*table_reader*/) { auto t_opt = - TableReaderOptions(ioptions_, moptions_.prefix_extractor.get(), soptions_, + TableReaderOptions(ioptions_, moptions_.prefix_extractor, soptions_, internal_comparator_, false /* skip_filters */, false /* imortal */, true /* force_direct_prefetch */); // Allow open file with global sequence number for backward compatibility. diff --git a/table/sst_file_reader.cc b/table/sst_file_reader.cc index e106bca9d..48f1be0be 100644 --- a/table/sst_file_reader.cc +++ b/table/sst_file_reader.cc @@ -56,7 +56,7 @@ Status SstFileReader::Open(const std::string& file_path) { file_reader.reset(new RandomAccessFileReader(std::move(file), file_path)); } if (s.ok()) { - TableReaderOptions t_opt(r->ioptions, r->moptions.prefix_extractor.get(), + TableReaderOptions t_opt(r->ioptions, r->moptions.prefix_extractor, r->soptions, r->ioptions.internal_comparator); // Allow open file with global sequence number for backward compatibility. t_opt.largest_seqno = kMaxSequenceNumber; diff --git a/table/table_builder.h b/table/table_builder.h index 92011910d..6060c6ab5 100644 --- a/table/table_builder.h +++ b/table/table_builder.h @@ -30,16 +30,16 @@ class Status; struct TableReaderOptions { // @param skip_filters Disables loading/accessing the filter block - TableReaderOptions(const ImmutableOptions& _ioptions, - const SliceTransform* _prefix_extractor, - const EnvOptions& _env_options, - const InternalKeyComparator& _internal_comparator, - bool _skip_filters = false, bool _immortal = false, - bool _force_direct_prefetch = false, int _level = -1, - BlockCacheTracer* const _block_cache_tracer = nullptr, - size_t _max_file_size_for_l0_meta_pin = 0, - const std::string& _cur_db_session_id = "", - uint64_t _cur_file_num = 0) + TableReaderOptions( + const ImmutableOptions& _ioptions, + const std::shared_ptr& _prefix_extractor, + const EnvOptions& _env_options, + const InternalKeyComparator& _internal_comparator, + bool _skip_filters = false, bool _immortal = false, + bool _force_direct_prefetch = false, int _level = -1, + BlockCacheTracer* const _block_cache_tracer = nullptr, + size_t _max_file_size_for_l0_meta_pin = 0, + const std::string& _cur_db_session_id = "", uint64_t _cur_file_num = 0) : TableReaderOptions( _ioptions, _prefix_extractor, _env_options, _internal_comparator, _skip_filters, _immortal, _force_direct_prefetch, _level, @@ -48,17 +48,16 @@ struct TableReaderOptions { } // @param skip_filters Disables loading/accessing the filter block - TableReaderOptions(const ImmutableOptions& _ioptions, - const SliceTransform* _prefix_extractor, - const EnvOptions& _env_options, - const InternalKeyComparator& _internal_comparator, - bool _skip_filters, bool _immortal, - bool _force_direct_prefetch, int _level, - SequenceNumber _largest_seqno, - BlockCacheTracer* const _block_cache_tracer, - size_t _max_file_size_for_l0_meta_pin, - const std::string& _cur_db_session_id, - uint64_t _cur_file_num) + TableReaderOptions( + const ImmutableOptions& _ioptions, + const std::shared_ptr& _prefix_extractor, + const EnvOptions& _env_options, + const InternalKeyComparator& _internal_comparator, bool _skip_filters, + bool _immortal, bool _force_direct_prefetch, int _level, + SequenceNumber _largest_seqno, + BlockCacheTracer* const _block_cache_tracer, + size_t _max_file_size_for_l0_meta_pin, + const std::string& _cur_db_session_id, uint64_t _cur_file_num) : ioptions(_ioptions), prefix_extractor(_prefix_extractor), env_options(_env_options), @@ -74,7 +73,7 @@ struct TableReaderOptions { cur_file_num(_cur_file_num) {} const ImmutableOptions& ioptions; - const SliceTransform* prefix_extractor; + const std::shared_ptr& prefix_extractor; const EnvOptions& env_options; const InternalKeyComparator& internal_comparator; // This is only used for BlockBasedTable (reader) diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index df4a750d7..f4181018f 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -143,8 +143,8 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, std::unique_ptr file_reader( new RandomAccessFileReader(std::move(raf), file_name)); s = opts.table_factory->NewTableReader( - TableReaderOptions(ioptions, moptions.prefix_extractor.get(), - env_options, ikc), + TableReaderOptions(ioptions, moptions.prefix_extractor, env_options, + ikc), std::move(file_reader), file_size, &table_reader); if (!s.ok()) { fprintf(stderr, "Open Table Error: %s\n", s.ToString().c_str()); diff --git a/table/table_test.cc b/table/table_test.cc index 6a6a1bb95..1f314ec74 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -438,7 +438,7 @@ class TableConstructor : public Constructor { file_reader_.reset(new RandomAccessFileReader(std::move(source), "test")); return ioptions.table_factory->NewTableReader( - TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions, + TableReaderOptions(ioptions, moptions.prefix_extractor, soptions, *last_internal_comparator_, /*skip_filters*/ false, /*immortal*/ false, false, level_, largest_seqno_, &block_cache_tracer_, moptions.write_buffer_size, "", @@ -4472,8 +4472,8 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { new RandomAccessFileReader(std::move(source), "")); options.table_factory->NewTableReader( - TableReaderOptions(ioptions, moptions.prefix_extractor.get(), - EnvOptions(), ikc), + TableReaderOptions(ioptions, moptions.prefix_extractor, EnvOptions(), + ikc), std::move(file_reader), ss_rw.contents().size(), &table_reader); return table_reader->NewIterator( @@ -4640,8 +4640,7 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) { const MutableCFOptions moptions2(options2); ASSERT_OK(ioptions.table_factory->NewTableReader( - TableReaderOptions(ioptions2, moptions2.prefix_extractor.get(), - EnvOptions(), + TableReaderOptions(ioptions2, moptions2.prefix_extractor, EnvOptions(), GetPlainInternalComparator(options2.comparator)), std::move(file_reader), sink->contents().size(), &table_reader)); diff --git a/util/slice.cc b/util/slice.cc index 509b0e885..3c3656de9 100644 --- a/util/slice.cc +++ b/util/slice.cc @@ -267,7 +267,7 @@ Status SliceTransform::CreateFromString( } } return status; -} // namespace ROCKSDB_NAMESPACE +} std::string SliceTransform::AsString() const { #ifndef ROCKSDB_LITE