fc9d4071f0
Summary: Fixes a major performance regression in 6.26, where extra CPU is spent in SliceTransform::AsString when reads involve a prefix_extractor (Get, MultiGet, Seek). Common case performance is now better than 6.25. This change creates a "fast path" for verifying that the current prefix extractor is unchanged and compatible with what was used to generate a table file. This fast path detects the common case by pointer comparison on the current prefix_extractor and a "known good" prefix extractor (if applicable) that is saved at the time the table reader is opened. The "known good" prefix extractor is saved as another shared_ptr copy (in an existing field, however) to ensure the pointer is not recycled. When the prefix_extractor has changed to a different instance but same compatible configuration (rare, odd), performance is still a regression compared to 6.25, but this is likely acceptable because of the oddity of such a case. The performance of incompatible prefix_extractor is essentially unchanged. Also fixed a minor case (ForwardIterator) where a prefix_extractor could be used via a raw pointer after being freed as a shared_ptr, if replaced via SetOptions. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9407 Test Plan: ## Performance Populate DB with `TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -benchmarks=fillrandom -num=10000000 -disable_wal=1 -write_buffer_size=10000000 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -prefix_size=12` Running head-to-head comparisons simultaneously with `TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -use_existing_db -readonly -benchmarks=seekrandom -num=10000000 -duration=20 -disable_wal=1 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -prefix_size=12` Below each is compared by ops/sec vs. 
baseline which is version 6.25 (multiple baseline runs because of variable machine load) v6.26: 4833 vs. 6698 (<- major regression!) v6.27: 4737 vs. 6397 (still) New: 6704 vs. 6461 (better than baseline in common case) Disabled fastpath: 4843 vs. 6389 (e.g. if prefix extractor instance changes but is still compatible) Changed prefix size (no usable filter) in new: 787 vs. 5927 Changed prefix size (no usable filter) in new & baseline: 773 vs. 784 Reviewed By: mrambacher Differential Revision: D33677812 Pulled By: pdillinger fbshipit-source-id: 571d9711c461fb97f957378a061b7e7dbc4d6a76
102 lines
3.4 KiB
C++
102 lines
3.4 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
#include "rocksdb/sst_file_reader.h"
|
|
|
|
#include "db/arena_wrapped_db_iter.h"
|
|
#include "db/db_iter.h"
|
|
#include "db/dbformat.h"
|
|
#include "file/random_access_file_reader.h"
|
|
#include "options/cf_options.h"
|
|
#include "rocksdb/env.h"
|
|
#include "rocksdb/file_system.h"
|
|
#include "table/get_context.h"
|
|
#include "table/table_builder.h"
|
|
#include "table/table_reader.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
// Private implementation state for SstFileReader: the options supplied at
// construction, the option objects derived from them, and the table reader
// that Open() creates.
struct SstFileReader::Rep {
  // Copy of the Options passed to the SstFileReader constructor.
  Options options;
  // Built from `options`; Open() uses it to construct FileOptions and
  // TableReaderOptions.
  EnvOptions soptions;
  // Built from `options`; passed to the table reader and to the iterator.
  ImmutableOptions ioptions;
  // Built from the ColumnFamilyOptions view of `options`; supplies the
  // prefix extractor and max_sequential_skip_in_iterations.
  MutableCFOptions moptions;

  // Filled in by Open() through TableFactory::NewTableReader().
  std::unique_ptr<TableReader> table_reader;

  // `explicit` prevents an Options value from silently converting to a Rep.
  //
  // NOTE: the initializers below read the *member* `options`, not the
  // parameter `opts`, so `options` must remain declared before the members
  // that are derived from it.
  explicit Rep(const Options& opts)
      : options(opts),
        soptions(options),
        ioptions(options),
        moptions(ColumnFamilyOptions(options)) {}
};
|
|
|
|
// Builds the reader's private state from `options`. No file is touched here;
// call Open() to attach an SST file.
SstFileReader::SstFileReader(const Options& options)
    : rep_(new Rep(options)) {
}
|
|
|
|
// Nothing to do beyond destroying the members. Kept as an out-of-line
// definition in this file, where Rep is fully defined.
SstFileReader::~SstFileReader() = default;
|
|
|
|
// Opens the SST file at `file_path` and creates a table reader for it.
//
// Steps: query the file size, open the file for random access, wrap it in a
// RandomAccessFileReader, then ask the configured table factory for a
// TableReader, which is stored in rep_->table_reader.
//
// Returns the first non-OK Status encountered, or the Status of
// NewTableReader() if every earlier step succeeded.
Status SstFileReader::Open(const std::string& file_path) {
  auto* const rep = rep_.get();
  FileOptions file_opts(rep->soptions);
  const auto& file_system = rep->options.env->GetFileSystem();

  uint64_t size_in_bytes = 0;
  Status status = file_system->GetFileSize(file_path, file_opts.io_options,
                                           &size_in_bytes, nullptr);
  if (!status.ok()) {
    return status;
  }

  std::unique_ptr<FSRandomAccessFile> raw_file;
  status =
      file_system->NewRandomAccessFile(file_path, file_opts, &raw_file, nullptr);
  if (!status.ok()) {
    return status;
  }

  std::unique_ptr<RandomAccessFileReader> reader(
      new RandomAccessFileReader(std::move(raw_file), file_path));

  TableReaderOptions table_opts(rep->ioptions, rep->moptions.prefix_extractor,
                                rep->soptions,
                                rep->ioptions.internal_comparator);
  // Allow open file with global sequence number for backward compatibility.
  table_opts.largest_seqno = kMaxSequenceNumber;

  return rep->options.table_factory->NewTableReader(
      table_opts, std::move(reader), size_in_bytes, &rep->table_reader);
}
|
|
|
|
// Creates an iterator over the table held in rep_->table_reader.
//
// The visible sequence number is taken from `roptions.snapshot` when one is
// set, otherwise kMaxSequenceNumber is used. The returned object is allocated
// with `new` and handed to the caller as a raw pointer.
//
// NOTE(review): rep_->table_reader is dereferenced without a null check, so
// this appears to require a prior Open() that set it -- confirm with callers.
Iterator* SstFileReader::NewIterator(const ReadOptions& roptions) {
  auto* const rep = rep_.get();

  auto visible_seq = kMaxSequenceNumber;
  if (roptions.snapshot != nullptr) {
    visible_seq = roptions.snapshot->GetSequenceNumber();
  }

  auto* const db_iter = new ArenaWrappedDBIter();
  db_iter->Init(rep->options.env, roptions, rep->ioptions, rep->moptions,
                nullptr /* version */, visible_seq,
                rep->moptions.max_sequential_skip_in_iterations,
                0 /* version_number */, nullptr /* read_callback */,
                nullptr /* db_impl */, nullptr /* cfd */,
                true /* expose_blob_index */, false /* allow_refresh */);

  // The table iterator is created with the wrapper's arena and read options,
  // then installed underneath the wrapper.
  auto* const table_iter = rep->table_reader->NewIterator(
      db_iter->GetReadOptions(), rep->moptions.prefix_extractor.get(),
      db_iter->GetArena(), false /* skip_filters */,
      TableReaderCaller::kSSTFileReader);
  db_iter->SetIterUnderDBIter(table_iter);

  return db_iter;
}
|
|
|
|
std::shared_ptr<const TableProperties> SstFileReader::GetTableProperties()
|
|
const {
|
|
return rep_->table_reader->GetTableProperties();
|
|
}
|
|
|
|
// Forwards to the table reader's VerifyChecksum(), identifying the caller as
// TableReaderCaller::kSSTFileReader, and returns its Status.
// NOTE(review): the table reader is dereferenced without a null check, so
// this appears to require a prior Open() that set it -- confirm with callers.
Status SstFileReader::VerifyChecksum(const ReadOptions& read_options) {
  auto& reader = *rep_->table_reader;
  return reader.VerifyChecksum(read_options,
                               TableReaderCaller::kSSTFileReader);
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
#endif // !ROCKSDB_LITE
|