fc9d4071f0
Summary: Fixes a major performance regression in 6.26, where extra CPU is spent in SliceTransform::AsString when reads involve a prefix_extractor (Get, MultiGet, Seek). Common case performance is now better than 6.25. This change creates a "fast path" for verifying that the current prefix extractor is unchanged and compatible with what was used to generate a table file. This fast path detects the common case by pointer comparison on the current prefix_extractor and a "known good" prefix extractor (if applicable) that is saved at the time the table reader is opened. The "known good" prefix extractor is saved as another shared_ptr copy (in an existing field, however) to ensure the pointer is not recycled. When the prefix_extractor has changed to a different instance but same compatible configuration (rare, odd), performance is still a regression compared to 6.25, but this is likely acceptable because of the oddity of such a case. The performance of incompatible prefix_extractor is essentially unchanged. Also fixed a minor case (ForwardIterator) where a prefix_extractor could be used via a raw pointer after being freed as a shared_ptr, if replaced via SetOptions. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9407 Test Plan: ## Performance Populate DB with `TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -benchmarks=fillrandom -num=10000000 -disable_wal=1 -write_buffer_size=10000000 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -prefix_size=12` Running head-to-head comparisons simultaneously with `TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -use_existing_db -readonly -benchmarks=seekrandom -num=10000000 -duration=20 -disable_wal=1 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -prefix_size=12` Below each is compared by ops/sec vs. 
baseline which is version 6.25 (multiple baseline runs because of variable machine load) v6.26: 4833 vs. 6698 (<- major regression!) v6.27: 4737 vs. 6397 (still) New: 6704 vs. 6461 (better than baseline in common case) Disabled fastpath: 4843 vs. 6389 (e.g. if prefix extractor instance changes but is still compatible) Changed prefix size (no usable filter) in new: 787 vs. 5927 Changed prefix size (no usable filter) in new & baseline: 773 vs. 784 Reviewed By: mrambacher Differential Revision: D33677812 Pulled By: pdillinger fbshipit-source-id: 571d9711c461fb97f957378a061b7e7dbc4d6a76
340 lines
14 KiB
C++
340 lines
14 KiB
C++
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include "table/plain/plain_table_factory.h"
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <memory>
|
|
|
|
#include "db/dbformat.h"
|
|
#include "port/port.h"
|
|
#include "rocksdb/convenience.h"
|
|
#include "rocksdb/utilities/customizable_util.h"
|
|
#include "rocksdb/utilities/object_registry.h"
|
|
#include "rocksdb/utilities/options_type.h"
|
|
#include "table/plain/plain_table_builder.h"
|
|
#include "table/plain/plain_table_reader.h"
|
|
#include "util/string_util.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
#ifndef ROCKSDB_LITE
|
|
static std::unordered_map<std::string, OptionTypeInfo> plain_table_type_info = {
|
|
{"user_key_len",
|
|
{offsetof(struct PlainTableOptions, user_key_len), OptionType::kUInt32T,
|
|
OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
|
|
{"bloom_bits_per_key",
|
|
{offsetof(struct PlainTableOptions, bloom_bits_per_key), OptionType::kInt,
|
|
OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
|
|
{"hash_table_ratio",
|
|
{offsetof(struct PlainTableOptions, hash_table_ratio), OptionType::kDouble,
|
|
OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
|
|
{"index_sparseness",
|
|
{offsetof(struct PlainTableOptions, index_sparseness), OptionType::kSizeT,
|
|
OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
|
|
{"huge_page_tlb_size",
|
|
{offsetof(struct PlainTableOptions, huge_page_tlb_size),
|
|
OptionType::kSizeT, OptionVerificationType::kNormal,
|
|
OptionTypeFlags::kNone}},
|
|
{"encoding_type",
|
|
{offsetof(struct PlainTableOptions, encoding_type),
|
|
OptionType::kEncodingType, OptionVerificationType::kNormal,
|
|
OptionTypeFlags::kNone}},
|
|
{"full_scan_mode",
|
|
{offsetof(struct PlainTableOptions, full_scan_mode), OptionType::kBoolean,
|
|
OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
|
|
{"store_index_in_file",
|
|
{offsetof(struct PlainTableOptions, store_index_in_file),
|
|
OptionType::kBoolean, OptionVerificationType::kNormal,
|
|
OptionTypeFlags::kNone}},
|
|
};
|
|
|
|
// Constructs a factory that keeps its own copy of `options` and registers that
// copy, together with the plain_table_type_info field map, via
// RegisterOptions().
PlainTableFactory::PlainTableFactory(const PlainTableOptions& options)
    : table_options_(options) {
  // Register the member copy, not the caller's argument.
  auto* const owned_options = &table_options_;
  RegisterOptions(owned_options, &plain_table_type_info);
}
|
|
|
|
// Opens a reader for a plain-table file by delegating to
// PlainTableReader::Open().
//
// The ReadOptions argument and the prefetch flag are not used by this
// implementation. Ownership of `file` is moved into the Open() call, and the
// resulting reader is delivered through `table`. Tuning values come from this
// factory's table_options_; everything else comes from
// `table_reader_options`. Returns whatever Status Open() produces.
Status PlainTableFactory::NewTableReader(
    const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options,
    std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
    std::unique_ptr<TableReader>* table,
    bool /*prefetch_index_and_filter_in_cache*/) const {
  const auto& reader_opts = table_reader_options;
  const auto& plain_opts = table_options_;
  // Open() receives the prefix extractor as a raw pointer.
  const auto extractor = reader_opts.prefix_extractor.get();

  return PlainTableReader::Open(
      reader_opts.ioptions, reader_opts.env_options,
      reader_opts.internal_comparator, std::move(file), file_size, table,
      plain_opts.bloom_bits_per_key, plain_opts.hash_table_ratio,
      plain_opts.index_sparseness, plain_opts.huge_page_tlb_size,
      plain_opts.full_scan_mode, reader_opts.immortal, extractor);
}
|
|
|
|
// Allocates a PlainTableBuilder that writes to `file`.
//
// The builder is configured from `table_builder_options` plus this factory's
// table_options_. The returned pointer is heap-allocated with `new`.
//
// The skip_filters flag is deliberately not consulted: the PlainTable format
// is optimized for small in-memory dbs, so the skip_filters optimization is
// not useful for plain tables.
TableBuilder* PlainTableFactory::NewTableBuilder(
    const TableBuilderOptions& table_builder_options,
    WritableFileWriter* file) const {
  const auto& build_opts = table_builder_options;
  const auto& plain_opts = table_options_;

  return new PlainTableBuilder(
      build_opts.ioptions, build_opts.moptions,
      build_opts.int_tbl_prop_collector_factories, build_opts.column_family_id,
      build_opts.level_at_creation, file, plain_opts.user_key_len,
      plain_opts.encoding_type, plain_opts.index_sparseness,
      plain_opts.bloom_bits_per_key, build_opts.column_family_name,
      // NOTE(review): hard-coded value; presumably the number of bloom probes
      // -- confirm against the PlainTableBuilder constructor.
      6,
      plain_opts.huge_page_tlb_size, plain_opts.hash_table_ratio,
      plain_opts.store_index_in_file, build_opts.db_id,
      build_opts.db_session_id, build_opts.cur_file_num);
}
|
|
|
|
// Renders every field of table_options_ as one "name: value" line and returns
// the concatenation. Each line is formatted into a fixed 200-byte scratch
// buffer with snprintf before being appended to the result.
std::string PlainTableFactory::GetPrintableOptions() const {
  char line[200];
  const auto& opts = table_options_;

  std::string printable;
  printable.reserve(20000);

  snprintf(line, sizeof(line), " user_key_len: %u\n", opts.user_key_len);
  printable += line;

  snprintf(line, sizeof(line), " bloom_bits_per_key: %d\n",
           opts.bloom_bits_per_key);
  printable += line;

  snprintf(line, sizeof(line), " hash_table_ratio: %lf\n",
           opts.hash_table_ratio);
  printable += line;

  snprintf(line, sizeof(line), " index_sparseness: %" ROCKSDB_PRIszt "\n",
           opts.index_sparseness);
  printable += line;

  snprintf(line, sizeof(line), " huge_page_tlb_size: %" ROCKSDB_PRIszt "\n",
           opts.huge_page_tlb_size);
  printable += line;

  snprintf(line, sizeof(line), " encoding_type: %d\n", opts.encoding_type);
  printable += line;

  snprintf(line, sizeof(line), " full_scan_mode: %d\n", opts.full_scan_mode);
  printable += line;

  snprintf(line, sizeof(line), " store_index_in_file: %d\n",
           opts.store_index_in_file);
  printable += line;

  return printable;
}
|
|
|
|
// Convenience overload without a ConfigOptions argument. It builds a
// ConfigOptions with escaped-input handling, unknown-option tolerance and
// prepare-options invocation all turned off, then forwards to the
// ConfigOptions overload.
Status GetPlainTableOptionsFromString(const PlainTableOptions& table_options,
                                      const std::string& opts_str,
                                      PlainTableOptions* new_table_options) {
  ConfigOptions strict_config;
  strict_config.invoke_prepare_options = false;
  strict_config.ignore_unknown_options = false;
  strict_config.input_strings_escaped = false;
  return GetPlainTableOptionsFromString(strict_config, table_options, opts_str,
                                        new_table_options);
}
|
|
|
|
// Parses `opts_str` into a name/value map with StringToMap() and applies it on
// top of `table_options` through GetPlainTableOptionsFromMap(), writing the
// outcome to `new_table_options`.
//
// A StringToMap() failure is returned unchanged. A failure from the map-based
// call that is not already InvalidArgument is re-wrapped as InvalidArgument
// carrying the original status state.
Status GetPlainTableOptionsFromString(const ConfigOptions& config_options,
                                      const PlainTableOptions& table_options,
                                      const std::string& opts_str,
                                      PlainTableOptions* new_table_options) {
  std::unordered_map<std::string, std::string> parsed_opts;
  Status s = StringToMap(opts_str, &parsed_opts);
  if (!s.ok()) {
    return s;
  }

  s = GetPlainTableOptionsFromMap(config_options, table_options, parsed_opts,
                                  new_table_options);
  // Translate any other error (NotFound, NotSupported, ...) to
  // InvalidArgument.
  const bool needs_translation = !s.ok() && !s.IsInvalidArgument();
  if (needs_translation) {
    return Status::InvalidArgument(s.getState());
  }
  return s;
}
|
|
#endif // ROCKSDB_LITE
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
// Adds the built-in MemTableRepFactory implementations to `library` and
// returns the value of library.GetFactoryCount() cast to int. The string
// argument is unused.
static int RegisterBuiltinMemTableRepFactory(ObjectLibrary& library,
                                             const std::string& /*arg*/) {
  // Each built-in factory can be named by its class name (VectorRepFactory) or
  // by a nickname (vector), optionally followed by ":#", where # is the "size"
  // handed to the factory.
  auto AsPattern = [](const std::string& name, const std::string& alt) {
    auto entry = ObjectLibrary::PatternEntry(name, true);
    entry.AnotherName(alt);
    entry.AddNumber(":");
    return entry;
  };

  // Vector memtable: optional ":<count>".
  library.AddFactory<MemTableRepFactory>(
      AsPattern(VectorRepFactory::kClassName(), VectorRepFactory::kNickName()),
      [](const std::string& uri, std::unique_ptr<MemTableRepFactory>* guard,
         std::string* /*errmsg*/) {
        const auto sep = uri.find(':');
        if (sep == std::string::npos) {
          guard->reset(new VectorRepFactory());
        } else {
          const size_t count = ParseSizeT(uri.substr(sep + 1));
          guard->reset(new VectorRepFactory(count));
        }
        return guard->get();
      });

  // Skip-list memtable: optional ":<lookahead>".
  library.AddFactory<MemTableRepFactory>(
      AsPattern(SkipListFactory::kClassName(), SkipListFactory::kNickName()),
      [](const std::string& uri, std::unique_ptr<MemTableRepFactory>* guard,
         std::string* /*errmsg*/) {
        const auto sep = uri.find(':');
        if (sep == std::string::npos) {
          guard->reset(new SkipListFactory());
        } else {
          const size_t lookahead = ParseSizeT(uri.substr(sep + 1));
          guard->reset(new SkipListFactory(lookahead));
        }
        return guard->get();
      });

  // Hash linked-list memtable. Expected format:
  // hash_linkedlist:<hash_bucket_count>
  library.AddFactory<MemTableRepFactory>(
      AsPattern("HashLinkListRepFactory", "hash_linkedlist"),
      [](const std::string& uri, std::unique_ptr<MemTableRepFactory>* guard,
         std::string* /*errmsg*/) {
        const auto sep = uri.find(':');
        if (sep == std::string::npos) {
          guard->reset(NewHashLinkListRepFactory());
        } else {
          const size_t hash_bucket_count = ParseSizeT(uri.substr(sep + 1));
          guard->reset(NewHashLinkListRepFactory(hash_bucket_count));
        }
        return guard->get();
      });

  // Hash skip-list memtable. Expected format:
  // prefix_hash:<hash_bucket_count>
  library.AddFactory<MemTableRepFactory>(
      AsPattern("HashSkipListRepFactory", "prefix_hash"),
      [](const std::string& uri, std::unique_ptr<MemTableRepFactory>* guard,
         std::string* /*errmsg*/) {
        const auto sep = uri.find(':');
        if (sep == std::string::npos) {
          guard->reset(NewHashSkipListRepFactory());
        } else {
          const size_t hash_bucket_count = ParseSizeT(uri.substr(sep + 1));
          guard->reset(NewHashSkipListRepFactory(hash_bucket_count));
        }
        return guard->get();
      });

  // "cuckoo" is still recognized, but only so that it can fail with an
  // explicit message instead of producing an object.
  library.AddFactory<MemTableRepFactory>(
      "cuckoo",
      [](const std::string& /*uri*/,
         std::unique_ptr<MemTableRepFactory>* /*guard*/, std::string* errmsg) {
        *errmsg = "cuckoo hash memtable is not supported anymore.";
        return nullptr;
      });

  // GetFactoryCount() requires an out-parameter; its value is not used here.
  size_t type_count = 0;
  return static_cast<int>(library.GetFactoryCount(&type_count));
}
|
|
#endif // ROCKSDB_LITE
|
|
|
|
// Creates a MemTableRepFactory from `opts_str` by calling
// MemTableRepFactory::CreateFromString() with a ConfigOptions in which neither
// unknown nor unsupported options are ignored. The factory is delivered
// through `result`.
Status GetMemTableRepFactoryFromString(
    const std::string& opts_str, std::unique_ptr<MemTableRepFactory>* result) {
  ConfigOptions strict_config;
  strict_config.ignore_unknown_options = false;
  strict_config.ignore_unsupported_options = false;
  return MemTableRepFactory::CreateFromString(strict_config, opts_str, result);
}
|
|
|
|
// Creates (or clears) the MemTableRepFactory held by `result` from the textual
// description `value`.
//
// Outcomes:
//   * Customizable::GetOptionsMap() fails  -> that status is returned.
//   * `value` is empty                     -> `result` is reset, OK returned.
//   * options present but no id            -> NotSupported.
//   * otherwise, non-LITE builds           -> NewUniqueObject() builds the
//                                             factory from the id and options.
//   * otherwise, LITE builds               -> only the skip-list factory can
//                                             be built, from "<name>[:<size>]".
Status MemTableRepFactory::CreateFromString(
    const ConfigOptions& config_options, const std::string& value,
    std::unique_ptr<MemTableRepFactory>* result) {
#ifndef ROCKSDB_LITE
  // Register the built-in factories with the default object library; the
  // static once_flag makes this run a single time.
  static std::once_flag register_once;
  std::call_once(register_once, []() {
    RegisterBuiltinMemTableRepFactory(*(ObjectLibrary::Default().get()), "");
  });
#endif  // ROCKSDB_LITE
  std::string id;
  std::unordered_map<std::string, std::string> opt_map;
  Status status = Customizable::GetOptionsMap(config_options, result->get(),
                                              value, &id, &opt_map);
  if (!status.ok()) {
    // GetOptionsMap failed.
    return status;
  }
  if (value.empty()) {
    // No id and no options: clear the object.
    result->reset();
    return Status::OK();
  }
  if (id.empty()) {
    // Options without an id cannot be applied.
    return Status::NotSupported("Cannot reset object ", id);
  }

#ifndef ROCKSDB_LITE
  status = NewUniqueObject<MemTableRepFactory>(config_options, id, opt_map,
                                               result);
#else
  // To make it possible to configure the memtables in LITE mode, the ID is of
  // the form <name>:<size>, where <name> is the name of the class and <size>
  // is the length of the object (e.g. skip_list:10).
  std::vector<std::string> id_parts = StringSplit(id, ':');
  const bool malformed =
      id_parts.empty() || id_parts.size() > 2 || !opt_map.empty();
  if (malformed) {
    status = Status::InvalidArgument("Can't parse memtable_factory option ",
                                     value);
  } else if (id_parts[0] == "skip_list" ||
             id_parts[0] == SkipListFactory::kClassName()) {
    // Expected format: skip_list:<lookahead>
    if (id_parts.size() == 2) {
      size_t lookahead = ParseSizeT(id_parts[1]);
      result->reset(new SkipListFactory(lookahead));
    } else {
      result->reset(new SkipListFactory());
    }
  } else if (!config_options.ignore_unsupported_options) {
    status = Status::NotSupported("Cannot load object in LITE mode ", id);
  }
#endif  // ROCKSDB_LITE
  return status;
}
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
// Flag-based overload: copies the two boolean flags into a fresh ConfigOptions
// and forwards to the ConfigOptions overload of GetPlainTableOptionsFromMap().
Status GetPlainTableOptionsFromMap(
    const PlainTableOptions& table_options,
    const std::unordered_map<std::string, std::string>& opts_map,
    PlainTableOptions* new_table_options, bool input_strings_escaped,
    bool ignore_unknown_options) {
  ConfigOptions forwarded_config;
  forwarded_config.ignore_unknown_options = ignore_unknown_options;
  forwarded_config.input_strings_escaped = input_strings_escaped;
  return GetPlainTableOptionsFromMap(forwarded_config, table_options, opts_map,
                                     new_table_options);
}
|
|
|
|
// Applies the name/value pairs in `opts_map` on top of `table_options` and
// stores the result in `new_table_options` (asserted non-null).
//
// The pairs are applied to a temporary PlainTableFactory seeded with
// `table_options`. If that succeeds, the factory's resulting options are
// copied out; if it fails, `new_table_options` is set to an unmodified copy of
// `table_options`. The configuration status is returned in both cases.
Status GetPlainTableOptionsFromMap(
    const ConfigOptions& config_options, const PlainTableOptions& table_options,
    const std::unordered_map<std::string, std::string>& opts_map,
    PlainTableOptions* new_table_options) {
  assert(new_table_options);
  PlainTableFactory scratch_factory(table_options);
  Status s = scratch_factory.ConfigureFromMap(config_options, opts_map);
  if (!s.ok()) {
    // Configuration failed: hand back the caller's base options untouched.
    *new_table_options = table_options;
  } else {
    *new_table_options = *(scratch_factory.GetOptions<PlainTableOptions>());
  }
  return s;
}
|
|
|
|
// Allocates a PlainTableFactory configured with `options` and returns it as a
// TableFactory pointer. The object is created with `new`.
extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options) {
  TableFactory* const factory = new PlainTableFactory(options);
  return factory;
}
|
|
|
|
// Definitions of the PlainTablePropertyNames string constants. All three
// share the "rocksdb.plain.table." prefix.
const std::string PlainTablePropertyNames::kEncodingType(
    "rocksdb.plain.table.encoding.type");

const std::string PlainTablePropertyNames::kBloomVersion(
    "rocksdb.plain.table.bloom.version");

const std::string PlainTablePropertyNames::kNumBloomBlocks(
    "rocksdb.plain.table.bloom.numblocks");
|
|
|
|
#endif // ROCKSDB_LITE
|
|
} // namespace ROCKSDB_NAMESPACE
|