rocksdb/table/plain/plain_table_factory.cc

340 lines
14 KiB
C++
Raw Permalink Normal View History

// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/plain/plain_table_factory.h"
#include <stdint.h>
#include <memory>
#include "db/dbformat.h"
#include "port/port.h"
#include "rocksdb/convenience.h"
#include "rocksdb/utilities/customizable_util.h"
#include "rocksdb/utilities/object_registry.h"
#include "rocksdb/utilities/options_type.h"
#include "table/plain/plain_table_builder.h"
#include "table/plain/plain_table_reader.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
#ifndef ROCKSDB_LITE
static std::unordered_map<std::string, OptionTypeInfo> plain_table_type_info = {
{"user_key_len",
{offsetof(struct PlainTableOptions, user_key_len), OptionType::kUInt32T,
OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
{"bloom_bits_per_key",
{offsetof(struct PlainTableOptions, bloom_bits_per_key), OptionType::kInt,
OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
{"hash_table_ratio",
{offsetof(struct PlainTableOptions, hash_table_ratio), OptionType::kDouble,
OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
{"index_sparseness",
{offsetof(struct PlainTableOptions, index_sparseness), OptionType::kSizeT,
OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
{"huge_page_tlb_size",
{offsetof(struct PlainTableOptions, huge_page_tlb_size),
OptionType::kSizeT, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
{"encoding_type",
{offsetof(struct PlainTableOptions, encoding_type),
OptionType::kEncodingType, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
{"full_scan_mode",
{offsetof(struct PlainTableOptions, full_scan_mode), OptionType::kBoolean,
OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
{"store_index_in_file",
{offsetof(struct PlainTableOptions, store_index_in_file),
OptionType::kBoolean, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
};
PlainTableFactory::PlainTableFactory(const PlainTableOptions& options)
: table_options_(options) {
RegisterOptions(&table_options_, &plain_table_type_info);
}
Status PlainTableFactory::NewTableReader(
const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options,
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table,
bool /*prefetch_index_and_filter_in_cache*/) const {
return PlainTableReader::Open(
table_reader_options.ioptions, table_reader_options.env_options,
table_reader_options.internal_comparator, std::move(file), file_size,
table, table_options_.bloom_bits_per_key, table_options_.hash_table_ratio,
table_options_.index_sparseness, table_options_.huge_page_tlb_size,
table_options_.full_scan_mode, table_reader_options.immortal,
Fast path for detecting unchanged prefix_extractor (#9407) Summary: Fixes a major performance regression in 6.26, where extra CPU is spent in SliceTransform::AsString when reads involve a prefix_extractor (Get, MultiGet, Seek). Common case performance is now better than 6.25. This change creates a "fast path" for verifying that the current prefix extractor is unchanged and compatible with what was used to generate a table file. This fast path detects the common case by pointer comparison on the current prefix_extractor and a "known good" prefix extractor (if applicable) that is saved at the time the table reader is opened. The "known good" prefix extractor is saved as another shared_ptr copy (in an existing field, however) to ensure the pointer is not recycled. When the prefix_extractor has changed to a different instance but same compatible configuration (rare, odd), performance is still a regression compared to 6.25, but this is likely acceptable because of the oddity of such a case. The performance of incompatible prefix_extractor is essentially unchanged. Also fixed a minor case (ForwardIterator) where a prefix_extractor could be used via a raw pointer after being freed as a shared_ptr, if replaced via SetOptions. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9407 Test Plan: ## Performance Populate DB with `TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -benchmarks=fillrandom -num=10000000 -disable_wal=1 -write_buffer_size=10000000 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -prefix_size=12` Running head-to-head comparisons simultaneously with `TEST_TMPDIR=/dev/shm/rocksdb ./db_bench -use_existing_db -readonly -benchmarks=seekrandom -num=10000000 -duration=20 -disable_wal=1 -bloom_bits=16 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -prefix_size=12` Below each is compared by ops/sec vs. baseline which is version 6.25 (multiple baseline runs because of variable machine load) v6.26: 4833 vs. 6698 (<- major regression!) v6.27: 4737 vs. 6397 (still) New: 6704 vs. 6461 (better than baseline in common case) Disabled fastpath: 4843 vs. 6389 (e.g. if prefix extractor instance changes but is still compatible) Changed prefix size (no usable filter) in new: 787 vs. 5927 Changed prefix size (no usable filter) in new & baseline: 773 vs. 784 Reviewed By: mrambacher Differential Revision: D33677812 Pulled By: pdillinger fbshipit-source-id: 571d9711c461fb97f957378a061b7e7dbc4d6a76
2022-01-21 20:36:36 +01:00
table_reader_options.prefix_extractor.get());
}
TableBuilder* PlainTableFactory::NewTableBuilder(
const TableBuilderOptions& table_builder_options,
WritableFileWriter* file) const {
// Ignore the skip_filters flag. PlainTable format is optimized for small
// in-memory dbs. The skip_filters optimization is not useful for plain
// tables
//
return new PlainTableBuilder(
table_builder_options.ioptions, table_builder_options.moptions,
table_builder_options.int_tbl_prop_collector_factories,
Support "level_at_creation" in TablePropertiesCollectorFactory::Context (#8919) Summary: Context: Exposing the level of the sst file (i.e, table) where it is created in `TablePropertiesCollectorFactory::Context` allows users of `TablePropertiesCollectorFactory` to customize some implementation details of `TablePropertiesCollectorFactory` and `TablePropertiesCollector` based on the level of creation. For example, `TablePropertiesCollector::NeedCompact()` can return different values based on level of creation. - Declared an extra field `level_at_creation` in `TablePropertiesCollectorFactory::Context` - Allowed `level_at_creation` to be passed in as an argument in `IntTblPropCollectorFactory::CreateIntTblPropCollector()` and `UserKeyTablePropertiesCollectorFactory::CreateIntTblPropCollector()`, the latter of which is an internal wrapper of user's passed-in `TablePropertiesCollectorFactory::CreateTablePropertiesCollector()` used in table-building process - Called `IntTblPropCollectorFactory::CreateIntTblPropCollector()` with `level_at_creation` passed into both `BlockBasedTableBuilder` and `PlainTableBuilder` - `PlainTableBuilder` previously did not capture `level_at_creation` from `TableBuilderOptions` in `PlainTableFactory`. In order for it to call the method with this parameter, this PR also made `PlainTableBuilder` capture `level_at_creation` as a required parameter - Called `IntTblPropCollectorFactory::CreateIntTblPropCollector()` with `level_at_creation` its overridden functions in its derived classes, including `RegularKeysStartWithAFactory::CreateIntTblPropCollector()` in `table_properties_collector_test.cc`, `SstFileWriterPropertiesCollectorFactory::CreateIntTblPropCollector()` in `sst_file_writer_collectors.h` Pull Request resolved: https://github.com/facebook/rocksdb/pull/8919 Test Plan: - Passed the added assertion for `context.level_at_creation` - Passed existing tests - Run `Make` to make sure adding a required parameter to `PlainTableBuilder`'s constructor does not break anything Reviewed By: anand1976 Differential Revision: D30951729 Pulled By: hx235 fbshipit-source-id: c4a0173b0d9344a4cf47e1b987d759c1c73cb474
2021-09-28 21:33:03 +02:00
table_builder_options.column_family_id,
table_builder_options.level_at_creation, file,
table_options_.user_key_len, table_options_.encoding_type,
table_options_.index_sparseness, table_options_.bloom_bits_per_key,
table_builder_options.column_family_name, 6,
table_options_.huge_page_tlb_size, table_options_.hash_table_ratio,
table_options_.store_index_in_file, table_builder_options.db_id,
Embed original file number in SST table properties (#8686) Summary: I very recently realized that with https://github.com/facebook/rocksdb/issues/8669 we cannot later add file numbers to external SST files (so that more can share db session ids for better uniqueness properties), because of forward compatibility. We would have a version of RocksDB that assumes session IDs are unique on external SST files and therefore can't really break that invariant in future files. This change adds a table property for "orig_file_number" which is populated by normal SST files and also external SST files generated by SstFileWriter. SstFileWriter now keeps a db_session_id for life of the object and increments its own file numbers for embedding in table properties. (They are arguably "fake" file numbers because these numbers and not embedded in the file name.) While updating block_based_table_builder, I removed several unnecessary fields from Rep, because following the pattern would have created another unnecessary field. This change also updates block_based_table_reader to use this new property when available, which means that for newer SST files, we can determine the stable/original <db_session_id,file_number> unique identifier using just the file contents, not the file name. (It's a bit complicated; detailed comments in block_based_table_reader.) Also added DB host id to properties listing by sst_dump, which could be useful in debugging. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8686 Test Plan: majorly overhauled StableCacheKeys test for this change Reviewed By: zhichao-cao Differential Revision: D30457742 Pulled By: pdillinger fbshipit-source-id: 2e5ae7dddeb94fb9d8eac8a928486aed8b8cd445
2021-08-21 05:39:52 +02:00
table_builder_options.db_session_id, table_builder_options.cur_file_num);
}
std::string PlainTableFactory::GetPrintableOptions() const {
std::string ret;
ret.reserve(20000);
const int kBufferSize = 200;
char buffer[kBufferSize];
snprintf(buffer, kBufferSize, " user_key_len: %u\n",
table_options_.user_key_len);
ret.append(buffer);
snprintf(buffer, kBufferSize, " bloom_bits_per_key: %d\n",
table_options_.bloom_bits_per_key);
ret.append(buffer);
snprintf(buffer, kBufferSize, " hash_table_ratio: %lf\n",
table_options_.hash_table_ratio);
ret.append(buffer);
snprintf(buffer, kBufferSize, " index_sparseness: %" ROCKSDB_PRIszt "\n",
table_options_.index_sparseness);
ret.append(buffer);
snprintf(buffer, kBufferSize, " huge_page_tlb_size: %" ROCKSDB_PRIszt "\n",
table_options_.huge_page_tlb_size);
ret.append(buffer);
snprintf(buffer, kBufferSize, " encoding_type: %d\n",
table_options_.encoding_type);
ret.append(buffer);
snprintf(buffer, kBufferSize, " full_scan_mode: %d\n",
table_options_.full_scan_mode);
ret.append(buffer);
snprintf(buffer, kBufferSize, " store_index_in_file: %d\n",
table_options_.store_index_in_file);
ret.append(buffer);
return ret;
}
Status GetPlainTableOptionsFromString(const PlainTableOptions& table_options,
const std::string& opts_str,
PlainTableOptions* new_table_options) {
ConfigOptions config_options;
config_options.input_strings_escaped = false;
config_options.ignore_unknown_options = false;
config_options.invoke_prepare_options = false;
return GetPlainTableOptionsFromString(config_options, table_options, opts_str,
new_table_options);
}
Status GetPlainTableOptionsFromString(const ConfigOptions& config_options,
const PlainTableOptions& table_options,
const std::string& opts_str,
PlainTableOptions* new_table_options) {
std::unordered_map<std::string, std::string> opts_map;
Status s = StringToMap(opts_str, &opts_map);
if (!s.ok()) {
return s;
}
s = GetPlainTableOptionsFromMap(config_options, table_options, opts_map,
new_table_options);
// Translate any errors (NotFound, NotSupported, to InvalidArgument
if (s.ok() || s.IsInvalidArgument()) {
return s;
} else {
return Status::InvalidArgument(s.getState());
}
}
#endif // ROCKSDB_LITE
#ifndef ROCKSDB_LITE
static int RegisterBuiltinMemTableRepFactory(ObjectLibrary& library,
const std::string& /*arg*/) {
// The MemTableRepFactory built-in classes will be either a class
// (VectorRepFactory) or a nickname (vector), followed optionally by ":#",
// where # is the "size" of the factory.
auto AsPattern = [](const std::string& name, const std::string& alt) {
auto pattern = ObjectLibrary::PatternEntry(name, true);
pattern.AnotherName(alt);
pattern.AddNumber(":");
return pattern;
};
library.AddFactory<MemTableRepFactory>(
AsPattern(VectorRepFactory::kClassName(), VectorRepFactory::kNickName()),
[](const std::string& uri, std::unique_ptr<MemTableRepFactory>* guard,
std::string* /*errmsg*/) {
auto colon = uri.find(":");
if (colon != std::string::npos) {
size_t count = ParseSizeT(uri.substr(colon + 1));
guard->reset(new VectorRepFactory(count));
} else {
guard->reset(new VectorRepFactory());
}
return guard->get();
});
library.AddFactory<MemTableRepFactory>(
AsPattern(SkipListFactory::kClassName(), SkipListFactory::kNickName()),
[](const std::string& uri, std::unique_ptr<MemTableRepFactory>* guard,
std::string* /*errmsg*/) {
auto colon = uri.find(":");
if (colon != std::string::npos) {
size_t lookahead = ParseSizeT(uri.substr(colon + 1));
guard->reset(new SkipListFactory(lookahead));
} else {
guard->reset(new SkipListFactory());
}
return guard->get();
});
library.AddFactory<MemTableRepFactory>(
AsPattern("HashLinkListRepFactory", "hash_linkedlist"),
[](const std::string& uri, std::unique_ptr<MemTableRepFactory>* guard,
std::string* /*errmsg*/) {
// Expecting format: hash_linkedlist:<hash_bucket_count>
auto colon = uri.find(":");
if (colon != std::string::npos) {
size_t hash_bucket_count = ParseSizeT(uri.substr(colon + 1));
guard->reset(NewHashLinkListRepFactory(hash_bucket_count));
} else {
guard->reset(NewHashLinkListRepFactory());
}
return guard->get();
});
library.AddFactory<MemTableRepFactory>(
AsPattern("HashSkipListRepFactory", "prefix_hash"),
[](const std::string& uri, std::unique_ptr<MemTableRepFactory>* guard,
std::string* /*errmsg*/) {
// Expecting format: prefix_hash:<hash_bucket_count>
auto colon = uri.find(":");
if (colon != std::string::npos) {
size_t hash_bucket_count = ParseSizeT(uri.substr(colon + 1));
guard->reset(NewHashSkipListRepFactory(hash_bucket_count));
} else {
guard->reset(NewHashSkipListRepFactory());
}
return guard->get();
});
library.AddFactory<MemTableRepFactory>(
"cuckoo",
[](const std::string& /*uri*/,
std::unique_ptr<MemTableRepFactory>* /*guard*/, std::string* errmsg) {
*errmsg = "cuckoo hash memtable is not supported anymore.";
return nullptr;
});
size_t num_types;
return static_cast<int>(library.GetFactoryCount(&num_types));
}
#endif // ROCKSDB_LITE
Status GetMemTableRepFactoryFromString(
const std::string& opts_str, std::unique_ptr<MemTableRepFactory>* result) {
ConfigOptions config_options;
config_options.ignore_unsupported_options = false;
config_options.ignore_unknown_options = false;
return MemTableRepFactory::CreateFromString(config_options, opts_str, result);
}
Status MemTableRepFactory::CreateFromString(
const ConfigOptions& config_options, const std::string& value,
std::unique_ptr<MemTableRepFactory>* result) {
#ifndef ROCKSDB_LITE
static std::once_flag once;
std::call_once(once, [&]() {
RegisterBuiltinMemTableRepFactory(*(ObjectLibrary::Default().get()), "");
});
#endif // ROCKSDB_LITE
std::string id;
std::unordered_map<std::string, std::string> opt_map;
Status status = Customizable::GetOptionsMap(config_options, result->get(),
value, &id, &opt_map);
if (!status.ok()) { // GetOptionsMap failed
return status;
} else if (value.empty()) {
// No Id and no options. Clear the object
result->reset();
return Status::OK();
} else if (id.empty()) { // We have no Id but have options. Not good
return Status::NotSupported("Cannot reset object ", id);
} else {
#ifndef ROCKSDB_LITE
status = NewUniqueObject<MemTableRepFactory>(config_options, id, opt_map,
result);
#else
// To make it possible to configure the memtables in LITE mode, the ID
// is of the form <name>:<size>, where name is the name of the class and
// <size> is the length of the object (e.g. skip_list:10).
std::vector<std::string> opts_list = StringSplit(id, ':');
if (opts_list.empty() || opts_list.size() > 2 || !opt_map.empty()) {
status = Status::InvalidArgument("Can't parse memtable_factory option ",
value);
} else if (opts_list[0] == "skip_list" ||
opts_list[0] == SkipListFactory::kClassName()) {
// Expecting format
// skip_list:<lookahead>
if (opts_list.size() == 2) {
size_t lookahead = ParseSizeT(opts_list[1]);
result->reset(new SkipListFactory(lookahead));
} else {
result->reset(new SkipListFactory());
}
} else if (!config_options.ignore_unsupported_options) {
status = Status::NotSupported("Cannot load object in LITE mode ", id);
}
#endif // ROCKSDB_LITE
}
return status;
}
#ifndef ROCKSDB_LITE
Status GetPlainTableOptionsFromMap(
const PlainTableOptions& table_options,
const std::unordered_map<std::string, std::string>& opts_map,
PlainTableOptions* new_table_options, bool input_strings_escaped,
bool ignore_unknown_options) {
ConfigOptions config_options;
config_options.input_strings_escaped = input_strings_escaped;
config_options.ignore_unknown_options = ignore_unknown_options;
return GetPlainTableOptionsFromMap(config_options, table_options, opts_map,
new_table_options);
}
Status GetPlainTableOptionsFromMap(
const ConfigOptions& config_options, const PlainTableOptions& table_options,
const std::unordered_map<std::string, std::string>& opts_map,
PlainTableOptions* new_table_options) {
assert(new_table_options);
PlainTableFactory ptf(table_options);
Status s = ptf.ConfigureFromMap(config_options, opts_map);
if (s.ok()) {
*new_table_options = *(ptf.GetOptions<PlainTableOptions>());
} else {
// Restore "new_options" to the default "base_options".
*new_table_options = table_options;
}
return s;
}
extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options) {
return new PlainTableFactory(options);
}
const std::string PlainTablePropertyNames::kEncodingType =
"rocksdb.plain.table.encoding.type";
const std::string PlainTablePropertyNames::kBloomVersion =
"rocksdb.plain.table.bloom.version";
const std::string PlainTablePropertyNames::kNumBloomBlocks =
"rocksdb.plain.table.bloom.numblocks";
#endif // ROCKSDB_LITE
} // namespace ROCKSDB_NAMESPACE