rocksdb/table/block_based_table_factory.h
Fenggang Wu 19ec44fd39 Improve point-lookup performance using a data block hash index ()
Summary:
Add hash index support to data blocks, which helps to reduce the CPU utilization of point-lookup operations. This feature is backward compatible with the data block created without the hash index. It is disabled by default unless `BlockBasedTableOptions::data_block_index_type` is set to `data_block_index_type = kDataBlockBinaryAndHash.`

The DB size would be bigger with the hash index option as a hash table is added at the end of each data block. If the hash utilization ratio is 1:1, the space overhead is one byte per key. The hash table utilization ratio is adjustable using `BlockBasedTableOptions::data_block_hash_table_util_ratio`. A lower utilization ratio will improve more on the point-lookup efficiency, but take more space too.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4174

Differential Revision: D8965914

Pulled By: fgwu

fbshipit-source-id: 1c6bae5d1fc39c80282d8890a72e9e67bc247198
2018-08-15 14:30:03 -07:00

193 lines
8.1 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <stdint.h>
#include <memory>
#include <string>
#include "db/dbformat.h"
#include "options/options_helper.h"
#include "options/options_parser.h"
#include "rocksdb/flush_block_policy.h"
#include "rocksdb/table.h"
namespace rocksdb {
struct EnvOptions;
using std::unique_ptr;
class BlockBasedTableBuilder;
// A class used to track actual bytes written from the tail in the recent SST
// file opens, and provide a suggestion for following open.
class TailPrefetchStats {
public:
void RecordEffectiveSize(size_t len);
// 0 indicates no information to determine.
size_t GetSuggestedPrefetchSize();
private:
const static size_t kNumTracked = 32;
size_t records_[kNumTracked];
port::Mutex mutex_;
size_t next_ = 0;
size_t num_records_ = 0;
};
class BlockBasedTableFactory : public TableFactory {
public:
explicit BlockBasedTableFactory(
const BlockBasedTableOptions& table_options = BlockBasedTableOptions());
~BlockBasedTableFactory() {}
const char* Name() const override { return kName.c_str(); }
Status NewTableReader(
const TableReaderOptions& table_reader_options,
unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader,
bool prefetch_index_and_filter_in_cache = true) const override;
TableBuilder* NewTableBuilder(
const TableBuilderOptions& table_builder_options,
uint32_t column_family_id, WritableFileWriter* file) const override;
// Sanitizes the specified DB Options.
Status SanitizeOptions(const DBOptions& db_opts,
const ColumnFamilyOptions& cf_opts) const override;
std::string GetPrintableTableOptions() const override;
Status GetOptionString(std::string* opt_string,
const std::string& delimiter) const override;
const BlockBasedTableOptions& table_options() const;
void* GetOptions() override { return &table_options_; }
bool IsDeleteRangeSupported() const override { return true; }
static const std::string kName;
private:
BlockBasedTableOptions table_options_;
mutable TailPrefetchStats tail_prefetch_stats_;
};
extern const std::string kHashIndexPrefixesBlock;
extern const std::string kHashIndexPrefixesMetadataBlock;
extern const std::string kPropTrue;
extern const std::string kPropFalse;
#ifndef ROCKSDB_LITE
extern Status VerifyBlockBasedTableFactory(
const BlockBasedTableFactory* base_tf,
const BlockBasedTableFactory* file_tf,
OptionsSanityCheckLevel sanity_check_level);
static std::unordered_map<std::string, OptionTypeInfo>
block_based_table_type_info = {
/* currently not supported
std::shared_ptr<Cache> block_cache = nullptr;
std::shared_ptr<Cache> block_cache_compressed = nullptr;
*/
{"flush_block_policy_factory",
{offsetof(struct BlockBasedTableOptions, flush_block_policy_factory),
OptionType::kFlushBlockPolicyFactory, OptionVerificationType::kByName,
false, 0}},
{"cache_index_and_filter_blocks",
{offsetof(struct BlockBasedTableOptions,
cache_index_and_filter_blocks),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"cache_index_and_filter_blocks_with_high_priority",
{offsetof(struct BlockBasedTableOptions,
cache_index_and_filter_blocks_with_high_priority),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"pin_l0_filter_and_index_blocks_in_cache",
{offsetof(struct BlockBasedTableOptions,
pin_l0_filter_and_index_blocks_in_cache),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"index_type",
{offsetof(struct BlockBasedTableOptions, index_type),
OptionType::kBlockBasedTableIndexType,
OptionVerificationType::kNormal, false, 0}},
{"hash_index_allow_collision",
{offsetof(struct BlockBasedTableOptions, hash_index_allow_collision),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"data_block_index_type",
{offsetof(struct BlockBasedTableOptions, data_block_index_type),
OptionType::kBlockBasedTableDataBlockIndexType,
OptionVerificationType::kNormal, false, 0}},
{"data_block_hash_table_util_ratio",
{offsetof(struct BlockBasedTableOptions,
data_block_hash_table_util_ratio),
OptionType::kDouble, OptionVerificationType::kNormal, false, 0}},
{"checksum",
{offsetof(struct BlockBasedTableOptions, checksum),
OptionType::kChecksumType, OptionVerificationType::kNormal, false,
0}},
{"no_block_cache",
{offsetof(struct BlockBasedTableOptions, no_block_cache),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"block_size",
{offsetof(struct BlockBasedTableOptions, block_size),
OptionType::kSizeT, OptionVerificationType::kNormal, false, 0}},
{"block_size_deviation",
{offsetof(struct BlockBasedTableOptions, block_size_deviation),
OptionType::kInt, OptionVerificationType::kNormal, false, 0}},
{"block_restart_interval",
{offsetof(struct BlockBasedTableOptions, block_restart_interval),
OptionType::kInt, OptionVerificationType::kNormal, false, 0}},
{"index_block_restart_interval",
{offsetof(struct BlockBasedTableOptions, index_block_restart_interval),
OptionType::kInt, OptionVerificationType::kNormal, false, 0}},
{"index_per_partition",
{0, OptionType::kUInt64T, OptionVerificationType::kDeprecated, false,
0}},
{"metadata_block_size",
{offsetof(struct BlockBasedTableOptions, metadata_block_size),
OptionType::kUInt64T, OptionVerificationType::kNormal, false, 0}},
{"partition_filters",
{offsetof(struct BlockBasedTableOptions, partition_filters),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"filter_policy",
{offsetof(struct BlockBasedTableOptions, filter_policy),
OptionType::kFilterPolicy, OptionVerificationType::kByName, false,
0}},
{"whole_key_filtering",
{offsetof(struct BlockBasedTableOptions, whole_key_filtering),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"skip_table_builder_flush",
{0, OptionType::kBoolean, OptionVerificationType::kDeprecated, false,
0}},
{"format_version",
{offsetof(struct BlockBasedTableOptions, format_version),
OptionType::kUInt32T, OptionVerificationType::kNormal, false, 0}},
{"verify_compression",
{offsetof(struct BlockBasedTableOptions, verify_compression),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"read_amp_bytes_per_bit",
{offsetof(struct BlockBasedTableOptions, read_amp_bytes_per_bit),
OptionType::kSizeT, OptionVerificationType::kNormal, false, 0}},
{"enable_index_compression",
{offsetof(struct BlockBasedTableOptions, enable_index_compression),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"block_align",
{offsetof(struct BlockBasedTableOptions, block_align),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
{"pin_top_level_index_and_filter",
{offsetof(struct BlockBasedTableOptions,
pin_top_level_index_and_filter),
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}}};
#endif // !ROCKSDB_LITE
} // namespace rocksdb