2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-16 01:03:42 +02:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2014-09-17 21:49:13 +02:00
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2016-09-02 23:16:31 +02:00
|
|
|
#include <string>
|
2014-10-02 01:19:16 +02:00
|
|
|
#include <vector>
|
2016-09-02 23:16:31 +02:00
|
|
|
|
2016-11-21 21:07:09 +01:00
|
|
|
#include "db/dbformat.h"
|
2017-04-06 04:02:00 +02:00
|
|
|
#include "options/db_options.h"
|
2016-05-17 22:11:56 +02:00
|
|
|
#include "rocksdb/options.h"
|
|
|
|
#include "util/compression.h"
|
2014-09-17 21:49:13 +02:00
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
2016-09-02 23:16:31 +02:00
|
|
|
// ImmutableCFOptions is a data struct used internally by RocksDB. It contains a
|
|
|
|
// subset of Options that should not be changed during the entire lifetime
|
|
|
|
// of the DB. Raw pointers defined in this struct do not own the data
|
|
|
|
// they point to. Options contains shared_ptrs to this data.
|
|
|
|
struct ImmutableCFOptions {
|
2016-09-24 01:34:04 +02:00
|
|
|
ImmutableCFOptions();
|
2016-09-02 23:16:31 +02:00
|
|
|
explicit ImmutableCFOptions(const Options& options);
|
|
|
|
|
2016-09-24 01:34:04 +02:00
|
|
|
ImmutableCFOptions(const ImmutableDBOptions& db_options,
|
|
|
|
const ColumnFamilyOptions& cf_options);
|
|
|
|
|
2016-09-02 23:16:31 +02:00
|
|
|
CompactionStyle compaction_style;
|
|
|
|
|
2016-09-14 06:11:59 +02:00
|
|
|
CompactionPri compaction_pri;
|
|
|
|
|
2016-09-02 23:16:31 +02:00
|
|
|
CompactionOptionsUniversal compaction_options_universal;
|
|
|
|
CompactionOptionsFIFO compaction_options_fifo;
|
|
|
|
|
|
|
|
const SliceTransform* prefix_extractor;
|
|
|
|
|
2016-10-21 20:31:42 +02:00
|
|
|
const Comparator* user_comparator;
|
2016-11-21 21:07:09 +01:00
|
|
|
InternalKeyComparator internal_comparator;
|
2016-09-02 23:16:31 +02:00
|
|
|
|
|
|
|
MergeOperator* merge_operator;
|
|
|
|
|
|
|
|
const CompactionFilter* compaction_filter;
|
|
|
|
|
|
|
|
CompactionFilterFactory* compaction_filter_factory;
|
|
|
|
|
2016-09-24 01:34:04 +02:00
|
|
|
int min_write_buffer_number_to_merge;
|
|
|
|
|
|
|
|
int max_write_buffer_number_to_maintain;
|
|
|
|
|
2016-09-02 23:16:31 +02:00
|
|
|
bool inplace_update_support;
|
|
|
|
|
|
|
|
UpdateStatus (*inplace_callback)(char* existing_value,
|
|
|
|
uint32_t* existing_value_size,
|
|
|
|
Slice delta_value,
|
|
|
|
std::string* merged_value);
|
|
|
|
|
|
|
|
Logger* info_log;
|
|
|
|
|
|
|
|
Statistics* statistics;
|
|
|
|
|
|
|
|
InfoLogLevel info_log_level;
|
|
|
|
|
|
|
|
Env* env;
|
|
|
|
|
|
|
|
// Allow the OS to mmap file for reading sst tables. Default: false
|
|
|
|
bool allow_mmap_reads;
|
|
|
|
|
|
|
|
// Allow the OS to mmap file for writing. Default: false
|
|
|
|
bool allow_mmap_writes;
|
|
|
|
|
|
|
|
std::vector<DbPath> db_paths;
|
|
|
|
|
|
|
|
MemTableRepFactory* memtable_factory;
|
|
|
|
|
|
|
|
TableFactory* table_factory;
|
|
|
|
|
|
|
|
Options::TablePropertiesCollectorFactories
|
|
|
|
table_properties_collector_factories;
|
|
|
|
|
|
|
|
bool advise_random_on_open;
|
|
|
|
|
|
|
|
// This options is required by PlainTableReader. May need to move it
|
|
|
|
// to PlainTalbeOptions just like bloom_bits_per_key
|
|
|
|
uint32_t bloom_locality;
|
|
|
|
|
|
|
|
bool purge_redundant_kvs_while_flush;
|
|
|
|
|
|
|
|
bool use_fsync;
|
|
|
|
|
|
|
|
std::vector<CompressionType> compression_per_level;
|
|
|
|
|
|
|
|
CompressionType bottommost_compression;
|
|
|
|
|
|
|
|
CompressionOptions compression_opts;
|
|
|
|
|
|
|
|
bool level_compaction_dynamic_level_bytes;
|
|
|
|
|
|
|
|
Options::AccessHint access_hint_on_compaction_start;
|
|
|
|
|
|
|
|
bool new_table_reader_for_compaction_inputs;
|
|
|
|
|
|
|
|
size_t compaction_readahead_size;
|
|
|
|
|
|
|
|
int num_levels;
|
|
|
|
|
|
|
|
bool optimize_filters_for_hits;
|
|
|
|
|
2016-10-08 02:21:45 +02:00
|
|
|
bool force_consistency_checks;
|
|
|
|
|
2016-09-02 23:16:31 +02:00
|
|
|
// A vector of EventListeners which call-back functions will be called
|
|
|
|
// when specific RocksDB event happens.
|
|
|
|
std::vector<std::shared_ptr<EventListener>> listeners;
|
|
|
|
|
|
|
|
std::shared_ptr<Cache> row_cache;
|
2016-09-14 06:11:59 +02:00
|
|
|
|
|
|
|
uint32_t max_subcompactions;
|
2016-11-14 03:58:17 +01:00
|
|
|
|
|
|
|
const SliceTransform* memtable_insert_with_hint_prefix_extractor;
|
2016-09-02 23:16:31 +02:00
|
|
|
};
|
|
|
|
|
2014-09-17 21:49:13 +02:00
|
|
|
struct MutableCFOptions {
|
2016-09-14 06:11:59 +02:00
|
|
|
explicit MutableCFOptions(const ColumnFamilyOptions& options)
|
Add options.compaction_measure_io_stats to print write I/O stats in compactions
Summary:
Add options.compaction_measure_io_stats to print out / pass to listener accumulated time spent on write calls. Example outputs in info logs:
2015/08/12-16:27:59.463944 7fd428bff700 (Original Log Time 2015/08/12-16:27:59.463922) EVENT_LOG_v1 {"time_micros": 1439422079463897, "job": 6, "event": "compaction_finished", "output_level": 1, "num_output_files": 4, "total_output_size": 6900525, "num_input_records": 111483, "num_output_records": 106877, "file_write_nanos": 15663206, "file_range_sync_nanos": 649588, "file_fsync_nanos": 349614797, "file_prepare_write_nanos": 1505812, "lsm_state": [2, 4, 0, 0, 0, 0, 0]}
Add two more counters in iostats_context.
Also add a parameter of db_bench.
Test Plan: Add a unit test. Also manually verify LOG outputs in db_bench
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D44115
2015-08-13 02:24:45 +02:00
|
|
|
: write_buffer_size(options.write_buffer_size),
|
|
|
|
max_write_buffer_number(options.max_write_buffer_number),
|
|
|
|
arena_block_size(options.arena_block_size),
|
2016-06-04 02:02:10 +02:00
|
|
|
memtable_prefix_bloom_size_ratio(
|
|
|
|
options.memtable_prefix_bloom_size_ratio),
|
2016-07-27 03:05:30 +02:00
|
|
|
memtable_huge_page_size(options.memtable_huge_page_size),
|
Add options.compaction_measure_io_stats to print write I/O stats in compactions
Summary:
Add options.compaction_measure_io_stats to print out / pass to listener accumulated time spent on write calls. Example outputs in info logs:
2015/08/12-16:27:59.463944 7fd428bff700 (Original Log Time 2015/08/12-16:27:59.463922) EVENT_LOG_v1 {"time_micros": 1439422079463897, "job": 6, "event": "compaction_finished", "output_level": 1, "num_output_files": 4, "total_output_size": 6900525, "num_input_records": 111483, "num_output_records": 106877, "file_write_nanos": 15663206, "file_range_sync_nanos": 649588, "file_fsync_nanos": 349614797, "file_prepare_write_nanos": 1505812, "lsm_state": [2, 4, 0, 0, 0, 0, 0]}
Add two more counters in iostats_context.
Also add a parameter of db_bench.
Test Plan: Add a unit test. Also manually verify LOG outputs in db_bench
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D44115
2015-08-13 02:24:45 +02:00
|
|
|
max_successive_merges(options.max_successive_merges),
|
|
|
|
inplace_update_num_locks(options.inplace_update_num_locks),
|
|
|
|
disable_auto_compactions(options.disable_auto_compactions),
|
2015-11-19 03:10:20 +01:00
|
|
|
soft_pending_compaction_bytes_limit(
|
|
|
|
options.soft_pending_compaction_bytes_limit),
|
2015-09-11 23:31:23 +02:00
|
|
|
hard_pending_compaction_bytes_limit(
|
|
|
|
options.hard_pending_compaction_bytes_limit),
|
Add options.compaction_measure_io_stats to print write I/O stats in compactions
Summary:
Add options.compaction_measure_io_stats to print out / pass to listener accumulated time spent on write calls. Example outputs in info logs:
2015/08/12-16:27:59.463944 7fd428bff700 (Original Log Time 2015/08/12-16:27:59.463922) EVENT_LOG_v1 {"time_micros": 1439422079463897, "job": 6, "event": "compaction_finished", "output_level": 1, "num_output_files": 4, "total_output_size": 6900525, "num_input_records": 111483, "num_output_records": 106877, "file_write_nanos": 15663206, "file_range_sync_nanos": 649588, "file_fsync_nanos": 349614797, "file_prepare_write_nanos": 1505812, "lsm_state": [2, 4, 0, 0, 0, 0, 0]}
Add two more counters in iostats_context.
Also add a parameter of db_bench.
Test Plan: Add a unit test. Also manually verify LOG outputs in db_bench
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D44115
2015-08-13 02:24:45 +02:00
|
|
|
level0_file_num_compaction_trigger(
|
|
|
|
options.level0_file_num_compaction_trigger),
|
|
|
|
level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
|
|
|
|
level0_stop_writes_trigger(options.level0_stop_writes_trigger),
|
2016-06-17 01:02:52 +02:00
|
|
|
max_compaction_bytes(options.max_compaction_bytes),
|
Add options.compaction_measure_io_stats to print write I/O stats in compactions
Summary:
Add options.compaction_measure_io_stats to print out / pass to listener accumulated time spent on write calls. Example outputs in info logs:
2015/08/12-16:27:59.463944 7fd428bff700 (Original Log Time 2015/08/12-16:27:59.463922) EVENT_LOG_v1 {"time_micros": 1439422079463897, "job": 6, "event": "compaction_finished", "output_level": 1, "num_output_files": 4, "total_output_size": 6900525, "num_input_records": 111483, "num_output_records": 106877, "file_write_nanos": 15663206, "file_range_sync_nanos": 649588, "file_fsync_nanos": 349614797, "file_prepare_write_nanos": 1505812, "lsm_state": [2, 4, 0, 0, 0, 0, 0]}
Add two more counters in iostats_context.
Also add a parameter of db_bench.
Test Plan: Add a unit test. Also manually verify LOG outputs in db_bench
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D44115
2015-08-13 02:24:45 +02:00
|
|
|
target_file_size_base(options.target_file_size_base),
|
|
|
|
target_file_size_multiplier(options.target_file_size_multiplier),
|
|
|
|
max_bytes_for_level_base(options.max_bytes_for_level_base),
|
|
|
|
max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
|
|
|
|
max_bytes_for_level_multiplier_additional(
|
|
|
|
options.max_bytes_for_level_multiplier_additional),
|
|
|
|
max_sequential_skip_in_iterations(
|
|
|
|
options.max_sequential_skip_in_iterations),
|
|
|
|
paranoid_file_checks(options.paranoid_file_checks),
|
2016-05-17 22:11:56 +02:00
|
|
|
report_bg_io_stats(options.report_bg_io_stats),
|
2017-02-23 23:53:03 +01:00
|
|
|
compression(options.compression) {
|
2016-09-14 06:11:59 +02:00
|
|
|
RefreshDerivedOptions(options.num_levels, options.compaction_style);
|
2014-09-17 21:49:13 +02:00
|
|
|
}
|
2016-09-14 06:11:59 +02:00
|
|
|
|
2014-09-17 21:49:13 +02:00
|
|
|
MutableCFOptions()
|
Add options.compaction_measure_io_stats to print write I/O stats in compactions
Summary:
Add options.compaction_measure_io_stats to print out / pass to listener accumulated time spent on write calls. Example outputs in info logs:
2015/08/12-16:27:59.463944 7fd428bff700 (Original Log Time 2015/08/12-16:27:59.463922) EVENT_LOG_v1 {"time_micros": 1439422079463897, "job": 6, "event": "compaction_finished", "output_level": 1, "num_output_files": 4, "total_output_size": 6900525, "num_input_records": 111483, "num_output_records": 106877, "file_write_nanos": 15663206, "file_range_sync_nanos": 649588, "file_fsync_nanos": 349614797, "file_prepare_write_nanos": 1505812, "lsm_state": [2, 4, 0, 0, 0, 0, 0]}
Add two more counters in iostats_context.
Also add a parameter of db_bench.
Test Plan: Add a unit test. Also manually verify LOG outputs in db_bench
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D44115
2015-08-13 02:24:45 +02:00
|
|
|
: write_buffer_size(0),
|
|
|
|
max_write_buffer_number(0),
|
|
|
|
arena_block_size(0),
|
2016-06-04 02:02:10 +02:00
|
|
|
memtable_prefix_bloom_size_ratio(0),
|
2016-07-27 03:05:30 +02:00
|
|
|
memtable_huge_page_size(0),
|
Add options.compaction_measure_io_stats to print write I/O stats in compactions
Summary:
Add options.compaction_measure_io_stats to print out / pass to listener accumulated time spent on write calls. Example outputs in info logs:
2015/08/12-16:27:59.463944 7fd428bff700 (Original Log Time 2015/08/12-16:27:59.463922) EVENT_LOG_v1 {"time_micros": 1439422079463897, "job": 6, "event": "compaction_finished", "output_level": 1, "num_output_files": 4, "total_output_size": 6900525, "num_input_records": 111483, "num_output_records": 106877, "file_write_nanos": 15663206, "file_range_sync_nanos": 649588, "file_fsync_nanos": 349614797, "file_prepare_write_nanos": 1505812, "lsm_state": [2, 4, 0, 0, 0, 0, 0]}
Add two more counters in iostats_context.
Also add a parameter of db_bench.
Test Plan: Add a unit test. Also manually verify LOG outputs in db_bench
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D44115
2015-08-13 02:24:45 +02:00
|
|
|
max_successive_merges(0),
|
|
|
|
inplace_update_num_locks(0),
|
|
|
|
disable_auto_compactions(false),
|
2015-11-19 03:10:20 +01:00
|
|
|
soft_pending_compaction_bytes_limit(0),
|
2015-09-11 23:31:23 +02:00
|
|
|
hard_pending_compaction_bytes_limit(0),
|
Add options.compaction_measure_io_stats to print write I/O stats in compactions
Summary:
Add options.compaction_measure_io_stats to print out / pass to listener accumulated time spent on write calls. Example outputs in info logs:
2015/08/12-16:27:59.463944 7fd428bff700 (Original Log Time 2015/08/12-16:27:59.463922) EVENT_LOG_v1 {"time_micros": 1439422079463897, "job": 6, "event": "compaction_finished", "output_level": 1, "num_output_files": 4, "total_output_size": 6900525, "num_input_records": 111483, "num_output_records": 106877, "file_write_nanos": 15663206, "file_range_sync_nanos": 649588, "file_fsync_nanos": 349614797, "file_prepare_write_nanos": 1505812, "lsm_state": [2, 4, 0, 0, 0, 0, 0]}
Add two more counters in iostats_context.
Also add a parameter of db_bench.
Test Plan: Add a unit test. Also manually verify LOG outputs in db_bench
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D44115
2015-08-13 02:24:45 +02:00
|
|
|
level0_file_num_compaction_trigger(0),
|
|
|
|
level0_slowdown_writes_trigger(0),
|
|
|
|
level0_stop_writes_trigger(0),
|
2016-06-17 01:02:52 +02:00
|
|
|
max_compaction_bytes(0),
|
Add options.compaction_measure_io_stats to print write I/O stats in compactions
Summary:
Add options.compaction_measure_io_stats to print out / pass to listener accumulated time spent on write calls. Example outputs in info logs:
2015/08/12-16:27:59.463944 7fd428bff700 (Original Log Time 2015/08/12-16:27:59.463922) EVENT_LOG_v1 {"time_micros": 1439422079463897, "job": 6, "event": "compaction_finished", "output_level": 1, "num_output_files": 4, "total_output_size": 6900525, "num_input_records": 111483, "num_output_records": 106877, "file_write_nanos": 15663206, "file_range_sync_nanos": 649588, "file_fsync_nanos": 349614797, "file_prepare_write_nanos": 1505812, "lsm_state": [2, 4, 0, 0, 0, 0, 0]}
Add two more counters in iostats_context.
Also add a parameter of db_bench.
Test Plan: Add a unit test. Also manually verify LOG outputs in db_bench
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D44115
2015-08-13 02:24:45 +02:00
|
|
|
target_file_size_base(0),
|
|
|
|
target_file_size_multiplier(0),
|
|
|
|
max_bytes_for_level_base(0),
|
|
|
|
max_bytes_for_level_multiplier(0),
|
|
|
|
max_sequential_skip_in_iterations(0),
|
|
|
|
paranoid_file_checks(false),
|
2016-05-17 22:11:56 +02:00
|
|
|
report_bg_io_stats(false),
|
2017-02-23 23:53:03 +01:00
|
|
|
compression(Snappy_Supported() ? kSnappyCompression : kNoCompression) {}
|
2014-09-17 21:49:13 +02:00
|
|
|
|
2014-10-02 01:19:16 +02:00
|
|
|
// Must be called after any change to MutableCFOptions
|
2016-09-14 06:11:59 +02:00
|
|
|
void RefreshDerivedOptions(int num_levels, CompactionStyle compaction_style);
|
|
|
|
|
|
|
|
void RefreshDerivedOptions(const ImmutableCFOptions& ioptions) {
|
|
|
|
RefreshDerivedOptions(ioptions.num_levels, ioptions.compaction_style);
|
|
|
|
}
|
2014-10-02 01:19:16 +02:00
|
|
|
|
|
|
|
// Get the max file size in a given level.
|
|
|
|
uint64_t MaxFileSizeForLevel(int level) const;
|
2015-03-30 23:04:21 +02:00
|
|
|
int MaxBytesMultiplerAdditional(int level) const {
|
|
|
|
if (level >=
|
|
|
|
static_cast<int>(max_bytes_for_level_multiplier_additional.size())) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return max_bytes_for_level_multiplier_additional[level];
|
|
|
|
}
|
2014-10-02 01:19:16 +02:00
|
|
|
|
2014-10-17 02:22:28 +02:00
|
|
|
void Dump(Logger* log) const;
|
|
|
|
|
2014-10-02 01:19:16 +02:00
|
|
|
// Memtable related options
|
2014-09-17 21:49:13 +02:00
|
|
|
size_t write_buffer_size;
|
2014-10-17 01:57:59 +02:00
|
|
|
int max_write_buffer_number;
|
2014-09-17 21:49:13 +02:00
|
|
|
size_t arena_block_size;
|
2016-06-04 02:02:10 +02:00
|
|
|
double memtable_prefix_bloom_size_ratio;
|
2016-07-27 03:05:30 +02:00
|
|
|
size_t memtable_huge_page_size;
|
2014-09-17 21:49:13 +02:00
|
|
|
size_t max_successive_merges;
|
2014-10-27 20:10:13 +01:00
|
|
|
size_t inplace_update_num_locks;
|
2014-10-02 01:19:16 +02:00
|
|
|
|
|
|
|
// Compaction related options
|
2014-10-17 02:14:17 +02:00
|
|
|
bool disable_auto_compactions;
|
2015-11-19 03:10:20 +01:00
|
|
|
uint64_t soft_pending_compaction_bytes_limit;
|
2015-09-11 23:31:23 +02:00
|
|
|
uint64_t hard_pending_compaction_bytes_limit;
|
2014-10-02 01:19:16 +02:00
|
|
|
int level0_file_num_compaction_trigger;
|
|
|
|
int level0_slowdown_writes_trigger;
|
|
|
|
int level0_stop_writes_trigger;
|
2016-06-17 01:02:52 +02:00
|
|
|
uint64_t max_compaction_bytes;
|
2014-11-11 22:47:22 +01:00
|
|
|
uint64_t target_file_size_base;
|
2014-10-02 01:19:16 +02:00
|
|
|
int target_file_size_multiplier;
|
|
|
|
uint64_t max_bytes_for_level_base;
|
2016-11-02 05:05:32 +01:00
|
|
|
double max_bytes_for_level_multiplier;
|
2014-10-02 01:19:16 +02:00
|
|
|
std::vector<int> max_bytes_for_level_multiplier_additional;
|
|
|
|
|
2014-10-24 00:34:21 +02:00
|
|
|
// Misc options
|
|
|
|
uint64_t max_sequential_skip_in_iterations;
|
2015-04-18 00:26:50 +02:00
|
|
|
bool paranoid_file_checks;
|
2016-04-14 22:56:29 +02:00
|
|
|
bool report_bg_io_stats;
|
2016-05-17 22:11:56 +02:00
|
|
|
CompressionType compression;
|
2014-10-24 00:34:21 +02:00
|
|
|
|
2014-10-02 01:19:16 +02:00
|
|
|
// Derived options
|
|
|
|
// Per-level target file size.
|
|
|
|
std::vector<uint64_t> max_file_size;
|
2014-09-17 21:49:13 +02:00
|
|
|
};
|
|
|
|
|
2016-11-02 05:05:32 +01:00
|
|
|
// Declaration only; the definition is not visible in this header.
// NOTE(review): judging by the name and signature, this presumably returns
// op1 * op2 while guarding against uint64_t overflow -- confirm the exact
// overflow behaviour against the definition.
uint64_t MultiplyCheckOverflow(uint64_t op1, double op2);
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
|
2014-09-17 21:49:13 +02:00
|
|
|
} // namespace rocksdb
|