rocksdb/options/db_options.h
Mike Kolupaev 637e64b9ac Add an option to prevent DB::Open() from querying sizes of all sst files (#6353)
Summary:
When paranoid_checks is on, DBImpl::CheckConsistency() iterates over all sst files and calls Env::GetFileSize() for each of them. As far as I could understand, this is pretty arbitrary and doesn't affect correctness - if filesystem doesn't corrupt fsynced files, the file sizes will always match; if it does, it may as well corrupt contents as well as sizes, and rocksdb doesn't check contents on open.

If there are thousands of sst files, getting all their sizes takes a while. If, on top of that, Env is overridden to use some remote storage instead of local filesystem, it can be *really* slow and overload the remote storage service. This PR adds an option to not do GetFileSize(); instead it does GetChildren() for parent directory to check that all the expected sst files are at least present, but doesn't check their sizes.

We can't just disable paranoid_checks instead because paranoid_checks do a few other important things: make the DB read-only on write errors, print error messages on read errors, etc.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6353

Test Plan: ran the added sanity check unit test. Will try it out in a LogDevice test cluster where the GetFileSize() calls are causing a lot of trouble.

Differential Revision: D19656425

Pulled By: al13n321

fbshipit-source-id: c2c421b367633033760d1f56747bad206d1fbf82
2020-02-04 01:27:26 -08:00

118 lines
3.5 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <string>
#include <vector>
#include "rocksdb/options.h"
namespace rocksdb {
struct ImmutableDBOptions {
ImmutableDBOptions();
explicit ImmutableDBOptions(const DBOptions& options);
void Dump(Logger* log) const;
bool create_if_missing;
bool create_missing_column_families;
bool error_if_exists;
bool paranoid_checks;
Env* env;
std::shared_ptr<FileSystem> fs;
std::shared_ptr<RateLimiter> rate_limiter;
std::shared_ptr<SstFileManager> sst_file_manager;
std::shared_ptr<Logger> info_log;
InfoLogLevel info_log_level;
int max_file_opening_threads;
std::shared_ptr<Statistics> statistics;
bool use_fsync;
std::vector<DbPath> db_paths;
std::string db_log_dir;
std::string wal_dir;
uint32_t max_subcompactions;
int max_background_flushes;
size_t max_log_file_size;
size_t log_file_time_to_roll;
size_t keep_log_file_num;
size_t recycle_log_file_num;
uint64_t max_manifest_file_size;
int table_cache_numshardbits;
uint64_t wal_ttl_seconds;
uint64_t wal_size_limit_mb;
uint64_t max_write_batch_group_size_bytes;
size_t manifest_preallocation_size;
bool allow_mmap_reads;
bool allow_mmap_writes;
bool use_direct_reads;
bool use_direct_io_for_flush_and_compaction;
bool allow_fallocate;
bool is_fd_close_on_exec;
bool advise_random_on_open;
size_t db_write_buffer_size;
std::shared_ptr<WriteBufferManager> write_buffer_manager;
DBOptions::AccessHint access_hint_on_compaction_start;
bool new_table_reader_for_compaction_inputs;
size_t random_access_max_buffer_size;
bool use_adaptive_mutex;
std::vector<std::shared_ptr<EventListener>> listeners;
bool enable_thread_tracking;
bool enable_pipelined_write;
bool unordered_write;
bool allow_concurrent_memtable_write;
bool enable_write_thread_adaptive_yield;
uint64_t write_thread_max_yield_usec;
uint64_t write_thread_slow_yield_usec;
bool skip_stats_update_on_db_open;
bool skip_checking_sst_file_sizes_on_db_open;
WALRecoveryMode wal_recovery_mode;
bool allow_2pc;
std::shared_ptr<Cache> row_cache;
#ifndef ROCKSDB_LITE
WalFilter* wal_filter;
#endif // ROCKSDB_LITE
bool fail_if_options_file_error;
bool dump_malloc_stats;
bool avoid_flush_during_recovery;
bool allow_ingest_behind;
bool preserve_deletes;
bool two_write_queues;
bool manual_wal_flush;
bool atomic_flush;
bool avoid_unnecessary_blocking_io;
bool persist_stats_to_disk;
bool write_dbid_to_manifest;
size_t log_readahead_size;
};
struct MutableDBOptions {
MutableDBOptions();
explicit MutableDBOptions(const MutableDBOptions& options) = default;
explicit MutableDBOptions(const DBOptions& options);
void Dump(Logger* log) const;
int max_background_jobs;
int base_background_compactions;
int max_background_compactions;
bool avoid_flush_during_shutdown;
size_t writable_file_max_buffer_size;
uint64_t delayed_write_rate;
uint64_t max_total_wal_size;
uint64_t delete_obsolete_files_period_micros;
unsigned int stats_dump_period_sec;
unsigned int stats_persist_period_sec;
size_t stats_history_buffer_size;
int max_open_files;
uint64_t bytes_per_sync;
uint64_t wal_bytes_per_sync;
bool strict_bytes_per_sync;
size_t compaction_readahead_size;
};
} // namespace rocksdb