Dictionary compression for files written by SstFileWriter (#4978)

Summary:
If `CompressionOptions::max_dict_bytes` and/or `CompressionOptions::zstd_max_train_bytes` are set, `SstFileWriter` will now generate files respecting those options.

I refactored the logic for deciding when to use dictionary compression. Previously, we plumbed `is_bottommost_level` down to the table builder and used that. However, that was confusing in `SstFileWriter`'s context, since we don't know which level the file will be ingested into. Now the higher-level callers (e.g., flush, compaction, file writer) are responsible for building the right `CompressionOptions` to give the table builder.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4978

Differential Revision: D14060763

Pulled By: ajkr

fbshipit-source-id: dc802c327896df2b319dc162d6acc82b9cdb452a
This commit is contained in:
Andrew Kryczka 2019-02-14 10:16:12 -08:00 committed by Facebook Github Bot
parent 4fc442029a
commit c8c8104d7e
10 changed files with 56 additions and 23 deletions

View File

@ -11,6 +11,7 @@
* Add support for trace sampling. * Add support for trace sampling.
* Enable properties block checksum verification for block-based tables. * Enable properties block checksum verification for block-based tables.
* For all users of dictionary compression, we now generate a separate dictionary for compressing each bottom-level SST file. Previously we reused a single dictionary for a whole compaction to bottom level. The new approach achieves better compression ratios; however, it uses more memory and CPU for buffering/sampling data blocks and training dictionaries. * For all users of dictionary compression, we now generate a separate dictionary for compressing each bottom-level SST file. Previously we reused a single dictionary for a whole compaction to bottom level. The new approach achieves better compression ratios; however, it uses more memory and CPU for buffering/sampling data blocks and training dictionaries.
* Files written by `SstFileWriter` will now use dictionary compression if it is configured in the file writer's `CompressionOptions`.
### Public API Change ### Public API Change
* Disallow CompactionFilter::IgnoreSnapshots() = false, because it is not very useful and the behavior is confusing. The filter will filter everything if there is no snapshot declared by the time the compaction starts. However, users can define a snapshot after the compaction starts and before it finishes and this new snapshot won't be repeatable, because after the compaction finishes, some keys may be dropped. * Disallow CompactionFilter::IgnoreSnapshots() = false, because it is not very useful and the behavior is confusing. The filter will filter everything if there is no snapshot declared by the time the compaction starts. However, users can define a snapshot after the compaction starts and before it finishes and this new snapshot won't be repeatable, because after the compaction finishes, some keys may be dropped.

View File

@ -49,8 +49,7 @@ TableBuilder* NewTableBuilder(
WritableFileWriter* file, const CompressionType compression_type, WritableFileWriter* file, const CompressionType compression_type,
const CompressionOptions& compression_opts, int level, const CompressionOptions& compression_opts, int level,
const bool skip_filters, const uint64_t creation_time, const bool skip_filters, const uint64_t creation_time,
const uint64_t oldest_key_time, const bool is_bottommost_level, const uint64_t oldest_key_time, const uint64_t target_file_size) {
const uint64_t target_file_size) {
assert((column_family_id == assert((column_family_id ==
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) == TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
column_family_name.empty()); column_family_name.empty());
@ -59,7 +58,7 @@ TableBuilder* NewTableBuilder(
int_tbl_prop_collector_factories, compression_type, int_tbl_prop_collector_factories, compression_type,
compression_opts, skip_filters, column_family_name, compression_opts, skip_filters, column_family_name,
level, creation_time, oldest_key_time, level, creation_time, oldest_key_time,
is_bottommost_level, target_file_size), target_file_size),
column_family_id, file); column_family_id, file);
} }
@ -106,6 +105,11 @@ Status BuildTable(
if (iter->Valid() || !range_del_agg->IsEmpty()) { if (iter->Valid() || !range_del_agg->IsEmpty()) {
TableBuilder* builder; TableBuilder* builder;
std::unique_ptr<WritableFileWriter> file_writer; std::unique_ptr<WritableFileWriter> file_writer;
// Currently we only enable dictionary compression during compaction to the
// bottommost level.
CompressionOptions compression_opts_for_flush(compression_opts);
compression_opts_for_flush.max_dict_bytes = 0;
compression_opts_for_flush.zstd_max_train_bytes = 0;
{ {
std::unique_ptr<WritableFile> file; std::unique_ptr<WritableFile> file;
#ifndef NDEBUG #ifndef NDEBUG
@ -128,8 +132,9 @@ Status BuildTable(
builder = NewTableBuilder( builder = NewTableBuilder(
ioptions, mutable_cf_options, internal_comparator, ioptions, mutable_cf_options, internal_comparator,
int_tbl_prop_collector_factories, column_family_id, int_tbl_prop_collector_factories, column_family_id,
column_family_name, file_writer.get(), compression, compression_opts, column_family_name, file_writer.get(), compression,
level, false /* skip_filters */, creation_time, oldest_key_time); compression_opts_for_flush, level, false /* skip_filters */,
creation_time, oldest_key_time);
} }
MergeHelper merge(env, internal_comparator.user_comparator(), MergeHelper merge(env, internal_comparator.user_comparator(),

View File

@ -49,8 +49,7 @@ TableBuilder* NewTableBuilder(
WritableFileWriter* file, const CompressionType compression_type, WritableFileWriter* file, const CompressionType compression_type,
const CompressionOptions& compression_opts, int level, const CompressionOptions& compression_opts, int level,
const bool skip_filters = false, const uint64_t creation_time = 0, const bool skip_filters = false, const uint64_t creation_time = 0,
const uint64_t oldest_key_time = 0, const bool is_bottommost_level = false, const uint64_t oldest_key_time = 0, const uint64_t target_file_size = 0);
const uint64_t target_file_size = 0);
// Build a Table file from the contents of *iter. The generated file // Build a Table file from the contents of *iter. The generated file
// will be named according to number specified in meta. On success, the rest of // will be named according to number specified in meta. On success, the rest of

View File

@ -250,6 +250,12 @@ Compaction::Compaction(VersionStorageInfo* vstorage,
if (max_subcompactions_ == 0) { if (max_subcompactions_ == 0) {
max_subcompactions_ = immutable_cf_options_.max_subcompactions; max_subcompactions_ = immutable_cf_options_.max_subcompactions;
} }
if (!bottommost_level_) {
// Currently we only enable dictionary compression during compaction to the
// bottommost level.
output_compression_opts_.max_dict_bytes = 0;
output_compression_opts_.zstd_max_train_bytes = 0;
}
#ifndef NDEBUG #ifndef NDEBUG
for (size_t i = 1; i < inputs_.size(); ++i) { for (size_t i = 1; i < inputs_.size(); ++i) {

View File

@ -1501,7 +1501,7 @@ Status CompactionJob::OpenCompactionOutputFile(
sub_compact->compaction->output_compression(), sub_compact->compaction->output_compression(),
sub_compact->compaction->output_compression_opts(), sub_compact->compaction->output_compression_opts(),
sub_compact->compaction->output_level(), skip_filters, sub_compact->compaction->output_level(), skip_filters,
output_file_creation_time, 0 /* oldest_key_time */, bottommost_level_, output_file_creation_time, 0 /* oldest_key_time */,
sub_compact->compaction->max_output_file_size())); sub_compact->compaction->max_output_file_size()));
LogFlush(db_options_.info_log); LogFlush(db_options_.info_log);
return s; return s;

View File

@ -2318,6 +2318,37 @@ TEST_F(ExternalSSTFileTest, SkipBloomFilter) {
} }
} }
// Checks that generating and adding an external file while ZSTD dictionary
// compression is configured triggers the
// "BlockBasedTableBuilder::WriteCompressionDictBlock:RawDict" sync point
// exactly once.
TEST_F(ExternalSSTFileTest, IngestFileWrittenWithCompressionDictionary) {
  // The options below select ZSTD, so there is nothing to check without it.
  if (!ZSTD_Supported()) {
    return;
  }
  const int kNumEntries = 1 << 10;
  const int kNumBytesPerEntry = 1 << 10;

  // Build the input up front: kNumEntries keys, each paired with a random
  // value of kNumBytesPerEntry bytes drawn from a fixed-seed generator.
  Random rnd(301);
  std::vector<std::pair<std::string, std::string>> kvs;
  for (int idx = 0; idx != kNumEntries; ++idx) {
    std::string value;
    test::RandomString(&rnd, kNumBytesPerEntry, &value);
    kvs.emplace_back(Key(idx), std::move(value));
  }

  Options options = CurrentOptions();
  options.compression = kZSTD;
  options.compression_opts.max_dict_bytes = 1 << 14;        // 16KB
  options.compression_opts.zstd_max_train_bytes = 1 << 18;  // 256KB
  DestroyAndReopen(options);

  // Count every time the sync point named below is hit. The counter is
  // atomic; NOTE(review): presumably because the callback may run on another
  // thread -- confirm against the SyncPoint implementation.
  std::atomic<int> dict_blocks_seen(0);
  auto* sync_point = rocksdb::SyncPoint::GetInstance();
  sync_point->SetCallBack(
      "BlockBasedTableBuilder::WriteCompressionDictBlock:RawDict",
      [&dict_blocks_seen](void* /* arg */) { ++dict_blocks_seen; });
  sync_point->EnableProcessing();

  ASSERT_OK(GenerateAndAddExternalFile(options, std::move(kvs)));
  // One file was generated, so exactly one sync point hit is expected.
  ASSERT_EQ(1, dict_blocks_seen);
}
TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_Success) { TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_Success) {
std::unique_ptr<FaultInjectionTestEnv> fault_injection_env( std::unique_ptr<FaultInjectionTestEnv> fault_injection_env(
new FaultInjectionTestEnv(env_)); new FaultInjectionTestEnv(env_));

View File

@ -317,7 +317,6 @@ struct BlockBasedTableBuilder::Rep {
const std::string& column_family_name; const std::string& column_family_name;
uint64_t creation_time = 0; uint64_t creation_time = 0;
uint64_t oldest_key_time = 0; uint64_t oldest_key_time = 0;
const bool is_bottommost_level;
const uint64_t target_file_size; const uint64_t target_file_size;
std::vector<std::unique_ptr<IntTblPropCollector>> table_properties_collectors; std::vector<std::unique_ptr<IntTblPropCollector>> table_properties_collectors;
@ -331,8 +330,7 @@ struct BlockBasedTableBuilder::Rep {
const CompressionType _compression_type, const CompressionType _compression_type,
const CompressionOptions& _compression_opts, const bool skip_filters, const CompressionOptions& _compression_opts, const bool skip_filters,
const std::string& _column_family_name, const uint64_t _creation_time, const std::string& _column_family_name, const uint64_t _creation_time,
const uint64_t _oldest_key_time, const bool _is_bottommost_level, const uint64_t _oldest_key_time, const uint64_t _target_file_size)
const uint64_t _target_file_size)
: ioptions(_ioptions), : ioptions(_ioptions),
moptions(_moptions), moptions(_moptions),
table_options(table_opt), table_options(table_opt),
@ -356,7 +354,7 @@ struct BlockBasedTableBuilder::Rep {
compression_dict(), compression_dict(),
compression_ctx(_compression_type), compression_ctx(_compression_type),
verify_dict(), verify_dict(),
state((_is_bottommost_level && _compression_opts.max_dict_bytes > 0) state((_compression_opts.max_dict_bytes > 0)
? State::kBuffered ? State::kBuffered
: State::kUnbuffered), : State::kUnbuffered),
use_delta_encoding_for_index_values(table_opt.format_version >= 4 && use_delta_encoding_for_index_values(table_opt.format_version >= 4 &&
@ -369,7 +367,6 @@ struct BlockBasedTableBuilder::Rep {
column_family_name(_column_family_name), column_family_name(_column_family_name),
creation_time(_creation_time), creation_time(_creation_time),
oldest_key_time(_oldest_key_time), oldest_key_time(_oldest_key_time),
is_bottommost_level(_is_bottommost_level),
target_file_size(_target_file_size) { target_file_size(_target_file_size) {
if (table_options.index_type == if (table_options.index_type ==
BlockBasedTableOptions::kTwoLevelIndexSearch) { BlockBasedTableOptions::kTwoLevelIndexSearch) {
@ -421,8 +418,7 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
const CompressionType compression_type, const CompressionType compression_type,
const CompressionOptions& compression_opts, const bool skip_filters, const CompressionOptions& compression_opts, const bool skip_filters,
const std::string& column_family_name, const uint64_t creation_time, const std::string& column_family_name, const uint64_t creation_time,
const uint64_t oldest_key_time, const bool is_bottommost_level, const uint64_t oldest_key_time, const uint64_t target_file_size) {
const uint64_t target_file_size) {
BlockBasedTableOptions sanitized_table_options(table_options); BlockBasedTableOptions sanitized_table_options(table_options);
if (sanitized_table_options.format_version == 0 && if (sanitized_table_options.format_version == 0 &&
sanitized_table_options.checksum != kCRC32c) { sanitized_table_options.checksum != kCRC32c) {
@ -439,7 +435,7 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
internal_comparator, int_tbl_prop_collector_factories, internal_comparator, int_tbl_prop_collector_factories,
column_family_id, file, compression_type, compression_opts, column_family_id, file, compression_type, compression_opts,
skip_filters, column_family_name, creation_time, skip_filters, column_family_name, creation_time,
oldest_key_time, is_bottommost_level, target_file_size); oldest_key_time, target_file_size);
if (rep_->filter_builder != nullptr) { if (rep_->filter_builder != nullptr) {
rep_->filter_builder->StartBlock(0); rep_->filter_builder->StartBlock(0);

View File

@ -47,9 +47,7 @@ class BlockBasedTableBuilder : public TableBuilder {
const CompressionType compression_type, const CompressionType compression_type,
const CompressionOptions& compression_opts, const bool skip_filters, const CompressionOptions& compression_opts, const bool skip_filters,
const std::string& column_family_name, const uint64_t creation_time = 0, const std::string& column_family_name, const uint64_t creation_time = 0,
const uint64_t oldest_key_time = 0, const uint64_t oldest_key_time = 0, const uint64_t target_file_size = 0);
const bool is_bottommost_level = false,
const uint64_t target_file_size = 0);
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.
~BlockBasedTableBuilder(); ~BlockBasedTableBuilder();

View File

@ -219,7 +219,6 @@ TableBuilder* BlockBasedTableFactory::NewTableBuilder(
table_builder_options.column_family_name, table_builder_options.column_family_name,
table_builder_options.creation_time, table_builder_options.creation_time,
table_builder_options.oldest_key_time, table_builder_options.oldest_key_time,
table_builder_options.is_bottommost_level,
table_builder_options.target_file_size); table_builder_options.target_file_size);
return table_builder; return table_builder;

View File

@ -77,7 +77,7 @@ struct TableBuilderOptions {
const CompressionOptions& _compression_opts, bool _skip_filters, const CompressionOptions& _compression_opts, bool _skip_filters,
const std::string& _column_family_name, int _level, const std::string& _column_family_name, int _level,
const uint64_t _creation_time = 0, const int64_t _oldest_key_time = 0, const uint64_t _creation_time = 0, const int64_t _oldest_key_time = 0,
bool _is_bottommost_level = false, const uint64_t _target_file_size = 0) const uint64_t _target_file_size = 0)
: ioptions(_ioptions), : ioptions(_ioptions),
moptions(_moptions), moptions(_moptions),
internal_comparator(_internal_comparator), internal_comparator(_internal_comparator),
@ -89,7 +89,6 @@ struct TableBuilderOptions {
level(_level), level(_level),
creation_time(_creation_time), creation_time(_creation_time),
oldest_key_time(_oldest_key_time), oldest_key_time(_oldest_key_time),
is_bottommost_level(_is_bottommost_level),
target_file_size(_target_file_size) {} target_file_size(_target_file_size) {}
const ImmutableCFOptions& ioptions; const ImmutableCFOptions& ioptions;
const MutableCFOptions& moptions; const MutableCFOptions& moptions;
@ -103,7 +102,6 @@ struct TableBuilderOptions {
int level; // what level this table/file is on, -1 for "not set, don't know" int level; // what level this table/file is on, -1 for "not set, don't know"
const uint64_t creation_time; const uint64_t creation_time;
const int64_t oldest_key_time; const int64_t oldest_key_time;
const bool is_bottommost_level;
const uint64_t target_file_size; const uint64_t target_file_size;
}; };