Downsample from data buffer for zstd finalizeDict
This commit is contained in:
parent
6e9169d953
commit
0731e4503c
@ -16,6 +16,7 @@
|
|||||||
* CompactionFilter::Decision has a new value: kRemoveWithSingleDelete. If CompactionFilter returns this decision, then CompactionIterator will use `SingleDelete` to mark a key as removed.
|
* CompactionFilter::Decision has a new value: kRemoveWithSingleDelete. If CompactionFilter returns this decision, then CompactionIterator will use `SingleDelete` to mark a key as removed.
|
||||||
* Renamed CompactionFilter::Decision::kRemoveWithSingleDelete to kPurge since the latter sounds more general and hides the implementation details of how compaction iterator handles keys.
|
* Renamed CompactionFilter::Decision::kRemoveWithSingleDelete to kPurge since the latter sounds more general and hides the implementation details of how compaction iterator handles keys.
|
||||||
* Added ability to specify functions for Prepare and Validate to OptionsTypeInfo. Added methods to OptionTypeInfo to set the functions via an API. These methods are intended for RocksDB plugin developers for configuration management.
|
* Added ability to specify functions for Prepare and Validate to OptionsTypeInfo. Added methods to OptionTypeInfo to set the functions via an API. These methods are intended for RocksDB plugin developers for configuration management.
|
||||||
|
* Add an option, `CompressionOptions::use_zstd_dict_trainer`, to indicate whether zstd dictionary trainer should be used for generating a dictionary. The default value of this option is true for backward compatibility. When this option is set to false, zstd API `ZDICT_finalizeDictionary` is used to generate compression dictionaries.
|
||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
* RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requests completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue.
|
* RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requests completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue.
|
||||||
|
24
db/c.cc
24
db/c.cc
@ -2855,6 +2855,20 @@ void rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes(
|
|||||||
opt->rep.bottommost_compression_opts.enabled = enabled;
|
opt->rep.bottommost_compression_opts.enabled = enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stores `use_zstd_dict_trainer` and `enabled` into the bottommost
// compression options held by `opt`. Both values arrive as unsigned char
// and are assigned directly to the corresponding option fields.
void rocksdb_options_set_bottommost_compression_options_use_zstd_dict_trainer(
    rocksdb_options_t* opt, unsigned char use_zstd_dict_trainer,
    unsigned char enabled) {
  auto& bottommost_opts = opt->rep.bottommost_compression_opts;
  bottommost_opts.use_zstd_dict_trainer = use_zstd_dict_trainer;
  bottommost_opts.enabled = enabled;
}
|
||||||
|
|
||||||
|
unsigned char
|
||||||
|
rocksdb_options_get_bottommost_compression_options_use_zstd_dict_trainer(
|
||||||
|
rocksdb_options_t* opt) {
|
||||||
|
return opt->rep.bottommost_compression_opts.use_zstd_dict_trainer;
|
||||||
|
}
|
||||||
|
|
||||||
void rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes(
|
void rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes(
|
||||||
rocksdb_options_t* opt, uint64_t max_dict_buffer_bytes,
|
rocksdb_options_t* opt, uint64_t max_dict_buffer_bytes,
|
||||||
unsigned char enabled) {
|
unsigned char enabled) {
|
||||||
@ -2882,6 +2896,16 @@ int rocksdb_options_get_compression_options_zstd_max_train_bytes(
|
|||||||
return opt->rep.compression_opts.zstd_max_train_bytes;
|
return opt->rep.compression_opts.zstd_max_train_bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stores `use_zstd_dict_trainer` into the (non-bottommost) compression
// options held by `opt`.
void rocksdb_options_set_compression_options_use_zstd_dict_trainer(
    rocksdb_options_t* opt, unsigned char use_zstd_dict_trainer) {
  auto& compression_opts = opt->rep.compression_opts;
  compression_opts.use_zstd_dict_trainer = use_zstd_dict_trainer;
}
|
||||||
|
|
||||||
|
// Returns the `use_zstd_dict_trainer` flag currently stored in the
// compression options of `opt`, converted to unsigned char.
unsigned char rocksdb_options_get_compression_options_use_zstd_dict_trainer(
    rocksdb_options_t* opt) {
  const auto& compression_opts = opt->rep.compression_opts;
  return compression_opts.use_zstd_dict_trainer;
}
|
||||||
|
|
||||||
void rocksdb_options_set_compression_options_parallel_threads(
|
void rocksdb_options_set_compression_options_parallel_threads(
|
||||||
rocksdb_options_t* opt, int value) {
|
rocksdb_options_t* opt, int value) {
|
||||||
opt->rep.compression_opts.parallel_threads = value;
|
opt->rep.compression_opts.parallel_threads = value;
|
||||||
|
@ -2539,6 +2539,9 @@ int main(int argc, char** argv) {
|
|||||||
200 ==
|
200 ==
|
||||||
rocksdb_options_get_compression_options_max_dict_buffer_bytes(co));
|
rocksdb_options_get_compression_options_max_dict_buffer_bytes(co));
|
||||||
|
|
||||||
|
rocksdb_options_set_compression_options_use_zstd_dict_trainer(co, 0);
|
||||||
|
CheckCondition(
|
||||||
|
0 == rocksdb_options_get_compression_options_use_zstd_dict_trainer(co));
|
||||||
rocksdb_options_destroy(co);
|
rocksdb_options_destroy(co);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -136,9 +136,15 @@ Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (cf_options.compression_opts.zstd_max_train_bytes > 0) {
|
if (cf_options.compression_opts.zstd_max_train_bytes > 0) {
|
||||||
if (!ZSTD_TrainDictionarySupported()) {
|
if (cf_options.compression_opts.use_zstd_dict_trainer) {
|
||||||
|
if (!ZSTD_TrainDictionarySupported()) {
|
||||||
|
return Status::InvalidArgument(
|
||||||
|
"zstd dictionary trainer cannot be used because ZSTD 1.1.3+ "
|
||||||
|
"is not linked with the binary.");
|
||||||
|
}
|
||||||
|
} else if (!ZSTD_FinalizeDictionarySupported()) {
|
||||||
return Status::InvalidArgument(
|
return Status::InvalidArgument(
|
||||||
"zstd dictionary trainer cannot be used because ZSTD 1.1.3+ "
|
"zstd finalizeDictionary cannot be used because ZSTD 1.4.5+ "
|
||||||
"is not linked with the binary.");
|
"is not linked with the binary.");
|
||||||
}
|
}
|
||||||
if (cf_options.compression_opts.max_dict_bytes == 0) {
|
if (cf_options.compression_opts.max_dict_bytes == 0) {
|
||||||
|
@ -1296,6 +1296,7 @@ TEST_F(DBTest2, PresetCompressionDict) {
|
|||||||
enum DictionaryTypes : int {
|
enum DictionaryTypes : int {
|
||||||
kWithoutDict,
|
kWithoutDict,
|
||||||
kWithDict,
|
kWithDict,
|
||||||
|
kWithZSTDfinalizeDict,
|
||||||
kWithZSTDTrainedDict,
|
kWithZSTDTrainedDict,
|
||||||
kDictEnd,
|
kDictEnd,
|
||||||
};
|
};
|
||||||
@ -1304,6 +1305,7 @@ TEST_F(DBTest2, PresetCompressionDict) {
|
|||||||
options.compression = compression_type;
|
options.compression = compression_type;
|
||||||
size_t bytes_without_dict = 0;
|
size_t bytes_without_dict = 0;
|
||||||
size_t bytes_with_dict = 0;
|
size_t bytes_with_dict = 0;
|
||||||
|
size_t bytes_with_zstd_finalize_dict = 0;
|
||||||
size_t bytes_with_zstd_trained_dict = 0;
|
size_t bytes_with_zstd_trained_dict = 0;
|
||||||
for (int i = kWithoutDict; i < kDictEnd; i++) {
|
for (int i = kWithoutDict; i < kDictEnd; i++) {
|
||||||
// First iteration: compress without preset dictionary
|
// First iteration: compress without preset dictionary
|
||||||
@ -1323,12 +1325,21 @@ TEST_F(DBTest2, PresetCompressionDict) {
|
|||||||
options.compression_opts.max_dict_bytes = kBlockSizeBytes;
|
options.compression_opts.max_dict_bytes = kBlockSizeBytes;
|
||||||
options.compression_opts.zstd_max_train_bytes = 0;
|
options.compression_opts.zstd_max_train_bytes = 0;
|
||||||
break;
|
break;
|
||||||
|
case kWithZSTDfinalizeDict:
|
||||||
|
if (compression_type != kZSTD) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
options.compression_opts.max_dict_bytes = kBlockSizeBytes;
|
||||||
|
options.compression_opts.zstd_max_train_bytes = kL0FileBytes;
|
||||||
|
options.compression_opts.use_zstd_dict_trainer = false;
|
||||||
|
break;
|
||||||
case kWithZSTDTrainedDict:
|
case kWithZSTDTrainedDict:
|
||||||
if (compression_type != kZSTD) {
|
if (compression_type != kZSTD) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
options.compression_opts.max_dict_bytes = kBlockSizeBytes;
|
options.compression_opts.max_dict_bytes = kBlockSizeBytes;
|
||||||
options.compression_opts.zstd_max_train_bytes = kL0FileBytes;
|
options.compression_opts.zstd_max_train_bytes = kL0FileBytes;
|
||||||
|
options.compression_opts.use_zstd_dict_trainer = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
@ -1365,6 +1376,8 @@ TEST_F(DBTest2, PresetCompressionDict) {
|
|||||||
bytes_without_dict = total_sst_bytes;
|
bytes_without_dict = total_sst_bytes;
|
||||||
} else if (i == kWithDict) {
|
} else if (i == kWithDict) {
|
||||||
bytes_with_dict = total_sst_bytes;
|
bytes_with_dict = total_sst_bytes;
|
||||||
|
} else if (i == kWithZSTDfinalizeDict) {
|
||||||
|
bytes_with_zstd_finalize_dict = total_sst_bytes;
|
||||||
} else if (i == kWithZSTDTrainedDict) {
|
} else if (i == kWithZSTDTrainedDict) {
|
||||||
bytes_with_zstd_trained_dict = total_sst_bytes;
|
bytes_with_zstd_trained_dict = total_sst_bytes;
|
||||||
}
|
}
|
||||||
@ -1375,6 +1388,13 @@ TEST_F(DBTest2, PresetCompressionDict) {
|
|||||||
}
|
}
|
||||||
if (i == kWithDict) {
|
if (i == kWithDict) {
|
||||||
ASSERT_GT(bytes_without_dict, bytes_with_dict);
|
ASSERT_GT(bytes_without_dict, bytes_with_dict);
|
||||||
|
} else if (i == kWithZSTDfinalizeDict) {
  // This branch validates the finalizeDictionary() run, so it must key on
  // kWithZSTDfinalizeDict; keying on kWithZSTDTrainedDict would shadow the
  // trained-dictionary branch that follows and leave it unreachable.
  //
  // In zstd compression, it is sometimes possible that using a finalized
  // dictionary does not get as good a compression ratio as raw content
  // dictionary. But using a dictionary should always get better
  // compression ratio than not using one.
  ASSERT_TRUE(bytes_with_dict > bytes_with_zstd_finalize_dict ||
              bytes_without_dict > bytes_with_zstd_finalize_dict);
|
||||||
} else if (i == kWithZSTDTrainedDict) {
|
} else if (i == kWithZSTDTrainedDict) {
|
||||||
// In zstd compression, it is sometimes possible that using a trained
|
// In zstd compression, it is sometimes possible that using a trained
|
||||||
// dictionary does not get as good a compression ratio as without
|
// dictionary does not get as good a compression ratio as without
|
||||||
|
@ -218,6 +218,7 @@ DECLARE_int32(compression_max_dict_bytes);
|
|||||||
DECLARE_int32(compression_zstd_max_train_bytes);
|
DECLARE_int32(compression_zstd_max_train_bytes);
|
||||||
DECLARE_int32(compression_parallel_threads);
|
DECLARE_int32(compression_parallel_threads);
|
||||||
DECLARE_uint64(compression_max_dict_buffer_bytes);
|
DECLARE_uint64(compression_max_dict_buffer_bytes);
|
||||||
|
DECLARE_bool(compression_use_zstd_dict_trainer);
|
||||||
DECLARE_string(checksum_type);
|
DECLARE_string(checksum_type);
|
||||||
DECLARE_string(env_uri);
|
DECLARE_string(env_uri);
|
||||||
DECLARE_string(fs_uri);
|
DECLARE_string(fs_uri);
|
||||||
|
@ -736,6 +736,11 @@ DEFINE_uint64(compression_max_dict_buffer_bytes, 0,
|
|||||||
"Buffering limit for SST file data to sample for dictionary "
|
"Buffering limit for SST file data to sample for dictionary "
|
||||||
"compression.");
|
"compression.");
|
||||||
|
|
||||||
|
DEFINE_bool(
|
||||||
|
compression_use_zstd_dict_trainer, true,
|
||||||
|
"Use zstd's trainer to generate dictionary. If the options is false, "
|
||||||
|
"zstd's finalizeDictionary() API is used to generate dictionary.");
|
||||||
|
|
||||||
DEFINE_string(bottommost_compression_type, "disable",
|
DEFINE_string(bottommost_compression_type, "disable",
|
||||||
"Algorithm to use to compress bottommost level of the database. "
|
"Algorithm to use to compress bottommost level of the database. "
|
||||||
"\"disable\" means disabling the feature");
|
"\"disable\" means disabling the feature");
|
||||||
|
@ -2412,6 +2412,8 @@ void StressTest::Open(SharedState* shared) {
|
|||||||
FLAGS_compression_parallel_threads;
|
FLAGS_compression_parallel_threads;
|
||||||
options_.compression_opts.max_dict_buffer_bytes =
|
options_.compression_opts.max_dict_buffer_bytes =
|
||||||
FLAGS_compression_max_dict_buffer_bytes;
|
FLAGS_compression_max_dict_buffer_bytes;
|
||||||
|
options_.compression_opts.use_zstd_dict_trainer =
|
||||||
|
FLAGS_compression_use_zstd_dict_trainer;
|
||||||
options_.create_if_missing = true;
|
options_.create_if_missing = true;
|
||||||
options_.max_manifest_file_size = FLAGS_max_manifest_file_size;
|
options_.max_manifest_file_size = FLAGS_max_manifest_file_size;
|
||||||
options_.inplace_update_support = FLAGS_in_place_update;
|
options_.inplace_update_support = FLAGS_in_place_update;
|
||||||
|
@ -100,9 +100,9 @@ struct CompressionOptions {
|
|||||||
//
|
//
|
||||||
// The dictionary is created by sampling the SST file data. If
|
// The dictionary is created by sampling the SST file data. If
|
||||||
// `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
|
// `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
|
||||||
// dictionary generator. Otherwise, if ZSTD compression is used, the
|
// dictionary generator (see comments for option `use_zstd_dict_trainer` for
|
||||||
// dictionary is created by calling ZDICT_finalizeDictionary() if available,
|
// detail on dictionary generator). If `zstd_max_train_bytes` is zero, the
|
||||||
// else the random samples are used directly as the dictionary.
|
// random samples are used directly as the dictionary.
|
||||||
//
|
//
|
||||||
// When compression dictionary is disabled, we compress and write each block
|
// When compression dictionary is disabled, we compress and write each block
|
||||||
// before buffering data for the next one. When compression dictionary is
|
// before buffering data for the next one. When compression dictionary is
|
||||||
@ -174,6 +174,20 @@ struct CompressionOptions {
|
|||||||
// Default: 0 (unlimited)
|
// Default: 0 (unlimited)
|
||||||
uint64_t max_dict_buffer_bytes;
|
uint64_t max_dict_buffer_bytes;
|
||||||
|
|
||||||
|
// Use zstd trainer to generate dictionaries. When this option is set to true,
|
||||||
|
// zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes
|
||||||
|
// buffered data will be passed to zstd dictionary trainer to generate a
|
||||||
|
// dictionary of size max_dict_bytes.
|
||||||
|
//
|
||||||
|
// When this option is false, zstd's API ZDICT_finalizeDictionary() will be
|
||||||
|
// called to generate dictionaries. zstd_max_train_bytes of training sampled
|
||||||
|
// data will be passed to this API. Using this API should save CPU time on
|
||||||
|
// dictionary training, but the compression ratio may not be as good as using
|
||||||
|
// a dictionary trainer.
|
||||||
|
//
|
||||||
|
// Default: true
|
||||||
|
bool use_zstd_dict_trainer;
|
||||||
|
|
||||||
CompressionOptions()
|
CompressionOptions()
|
||||||
: window_bits(-14),
|
: window_bits(-14),
|
||||||
level(kDefaultCompressionLevel),
|
level(kDefaultCompressionLevel),
|
||||||
@ -182,11 +196,13 @@ struct CompressionOptions {
|
|||||||
zstd_max_train_bytes(0),
|
zstd_max_train_bytes(0),
|
||||||
parallel_threads(1),
|
parallel_threads(1),
|
||||||
enabled(false),
|
enabled(false),
|
||||||
max_dict_buffer_bytes(0) {}
|
max_dict_buffer_bytes(0),
|
||||||
|
use_zstd_dict_trainer(true) {}
|
||||||
CompressionOptions(int wbits, int _lev, int _strategy,
|
CompressionOptions(int wbits, int _lev, int _strategy,
|
||||||
uint32_t _max_dict_bytes, uint32_t _zstd_max_train_bytes,
|
uint32_t _max_dict_bytes, uint32_t _zstd_max_train_bytes,
|
||||||
uint32_t _parallel_threads, bool _enabled,
|
uint32_t _parallel_threads, bool _enabled,
|
||||||
uint64_t _max_dict_buffer_bytes)
|
uint64_t _max_dict_buffer_bytes,
|
||||||
|
bool _use_zstd_dict_trainer)
|
||||||
: window_bits(wbits),
|
: window_bits(wbits),
|
||||||
level(_lev),
|
level(_lev),
|
||||||
strategy(_strategy),
|
strategy(_strategy),
|
||||||
@ -194,7 +210,8 @@ struct CompressionOptions {
|
|||||||
zstd_max_train_bytes(_zstd_max_train_bytes),
|
zstd_max_train_bytes(_zstd_max_train_bytes),
|
||||||
parallel_threads(_parallel_threads),
|
parallel_threads(_parallel_threads),
|
||||||
enabled(_enabled),
|
enabled(_enabled),
|
||||||
max_dict_buffer_bytes(_max_dict_buffer_bytes) {}
|
max_dict_buffer_bytes(_max_dict_buffer_bytes),
|
||||||
|
use_zstd_dict_trainer(_use_zstd_dict_trainer) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Temperature of a file. Used to pass to FileSystem for a different
|
// Temperature of a file. Used to pass to FileSystem for a different
|
||||||
|
@ -1041,6 +1041,12 @@ extern ROCKSDB_LIBRARY_API int
|
|||||||
rocksdb_options_get_compression_options_zstd_max_train_bytes(
|
rocksdb_options_get_compression_options_zstd_max_train_bytes(
|
||||||
rocksdb_options_t* opt);
|
rocksdb_options_t* opt);
|
||||||
extern ROCKSDB_LIBRARY_API void
|
extern ROCKSDB_LIBRARY_API void
|
||||||
|
rocksdb_options_set_compression_options_use_zstd_dict_trainer(
|
||||||
|
rocksdb_options_t*, unsigned char);
|
||||||
|
extern ROCKSDB_LIBRARY_API unsigned char
|
||||||
|
rocksdb_options_get_compression_options_use_zstd_dict_trainer(
|
||||||
|
rocksdb_options_t* opt);
|
||||||
|
extern ROCKSDB_LIBRARY_API void
|
||||||
rocksdb_options_set_compression_options_parallel_threads(rocksdb_options_t*,
|
rocksdb_options_set_compression_options_parallel_threads(rocksdb_options_t*,
|
||||||
int);
|
int);
|
||||||
extern ROCKSDB_LIBRARY_API int
|
extern ROCKSDB_LIBRARY_API int
|
||||||
@ -1059,6 +1065,12 @@ extern ROCKSDB_LIBRARY_API void
|
|||||||
rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes(
|
rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes(
|
||||||
rocksdb_options_t*, int, unsigned char);
|
rocksdb_options_t*, int, unsigned char);
|
||||||
extern ROCKSDB_LIBRARY_API void
|
extern ROCKSDB_LIBRARY_API void
|
||||||
|
rocksdb_options_set_bottommost_compression_options_use_zstd_dict_trainer(
|
||||||
|
rocksdb_options_t*, unsigned char, unsigned char);
|
||||||
|
extern ROCKSDB_LIBRARY_API unsigned char
|
||||||
|
rocksdb_options_get_bottommost_compression_options_use_zstd_dict_trainer(
|
||||||
|
rocksdb_options_t* opt);
|
||||||
|
extern ROCKSDB_LIBRARY_API void
|
||||||
rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes(
|
rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes(
|
||||||
rocksdb_options_t*, uint64_t, unsigned char);
|
rocksdb_options_t*, uint64_t, unsigned char);
|
||||||
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_prefix_extractor(
|
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_prefix_extractor(
|
||||||
|
@ -154,6 +154,30 @@ jlong Java_org_rocksdb_CompressionOptions_maxDictBufferBytes(JNIEnv*, jobject,
|
|||||||
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
|
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
|
||||||
return static_cast<jlong>(opt->max_dict_buffer_bytes);
|
return static_cast<jlong>(opt->max_dict_buffer_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Class: org_rocksdb_CompressionOptions
|
||||||
|
* Method: setUseZstdDictTrainer
|
||||||
|
* Signature: (JZ)V
|
||||||
|
*/
|
||||||
|
void Java_org_rocksdb_CompressionOptions_setUseZstdDictTrainer(
    JNIEnv*, jobject, jlong jhandle, jboolean juse_zstd_dict_trainer) {
  // jhandle is reinterpreted as a pointer to the native CompressionOptions.
  auto* compression_opts =
      reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
  // Only an exact JNI_TRUE enables the trainer; any other value disables it.
  const bool use_trainer = (juse_zstd_dict_trainer == JNI_TRUE);
  compression_opts->use_zstd_dict_trainer = use_trainer;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Class: org_rocksdb_CompressionOptions
|
||||||
|
* Method: useZstdDictTrainer
|
||||||
|
* Signature: (J)Z
|
||||||
|
*/
|
||||||
|
jboolean Java_org_rocksdb_CompressionOptions_useZstdDictTrainer(JNIEnv*,
                                                                jobject,
                                                                jlong jhandle) {
  // jhandle is reinterpreted as a pointer to the native CompressionOptions.
  auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
  // Cast to the declared JNI return type rather than to bool, so the
  // conversion to jboolean is explicit instead of implicit.
  return static_cast<jboolean>(opt->use_zstd_dict_trainer);
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Class: org_rocksdb_CompressionOptions
|
* Class: org_rocksdb_CompressionOptions
|
||||||
* Method: setEnabled
|
* Method: setEnabled
|
||||||
|
@ -116,6 +116,15 @@ static Status ParseCompressionOptions(const std::string& value,
|
|||||||
compression_opts.max_dict_buffer_bytes = ParseUint64(field);
|
compression_opts.max_dict_buffer_bytes = ParseUint64(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// use_zstd_dict_trainer is optional for backwards compatibility
|
||||||
|
if (!field_stream.eof()) {
|
||||||
|
if (!std::getline(field_stream, field, kDelimiter)) {
|
||||||
|
return Status::InvalidArgument(
|
||||||
|
"unable to parse the specified CF option " + name);
|
||||||
|
}
|
||||||
|
compression_opts.use_zstd_dict_trainer = ParseBoolean("", field);
|
||||||
|
}
|
||||||
|
|
||||||
if (!field_stream.eof()) {
|
if (!field_stream.eof()) {
|
||||||
return Status::InvalidArgument("unable to parse the specified CF option " +
|
return Status::InvalidArgument("unable to parse the specified CF option " +
|
||||||
name);
|
name);
|
||||||
@ -156,6 +165,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
|
|||||||
{offsetof(struct CompressionOptions, max_dict_buffer_bytes),
|
{offsetof(struct CompressionOptions, max_dict_buffer_bytes),
|
||||||
OptionType::kUInt64T, OptionVerificationType::kNormal,
|
OptionType::kUInt64T, OptionVerificationType::kNormal,
|
||||||
OptionTypeFlags::kMutable}},
|
OptionTypeFlags::kMutable}},
|
||||||
|
{"use_zstd_dict_trainer",
|
||||||
|
{offsetof(struct CompressionOptions, use_zstd_dict_trainer),
|
||||||
|
OptionType::kBoolean, OptionVerificationType::kNormal,
|
||||||
|
OptionTypeFlags::kMutable}},
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::unordered_map<std::string, OptionTypeInfo>
|
static std::unordered_map<std::string, OptionTypeInfo>
|
||||||
|
@ -211,6 +211,10 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
|
|||||||
" Options.bottommost_compression_opts.max_dict_buffer_bytes: "
|
" Options.bottommost_compression_opts.max_dict_buffer_bytes: "
|
||||||
"%" PRIu64,
|
"%" PRIu64,
|
||||||
bottommost_compression_opts.max_dict_buffer_bytes);
|
bottommost_compression_opts.max_dict_buffer_bytes);
|
||||||
|
ROCKS_LOG_HEADER(
|
||||||
|
log,
|
||||||
|
" Options.bottommost_compression_opts.use_zstd_dict_trainer: %s",
|
||||||
|
bottommost_compression_opts.use_zstd_dict_trainer ? "true" : "false");
|
||||||
ROCKS_LOG_HEADER(log, " Options.compression_opts.window_bits: %d",
|
ROCKS_LOG_HEADER(log, " Options.compression_opts.window_bits: %d",
|
||||||
compression_opts.window_bits);
|
compression_opts.window_bits);
|
||||||
ROCKS_LOG_HEADER(log, " Options.compression_opts.level: %d",
|
ROCKS_LOG_HEADER(log, " Options.compression_opts.level: %d",
|
||||||
@ -225,6 +229,9 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
|
|||||||
" Options.compression_opts.zstd_max_train_bytes: "
|
" Options.compression_opts.zstd_max_train_bytes: "
|
||||||
"%" PRIu32,
|
"%" PRIu32,
|
||||||
compression_opts.zstd_max_train_bytes);
|
compression_opts.zstd_max_train_bytes);
|
||||||
|
ROCKS_LOG_HEADER(
|
||||||
|
log, " Options.compression_opts.use_zstd_dict_trainer: %s",
|
||||||
|
compression_opts.use_zstd_dict_trainer ? "true" : "false");
|
||||||
ROCKS_LOG_HEADER(log,
|
ROCKS_LOG_HEADER(log,
|
||||||
" Options.compression_opts.parallel_threads: "
|
" Options.compression_opts.parallel_threads: "
|
||||||
"%" PRIu32,
|
"%" PRIu32,
|
||||||
|
@ -479,8 +479,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
|
|||||||
"max_bytes_for_level_multiplier=60;"
|
"max_bytes_for_level_multiplier=60;"
|
||||||
"memtable_factory=SkipListFactory;"
|
"memtable_factory=SkipListFactory;"
|
||||||
"compression=kNoCompression;"
|
"compression=kNoCompression;"
|
||||||
"compression_opts=5:6:7:8:9:10:true:11;"
|
"compression_opts=5:6:7:8:9:10:true:11:false;"
|
||||||
"bottommost_compression_opts=4:5:6:7:8:9:true:10;"
|
"bottommost_compression_opts=4:5:6:7:8:9:true:10:true;"
|
||||||
"bottommost_compression=kDisableCompressionOption;"
|
"bottommost_compression=kDisableCompressionOption;"
|
||||||
"level0_stop_writes_trigger=33;"
|
"level0_stop_writes_trigger=33;"
|
||||||
"num_levels=99;"
|
"num_levels=99;"
|
||||||
|
@ -68,7 +68,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
|
|||||||
"kZSTDNotFinalCompression"},
|
"kZSTDNotFinalCompression"},
|
||||||
{"bottommost_compression", "kLZ4Compression"},
|
{"bottommost_compression", "kLZ4Compression"},
|
||||||
{"bottommost_compression_opts", "5:6:7:8:10:true"},
|
{"bottommost_compression_opts", "5:6:7:8:10:true"},
|
||||||
{"compression_opts", "4:5:6:7:8:true"},
|
{"compression_opts", "4:5:6:7:8:2:true:100:false"},
|
||||||
{"num_levels", "8"},
|
{"num_levels", "8"},
|
||||||
{"level0_file_num_compaction_trigger", "8"},
|
{"level0_file_num_compaction_trigger", "8"},
|
||||||
{"level0_slowdown_writes_trigger", "9"},
|
{"level0_slowdown_writes_trigger", "9"},
|
||||||
@ -190,9 +190,10 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
|
|||||||
ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6);
|
ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u);
|
ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
|
ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads,
|
ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 2u);
|
||||||
CompressionOptions().parallel_threads);
|
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
|
ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
|
||||||
|
ASSERT_EQ(new_cf_opt.compression_opts.max_dict_buffer_bytes, 100u);
|
||||||
|
ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression);
|
ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6);
|
||||||
@ -202,6 +203,8 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
|
|||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads,
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads,
|
||||||
CompressionOptions().parallel_threads);
|
CompressionOptions().parallel_threads);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true);
|
||||||
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
|
||||||
|
CompressionOptions().use_zstd_dict_trainer);
|
||||||
ASSERT_EQ(new_cf_opt.num_levels, 8);
|
ASSERT_EQ(new_cf_opt.num_levels, 8);
|
||||||
ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8);
|
ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8);
|
||||||
ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9);
|
ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9);
|
||||||
@ -602,6 +605,8 @@ TEST_F(OptionsTest, CompressionOptionsFromString) {
|
|||||||
ASSERT_EQ(base_cf_opt.compression_opts.parallel_threads,
|
ASSERT_EQ(base_cf_opt.compression_opts.parallel_threads,
|
||||||
dflt.parallel_threads);
|
dflt.parallel_threads);
|
||||||
ASSERT_EQ(base_cf_opt.compression_opts.enabled, dflt.enabled);
|
ASSERT_EQ(base_cf_opt.compression_opts.enabled, dflt.enabled);
|
||||||
|
ASSERT_EQ(base_cf_opt.compression_opts.use_zstd_dict_trainer,
|
||||||
|
dflt.use_zstd_dict_trainer);
|
||||||
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.window_bits, 4);
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.window_bits, 4);
|
||||||
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.level, 5);
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.level, 5);
|
||||||
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.strategy, 6);
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.strategy, 6);
|
||||||
@ -611,10 +616,12 @@ TEST_F(OptionsTest, CompressionOptionsFromString) {
|
|||||||
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.parallel_threads,
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.parallel_threads,
|
||||||
dflt.parallel_threads);
|
dflt.parallel_threads);
|
||||||
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.enabled, dflt.enabled);
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.enabled, dflt.enabled);
|
||||||
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
|
||||||
|
dflt.use_zstd_dict_trainer);
|
||||||
|
|
||||||
ASSERT_OK(GetColumnFamilyOptionsFromString(
|
ASSERT_OK(GetColumnFamilyOptionsFromString(
|
||||||
config_options, ColumnFamilyOptions(),
|
config_options, ColumnFamilyOptions(),
|
||||||
"compression_opts=4:5:6:7:8:9:true; "
|
"compression_opts=4:5:6:7:8:9:true:10:false; "
|
||||||
"bottommost_compression_opts=5:6:7:8:9:false",
|
"bottommost_compression_opts=5:6:7:8:9:false",
|
||||||
&base_cf_opt));
|
&base_cf_opt));
|
||||||
ASSERT_EQ(base_cf_opt.compression_opts.window_bits, 4);
|
ASSERT_EQ(base_cf_opt.compression_opts.window_bits, 4);
|
||||||
@ -624,6 +631,8 @@ TEST_F(OptionsTest, CompressionOptionsFromString) {
|
|||||||
ASSERT_EQ(base_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
|
ASSERT_EQ(base_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
|
||||||
ASSERT_EQ(base_cf_opt.compression_opts.parallel_threads, 9u);
|
ASSERT_EQ(base_cf_opt.compression_opts.parallel_threads, 9u);
|
||||||
ASSERT_EQ(base_cf_opt.compression_opts.enabled, true);
|
ASSERT_EQ(base_cf_opt.compression_opts.enabled, true);
|
||||||
|
ASSERT_EQ(base_cf_opt.compression_opts.max_dict_buffer_bytes, 10u);
|
||||||
|
ASSERT_EQ(base_cf_opt.compression_opts.use_zstd_dict_trainer, false);
|
||||||
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.window_bits, 5);
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.window_bits, 5);
|
||||||
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.level, 6);
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.level, 6);
|
||||||
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.strategy, 7);
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.strategy, 7);
|
||||||
@ -632,6 +641,8 @@ TEST_F(OptionsTest, CompressionOptionsFromString) {
|
|||||||
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.parallel_threads,
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.parallel_threads,
|
||||||
dflt.parallel_threads);
|
dflt.parallel_threads);
|
||||||
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.enabled, false);
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.enabled, false);
|
||||||
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
|
||||||
|
dflt.use_zstd_dict_trainer);
|
||||||
|
|
||||||
ASSERT_OK(
|
ASSERT_OK(
|
||||||
GetStringFromColumnFamilyOptions(config_options, base_cf_opt, &opts_str));
|
GetStringFromColumnFamilyOptions(config_options, base_cf_opt, &opts_str));
|
||||||
@ -644,6 +655,8 @@ TEST_F(OptionsTest, CompressionOptionsFromString) {
|
|||||||
ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
|
ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 9u);
|
ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 9u);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
|
ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
|
||||||
|
// Check the same options object as the neighbouring assertions
// (new_cf_opt) instead of re-checking base_cf_opt.
ASSERT_EQ(new_cf_opt.compression_opts.max_dict_buffer_bytes, 10u);
ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 7);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 7);
|
||||||
@ -652,15 +665,18 @@ TEST_F(OptionsTest, CompressionOptionsFromString) {
|
|||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads,
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads,
|
||||||
dflt.parallel_threads);
|
dflt.parallel_threads);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, false);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, false);
|
||||||
|
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
|
||||||
|
dflt.use_zstd_dict_trainer);
|
||||||
|
|
||||||
// Test as struct values
|
// Test as struct values
|
||||||
ASSERT_OK(GetColumnFamilyOptionsFromString(
|
ASSERT_OK(GetColumnFamilyOptionsFromString(
|
||||||
config_options, ColumnFamilyOptions(),
|
config_options, ColumnFamilyOptions(),
|
||||||
"compression_opts={window_bits=5; level=6; strategy=7; max_dict_bytes=8;"
|
"compression_opts={window_bits=5; level=6; strategy=7; max_dict_bytes=8;"
|
||||||
"zstd_max_train_bytes=9;parallel_threads=10;enabled=true}; "
|
"zstd_max_train_bytes=9;parallel_threads=10;enabled=true;use_zstd_dict_"
|
||||||
|
"trainer=false}; "
|
||||||
"bottommost_compression_opts={window_bits=4; level=5; strategy=6;"
|
"bottommost_compression_opts={window_bits=4; level=5; strategy=6;"
|
||||||
" max_dict_bytes=7;zstd_max_train_bytes=8;parallel_threads=9;"
|
" max_dict_bytes=7;zstd_max_train_bytes=8;parallel_threads=9;"
|
||||||
"enabled=false}; ",
|
"enabled=false;use_zstd_dict_trainer=true}; ",
|
||||||
&new_cf_opt));
|
&new_cf_opt));
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 5);
|
ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 5);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.level, 6);
|
ASSERT_EQ(new_cf_opt.compression_opts.level, 6);
|
||||||
@ -669,6 +685,7 @@ TEST_F(OptionsTest, CompressionOptionsFromString) {
|
|||||||
ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 9u);
|
ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 9u);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 10u);
|
ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 10u);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
|
ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
|
||||||
|
ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 4);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 4);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 5);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 5);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 6);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 6);
|
||||||
@ -676,6 +693,7 @@ TEST_F(OptionsTest, CompressionOptionsFromString) {
|
|||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 8u);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 8u);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, 9u);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, 9u);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, false);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, false);
|
||||||
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, true);
|
||||||
|
|
||||||
ASSERT_OK(GetColumnFamilyOptionsFromString(
|
ASSERT_OK(GetColumnFamilyOptionsFromString(
|
||||||
config_options, base_cf_opt,
|
config_options, base_cf_opt,
|
||||||
@ -707,6 +725,8 @@ TEST_F(OptionsTest, CompressionOptionsFromString) {
|
|||||||
base_cf_opt.bottommost_compression_opts.parallel_threads);
|
base_cf_opt.bottommost_compression_opts.parallel_threads);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled,
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled,
|
||||||
base_cf_opt.bottommost_compression_opts.enabled);
|
base_cf_opt.bottommost_compression_opts.enabled);
|
||||||
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
|
||||||
|
base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer);
|
||||||
|
|
||||||
// Test a few individual struct values
|
// Test a few individual struct values
|
||||||
ASSERT_OK(GetColumnFamilyOptionsFromString(
|
ASSERT_OK(GetColumnFamilyOptionsFromString(
|
||||||
@ -1351,6 +1371,7 @@ TEST_F(OptionsTest, GetOptionsFromStringTest) {
|
|||||||
ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u);
|
ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u);
|
||||||
ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u);
|
ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u);
|
||||||
ASSERT_EQ(new_options.compression_opts.enabled, false);
|
ASSERT_EQ(new_options.compression_opts.enabled, false);
|
||||||
|
ASSERT_EQ(new_options.compression_opts.use_zstd_dict_trainer, true);
|
||||||
ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption);
|
ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption);
|
||||||
ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5);
|
ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5);
|
||||||
ASSERT_EQ(new_options.bottommost_compression_opts.level, 6);
|
ASSERT_EQ(new_options.bottommost_compression_opts.level, 6);
|
||||||
@ -1359,6 +1380,8 @@ TEST_F(OptionsTest, GetOptionsFromStringTest) {
|
|||||||
ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u);
|
ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u);
|
||||||
ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u);
|
ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u);
|
||||||
ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false);
|
ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false);
|
||||||
|
ASSERT_EQ(new_options.bottommost_compression_opts.use_zstd_dict_trainer,
|
||||||
|
true);
|
||||||
ASSERT_EQ(new_options.write_buffer_size, 10U);
|
ASSERT_EQ(new_options.write_buffer_size, 10U);
|
||||||
ASSERT_EQ(new_options.max_write_buffer_number, 16);
|
ASSERT_EQ(new_options.max_write_buffer_number, 16);
|
||||||
const auto new_bbto =
|
const auto new_bbto =
|
||||||
@ -2263,7 +2286,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) {
|
|||||||
"kZSTDNotFinalCompression"},
|
"kZSTDNotFinalCompression"},
|
||||||
{"bottommost_compression", "kLZ4Compression"},
|
{"bottommost_compression", "kLZ4Compression"},
|
||||||
{"bottommost_compression_opts", "5:6:7:8:9:true"},
|
{"bottommost_compression_opts", "5:6:7:8:9:true"},
|
||||||
{"compression_opts", "4:5:6:7:8:true"},
|
{"compression_opts", "4:5:6:7:8:9:true:10:false"},
|
||||||
{"num_levels", "8"},
|
{"num_levels", "8"},
|
||||||
{"level0_file_num_compaction_trigger", "8"},
|
{"level0_file_num_compaction_trigger", "8"},
|
||||||
{"level0_slowdown_writes_trigger", "9"},
|
{"level0_slowdown_writes_trigger", "9"},
|
||||||
@ -2380,9 +2403,10 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) {
|
|||||||
ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6);
|
ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u);
|
ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
|
ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads,
|
ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 9u);
|
||||||
CompressionOptions().parallel_threads);
|
|
||||||
ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
|
ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
|
||||||
|
ASSERT_EQ(new_cf_opt.compression_opts.max_dict_buffer_bytes, 10u);
|
||||||
|
ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression);
|
ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6);
|
||||||
@ -2392,6 +2416,10 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) {
|
|||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads,
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads,
|
||||||
CompressionOptions().parallel_threads);
|
CompressionOptions().parallel_threads);
|
||||||
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true);
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true);
|
||||||
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_buffer_bytes,
|
||||||
|
CompressionOptions().max_dict_buffer_bytes);
|
||||||
|
ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
|
||||||
|
CompressionOptions().use_zstd_dict_trainer);
|
||||||
ASSERT_EQ(new_cf_opt.num_levels, 8);
|
ASSERT_EQ(new_cf_opt.num_levels, 8);
|
||||||
ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8);
|
ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8);
|
||||||
ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9);
|
ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9);
|
||||||
@ -3054,6 +3082,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromStringTest) {
|
|||||||
ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u);
|
ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u);
|
||||||
ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u);
|
ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u);
|
||||||
ASSERT_EQ(new_options.compression_opts.enabled, false);
|
ASSERT_EQ(new_options.compression_opts.enabled, false);
|
||||||
|
ASSERT_EQ(new_options.compression_opts.use_zstd_dict_trainer, true);
|
||||||
ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption);
|
ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption);
|
||||||
ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5);
|
ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5);
|
||||||
ASSERT_EQ(new_options.bottommost_compression_opts.level, 6);
|
ASSERT_EQ(new_options.bottommost_compression_opts.level, 6);
|
||||||
@ -3062,6 +3091,8 @@ TEST_F(OptionsOldApiTest, GetOptionsFromStringTest) {
|
|||||||
ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u);
|
ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u);
|
||||||
ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u);
|
ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u);
|
||||||
ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false);
|
ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false);
|
||||||
|
ASSERT_EQ(new_options.bottommost_compression_opts.use_zstd_dict_trainer,
|
||||||
|
true);
|
||||||
ASSERT_EQ(new_options.write_buffer_size, 10U);
|
ASSERT_EQ(new_options.write_buffer_size, 10U);
|
||||||
ASSERT_EQ(new_options.max_write_buffer_number, 16);
|
ASSERT_EQ(new_options.max_write_buffer_number, 16);
|
||||||
|
|
||||||
|
@ -1886,21 +1886,15 @@ void BlockBasedTableBuilder::EnterUnbuffered() {
|
|||||||
// OK if compression_dict_samples is empty, we'll just get empty dictionary.
|
// OK if compression_dict_samples is empty, we'll just get empty dictionary.
|
||||||
std::string dict;
|
std::string dict;
|
||||||
if (r->compression_opts.zstd_max_train_bytes > 0) {
|
if (r->compression_opts.zstd_max_train_bytes > 0) {
|
||||||
dict = ZSTD_TrainDictionary(compression_dict_samples,
|
if (r->compression_opts.use_zstd_dict_trainer) {
|
||||||
compression_dict_sample_lens,
|
dict = ZSTD_TrainDictionary(compression_dict_samples,
|
||||||
r->compression_opts.max_dict_bytes);
|
compression_dict_sample_lens,
|
||||||
} else if (rep_->compression_type == kZSTD) {
|
r->compression_opts.max_dict_bytes);
|
||||||
// use ZSTD_finalizeDictionary API instead of raw content dictionary
|
} else {
|
||||||
std::string samples;
|
dict = ZSTD_FinalizeDictionary(
|
||||||
std::vector<size_t> sample_lens;
|
compression_dict_samples, compression_dict_sample_lens,
|
||||||
for (size_t i = 0; i < r->data_block_buffers.size(); ++i) {
|
r->compression_opts.max_dict_bytes, r->compression_opts.level);
|
||||||
samples.append(r->data_block_buffers[i]);
|
|
||||||
sample_lens.emplace_back(r->data_block_buffers[i].size());
|
|
||||||
}
|
}
|
||||||
// compression_dict_samples is the starting and fallback dicitonary content
|
|
||||||
dict = ZSTD_FinalizeDictionary(
|
|
||||||
samples, sample_lens, compression_dict_samples,
|
|
||||||
r->compression_opts.max_dict_bytes, r->compression_opts.level);
|
|
||||||
} else {
|
} else {
|
||||||
dict = std::move(compression_dict_samples);
|
dict = std::move(compression_dict_samples);
|
||||||
}
|
}
|
||||||
@ -1936,7 +1930,6 @@ void BlockBasedTableBuilder::EnterUnbuffered() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto& data_block = r->data_block_buffers[i];
|
auto& data_block = r->data_block_buffers[i];
|
||||||
|
|
||||||
if (r->IsParallelCompressionEnabled()) {
|
if (r->IsParallelCompressionEnabled()) {
|
||||||
Slice first_key_in_next_block;
|
Slice first_key_in_next_block;
|
||||||
const Slice* first_key_in_next_block_ptr = &first_key_in_next_block;
|
const Slice* first_key_in_next_block_ptr = &first_key_in_next_block;
|
||||||
|
@ -253,7 +253,7 @@ Status SstFileDumper::ShowAllCompressionSizes(
|
|||||||
compression_types,
|
compression_types,
|
||||||
int32_t compress_level_from, int32_t compress_level_to,
|
int32_t compress_level_from, int32_t compress_level_to,
|
||||||
uint32_t max_dict_bytes, uint32_t zstd_max_train_bytes,
|
uint32_t max_dict_bytes, uint32_t zstd_max_train_bytes,
|
||||||
uint64_t max_dict_buffer_bytes) {
|
uint64_t max_dict_buffer_bytes, bool use_zstd_dict_trainer) {
|
||||||
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
|
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
|
||||||
for (auto& i : compression_types) {
|
for (auto& i : compression_types) {
|
||||||
if (CompressionTypeSupported(i.first)) {
|
if (CompressionTypeSupported(i.first)) {
|
||||||
@ -262,6 +262,7 @@ Status SstFileDumper::ShowAllCompressionSizes(
|
|||||||
compress_opt.max_dict_bytes = max_dict_bytes;
|
compress_opt.max_dict_bytes = max_dict_bytes;
|
||||||
compress_opt.zstd_max_train_bytes = zstd_max_train_bytes;
|
compress_opt.zstd_max_train_bytes = zstd_max_train_bytes;
|
||||||
compress_opt.max_dict_buffer_bytes = max_dict_buffer_bytes;
|
compress_opt.max_dict_buffer_bytes = max_dict_buffer_bytes;
|
||||||
|
compress_opt.use_zstd_dict_trainer = use_zstd_dict_trainer;
|
||||||
for (int32_t j = compress_level_from; j <= compress_level_to; j++) {
|
for (int32_t j = compress_level_from; j <= compress_level_to; j++) {
|
||||||
fprintf(stdout, "Compression level: %d", j);
|
fprintf(stdout, "Compression level: %d", j);
|
||||||
compress_opt.level = j;
|
compress_opt.level = j;
|
||||||
|
@ -44,7 +44,7 @@ class SstFileDumper {
|
|||||||
compression_types,
|
compression_types,
|
||||||
int32_t compress_level_from, int32_t compress_level_to,
|
int32_t compress_level_from, int32_t compress_level_to,
|
||||||
uint32_t max_dict_bytes, uint32_t zstd_max_train_bytes,
|
uint32_t max_dict_bytes, uint32_t zstd_max_train_bytes,
|
||||||
uint64_t max_dict_buffer_bytes);
|
uint64_t max_dict_buffer_bytes, bool use_zstd_dict_trainer);
|
||||||
|
|
||||||
Status ShowCompressionSize(size_t block_size, CompressionType compress_type,
|
Status ShowCompressionSize(size_t block_size, CompressionType compress_type,
|
||||||
const CompressionOptions& compress_opt);
|
const CompressionOptions& compress_opt);
|
||||||
|
@ -1204,6 +1204,11 @@ DEFINE_uint64(compression_max_dict_buffer_bytes,
|
|||||||
ROCKSDB_NAMESPACE::CompressionOptions().max_dict_buffer_bytes,
|
ROCKSDB_NAMESPACE::CompressionOptions().max_dict_buffer_bytes,
|
||||||
"Maximum bytes to buffer to collect samples for dictionary.");
|
"Maximum bytes to buffer to collect samples for dictionary.");
|
||||||
|
|
||||||
|
DEFINE_bool(compression_use_zstd_dict_trainer,
|
||||||
|
ROCKSDB_NAMESPACE::CompressionOptions().use_zstd_dict_trainer,
|
||||||
|
"If true, use ZSTD_TrainDictionary() to create dictionary, else"
|
||||||
|
"use ZSTD_FinalizeDictionary() to create dictionary");
|
||||||
|
|
||||||
static bool ValidateTableCacheNumshardbits(const char* flagname,
|
static bool ValidateTableCacheNumshardbits(const char* flagname,
|
||||||
int32_t value) {
|
int32_t value) {
|
||||||
if (0 >= value || value >= 20) {
|
if (0 >= value || value >= 20) {
|
||||||
@ -3963,6 +3968,8 @@ class Benchmark {
|
|||||||
FLAGS_compression_parallel_threads;
|
FLAGS_compression_parallel_threads;
|
||||||
options.compression_opts.max_dict_buffer_bytes =
|
options.compression_opts.max_dict_buffer_bytes =
|
||||||
FLAGS_compression_max_dict_buffer_bytes;
|
FLAGS_compression_max_dict_buffer_bytes;
|
||||||
|
options.compression_opts.use_zstd_dict_trainer =
|
||||||
|
FLAGS_compression_use_zstd_dict_trainer;
|
||||||
|
|
||||||
options.max_open_files = FLAGS_open_files;
|
options.max_open_files = FLAGS_open_files;
|
||||||
if (FLAGS_cost_write_buffer_to_cache || FLAGS_db_write_buffer_size != 0) {
|
if (FLAGS_cost_write_buffer_to_cache || FLAGS_db_write_buffer_size != 0) {
|
||||||
|
@ -56,6 +56,7 @@ default_params = {
|
|||||||
# lambda: random.choice([1] * 9 + [4])
|
# lambda: random.choice([1] * 9 + [4])
|
||||||
"compression_parallel_threads": 1,
|
"compression_parallel_threads": 1,
|
||||||
"compression_max_dict_buffer_bytes": lambda: (1 << random.randint(0, 40)) - 1,
|
"compression_max_dict_buffer_bytes": lambda: (1 << random.randint(0, 40)) - 1,
|
||||||
|
"compression_use_zstd_dict_trainer": lambda: random.randint(0, 1),
|
||||||
"clear_column_family_one_in": 0,
|
"clear_column_family_one_in": 0,
|
||||||
"compact_files_one_in": 1000000,
|
"compact_files_one_in": 1000000,
|
||||||
"compact_range_one_in": 1000000,
|
"compact_range_one_in": 1000000,
|
||||||
|
@ -122,6 +122,9 @@ void print_help(bool to_stderr) {
|
|||||||
|
|
||||||
--compression_max_dict_buffer_bytes=<int64_t>
|
--compression_max_dict_buffer_bytes=<int64_t>
|
||||||
Limit on buffer size from which we collect samples for dictionary generation.
|
Limit on buffer size from which we collect samples for dictionary generation.
|
||||||
|
|
||||||
|
--compression_use_zstd_finalize_dict
|
||||||
|
Use zstd's finalizeDictionary() API instead of zstd's dictionary trainer to generate dictionary.
|
||||||
)",
|
)",
|
||||||
supported_compressions.c_str());
|
supported_compressions.c_str());
|
||||||
}
|
}
|
||||||
@ -188,6 +191,8 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
|
|||||||
ROCKSDB_NAMESPACE::CompressionOptions().zstd_max_train_bytes;
|
ROCKSDB_NAMESPACE::CompressionOptions().zstd_max_train_bytes;
|
||||||
uint64_t compression_max_dict_buffer_bytes =
|
uint64_t compression_max_dict_buffer_bytes =
|
||||||
ROCKSDB_NAMESPACE::CompressionOptions().max_dict_buffer_bytes;
|
ROCKSDB_NAMESPACE::CompressionOptions().max_dict_buffer_bytes;
|
||||||
|
bool compression_use_zstd_finalize_dict =
|
||||||
|
!ROCKSDB_NAMESPACE::CompressionOptions().use_zstd_dict_trainer;
|
||||||
|
|
||||||
int64_t tmp_val;
|
int64_t tmp_val;
|
||||||
|
|
||||||
@ -311,6 +316,8 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
compression_max_dict_buffer_bytes = static_cast<uint64_t>(tmp_val);
|
compression_max_dict_buffer_bytes = static_cast<uint64_t>(tmp_val);
|
||||||
|
} else if (strcmp(argv[i], "--compression_use_zstd_finalize_dict") == 0) {
|
||||||
|
compression_use_zstd_finalize_dict = true;
|
||||||
} else if (strcmp(argv[i], "--help") == 0) {
|
} else if (strcmp(argv[i], "--help") == 0) {
|
||||||
print_help(/*to_stderr*/ false);
|
print_help(/*to_stderr*/ false);
|
||||||
return 0;
|
return 0;
|
||||||
@ -439,7 +446,8 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
|
|||||||
set_block_size ? block_size : 16384,
|
set_block_size ? block_size : 16384,
|
||||||
compression_types.empty() ? kCompressions : compression_types,
|
compression_types.empty() ? kCompressions : compression_types,
|
||||||
compress_level_from, compress_level_to, compression_max_dict_bytes,
|
compress_level_from, compress_level_to, compression_max_dict_bytes,
|
||||||
compression_zstd_max_train_bytes, compression_max_dict_buffer_bytes);
|
compression_zstd_max_train_bytes, compression_max_dict_buffer_bytes,
|
||||||
|
!compression_use_zstd_finalize_dict);
|
||||||
if (!st.ok()) {
|
if (!st.ok()) {
|
||||||
fprintf(stderr, "Failed to recompress: %s\n", st.ToString().c_str());
|
fprintf(stderr, "Failed to recompress: %s\n", st.ToString().c_str());
|
||||||
exit(1);
|
exit(1);
|
||||||
|
@ -653,6 +653,9 @@ inline std::string CompressionOptionsToString(
|
|||||||
result.append("max_dict_buffer_bytes=")
|
result.append("max_dict_buffer_bytes=")
|
||||||
.append(std::to_string(compression_options.max_dict_buffer_bytes))
|
.append(std::to_string(compression_options.max_dict_buffer_bytes))
|
||||||
.append("; ");
|
.append("; ");
|
||||||
|
result.append("use_zstd_dict_trainer=")
|
||||||
|
.append(std::to_string(compression_options.use_zstd_dict_trainer))
|
||||||
|
.append("; ");
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1482,14 +1485,23 @@ inline std::string ZSTD_TrainDictionary(const std::string& samples,
|
|||||||
#endif // ZSTD_VERSION_NUMBER >= 10103
|
#endif // ZSTD_VERSION_NUMBER >= 10103
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool ZSTD_FinalizeDictionarySupported() {
|
||||||
|
#ifdef ZSTD
|
||||||
|
// ZDICT_finalizeDictionary API is stable since v1.4.5
|
||||||
|
return (ZSTD_versionNumber() >= 10405);
|
||||||
|
#else
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
inline std::string ZSTD_FinalizeDictionary(
|
inline std::string ZSTD_FinalizeDictionary(
|
||||||
const std::string& samples, const std::vector<size_t>& sample_lens,
|
const std::string& samples, const std::vector<size_t>& sample_lens,
|
||||||
std::string& dict_content, size_t max_dict_bytes, int level) {
|
size_t max_dict_bytes, int level) {
|
||||||
// ZDICT_finalizeDictionary is only stable since version 1.4.5
|
// ZDICT_finalizeDictionary is stable since version v1.4.5
|
||||||
#if ZSTD_VERSION_NUMBER >= 10405 // v1.4.5+
|
#if ZSTD_VERSION_NUMBER >= 10405 // v1.4.5+
|
||||||
assert(samples.empty() == sample_lens.empty());
|
assert(samples.empty() == sample_lens.empty());
|
||||||
if (samples.empty()) {
|
if (samples.empty()) {
|
||||||
return std::move(dict_content);
|
return "";
|
||||||
}
|
}
|
||||||
if (level == CompressionOptions::kDefaultCompressionLevel) {
|
if (level == CompressionOptions::kDefaultCompressionLevel) {
|
||||||
// 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
|
// 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
|
||||||
@ -1498,11 +1510,13 @@ inline std::string ZSTD_FinalizeDictionary(
|
|||||||
}
|
}
|
||||||
std::string dict_data(max_dict_bytes, '\0');
|
std::string dict_data(max_dict_bytes, '\0');
|
||||||
size_t dict_len = ZDICT_finalizeDictionary(
|
size_t dict_len = ZDICT_finalizeDictionary(
|
||||||
dict_data.data(), max_dict_bytes, dict_content.data(),
|
dict_data.data(), max_dict_bytes, samples.data(),
|
||||||
static_cast<size_t>(dict_content.size()), samples.data(),
|
// static_cast<size_t>(empty.size()),
|
||||||
sample_lens.data(), static_cast<unsigned>(sample_lens.size()), {level});
|
std::min(static_cast<size_t>(samples.size()), max_dict_bytes),
|
||||||
|
samples.data(), sample_lens.data(),
|
||||||
|
static_cast<unsigned>(sample_lens.size()), {level});
|
||||||
if (ZDICT_isError(dict_len)) {
|
if (ZDICT_isError(dict_len)) {
|
||||||
return std::move(dict_content);
|
return "";
|
||||||
} else {
|
} else {
|
||||||
assert(dict_len <= max_dict_bytes);
|
assert(dict_len <= max_dict_bytes);
|
||||||
dict_data.resize(dict_len);
|
dict_data.resize(dict_len);
|
||||||
@ -1513,7 +1527,7 @@ inline std::string ZSTD_FinalizeDictionary(
|
|||||||
(void)sample_lens;
|
(void)sample_lens;
|
||||||
(void)max_dict_bytes;
|
(void)max_dict_bytes;
|
||||||
(void)level;
|
(void)level;
|
||||||
return std::move(dict_content);
|
return "";
|
||||||
#endif // ZSTD_VERSION_NUMBER >= 10405
|
#endif // ZSTD_VERSION_NUMBER >= 10405
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user