diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index d335d2c3b..f7a219504 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -161,6 +161,11 @@ struct BlockBasedTableOptions { // Default: false bool skip_table_builder_flush = false; + // Verify that decompressing the compressed block gives back the input. This + // is a verification mode that we use to detect bugs in compression + // algorithms. + bool verify_compression = false; + // We currently have three versions: // 0 -- This version is currently written out by all RocksDB's versions by // default. Can be read by really old RocksDB's. Doesn't support changing diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index 46a4b1403..4ed11355a 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -650,6 +650,7 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents, auto type = r->compression_type; Slice block_contents; + bool abort_compression = false; if (raw_block_contents.size() < kCompressionSizeLimit) { Slice compression_dict; if (is_data_block && r->compression_dict && r->compression_dict->size()) { @@ -658,11 +659,46 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents, block_contents = CompressBlock(raw_block_contents, r->compression_opts, &type, r->table_options.format_version, compression_dict, &r->compressed_output); + + // Some of the compression algorithms are known to be unreliable. If + // the verify_compression flag is set then try to de-compress the + // compressed data and compare to the input. + if (type != kNoCompression && r->table_options.verify_compression) { + // Retrieve the uncompressed contents into a new buffer + BlockContents contents; + Status stat = UncompressBlockContentsForCompressionType( + block_contents.data(), block_contents.size(), &contents, + r->table_options.format_version, compression_dict, type); + + if (stat.ok()) { + bool compressed_ok = contents.data.compare(raw_block_contents) == 0; + if (!compressed_ok) { + // The result of the compression was invalid. abort. + abort_compression = true; + Log(InfoLogLevel::ERROR_LEVEL, r->ioptions.info_log, + "Decompressed block did not match raw block"); + r->status = + Status::Corruption("Decompressed block did not match raw block"); + } + } else { + // Decompression reported an error. abort. + r->status = Status::Corruption("Could not decompress"); + abort_compression = true; + } + } } else { + // Block is too big to be compressed. + abort_compression = true; + } + + // Abort compression if the block is too big, or did not pass + // verification. + if (abort_compression) { RecordTick(r->ioptions.statistics, NUMBER_BLOCK_NOT_COMPRESSED); type = kNoCompression; block_contents = raw_block_contents; } + WriteRawBlock(block_contents, type, handle); r->compressed_output.clear(); } diff --git a/table/format.cc b/table/format.cc index 7a62f66bb..cb58bec1d 100644 --- a/table/format.cc +++ b/table/format.cc @@ -403,20 +403,16 @@ Status ReadBlockContents(RandomAccessFileReader* file, const Footer& footer, return status; } -// -// The 'data' points to the raw block contents that was read in from file. -// This method allocates a new heap buffer and the raw block -// contents are uncompresed into this buffer. This -// buffer is returned via 'result' and it is upto the caller to -// free this buffer. -// format_version is the block format as defined in include/rocksdb/table.h -Status UncompressBlockContents(const char* data, size_t n, - BlockContents* contents, uint32_t format_version, - const Slice& compression_dict) { +Status UncompressBlockContentsForCompressionType( + const char* data, size_t n, BlockContents* contents, + uint32_t format_version, const Slice& compression_dict, + CompressionType compression_type) { std::unique_ptr ubuf; + + assert(compression_type != kNoCompression && "Invalid compression type"); + int decompress_size = 0; - assert(data[n] != kNoCompression); - switch (data[n]) { + switch (compression_type) { case kSnappyCompression: { size_t ulength = 0; static char snappy_corrupt_msg[] = @@ -509,4 +505,20 @@ Status UncompressBlockContents(const char* data, size_t n, return Status::OK(); } +// +// The 'data' points to the raw block contents that was read in from file. +// This method allocates a new heap buffer and the raw block +// contents are uncompresed into this buffer. This +// buffer is returned via 'result' and it is upto the caller to +// free this buffer. +// format_version is the block format as defined in include/rocksdb/table.h +Status UncompressBlockContents(const char* data, size_t n, + BlockContents* contents, uint32_t format_version, + const Slice& compression_dict) { + assert(data[n] != kNoCompression); + return UncompressBlockContentsForCompressionType( + data, n, contents, format_version, compression_dict, + (CompressionType)data[n]); +} + } // namespace rocksdb diff --git a/table/format.h b/table/format.h index a488f8dd8..2ca7d2520 100644 --- a/table/format.h +++ b/table/format.h @@ -229,6 +229,14 @@ extern Status UncompressBlockContents(const char* data, size_t n, uint32_t compress_format_version, const Slice& compression_dict); +// This is an extension to UncompressBlockContents that accepts +// a specific compression type. This is used by un-wrapped blocks +// with no compression header. +extern Status UncompressBlockContentsForCompressionType( + const char* data, size_t n, BlockContents* contents, + uint32_t compress_format_version, const Slice& compression_dict, + CompressionType compression_type); + // Implementation details follow. Clients should ignore, inline BlockHandle::BlockHandle() diff --git a/util/options_helper.h b/util/options_helper.h index 423e6b784..a7fea3c08 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -546,7 +546,10 @@ static std::unordered_map OptionType::kBoolean, OptionVerificationType::kNormal}}, {"format_version", {offsetof(struct BlockBasedTableOptions, format_version), - OptionType::kUInt32T, OptionVerificationType::kNormal}}}; + OptionType::kUInt32T, OptionVerificationType::kNormal}}, + {"verify_compression", + {offsetof(struct BlockBasedTableOptions, verify_compression), + OptionType::kBoolean, OptionVerificationType::kNormal}}}; static std::unordered_map plain_table_type_info = { {"user_key_len", diff --git a/util/options_settable_test.cc b/util/options_settable_test.cc index 338a4017f..f50ebf011 100644 --- a/util/options_settable_test.cc +++ b/util/options_settable_test.cc @@ -157,7 +157,8 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) { "index_block_restart_interval=4;" "filter_policy=bloomfilter:4:true;whole_key_filtering=1;" "skip_table_builder_flush=1;format_version=1;" - "hash_index_allow_collision=false;", + "hash_index_allow_collision=false;" + "verify_compression=true;", new_bbto)); ASSERT_EQ(unset_bytes_base,