diff --git a/options/options_helper.h b/options/options_helper.h index 488ca9e23..ec84c467f 100644 --- a/options/options_helper.h +++ b/options/options_helper.h @@ -30,6 +30,10 @@ std::vector GetSupportedDictCompressions(); std::vector GetSupportedChecksums(); +inline bool IsSupportedChecksumType(ChecksumType type) { + return type >= kNoChecksum && type <= kXXH3; +} + // Checks that the combination of DBOptions and ColumnFamilyOptions are valid Status ValidateOptions(const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts); diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 608bcc51f..2a98609d5 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -49,11 +49,9 @@ #include "table/table_builder.h" #include "util/coding.h" #include "util/compression.h" -#include "util/crc32c.h" #include "util/stop_watch.h" #include "util/string_util.h" #include "util/work_queue.h" -#include "util/xxhash.h" namespace ROCKSDB_NAMESPACE { @@ -1210,60 +1208,6 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock( } } -void BlockBasedTableBuilder::ComputeBlockTrailer( - const Slice& block_contents, CompressionType compression_type, - ChecksumType checksum_type, std::array* trailer) { - (*trailer)[0] = compression_type; - uint32_t checksum = 0; - switch (checksum_type) { - case kNoChecksum: - break; - case kCRC32c: { - uint32_t crc = - crc32c::Value(block_contents.data(), block_contents.size()); - // Extend to cover compression type - crc = crc32c::Extend(crc, trailer->data(), 1); - checksum = crc32c::Mask(crc); - break; - } - case kxxHash: { - XXH32_state_t* const state = XXH32_createState(); - XXH32_reset(state, 0); - XXH32_update(state, block_contents.data(), block_contents.size()); - // Extend to cover compression type - XXH32_update(state, trailer->data(), 1); - checksum = XXH32_digest(state); - XXH32_freeState(state); - break; - } - case kxxHash64: { - XXH64_state_t* const state = XXH64_createState(); - XXH64_reset(state, 0); - XXH64_update(state, block_contents.data(), block_contents.size()); - // Extend to cover compression type - XXH64_update(state, trailer->data(), 1); - checksum = Lower32of64(XXH64_digest(state)); - XXH64_freeState(state); - break; - } - case kXXH3: { - // XXH3 is a complicated hash function that is extremely fast on - // contiguous input, but that makes its streaming support rather - // complex. It is worth custom handling of the last byte (`type`) - // in order to avoid allocating a large state object and bringing - // that code complexity into CPU working set. - checksum = Lower32of64( - XXH3_64bits(block_contents.data(), block_contents.size())); - checksum = ModifyChecksumForCompressionType(checksum, compression_type); - break; - } - default: - assert(false); - break; - } - EncodeFixed32(trailer->data() + 1, checksum); -} - void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents, CompressionType type, BlockHandle* handle, @@ -1281,8 +1225,12 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents, io_s = r->file->Append(block_contents); if (io_s.ok()) { std::array trailer; - ComputeBlockTrailer(block_contents, type, r->table_options.checksum, - &trailer); + trailer[0] = type; + uint32_t checksum = ComputeBuiltinChecksumWithLastByte( + r->table_options.checksum, block_contents.data(), block_contents.size(), + /*last_byte*/ type); + EncodeFixed32(trailer.data() + 1, checksum); + assert(io_s.ok()); TEST_SYNC_POINT_CALLBACK( "BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum", diff --git a/table/block_based/block_based_table_builder.h b/table/block_based/block_based_table_builder.h index e34d644a4..690ae46d7 100644 --- a/table/block_based/block_based_table_builder.h +++ b/table/block_based/block_based_table_builder.h @@ -100,12 +100,6 @@ class BlockBasedTableBuilder : public TableBuilder { // Get file checksum function name const char* GetFileChecksumFuncName() const override; - // Computes and populates block trailer for a block - static void ComputeBlockTrailer(const Slice& block_contents, - CompressionType compression_type, - ChecksumType checksum_type, - std::array* trailer); - private: bool ok() const { return status().ok(); } diff --git a/table/block_based/reader_common.cc b/table/block_based/reader_common.cc index b64b6e425..14ed9c79c 100644 --- a/table/block_based/reader_common.cc +++ b/table/block_based/reader_common.cc @@ -9,12 +9,11 @@ #include "table/block_based/reader_common.h" #include "monitoring/perf_context_imp.h" +#include "rocksdb/table.h" #include "table/format.h" #include "util/coding.h" #include "util/crc32c.h" -#include "util/hash.h" #include "util/string_util.h" -#include "util/xxhash.h" namespace ROCKSDB_NAMESPACE { void ForceReleaseCachedEntry(void* arg, void* h) { @@ -33,39 +32,20 @@ Status VerifyBlockChecksum(ChecksumType type, const char* data, // And then the stored checksum value (4 bytes). uint32_t stored = DecodeFixed32(data + len); - Status s; - uint32_t computed = 0; - switch (type) { - case kNoChecksum: - break; - case kCRC32c: + uint32_t computed = ComputeBuiltinChecksum(type, data, len); + if (stored == computed) { + return Status::OK(); + } else { + // Unmask for people who might look for reference crc value + if (type == kCRC32c) { stored = crc32c::Unmask(stored); - computed = crc32c::Value(data, len); - break; - case kxxHash: - computed = XXH32(data, len, 0); - break; - case kxxHash64: - computed = Lower32of64(XXH64(data, len, 0)); - break; - case kXXH3: - computed = Lower32of64(XXH3_64bits(data, block_size)); - // Treat compression type separately for speed in building table files - computed = ModifyChecksumForCompressionType(computed, data[block_size]); - break; - default: - s = Status::Corruption( - "unknown checksum type " + ToString(type) + " from footer of " + - file_name + ", while checking block at offset " + ToString(offset) + - " size " + ToString(block_size)); - } - if (s.ok() && stored != computed) { - s = Status::Corruption( + computed = crc32c::Unmask(computed); + } + return Status::Corruption( "block checksum mismatch: stored = " + ToString(stored) + ", computed = " + ToString(computed) + ", type = " + ToString(type) + " in " + file_name + " offset " + ToString(offset) + " size " + ToString(block_size)); } - return s; } } // namespace ROCKSDB_NAMESPACE diff --git a/table/format.cc b/table/format.cc index 6abe2a4cb..d8add0f80 100644 --- a/table/format.cc +++ b/table/format.cc @@ -17,6 +17,7 @@ #include "memory/memory_allocator.h" #include "monitoring/perf_context_imp.h" #include "monitoring/statistics.h" +#include "options/options_helper.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "table/block_based/block.h" @@ -25,8 +26,10 @@ #include "util/coding.h" #include "util/compression.h" #include "util/crc32c.h" +#include "util/hash.h" #include "util/stop_watch.h" #include "util/string_util.h" +#include "util/xxhash.h" namespace ROCKSDB_NAMESPACE { @@ -50,8 +53,8 @@ bool ShouldReportDetailedTime(Env* env, Statistics* stats) { void BlockHandle::EncodeTo(std::string* dst) const { // Sanity check that all fields have been set - assert(offset_ != ~static_cast(0)); - assert(size_ != ~static_cast(0)); + assert(offset_ != ~uint64_t{0}); + assert(size_ != ~uint64_t{0}); PutVarint64Varint64(dst, offset_, size_); } @@ -245,6 +248,11 @@ Status Footer::DecodeFrom(Slice* input) { return Status::Corruption("bad checksum type"); } checksum_ = static_cast(chksum); + if (chksum != static_cast(checksum_) || + !IsSupportedChecksumType(checksum_)) { + return Status::Corruption("unknown checksum type " + + ROCKSDB_NAMESPACE::ToString(chksum)); + } } Status result = metaindex_handle_.DecodeFrom(input); @@ -344,6 +352,88 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file, return Status::OK(); } +namespace { +// Custom handling for the last byte of a block, to avoid invoking streaming +// API to get an effective block checksum. This function is its own inverse +// because it uses xor. +inline uint32_t ModifyChecksumForLastByte(uint32_t checksum, char last_byte) { + // This strategy bears some resemblance to extending a CRC checksum by one + // more byte, except we don't need to re-mix the input checksum as long as + // we do this step only once (per checksum). + const uint32_t kRandomPrime = 0x6b9083d9; + return checksum ^ static_cast(last_byte) * kRandomPrime; +} +} // namespace + +uint32_t ComputeBuiltinChecksum(ChecksumType type, const char* data, + size_t data_size) { + switch (type) { + case kCRC32c: + return crc32c::Mask(crc32c::Value(data, data_size)); + case kxxHash: + return XXH32(data, data_size, /*seed*/ 0); + case kxxHash64: + return Lower32of64(XXH64(data, data_size, /*seed*/ 0)); + case kXXH3: { + if (data_size == 0) { + // Special case because of special handling for last byte, not + // present in this case. Can be any value different from other + // small input size checksums. + return 0; + } else { + // See corresponding code in ComputeBuiltinChecksumWithLastByte + uint32_t v = Lower32of64(XXH3_64bits(data, data_size - 1)); + return ModifyChecksumForLastByte(v, data[data_size - 1]); + } + } + default: // including kNoChecksum + return 0; + } +} + +uint32_t ComputeBuiltinChecksumWithLastByte(ChecksumType type, const char* data, + size_t data_size, char last_byte) { + switch (type) { + case kCRC32c: { + uint32_t crc = crc32c::Value(data, data_size); + // Extend to cover last byte (compression type) + crc = crc32c::Extend(crc, &last_byte, 1); + return crc32c::Mask(crc); + } + case kxxHash: { + XXH32_state_t* const state = XXH32_createState(); + XXH32_reset(state, 0); + XXH32_update(state, data, data_size); + // Extend to cover last byte (compression type) + XXH32_update(state, &last_byte, 1); + uint32_t v = XXH32_digest(state); + XXH32_freeState(state); + return v; + } + case kxxHash64: { + XXH64_state_t* const state = XXH64_createState(); + XXH64_reset(state, 0); + XXH64_update(state, data, data_size); + // Extend to cover last byte (compression type) + XXH64_update(state, &last_byte, 1); + uint32_t v = Lower32of64(XXH64_digest(state)); + XXH64_freeState(state); + return v; + } + case kXXH3: { + // XXH3 is a complicated hash function that is extremely fast on + // contiguous input, but that makes its streaming support rather + // complex. It is worth custom handling of the last byte (`type`) + // in order to avoid allocating a large state object and bringing + // that code complexity into CPU working set. + uint32_t v = Lower32of64(XXH3_64bits(data, data_size)); + return ModifyChecksumForLastByte(v, last_byte); + } + default: // including kNoChecksum + return 0; + } +} + Status UncompressBlockContentsForCompressionType( const UncompressionInfo& uncompression_info, const char* data, size_t n, BlockContents* contents, uint32_t format_version, diff --git a/table/format.h b/table/format.h index c6b6e3bb7..870a40b58 100644 --- a/table/format.h +++ b/table/format.h @@ -226,17 +226,16 @@ inline CompressionType get_block_compression_type(const char* block_data, return static_cast(block_data[block_size]); } -// Custom handling for the last byte of a block, to avoid invoking streaming -// API to get an effective block checksum. This function is its own inverse -// because it uses xor. -inline uint32_t ModifyChecksumForCompressionType(uint32_t checksum, - char compression_type) { - // This strategy bears some resemblance to extending a CRC checksum by one - // more byte, except we don't need to re-mix the input checksum as long as - // we do this step only once (per checksum). - const uint32_t kRandomPrime = 0x6b9083d9; - return checksum ^ static_cast(compression_type) * kRandomPrime; -} +// Computes a checksum using the given ChecksumType. Sometimes we need to +// include one more input byte logically at the end but not part of the main +// data buffer. If data_size >= 1, then +// ComputeBuiltinChecksum(type, data, size) +// == +// ComputeBuiltinChecksumWithLastByte(type, data, size - 1, data[size - 1]) +uint32_t ComputeBuiltinChecksum(ChecksumType type, const char* data, + size_t size); +uint32_t ComputeBuiltinChecksumWithLastByte(ChecksumType type, const char* data, + size_t size, char last_byte); // Represents the contents of a block read from an SST file. Depending on how // it's created, it may or may not own the actual block bytes. As an example, @@ -313,15 +312,6 @@ struct BlockContents { } }; -// Read the block identified by "handle" from "file". On failure -// return non-OK. On success fill *result and return OK. -extern Status ReadBlockContents( - RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, - const Footer& footer, const ReadOptions& options, const BlockHandle& handle, - BlockContents* contents, const ImmutableOptions& ioptions, - bool do_uncompress = true, const Slice& compression_dict = Slice(), - const PersistentCacheOptions& cache_options = PersistentCacheOptions()); - // The 'data' points to the raw block contents read in from file. // This method allocates a new heap buffer and the raw block // contents are uncompresed into this buffer. This buffer is @@ -352,8 +342,7 @@ extern Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id); // TODO(andrewkr): we should prefer one way of representing a null/uninitialized // BlockHandle. Currently we use zeros for null and use negation-of-zeros for // uninitialized. -inline BlockHandle::BlockHandle() - : BlockHandle(~static_cast(0), ~static_cast(0)) {} +inline BlockHandle::BlockHandle() : BlockHandle(~uint64_t{0}, ~uint64_t{0}) {} inline BlockHandle::BlockHandle(uint64_t _offset, uint64_t _size) : offset_(_offset), size_(_size) {} diff --git a/table/table_test.cc b/table/table_test.cc index 45f9d950a..df8806cd6 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -2231,73 +2231,109 @@ TEST_P(BlockBasedTableTest, BadChecksumType) { } namespace { -std::string TrailerAsString(const std::string& contents, - CompressionType compression_type, - ChecksumType checksum_type) { - std::array trailer; - BlockBasedTableBuilder::ComputeBlockTrailer(contents, compression_type, - checksum_type, &trailer); - return Slice(trailer.data(), trailer.size()).ToString(/*hex*/ true); +std::string ChecksumAsString(const std::string& data, + ChecksumType checksum_type) { + uint32_t v = ComputeBuiltinChecksum(checksum_type, data.data(), data.size()); + + // Verify consistency with other function + if (data.size() >= 1) { + EXPECT_EQ(v, ComputeBuiltinChecksumWithLastByte( + checksum_type, data.data(), data.size() - 1, data.back())); + } + // Little endian as in file + std::array raw_bytes; + EncodeFixed32(raw_bytes.data(), v); + return Slice(raw_bytes.data(), raw_bytes.size()).ToString(/*hex*/ true); +} + +std::string ChecksumAsString(std::string* data, char new_last_byte, + ChecksumType checksum_type) { + data->back() = new_last_byte; + return ChecksumAsString(*data, checksum_type); } } // namespace // Make sure that checksum values don't change in later versions, even if -// consistent within current version. (Other tests check for consistency -// between written checksums and read-time validation, so here we only -// have to verify the writer side.) +// consistent within current version. TEST_P(BlockBasedTableTest, ChecksumSchemas) { - std::string b1 = "This is a short block!"; + std::string b0 = "x"; + std::string b1 = "This is a short block!x"; std::string b2; for (int i = 0; i < 100; ++i) { b2.append("This is a long block!"); } - CompressionType ct1 = kNoCompression; - CompressionType ct2 = kSnappyCompression; - CompressionType ct3 = kZSTD; + b2.append("x"); + // Trailing 'x' will be replaced by compression type + + std::string empty; + + char ct1 = kNoCompression; + char ct2 = kSnappyCompression; + char ct3 = kZSTD; // Note: first byte of trailer is compression type, last 4 are checksum for (ChecksumType t : GetSupportedChecksums()) { switch (t) { case kNoChecksum: - EXPECT_EQ(TrailerAsString(b1, ct1, t), "0000000000"); - EXPECT_EQ(TrailerAsString(b1, ct2, t), "0100000000"); - EXPECT_EQ(TrailerAsString(b1, ct3, t), "0700000000"); - EXPECT_EQ(TrailerAsString(b2, ct1, t), "0000000000"); - EXPECT_EQ(TrailerAsString(b2, ct2, t), "0100000000"); - EXPECT_EQ(TrailerAsString(b2, ct3, t), "0700000000"); + EXPECT_EQ(ChecksumAsString(empty, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "00000000"); break; case kCRC32c: - EXPECT_EQ(TrailerAsString(b1, ct1, t), "00583F0355"); - EXPECT_EQ(TrailerAsString(b1, ct2, t), "012F9B0A57"); - EXPECT_EQ(TrailerAsString(b1, ct3, t), "07ECE7DA1D"); - EXPECT_EQ(TrailerAsString(b2, ct1, t), "00943EF0AB"); - EXPECT_EQ(TrailerAsString(b2, ct2, t), "0143A2EDB1"); - EXPECT_EQ(TrailerAsString(b2, ct3, t), "0700E53D63"); + EXPECT_EQ(ChecksumAsString(empty, t), "D8EA82A2"); + EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "D28F2549"); + EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "052B2843"); + EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "46F8F711"); + EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "583F0355"); + EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "2F9B0A57"); + EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "ECE7DA1D"); + EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "943EF0AB"); + EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "43A2EDB1"); + EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "00E53D63"); break; case kxxHash: - EXPECT_EQ(TrailerAsString(b1, ct1, t), "004A2E5FB0"); - EXPECT_EQ(TrailerAsString(b1, ct2, t), "010BD9F652"); - EXPECT_EQ(TrailerAsString(b1, ct3, t), "07B4107E50"); - EXPECT_EQ(TrailerAsString(b2, ct1, t), "0020F4D4BA"); - EXPECT_EQ(TrailerAsString(b2, ct2, t), "018F1A1F99"); - EXPECT_EQ(TrailerAsString(b2, ct3, t), "07A191A338"); + EXPECT_EQ(ChecksumAsString(empty, t), "055DCC02"); + EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "3EB065CF"); + EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "31F79238"); + EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "320D2E00"); + EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "4A2E5FB0"); + EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "0BD9F652"); + EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "B4107E50"); + EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "20F4D4BA"); + EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "8F1A1F99"); + EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "A191A338"); break; case kxxHash64: - EXPECT_EQ(TrailerAsString(b1, ct1, t), "00B74655EF"); - EXPECT_EQ(TrailerAsString(b1, ct2, t), "01B6C8BBBE"); - EXPECT_EQ(TrailerAsString(b1, ct3, t), "07AED9E3B4"); - EXPECT_EQ(TrailerAsString(b2, ct1, t), "000D4999FE"); - EXPECT_EQ(TrailerAsString(b2, ct2, t), "01F5932423"); - EXPECT_EQ(TrailerAsString(b2, ct3, t), "076B31BAB1"); + EXPECT_EQ(ChecksumAsString(empty, t), "99E9D851"); + EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "682705DB"); + EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "30E7211B"); + EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "B7BB58E8"); + EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "B74655EF"); + EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "B6C8BBBE"); + EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "AED9E3B4"); + EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "0D4999FE"); + EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "F5932423"); + EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "6B31BAB1"); break; case kXXH3: - EXPECT_EQ(TrailerAsString(b1, ct1, t), "00B37FB5E6"); - EXPECT_EQ(TrailerAsString(b1, ct2, t), "016AFC258D"); - EXPECT_EQ(TrailerAsString(b1, ct3, t), "075CE54616"); - EXPECT_EQ(TrailerAsString(b2, ct1, t), "00FA2D482E"); - EXPECT_EQ(TrailerAsString(b2, ct2, t), "0123AED845"); - EXPECT_EQ(TrailerAsString(b2, ct3, t), "0715B7BBDE"); + EXPECT_EQ(ChecksumAsString(empty, t), "00000000"); + EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "C294D338"); + EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "1B174353"); + EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "2D0E20C8"); + EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "B37FB5E6"); + EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "6AFC258D"); + EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "5CE54616"); + EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "FA2D482E"); + EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "23AED845"); + EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "15B7BBDE"); break; default: // Force this test to be updated on new ChecksumTypes