Some checksum code refactoring (#9113)

Summary:
To prepare for adding checksum to footer and "context aware"
checksums. This also brings closely related code much closer together.

Recently added `BlockBasedTableBuilder::ComputeBlockTrailer` for testing
is made obsolete in the refactoring, as testing the checksums can happen
at a lower level of abstraction.

Also now checking for unrecognized checksum type on reading footer,
rather than later on use.

Also removed an obsolete function declaration.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9113

Test Plan:
Existing tests passed before the refactoring step that removed
`ComputeBlockTrailer`; the tests that used it were then refactored and improved.

Reviewed By: mrambacher

Differential Revision: D32090149

Pulled By: pdillinger

fbshipit-source-id: 2879da683c1498ea85a3b70dace9b6d9f6b47b6e
This commit is contained in:
Peter Dillinger 2021-11-04 09:08:12 -07:00 committed by Facebook GitHub Bot
parent 312d9c47b4
commit dfedc74d82
7 changed files with 203 additions and 162 deletions

View File

@ -30,6 +30,10 @@ std::vector<CompressionType> GetSupportedDictCompressions();
std::vector<ChecksumType> GetSupportedChecksums();
// Returns true iff `type` falls within the inclusive range of checksum types
// bounded below by kNoChecksum and above by kXXH3.
inline bool IsSupportedChecksumType(ChecksumType type) {
  const bool at_or_above_lowest = kNoChecksum <= type;
  const bool at_or_below_highest = type <= kXXH3;
  return at_or_above_lowest && at_or_below_highest;
}
// Checks that the combination of DBOptions and ColumnFamilyOptions are valid
Status ValidateOptions(const DBOptions& db_opts,
const ColumnFamilyOptions& cf_opts);

View File

@ -49,11 +49,9 @@
#include "table/table_builder.h"
#include "util/coding.h"
#include "util/compression.h"
#include "util/crc32c.h"
#include "util/stop_watch.h"
#include "util/string_util.h"
#include "util/work_queue.h"
#include "util/xxhash.h"
namespace ROCKSDB_NAMESPACE {
@ -1210,60 +1208,6 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
}
}
// Populates `*trailer` for a block.
//
// Layout written: byte 0 is `compression_type`; the 32-bit checksum is
// stored starting at byte 1 via EncodeFixed32.
//
// The checksum is selected by `checksum_type` and covers `block_contents`
// followed by the one compression-type byte. For kNoChecksum the stored
// checksum is 0. An unrecognized `checksum_type` trips assert(false) when
// asserts are enabled and otherwise also stores 0.
void BlockBasedTableBuilder::ComputeBlockTrailer(
const Slice& block_contents, CompressionType compression_type,
ChecksumType checksum_type, std::array<char, kBlockTrailerSize>* trailer) {
// The compression type is written first so that the cases below can feed
// it to the hash directly from the trailer buffer.
(*trailer)[0] = compression_type;
uint32_t checksum = 0;
switch (checksum_type) {
case kNoChecksum:
// Leave checksum at 0.
break;
case kCRC32c: {
uint32_t crc =
crc32c::Value(block_contents.data(), block_contents.size());
// Extend to cover compression type
crc = crc32c::Extend(crc, trailer->data(), 1);
// The value stored is the masked CRC, not the raw one.
checksum = crc32c::Mask(crc);
break;
}
case kxxHash: {
// Streaming API: hash the contents, then the single trailer byte.
XXH32_state_t* const state = XXH32_createState();
XXH32_reset(state, 0);
XXH32_update(state, block_contents.data(), block_contents.size());
// Extend to cover compression type
XXH32_update(state, trailer->data(), 1);
checksum = XXH32_digest(state);
XXH32_freeState(state);
break;
}
case kxxHash64: {
// Same streaming scheme as kxxHash; only the low 32 bits of the 64-bit
// digest are kept.
XXH64_state_t* const state = XXH64_createState();
XXH64_reset(state, 0);
XXH64_update(state, block_contents.data(), block_contents.size());
// Extend to cover compression type
XXH64_update(state, trailer->data(), 1);
checksum = Lower32of64(XXH64_digest(state));
XXH64_freeState(state);
break;
}
case kXXH3: {
// XXH3 is a complicated hash function that is extremely fast on
// contiguous input, but that makes its streaming support rather
// complex. It is worth custom handling of the last byte (`type`)
// in order to avoid allocating a large state object and bringing
// that code complexity into CPU working set.
checksum = Lower32of64(
XXH3_64bits(block_contents.data(), block_contents.size()));
checksum = ModifyChecksumForCompressionType(checksum, compression_type);
break;
}
default:
// Unknown checksum type: debug-time failure; checksum stays 0.
assert(false);
break;
}
EncodeFixed32(trailer->data() + 1, checksum);
}
void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
CompressionType type,
BlockHandle* handle,
@ -1281,8 +1225,12 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
io_s = r->file->Append(block_contents);
if (io_s.ok()) {
std::array<char, kBlockTrailerSize> trailer;
ComputeBlockTrailer(block_contents, type, r->table_options.checksum,
&trailer);
trailer[0] = type;
uint32_t checksum = ComputeBuiltinChecksumWithLastByte(
r->table_options.checksum, block_contents.data(), block_contents.size(),
/*last_byte*/ type);
EncodeFixed32(trailer.data() + 1, checksum);
assert(io_s.ok());
TEST_SYNC_POINT_CALLBACK(
"BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum",

View File

@ -100,12 +100,6 @@ class BlockBasedTableBuilder : public TableBuilder {
// Get file checksum function name
const char* GetFileChecksumFuncName() const override;
// Computes and populates block trailer for a block
static void ComputeBlockTrailer(const Slice& block_contents,
CompressionType compression_type,
ChecksumType checksum_type,
std::array<char, kBlockTrailerSize>* trailer);
private:
bool ok() const { return status().ok(); }

View File

@ -9,12 +9,11 @@
#include "table/block_based/reader_common.h"
#include "monitoring/perf_context_imp.h"
#include "rocksdb/table.h"
#include "table/format.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/hash.h"
#include "util/string_util.h"
#include "util/xxhash.h"
namespace ROCKSDB_NAMESPACE {
void ForceReleaseCachedEntry(void* arg, void* h) {
@ -33,39 +32,20 @@ Status VerifyBlockChecksum(ChecksumType type, const char* data,
// And then the stored checksum value (4 bytes).
uint32_t stored = DecodeFixed32(data + len);
Status s;
uint32_t computed = 0;
switch (type) {
case kNoChecksum:
break;
case kCRC32c:
uint32_t computed = ComputeBuiltinChecksum(type, data, len);
if (stored == computed) {
return Status::OK();
} else {
// Unmask for people who might look for reference crc value
if (type == kCRC32c) {
stored = crc32c::Unmask(stored);
computed = crc32c::Value(data, len);
break;
case kxxHash:
computed = XXH32(data, len, 0);
break;
case kxxHash64:
computed = Lower32of64(XXH64(data, len, 0));
break;
case kXXH3:
computed = Lower32of64(XXH3_64bits(data, block_size));
// Treat compression type separately for speed in building table files
computed = ModifyChecksumForCompressionType(computed, data[block_size]);
break;
default:
s = Status::Corruption(
"unknown checksum type " + ToString(type) + " from footer of " +
file_name + ", while checking block at offset " + ToString(offset) +
" size " + ToString(block_size));
}
if (s.ok() && stored != computed) {
s = Status::Corruption(
computed = crc32c::Unmask(computed);
}
return Status::Corruption(
"block checksum mismatch: stored = " + ToString(stored) +
", computed = " + ToString(computed) + ", type = " + ToString(type) +
" in " + file_name + " offset " + ToString(offset) + " size " +
ToString(block_size));
}
return s;
}
} // namespace ROCKSDB_NAMESPACE

View File

@ -17,6 +17,7 @@
#include "memory/memory_allocator.h"
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "options/options_helper.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "table/block_based/block.h"
@ -25,8 +26,10 @@
#include "util/coding.h"
#include "util/compression.h"
#include "util/crc32c.h"
#include "util/hash.h"
#include "util/stop_watch.h"
#include "util/string_util.h"
#include "util/xxhash.h"
namespace ROCKSDB_NAMESPACE {
@ -50,8 +53,8 @@ bool ShouldReportDetailedTime(Env* env, Statistics* stats) {
// Appends this handle's offset_ and size_ to `*dst` using
// PutVarint64Varint64. Asserts beforehand that neither field still holds the
// all-ones 64-bit value.
void BlockHandle::EncodeTo(std::string* dst) const {
// Sanity check that all fields have been set
// (`~static_cast<uint64_t>(0)` and `~uint64_t{0}` below are two spellings
// of the same all-ones value, so the two pairs of asserts check the same
// condition.)
assert(offset_ != ~static_cast<uint64_t>(0));
assert(size_ != ~static_cast<uint64_t>(0));
assert(offset_ != ~uint64_t{0});
assert(size_ != ~uint64_t{0});
PutVarint64Varint64(dst, offset_, size_);
}
@ -245,6 +248,11 @@ Status Footer::DecodeFrom(Slice* input) {
return Status::Corruption("bad checksum type");
}
checksum_ = static_cast<ChecksumType>(chksum);
if (chksum != static_cast<uint32_t>(checksum_) ||
!IsSupportedChecksumType(checksum_)) {
return Status::Corruption("unknown checksum type " +
ROCKSDB_NAMESPACE::ToString(chksum));
}
}
Status result = metaindex_handle_.DecodeFrom(input);
@ -344,6 +352,88 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file,
return Status::OK();
}
namespace {
// Folds one trailing byte into an already-computed 32-bit checksum, so the
// caller does not have to use a streaming hash API just to cover that byte.
// Because the byte is mixed in with xor, applying the function twice with the
// same byte restores the original checksum.
inline uint32_t ModifyChecksumForLastByte(uint32_t checksum, char last_byte) {
  // Similar in spirit to extending a CRC by one byte; the incoming checksum
  // does not need re-mixing provided this step happens only once per
  // checksum.
  constexpr uint32_t kByteMultiplier = 0x6b9083d9;
  // Go through uint8_t so that the byte contributes a value in [0, 255]
  // regardless of whether `char` is signed.
  const uint32_t byte_value = static_cast<uint8_t>(last_byte);
  const uint32_t mask = byte_value * kByteMultiplier;
  return checksum ^ mask;
}
} // namespace
// Computes the 32-bit checksum of the `data_size` bytes at `data` using the
// algorithm selected by `type`. kNoChecksum and any unrecognized type yield 0.
uint32_t ComputeBuiltinChecksum(ChecksumType type, const char* data,
                                size_t data_size) {
  // Default result; covers kNoChecksum, unknown types, and empty XXH3 input.
  uint32_t result = 0;
  switch (type) {
    case kCRC32c:
      result = crc32c::Mask(crc32c::Value(data, data_size));
      break;
    case kxxHash:
      result = XXH32(data, data_size, /*seed*/ 0);
      break;
    case kxxHash64:
      result = Lower32of64(XXH64(data, data_size, /*seed*/ 0));
      break;
    case kXXH3:
      // The XXH3 scheme hashes everything but the final byte and then mixes
      // that byte in separately (mirroring
      // ComputeBuiltinChecksumWithLastByte). With no bytes at all there is
      // no final byte, so the result stays at 0; any value distinct from
      // other small-input checksums would do.
      if (data_size != 0) {
        const size_t prefix_size = data_size - 1;
        const uint32_t prefix_hash =
            Lower32of64(XXH3_64bits(data, prefix_size));
        result = ModifyChecksumForLastByte(prefix_hash, data[prefix_size]);
      }
      break;
    default:  // including kNoChecksum
      break;
  }
  return result;
}
// Computes the 32-bit checksum, per `type`, over the `data_size` bytes at
// `data` logically followed by the single byte `last_byte` (the compression
// type, for block trailers). kNoChecksum and any unrecognized type yield 0.
uint32_t ComputeBuiltinChecksumWithLastByte(ChecksumType type, const char* data,
                                            size_t data_size, char last_byte) {
  uint32_t result = 0;
  switch (type) {
    case kCRC32c: {
      const uint32_t body_crc = crc32c::Value(data, data_size);
      // Continue the CRC over the extra byte, then mask for storage.
      const uint32_t full_crc = crc32c::Extend(body_crc, &last_byte, 1);
      result = crc32c::Mask(full_crc);
      break;
    }
    case kxxHash: {
      XXH32_state_t* const hasher = XXH32_createState();
      XXH32_reset(hasher, 0);
      XXH32_update(hasher, data, data_size);
      // Feed the extra byte through the same streaming state.
      XXH32_update(hasher, &last_byte, 1);
      result = XXH32_digest(hasher);
      XXH32_freeState(hasher);
      break;
    }
    case kxxHash64: {
      XXH64_state_t* const hasher = XXH64_createState();
      XXH64_reset(hasher, 0);
      XXH64_update(hasher, data, data_size);
      // Feed the extra byte through the same streaming state.
      XXH64_update(hasher, &last_byte, 1);
      // Only the low 32 bits of the 64-bit digest are kept.
      result = Lower32of64(XXH64_digest(hasher));
      XXH64_freeState(hasher);
      break;
    }
    case kXXH3: {
      // XXH3 is very fast on contiguous input, but its streaming interface
      // is comparatively heavyweight. Rather than allocate a large state
      // object and pull that code into the CPU working set, hash the
      // contiguous part in one shot and mix in the final byte by hand.
      const uint32_t body_hash = Lower32of64(XXH3_64bits(data, data_size));
      result = ModifyChecksumForLastByte(body_hash, last_byte);
      break;
    }
    default:  // including kNoChecksum
      break;
  }
  return result;
}
Status UncompressBlockContentsForCompressionType(
const UncompressionInfo& uncompression_info, const char* data, size_t n,
BlockContents* contents, uint32_t format_version,

View File

@ -226,17 +226,16 @@ inline CompressionType get_block_compression_type(const char* block_data,
return static_cast<CompressionType>(block_data[block_size]);
}
// Folds the block's trailing compression-type byte into an already-computed
// 32-bit checksum, so that no streaming hash API is needed to cover that
// byte. The byte is mixed in with xor, so applying the function twice with
// the same byte restores the original checksum.
inline uint32_t ModifyChecksumForCompressionType(uint32_t checksum,
                                                 char compression_type) {
  // Similar in spirit to extending a CRC by one byte; the incoming checksum
  // does not need re-mixing provided this step happens only once per
  // checksum.
  constexpr uint32_t kByteMultiplier = 0x6b9083d9;
  // Go through uint8_t so that the byte contributes a value in [0, 255]
  // regardless of whether `char` is signed.
  const uint32_t type_value = static_cast<uint8_t>(compression_type);
  const uint32_t mask = type_value * kByteMultiplier;
  return checksum ^ mask;
}
// Computes a checksum using the given ChecksumType. Sometimes we need to
// include one more input byte logically at the end but not part of the main
// data buffer. If data_size >= 1, then
// ComputeBuiltinChecksum(type, data, size)
// ==
// ComputeBuiltinChecksumWithLastByte(type, data, size - 1, data[size - 1])
uint32_t ComputeBuiltinChecksum(ChecksumType type, const char* data,
size_t size);
uint32_t ComputeBuiltinChecksumWithLastByte(ChecksumType type, const char* data,
size_t size, char last_byte);
// Represents the contents of a block read from an SST file. Depending on how
// it's created, it may or may not own the actual block bytes. As an example,
@ -313,15 +312,6 @@ struct BlockContents {
}
};
// Read the block identified by "handle" from "file". On failure
// return non-OK. On success fill *result and return OK.
extern Status ReadBlockContents(
RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer,
const Footer& footer, const ReadOptions& options, const BlockHandle& handle,
BlockContents* contents, const ImmutableOptions& ioptions,
bool do_uncompress = true, const Slice& compression_dict = Slice(),
const PersistentCacheOptions& cache_options = PersistentCacheOptions());
// The 'data' points to the raw block contents read in from file.
// This method allocates a new heap buffer and the raw block
// contents are uncompresed into this buffer. This buffer is
@ -352,8 +342,7 @@ extern Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id);
// TODO(andrewkr): we should prefer one way of representing a null/uninitialized
// BlockHandle. Currently we use zeros for null and use negation-of-zeros for
// uninitialized.
// Default constructor: delegates to the two-argument constructor with both
// offset and size set to the all-ones 64-bit value.
inline BlockHandle::BlockHandle()
: BlockHandle(~static_cast<uint64_t>(0), ~static_cast<uint64_t>(0)) {}
// Same delegation as above, with the all-ones value spelled `~uint64_t{0}`.
inline BlockHandle::BlockHandle() : BlockHandle(~uint64_t{0}, ~uint64_t{0}) {}
// Stores the given offset and size directly in offset_ and size_.
inline BlockHandle::BlockHandle(uint64_t _offset, uint64_t _size)
: offset_(_offset), size_(_size) {}

View File

@ -2231,73 +2231,109 @@ TEST_P(BlockBasedTableTest, BadChecksumType) {
}
namespace {
std::string TrailerAsString(const std::string& contents,
CompressionType compression_type,
ChecksumType checksum_type) {
std::array<char, kBlockTrailerSize> trailer;
BlockBasedTableBuilder::ComputeBlockTrailer(contents, compression_type,
checksum_type, &trailer);
return Slice(trailer.data(), trailer.size()).ToString(/*hex*/ true);
std::string ChecksumAsString(const std::string& data,
ChecksumType checksum_type) {
uint32_t v = ComputeBuiltinChecksum(checksum_type, data.data(), data.size());
// Verify consistency with other function
if (data.size() >= 1) {
EXPECT_EQ(v, ComputeBuiltinChecksumWithLastByte(
checksum_type, data.data(), data.size() - 1, data.back()));
}
// Little endian as in file
std::array<char, 4> raw_bytes;
EncodeFixed32(raw_bytes.data(), v);
return Slice(raw_bytes.data(), raw_bytes.size()).ToString(/*hex*/ true);
}
// Overwrites the final character of `*data` with `new_last_byte` (mutating
// the caller's string) and returns the checksum string of the result, as
// produced by the (const std::string&, ChecksumType) overload.
// `*data` must be non-empty.
std::string ChecksumAsString(std::string* data, char new_last_byte,
                             ChecksumType checksum_type) {
  std::string& buffer = *data;
  buffer.back() = new_last_byte;
  return ChecksumAsString(buffer, checksum_type);
}
} // namespace
// Make sure that checksum values don't change in later versions, even if
// consistent within current version. (Other tests check for consistency
// between written checksums and read-time validation, so here we only
// have to verify the writer side.)
// consistent within current version.
TEST_P(BlockBasedTableTest, ChecksumSchemas) {
std::string b1 = "This is a short block!";
std::string b0 = "x";
std::string b1 = "This is a short block!x";
std::string b2;
for (int i = 0; i < 100; ++i) {
b2.append("This is a long block!");
}
CompressionType ct1 = kNoCompression;
CompressionType ct2 = kSnappyCompression;
CompressionType ct3 = kZSTD;
b2.append("x");
// Trailing 'x' will be replaced by compression type
std::string empty;
char ct1 = kNoCompression;
char ct2 = kSnappyCompression;
char ct3 = kZSTD;
// Note: first byte of trailer is compression type, last 4 are checksum
for (ChecksumType t : GetSupportedChecksums()) {
switch (t) {
case kNoChecksum:
EXPECT_EQ(TrailerAsString(b1, ct1, t), "0000000000");
EXPECT_EQ(TrailerAsString(b1, ct2, t), "0100000000");
EXPECT_EQ(TrailerAsString(b1, ct3, t), "0700000000");
EXPECT_EQ(TrailerAsString(b2, ct1, t), "0000000000");
EXPECT_EQ(TrailerAsString(b2, ct2, t), "0100000000");
EXPECT_EQ(TrailerAsString(b2, ct3, t), "0700000000");
EXPECT_EQ(ChecksumAsString(empty, t), "00000000");
EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "00000000");
EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "00000000");
EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "00000000");
EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "00000000");
EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "00000000");
EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "00000000");
EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "00000000");
EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "00000000");
EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "00000000");
break;
case kCRC32c:
EXPECT_EQ(TrailerAsString(b1, ct1, t), "00583F0355");
EXPECT_EQ(TrailerAsString(b1, ct2, t), "012F9B0A57");
EXPECT_EQ(TrailerAsString(b1, ct3, t), "07ECE7DA1D");
EXPECT_EQ(TrailerAsString(b2, ct1, t), "00943EF0AB");
EXPECT_EQ(TrailerAsString(b2, ct2, t), "0143A2EDB1");
EXPECT_EQ(TrailerAsString(b2, ct3, t), "0700E53D63");
EXPECT_EQ(ChecksumAsString(empty, t), "D8EA82A2");
EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "D28F2549");
EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "052B2843");
EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "46F8F711");
EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "583F0355");
EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "2F9B0A57");
EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "ECE7DA1D");
EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "943EF0AB");
EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "43A2EDB1");
EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "00E53D63");
break;
case kxxHash:
EXPECT_EQ(TrailerAsString(b1, ct1, t), "004A2E5FB0");
EXPECT_EQ(TrailerAsString(b1, ct2, t), "010BD9F652");
EXPECT_EQ(TrailerAsString(b1, ct3, t), "07B4107E50");
EXPECT_EQ(TrailerAsString(b2, ct1, t), "0020F4D4BA");
EXPECT_EQ(TrailerAsString(b2, ct2, t), "018F1A1F99");
EXPECT_EQ(TrailerAsString(b2, ct3, t), "07A191A338");
EXPECT_EQ(ChecksumAsString(empty, t), "055DCC02");
EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "3EB065CF");
EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "31F79238");
EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "320D2E00");
EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "4A2E5FB0");
EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "0BD9F652");
EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "B4107E50");
EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "20F4D4BA");
EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "8F1A1F99");
EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "A191A338");
break;
case kxxHash64:
EXPECT_EQ(TrailerAsString(b1, ct1, t), "00B74655EF");
EXPECT_EQ(TrailerAsString(b1, ct2, t), "01B6C8BBBE");
EXPECT_EQ(TrailerAsString(b1, ct3, t), "07AED9E3B4");
EXPECT_EQ(TrailerAsString(b2, ct1, t), "000D4999FE");
EXPECT_EQ(TrailerAsString(b2, ct2, t), "01F5932423");
EXPECT_EQ(TrailerAsString(b2, ct3, t), "076B31BAB1");
EXPECT_EQ(ChecksumAsString(empty, t), "99E9D851");
EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "682705DB");
EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "30E7211B");
EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "B7BB58E8");
EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "B74655EF");
EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "B6C8BBBE");
EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "AED9E3B4");
EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "0D4999FE");
EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "F5932423");
EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "6B31BAB1");
break;
case kXXH3:
EXPECT_EQ(TrailerAsString(b1, ct1, t), "00B37FB5E6");
EXPECT_EQ(TrailerAsString(b1, ct2, t), "016AFC258D");
EXPECT_EQ(TrailerAsString(b1, ct3, t), "075CE54616");
EXPECT_EQ(TrailerAsString(b2, ct1, t), "00FA2D482E");
EXPECT_EQ(TrailerAsString(b2, ct2, t), "0123AED845");
EXPECT_EQ(TrailerAsString(b2, ct3, t), "0715B7BBDE");
EXPECT_EQ(ChecksumAsString(empty, t), "00000000");
EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "C294D338");
EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "1B174353");
EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "2D0E20C8");
EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "B37FB5E6");
EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "6AFC258D");
EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "5CE54616");
EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "FA2D482E");
EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "23AED845");
EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "15B7BBDE");
break;
default:
// Force this test to be updated on new ChecksumTypes