Clean up some code related to file checksums (#6861)
Summary: * Add missing unit test for schema stability of FileChecksumGenCrc32c (previously was only comparing to itself) * A lot of clarifying comments * Add some assertions for preconditions * Rename WritableFileWriter::CalculateFileChecksum -> UpdateFileChecksum * Simplify FileChecksumGenCrc32c with shared functions * Implement EndianSwapValue to replace unused EndianTransform And incidentally since I had trouble with 'make check-format' GitHub action disagreeing with local run, * Output full diagnostic information when 'make check-format' fails in CI Pull Request resolved: https://github.com/facebook/rocksdb/pull/6861 Test Plan: new unit test passes before & after other changes Reviewed By: zhichao-cao Differential Revision: D21667115 Pulled By: pdillinger fbshipit-source-id: 6a99970f87605aa024fa540c78cd519ff322c3e6
This commit is contained in:
parent
eb04bb86c6
commit
c7aedf1b48
2
.github/workflows/sanity_check.yml
vendored
2
.github/workflows/sanity_check.yml
vendored
@ -35,7 +35,7 @@ jobs:
|
||||
args: https://raw.githubusercontent.com/llvm-mirror/clang/master/tools/clang-format/clang-format-diff.py
|
||||
|
||||
- name: Check format
|
||||
run: make check-format
|
||||
run: VERBOSE_CHECK=1 make check-format
|
||||
|
||||
- name: Compare buckify output
|
||||
run: make check-buck-targets
|
||||
|
@ -124,6 +124,10 @@ then
|
||||
elif [ $CHECK_ONLY ]
|
||||
then
|
||||
echo "Your change has unformatted code. Please run make format!"
|
||||
if [ $VERBOSE_CHECK ]; then
|
||||
clang-format --version
|
||||
echo "$diffs"
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -31,7 +31,7 @@ IOStatus WritableFileWriter::Append(const Slice& data) {
|
||||
rocksdb_kill_odds * REDUCE_ODDS2);
|
||||
|
||||
// Calculate the checksum of appended data
|
||||
CalculateFileChecksum(data);
|
||||
UpdateFileChecksum(data);
|
||||
|
||||
{
|
||||
IOSTATS_TIMER_GUARD(prepare_write_nanos);
|
||||
@ -225,6 +225,7 @@ IOStatus WritableFileWriter::Flush() {
|
||||
|
||||
std::string WritableFileWriter::GetFileChecksum() {
|
||||
if (checksum_generator_ != nullptr) {
|
||||
assert(checksum_finalized_);
|
||||
return checksum_generator_->GetChecksum();
|
||||
} else {
|
||||
return kUnknownFileChecksum;
|
||||
@ -346,7 +347,7 @@ IOStatus WritableFileWriter::WriteBuffered(const char* data, size_t size) {
|
||||
return s;
|
||||
}
|
||||
|
||||
void WritableFileWriter::CalculateFileChecksum(const Slice& data) {
|
||||
void WritableFileWriter::UpdateFileChecksum(const Slice& data) {
|
||||
if (checksum_generator_ != nullptr) {
|
||||
checksum_generator_->Update(data.data(), data.size());
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ class WritableFileWriter {
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
bool ShouldNotifyListeners() const { return !listeners_.empty(); }
|
||||
void CalculateFileChecksum(const Slice& data);
|
||||
void UpdateFileChecksum(const Slice& data);
|
||||
|
||||
std::unique_ptr<FSWritableFile> writable_file_;
|
||||
std::string file_name_;
|
||||
|
@ -24,6 +24,10 @@ struct FileChecksumGenContext {
|
||||
|
||||
// FileChecksumGenerator is the class to generates the checksum value
|
||||
// for each file when the file is written to the file system.
|
||||
// Implementations may assume that
|
||||
// * Finalize is called at most once during the life of the object
|
||||
// * All calls to Update come before Finalize
|
||||
// * All calls to GetChecksum come after Finalize
|
||||
class FileChecksumGenerator {
|
||||
public:
|
||||
virtual ~FileChecksumGenerator() {}
|
||||
@ -36,7 +40,8 @@ class FileChecksumGenerator {
|
||||
// Generate the final results if no further new data will be updated.
|
||||
virtual void Finalize() = 0;
|
||||
|
||||
// Get the checksum
|
||||
// Get the checksum. The result should not be the empty string and may
|
||||
// include arbitrary bytes, including non-printable characters.
|
||||
virtual std::string GetChecksum() const = 0;
|
||||
|
||||
// Returns a name that identifies the current file checksum function.
|
||||
@ -97,9 +102,13 @@ class FileChecksumList {
|
||||
// Create a new file checksum list.
|
||||
extern FileChecksumList* NewFileChecksumList();
|
||||
|
||||
// Return a shared_ptr of the builtin Crc32 based file checksum generatory
|
||||
// Return a shared_ptr of the builtin Crc32c based file checksum generatory
|
||||
// factory object, which can be shared to create the Crc32c based checksum
|
||||
// generator object.
|
||||
// Note: this implementation is compatible with many other crc32c checksum
|
||||
// implementations and uses big-endian encoding of the result, unlike most
|
||||
// other crc32c checksums in RocksDB, which alter the result with
|
||||
// crc32c::Mask and use little-endian encoding.
|
||||
extern std::shared_ptr<FileChecksumGenFactory>
|
||||
GetFileChecksumGenCrc32cFactory();
|
||||
|
||||
|
@ -89,7 +89,7 @@ struct BackupableDBOptions {
|
||||
|
||||
// Only used if share_table_files is set to true. If true, will consider that
|
||||
// backups can come from different databases, hence a sst is not uniquely
|
||||
// identifed by its name, but by the triple (file name, crc32, file length)
|
||||
// identifed by its name, but by the triple (file name, crc32c, file length)
|
||||
// Default: false
|
||||
// Note: this is an experimental option, and you'll need to set it manually
|
||||
// *turn it on only if you know what you're doing*
|
||||
|
@ -3297,7 +3297,7 @@ TEST_P(BlockBasedTableTest, NoFileChecksum) {
|
||||
ASSERT_STREQ(f.GetFileChecksum().c_str(), kUnknownFileChecksum.c_str());
|
||||
}
|
||||
|
||||
TEST_P(BlockBasedTableTest, Crc32FileChecksum) {
|
||||
TEST_P(BlockBasedTableTest, Crc32cFileChecksum) {
|
||||
FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
|
||||
new FileChecksumGenCrc32cFactory();
|
||||
Options options;
|
||||
@ -3322,12 +3322,12 @@ TEST_P(BlockBasedTableTest, Crc32FileChecksum) {
|
||||
|
||||
FileChecksumGenContext gen_context;
|
||||
gen_context.file_name = "db/tmp";
|
||||
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen1 =
|
||||
std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen1 =
|
||||
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
|
||||
gen_context);
|
||||
FileChecksumTestHelper f(true);
|
||||
f.CreateWriteableFile();
|
||||
f.SetFileChecksumGenerator(checksum_crc32_gen1.release());
|
||||
f.SetFileChecksumGenerator(checksum_crc32c_gen1.release());
|
||||
std::unique_ptr<TableBuilder> builder;
|
||||
builder.reset(ioptions.table_factory->NewTableBuilder(
|
||||
TableBuilderOptions(ioptions, moptions, *comparator,
|
||||
@ -3342,12 +3342,22 @@ TEST_P(BlockBasedTableTest, Crc32FileChecksum) {
|
||||
f.WriteKVAndFlushTable();
|
||||
ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");
|
||||
|
||||
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen2 =
|
||||
std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen2 =
|
||||
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
|
||||
gen_context);
|
||||
std::string checksum;
|
||||
ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum));
|
||||
ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum));
|
||||
ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
|
||||
|
||||
// Unit test the generator itself for schema stability
|
||||
std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen3 =
|
||||
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
|
||||
gen_context);
|
||||
const char data[] = "here is some data";
|
||||
checksum_crc32c_gen3->Update(data, sizeof(data));
|
||||
checksum_crc32c_gen3->Finalize();
|
||||
checksum = checksum_crc32c_gen3->GetChecksum();
|
||||
ASSERT_STREQ(checksum.c_str(), "\345\245\277\110");
|
||||
}
|
||||
|
||||
// Plain table is not supported in ROCKSDB_LITE
|
||||
@ -3441,7 +3451,7 @@ TEST_F(PlainTableTest, NoFileChecksum) {
|
||||
EXPECT_EQ(f.GetFileChecksum(), kUnknownFileChecksum.c_str());
|
||||
}
|
||||
|
||||
TEST_F(PlainTableTest, Crc32FileChecksum) {
|
||||
TEST_F(PlainTableTest, Crc32cFileChecksum) {
|
||||
PlainTableOptions plain_table_options;
|
||||
plain_table_options.user_key_len = 20;
|
||||
plain_table_options.bloom_bits_per_key = 8;
|
||||
@ -3462,12 +3472,12 @@ TEST_F(PlainTableTest, Crc32FileChecksum) {
|
||||
|
||||
FileChecksumGenContext gen_context;
|
||||
gen_context.file_name = "db/tmp";
|
||||
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen1 =
|
||||
std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen1 =
|
||||
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
|
||||
gen_context);
|
||||
FileChecksumTestHelper f(true);
|
||||
f.CreateWriteableFile();
|
||||
f.SetFileChecksumGenerator(checksum_crc32_gen1.release());
|
||||
f.SetFileChecksumGenerator(checksum_crc32c_gen1.release());
|
||||
|
||||
std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
|
||||
TableBuilderOptions(
|
||||
@ -3481,11 +3491,11 @@ TEST_F(PlainTableTest, Crc32FileChecksum) {
|
||||
f.WriteKVAndFlushTable();
|
||||
ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");
|
||||
|
||||
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen2 =
|
||||
std::unique_ptr<FileChecksumGenerator> checksum_crc32c_gen2 =
|
||||
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
|
||||
gen_context);
|
||||
std::string checksum;
|
||||
ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum));
|
||||
ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum));
|
||||
EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
|
||||
}
|
||||
|
||||
|
@ -7,8 +7,9 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// Endian-neutral encoding:
|
||||
// Encoding independent of machine byte order:
|
||||
// * Fixed-length numbers are encoded with least-significant byte first
|
||||
// (little endian, native order on Intel and others)
|
||||
// * In addition we support variable length "varint" encoding
|
||||
// * Strings are encoded prefixed by their length in varint format
|
||||
|
||||
@ -402,13 +403,34 @@ inline bool GetVarsignedint64(Slice* input, int64_t* value) {
|
||||
}
|
||||
}
|
||||
|
||||
// Provide an interface for platform independent endianness transformation
|
||||
inline uint64_t EndianTransform(uint64_t input, size_t size) {
|
||||
char* pos = reinterpret_cast<char*>(&input);
|
||||
uint64_t ret_val = 0;
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
ret_val |= (static_cast<uint64_t>(static_cast<unsigned char>(pos[i]))
|
||||
<< ((size - i - 1) << 3));
|
||||
// Swaps between big and little endian. Can be used to in combination
|
||||
// with the little-endian encoding/decoding functions to encode/decode
|
||||
// big endian.
|
||||
template <typename T>
|
||||
inline T EndianSwapValue(T v) {
|
||||
static_assert(std::is_integral<T>::value, "non-integral type");
|
||||
|
||||
#ifdef _MSC_VER
|
||||
if (sizeof(T) == 2) {
|
||||
return static_cast<T>(_byteswap_ushort(static_cast<uint16_t>(v)));
|
||||
} else if (sizeof(T) == 4) {
|
||||
return static_cast<T>(_byteswap_ulong(static_cast<uint32_t>(v)));
|
||||
} else if (sizeof(T) == 8) {
|
||||
return static_cast<T>(_byteswap_uint64(static_cast<uint64_t>(v)));
|
||||
}
|
||||
#else
|
||||
if (sizeof(T) == 2) {
|
||||
return static_cast<T>(__builtin_bswap16(static_cast<uint16_t>(v)));
|
||||
} else if (sizeof(T) == 4) {
|
||||
return static_cast<T>(__builtin_bswap32(static_cast<uint32_t>(v)));
|
||||
} else if (sizeof(T) == 8) {
|
||||
return static_cast<T>(__builtin_bswap64(static_cast<uint64_t>(v)));
|
||||
}
|
||||
#endif
|
||||
// Recognized by clang as bswap, but not by gcc :(
|
||||
T ret_val = 0;
|
||||
for (size_t i = 0; i < sizeof(T); ++i) {
|
||||
ret_val |= ((v >> (8 * i)) & 0xff) << (8 * (sizeof(T) - 1 - i));
|
||||
}
|
||||
return ret_val;
|
||||
}
|
||||
|
@ -6,11 +6,12 @@
|
||||
#pragma once
|
||||
#include <cassert>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "port/port.h"
|
||||
#include "rocksdb/file_checksum.h"
|
||||
#include "rocksdb/status.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/crc32c.h"
|
||||
#include "util/string_util.h"
|
||||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
|
||||
@ -26,60 +27,19 @@ class FileChecksumGenCrc32c : public FileChecksumGenerator {
|
||||
checksum_ = crc32c::Extend(checksum_, data, n);
|
||||
}
|
||||
|
||||
void Finalize() override { checksum_str_ = Uint32ToString(checksum_); }
|
||||
void Finalize() override {
|
||||
assert(checksum_str_.empty());
|
||||
// Store as big endian raw bytes
|
||||
PutFixed32(&checksum_str_, EndianSwapValue(checksum_));
|
||||
}
|
||||
|
||||
std::string GetChecksum() const override { return checksum_str_; }
|
||||
std::string GetChecksum() const override {
|
||||
assert(!checksum_str_.empty());
|
||||
return checksum_str_;
|
||||
}
|
||||
|
||||
const char* Name() const override { return "FileChecksumCrc32c"; }
|
||||
|
||||
// Convert a uint32_t type data into a 4 bytes string.
|
||||
static std::string Uint32ToString(uint32_t v) {
|
||||
std::string s;
|
||||
if (port::kLittleEndian) {
|
||||
s.append(reinterpret_cast<char*>(&v), sizeof(v));
|
||||
} else {
|
||||
char buf[sizeof(v)];
|
||||
buf[0] = v & 0xff;
|
||||
buf[1] = (v >> 8) & 0xff;
|
||||
buf[2] = (v >> 16) & 0xff;
|
||||
buf[3] = (v >> 24) & 0xff;
|
||||
s.append(buf, sizeof(v));
|
||||
}
|
||||
size_t i = 0, j = s.size() - 1;
|
||||
while (i < j) {
|
||||
char tmp = s[i];
|
||||
s[i] = s[j];
|
||||
s[j] = tmp;
|
||||
++i;
|
||||
--j;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// Convert a 4 bytes size string into a uint32_t type data.
|
||||
static uint32_t StringToUint32(std::string s) {
|
||||
assert(s.size() == sizeof(uint32_t));
|
||||
size_t i = 0, j = s.size() - 1;
|
||||
while (i < j) {
|
||||
char tmp = s[i];
|
||||
s[i] = s[j];
|
||||
s[j] = tmp;
|
||||
++i;
|
||||
--j;
|
||||
}
|
||||
uint32_t v = 0;
|
||||
if (port::kLittleEndian) {
|
||||
memcpy(&v, s.c_str(), sizeof(uint32_t));
|
||||
} else {
|
||||
const char* buf = s.c_str();
|
||||
v |= static_cast<uint32_t>(buf[0]);
|
||||
v |= (static_cast<uint32_t>(buf[1]) << 8);
|
||||
v |= (static_cast<uint32_t>(buf[2]) << 16);
|
||||
v |= (static_cast<uint32_t>(buf[3]) << 24);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t checksum_;
|
||||
std::string checksum_str_;
|
||||
|
@ -1197,7 +1197,7 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
|
||||
std::string dst;
|
||||
// 1. extract the filename
|
||||
size_t slash = file.find_last_of('/');
|
||||
// file will either be shared/<file>, shared_checksum/<file_crc32_size>
|
||||
// file will either be shared/<file>, shared_checksum/<file_crc32c_size>
|
||||
// or private/<number>/<file>
|
||||
assert(slash != std::string::npos);
|
||||
dst = file.substr(slash + 1);
|
||||
@ -1759,8 +1759,8 @@ Slice kMetaDataPrefix("metadata ");
|
||||
// <seq number>
|
||||
// <metadata(literal string)> <metadata> (optional)
|
||||
// <number of files>
|
||||
// <file1> <crc32(literal string)> <crc32_value>
|
||||
// <file2> <crc32(literal string)> <crc32_value>
|
||||
// <file1> <crc32(literal string)> <crc32c_value>
|
||||
// <file2> <crc32(literal string)> <crc32c_value>
|
||||
// ...
|
||||
Status BackupEngineImpl::BackupMeta::LoadFromFile(
|
||||
const std::string& backup_dir,
|
||||
@ -1808,6 +1808,7 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
|
||||
|
||||
std::vector<std::shared_ptr<FileInfo>> files;
|
||||
|
||||
// WART: The checksums are crc32c, not original crc32
|
||||
Slice checksum_prefix("crc32 ");
|
||||
|
||||
for (uint32_t i = 0; s.ok() && i < num_files; ++i) {
|
||||
@ -1921,7 +1922,8 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) {
|
||||
}
|
||||
|
||||
for (const auto& file : files_) {
|
||||
// use crc32 for now, switch to something else if needed
|
||||
// use crc32c for now, switch to something else if needed
|
||||
// WART: The checksums are crc32c, not original crc32
|
||||
|
||||
size_t newlen = len + file->filename.length() + snprintf(writelen_temp,
|
||||
sizeof(writelen_temp), " crc32 %u\n", file->checksum_value);
|
||||
|
Loading…
Reference in New Issue
Block a user