rocksdb/util/file_checksum_helper.h
Zhichao Cao 4369f2c7bb Checksum for each SST file and stores in MANIFEST (#6216)
Summary:
In the current code base, RocksDB generate the checksum for each block and verify the checksum at usage. Current PR enable SST file checksum. After a SST file is generated by Flush or Compaction, RocksDB generate the SST file checksum and store the checksum value and checksum method name in the vs_info and MANIFEST as part for the FileMetadata.

Added the enable_sst_file_checksum to Options to enable or disable file checksum. Added sst_file_checksum to Options such that user can plugin their own SST file checksum calculate method via overriding the SstFileChecksum class. The checksum information inlcuding uint32_t checksum value and a checksum name (string).  A new tool is added to LDB such that user can dump out a list of file checksum information from MANIFEST. If user enables the file checksum but does not provide the sst_file_checksum instance, RocksDB will use the default crc32checksum implemented in table/sst_file_checksum_crc32c.h
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6216

Test Plan: Added the testing case in table_test and ldb_cmd_test to verify checksum is correct in different level. Pass make asan_check.

Differential Revision: D19171461

Pulled By: zhichao-cao

fbshipit-source-id: b2e53479eefc5bb0437189eaa1941670e5ba8b87
2020-02-10 15:52:52 -08:00

118 lines
3.6 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cassert>
#include <unordered_map>
#include "port/port.h"
#include "rocksdb/file_checksum.h"
#include "rocksdb/status.h"
#include "util/crc32c.h"
#include "util/string_util.h"
namespace rocksdb {
// This is the class to generate the file checksum based on Crc32. It
// will be used as the default checksum method for SST file checksum
class FileChecksumFuncCrc32c : public FileChecksumFunc {
public:
std::string Extend(const std::string& init_checksum, const char* data,
size_t n) override {
assert(data != nullptr);
uint32_t checksum_value = StringToUint32(init_checksum);
return Uint32ToString(crc32c::Extend(checksum_value, data, n));
}
std::string Value(const char* data, size_t n) override {
assert(data != nullptr);
return Uint32ToString(crc32c::Value(data, n));
}
std::string ProcessChecksum(const std::string& checksum) override {
uint32_t checksum_value = StringToUint32(checksum);
return Uint32ToString(crc32c::Mask(checksum_value));
}
const char* Name() const override { return "FileChecksumCrc32c"; }
// Convert a uint32_t type data into a 4 bytes string.
static std::string Uint32ToString(uint32_t v) {
std::string s;
if (port::kLittleEndian) {
s.append(reinterpret_cast<char*>(&v), sizeof(v));
} else {
char buf[sizeof(v)];
buf[0] = v & 0xff;
buf[1] = (v >> 8) & 0xff;
buf[2] = (v >> 16) & 0xff;
buf[3] = (v >> 24) & 0xff;
s.append(buf, sizeof(v));
}
size_t i = 0, j = s.size() - 1;
while (i < j) {
char tmp = s[i];
s[i] = s[j];
s[j] = tmp;
++i;
--j;
}
return s;
}
// Convert a 4 bytes size string into a uint32_t type data.
static uint32_t StringToUint32(std::string s) {
assert(s.size() == sizeof(uint32_t));
size_t i = 0, j = s.size() - 1;
while (i < j) {
char tmp = s[i];
s[i] = s[j];
s[j] = tmp;
++i;
--j;
}
uint32_t v = 0;
if (port::kLittleEndian) {
memcpy(&v, s.c_str(), sizeof(uint32_t));
} else {
const char* buf = s.c_str();
v |= static_cast<uint32_t>(buf[0]);
v |= (static_cast<uint32_t>(buf[1]) << 8);
v |= (static_cast<uint32_t>(buf[2]) << 16);
v |= (static_cast<uint32_t>(buf[3]) << 24);
}
return v;
}
};
// The default implementaion of FileChecksumList
class FileChecksumListImpl : public FileChecksumList {
public:
FileChecksumListImpl() {}
void reset() override;
size_t size() const override;
Status GetAllFileChecksums(
std::vector<uint64_t>* file_numbers, std::vector<std::string>* checksums,
std::vector<std::string>* checksum_func_names) override;
Status SearchOneFileChecksum(uint64_t file_number, std::string* checksum,
std::string* checksum_func_name) override;
Status InsertOneFileChecksum(uint64_t file_number,
const std::string& checksum,
const std::string& checksum_func_name) override;
Status RemoveOneFileChecksum(uint64_t file_number) override;
private:
// Key is the file number, the first portion of the value is checksum, the
// second portion of the value is checksum function name.
std::unordered_map<uint64_t, std::pair<std::string, std::string>>
checksum_map_;
};
} // namespace rocksdb