Optimize & clean up footer code (#9280)
Summary: Again, ahead of planned changes in https://github.com/facebook/rocksdb/issues/9058. This change improves performance (vs. the pre-https://github.com/facebook/rocksdb/issues/9240 baseline) by separating a FooterBuilder from Footer, where FooterBuilder includes (inline owns) the serialized data so that it can be stack allocated.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9280
Test Plan: existing tests + performance testing below.
Extreme case performance testing as in https://github.com/facebook/rocksdb/issues/9240 with: TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=fillseq -memtablerep=vector -allow_concurrent_memtable_write=false -num=30000000 (Each is ops/s averaged over 50 runs, run simultaneously with competing configuration for load fairness)
Pre-https://github.com/facebook/rocksdb/issues/9240 baseline (f577458): 436389
With https://github.com/facebook/rocksdb/issues/9240 (653c392): 417946 (-4.2% vs. baseline)
This change: 443762 (+1.7% vs. baseline)
Reviewed By: ajkr Differential Revision: D33077220 Pulled By: pdillinger fbshipit-source-id: 7eaa6499589aac1693414a758e8c799216c5016c
This commit is contained in:
parent
08721293ea
commit
e92a0ed040
@ -1788,31 +1788,18 @@ void BlockBasedTableBuilder::WriteRangeDelBlock(
|
|||||||
void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle,
|
void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle,
|
||||||
BlockHandle& index_block_handle) {
|
BlockHandle& index_block_handle) {
|
||||||
Rep* r = rep_;
|
Rep* r = rep_;
|
||||||
// No need to write out new footer if we're using default checksum.
|
|
||||||
// We're writing legacy magic number because we want old versions of RocksDB
|
|
||||||
// be able to read files generated with new release (just in case if
|
|
||||||
// somebody wants to roll back after an upgrade)
|
|
||||||
// TODO(icanadi) at some point in the future, when we're absolutely sure
|
|
||||||
// nobody will roll back to RocksDB 2.x versions, retire the legacy magic
|
|
||||||
// number and always write new table files with new magic number
|
|
||||||
bool legacy = (r->table_options.format_version == 0);
|
|
||||||
// this is guaranteed by BlockBasedTableBuilder's constructor
|
// this is guaranteed by BlockBasedTableBuilder's constructor
|
||||||
assert(r->table_options.checksum == kCRC32c ||
|
assert(r->table_options.checksum == kCRC32c ||
|
||||||
r->table_options.format_version != 0);
|
r->table_options.format_version != 0);
|
||||||
Footer footer;
|
|
||||||
footer
|
|
||||||
.set_table_magic_number(legacy ? kLegacyBlockBasedTableMagicNumber
|
|
||||||
: kBlockBasedTableMagicNumber)
|
|
||||||
.set_format_version(r->table_options.format_version)
|
|
||||||
.set_metaindex_handle(metaindex_block_handle)
|
|
||||||
.set_index_handle(index_block_handle)
|
|
||||||
.set_checksum_type(r->table_options.checksum);
|
|
||||||
std::string footer_encoding;
|
|
||||||
footer.EncodeTo(&footer_encoding, r->get_offset());
|
|
||||||
assert(ok());
|
assert(ok());
|
||||||
IOStatus ios = r->file->Append(footer_encoding);
|
|
||||||
|
FooterBuilder footer;
|
||||||
|
footer.Build(kBlockBasedTableMagicNumber, r->table_options.format_version,
|
||||||
|
r->get_offset(), r->table_options.checksum,
|
||||||
|
metaindex_block_handle, index_block_handle);
|
||||||
|
IOStatus ios = r->file->Append(footer.GetSlice());
|
||||||
if (ios.ok()) {
|
if (ios.ok()) {
|
||||||
r->set_offset(r->get_offset() + footer_encoding.size());
|
r->set_offset(r->get_offset() + footer.GetSlice().size());
|
||||||
} else {
|
} else {
|
||||||
r->SetIOStatus(ios);
|
r->SetIOStatus(ios);
|
||||||
r->SetStatus(ios);
|
r->SetStatus(ios);
|
||||||
|
@ -393,15 +393,10 @@ Status CuckooTableBuilder::Finish() {
|
|||||||
return status_;
|
return status_;
|
||||||
}
|
}
|
||||||
|
|
||||||
Footer footer;
|
FooterBuilder footer;
|
||||||
footer.set_table_magic_number(kCuckooTableMagicNumber)
|
footer.Build(kCuckooTableMagicNumber, /* format_version */ 1, offset,
|
||||||
.set_format_version(1)
|
kNoChecksum, meta_index_block_handle);
|
||||||
.set_metaindex_handle(meta_index_block_handle)
|
io_status_ = file_->Append(footer.GetSlice());
|
||||||
.set_index_handle(BlockHandle::NullBlockHandle())
|
|
||||||
.set_checksum_type(kNoChecksum);
|
|
||||||
std::string footer_encoding;
|
|
||||||
footer.EncodeTo(&footer_encoding, offset);
|
|
||||||
io_status_ = file_->Append(footer_encoding);
|
|
||||||
status_ = io_status_;
|
status_ = io_status_;
|
||||||
return status_;
|
return status_;
|
||||||
}
|
}
|
||||||
|
135
table/format.cc
135
table/format.cc
@ -173,21 +173,25 @@ inline uint64_t UpconvertLegacyFooterFormat(uint64_t magic_number) {
|
|||||||
return kPlainTableMagicNumber;
|
return kPlainTableMagicNumber;
|
||||||
}
|
}
|
||||||
assert(false);
|
assert(false);
|
||||||
return 0;
|
return magic_number;
|
||||||
}
|
}
|
||||||
} // namespace
|
inline uint64_t DownconvertToLegacyFooterFormat(uint64_t magic_number) {
|
||||||
|
if (magic_number == kBlockBasedTableMagicNumber) {
|
||||||
Footer& Footer::set_table_magic_number(uint64_t magic_number) {
|
return kLegacyBlockBasedTableMagicNumber;
|
||||||
assert(table_magic_number_ == kNullTableMagicNumber);
|
}
|
||||||
table_magic_number_ = magic_number;
|
if (magic_number == kPlainTableMagicNumber) {
|
||||||
|
return kLegacyPlainTableMagicNumber;
|
||||||
|
}
|
||||||
|
assert(false);
|
||||||
|
return magic_number;
|
||||||
|
}
|
||||||
|
inline uint8_t BlockTrailerSizeForMagicNumber(uint64_t magic_number) {
|
||||||
if (magic_number == kBlockBasedTableMagicNumber ||
|
if (magic_number == kBlockBasedTableMagicNumber ||
|
||||||
magic_number == kLegacyBlockBasedTableMagicNumber) {
|
magic_number == kLegacyBlockBasedTableMagicNumber) {
|
||||||
block_trailer_size_ =
|
return static_cast<uint8_t>(BlockBasedTable::kBlockTrailerSize);
|
||||||
static_cast<uint8_t>(BlockBasedTable::kBlockTrailerSize);
|
|
||||||
} else {
|
} else {
|
||||||
block_trailer_size_ = 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return *this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Footer format, in three parts:
|
// Footer format, in three parts:
|
||||||
@ -206,60 +210,69 @@ Footer& Footer::set_table_magic_number(uint64_t magic_number) {
|
|||||||
// -> format_version >= 1 (inferred from NOT legacy magic number)
|
// -> format_version >= 1 (inferred from NOT legacy magic number)
|
||||||
// format_version (uint32LE, 4 bytes), also called "footer version"
|
// format_version (uint32LE, 4 bytes), also called "footer version"
|
||||||
// newer magic number (8 bytes)
|
// newer magic number (8 bytes)
|
||||||
void Footer::EncodeTo(std::string* dst, uint64_t footer_offset) const {
|
|
||||||
|
constexpr size_t kFooterPart2Size = 2 * BlockHandle::kMaxEncodedLength;
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
void FooterBuilder::Build(uint64_t magic_number, uint32_t format_version,
|
||||||
|
uint64_t footer_offset, ChecksumType checksum_type,
|
||||||
|
const BlockHandle& metaindex_handle,
|
||||||
|
const BlockHandle& index_handle) {
|
||||||
(void)footer_offset; // Future use
|
(void)footer_offset; // Future use
|
||||||
|
|
||||||
// Sanitize magic numbers & format versions
|
assert(magic_number != Footer::kNullTableMagicNumber);
|
||||||
assert(table_magic_number_ != kNullTableMagicNumber);
|
assert(IsSupportedFormatVersion(format_version));
|
||||||
uint64_t magic = table_magic_number_;
|
|
||||||
uint32_t fv = format_version_;
|
|
||||||
assert(fv != kInvalidFormatVersion);
|
|
||||||
assert(IsLegacyFooterFormat(magic) == (fv == 0));
|
|
||||||
|
|
||||||
ChecksumType ct = checksum_type();
|
|
||||||
|
|
||||||
// Allocate destination data and generate parts 1 and 3
|
|
||||||
const size_t original_size = dst->size();
|
|
||||||
char* part2;
|
char* part2;
|
||||||
if (fv > 0) {
|
char* part3;
|
||||||
dst->resize(original_size + kNewVersionsEncodedLength);
|
if (format_version > 0) {
|
||||||
char* part1 = &(*dst)[original_size];
|
slice_ = Slice(data_.data(), Footer::kNewVersionsEncodedLength);
|
||||||
part2 = part1 + 1;
|
|
||||||
char* part3 = part2 + 2 * BlockHandle::kMaxEncodedLength;
|
|
||||||
assert(&(*dst)[dst->size() - 1] + 1 - part3 == /* part 3 size */ 12);
|
|
||||||
// Generate parts 1 and 3
|
// Generate parts 1 and 3
|
||||||
part1[0] = ct;
|
char* cur = data_.data();
|
||||||
EncodeFixed32(part3, fv);
|
// Part 1
|
||||||
EncodeFixed64(part3 + 4, magic);
|
*(cur++) = checksum_type;
|
||||||
|
// Part 2
|
||||||
|
part2 = cur;
|
||||||
|
// Skip over part 2 for now
|
||||||
|
cur += kFooterPart2Size;
|
||||||
|
// Part 3
|
||||||
|
part3 = cur;
|
||||||
|
EncodeFixed32(cur, format_version);
|
||||||
|
cur += 4;
|
||||||
|
EncodeFixed64(cur, magic_number);
|
||||||
|
assert(cur + 8 == slice_.data() + slice_.size());
|
||||||
} else {
|
} else {
|
||||||
dst->resize(original_size + kVersion0EncodedLength);
|
slice_ = Slice(data_.data(), Footer::kVersion0EncodedLength);
|
||||||
part2 = &(*dst)[original_size];
|
|
||||||
char* part3 = part2 + 2 * BlockHandle::kMaxEncodedLength;
|
|
||||||
assert(&(*dst)[dst->size() - 1] + 1 - part3 == /* part 3 size */ 8);
|
|
||||||
// Legacy SST files use kCRC32c checksum but it's not stored in footer.
|
// Legacy SST files use kCRC32c checksum but it's not stored in footer.
|
||||||
assert(ct == kNoChecksum || ct == kCRC32c);
|
assert(checksum_type == kNoChecksum || checksum_type == kCRC32c);
|
||||||
// Generate part 3 (part 1 empty)
|
// Generate part 3 (part 1 empty, skip part 2 for now)
|
||||||
EncodeFixed64(part3, magic);
|
part2 = data_.data();
|
||||||
|
part3 = part2 + kFooterPart2Size;
|
||||||
|
char* cur = part3;
|
||||||
|
// Use legacy magic numbers to indicate format_version=0, for
|
||||||
|
// compatibility. No other cases should use format_version=0.
|
||||||
|
EncodeFixed64(cur, DownconvertToLegacyFooterFormat(magic_number));
|
||||||
|
assert(cur + 8 == slice_.data() + slice_.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate Part2
|
{
|
||||||
// Variable size encode handles (sigh)
|
char* cur = part2;
|
||||||
part2 = metaindex_handle_.EncodeTo(part2);
|
cur = metaindex_handle.EncodeTo(cur);
|
||||||
/*part2 = */ index_handle_.EncodeTo(part2);
|
cur = index_handle.EncodeTo(cur);
|
||||||
|
// Zero pad remainder
|
||||||
// remainder of part2 is already zero padded
|
std::fill(cur, part3, char{0});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) {
|
Status Footer::DecodeFrom(Slice input, uint64_t input_offset) {
|
||||||
(void)input_offset; // Future use
|
(void)input_offset; // Future use
|
||||||
|
|
||||||
// Only decode to unused Footer
|
// Only decode to unused Footer
|
||||||
assert(table_magic_number_ == kNullTableMagicNumber);
|
assert(table_magic_number_ == kNullTableMagicNumber);
|
||||||
assert(input != nullptr);
|
assert(input != nullptr);
|
||||||
assert(input->size() >= kMinEncodedLength);
|
assert(input.size() >= kMinEncodedLength);
|
||||||
|
|
||||||
const char* magic_ptr =
|
const char* magic_ptr = input.data() + input.size() - kMagicNumberLengthByte;
|
||||||
input->data() + input->size() - kMagicNumberLengthByte;
|
|
||||||
uint64_t magic = DecodeFixed64(magic_ptr);
|
uint64_t magic = DecodeFixed64(magic_ptr);
|
||||||
|
|
||||||
// We check for legacy formats here and silently upconvert them
|
// We check for legacy formats here and silently upconvert them
|
||||||
@ -267,13 +280,14 @@ Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) {
|
|||||||
if (legacy) {
|
if (legacy) {
|
||||||
magic = UpconvertLegacyFooterFormat(magic);
|
magic = UpconvertLegacyFooterFormat(magic);
|
||||||
}
|
}
|
||||||
set_table_magic_number(magic);
|
table_magic_number_ = magic;
|
||||||
|
block_trailer_size_ = BlockTrailerSizeForMagicNumber(magic);
|
||||||
|
|
||||||
// Parse Part3
|
// Parse Part3
|
||||||
if (legacy) {
|
if (legacy) {
|
||||||
// The size is already asserted to be at least kMinEncodedLength
|
// The size is already asserted to be at least kMinEncodedLength
|
||||||
// at the beginning of the function
|
// at the beginning of the function
|
||||||
input->remove_prefix(input->size() - kVersion0EncodedLength);
|
input.remove_prefix(input.size() - kVersion0EncodedLength);
|
||||||
format_version_ = 0 /* legacy */;
|
format_version_ = 0 /* legacy */;
|
||||||
checksum_type_ = kCRC32c;
|
checksum_type_ = kCRC32c;
|
||||||
} else {
|
} else {
|
||||||
@ -284,14 +298,14 @@ Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) {
|
|||||||
ROCKSDB_NAMESPACE::ToString(format_version_));
|
ROCKSDB_NAMESPACE::ToString(format_version_));
|
||||||
}
|
}
|
||||||
// All known format versions >= 1 occupy exactly this many bytes.
|
// All known format versions >= 1 occupy exactly this many bytes.
|
||||||
if (input->size() < kNewVersionsEncodedLength) {
|
if (input.size() < kNewVersionsEncodedLength) {
|
||||||
return Status::Corruption("Input is too short to be an SST file");
|
return Status::Corruption("Input is too short to be an SST file");
|
||||||
}
|
}
|
||||||
uint64_t adjustment = input->size() - kNewVersionsEncodedLength;
|
uint64_t adjustment = input.size() - kNewVersionsEncodedLength;
|
||||||
input->remove_prefix(adjustment);
|
input.remove_prefix(adjustment);
|
||||||
|
|
||||||
// Parse Part1
|
// Parse Part1
|
||||||
char chksum = input->data()[0];
|
char chksum = input.data()[0];
|
||||||
checksum_type_ = lossless_cast<ChecksumType>(chksum);
|
checksum_type_ = lossless_cast<ChecksumType>(chksum);
|
||||||
if (!IsSupportedChecksumType(checksum_type())) {
|
if (!IsSupportedChecksumType(checksum_type())) {
|
||||||
return Status::Corruption(
|
return Status::Corruption(
|
||||||
@ -299,21 +313,16 @@ Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) {
|
|||||||
ROCKSDB_NAMESPACE::ToString(lossless_cast<uint8_t>(chksum)));
|
ROCKSDB_NAMESPACE::ToString(lossless_cast<uint8_t>(chksum)));
|
||||||
}
|
}
|
||||||
// Consume checksum type field
|
// Consume checksum type field
|
||||||
input->remove_prefix(1);
|
input.remove_prefix(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse Part2
|
// Parse Part2
|
||||||
Status result = metaindex_handle_.DecodeFrom(input);
|
Status result = metaindex_handle_.DecodeFrom(&input);
|
||||||
if (result.ok()) {
|
if (result.ok()) {
|
||||||
result = index_handle_.DecodeFrom(input);
|
result = index_handle_.DecodeFrom(&input);
|
||||||
}
|
}
|
||||||
if (!result.ok()) {
|
|
||||||
return result;
|
return result;
|
||||||
}
|
// Padding in part2 is ignored
|
||||||
|
|
||||||
// Mark all input consumed (skip padding & part3)
|
|
||||||
*input = Slice(input->data() + input->size(), 0U);
|
|
||||||
return Status::OK();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Footer::ToString() const {
|
std::string Footer::ToString() const {
|
||||||
@ -384,7 +393,7 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file,
|
|||||||
file->file_name());
|
file->file_name());
|
||||||
}
|
}
|
||||||
|
|
||||||
s = footer->DecodeFrom(&footer_input, read_offset);
|
s = footer->DecodeFrom(footer_input, read_offset);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@ -128,66 +129,45 @@ inline bool IsSupportedFormatVersion(uint32_t version) {
|
|||||||
// elsewhere under the metaindex block. For example, checksum_type is
|
// elsewhere under the metaindex block. For example, checksum_type is
|
||||||
// required for verifying metaindex block checksum (when applicable), but
|
// required for verifying metaindex block checksum (when applicable), but
|
||||||
// index block handle can easily go in metaindex block (possible future).
|
// index block handle can easily go in metaindex block (possible future).
|
||||||
|
// See also FooterBuilder below.
|
||||||
class Footer {
|
class Footer {
|
||||||
public:
|
public:
|
||||||
|
// Create empty. Populate using DecodeFrom.
|
||||||
Footer() {}
|
Footer() {}
|
||||||
|
|
||||||
// Uses builder pattern rather than distinctive ctors
|
// Deserialize a footer (populate fields) from `input` and check for various
|
||||||
|
// corruptions. `input_offset` is the offset within the target file of
|
||||||
|
// `input` buffer (future use).
|
||||||
|
Status DecodeFrom(Slice input, uint64_t input_offset);
|
||||||
|
|
||||||
// Table magic number identifies file as RocksDB SST file and which kind of
|
// Table magic number identifies file as RocksDB SST file and which kind of
|
||||||
// SST format is in use.
|
// SST format is in use.
|
||||||
Footer& set_table_magic_number(uint64_t tmn);
|
|
||||||
uint64_t table_magic_number() const { return table_magic_number_; }
|
uint64_t table_magic_number() const { return table_magic_number_; }
|
||||||
|
|
||||||
// A version (footer and more) within a kind of SST. (It would add more
|
// A version (footer and more) within a kind of SST. (It would add more
|
||||||
// unnecessary complexity to separate footer versions and
|
// unnecessary complexity to separate footer versions and
|
||||||
// BBTO::format_version.)
|
// BBTO::format_version.)
|
||||||
Footer& set_format_version(uint32_t fv) {
|
|
||||||
format_version_ = fv;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
uint32_t format_version() const { return format_version_; }
|
uint32_t format_version() const { return format_version_; }
|
||||||
|
|
||||||
// Block handle for metaindex block.
|
// Block handle for metaindex block.
|
||||||
Footer& set_metaindex_handle(const BlockHandle& h) {
|
|
||||||
metaindex_handle_ = h;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
|
const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
|
||||||
|
|
||||||
// Block handle for (top-level) index block.
|
// Block handle for (top-level) index block.
|
||||||
Footer& set_index_handle(const BlockHandle& h) {
|
|
||||||
index_handle_ = h;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
const BlockHandle& index_handle() const { return index_handle_; }
|
const BlockHandle& index_handle() const { return index_handle_; }
|
||||||
|
|
||||||
// Checksum type used in the file.
|
// Checksum type used in the file.
|
||||||
Footer& set_checksum_type(ChecksumType ct) {
|
|
||||||
checksum_type_ = ct;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
ChecksumType checksum_type() const {
|
ChecksumType checksum_type() const {
|
||||||
return static_cast<ChecksumType>(checksum_type_);
|
return static_cast<ChecksumType>(checksum_type_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Appends serialized footer to `dst`. The starting offset of the footer
|
// Block trailer size used by file with this footer (e.g. 5 for block-based
|
||||||
// within the file is required for future work.
|
// table and 0 for plain table). This is inferred from magic number so
|
||||||
void EncodeTo(std::string* dst, uint64_t footer_offset) const;
|
// not in the serialized form.
|
||||||
|
inline size_t GetBlockTrailerSize() const { return block_trailer_size_; }
|
||||||
// Deserialize a footer (populate fields) from `input` and check for various
|
|
||||||
// corruptions. On success (and some error cases) `input` is advanced past
|
|
||||||
// the footer. Like EncodeTo, the offset within the file will be nedded for
|
|
||||||
// future work
|
|
||||||
Status DecodeFrom(Slice* input, uint64_t input_offset);
|
|
||||||
|
|
||||||
// Convert this object to a human readable form
|
// Convert this object to a human readable form
|
||||||
std::string ToString() const;
|
std::string ToString() const;
|
||||||
|
|
||||||
// Block trailer size used by file with this footer (e.g. 5 for block-based
|
|
||||||
// table and 0 for plain table)
|
|
||||||
inline size_t GetBlockTrailerSize() const { return block_trailer_size_; }
|
|
||||||
|
|
||||||
// Encoded lengths of Footers. Bytes for serialized Footer will always be
|
// Encoded lengths of Footers. Bytes for serialized Footer will always be
|
||||||
// >= kMinEncodedLength and <= kMaxEncodedLength.
|
// >= kMinEncodedLength and <= kMaxEncodedLength.
|
||||||
//
|
//
|
||||||
@ -207,8 +187,9 @@ class Footer {
|
|||||||
|
|
||||||
static constexpr uint64_t kNullTableMagicNumber = 0;
|
static constexpr uint64_t kNullTableMagicNumber = 0;
|
||||||
|
|
||||||
private:
|
|
||||||
static constexpr uint32_t kInvalidFormatVersion = 0xffffffffU;
|
static constexpr uint32_t kInvalidFormatVersion = 0xffffffffU;
|
||||||
|
|
||||||
|
private:
|
||||||
static constexpr int kInvalidChecksumType =
|
static constexpr int kInvalidChecksumType =
|
||||||
(1 << (sizeof(ChecksumType) * 8)) | kNoChecksum;
|
(1 << (sizeof(ChecksumType) * 8)) | kNoChecksum;
|
||||||
|
|
||||||
@ -217,7 +198,40 @@ class Footer {
|
|||||||
BlockHandle metaindex_handle_;
|
BlockHandle metaindex_handle_;
|
||||||
BlockHandle index_handle_;
|
BlockHandle index_handle_;
|
||||||
int checksum_type_ = kInvalidChecksumType;
|
int checksum_type_ = kInvalidChecksumType;
|
||||||
uint8_t block_trailer_size_ = 0; // set based on magic number
|
uint8_t block_trailer_size_ = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Builder for Footer
|
||||||
|
class FooterBuilder {
|
||||||
|
public:
|
||||||
|
// Run builder on inputs. This is a single step with lots of parameters for
|
||||||
|
// efficiency (based on perf testing).
|
||||||
|
// * table_magic_number identifies file as RocksDB SST file and which kind of
|
||||||
|
// SST format is in use.
|
||||||
|
// * format_version is a version for the footer and can also apply to other
|
||||||
|
// aspects of the SST file (see BlockBasedTableOptions::format_version).
|
||||||
|
// NOTE: To save complexity in the caller, when format_version == 0 and
|
||||||
|
// there is a corresponding legacy magic number to the one specified, the
|
||||||
|
// legacy magic number will be written for forward compatibility.
|
||||||
|
// * footer_offset is the file offset where the footer will be written
|
||||||
|
// (for future use).
|
||||||
|
// * checksum_type is for formats using block checksums.
|
||||||
|
// * index_handle is optional for some kinds of SST files.
|
||||||
|
void Build(uint64_t table_magic_number, uint32_t format_version,
|
||||||
|
uint64_t footer_offset, ChecksumType checksum_type,
|
||||||
|
const BlockHandle& metaindex_handle,
|
||||||
|
const BlockHandle& index_handle = BlockHandle::NullBlockHandle());
|
||||||
|
|
||||||
|
// After Build, get a Slice for the serialized Footer, backed by this
|
||||||
|
// FooterBuilder.
|
||||||
|
const Slice& GetSlice() const {
|
||||||
|
assert(slice_.size());
|
||||||
|
return slice_;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Slice slice_;
|
||||||
|
std::array<char, Footer::kMaxEncodedLength> data_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Read the footer from file
|
// Read the footer from file
|
||||||
|
@ -292,16 +292,12 @@ Status PlainTableBuilder::Finish() {
|
|||||||
|
|
||||||
// Write Footer
|
// Write Footer
|
||||||
// no need to write out new footer if we're using default checksum
|
// no need to write out new footer if we're using default checksum
|
||||||
Footer footer;
|
FooterBuilder footer;
|
||||||
footer.set_table_magic_number(kLegacyPlainTableMagicNumber)
|
footer.Build(kPlainTableMagicNumber, /* format_version */ 0, offset_,
|
||||||
.set_format_version(0)
|
kNoChecksum, metaindex_block_handle);
|
||||||
.set_metaindex_handle(metaindex_block_handle)
|
io_status_ = file_->Append(footer.GetSlice());
|
||||||
.set_index_handle(BlockHandle::NullBlockHandle());
|
|
||||||
std::string footer_encoding;
|
|
||||||
footer.EncodeTo(&footer_encoding, offset_);
|
|
||||||
io_status_ = file_->Append(footer_encoding);
|
|
||||||
if (io_status_.ok()) {
|
if (io_status_.ok()) {
|
||||||
offset_ += footer_encoding.size();
|
offset_ += footer.GetSlice().size();
|
||||||
}
|
}
|
||||||
status_ = io_status_;
|
status_ = io_status_;
|
||||||
return status_;
|
return status_;
|
||||||
|
@ -4175,17 +4175,12 @@ TEST(TableTest, FooterTests) {
|
|||||||
BlockHandle meta_index(data_size + index_size + 2 * 5, metaindex_size);
|
BlockHandle meta_index(data_size + index_size + 2 * 5, metaindex_size);
|
||||||
uint64_t footer_offset = data_size + metaindex_size + index_size + 3 * 5;
|
uint64_t footer_offset = data_size + metaindex_size + index_size + 3 * 5;
|
||||||
{
|
{
|
||||||
// upconvert legacy block based
|
// legacy block based
|
||||||
std::string encoded;
|
FooterBuilder footer;
|
||||||
Footer footer;
|
footer.Build(kBlockBasedTableMagicNumber, /* format_version */ 0,
|
||||||
footer.set_table_magic_number(kLegacyBlockBasedTableMagicNumber)
|
footer_offset, kCRC32c, meta_index, index);
|
||||||
.set_format_version(0)
|
|
||||||
.set_metaindex_handle(meta_index)
|
|
||||||
.set_index_handle(index);
|
|
||||||
footer.EncodeTo(&encoded, footer_offset);
|
|
||||||
Footer decoded_footer;
|
Footer decoded_footer;
|
||||||
Slice encoded_slice(encoded);
|
ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset));
|
||||||
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
|
|
||||||
ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
|
ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
|
||||||
ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c);
|
ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c);
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
||||||
@ -4194,21 +4189,19 @@ TEST(TableTest, FooterTests) {
|
|||||||
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
||||||
ASSERT_EQ(decoded_footer.format_version(), 0U);
|
ASSERT_EQ(decoded_footer.format_version(), 0U);
|
||||||
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U);
|
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U);
|
||||||
|
// Ensure serialized with legacy magic
|
||||||
|
ASSERT_EQ(
|
||||||
|
DecodeFixed64(footer.GetSlice().data() + footer.GetSlice().size() - 8),
|
||||||
|
kLegacyBlockBasedTableMagicNumber);
|
||||||
}
|
}
|
||||||
// block based, various checksums, various versions
|
// block based, various checksums, various versions
|
||||||
for (auto t : GetSupportedChecksums()) {
|
for (auto t : GetSupportedChecksums()) {
|
||||||
for (uint32_t fv = 1; IsSupportedFormatVersion(fv); ++fv) {
|
for (uint32_t fv = 1; IsSupportedFormatVersion(fv); ++fv) {
|
||||||
std::string encoded;
|
FooterBuilder footer;
|
||||||
Footer footer;
|
footer.Build(kBlockBasedTableMagicNumber, fv, footer_offset, t,
|
||||||
footer.set_table_magic_number(kBlockBasedTableMagicNumber)
|
meta_index, index);
|
||||||
.set_format_version(fv)
|
|
||||||
.set_metaindex_handle(meta_index)
|
|
||||||
.set_index_handle(index)
|
|
||||||
.set_checksum_type(t);
|
|
||||||
footer.EncodeTo(&encoded, footer_offset);
|
|
||||||
Footer decoded_footer;
|
Footer decoded_footer;
|
||||||
Slice encoded_slice(encoded);
|
ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset));
|
||||||
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
|
|
||||||
ASSERT_EQ(decoded_footer.table_magic_number(),
|
ASSERT_EQ(decoded_footer.table_magic_number(),
|
||||||
kBlockBasedTableMagicNumber);
|
kBlockBasedTableMagicNumber);
|
||||||
ASSERT_EQ(decoded_footer.checksum_type(), t);
|
ASSERT_EQ(decoded_footer.checksum_type(), t);
|
||||||
@ -4224,45 +4217,38 @@ TEST(TableTest, FooterTests) {
|
|||||||
// Plain table is not supported in ROCKSDB_LITE
|
// Plain table is not supported in ROCKSDB_LITE
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
{
|
{
|
||||||
// upconvert legacy plain table
|
// legacy plain table
|
||||||
std::string encoded;
|
FooterBuilder footer;
|
||||||
Footer footer;
|
footer.Build(kPlainTableMagicNumber, /* format_version */ 0, footer_offset,
|
||||||
footer.set_table_magic_number(kLegacyPlainTableMagicNumber)
|
kNoChecksum, meta_index);
|
||||||
.set_format_version(0)
|
|
||||||
.set_metaindex_handle(meta_index)
|
|
||||||
.set_index_handle(index);
|
|
||||||
footer.EncodeTo(&encoded, footer_offset);
|
|
||||||
Footer decoded_footer;
|
Footer decoded_footer;
|
||||||
Slice encoded_slice(encoded);
|
ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset));
|
||||||
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
|
|
||||||
ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
|
ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
|
||||||
ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c);
|
ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c);
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
||||||
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
ASSERT_EQ(decoded_footer.index_handle().offset(), 0U);
|
||||||
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
ASSERT_EQ(decoded_footer.index_handle().size(), 0U);
|
||||||
ASSERT_EQ(decoded_footer.format_version(), 0U);
|
ASSERT_EQ(decoded_footer.format_version(), 0U);
|
||||||
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
|
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
|
||||||
|
// Ensure serialized with legacy magic
|
||||||
|
ASSERT_EQ(
|
||||||
|
DecodeFixed64(footer.GetSlice().data() + footer.GetSlice().size() - 8),
|
||||||
|
kLegacyPlainTableMagicNumber);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
// xxhash plain table (not currently used)
|
// xxhash plain table (not currently used)
|
||||||
std::string encoded;
|
FooterBuilder footer;
|
||||||
Footer footer;
|
footer.Build(kPlainTableMagicNumber, /* format_version */ 1, footer_offset,
|
||||||
footer.set_table_magic_number(kPlainTableMagicNumber)
|
kxxHash, meta_index);
|
||||||
.set_format_version(1)
|
|
||||||
.set_metaindex_handle(meta_index)
|
|
||||||
.set_index_handle(index)
|
|
||||||
.set_checksum_type(kxxHash);
|
|
||||||
footer.EncodeTo(&encoded, footer_offset);
|
|
||||||
Footer decoded_footer;
|
Footer decoded_footer;
|
||||||
Slice encoded_slice(encoded);
|
ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset));
|
||||||
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
|
|
||||||
ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
|
ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
|
||||||
ASSERT_EQ(decoded_footer.checksum_type(), kxxHash);
|
ASSERT_EQ(decoded_footer.checksum_type(), kxxHash);
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
||||||
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
ASSERT_EQ(decoded_footer.index_handle().offset(), 0U);
|
||||||
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
ASSERT_EQ(decoded_footer.index_handle().size(), 0U);
|
||||||
ASSERT_EQ(decoded_footer.format_version(), 1U);
|
ASSERT_EQ(decoded_footer.format_version(), 1U);
|
||||||
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
|
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user