diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 62f0b3bb4..06cb006ad 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -1788,31 +1788,18 @@ void BlockBasedTableBuilder::WriteRangeDelBlock( void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle, BlockHandle& index_block_handle) { Rep* r = rep_; - // No need to write out new footer if we're using default checksum. - // We're writing legacy magic number because we want old versions of RocksDB - // be able to read files generated with new release (just in case if - // somebody wants to roll back after an upgrade) - // TODO(icanadi) at some point in the future, when we're absolutely sure - // nobody will roll back to RocksDB 2.x versions, retire the legacy magic - // number and always write new table files with new magic number - bool legacy = (r->table_options.format_version == 0); // this is guaranteed by BlockBasedTableBuilder's constructor assert(r->table_options.checksum == kCRC32c || r->table_options.format_version != 0); - Footer footer; - footer - .set_table_magic_number(legacy ? 
kLegacyBlockBasedTableMagicNumber - : kBlockBasedTableMagicNumber) - .set_format_version(r->table_options.format_version) - .set_metaindex_handle(metaindex_block_handle) - .set_index_handle(index_block_handle) - .set_checksum_type(r->table_options.checksum); - std::string footer_encoding; - footer.EncodeTo(&footer_encoding, r->get_offset()); assert(ok()); - IOStatus ios = r->file->Append(footer_encoding); + + FooterBuilder footer; + footer.Build(kBlockBasedTableMagicNumber, r->table_options.format_version, + r->get_offset(), r->table_options.checksum, + metaindex_block_handle, index_block_handle); + IOStatus ios = r->file->Append(footer.GetSlice()); if (ios.ok()) { - r->set_offset(r->get_offset() + footer_encoding.size()); + r->set_offset(r->get_offset() + footer.GetSlice().size()); } else { r->SetIOStatus(ios); r->SetStatus(ios); diff --git a/table/cuckoo/cuckoo_table_builder.cc b/table/cuckoo/cuckoo_table_builder.cc index 0707ddf10..0068770b2 100644 --- a/table/cuckoo/cuckoo_table_builder.cc +++ b/table/cuckoo/cuckoo_table_builder.cc @@ -393,15 +393,10 @@ Status CuckooTableBuilder::Finish() { return status_; } - Footer footer; - footer.set_table_magic_number(kCuckooTableMagicNumber) - .set_format_version(1) - .set_metaindex_handle(meta_index_block_handle) - .set_index_handle(BlockHandle::NullBlockHandle()) - .set_checksum_type(kNoChecksum); - std::string footer_encoding; - footer.EncodeTo(&footer_encoding, offset); - io_status_ = file_->Append(footer_encoding); + FooterBuilder footer; + footer.Build(kCuckooTableMagicNumber, /* format_version */ 1, offset, + kNoChecksum, meta_index_block_handle); + io_status_ = file_->Append(footer.GetSlice()); status_ = io_status_; return status_; } diff --git a/table/format.cc b/table/format.cc index 3cf91b698..9f79db46c 100644 --- a/table/format.cc +++ b/table/format.cc @@ -173,21 +173,25 @@ inline uint64_t UpconvertLegacyFooterFormat(uint64_t magic_number) { return kPlainTableMagicNumber; } assert(false); - return 0; + return 
magic_number; } -} // namespace - -Footer& Footer::set_table_magic_number(uint64_t magic_number) { - assert(table_magic_number_ == kNullTableMagicNumber); - table_magic_number_ = magic_number; +inline uint64_t DownconvertToLegacyFooterFormat(uint64_t magic_number) { + if (magic_number == kBlockBasedTableMagicNumber) { + return kLegacyBlockBasedTableMagicNumber; + } + if (magic_number == kPlainTableMagicNumber) { + return kLegacyPlainTableMagicNumber; + } + assert(false); + return magic_number; +} +inline uint8_t BlockTrailerSizeForMagicNumber(uint64_t magic_number) { if (magic_number == kBlockBasedTableMagicNumber || magic_number == kLegacyBlockBasedTableMagicNumber) { - block_trailer_size_ = - static_cast(BlockBasedTable::kBlockTrailerSize); + return static_cast(BlockBasedTable::kBlockTrailerSize); } else { - block_trailer_size_ = 0; + return 0; } - return *this; } // Footer format, in three parts: @@ -206,60 +210,69 @@ Footer& Footer::set_table_magic_number(uint64_t magic_number) { // -> format_version >= 1 (inferred from NOT legacy magic number) // format_version (uint32LE, 4 bytes), also called "footer version" // newer magic number (8 bytes) -void Footer::EncodeTo(std::string* dst, uint64_t footer_offset) const { + +constexpr size_t kFooterPart2Size = 2 * BlockHandle::kMaxEncodedLength; +} // namespace + +void FooterBuilder::Build(uint64_t magic_number, uint32_t format_version, + uint64_t footer_offset, ChecksumType checksum_type, + const BlockHandle& metaindex_handle, + const BlockHandle& index_handle) { (void)footer_offset; // Future use - // Sanitize magic numbers & format versions - assert(table_magic_number_ != kNullTableMagicNumber); - uint64_t magic = table_magic_number_; - uint32_t fv = format_version_; - assert(fv != kInvalidFormatVersion); - assert(IsLegacyFooterFormat(magic) == (fv == 0)); + assert(magic_number != Footer::kNullTableMagicNumber); + assert(IsSupportedFormatVersion(format_version)); - ChecksumType ct = checksum_type(); - - // Allocate 
destination data and generate parts 1 and 3 - const size_t original_size = dst->size(); char* part2; - if (fv > 0) { - dst->resize(original_size + kNewVersionsEncodedLength); - char* part1 = &(*dst)[original_size]; - part2 = part1 + 1; - char* part3 = part2 + 2 * BlockHandle::kMaxEncodedLength; - assert(&(*dst)[dst->size() - 1] + 1 - part3 == /* part 3 size */ 12); + char* part3; + if (format_version > 0) { + slice_ = Slice(data_.data(), Footer::kNewVersionsEncodedLength); // Generate parts 1 and 3 - part1[0] = ct; - EncodeFixed32(part3, fv); - EncodeFixed64(part3 + 4, magic); + char* cur = data_.data(); + // Part 1 + *(cur++) = checksum_type; + // Part 2 + part2 = cur; + // Skip over part 2 for now + cur += kFooterPart2Size; + // Part 3 + part3 = cur; + EncodeFixed32(cur, format_version); + cur += 4; + EncodeFixed64(cur, magic_number); + assert(cur + 8 == slice_.data() + slice_.size()); } else { - dst->resize(original_size + kVersion0EncodedLength); - part2 = &(*dst)[original_size]; - char* part3 = part2 + 2 * BlockHandle::kMaxEncodedLength; - assert(&(*dst)[dst->size() - 1] + 1 - part3 == /* part 3 size */ 8); + slice_ = Slice(data_.data(), Footer::kVersion0EncodedLength); // Legacy SST files use kCRC32c checksum but it's not stored in footer. - assert(ct == kNoChecksum || ct == kCRC32c); - // Generate part 3 (part 1 empty) - EncodeFixed64(part3, magic); + assert(checksum_type == kNoChecksum || checksum_type == kCRC32c); + // Generate part 3 (part 1 empty, skip part 2 for now) + part2 = data_.data(); + part3 = part2 + kFooterPart2Size; + char* cur = part3; + // Use legacy magic numbers to indicate format_version=0, for + // compatibility. No other cases should use format_version=0. 
+ EncodeFixed64(cur, DownconvertToLegacyFooterFormat(magic_number)); + assert(cur + 8 == slice_.data() + slice_.size()); } - // Generate Part2 - // Variable size encode handles (sigh) - part2 = metaindex_handle_.EncodeTo(part2); - /*part2 = */ index_handle_.EncodeTo(part2); - - // remainder of part2 is already zero padded + { + char* cur = part2; + cur = metaindex_handle.EncodeTo(cur); + cur = index_handle.EncodeTo(cur); + // Zero pad remainder + std::fill(cur, part3, char{0}); + } } -Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) { +Status Footer::DecodeFrom(Slice input, uint64_t input_offset) { (void)input_offset; // Future use // Only decode to unused Footer assert(table_magic_number_ == kNullTableMagicNumber); assert(input != nullptr); - assert(input->size() >= kMinEncodedLength); + assert(input.size() >= kMinEncodedLength); - const char* magic_ptr = - input->data() + input->size() - kMagicNumberLengthByte; + const char* magic_ptr = input.data() + input.size() - kMagicNumberLengthByte; uint64_t magic = DecodeFixed64(magic_ptr); // We check for legacy formats here and silently upconvert them @@ -267,13 +280,14 @@ Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) { if (legacy) { magic = UpconvertLegacyFooterFormat(magic); } - set_table_magic_number(magic); + table_magic_number_ = magic; + block_trailer_size_ = BlockTrailerSizeForMagicNumber(magic); // Parse Part3 if (legacy) { // The size is already asserted to be at least kMinEncodedLength // at the beginning of the function - input->remove_prefix(input->size() - kVersion0EncodedLength); + input.remove_prefix(input.size() - kVersion0EncodedLength); format_version_ = 0 /* legacy */; checksum_type_ = kCRC32c; } else { @@ -284,14 +298,14 @@ Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) { ROCKSDB_NAMESPACE::ToString(format_version_)); } // All known format versions >= 1 occupy exactly this many bytes. 
- if (input->size() < kNewVersionsEncodedLength) { + if (input.size() < kNewVersionsEncodedLength) { return Status::Corruption("Input is too short to be an SST file"); } - uint64_t adjustment = input->size() - kNewVersionsEncodedLength; - input->remove_prefix(adjustment); + uint64_t adjustment = input.size() - kNewVersionsEncodedLength; + input.remove_prefix(adjustment); // Parse Part1 - char chksum = input->data()[0]; + char chksum = input.data()[0]; checksum_type_ = lossless_cast(chksum); if (!IsSupportedChecksumType(checksum_type())) { return Status::Corruption( @@ -299,21 +313,16 @@ Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) { ROCKSDB_NAMESPACE::ToString(lossless_cast(chksum))); } // Consume checksum type field - input->remove_prefix(1); + input.remove_prefix(1); } // Parse Part2 - Status result = metaindex_handle_.DecodeFrom(input); + Status result = metaindex_handle_.DecodeFrom(&input); if (result.ok()) { - result = index_handle_.DecodeFrom(input); + result = index_handle_.DecodeFrom(&input); } - if (!result.ok()) { - return result; - } - - // Mark all input consumed (skip padding & part3) - *input = Slice(input->data() + input->size(), 0U); - return Status::OK(); + return result; + // Padding in part2 is ignored } std::string Footer::ToString() const { @@ -384,7 +393,7 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file, file->file_name()); } - s = footer->DecodeFrom(&footer_input, read_offset); + s = footer->DecodeFrom(footer_input, read_offset); if (!s.ok()) { return s; } diff --git a/table/format.h b/table/format.h index b32ab324e..011dab241 100644 --- a/table/format.h +++ b/table/format.h @@ -9,6 +9,7 @@ #pragma once +#include #include #include @@ -128,66 +129,45 @@ inline bool IsSupportedFormatVersion(uint32_t version) { // elsewhere under the metaindex block. 
For example, checksum_type is // required for verifying metaindex block checksum (when applicable), but // index block handle can easily go in metaindex block (possible future). +// See also FooterBuilder below. class Footer { public: + // Create empty. Populate using DecodeFrom. Footer() {} - // Uses builder pattern rather than distinctive ctors + // Deserialize a footer (populate fields) from `input` and check for various + // corruptions. `input_offset` is the offset within the target file of + // `input` buffer (future use). + Status DecodeFrom(Slice input, uint64_t input_offset); // Table magic number identifies file as RocksDB SST file and which kind of // SST format is use. - Footer& set_table_magic_number(uint64_t tmn); uint64_t table_magic_number() const { return table_magic_number_; } // A version (footer and more) within a kind of SST. (It would add more // unnecessary complexity to separate footer versions and // BBTO::format_version.) - Footer& set_format_version(uint32_t fv) { - format_version_ = fv; - return *this; - } uint32_t format_version() const { return format_version_; } // Block handle for metaindex block. - Footer& set_metaindex_handle(const BlockHandle& h) { - metaindex_handle_ = h; - return *this; - } const BlockHandle& metaindex_handle() const { return metaindex_handle_; } // Block handle for (top-level) index block. - Footer& set_index_handle(const BlockHandle& h) { - index_handle_ = h; - return *this; - } const BlockHandle& index_handle() const { return index_handle_; } // Checksum type used in the file. - Footer& set_checksum_type(ChecksumType ct) { - checksum_type_ = ct; - return *this; - } ChecksumType checksum_type() const { return static_cast(checksum_type_); } - // Appends serialized footer to `dst`. The starting offset of the footer - // within the file is required for future work. 
- void EncodeTo(std::string* dst, uint64_t footer_offset) const; - - // Deserialize a footer (populate fields) from `input` and check for various - // corruptions. On success (and some error cases) `input` is advanced past - // the footer. Like EncodeTo, the offset within the file will be nedded for - // future work - Status DecodeFrom(Slice* input, uint64_t input_offset); + // Block trailer size used by file with this footer (e.g. 5 for block-based + // table and 0 for plain table). This is inferred from magic number so + // not in the serialized form. + inline size_t GetBlockTrailerSize() const { return block_trailer_size_; } // Convert this object to a human readable form std::string ToString() const; - // Block trailer size used by file with this footer (e.g. 5 for block-based - // table and 0 for plain table) - inline size_t GetBlockTrailerSize() const { return block_trailer_size_; } - // Encoded lengths of Footers. Bytes for serialized Footer will always be // >= kMinEncodedLength and <= kMaxEncodedLength. // @@ -207,8 +187,9 @@ class Footer { static constexpr uint64_t kNullTableMagicNumber = 0; - private: static constexpr uint32_t kInvalidFormatVersion = 0xffffffffU; + + private: static constexpr int kInvalidChecksumType = (1 << (sizeof(ChecksumType) * 8)) | kNoChecksum; @@ -217,7 +198,40 @@ class Footer { BlockHandle metaindex_handle_; BlockHandle index_handle_; int checksum_type_ = kInvalidChecksumType; - uint8_t block_trailer_size_ = 0; // set based on magic number + uint8_t block_trailer_size_ = 0; +}; + +// Builder for Footer +class FooterBuilder { + public: + // Run builder on inputs. This is a single step with lots of parameters for + // efficiency (based on perf testing). + // * table_magic_number identifies file as RocksDB SST file and which kind of + // SST format is in use. + // * format_version is a version for the footer and can also apply to other + // aspects of the SST file (see BlockBasedTableOptions::format_version). 
+ // NOTE: To save complexity in the caller, when format_version == 0 and + // there is a corresponding legacy magic number to the one specified, the + // legacy magic number will be written for forward compatibility. + // * footer_offset is the file offset where the footer will be written + // (for future use). + // * checksum_type is for formats using block checksums. + // * index_handle is optional for some kinds of SST files. + void Build(uint64_t table_magic_number, uint32_t format_version, + uint64_t footer_offset, ChecksumType checksum_type, + const BlockHandle& metaindex_handle, + const BlockHandle& index_handle = BlockHandle::NullBlockHandle()); + + // After Build, get a Slice for the serialized Footer, backed by this + // FooterBuilder. + const Slice& GetSlice() const { + assert(slice_.size()); + return slice_; + } + + private: + Slice slice_; + std::array data_; }; // Read the footer from file diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc index 9f09f8349..2c9351601 100644 --- a/table/plain/plain_table_builder.cc +++ b/table/plain/plain_table_builder.cc @@ -292,16 +292,12 @@ Status PlainTableBuilder::Finish() { // Write Footer // no need to write out new footer if we're using default checksum - Footer footer; - footer.set_table_magic_number(kLegacyPlainTableMagicNumber) - .set_format_version(0) - .set_metaindex_handle(metaindex_block_handle) - .set_index_handle(BlockHandle::NullBlockHandle()); - std::string footer_encoding; - footer.EncodeTo(&footer_encoding, offset_); - io_status_ = file_->Append(footer_encoding); + FooterBuilder footer; + footer.Build(kPlainTableMagicNumber, /* format_version */ 0, offset_, + kNoChecksum, metaindex_block_handle); + io_status_ = file_->Append(footer.GetSlice()); if (io_status_.ok()) { - offset_ += footer_encoding.size(); + offset_ += footer.GetSlice().size(); } status_ = io_status_; return status_; diff --git a/table/table_test.cc b/table/table_test.cc index aa0b3b90f..a98c943f9 
100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -4175,17 +4175,12 @@ TEST(TableTest, FooterTests) { BlockHandle meta_index(data_size + index_size + 2 * 5, metaindex_size); uint64_t footer_offset = data_size + metaindex_size + index_size + 3 * 5; { - // upconvert legacy block based - std::string encoded; - Footer footer; - footer.set_table_magic_number(kLegacyBlockBasedTableMagicNumber) - .set_format_version(0) - .set_metaindex_handle(meta_index) - .set_index_handle(index); - footer.EncodeTo(&encoded, footer_offset); + // legacy block based + FooterBuilder footer; + footer.Build(kBlockBasedTableMagicNumber, /* format_version */ 0, + footer_offset, kCRC32c, meta_index, index); Footer decoded_footer; - Slice encoded_slice(encoded); - ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset)); + ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); @@ -4194,21 +4189,19 @@ TEST(TableTest, FooterTests) { ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.format_version(), 0U); ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U); + // Ensure serialized with legacy magic + ASSERT_EQ( + DecodeFixed64(footer.GetSlice().data() + footer.GetSlice().size() - 8), + kLegacyBlockBasedTableMagicNumber); } // block based, various checksums, various versions for (auto t : GetSupportedChecksums()) { for (uint32_t fv = 1; IsSupportedFormatVersion(fv); ++fv) { - std::string encoded; - Footer footer; - footer.set_table_magic_number(kBlockBasedTableMagicNumber) - .set_format_version(fv) - .set_metaindex_handle(meta_index) - .set_index_handle(index) - .set_checksum_type(t); - footer.EncodeTo(&encoded, footer_offset); + FooterBuilder footer; + footer.Build(kBlockBasedTableMagicNumber, fv, footer_offset, t, + 
meta_index, index); Footer decoded_footer; - Slice encoded_slice(encoded); - ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset)); + ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); ASSERT_EQ(decoded_footer.checksum_type(), t); @@ -4224,45 +4217,38 @@ TEST(TableTest, FooterTests) { // Plain table is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE { - // upconvert legacy plain table - std::string encoded; - Footer footer; - footer.set_table_magic_number(kLegacyPlainTableMagicNumber) - .set_format_version(0) - .set_metaindex_handle(meta_index) - .set_index_handle(index); - footer.EncodeTo(&encoded, footer_offset); + // legacy plain table + FooterBuilder footer; + footer.Build(kPlainTableMagicNumber, /* format_version */ 0, footer_offset, + kNoChecksum, meta_index); Footer decoded_footer; - Slice encoded_slice(encoded); - ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset)); + ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); - ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); - ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); + ASSERT_EQ(decoded_footer.index_handle().offset(), 0U); + ASSERT_EQ(decoded_footer.index_handle().size(), 0U); ASSERT_EQ(decoded_footer.format_version(), 0U); ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U); + // Ensure serialized with legacy magic + ASSERT_EQ( + DecodeFixed64(footer.GetSlice().data() + footer.GetSlice().size() - 8), + kLegacyPlainTableMagicNumber); } { // xxhash plain table (not currently used) - std::string encoded; - Footer footer; - 
footer.set_table_magic_number(kPlainTableMagicNumber) - .set_format_version(1) - .set_metaindex_handle(meta_index) - .set_index_handle(index) - .set_checksum_type(kxxHash); - footer.EncodeTo(&encoded, footer_offset); + FooterBuilder footer; + footer.Build(kPlainTableMagicNumber, /* format_version */ 1, footer_offset, + kxxHash, meta_index); Footer decoded_footer; - Slice encoded_slice(encoded); - ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset)); + ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); ASSERT_EQ(decoded_footer.checksum_type(), kxxHash); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); - ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); - ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); + ASSERT_EQ(decoded_footer.index_handle().offset(), 0U); + ASSERT_EQ(decoded_footer.index_handle().size(), 0U); ASSERT_EQ(decoded_footer.format_version(), 1U); ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U); }