diff --git a/db/log_format.h b/db/log_format.h index 919c087e2..97eb13393 100644 --- a/db/log_format.h +++ b/db/log_format.h @@ -22,14 +22,24 @@ enum RecordType { // For fragments kFirstType = 2, kMiddleType = 3, - kLastType = 4 + kLastType = 4, + + // For recycled log files + kRecyclableFullType = 5, + kRecyclableFirstType = 6, + kRecyclableMiddleType = 7, + kRecyclableLastType = 8, }; -static const int kMaxRecordType = kLastType; +static const int kMaxRecordType = kRecyclableLastType; static const unsigned int kBlockSize = 32768; // Header is checksum (4 bytes), type (1 byte), length (2 bytes). static const int kHeaderSize = 4 + 1 + 2; +// Recyclable header is checksum (4 bytes), type (1 byte), log number +// (4 bytes), length (2 bytes). +static const int kRecyclableHeaderSize = 4 + 1 + 4 + 2; + } // namespace log } // namespace rocksdb diff --git a/db/log_reader.cc b/db/log_reader.cc index d1b136660..512dd08d3 100644 --- a/db/log_reader.cc +++ b/db/log_reader.cc @@ -95,6 +95,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch, const unsigned int record_type = ReadPhysicalRecord(&fragment, &drop_size); switch (record_type) { case kFullType: + case kRecyclableFullType: if (in_fragmented_record && !scratch->empty()) { // Handle bug in earlier versions of log::Writer where // it could emit an empty kFirstType record at the tail end @@ -109,6 +110,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch, return true; case kFirstType: + case kRecyclableFirstType: if (in_fragmented_record && !scratch->empty()) { // Handle bug in earlier versions of log::Writer where // it could emit an empty kFirstType record at the tail end @@ -122,6 +124,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch, break; case kMiddleType: + case kRecyclableMiddleType: if (!in_fragmented_record) { ReportCorruption(fragment.size(), "missing start of fragmented record(1)"); @@ -131,6 +134,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch, break; 
case kLastType: + case kRecyclableLastType: if (!in_fragmented_record) { ReportCorruption(fragment.size(), "missing start of fragmented record(2)"); @@ -162,6 +166,23 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch, } return false; + case kOldRecord: + if (wal_recovery_mode != WALRecoveryMode::kSkipAnyCorruptedRecords) { + // Treat a record from a previous instance of the log as EOF. + if (in_fragmented_record) { + if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency) { + // in clean shutdown we don't expect any error in the log files + ReportCorruption(scratch->size(), "error reading trailing data"); + } + // This can be caused by the writer dying immediately after + // writing a physical record but before completing the next; don't + // treat it as a corruption, just ignore the entire logical record. + scratch->clear(); + } + return false; + } + // fall-thru + case kBadRecord: if (in_fragmented_record) { ReportCorruption(scratch->size(), "error in middle of record"); @@ -263,37 +284,49 @@ void Reader::ReportDrop(size_t bytes, const Status& reason) { } } +bool Reader::ReadMore(size_t* drop_size, int *error) { + if (!eof_ && !read_error_) { + // Last read was a full read, so this is a trailer to skip + buffer_.clear(); + Status status = file_->Read(kBlockSize, &buffer_, backing_store_); + end_of_buffer_offset_ += buffer_.size(); + if (!status.ok()) { + buffer_.clear(); + ReportDrop(kBlockSize, status); + read_error_ = true; + *error = kEof; + return false; + } else if (buffer_.size() < (size_t)kBlockSize) { + eof_ = true; + eof_offset_ = buffer_.size(); + } + return true; + } else { + // Note that if buffer_ is non-empty, we have a truncated header at the + // end of the file, which can be caused by the writer crashing in the + // middle of writing the header. Unless explicitly requested we don't + // consider this an error, just report EOF. 
+ if (buffer_.size()) { + *drop_size = buffer_.size(); + buffer_.clear(); + *error = kBadHeader; + return false; + } + buffer_.clear(); + *error = kEof; + return false; + } +} + unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size) { while (true) { + // We need at least the minimum header size if (buffer_.size() < (size_t)kHeaderSize) { - if (!eof_ && !read_error_) { - // Last read was a full read, so this is a trailer to skip - buffer_.clear(); - Status status = file_->Read(kBlockSize, &buffer_, backing_store_); - end_of_buffer_offset_ += buffer_.size(); - if (!status.ok()) { - buffer_.clear(); - ReportDrop(kBlockSize, status); - read_error_ = true; - return kEof; - } else if (buffer_.size() < (size_t)kBlockSize) { - eof_ = true; - eof_offset_ = buffer_.size(); - } - continue; - } else { - // Note that if buffer_ is non-empty, we have a truncated header at the - // end of the file, which can be caused by the writer crashing in the - // middle of writing the header. Unless explicitly requested we don't - // considering this an error, just report EOF. 
- if (buffer_.size()) { - *drop_size = buffer_.size(); - buffer_.clear(); - return kBadHeader; - } - buffer_.clear(); - return kEof; + int r; + if (!ReadMore(drop_size, &r)) { + return r; } + continue; } // Parse the header @@ -302,7 +335,23 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size) { const uint32_t b = static_cast(header[5]) & 0xff; const unsigned int type = header[6]; const uint32_t length = a | (b << 8); - if (kHeaderSize + length > buffer_.size()) { + int header_size = kHeaderSize; + if (type >= kRecyclableFullType && type <= kRecyclableLastType) { + header_size = kRecyclableHeaderSize; + // We need enough for the larger header + if (buffer_.size() < (size_t)kRecyclableHeaderSize) { + int r; + if (!ReadMore(drop_size, &r)) { + return r; + } + continue; + } + const uint32_t log_num = DecodeFixed32(header + 7); + if (log_num != log_number_) { + return kOldRecord; + } + } + if (header_size + length > buffer_.size()) { *drop_size = buffer_.size(); buffer_.clear(); if (!eof_) { @@ -331,7 +380,7 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size) { // Check crc if (checksum_) { uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header)); - uint32_t actual_crc = crc32c::Value(header + 6, 1 + length); + uint32_t actual_crc = crc32c::Value(header + 6, length + header_size - 6); if (actual_crc != expected_crc) { // Drop the rest of the buffer since "length" itself may have // been corrupted and if we trust it, we could find some @@ -344,16 +393,16 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size) { } } - buffer_.remove_prefix(kHeaderSize + length); + buffer_.remove_prefix(header_size + length); // Skip physical record that started before initial_offset_ - if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length < + if (end_of_buffer_offset_ - buffer_.size() - header_size - length < initial_offset_) { result->clear(); return kBadRecord; } - *result = Slice(header + 
kHeaderSize, length); + *result = Slice(header + header_size, length); return type; } } diff --git a/db/log_reader.h b/db/log_reader.h index 422469bab..28f0a2c1e 100644 --- a/db/log_reader.h +++ b/db/log_reader.h @@ -124,6 +124,8 @@ class Reader { kBadRecord = kMaxRecordType + 2, // Returned when we fail to read a valid header. kBadHeader = kMaxRecordType + 3, + // Returned when we read an old record from a previous user of the log. + kOldRecord = kMaxRecordType + 4, }; // Skips all blocks that are completely before "initial_offset_". @@ -134,6 +136,9 @@ class Reader { // Return type, or one of the preceding special values unsigned int ReadPhysicalRecord(Slice* result, size_t* drop_size); + // Read some more + bool ReadMore(size_t* drop_size, int *error); + // Reports dropped bytes to the reporter. // buffer_ must be updated to remove the dropped bytes prior to invocation. void ReportCorruption(size_t bytes, const char* reason); diff --git a/db/log_test.cc b/db/log_test.cc index 22921090a..41f4c8223 100644 --- a/db/log_test.cc +++ b/db/log_test.cc @@ -153,7 +153,7 @@ class LogTest : public ::testing::TestWithParam { // Record metadata for testing initial offset functionality static size_t initial_offset_record_sizes_[]; - static uint64_t initial_offset_last_record_offsets_[]; + uint64_t initial_offset_last_record_offsets_[4]; public: LogTest() @@ -163,8 +163,16 @@ class LogTest : public ::testing::TestWithParam { source_holder_( test::GetSequentialFileReader(new StringSource(reader_contents_))), writer_(std::move(dest_holder_), 123, GetParam()), - reader_(NULL, std::move(source_holder_), &report_, - true /*checksum*/, 0 /*initial_offset*/, 123) {} + reader_(NULL, std::move(source_holder_), &report_, true /*checksum*/, + 0 /*initial_offset*/, 123) { + int header_size = GetParam() ? 
kRecyclableHeaderSize : kHeaderSize; + initial_offset_last_record_offsets_[0] = 0; + initial_offset_last_record_offsets_[1] = header_size + 10000; + initial_offset_last_record_offsets_[2] = 2 * (header_size + 10000); + initial_offset_last_record_offsets_[3] = 2 * (header_size + 10000) + + (2 * log::kBlockSize - 1000) + + 3 * header_size; + } void Write(const std::string& msg) { writer_.AddRecord(Slice(msg)); @@ -200,9 +208,11 @@ class LogTest : public ::testing::TestWithParam { dest->Drop(bytes); } - void FixChecksum(int header_offset, int len) { + void FixChecksum(int header_offset, int len, bool recyclable) { // Compute crc of type/len/data - uint32_t crc = crc32c::Value(&dest_contents()[header_offset+6], 1 + len); + int header_size = recyclable ? kRecyclableHeaderSize : kHeaderSize; + uint32_t crc = crc32c::Value(&dest_contents()[header_offset + 6], + header_size - 6 + len); crc = crc32c::Mask(crc); EncodeFixed32(&dest_contents()[header_offset], crc); } @@ -292,13 +302,6 @@ size_t LogTest::initial_offset_record_sizes_[] = 2 * log::kBlockSize - 1000, // Span three blocks 1}; -uint64_t LogTest::initial_offset_last_record_offsets_[] = - {0, - kHeaderSize + 10000, - 2 * (kHeaderSize + 10000), - 2 * (kHeaderSize + 10000) + - (2 * log::kBlockSize - 1000) + 3 * kHeaderSize}; - TEST_P(LogTest, Empty) { ASSERT_EQ("EOF", Read()); } TEST_P(LogTest, ReadWrite) { @@ -336,9 +339,10 @@ TEST_P(LogTest, Fragmentation) { TEST_P(LogTest, MarginalTrailer) { // Make a trailer that is exactly the same length as an empty record. - const int n = kBlockSize - 2*kHeaderSize; + int header_size = GetParam() ? 
kRecyclableHeaderSize : kHeaderSize; + const int n = kBlockSize - 2 * header_size; Write(BigString("foo", n)); - ASSERT_EQ((unsigned int)(kBlockSize - kHeaderSize), WrittenBytes()); + ASSERT_EQ((unsigned int)(kBlockSize - header_size), WrittenBytes()); Write(""); Write("bar"); ASSERT_EQ(BigString("foo", n), Read()); @@ -349,9 +353,10 @@ TEST_P(LogTest, MarginalTrailer) { TEST_P(LogTest, MarginalTrailer2) { // Make a trailer that is exactly the same length as an empty record. - const int n = kBlockSize - 2*kHeaderSize; + int header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; + const int n = kBlockSize - 2 * header_size; Write(BigString("foo", n)); - ASSERT_EQ((unsigned int)(kBlockSize - kHeaderSize), WrittenBytes()); + ASSERT_EQ((unsigned int)(kBlockSize - header_size), WrittenBytes()); Write("bar"); ASSERT_EQ(BigString("foo", n), Read()); ASSERT_EQ("bar", Read()); @@ -361,9 +366,10 @@ TEST_P(LogTest, MarginalTrailer2) { } TEST_P(LogTest, ShortTrailer) { - const int n = kBlockSize - 2*kHeaderSize + 4; + int header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; + const int n = kBlockSize - 2 * header_size + 4; Write(BigString("foo", n)); - ASSERT_EQ((unsigned int)(kBlockSize - kHeaderSize + 4), WrittenBytes()); + ASSERT_EQ((unsigned int)(kBlockSize - header_size + 4), WrittenBytes()); Write(""); Write("bar"); ASSERT_EQ(BigString("foo", n), Read()); @@ -373,9 +379,10 @@ TEST_P(LogTest, ShortTrailer) { } TEST_P(LogTest, AlignedEof) { - const int n = kBlockSize - 2*kHeaderSize + 4; + int header_size = GetParam() ? 
kRecyclableHeaderSize : kHeaderSize; + const int n = kBlockSize - 2 * header_size + 4; Write(BigString("foo", n)); - ASSERT_EQ((unsigned int)(kBlockSize - kHeaderSize + 4), WrittenBytes()); + ASSERT_EQ((unsigned int)(kBlockSize - header_size + 4), WrittenBytes()); ASSERT_EQ(BigString("foo", n), Read()); ASSERT_EQ("EOF", Read()); } @@ -407,7 +414,7 @@ TEST_P(LogTest, BadRecordType) { Write("foo"); // Type is stored in header[6] IncrementByte(6, 100); - FixChecksum(0, 3); + FixChecksum(0, 3, false); ASSERT_EQ("EOF", Read()); ASSERT_EQ(3U, DroppedBytes()); ASSERT_EQ("OK", MatchError("unknown record type")); @@ -432,7 +439,8 @@ TEST_P(LogTest, TruncatedTrailingRecordIsNotIgnored) { } TEST_P(LogTest, BadLength) { - const int kPayloadSize = kBlockSize - kHeaderSize; + int header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; + const int kPayloadSize = kBlockSize - header_size; Write(BigString("bar", kPayloadSize)); Write("foo"); // Least significant size byte is stored in header[4]. @@ -459,17 +467,17 @@ TEST_P(LogTest, BadLengthAtEndIsNotIgnored) { } TEST_P(LogTest, ChecksumMismatch) { - Write("foo"); - IncrementByte(0, 10); + Write("foooooo"); + IncrementByte(0, 14); ASSERT_EQ("EOF", Read()); - ASSERT_EQ(10U, DroppedBytes()); + ASSERT_EQ(14U + 4 * !!GetParam(), DroppedBytes()); ASSERT_EQ("OK", MatchError("checksum mismatch")); } TEST_P(LogTest, UnexpectedMiddleType) { Write("foo"); - SetByte(6, kMiddleType); - FixChecksum(0, 3); + SetByte(6, GetParam() ? kRecyclableMiddleType : kMiddleType); + FixChecksum(0, 3, !!GetParam()); ASSERT_EQ("EOF", Read()); ASSERT_EQ(3U, DroppedBytes()); ASSERT_EQ("OK", MatchError("missing start")); @@ -477,8 +485,8 @@ TEST_P(LogTest, UnexpectedMiddleType) { TEST_P(LogTest, UnexpectedLastType) { Write("foo"); - SetByte(6, kLastType); - FixChecksum(0, 3); + SetByte(6, GetParam() ? 
kRecyclableLastType : kLastType); + FixChecksum(0, 3, !!GetParam()); ASSERT_EQ("EOF", Read()); ASSERT_EQ(3U, DroppedBytes()); ASSERT_EQ("OK", MatchError("missing start")); @@ -487,8 +495,8 @@ TEST_P(LogTest, UnexpectedLastType) { TEST_P(LogTest, UnexpectedFullType) { Write("foo"); Write("bar"); - SetByte(6, kFirstType); - FixChecksum(0, 3); + SetByte(6, GetParam() ? kRecyclableFirstType : kFirstType); + FixChecksum(0, 3, !!GetParam()); ASSERT_EQ("bar", Read()); ASSERT_EQ("EOF", Read()); ASSERT_EQ(3U, DroppedBytes()); @@ -498,8 +506,8 @@ TEST_P(LogTest, UnexpectedFullType) { TEST_P(LogTest, UnexpectedFirstType) { Write("foo"); Write(BigString("bar", 100000)); - SetByte(6, kFirstType); - FixChecksum(0, 3); + SetByte(6, GetParam() ? kRecyclableFirstType : kFirstType); + FixChecksum(0, 3, !!GetParam()); ASSERT_EQ(BigString("bar", 100000), Read()); ASSERT_EQ("EOF", Read()); ASSERT_EQ(3U, DroppedBytes()); @@ -573,13 +581,25 @@ TEST_P(LogTest, ReadSecondOneOff) { CheckInitialOffsetRecord(1, 1); } TEST_P(LogTest, ReadSecondTenThousand) { CheckInitialOffsetRecord(10000, 1); } -TEST_P(LogTest, ReadSecondStart) { CheckInitialOffsetRecord(10007, 1); } +TEST_P(LogTest, ReadSecondStart) { + int header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; + CheckInitialOffsetRecord(10000 + header_size, 1); +} -TEST_P(LogTest, ReadThirdOneOff) { CheckInitialOffsetRecord(10008, 2); } +TEST_P(LogTest, ReadThirdOneOff) { + int header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; + CheckInitialOffsetRecord(10000 + header_size + 1, 2); +} -TEST_P(LogTest, ReadThirdStart) { CheckInitialOffsetRecord(20014, 2); } +TEST_P(LogTest, ReadThirdStart) { + int header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; + CheckInitialOffsetRecord(20000 + 2 * header_size, 2); +} -TEST_P(LogTest, ReadFourthOneOff) { CheckInitialOffsetRecord(20015, 3); } +TEST_P(LogTest, ReadFourthOneOff) { + int header_size = GetParam() ? 
kRecyclableHeaderSize : kHeaderSize; + CheckInitialOffsetRecord(20000 + 2 * header_size + 1, 3); +} TEST_P(LogTest, ReadFourthFirstBlockTrailer) { CheckInitialOffsetRecord(log::kBlockSize - 4, 3); @@ -594,8 +614,9 @@ TEST_P(LogTest, ReadFourthLastBlock) { } TEST_P(LogTest, ReadFourthStart) { + int header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; CheckInitialOffsetRecord( - 2 * (kHeaderSize + 1000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize, + 2 * (header_size + 1000) + (2 * log::kBlockSize - 1000) + 3 * header_size, 3); } @@ -606,7 +627,8 @@ TEST_P(LogTest, ReadPastEnd) { CheckOffsetPastEndReturnsNoRecords(5); } TEST_P(LogTest, ClearEofSingleBlock) { Write("foo"); Write("bar"); - ForceEOF(3 + kHeaderSize + 2); + int header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; + ForceEOF(3 + header_size + 2); ASSERT_EQ("foo", Read()); UnmarkEOF(); ASSERT_EQ("bar", Read()); @@ -620,10 +642,11 @@ TEST_P(LogTest, ClearEofSingleBlock) { TEST_P(LogTest, ClearEofMultiBlock) { size_t num_full_blocks = 5; - size_t n = (kBlockSize - kHeaderSize) * num_full_blocks + 25; + int header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; + size_t n = (kBlockSize - header_size) * num_full_blocks + 25; Write(BigString("foo", n)); Write(BigString("bar", n)); - ForceEOF(n + num_full_blocks * kHeaderSize + 10); + ForceEOF(n + num_full_blocks * header_size + header_size + 3); ASSERT_EQ(BigString("foo", n), Read()); ASSERT_TRUE(IsEOF()); UnmarkEOF(); diff --git a/db/log_writer.cc b/db/log_writer.cc index 9bef090f1..6ba285656 100644 --- a/db/log_writer.cc +++ b/db/log_writer.cc @@ -37,6 +37,10 @@ Status Writer::AddRecord(const Slice& slice) { const char* ptr = slice.data(); size_t left = slice.size(); + // Header size varies depending on whether we are recycling or not. + const int header_size = + recycle_log_files_ ? kRecyclableHeaderSize : kHeaderSize; + // Fragment the record if necessary and emit it. 
Note that if slice // is empty, we still want to iterate once to emit a single // zero-length record @@ -45,32 +49,34 @@ Status Writer::AddRecord(const Slice& slice) { do { const int leftover = kBlockSize - block_offset_; assert(leftover >= 0); - if (leftover < kHeaderSize) { + if (leftover < header_size) { // Switch to a new block if (leftover > 0) { - // Fill the trailer (literal below relies on kHeaderSize being 7) - assert(kHeaderSize == 7); - dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover)); + // Fill the trailer (literal below relies on kHeaderSize and + // kRecyclableHeaderSize being <= 11) + assert(header_size <= 11); + dest_->Append( + Slice("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", leftover)); } block_offset_ = 0; } - // Invariant: we never leave < kHeaderSize bytes in a block. - assert(static_cast(kBlockSize) - block_offset_ >= kHeaderSize); + // Invariant: we never leave < header_size bytes in a block. + assert(static_cast(kBlockSize) - block_offset_ >= header_size); - const size_t avail = kBlockSize - block_offset_ - kHeaderSize; + const size_t avail = kBlockSize - block_offset_ - header_size; const size_t fragment_length = (left < avail) ? left : avail; RecordType type; const bool end = (left == fragment_length); if (begin && end) { - type = kFullType; + type = recycle_log_files_ ? kRecyclableFullType : kFullType; } else if (begin) { - type = kFirstType; + type = recycle_log_files_ ? kRecyclableFirstType : kFirstType; } else if (end) { - type = kLastType; + type = recycle_log_files_ ? kRecyclableLastType : kLastType; } else { - type = kMiddleType; + type = recycle_log_files_ ? 
kRecyclableMiddleType : kMiddleType; } s = EmitPhysicalRecord(type, ptr, fragment_length); @@ -83,28 +89,48 @@ Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) { assert(n <= 0xffff); // Must fit in two bytes - assert(block_offset_ + kHeaderSize + n <= kBlockSize); + + size_t header_size; + char buf[kRecyclableHeaderSize]; // Format the header - char buf[kHeaderSize]; buf[4] = static_cast(n & 0xff); buf[5] = static_cast(n >> 8); buf[6] = static_cast(t); + uint32_t crc = type_crc_[t]; + if (t < kRecyclableFullType) { + // Legacy record format + assert(block_offset_ + kHeaderSize + n <= kBlockSize); + header_size = kHeaderSize; + } else { + // Recyclable record format + assert(block_offset_ + kRecyclableHeaderSize + n <= kBlockSize); + header_size = kRecyclableHeaderSize; + + // Only encode low 32-bits of the 64-bit log number. This means + // we will fail to detect an old record if we recycled a log from + // ~4 billion logs ago, but that is effectively impossible, and + // even if it were we'd be far more likely to see a false positive + // on the 32-bit CRC. + EncodeFixed32(buf + 7, static_cast(log_number_)); + crc = crc32c::Extend(crc, buf + 7, 4); + } + // Compute the crc of the record type and the payload. 
- uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n); - crc = crc32c::Mask(crc); // Adjust for storage + crc = crc32c::Extend(crc, ptr, n); + crc = crc32c::Mask(crc); // Adjust for storage EncodeFixed32(buf, crc); // Write the header and the payload - Status s = dest_->Append(Slice(buf, kHeaderSize)); + Status s = dest_->Append(Slice(buf, header_size)); if (s.ok()) { s = dest_->Append(Slice(ptr, n)); if (s.ok()) { s = dest_->Flush(); } } - block_offset_ += kHeaderSize + n; + block_offset_ += header_size + n; return s; } diff --git a/db/log_writer.h b/db/log_writer.h index a8aa5d476..8f5c3a10e 100644 --- a/db/log_writer.h +++ b/db/log_writer.h @@ -43,7 +43,7 @@ namespace log { * Data is written out in kBlockSize chunks. If next record does not fit * into the space left, the leftover space will be padded with \0. * - * Record format: + * Legacy record format: * * +---------+-----------+-----------+--- ... ---+ * |CRC (4B) | Size (2B) | Type (1B) | Payload | @@ -57,6 +57,15 @@ namespace log { * blocks that are larger than kBlockSize * Payload = Byte stream as long as specified by the payload size * + * Recyclable record format: + * + * +---------+-----------+-----------+----------------+--- ... ---+ + * |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | + * +---------+-----------+-----------+----------------+--- ... ---+ + * + * Same as above, with the addition of + * Log number = 32bit log file number, so that we can distinguish between + * records written by the most recent log writer vs a previous one. */ class Writer { public: