Make it able to ignore WAL related VersionEdits in older versions (#7873)

Summary:
Although the tags for `WalAddition`, `WalDeletion` are after `kTagSafeIgnoreMask`, to actually be able to skip these entries in older versions of RocksDB, we require that they are encoded with their encoded size as the prefix. This requirement is not met in the current codebase, so a downgraded DB may fail to open if these entries exist in the MANIFEST.

If a DB wants to downgrade, and its MANIFEST contains `WalAddition` or `WalDeletion`, it can set `track_and_verify_wals_in_manifest` to `false`, then restart twice, then downgrade. On the first restart, a new MANIFEST will be created with a `WalDeletion` indicating that all previously tracked WALs are removed from MANIFEST. On the second restart, since there is  no tracked WALs in MANIFEST now, a new MANIFEST will be created with neither `WalAddition` nor `WalDeletion`. Then the DB can downgrade.

Tags for `BlobFileAddition`, `BlobFileGarbage` also have the same problem, but this PR focuses on solving the problem for WAL edits.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7873

Test Plan: Added a `VersionEditTest::IgnorableTags` unit test to verify all entries with tags larger than `kTagSafeIgnoreMask` can actually be skipped and won't affect parsing of other entries.

Reviewed By: ajkr

Differential Revision: D25935930

Pulled By: cheng-chang

fbshipit-source-id: 7a02fdba4311d6084328c14aed110a26d08c3efb
This commit is contained in:
Cheng Chang 2021-01-19 19:26:05 -08:00
parent 48edcfc17d
commit 3dd5bc2a25
3 changed files with 142 additions and 18 deletions

View File

@ -226,13 +226,17 @@ bool VersionEdit::EncodeTo(std::string* dst) const {
} }
for (const auto& wal_addition : wal_additions_) { for (const auto& wal_addition : wal_additions_) {
PutVarint32(dst, kWalAddition); PutVarint32(dst, kWalAddition2);
wal_addition.EncodeTo(dst); std::string encoded;
wal_addition.EncodeTo(&encoded);
PutLengthPrefixedSlice(dst, encoded);
} }
if (!wal_deletion_.IsEmpty()) { if (!wal_deletion_.IsEmpty()) {
PutVarint32(dst, kWalDeletion); PutVarint32(dst, kWalDeletion2);
wal_deletion_.EncodeTo(dst); std::string encoded;
wal_deletion_.EncodeTo(&encoded);
PutLengthPrefixedSlice(dst, encoded);
} }
// 0 is default and does not need to be explicitly written // 0 is default and does not need to be explicitly written
@ -375,6 +379,11 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) {
Status VersionEdit::DecodeFrom(const Slice& src) { Status VersionEdit::DecodeFrom(const Slice& src) {
Clear(); Clear();
#ifndef NDEBUG
bool ignore_ignorable_tags = false;
TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:IgnoreIgnorableTags",
&ignore_ignorable_tags);
#endif
Slice input = src; Slice input = src;
const char* msg = nullptr; const char* msg = nullptr;
uint32_t tag = 0; uint32_t tag = 0;
@ -385,6 +394,11 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
Slice str; Slice str;
InternalKey key; InternalKey key;
while (msg == nullptr && GetVarint32(&input, &tag)) { while (msg == nullptr && GetVarint32(&input, &tag)) {
#ifndef NDEBUG
if (ignore_ignorable_tags && tag > kTagSafeIgnoreMask) {
tag = kTagSafeIgnoreMask;
}
#endif
switch (tag) { switch (tag) {
case kDbId: case kDbId:
if (GetLengthPrefixedSlice(&input, &str)) { if (GetLengthPrefixedSlice(&input, &str)) {
@ -575,6 +589,23 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
break; break;
} }
case kWalAddition2: {
Slice encoded;
if (!GetLengthPrefixedSlice(&input, &encoded)) {
msg = "WalAddition not prefixed by length";
break;
}
WalAddition wal_addition;
const Status s = wal_addition.DecodeFrom(&encoded);
if (!s.ok()) {
return s;
}
wal_additions_.emplace_back(std::move(wal_addition));
break;
}
case kWalDeletion: { case kWalDeletion: {
WalDeletion wal_deletion; WalDeletion wal_deletion;
const Status s = wal_deletion.DecodeFrom(&input); const Status s = wal_deletion.DecodeFrom(&input);
@ -586,6 +617,23 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
break; break;
} }
case kWalDeletion2: {
Slice encoded;
if (!GetLengthPrefixedSlice(&input, &encoded)) {
msg = "WalDeletion not prefixed by length";
break;
}
WalDeletion wal_deletion;
const Status s = wal_deletion.DecodeFrom(&encoded);
if (!s.ok()) {
return s;
}
wal_deletion_ = std::move(wal_deletion);
break;
}
case kColumnFamily: case kColumnFamily:
if (!GetVarint32(&input, &column_family_)) { if (!GetVarint32(&input, &column_family_)) {
if (!msg) { if (!msg) {

View File

@ -62,6 +62,8 @@ enum Tag : uint32_t {
kWalAddition, kWalAddition,
kWalDeletion, kWalDeletion,
kFullHistoryTsLow, kFullHistoryTsLow,
kWalAddition2,
kWalDeletion2,
}; };
enum NewFileCustomTag : uint32_t { enum NewFileCustomTag : uint32_t {

View File

@ -324,14 +324,22 @@ TEST_F(VersionEditTest, AddWalEncodeDecode) {
TestEncodeDecode(edit); TestEncodeDecode(edit);
} }
static std::string PrefixEncodedWalAdditionWithLength(
const std::string& encoded) {
std::string ret;
PutVarint32(&ret, Tag::kWalAddition2);
PutLengthPrefixedSlice(&ret, encoded);
return ret;
}
TEST_F(VersionEditTest, AddWalDecodeBadLogNumber) { TEST_F(VersionEditTest, AddWalDecodeBadLogNumber) {
std::string encoded; std::string encoded;
PutVarint32(&encoded, Tag::kWalAddition);
{ {
// No log number. // No log number.
std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded);
VersionEdit edit; VersionEdit edit;
Status s = edit.DecodeFrom(encoded); Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(s.ToString().find("Error decoding WAL log number") != ASSERT_TRUE(s.ToString().find("Error decoding WAL log number") !=
std::string::npos) std::string::npos)
@ -345,8 +353,10 @@ TEST_F(VersionEditTest, AddWalDecodeBadLogNumber) {
unsigned char* ptr = reinterpret_cast<unsigned char*>(&c); unsigned char* ptr = reinterpret_cast<unsigned char*>(&c);
*ptr = 128; *ptr = 128;
encoded.append(1, c); encoded.append(1, c);
std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded);
VersionEdit edit; VersionEdit edit;
Status s = edit.DecodeFrom(encoded); Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(s.ToString().find("Error decoding WAL log number") != ASSERT_TRUE(s.ToString().find("Error decoding WAL log number") !=
std::string::npos) std::string::npos)
@ -358,14 +368,14 @@ TEST_F(VersionEditTest, AddWalDecodeBadTag) {
constexpr WalNumber kLogNumber = 100; constexpr WalNumber kLogNumber = 100;
constexpr uint64_t kSizeInBytes = 100; constexpr uint64_t kSizeInBytes = 100;
std::string encoded_without_tag; std::string encoded;
PutVarint32(&encoded_without_tag, Tag::kWalAddition); PutVarint64(&encoded, kLogNumber);
PutVarint64(&encoded_without_tag, kLogNumber);
{ {
// No tag. // No tag.
std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded);
VersionEdit edit; VersionEdit edit;
Status s = edit.DecodeFrom(encoded_without_tag); Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos) ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos)
<< s.ToString(); << s.ToString();
@ -373,12 +383,15 @@ TEST_F(VersionEditTest, AddWalDecodeBadTag) {
{ {
// Only has size tag, no terminate tag. // Only has size tag, no terminate tag.
std::string encoded_with_size = encoded_without_tag; std::string encoded_with_size = encoded;
PutVarint32(&encoded_with_size, PutVarint32(&encoded_with_size,
static_cast<uint32_t>(WalAdditionTag::kSyncedSize)); static_cast<uint32_t>(WalAdditionTag::kSyncedSize));
PutVarint64(&encoded_with_size, kSizeInBytes); PutVarint64(&encoded_with_size, kSizeInBytes);
std::string encoded_edit =
PrefixEncodedWalAdditionWithLength(encoded_with_size);
VersionEdit edit; VersionEdit edit;
Status s = edit.DecodeFrom(encoded_with_size); Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos) ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos)
<< s.ToString(); << s.ToString();
@ -386,11 +399,14 @@ TEST_F(VersionEditTest, AddWalDecodeBadTag) {
{ {
// Only has terminate tag. // Only has terminate tag.
std::string encoded_with_terminate = encoded_without_tag; std::string encoded_with_terminate = encoded;
PutVarint32(&encoded_with_terminate, PutVarint32(&encoded_with_terminate,
static_cast<uint32_t>(WalAdditionTag::kTerminate)); static_cast<uint32_t>(WalAdditionTag::kTerminate));
std::string encoded_edit =
PrefixEncodedWalAdditionWithLength(encoded_with_terminate);
VersionEdit edit; VersionEdit edit;
ASSERT_OK(edit.DecodeFrom(encoded_with_terminate)); ASSERT_OK(edit.DecodeFrom(encoded_edit));
auto& wal_addition = edit.GetWalAdditions()[0]; auto& wal_addition = edit.GetWalAdditions()[0];
ASSERT_EQ(wal_addition.GetLogNumber(), kLogNumber); ASSERT_EQ(wal_addition.GetLogNumber(), kLogNumber);
ASSERT_FALSE(wal_addition.GetMetadata().HasSyncedSize()); ASSERT_FALSE(wal_addition.GetMetadata().HasSyncedSize());
@ -401,15 +417,15 @@ TEST_F(VersionEditTest, AddWalDecodeNoSize) {
constexpr WalNumber kLogNumber = 100; constexpr WalNumber kLogNumber = 100;
std::string encoded; std::string encoded;
PutVarint32(&encoded, Tag::kWalAddition);
PutVarint64(&encoded, kLogNumber); PutVarint64(&encoded, kLogNumber);
PutVarint32(&encoded, static_cast<uint32_t>(WalAdditionTag::kSyncedSize)); PutVarint32(&encoded, static_cast<uint32_t>(WalAdditionTag::kSyncedSize));
// No real size after the size tag. // No real size after the size tag.
{ {
// Without terminate tag. // Without terminate tag.
std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded);
VersionEdit edit; VersionEdit edit;
Status s = edit.DecodeFrom(encoded); Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(s.IsCorruption());
ASSERT_TRUE(s.ToString().find("Error decoding WAL file size") != ASSERT_TRUE(s.ToString().find("Error decoding WAL file size") !=
std::string::npos) std::string::npos)
@ -419,8 +435,10 @@ TEST_F(VersionEditTest, AddWalDecodeNoSize) {
{ {
// With terminate tag. // With terminate tag.
PutVarint32(&encoded, static_cast<uint32_t>(WalAdditionTag::kTerminate)); PutVarint32(&encoded, static_cast<uint32_t>(WalAdditionTag::kTerminate));
std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded);
VersionEdit edit; VersionEdit edit;
Status s = edit.DecodeFrom(encoded); Status s = edit.DecodeFrom(encoded_edit);
ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(s.IsCorruption());
// The terminate tag is misunderstood as the size. // The terminate tag is misunderstood as the size.
ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos) ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos)
@ -515,6 +533,62 @@ TEST_F(VersionEditTest, FullHistoryTsLow) {
TestEncodeDecode(edit); TestEncodeDecode(edit);
} }
// Tests that if RocksDB is downgraded, the new types of VersionEdits
// that have a tag larger than kTagSafeIgnoreMask can be safely ignored.
TEST_F(VersionEditTest, IgnorableTags) {
SyncPoint::GetInstance()->SetCallBack(
"VersionEdit::EncodeTo:IgnoreIgnorableTags", [&](void* arg) {
bool* ignore = static_cast<bool*>(arg);
*ignore = true;
});
SyncPoint::GetInstance()->EnableProcessing();
constexpr uint64_t kPrevLogNumber = 100;
constexpr uint64_t kLogNumber = 200;
constexpr uint64_t kNextFileNumber = 300;
constexpr uint64_t kColumnFamilyId = 400;
VersionEdit edit;
// Add some ignorable entries.
for (int i = 0; i < 2; i++) {
edit.AddWal(i + 1, WalMetadata(i + 2));
}
edit.SetDBId("db_id");
// Add unignorable entries.
edit.SetPrevLogNumber(kPrevLogNumber);
edit.SetLogNumber(kLogNumber);
// Add more ignorable entries.
edit.DeleteWalsBefore(100);
// Add unignorable entry.
edit.SetNextFile(kNextFileNumber);
// Add more ignorable entries.
edit.SetFullHistoryTsLow("ts");
// Add unignorable entry.
edit.SetColumnFamily(kColumnFamilyId);
std::string encoded;
ASSERT_TRUE(edit.EncodeTo(&encoded));
VersionEdit decoded;
ASSERT_OK(decoded.DecodeFrom(encoded));
// Check that all ignorable entries are ignored.
ASSERT_FALSE(decoded.HasDbId());
ASSERT_FALSE(decoded.HasFullHistoryTsLow());
ASSERT_FALSE(decoded.IsWalAddition());
ASSERT_FALSE(decoded.IsWalDeletion());
ASSERT_TRUE(decoded.GetWalAdditions().empty());
ASSERT_TRUE(decoded.GetWalDeletion().IsEmpty());
// Check that unignorable entries are still present.
ASSERT_EQ(edit.GetPrevLogNumber(), kPrevLogNumber);
ASSERT_EQ(edit.GetLogNumber(), kLogNumber);
ASSERT_EQ(edit.GetNextFile(), kNextFileNumber);
ASSERT_EQ(edit.GetColumnFamily(), kColumnFamilyId);
SyncPoint::GetInstance()->DisableProcessing();
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) { int main(int argc, char** argv) {