Fix seqno in ingested file boundary key metadata (#8209)
Summary: Fixes https://github.com/facebook/rocksdb/issues/6245. Adapted from https://github.com/facebook/rocksdb/issues/8201 and https://github.com/facebook/rocksdb/issues/8205. Previously we were writing the ingested file's smallest/largest internal keys with sequence number zero, or `kMaxSequenceNumber` in case of range tombstone. The former (sequence number zero) is incorrect and can lead to files being incorrectly ordered. The fix in this PR is to overwrite boundary keys that have sequence number zero with the ingested file's assigned sequence number. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8209 Test Plan: repro unit test Reviewed By: riversand963 Differential Revision: D27885678 Pulled By: ajkr fbshipit-source-id: 4a9f2c6efdfff81c3a9923e915ea88b250ee7b6a
This commit is contained in:
parent
1b99947e99
commit
905dd17b35
@ -2,6 +2,7 @@
|
||||
## Unreleased
|
||||
### Bug Fixes
|
||||
* Fixed a bug in handling file rename error in distributed/network file systems when the server succeeds but client returns error. The bug can cause CURRENT file to point to non-existing MANIFEST file, thus DB cannot be opened.
|
||||
* Fixed a bug where ingested files were written with incorrect boundary key metadata. In rare cases this could have led to a level's files being wrongly ordered and queries for the boundary keys returning wrong results.
|
||||
* Fixed a data race between insertion into memtables and the retrieval of the DB properties `rocksdb.cur-size-active-mem-table`, `rocksdb.cur-size-all-mem-tables`, and `rocksdb.size-all-mem-tables`.
|
||||
|
||||
## 6.20.0 (04/16/2021)
|
||||
|
@ -1542,6 +1542,44 @@ TEST_F(ExternalSSTFileBasicTest, OverlappingFiles) {
|
||||
ASSERT_EQ(2, NumTableFilesAtLevel(0));
|
||||
}
|
||||
|
||||
TEST_F(ExternalSSTFileBasicTest, IngestFileAfterDBPut) {
|
||||
// Repro https://github.com/facebook/rocksdb/issues/6245.
|
||||
// Flush three files to L0. Ingest one more file to trigger L0->L1 compaction
|
||||
// via trivial move. The bug happened when L1 files were incorrectly sorted
|
||||
// resulting in an old value for "k" returned by `Get()`.
|
||||
Options options = CurrentOptions();
|
||||
|
||||
ASSERT_OK(Put("k", "a"));
|
||||
Flush();
|
||||
ASSERT_OK(Put("k", "a"));
|
||||
Flush();
|
||||
ASSERT_OK(Put("k", "a"));
|
||||
Flush();
|
||||
SstFileWriter sst_file_writer(EnvOptions(), options);
|
||||
|
||||
// Current file size should be 0 after sst_file_writer init and before open a
|
||||
// file.
|
||||
ASSERT_EQ(sst_file_writer.FileSize(), 0);
|
||||
|
||||
std::string file1 = sst_files_dir_ + "file1.sst";
|
||||
ASSERT_OK(sst_file_writer.Open(file1));
|
||||
ASSERT_OK(sst_file_writer.Put("k", "b"));
|
||||
|
||||
ExternalSstFileInfo file1_info;
|
||||
Status s = sst_file_writer.Finish(&file1_info);
|
||||
ASSERT_OK(s) << s.ToString();
|
||||
|
||||
// Current file size should be non-zero after success write.
|
||||
ASSERT_GT(sst_file_writer.FileSize(), 0);
|
||||
|
||||
IngestExternalFileOptions ifo;
|
||||
s = db_->IngestExternalFile({file1}, ifo);
|
||||
ASSERT_OK(s);
|
||||
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
||||
|
||||
ASSERT_EQ(Get("k"), "b");
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest,
|
||||
testing::Values(std::make_tuple(true, true),
|
||||
std::make_tuple(true, false),
|
||||
|
@ -367,9 +367,32 @@ Status ExternalSstFileIngestionJob::Run() {
|
||||
super_version, force_global_seqno, cfd_->ioptions()->compaction_style,
|
||||
last_seqno, &f, &assigned_seqno);
|
||||
}
|
||||
|
||||
// Modify the smallest/largest internal key to include the sequence number
|
||||
// that we just learned. Only overwrite sequence number zero. There could
|
||||
// be a nonzero sequence number already to indicate a range tombstone's
|
||||
// exclusive endpoint.
|
||||
ParsedInternalKey smallest_parsed, largest_parsed;
|
||||
if (status.ok()) {
|
||||
status = ParseInternalKey(*f.smallest_internal_key.rep(),
|
||||
&smallest_parsed, false /* log_err_key */);
|
||||
}
|
||||
if (status.ok()) {
|
||||
status = ParseInternalKey(*f.largest_internal_key.rep(), &largest_parsed,
|
||||
false /* log_err_key */);
|
||||
}
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
if (smallest_parsed.sequence == 0) {
|
||||
UpdateInternalKey(f.smallest_internal_key.rep(), assigned_seqno,
|
||||
smallest_parsed.type);
|
||||
}
|
||||
if (largest_parsed.sequence == 0) {
|
||||
UpdateInternalKey(f.largest_internal_key.rep(), assigned_seqno,
|
||||
largest_parsed.type);
|
||||
}
|
||||
|
||||
status = AssignGlobalSeqnoForIngestedFile(&f, assigned_seqno);
|
||||
TEST_SYNC_POINT_CALLBACK("ExternalSstFileIngestionJob::Run",
|
||||
&assigned_seqno);
|
||||
|
Loading…
Reference in New Issue
Block a user