fix: Reusing-Iterator reads stale keys after DeleteRange() performed (#9258)

Summary:
fix https://github.com/facebook/rocksdb/issues/9255

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9258

Reviewed By: pdillinger

Differential Revision: D34879684

Pulled By: ajkr

fbshipit-source-id: 5934f4b7524dc27ecdf1430e0456a0fc02958fc7
This commit is contained in:
Jermy Li 2022-03-15 09:50:21 -07:00 committed by Andrew Kryczka
parent 9db48d6e08
commit fddc1a79dd
3 changed files with 79 additions and 22 deletions

View File

@ -1,4 +1,8 @@
# Rocksdb Change Log # Rocksdb Change Log
## Unreleased
### Bug Fixes
* Fixed a bug that `Iterator::Refresh()` reads stale keys after DeleteRange() performed.
## 7.0.2 (03/12/2022) ## 7.0.2 (03/12/2022)
* Fixed a bug that DisableManualCompaction may assert when disable an unscheduled manual compaction. * Fixed a bug that DisableManualCompaction may assert when disable an unscheduled manual compaction.

View File

@ -58,6 +58,7 @@ Status ArenaWrappedDBIter::Refresh() {
uint64_t cur_sv_number = cfd_->GetSuperVersionNumber(); uint64_t cur_sv_number = cfd_->GetSuperVersionNumber();
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1"); TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1");
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2"); TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2");
while (true) {
if (sv_number_ != cur_sv_number) { if (sv_number_ != cur_sv_number) {
Env* env = db_iter_->env(); Env* env = db_iter_->env();
db_iter_->~DBIter(); db_iter_->~DBIter();
@ -79,9 +80,33 @@ Status ArenaWrappedDBIter::Refresh() {
read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(), read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(),
latest_seq, /* allow_unprepared_value */ true); latest_seq, /* allow_unprepared_value */ true);
SetIterUnderDBIter(internal_iter); SetIterUnderDBIter(internal_iter);
break;
} else { } else {
db_iter_->set_sequence(db_impl_->GetLatestSequenceNumber()); SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
// Refresh range-tombstones in MemTable
if (!read_options_.ignore_range_deletions) {
SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_);
ReadRangeDelAggregator* range_del_agg =
db_iter_->GetRangeDelAggregator();
std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter;
range_del_iter.reset(
sv->mem->NewRangeTombstoneIterator(read_options_, latest_seq));
range_del_agg->AddTombstones(std::move(range_del_iter));
cfd_->ReturnThreadLocalSuperVersion(sv);
}
// Refresh latest sequence number
db_iter_->set_sequence(latest_seq);
db_iter_->set_valid(false); db_iter_->set_valid(false);
// Check again if the latest super version number is changed
uint64_t latest_sv_number = cfd_->GetSuperVersionNumber();
if (latest_sv_number != cur_sv_number) {
// If the super version number is changed after refreshing,
// fallback to Re-Init the InternalIterator
cur_sv_number = latest_sv_number;
continue;
}
break;
}
} }
return Status::OK(); return Status::OK();
} }

View File

@ -1724,6 +1724,34 @@ TEST_F(DBRangeDelTest, OverlappedKeys) {
ASSERT_EQ(0, NumTableFilesAtLevel(1)); ASSERT_EQ(0, NumTableFilesAtLevel(1));
} }
TEST_F(DBRangeDelTest, IteratorRefresh) {
// Refreshing an iterator after a range tombstone is added should cause the
// deleted range of keys to disappear.
for (bool sv_changed : {false, true}) {
ASSERT_OK(db_->Put(WriteOptions(), "key1", "value1"));
ASSERT_OK(db_->Put(WriteOptions(), "key2", "value2"));
auto* iter = db_->NewIterator(ReadOptions());
ASSERT_OK(iter->status());
ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
"key2", "key3"));
if (sv_changed) {
ASSERT_OK(db_->Flush(FlushOptions()));
}
ASSERT_OK(iter->Refresh());
ASSERT_OK(iter->status());
iter->SeekToFirst();
ASSERT_EQ("key1", iter->key());
iter->Next();
ASSERT_FALSE(iter->Valid());
delete iter;
}
}
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE