Remove duplicates from SnapshotList::GetAll (#4860)

Summary:
The vector returned by SnapshotList::GetAll could have duplicate entries if two separate snapshots have the same sequence number. However, when this vector is used in compaction the duplicate entires are of no use and could be safely ignored. Moreover not having duplicate entires simplifies reasoning in the compaction_iterator.cc code. For example when searching for the previous_snap we currently use the snapshot before the current one but the way the code uses that it expects it to be also less than the current snapshot, which would be simpler to read if there is no duplicate entry in the snapshot list.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4860

Differential Revision: D13615502

Pulled By: maysamyabandeh

fbshipit-source-id: d45bf01213ead5f39db811f951802da6fcc3332b
This commit is contained in:
Maysam Yabandeh 2019-01-09 16:09:36 -08:00 committed by Facebook Github Bot
parent 75714b4c08
commit d56ac22b44
4 changed files with 44 additions and 7 deletions

View File

@ -80,7 +80,7 @@ CompactionIterator::CompactionIterator(
#ifndef NDEBUG
// findEarliestVisibleSnapshot assumes this ordering.
for (size_t i = 1; i < snapshots_->size(); ++i) {
assert(snapshots_->at(i - 1) <= snapshots_->at(i));
assert(snapshots_->at(i - 1) < snapshots_->at(i));
}
#endif
if (compaction_filter_ != nullptr) {

View File

@ -1346,6 +1346,40 @@ TEST_F(DBTest2, FirstSnapshotTest) {
db_->ReleaseSnapshot(s1);
}
#ifndef ROCKSDB_LITE
TEST_F(DBTest2, DuplicateSnapshot) {
Options options;
options = CurrentOptions(options);
std::vector<const Snapshot*> snapshots;
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
SequenceNumber oldest_ww_snap, first_ww_snap;
Put("k", "v"); // inc seq
snapshots.push_back(db_->GetSnapshot());
snapshots.push_back(db_->GetSnapshot());
Put("k", "v"); // inc seq
snapshots.push_back(db_->GetSnapshot());
snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary());
first_ww_snap = snapshots.back()->GetSequenceNumber();
Put("k", "v"); // inc seq
snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary());
snapshots.push_back(db_->GetSnapshot());
Put("k", "v"); // inc seq
snapshots.push_back(db_->GetSnapshot());
{
InstrumentedMutexLock l(dbi->mutex());
auto seqs = dbi->snapshots().GetAll(&oldest_ww_snap);
ASSERT_EQ(seqs.size(), 4); // duplicates are not counted
ASSERT_EQ(oldest_ww_snap, first_ww_snap);
}
for (auto s : snapshots) {
db_->ReleaseSnapshot(s);
}
}
#endif // ROCKSDB_LITE
class PinL0IndexAndFilterBlocksTest
: public DBTestBase,
public testing::WithParamInterface<std::tuple<bool, bool>> {

View File

@ -365,17 +365,17 @@ TEST_F(FlushJobTest, Snapshots) {
auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(),
kMaxSequenceNumber);
std::vector<SequenceNumber> snapshots;
std::set<SequenceNumber> snapshots_set;
int keys = 10000;
int max_inserts_per_keys = 8;
Random rnd(301);
for (int i = 0; i < keys / 2; ++i) {
snapshots.push_back(rnd.Uniform(keys * (max_inserts_per_keys / 2)) + 1);
snapshots_set.insert(snapshots.back());
snapshots_set.insert(rnd.Uniform(keys * (max_inserts_per_keys / 2)) + 1);
}
std::sort(snapshots.begin(), snapshots.end());
// set has already removed the duplicate snapshots
std::vector<SequenceNumber> snapshots(snapshots_set.begin(),
snapshots_set.end());
new_mem->Ref();
SequenceNumber current_seqno = 0;

View File

@ -86,7 +86,7 @@ class SnapshotList {
}
// retrieve all snapshot numbers up until max_seq. They are sorted in
// ascending order.
// ascending order (with no duplicates).
std::vector<SequenceNumber> GetAll(
SequenceNumber* oldest_write_conflict_snapshot = nullptr,
const SequenceNumber& max_seq = kMaxSequenceNumber) const {
@ -104,7 +104,10 @@ class SnapshotList {
if (s->next_->number_ > max_seq) {
break;
}
ret.push_back(s->next_->number_);
// Avoid duplicates
if (ret.empty() || ret.back() != s->next_->number_) {
ret.push_back(s->next_->number_);
}
if (oldest_write_conflict_snapshot != nullptr &&
*oldest_write_conflict_snapshot == kMaxSequenceNumber &&