2016-12-20 01:44:30 +01:00
|
|
|
// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
|
2017-07-16 01:03:42 +02:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2016-12-20 01:44:30 +01:00
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
|
|
|
#include "db/db_test_util.h"
|
|
|
|
#include "db/range_del_aggregator.h"
|
|
|
|
#include "rocksdb/comparator.h"
|
|
|
|
#include "util/testutil.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
class RangeDelAggregatorTest : public testing::Test {};
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
struct ExpectedPoint {
|
|
|
|
Slice begin;
|
|
|
|
SequenceNumber seq;
|
2018-07-14 02:34:54 +02:00
|
|
|
bool expectAlive;
|
2016-12-20 01:44:30 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
enum Direction {
|
|
|
|
kForward,
|
|
|
|
kReverse,
|
|
|
|
};
|
|
|
|
|
2018-10-10 00:15:27 +02:00
|
|
|
struct AddTombstonesArgs {
|
|
|
|
const std::vector<RangeTombstone> tombstones;
|
|
|
|
const InternalKey* smallest;
|
|
|
|
const InternalKey* largest;
|
|
|
|
};
|
|
|
|
|
2018-09-18 21:06:59 +02:00
|
|
|
static auto bytewise_icmp = InternalKeyComparator(BytewiseComparator());
|
2017-11-28 20:18:42 +01:00
|
|
|
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
void AddTombstones(RangeDelAggregator* range_del_agg,
|
2018-07-14 02:34:54 +02:00
|
|
|
const std::vector<RangeTombstone>& range_dels,
|
|
|
|
const InternalKey* smallest = nullptr,
|
|
|
|
const InternalKey* largest = nullptr) {
|
2017-11-28 20:18:42 +01:00
|
|
|
std::vector<std::string> keys, values;
|
|
|
|
for (const auto& range_del : range_dels) {
|
|
|
|
auto key_and_value = range_del.Serialize();
|
|
|
|
keys.push_back(key_and_value.first.Encode().ToString());
|
|
|
|
values.push_back(key_and_value.second.ToString());
|
|
|
|
}
|
|
|
|
std::unique_ptr<test::VectorIterator> range_del_iter(
|
|
|
|
new test::VectorIterator(keys, values));
|
2018-07-14 02:34:54 +02:00
|
|
|
range_del_agg->AddTombstones(std::move(range_del_iter), smallest, largest);
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void VerifyTombstonesEq(const RangeTombstone& a, const RangeTombstone& b) {
|
|
|
|
ASSERT_EQ(a.seq_, b.seq_);
|
|
|
|
ASSERT_EQ(a.start_key_, b.start_key_);
|
|
|
|
ASSERT_EQ(a.end_key_, b.end_key_);
|
|
|
|
}
|
|
|
|
|
|
|
|
void VerifyRangeDelIter(
|
|
|
|
RangeDelIterator* range_del_iter,
|
|
|
|
const std::vector<RangeTombstone>& expected_range_dels) {
|
|
|
|
size_t i = 0;
|
2018-10-10 00:15:27 +02:00
|
|
|
for (; range_del_iter->Valid(); range_del_iter->Next(), i++) {
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
VerifyTombstonesEq(expected_range_dels[i], range_del_iter->Tombstone());
|
|
|
|
}
|
|
|
|
ASSERT_EQ(expected_range_dels.size(), i);
|
|
|
|
ASSERT_FALSE(range_del_iter->Valid());
|
|
|
|
}
|
|
|
|
|
|
|
|
void VerifyRangeDels(
|
2018-10-10 00:15:27 +02:00
|
|
|
const std::vector<AddTombstonesArgs>& all_args,
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
const std::vector<ExpectedPoint>& expected_points,
|
2018-07-14 02:34:54 +02:00
|
|
|
const std::vector<RangeTombstone>& expected_collapsed_range_dels,
|
2018-09-18 21:06:59 +02:00
|
|
|
const InternalKeyComparator& icmp = bytewise_icmp) {
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
// Test same result regardless of which order the range deletions are added
|
|
|
|
// and regardless of collapsed mode.
|
|
|
|
for (bool collapsed : {false, true}) {
|
|
|
|
for (Direction dir : {kForward, kReverse}) {
|
|
|
|
RangeDelAggregator range_del_agg(icmp, {} /* snapshots */, collapsed);
|
2018-10-10 00:15:27 +02:00
|
|
|
std::vector<RangeTombstone> all_range_dels;
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
|
2018-10-10 00:15:27 +02:00
|
|
|
for (const auto& args : all_args) {
|
|
|
|
std::vector<RangeTombstone> range_dels = args.tombstones;
|
|
|
|
if (dir == kReverse) {
|
|
|
|
std::reverse(range_dels.begin(), range_dels.end());
|
|
|
|
}
|
|
|
|
all_range_dels.insert(all_range_dels.end(), range_dels.begin(),
|
|
|
|
range_dels.end());
|
|
|
|
AddTombstones(&range_del_agg, range_dels, args.smallest, args.largest);
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
auto mode = RangeDelPositioningMode::kFullScan;
|
|
|
|
if (collapsed) {
|
|
|
|
mode = RangeDelPositioningMode::kForwardTraversal;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const auto expected_point : expected_points) {
|
|
|
|
ParsedInternalKey parsed_key;
|
|
|
|
parsed_key.user_key = expected_point.begin;
|
|
|
|
parsed_key.sequence = expected_point.seq;
|
|
|
|
parsed_key.type = kTypeValue;
|
2018-10-10 00:15:27 +02:00
|
|
|
std::string ikey;
|
|
|
|
AppendInternalKey(&ikey, parsed_key);
|
|
|
|
ASSERT_FALSE(range_del_agg.ShouldDelete(ikey, mode));
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
if (parsed_key.sequence > 0) {
|
|
|
|
--parsed_key.sequence;
|
2018-10-10 00:15:27 +02:00
|
|
|
ikey.clear();
|
|
|
|
AppendInternalKey(&ikey, parsed_key);
|
2018-07-14 02:34:54 +02:00
|
|
|
if (expected_point.expectAlive) {
|
2018-10-10 00:15:27 +02:00
|
|
|
ASSERT_FALSE(range_del_agg.ShouldDelete(ikey, mode));
|
2018-07-14 02:34:54 +02:00
|
|
|
} else {
|
2018-10-10 00:15:27 +02:00
|
|
|
ASSERT_TRUE(range_del_agg.ShouldDelete(ikey, mode));
|
2018-07-14 02:34:54 +02:00
|
|
|
}
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (collapsed) {
|
2018-10-10 00:15:27 +02:00
|
|
|
all_range_dels = expected_collapsed_range_dels;
|
|
|
|
VerifyRangeDelIter(range_del_agg.NewIterator().get(), all_range_dels);
|
|
|
|
} else if (all_args.size() == 1 && all_args[0].smallest == nullptr &&
|
|
|
|
all_args[0].largest == nullptr) {
|
2018-07-14 02:34:54 +02:00
|
|
|
// Tombstones in an uncollapsed map are presented in start key
|
|
|
|
// order. Tombstones with the same start key are presented in
|
|
|
|
// insertion order. We don't handle tombstone truncation here, so the
|
|
|
|
// verification is only performed if no truncation was requested.
|
2018-10-10 00:15:27 +02:00
|
|
|
std::stable_sort(all_range_dels.begin(), all_range_dels.end(),
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
[&](const RangeTombstone& a, const RangeTombstone& b) {
|
|
|
|
return icmp.user_comparator()->Compare(
|
|
|
|
a.start_key_, b.start_key_) < 0;
|
|
|
|
});
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDelIter(range_del_agg.NewIterator().get(), all_range_dels);
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
RangeDelAggregator range_del_agg(icmp, {} /* snapshots */,
|
|
|
|
false /* collapse_deletions */);
|
2018-10-10 00:15:27 +02:00
|
|
|
for (const auto& args : all_args) {
|
|
|
|
AddTombstones(&range_del_agg, args.tombstones, args.smallest, args.largest);
|
|
|
|
}
|
2017-11-28 20:18:42 +01:00
|
|
|
for (size_t i = 1; i < expected_points.size(); ++i) {
|
|
|
|
bool overlapped = range_del_agg.IsRangeOverlapped(
|
|
|
|
expected_points[i - 1].begin, expected_points[i].begin);
|
|
|
|
if (expected_points[i - 1].seq > 0 || expected_points[i].seq > 0) {
|
|
|
|
ASSERT_TRUE(overlapped);
|
|
|
|
} else {
|
|
|
|
ASSERT_FALSE(overlapped);
|
|
|
|
}
|
|
|
|
}
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
} // anonymous namespace
|
|
|
|
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
TEST_F(RangeDelAggregatorTest, Empty) { VerifyRangeDels({}, {{"a", 0}}, {}); }
|
2016-12-20 01:44:30 +01:00
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, SameStartAndEnd) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "a", 5}}}}, {{" ", 0}, {"a", 0}, {"b", 0}}, {});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, Single) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "b", 10}}}}, {{" ", 0}, {"a", 10}, {"b", 0}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{"a", "b", 10}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlapAboveLeft) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "c", 10}, {"b", "d", 5}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"a", 10}, {"c", 5}, {"d", 0}},
|
|
|
|
{{"a", "c", 10}, {"c", "d", 5}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlapAboveRight) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "c", 5}, {"b", "d", 10}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"a", 5}, {"b", 10}, {"d", 0}},
|
|
|
|
{{"a", "b", 5}, {"b", "d", 10}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlapAboveMiddle) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "d", 5}, {"b", "c", 10}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"a", 5}, {"b", 10}, {"c", 5}, {"d", 0}},
|
|
|
|
{{"a", "b", 5}, {"b", "c", 10}, {"c", "d", 5}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
2018-09-18 21:06:59 +02:00
|
|
|
TEST_F(RangeDelAggregatorTest, OverlapAboveMiddleReverse) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"d", "a", 5}, {"c", "b", 10}}}},
|
2018-09-18 21:06:59 +02:00
|
|
|
{{"z", 0}, {"d", 5}, {"c", 10}, {"b", 5}, {"a", 0}},
|
|
|
|
{{"d", "c", 5}, {"c", "b", 10}, {"b", "a", 5}},
|
|
|
|
InternalKeyComparator(ReverseBytewiseComparator()));
|
|
|
|
}
|
|
|
|
|
2016-12-20 01:44:30 +01:00
|
|
|
TEST_F(RangeDelAggregatorTest, OverlapFully) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "d", 10}, {"b", "c", 5}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"a", 10}, {"d", 0}}, {{"a", "d", 10}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlapPoint) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "b", 5}, {"b", "c", 10}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"a", 5}, {"b", 10}, {"c", 0}},
|
|
|
|
{{"a", "b", 5}, {"b", "c", 10}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, SameStartKey) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "c", 5}, {"a", "b", 10}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"a", 10}, {"b", 5}, {"c", 0}},
|
|
|
|
{{"a", "b", 10}, {"b", "c", 5}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, SameEndKey) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "d", 5}, {"b", "d", 10}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"a", 5}, {"b", 10}, {"d", 0}},
|
|
|
|
{{"a", "b", 5}, {"b", "d", 10}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, GapsBetweenRanges) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "b", 5}, {"c", "d", 10}, {"e", "f", 15}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0},
|
|
|
|
{"a", 5},
|
|
|
|
{"b", 0},
|
|
|
|
{"c", 10},
|
|
|
|
{"d", 0},
|
|
|
|
{"da", 0},
|
|
|
|
{"e", 15},
|
|
|
|
{"f", 0}},
|
|
|
|
{{"a", "b", 5}, {"c", "d", 10}, {"e", "f", 15}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
Handle tombstones at the same seqno in the CollapsedRangeDelMap (#4424)
Summary:
The CollapsedRangeDelMap was entirely mishandling tombstones at the same
sequence number when the tombstones did not have identical start and end
keys. Such tombstones are common since 90fc40690, which causes
tombstones to be split during compactions.
For example, if the tombstone [a, c) @ 1 lies across a compaction
boundary at b, it will be split into [a, b) @ 1 and [b, c) @ 1. Without
this patch, the collapsed range deletion map would look like this:
a -> 1
b -> 1
c -> 0
Notice how the b -> 1 entry is redundant. When the tombstones overlap,
the problem is even worse. Consider tombstones [a, c) @ 1 and [b, d) @
1, which produces this map without this patch:
a -> 1
b -> 1
c -> 0
d -> 0
This map is corrupt, as a map can never contain adjacent sentinel (zero)
entries. When the iterator advances from b to c, it will notice that c
is a sentinel enty and skip to d--but d is also a sentinel entry! Asking
what tombstone this iterator points to will trigger an assertion, as it
is not pointing to a valid tombstone.
/cc ajkr
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4424
Differential Revision: D10039248
Pulled By: abhimadan
fbshipit-source-id: 6d737c1e88d60e80cf27286726627ba44463e7f4
2018-09-25 23:48:44 +02:00
|
|
|
TEST_F(RangeDelAggregatorTest, IdenticalSameSeqNo) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "b", 5}, {"a", "b", 5}}}},
|
Handle tombstones at the same seqno in the CollapsedRangeDelMap (#4424)
Summary:
The CollapsedRangeDelMap was entirely mishandling tombstones at the same
sequence number when the tombstones did not have identical start and end
keys. Such tombstones are common since 90fc40690, which causes
tombstones to be split during compactions.
For example, if the tombstone [a, c) @ 1 lies across a compaction
boundary at b, it will be split into [a, b) @ 1 and [b, c) @ 1. Without
this patch, the collapsed range deletion map would look like this:
a -> 1
b -> 1
c -> 0
Notice how the b -> 1 entry is redundant. When the tombstones overlap,
the problem is even worse. Consider tombstones [a, c) @ 1 and [b, d) @
1, which produces this map without this patch:
a -> 1
b -> 1
c -> 0
d -> 0
This map is corrupt, as a map can never contain adjacent sentinel (zero)
entries. When the iterator advances from b to c, it will notice that c
is a sentinel enty and skip to d--but d is also a sentinel entry! Asking
what tombstone this iterator points to will trigger an assertion, as it
is not pointing to a valid tombstone.
/cc ajkr
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4424
Differential Revision: D10039248
Pulled By: abhimadan
fbshipit-source-id: 6d737c1e88d60e80cf27286726627ba44463e7f4
2018-09-25 23:48:44 +02:00
|
|
|
{{" ", 0}, {"a", 5}, {"b", 0}},
|
|
|
|
{{"a", "b", 5}});
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, ContiguousSameSeqNo) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "b", 5}, {"b", "c", 5}}}},
|
Handle tombstones at the same seqno in the CollapsedRangeDelMap (#4424)
Summary:
The CollapsedRangeDelMap was entirely mishandling tombstones at the same
sequence number when the tombstones did not have identical start and end
keys. Such tombstones are common since 90fc40690, which causes
tombstones to be split during compactions.
For example, if the tombstone [a, c) @ 1 lies across a compaction
boundary at b, it will be split into [a, b) @ 1 and [b, c) @ 1. Without
this patch, the collapsed range deletion map would look like this:
a -> 1
b -> 1
c -> 0
Notice how the b -> 1 entry is redundant. When the tombstones overlap,
the problem is even worse. Consider tombstones [a, c) @ 1 and [b, d) @
1, which produces this map without this patch:
a -> 1
b -> 1
c -> 0
d -> 0
This map is corrupt, as a map can never contain adjacent sentinel (zero)
entries. When the iterator advances from b to c, it will notice that c
is a sentinel enty and skip to d--but d is also a sentinel entry! Asking
what tombstone this iterator points to will trigger an assertion, as it
is not pointing to a valid tombstone.
/cc ajkr
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4424
Differential Revision: D10039248
Pulled By: abhimadan
fbshipit-source-id: 6d737c1e88d60e80cf27286726627ba44463e7f4
2018-09-25 23:48:44 +02:00
|
|
|
{{" ", 0}, {"a", 5}, {"b", 5}, {"c", 0}},
|
|
|
|
{{"a", "c", 5}});
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlappingSameSeqNo) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "c", 5}, {"b", "d", 5}}}},
|
Handle tombstones at the same seqno in the CollapsedRangeDelMap (#4424)
Summary:
The CollapsedRangeDelMap was entirely mishandling tombstones at the same
sequence number when the tombstones did not have identical start and end
keys. Such tombstones are common since 90fc40690, which causes
tombstones to be split during compactions.
For example, if the tombstone [a, c) @ 1 lies across a compaction
boundary at b, it will be split into [a, b) @ 1 and [b, c) @ 1. Without
this patch, the collapsed range deletion map would look like this:
a -> 1
b -> 1
c -> 0
Notice how the b -> 1 entry is redundant. When the tombstones overlap,
the problem is even worse. Consider tombstones [a, c) @ 1 and [b, d) @
1, which produces this map without this patch:
a -> 1
b -> 1
c -> 0
d -> 0
This map is corrupt, as a map can never contain adjacent sentinel (zero)
entries. When the iterator advances from b to c, it will notice that c
is a sentinel enty and skip to d--but d is also a sentinel entry! Asking
what tombstone this iterator points to will trigger an assertion, as it
is not pointing to a valid tombstone.
/cc ajkr
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4424
Differential Revision: D10039248
Pulled By: abhimadan
fbshipit-source-id: 6d737c1e88d60e80cf27286726627ba44463e7f4
2018-09-25 23:48:44 +02:00
|
|
|
{{" ", 0}, {"a", 5}, {"b", 5}, {"c", 5}, {"d", 0}},
|
|
|
|
{{"a", "d", 5}});
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, CoverSameSeqNo) {
|
2018-10-10 00:15:27 +02:00
|
|
|
VerifyRangeDels({{{{"a", "d", 5}, {"b", "c", 5}}}},
|
Handle tombstones at the same seqno in the CollapsedRangeDelMap (#4424)
Summary:
The CollapsedRangeDelMap was entirely mishandling tombstones at the same
sequence number when the tombstones did not have identical start and end
keys. Such tombstones are common since 90fc40690, which causes
tombstones to be split during compactions.
For example, if the tombstone [a, c) @ 1 lies across a compaction
boundary at b, it will be split into [a, b) @ 1 and [b, c) @ 1. Without
this patch, the collapsed range deletion map would look like this:
a -> 1
b -> 1
c -> 0
Notice how the b -> 1 entry is redundant. When the tombstones overlap,
the problem is even worse. Consider tombstones [a, c) @ 1 and [b, d) @
1, which produces this map without this patch:
a -> 1
b -> 1
c -> 0
d -> 0
This map is corrupt, as a map can never contain adjacent sentinel (zero)
entries. When the iterator advances from b to c, it will notice that c
is a sentinel enty and skip to d--but d is also a sentinel entry! Asking
what tombstone this iterator points to will trigger an assertion, as it
is not pointing to a valid tombstone.
/cc ajkr
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4424
Differential Revision: D10039248
Pulled By: abhimadan
fbshipit-source-id: 6d737c1e88d60e80cf27286726627ba44463e7f4
2018-09-25 23:48:44 +02:00
|
|
|
{{" ", 0}, {"a", 5}, {"b", 5}, {"c", 5}, {"d", 0}},
|
|
|
|
{{"a", "d", 5}});
|
|
|
|
}
|
|
|
|
|
2016-12-20 01:44:30 +01:00
|
|
|
// Note the Cover* tests also test cases where tombstones are inserted under a
|
|
|
|
// larger one when VerifyRangeDels() runs them in reverse
|
|
|
|
TEST_F(RangeDelAggregatorTest, CoverMultipleFromLeft) {
|
|
|
|
VerifyRangeDels(
|
2018-10-10 00:15:27 +02:00
|
|
|
{{{{"b", "d", 5}, {"c", "f", 10}, {"e", "g", 15}, {"a", "f", 20}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"a", 20}, {"f", 15}, {"g", 0}},
|
|
|
|
{{"a", "f", 20}, {"f", "g", 15}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, CoverMultipleFromRight) {
|
|
|
|
VerifyRangeDels(
|
2018-10-10 00:15:27 +02:00
|
|
|
{{{{"b", "d", 5}, {"c", "f", 10}, {"e", "g", 15}, {"c", "h", 20}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"b", 5}, {"c", 20}, {"h", 0}},
|
|
|
|
{{"b", "c", 5}, {"c", "h", 20}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, CoverMultipleFully) {
|
|
|
|
VerifyRangeDels(
|
2018-10-10 00:15:27 +02:00
|
|
|
{{{{"b", "d", 5}, {"c", "f", 10}, {"e", "g", 15}, {"a", "h", 20}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"a", 20}, {"h", 0}}, {{"a", "h", 20}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, AlternateMultipleAboveBelow) {
|
|
|
|
VerifyRangeDels(
|
2018-10-10 00:15:27 +02:00
|
|
|
{{{{"b", "d", 15}, {"c", "f", 10}, {"e", "g", 20}, {"a", "h", 5}}}},
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
{{" ", 0}, {"a", 5}, {"b", 15}, {"d", 10}, {"e", 20}, {"g", 5}, {"h", 0}},
|
|
|
|
{{"a", "b", 5},
|
|
|
|
{"b", "d", 15},
|
|
|
|
{"d", "e", 10},
|
|
|
|
{"e", "g", 20},
|
|
|
|
{"g", "h", 5}});
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, MergingIteratorAllEmptyStripes) {
|
|
|
|
for (bool collapsed : {true, false}) {
|
2018-09-18 21:06:59 +02:00
|
|
|
RangeDelAggregator range_del_agg(bytewise_icmp, {1, 2}, collapsed);
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
VerifyRangeDelIter(range_del_agg.NewIterator().get(), {});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, MergingIteratorOverlappingStripes) {
|
|
|
|
for (bool collapsed : {true, false}) {
|
2018-09-18 21:06:59 +02:00
|
|
|
RangeDelAggregator range_del_agg(bytewise_icmp, {5, 15, 25, 35}, collapsed);
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
AddTombstones(
|
|
|
|
&range_del_agg,
|
|
|
|
{{"d", "e", 10}, {"aa", "b", 20}, {"c", "d", 30}, {"a", "b", 10}});
|
|
|
|
VerifyRangeDelIter(
|
|
|
|
range_del_agg.NewIterator().get(),
|
|
|
|
{{"a", "b", 10}, {"aa", "b", 20}, {"c", "d", 30}, {"d", "e", 10}});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, MergingIteratorSeek) {
|
2018-09-18 21:06:59 +02:00
|
|
|
RangeDelAggregator range_del_agg(bytewise_icmp, {5, 15},
|
|
|
|
true /* collapsed */);
|
Range deletion performance improvements + cleanup (#4014)
Summary:
This fixes the same performance issue that #3992 fixes but with much more invasive cleanup.
I'm more excited about this PR because it paves the way for fixing another problem we uncovered at Cockroach where range deletion tombstones can cause massive compactions. For example, suppose L4 contains deletions from [a, c) and [x, z) and no other keys, and L5 is entirely empty. L6, however, is full of data. When compacting L4 -> L5, we'll end up with one file that spans, massively, from [a, z). When we go to compact L5 -> L6, we'll have to rewrite all of L6! If, instead of range deletions in L4, we had keys a, b, x, y, and z, RocksDB would have been smart enough to create two files in L5: one for a and b and another for x, y, and z.
With the changes in this PR, it will be possible to adjust the compaction logic to split tombstones/start new output files when they would span too many files in the grandparent level.
ajkr please take a look when you have a minute!
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4014
Differential Revision: D8773253
Pulled By: ajkr
fbshipit-source-id: ec62fa85f648fdebe1380b83ed997f9baec35677
2018-07-12 23:28:10 +02:00
|
|
|
AddTombstones(&range_del_agg, {{"a", "c", 10},
|
|
|
|
{"b", "c", 11},
|
|
|
|
{"f", "g", 10},
|
|
|
|
{"c", "d", 20},
|
|
|
|
{"e", "f", 20}});
|
|
|
|
auto it = range_del_agg.NewIterator();
|
|
|
|
|
|
|
|
// Verify seek positioning.
|
|
|
|
it->Seek("");
|
|
|
|
VerifyTombstonesEq(it->Tombstone(), {"a", "b", 10});
|
|
|
|
it->Seek("a");
|
|
|
|
VerifyTombstonesEq(it->Tombstone(), {"a", "b", 10});
|
|
|
|
it->Seek("aa");
|
|
|
|
VerifyTombstonesEq(it->Tombstone(), {"a", "b", 10});
|
|
|
|
it->Seek("b");
|
|
|
|
VerifyTombstonesEq(it->Tombstone(), {"b", "c", 11});
|
|
|
|
it->Seek("c");
|
|
|
|
VerifyTombstonesEq(it->Tombstone(), {"c", "d", 20});
|
|
|
|
it->Seek("dd");
|
|
|
|
VerifyTombstonesEq(it->Tombstone(), {"e", "f", 20});
|
|
|
|
it->Seek("f");
|
|
|
|
VerifyTombstonesEq(it->Tombstone(), {"f", "g", 10});
|
|
|
|
it->Seek("g");
|
|
|
|
ASSERT_EQ(it->Valid(), false);
|
|
|
|
it->Seek("h");
|
|
|
|
ASSERT_EQ(it->Valid(), false);
|
|
|
|
|
|
|
|
// Verify iteration after seek.
|
|
|
|
it->Seek("c");
|
|
|
|
VerifyRangeDelIter(it.get(),
|
|
|
|
{{"c", "d", 20}, {"e", "f", 20}, {"f", "g", 10}});
|
2016-12-20 01:44:30 +01:00
|
|
|
}
|
|
|
|
|
2018-07-14 02:34:54 +02:00
|
|
|
TEST_F(RangeDelAggregatorTest, TruncateTombstones) {
|
2018-10-10 00:15:27 +02:00
|
|
|
const InternalKey smallest("b", kMaxSequenceNumber, kTypeRangeDeletion);
|
2018-07-14 02:34:54 +02:00
|
|
|
const InternalKey largest("e", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
VerifyRangeDels(
|
2018-10-10 00:15:27 +02:00
|
|
|
{{{{"a", "c", 10}, {"d", "f", 10}}, &smallest, &largest}},
|
2018-07-14 02:34:54 +02:00
|
|
|
{{"a", 10, true}, // truncated
|
|
|
|
{"b", 10, false}, // not truncated
|
|
|
|
{"d", 10, false}, // not truncated
|
|
|
|
{"e", 10, true}}, // truncated
|
2018-10-10 00:15:27 +02:00
|
|
|
{{"b", "c", 10}, {"d", "e", 10}});
|
2018-07-14 02:34:54 +02:00
|
|
|
}
|
|
|
|
|
2018-10-10 00:15:27 +02:00
|
|
|
TEST_F(RangeDelAggregatorTest, OverlappingLargestKeyTruncateBelowTombstone) {
|
|
|
|
const InternalKey smallest("b", kMaxSequenceNumber, kTypeRangeDeletion);
|
2018-09-11 02:38:15 +02:00
|
|
|
const InternalKey largest(
|
|
|
|
"e", 3, // could happen if "e" is in consecutive sstables
|
|
|
|
kTypeValue);
|
|
|
|
VerifyRangeDels(
|
2018-10-10 00:15:27 +02:00
|
|
|
{{{{"a", "c", 10}, {"d", "f", 10}}, &smallest, &largest}},
|
|
|
|
{{"a", 10, true}, // truncated
|
|
|
|
{"b", 10, false}, // not truncated
|
|
|
|
{"d", 10, false}, // not truncated
|
|
|
|
{"e", 10, false}, // not truncated
|
|
|
|
{"e", 2, true}}, // truncated here
|
|
|
|
{{"b", "c", 10}, {"d", "e", 10}});
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlappingLargestKeyTruncateAboveTombstone) {
|
|
|
|
const InternalKey smallest("b", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
const InternalKey largest(
|
|
|
|
"e", 15, // could happen if "e" is in consecutive sstables
|
|
|
|
kTypeValue);
|
|
|
|
VerifyRangeDels(
|
|
|
|
{{{{"a", "c", 10}, {"d", "f", 10}}, &smallest, &largest}},
|
|
|
|
{{"a", 10, true}, // truncated
|
|
|
|
{"b", 10, false}, // not truncated
|
|
|
|
{"d", 10, false}, // not truncated
|
|
|
|
{"e", kMaxSequenceNumber, true}}, // truncated
|
|
|
|
{{"b", "c", 10}, {"d", "e", 10}});
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlappingSmallestKeyTruncateBelowTombstone) {
|
|
|
|
const InternalKey smallest("b", 5, kTypeValue);
|
|
|
|
const InternalKey largest("e", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
VerifyRangeDels(
|
|
|
|
{{{{"a", "c", 10}, {"d", "f", 10}}, &smallest, &largest}},
|
|
|
|
{{"a", 10, true}, // truncated
|
|
|
|
{"b", 10, true}, // truncated
|
|
|
|
{"b", 6, false}, // not truncated; start boundary moved
|
|
|
|
{"d", 10, false}, // not truncated
|
|
|
|
{"e", kMaxSequenceNumber, true}}, // truncated
|
|
|
|
{{"b", "c", 10}, {"d", "e", 10}});
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlappingSmallestKeyTruncateAboveTombstone) {
|
|
|
|
const InternalKey smallest("b", 15, kTypeValue);
|
|
|
|
const InternalKey largest("e", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
VerifyRangeDels(
|
|
|
|
{{{{"a", "c", 10}, {"d", "f", 10}}, &smallest, &largest}},
|
2018-09-11 02:38:15 +02:00
|
|
|
{{"a", 10, true}, // truncated
|
2018-10-10 00:15:27 +02:00
|
|
|
{"b", 15, true}, // truncated
|
2018-09-11 02:38:15 +02:00
|
|
|
{"b", 10, false}, // not truncated
|
|
|
|
{"d", 10, false}, // not truncated
|
2018-10-10 00:15:27 +02:00
|
|
|
{"e", kMaxSequenceNumber, true}}, // truncated
|
|
|
|
{{"b", "c", 10}, {"d", "e", 10}});
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlappingBoundaryGapAboveTombstone) {
|
|
|
|
const InternalKey smallest1("b", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
const InternalKey largest1("c", 20, kTypeValue);
|
|
|
|
const InternalKey smallest2("c", 10, kTypeValue);
|
|
|
|
const InternalKey largest2("e", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
VerifyRangeDels(
|
|
|
|
{{{{"b", "d", 5}}, &smallest1, &largest1},
|
|
|
|
{{{"b", "d", 5}}, &smallest2, &largest2}},
|
|
|
|
{{"b", 5, false}, // not truncated
|
|
|
|
{"c", 5, false}}, // not truncated
|
|
|
|
{{"b", "c", 5}, {"c", "d", 5}}); // not collapsed due to boundaries
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlappingBoundaryGapBelowTombstone) {
|
|
|
|
const InternalKey smallest1("b", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
const InternalKey largest1("c", 20, kTypeValue);
|
|
|
|
const InternalKey smallest2("c", 10, kTypeValue);
|
|
|
|
const InternalKey largest2("e", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
VerifyRangeDels(
|
|
|
|
{{{{"b", "d", 30}}, &smallest1, &largest1},
|
|
|
|
{{{"b", "d", 30}}, &smallest2, &largest2}},
|
|
|
|
{{"b", 30, false}, // not truncated
|
|
|
|
{"c", 30, false}, // not truncated
|
|
|
|
{"c", 19, true}, // truncated here (keys in this range should not exist)
|
|
|
|
{"c", 11, false}}, // not truncated again
|
|
|
|
{{"b", "c", 30}, {"c", "d", 30}}); // not collapsed due to boundaries
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, OverlappingBoundaryGapContainsTombstone) {
|
|
|
|
const InternalKey smallest1("b", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
const InternalKey largest1("c", 20, kTypeValue);
|
|
|
|
const InternalKey smallest2("c", 10, kTypeValue);
|
|
|
|
const InternalKey largest2("e", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
VerifyRangeDels(
|
|
|
|
{{{{"b", "d", 15}}, &smallest1, &largest1},
|
|
|
|
{{{"b", "d", 15}}, &smallest2, &largest2}},
|
|
|
|
{{"b", 15, false}, // not truncated
|
|
|
|
{"c", 15, true}, // truncated (keys in this range should not exist)
|
|
|
|
{"c", 11, false}}, // not truncated here
|
|
|
|
{{"b", "c", 15}, {"c", "d", 15}}); // not collapsed due to boundaries
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, FileCoversOneKeyAndTombstoneAbove) {
|
|
|
|
const InternalKey smallest("a", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
const InternalKey largest("a", 20, kTypeValue);
|
|
|
|
VerifyRangeDels(
|
|
|
|
{{{{"a", "b", 35}}, &smallest, &largest}},
|
|
|
|
{{"a", 40, true}, // not truncated
|
|
|
|
{"a", 35, false}}, // not truncated
|
|
|
|
{{"a", "a", 35}}); // empty tombstone but can't occur during a compaction
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(RangeDelAggregatorTest, FileCoversOneKeyAndTombstoneBelow) {
|
|
|
|
const InternalKey smallest("a", kMaxSequenceNumber, kTypeRangeDeletion);
|
|
|
|
const InternalKey largest("a", 20, kTypeValue);
|
|
|
|
VerifyRangeDels(
|
|
|
|
{{{{"a", "b", 15}}, &smallest, &largest}},
|
|
|
|
{{"a", 20, true}, // truncated here
|
|
|
|
{"a", 15, true}}, // truncated
|
|
|
|
{{"a", "a", 15}}); // empty tombstone but can't occur during a compaction
|
2018-09-11 02:38:15 +02:00
|
|
|
}
|
|
|
|
|
2016-12-20 01:44:30 +01:00
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|