Relax VersionStorageInfo::GetOverlappingInputs check (#4050)
Summary: Do not consider the range tombstone sentinel key as causing 2 adjacent sstables in a level to overlap. When a range tombstone's end key is the largest key in an sstable, the sstable's end key is so to a "sentinel" value that is the smallest key in the next sstable with a sequence number of kMaxSequenceNumber. This "sentinel" is guaranteed to not overlap in internal-key space with the next sstable. Unfortunately, GetOverlappingFiles uses user-keys to determine overlap and was thus considering 2 adjacent sstables in a level to overlap if they were separated by this sentinel key. This in turn would cause compactions to be larger than necessary. Note that this conflicts with https://github.com/facebook/rocksdb/pull/2769 and cases `DBRangeDelTest.CompactionTreatsSplitInputLevelDeletionAtomically` to fail. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4050 Differential Revision: D8844423 Pulled By: ajkr fbshipit-source-id: df3f9f1db8f4cff2bff77376b98b83c2ae1d155b
This commit is contained in:
parent
21171615c1
commit
90fc40690a
@ -204,7 +204,7 @@ Status BuildTable(
|
||||
// we will regrad this verification as user reads since the goal is
|
||||
// to cache it here for further user reads
|
||||
std::unique_ptr<InternalIterator> it(table_cache->NewIterator(
|
||||
ReadOptions(), env_options, internal_comparator, meta->fd,
|
||||
ReadOptions(), env_options, internal_comparator, *meta,
|
||||
nullptr /* range_del_agg */,
|
||||
mutable_cf_options.prefix_extractor.get(), nullptr,
|
||||
(internal_stats == nullptr) ? nullptr
|
||||
|
@ -635,7 +635,7 @@ Status CompactionJob::Run() {
|
||||
// to cache it here for further user reads
|
||||
InternalIterator* iter = cfd->table_cache()->NewIterator(
|
||||
ReadOptions(), env_options_, cfd->internal_comparator(),
|
||||
files_meta[file_idx]->fd, nullptr /* range_del_agg */,
|
||||
*files_meta[file_idx], nullptr /* range_del_agg */,
|
||||
prefix_extractor, nullptr,
|
||||
cfd->internal_stats()->GetFileReadHist(
|
||||
compact_->compaction->output_level()),
|
||||
|
@ -916,11 +916,14 @@ TEST_F(DBRangeDelTest, MemtableBloomFilter) {
|
||||
}
|
||||
|
||||
TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) {
|
||||
// make sure compaction treats files containing a split range deletion in the
|
||||
// input level as an atomic unit. I.e., compacting any input-level file(s)
|
||||
// containing a portion of the range deletion causes all other input-level
|
||||
// files containing portions of that same range deletion to be included in the
|
||||
// compaction.
|
||||
// This test originally verified that compaction treated files containing a
|
||||
// split range deletion in the input level as an atomic unit. I.e.,
|
||||
// compacting any input-level file(s) containing a portion of the range
|
||||
// deletion causes all other input-level files containing portions of that
|
||||
// same range deletion to be included in the compaction. Range deletion
|
||||
// tombstones are now truncated to sstable boundaries which removed the need
|
||||
// for that behavior (which could lead to excessively large
|
||||
// compactions).
|
||||
const int kNumFilesPerLevel = 4, kValueBytes = 4 << 10;
|
||||
Options options = CurrentOptions();
|
||||
options.compression = kNoCompression;
|
||||
@ -967,22 +970,111 @@ TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) {
|
||||
if (i == 0) {
|
||||
ASSERT_OK(db_->CompactFiles(
|
||||
CompactionOptions(), {meta.levels[1].files[0].name}, 2 /* level */));
|
||||
ASSERT_EQ(0, NumTableFilesAtLevel(1));
|
||||
} else if (i == 1) {
|
||||
auto begin_str = Key(0), end_str = Key(1);
|
||||
Slice begin = begin_str, end = end_str;
|
||||
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin, &end));
|
||||
ASSERT_EQ(3, NumTableFilesAtLevel(1));
|
||||
} else if (i == 2) {
|
||||
ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
|
||||
{{"max_bytes_for_level_base", "10000"}}));
|
||||
dbfull()->TEST_WaitForCompact();
|
||||
ASSERT_EQ(1, NumTableFilesAtLevel(1));
|
||||
}
|
||||
ASSERT_EQ(0, NumTableFilesAtLevel(1));
|
||||
ASSERT_GT(NumTableFilesAtLevel(2), 0);
|
||||
|
||||
db_->ReleaseSnapshot(snapshot);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) {
|
||||
// Test the handling of the range-tombstone end-key as the
|
||||
// upper-bound for an sstable.
|
||||
|
||||
const int kNumFilesPerLevel = 2, kValueBytes = 4 << 10;
|
||||
Options options = CurrentOptions();
|
||||
options.compression = kNoCompression;
|
||||
options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
|
||||
options.memtable_factory.reset(
|
||||
new SpecialSkipListFactory(2 /* num_entries_flush */));
|
||||
options.target_file_size_base = kValueBytes;
|
||||
options.disable_auto_compactions = true;
|
||||
|
||||
DestroyAndReopen(options);
|
||||
|
||||
// Create an initial sstable at L2:
|
||||
// [key000000#1,1, key000000#1,1]
|
||||
ASSERT_OK(Put(Key(0), ""));
|
||||
ASSERT_OK(db_->Flush(FlushOptions()));
|
||||
MoveFilesToLevel(2);
|
||||
ASSERT_EQ(1, NumTableFilesAtLevel(2));
|
||||
|
||||
// A snapshot protects the range tombstone from dropping due to
|
||||
// becoming obsolete.
|
||||
const Snapshot* snapshot = db_->GetSnapshot();
|
||||
db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
|
||||
Key(0), Key(2 * kNumFilesPerLevel));
|
||||
|
||||
// Create 2 additional sstables in L0. Note that the first sstable
|
||||
// contains the range tombstone.
|
||||
// [key000000#3,1, key000004#72057594037927935,15]
|
||||
// [key000001#5,1, key000002#6,1]
|
||||
Random rnd(301);
|
||||
std::string value = RandomString(&rnd, kValueBytes);
|
||||
for (int j = 0; j < kNumFilesPerLevel; ++j) {
|
||||
// Give files overlapping key-ranges to prevent a trivial move when we
|
||||
// compact from L0 to L1.
|
||||
ASSERT_OK(Put(Key(j), value));
|
||||
ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
|
||||
ASSERT_OK(db_->Flush(FlushOptions()));
|
||||
ASSERT_EQ(j + 1, NumTableFilesAtLevel(0));
|
||||
}
|
||||
// Compact the 2 L0 sstables to L1, resulting in the following LSM. There
|
||||
// are 2 sstables generated in L1 due to the target_file_size_base setting.
|
||||
// L1:
|
||||
// [key000000#3,1, key000002#72057594037927935,15]
|
||||
// [key000002#6,1, key000004#72057594037927935,15]
|
||||
// L2:
|
||||
// [key000000#1,1, key000000#1,1]
|
||||
MoveFilesToLevel(1);
|
||||
ASSERT_EQ(2, NumTableFilesAtLevel(1));
|
||||
|
||||
{
|
||||
// Compact the second sstable in L1:
|
||||
// L1:
|
||||
// [key000000#3,1, key000002#72057594037927935,15]
|
||||
// L2:
|
||||
// [key000000#1,1, key000000#1,1]
|
||||
// [key000002#6,1, key000004#72057594037927935,15]
|
||||
auto begin_str = Key(3);
|
||||
const rocksdb::Slice begin = begin_str;
|
||||
dbfull()->TEST_CompactRange(1, &begin, nullptr);
|
||||
ASSERT_EQ(1, NumTableFilesAtLevel(1));
|
||||
ASSERT_EQ(2, NumTableFilesAtLevel(2));
|
||||
}
|
||||
|
||||
{
|
||||
// Compact the first sstable in L1. This should be copacetic, but
|
||||
// was previously resulting in overlapping sstables in L2 due to
|
||||
// mishandling of the range tombstone end-key when used as the
|
||||
// largest key for an sstable. The resulting LSM structure should
|
||||
// be:
|
||||
//
|
||||
// L2:
|
||||
// [key000000#1,1, key000001#72057594037927935,15]
|
||||
// [key000001#5,1, key000002#72057594037927935,15]
|
||||
// [key000002#6,1, key000004#72057594037927935,15]
|
||||
auto begin_str = Key(0);
|
||||
const rocksdb::Slice begin = begin_str;
|
||||
dbfull()->TEST_CompactRange(1, &begin, &begin);
|
||||
ASSERT_EQ(0, NumTableFilesAtLevel(1));
|
||||
ASSERT_EQ(3, NumTableFilesAtLevel(2));
|
||||
}
|
||||
|
||||
db_->ReleaseSnapshot(snapshot);
|
||||
}
|
||||
|
||||
TEST_F(DBRangeDelTest, UnorderedTombstones) {
|
||||
// Regression test for #2752. Range delete tombstones between
|
||||
// different snapshot stripes are not stored in order, so the first
|
||||
|
@ -140,10 +140,14 @@ inline Slice ExtractUserKey(const Slice& internal_key) {
|
||||
return Slice(internal_key.data(), internal_key.size() - 8);
|
||||
}
|
||||
|
||||
inline ValueType ExtractValueType(const Slice& internal_key) {
|
||||
inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) {
|
||||
assert(internal_key.size() >= 8);
|
||||
const size_t n = internal_key.size();
|
||||
uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
|
||||
return DecodeFixed64(internal_key.data() + n - 8);
|
||||
}
|
||||
|
||||
inline ValueType ExtractValueType(const Slice& internal_key) {
|
||||
uint64_t num = ExtractInternalKeyFooter(internal_key);
|
||||
unsigned char c = num & 0xff;
|
||||
return static_cast<ValueType>(c);
|
||||
}
|
||||
@ -606,9 +610,15 @@ struct RangeTombstone {
|
||||
return InternalKey(start_key_, seq_, kTypeRangeDeletion);
|
||||
}
|
||||
|
||||
// The tombstone end-key is exclusive, so we generate an internal-key here
|
||||
// which has a similar property. Using kMaxSequenceNumber guarantees that
|
||||
// the returned internal-key will compare less than any other internal-key
|
||||
// with the same user-key. This in turn guarantees that the serialized
|
||||
// end-key for a tombstone such as [a-b] will compare less than the key "b".
|
||||
//
|
||||
// be careful to use SerializeEndKey(), allocates new memory
|
||||
InternalKey SerializeEndKey() const {
|
||||
return InternalKey(end_key_, seq_, kTypeRangeDeletion);
|
||||
return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -192,6 +192,13 @@ TEST_F(FormatTest, UpdateInternalKey) {
|
||||
ASSERT_EQ(new_val_type, decoded.type);
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, RangeTombstoneSerializeEndKey) {
|
||||
RangeTombstone t("a", "b", 2);
|
||||
InternalKey k("b", 3, kTypeValue);
|
||||
const InternalKeyComparator cmp(BytewiseComparator());
|
||||
ASSERT_LT(cmp.Compare(t.SerializeEndKey(), k), 0);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -75,7 +75,7 @@ class ForwardLevelIterator : public InternalIterator {
|
||||
cfd_->internal_comparator(), {} /* snapshots */);
|
||||
file_iter_ = cfd_->table_cache()->NewIterator(
|
||||
read_options_, *(cfd_->soptions()), cfd_->internal_comparator(),
|
||||
files_[file_index_]->fd,
|
||||
*files_[file_index_],
|
||||
read_options_.ignore_range_deletions ? nullptr : &range_del_agg,
|
||||
prefix_extractor_, nullptr /* table_reader_ptr */, nullptr, false);
|
||||
file_iter_->SetPinnedItersMgr(pinned_iters_mgr_);
|
||||
@ -635,7 +635,7 @@ void ForwardIterator::RebuildIterators(bool refresh_sv) {
|
||||
continue;
|
||||
}
|
||||
l0_iters_.push_back(cfd_->table_cache()->NewIterator(
|
||||
read_options_, *cfd_->soptions(), cfd_->internal_comparator(), l0->fd,
|
||||
read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0,
|
||||
read_options_.ignore_range_deletions ? nullptr : &range_del_agg,
|
||||
sv_->mutable_cf_options.prefix_extractor.get()));
|
||||
}
|
||||
@ -706,7 +706,7 @@ void ForwardIterator::RenewIterators() {
|
||||
}
|
||||
l0_iters_new.push_back(cfd_->table_cache()->NewIterator(
|
||||
read_options_, *cfd_->soptions(), cfd_->internal_comparator(),
|
||||
l0_files_new[inew]->fd,
|
||||
*l0_files_new[inew],
|
||||
read_options_.ignore_range_deletions ? nullptr : &range_del_agg,
|
||||
svnew->mutable_cf_options.prefix_extractor.get()));
|
||||
}
|
||||
@ -766,7 +766,7 @@ void ForwardIterator::ResetIncompleteIterators() {
|
||||
DeleteIterator(l0_iters_[i]);
|
||||
l0_iters_[i] = cfd_->table_cache()->NewIterator(
|
||||
read_options_, *cfd_->soptions(), cfd_->internal_comparator(),
|
||||
l0_files[i]->fd, nullptr /* range_del_agg */,
|
||||
*l0_files[i], nullptr /* range_del_agg */,
|
||||
sv_->mutable_cf_options.prefix_extractor.get());
|
||||
l0_iters_[i]->SetPinnedItersMgr(pinned_iters_mgr_);
|
||||
}
|
||||
|
@ -426,7 +426,9 @@ bool RangeDelAggregator::IsRangeOverlapped(const Slice& start,
|
||||
}
|
||||
|
||||
Status RangeDelAggregator::AddTombstones(
|
||||
std::unique_ptr<InternalIterator> input) {
|
||||
std::unique_ptr<InternalIterator> input,
|
||||
const InternalKey* smallest,
|
||||
const InternalKey* largest) {
|
||||
if (input == nullptr) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -450,6 +452,29 @@ Status RangeDelAggregator::AddTombstones(
|
||||
return Status::Corruption("Unable to parse range tombstone InternalKey");
|
||||
}
|
||||
RangeTombstone tombstone(parsed_key, input->value());
|
||||
// Truncate the tombstone to the range [smallest, largest].
|
||||
if (smallest != nullptr) {
|
||||
if (icmp_.user_comparator()->Compare(
|
||||
tombstone.start_key_, smallest->user_key()) < 0) {
|
||||
tombstone.start_key_ = smallest->user_key();
|
||||
}
|
||||
}
|
||||
if (largest != nullptr) {
|
||||
// This is subtly correct despite the discrepancy between
|
||||
// FileMetaData::largest being inclusive while RangeTombstone::end_key_
|
||||
// is exclusive. A tombstone will only extend past the bounds of an
|
||||
// sstable if its end-key is the largest key in the table. If that
|
||||
// occurs, the largest key for the table is set based on the smallest
|
||||
// key in the next table in the level. In that case, largest->user_key()
|
||||
// is not actually a key in the current table and thus we can use it as
|
||||
// the exclusive end-key for the tombstone.
|
||||
if (icmp_.user_comparator()->Compare(
|
||||
tombstone.end_key_, largest->user_key()) > 0) {
|
||||
// The largest key should be a tombstone sentinel key.
|
||||
assert(GetInternalKeySeqno(largest->Encode()) == kMaxSequenceNumber);
|
||||
tombstone.end_key_ = largest->user_key();
|
||||
}
|
||||
}
|
||||
GetRangeDelMap(tombstone.seq_).AddTombstone(std::move(tombstone));
|
||||
input->Next();
|
||||
}
|
||||
|
@ -137,9 +137,15 @@ class RangeDelAggregator {
|
||||
bool IsRangeOverlapped(const Slice& start, const Slice& end);
|
||||
|
||||
// Adds tombstones to the tombstone aggregation structure maintained by this
|
||||
// object.
|
||||
// object. Tombstones are truncated to smallest and largest. If smallest (or
|
||||
// largest) is null, it is not used for truncation. When adding range
|
||||
// tombstones present in an sstable, smallest and largest should be set to
|
||||
// the smallest and largest keys from the sstable file metadata. Note that
|
||||
// tombstones end keys are exclusive while largest is inclusive.
|
||||
// @return non-OK status if any of the tombstone keys are corrupted.
|
||||
Status AddTombstones(std::unique_ptr<InternalIterator> input);
|
||||
Status AddTombstones(std::unique_ptr<InternalIterator> input,
|
||||
const InternalKey* smallest = nullptr,
|
||||
const InternalKey* largest = nullptr);
|
||||
|
||||
// Resets iterators maintained across calls to ShouldDelete(). This may be
|
||||
// called when the tombstones change, or the owner may call explicitly, e.g.,
|
||||
|
@ -19,6 +19,7 @@ namespace {
|
||||
struct ExpectedPoint {
|
||||
Slice begin;
|
||||
SequenceNumber seq;
|
||||
bool expectAlive;
|
||||
};
|
||||
|
||||
enum Direction {
|
||||
@ -29,7 +30,9 @@ enum Direction {
|
||||
static auto icmp = InternalKeyComparator(BytewiseComparator());
|
||||
|
||||
void AddTombstones(RangeDelAggregator* range_del_agg,
|
||||
const std::vector<RangeTombstone>& range_dels) {
|
||||
const std::vector<RangeTombstone>& range_dels,
|
||||
const InternalKey* smallest = nullptr,
|
||||
const InternalKey* largest = nullptr) {
|
||||
std::vector<std::string> keys, values;
|
||||
for (const auto& range_del : range_dels) {
|
||||
auto key_and_value = range_del.Serialize();
|
||||
@ -38,7 +41,7 @@ void AddTombstones(RangeDelAggregator* range_del_agg,
|
||||
}
|
||||
std::unique_ptr<test::VectorIterator> range_del_iter(
|
||||
new test::VectorIterator(keys, values));
|
||||
range_del_agg->AddTombstones(std::move(range_del_iter));
|
||||
range_del_agg->AddTombstones(std::move(range_del_iter), smallest, largest);
|
||||
}
|
||||
|
||||
void VerifyTombstonesEq(const RangeTombstone& a, const RangeTombstone& b) {
|
||||
@ -62,7 +65,9 @@ void VerifyRangeDelIter(
|
||||
void VerifyRangeDels(
|
||||
const std::vector<RangeTombstone>& range_dels_in,
|
||||
const std::vector<ExpectedPoint>& expected_points,
|
||||
const std::vector<RangeTombstone>& expected_collapsed_range_dels) {
|
||||
const std::vector<RangeTombstone>& expected_collapsed_range_dels,
|
||||
const InternalKey* smallest = nullptr,
|
||||
const InternalKey* largest = nullptr) {
|
||||
// Test same result regardless of which order the range deletions are added
|
||||
// and regardless of collapsed mode.
|
||||
for (bool collapsed : {false, true}) {
|
||||
@ -73,7 +78,7 @@ void VerifyRangeDels(
|
||||
if (dir == kReverse) {
|
||||
std::reverse(range_dels.begin(), range_dels.end());
|
||||
}
|
||||
AddTombstones(&range_del_agg, range_dels);
|
||||
AddTombstones(&range_del_agg, range_dels, smallest, largest);
|
||||
|
||||
auto mode = RangeDelPositioningMode::kFullScan;
|
||||
if (collapsed) {
|
||||
@ -88,22 +93,29 @@ void VerifyRangeDels(
|
||||
ASSERT_FALSE(range_del_agg.ShouldDelete(parsed_key, mode));
|
||||
if (parsed_key.sequence > 0) {
|
||||
--parsed_key.sequence;
|
||||
ASSERT_TRUE(range_del_agg.ShouldDelete(parsed_key, mode));
|
||||
if (expected_point.expectAlive) {
|
||||
ASSERT_FALSE(range_del_agg.ShouldDelete(parsed_key, mode));
|
||||
} else {
|
||||
ASSERT_TRUE(range_del_agg.ShouldDelete(parsed_key, mode));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (collapsed) {
|
||||
range_dels = expected_collapsed_range_dels;
|
||||
} else {
|
||||
// Tombstones in an uncollapsed map are presented in start key order.
|
||||
// Tombstones with the same start key are presented in insertion order.
|
||||
VerifyRangeDelIter(range_del_agg.NewIterator().get(), range_dels);
|
||||
} else if (smallest == nullptr && largest == nullptr) {
|
||||
// Tombstones in an uncollapsed map are presented in start key
|
||||
// order. Tombstones with the same start key are presented in
|
||||
// insertion order. We don't handle tombstone truncation here, so the
|
||||
// verification is only performed if no truncation was requested.
|
||||
std::stable_sort(range_dels.begin(), range_dels.end(),
|
||||
[&](const RangeTombstone& a, const RangeTombstone& b) {
|
||||
return icmp.user_comparator()->Compare(
|
||||
a.start_key_, b.start_key_) < 0;
|
||||
});
|
||||
VerifyRangeDelIter(range_del_agg.NewIterator().get(), range_dels);
|
||||
}
|
||||
VerifyRangeDelIter(range_del_agg.NewIterator().get(), range_dels);
|
||||
}
|
||||
}
|
||||
|
||||
@ -275,6 +287,19 @@ TEST_F(RangeDelAggregatorTest, MergingIteratorSeek) {
|
||||
{{"c", "d", 20}, {"e", "f", 20}, {"f", "g", 10}});
|
||||
}
|
||||
|
||||
TEST_F(RangeDelAggregatorTest, TruncateTombstones) {
|
||||
const InternalKey smallest("b", 1, kTypeRangeDeletion);
|
||||
const InternalKey largest("e", kMaxSequenceNumber, kTypeRangeDeletion);
|
||||
VerifyRangeDels(
|
||||
{{"a", "c", 10}, {"d", "f", 10}},
|
||||
{{"a", 10, true}, // truncated
|
||||
{"b", 10, false}, // not truncated
|
||||
{"d", 10, false}, // not truncated
|
||||
{"e", 10, true}}, // truncated
|
||||
{{"b", "c", 10}, {"d", "e", 10}},
|
||||
&smallest, &largest);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -500,7 +500,7 @@ class Repairer {
|
||||
}
|
||||
if (status.ok()) {
|
||||
InternalIterator* iter = table_cache_->NewIterator(
|
||||
ReadOptions(), env_options_, cfd->internal_comparator(), t->meta.fd,
|
||||
ReadOptions(), env_options_, cfd->internal_comparator(), t->meta,
|
||||
nullptr /* range_del_agg */,
|
||||
cfd->GetLatestMutableCFOptions()->prefix_extractor.get());
|
||||
bool empty = true;
|
||||
|
@ -180,7 +180,7 @@ Status TableCache::FindTable(const EnvOptions& env_options,
|
||||
|
||||
InternalIterator* TableCache::NewIterator(
|
||||
const ReadOptions& options, const EnvOptions& env_options,
|
||||
const InternalKeyComparator& icomparator, const FileDescriptor& fd,
|
||||
const InternalKeyComparator& icomparator, const FileMetaData& file_meta,
|
||||
RangeDelAggregator* range_del_agg, const SliceTransform* prefix_extractor,
|
||||
TableReader** table_reader_ptr, HistogramImpl* file_read_hist,
|
||||
bool for_compaction, Arena* arena, bool skip_filters, int level) {
|
||||
@ -211,6 +211,7 @@ InternalIterator* TableCache::NewIterator(
|
||||
create_new_table_reader = readahead > 0;
|
||||
}
|
||||
|
||||
auto& fd = file_meta.fd;
|
||||
if (create_new_table_reader) {
|
||||
unique_ptr<TableReader> table_reader_unique_ptr;
|
||||
s = GetTableReader(
|
||||
@ -265,7 +266,10 @@ InternalIterator* TableCache::NewIterator(
|
||||
s = range_del_iter->status();
|
||||
}
|
||||
if (s.ok()) {
|
||||
s = range_del_agg->AddTombstones(std::move(range_del_iter));
|
||||
s = range_del_agg->AddTombstones(
|
||||
std::move(range_del_iter),
|
||||
&file_meta.smallest,
|
||||
&file_meta.largest);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -280,51 +284,14 @@ InternalIterator* TableCache::NewIterator(
|
||||
return result;
|
||||
}
|
||||
|
||||
InternalIterator* TableCache::NewRangeTombstoneIterator(
|
||||
const ReadOptions& options, const EnvOptions& env_options,
|
||||
const InternalKeyComparator& icomparator, const FileDescriptor& fd,
|
||||
HistogramImpl* file_read_hist, bool skip_filters, int level,
|
||||
const SliceTransform* prefix_extractor) {
|
||||
Status s;
|
||||
Cache::Handle* handle = nullptr;
|
||||
TableReader* table_reader = fd.table_reader;
|
||||
if (table_reader == nullptr) {
|
||||
s = FindTable(env_options, icomparator, fd, &handle, prefix_extractor,
|
||||
options.read_tier == kBlockCacheTier /* no_io */,
|
||||
true /* record read_stats */, file_read_hist, skip_filters,
|
||||
level);
|
||||
if (s.ok()) {
|
||||
table_reader = GetTableReaderFromHandle(handle);
|
||||
}
|
||||
}
|
||||
InternalIterator* result = nullptr;
|
||||
if (s.ok()) {
|
||||
result = table_reader->NewRangeTombstoneIterator(options);
|
||||
if (result != nullptr) {
|
||||
if (handle != nullptr) {
|
||||
result->RegisterCleanup(&UnrefEntry, cache_, handle);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (result == nullptr && handle != nullptr) {
|
||||
// the range deletion block didn't exist, or there was a failure between
|
||||
// getting handle and getting iterator.
|
||||
ReleaseHandle(handle);
|
||||
}
|
||||
if (!s.ok()) {
|
||||
assert(result == nullptr);
|
||||
result = NewErrorInternalIterator(s);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
Status TableCache::Get(const ReadOptions& options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileDescriptor& fd, const Slice& k,
|
||||
const FileMetaData& file_meta, const Slice& k,
|
||||
GetContext* get_context,
|
||||
const SliceTransform* prefix_extractor,
|
||||
HistogramImpl* file_read_hist, bool skip_filters,
|
||||
int level) {
|
||||
auto& fd = file_meta.fd;
|
||||
std::string* row_cache_entry = nullptr;
|
||||
bool done = false;
|
||||
#ifndef ROCKSDB_LITE
|
||||
@ -405,7 +372,9 @@ Status TableCache::Get(const ReadOptions& options,
|
||||
}
|
||||
if (s.ok()) {
|
||||
s = get_context->range_del_agg()->AddTombstones(
|
||||
std::move(range_del_iter));
|
||||
std::move(range_del_iter),
|
||||
&file_meta.smallest,
|
||||
&file_meta.largest);
|
||||
}
|
||||
}
|
||||
if (s.ok()) {
|
||||
|
@ -53,19 +53,12 @@ class TableCache {
|
||||
InternalIterator* NewIterator(
|
||||
const ReadOptions& options, const EnvOptions& toptions,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileDescriptor& file_fd, RangeDelAggregator* range_del_agg,
|
||||
const FileMetaData& file_meta, RangeDelAggregator* range_del_agg,
|
||||
const SliceTransform* prefix_extractor = nullptr,
|
||||
TableReader** table_reader_ptr = nullptr,
|
||||
HistogramImpl* file_read_hist = nullptr, bool for_compaction = false,
|
||||
Arena* arena = nullptr, bool skip_filters = false, int level = -1);
|
||||
|
||||
InternalIterator* NewRangeTombstoneIterator(
|
||||
const ReadOptions& options, const EnvOptions& toptions,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileDescriptor& file_fd, HistogramImpl* file_read_hist,
|
||||
bool skip_filters, int level,
|
||||
const SliceTransform* prefix_extractor = nullptr);
|
||||
|
||||
// If a seek to internal key "k" in specified file finds an entry,
|
||||
// call (*handle_result)(arg, found_key, found_value) repeatedly until
|
||||
// it returns false.
|
||||
@ -76,7 +69,7 @@ class TableCache {
|
||||
// @param level The level this table is at, -1 for "not set / don't know"
|
||||
Status Get(const ReadOptions& options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileDescriptor& file_fd, const Slice& k,
|
||||
const FileMetaData& file_meta, const Slice& k,
|
||||
GetContext* get_context,
|
||||
const SliceTransform* prefix_extractor = nullptr,
|
||||
HistogramImpl* file_read_hist = nullptr, bool skip_filters = false,
|
||||
|
@ -548,8 +548,9 @@ class LevelIterator final : public InternalIterator {
|
||||
}
|
||||
|
||||
return table_cache_->NewIterator(
|
||||
read_options_, env_options_, icomparator_, file_meta.fd, range_del_agg_,
|
||||
prefix_extractor_, nullptr /* don't need reference to table */,
|
||||
read_options_, env_options_, icomparator_, *file_meta.file_metadata,
|
||||
range_del_agg_, prefix_extractor_,
|
||||
nullptr /* don't need reference to table */,
|
||||
file_read_hist_, for_compaction_, nullptr /* arena */, skip_filters_,
|
||||
level_);
|
||||
}
|
||||
@ -1000,7 +1001,7 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
||||
for (size_t i = 0; i < storage_info_.LevelFilesBrief(0).num_files; i++) {
|
||||
const auto& file = storage_info_.LevelFilesBrief(0).files[i];
|
||||
merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator(
|
||||
read_options, soptions, cfd_->internal_comparator(), file.fd,
|
||||
read_options, soptions, cfd_->internal_comparator(), *file.file_metadata,
|
||||
range_del_agg, mutable_cf_options_.prefix_extractor.get(), nullptr,
|
||||
cfd_->internal_stats()->GetFileReadHist(0), false, arena,
|
||||
false /* skip_filters */, 0 /* level */));
|
||||
@ -1053,7 +1054,7 @@ Status Version::OverlapWithLevelIterator(const ReadOptions& read_options,
|
||||
continue;
|
||||
}
|
||||
ScopedArenaIterator iter(cfd_->table_cache()->NewIterator(
|
||||
read_options, env_options, cfd_->internal_comparator(), file->fd,
|
||||
read_options, env_options, cfd_->internal_comparator(), *file->file_metadata,
|
||||
&range_del_agg, mutable_cf_options_.prefix_extractor.get(), nullptr,
|
||||
cfd_->internal_stats()->GetFileReadHist(0), false, &arena,
|
||||
false /* skip_filters */, 0 /* level */));
|
||||
@ -1198,8 +1199,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
||||
}
|
||||
|
||||
*status = table_cache_->Get(
|
||||
read_options, *internal_comparator(), f->fd, ikey, &get_context,
|
||||
mutable_cf_options_.prefix_extractor.get(),
|
||||
read_options, *internal_comparator(), *f->file_metadata, ikey,
|
||||
&get_context, mutable_cf_options_.prefix_extractor.get(),
|
||||
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
||||
IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
|
||||
fp.IsHitFileLastInLevel()),
|
||||
@ -2053,8 +2054,8 @@ void VersionStorageInfo::GetOverlappingInputs(
|
||||
*file_index = -1;
|
||||
}
|
||||
const Comparator* user_cmp = user_comparator_;
|
||||
if (begin != nullptr && end != nullptr && level > 0) {
|
||||
GetOverlappingInputsRangeBinarySearch(level, user_begin, user_end, inputs,
|
||||
if (level > 0) {
|
||||
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
|
||||
hint_index, file_index);
|
||||
return;
|
||||
}
|
||||
@ -2109,24 +2110,73 @@ void VersionStorageInfo::GetCleanInputsWithinInterval(
|
||||
return;
|
||||
}
|
||||
|
||||
Slice user_begin, user_end;
|
||||
const auto& level_files = level_files_brief_[level];
|
||||
if (begin == nullptr) {
|
||||
user_begin = ExtractUserKey(level_files.files[0].smallest_key);
|
||||
} else {
|
||||
user_begin = begin->user_key();
|
||||
begin = &level_files.files[0].file_metadata->smallest;
|
||||
}
|
||||
if (end == nullptr) {
|
||||
user_end = ExtractUserKey(
|
||||
level_files.files[level_files.num_files - 1].largest_key);
|
||||
} else {
|
||||
user_end = end->user_key();
|
||||
end = &level_files.files[level_files.num_files - 1].file_metadata->largest;
|
||||
}
|
||||
GetOverlappingInputsRangeBinarySearch(level, user_begin, user_end, inputs,
|
||||
|
||||
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
|
||||
hint_index, file_index,
|
||||
true /* within_interval */);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
const uint64_t kRangeTombstoneSentinel =
|
||||
PackSequenceAndType(kMaxSequenceNumber, kTypeRangeDeletion);
|
||||
|
||||
// Utility for comparing sstable boundary keys. Returns -1 if either a or b is
|
||||
// null which provides the property that a==null indicates a key that is less
|
||||
// than any key and b==null indicates a key that is greater than any key. Note
|
||||
// that the comparison is performed primarily on the user-key portion of the
|
||||
// key. If the user-keys compare equal, an additional test is made to sort
|
||||
// range tombstone sentinel keys before other keys with the same user-key. The
|
||||
// result is that 2 user-keys will compare equal if they differ purely on
|
||||
// their sequence number and value, but the range tombstone sentinel for that
|
||||
// user-key will compare not equal. This is necessary because the range
|
||||
// tombstone sentinel key is set as the largest key for an sstable even though
|
||||
// that key never appears in the database. We don't want adjacent sstables to
|
||||
// be considered overlapping if they are separated by the range tombstone
|
||||
// sentinel.
|
||||
int sstableKeyCompare(const Comparator* user_cmp,
|
||||
const InternalKey& a, const InternalKey& b) {
|
||||
auto c = user_cmp->Compare(a.user_key(), b.user_key());
|
||||
if (c != 0) {
|
||||
return c;
|
||||
}
|
||||
auto a_footer = ExtractInternalKeyFooter(a.Encode());
|
||||
auto b_footer = ExtractInternalKeyFooter(b.Encode());
|
||||
if (a_footer == kRangeTombstoneSentinel) {
|
||||
if (b_footer != kRangeTombstoneSentinel) {
|
||||
return -1;
|
||||
}
|
||||
} else if (b_footer == kRangeTombstoneSentinel) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sstableKeyCompare(const Comparator* user_cmp,
|
||||
const InternalKey* a, const InternalKey& b) {
|
||||
if (a == nullptr) {
|
||||
return -1;
|
||||
}
|
||||
return sstableKeyCompare(user_cmp, *a, b);
|
||||
}
|
||||
|
||||
int sstableKeyCompare(const Comparator* user_cmp,
|
||||
const InternalKey& a, const InternalKey* b) {
|
||||
if (b == nullptr) {
|
||||
return -1;
|
||||
}
|
||||
return sstableKeyCompare(user_cmp, a, *b);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Store in "*inputs" all files in "level" that overlap [begin,end]
|
||||
// Employ binary search to find at least one file that overlaps the
|
||||
// specified range. From that file, iterate backwards and
|
||||
@ -2135,7 +2185,7 @@ void VersionStorageInfo::GetCleanInputsWithinInterval(
|
||||
// within range [begin, end]. "clean" means there is a boudnary
|
||||
// between the files in "*inputs" and the surrounding files
|
||||
void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
||||
int level, const Slice& user_begin, const Slice& user_end,
|
||||
int level, const InternalKey* begin, const InternalKey* end,
|
||||
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
|
||||
bool within_interval) const {
|
||||
assert(level > 0);
|
||||
@ -2143,7 +2193,7 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
||||
int mid = 0;
|
||||
int max = static_cast<int>(files_[level].size()) - 1;
|
||||
bool foundOverlap = false;
|
||||
const Comparator* user_cmp = user_comparator_;
|
||||
auto user_cmp = user_comparator_;
|
||||
|
||||
// if the caller already knows the index of a file that has overlap,
|
||||
// then we can skip the binary search.
|
||||
@ -2155,15 +2205,15 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
||||
while (!foundOverlap && min <= max) {
|
||||
mid = (min + max)/2;
|
||||
FdWithKeyRange* f = &(level_files_brief_[level].files[mid]);
|
||||
const Slice file_start = ExtractUserKey(f->smallest_key);
|
||||
const Slice file_limit = ExtractUserKey(f->largest_key);
|
||||
if ((!within_interval && user_cmp->Compare(file_limit, user_begin) < 0) ||
|
||||
(within_interval && user_cmp->Compare(file_start, user_begin) < 0)) {
|
||||
auto& smallest = f->file_metadata->smallest;
|
||||
auto& largest = f->file_metadata->largest;
|
||||
if ((!within_interval && sstableKeyCompare(user_cmp, begin, largest) > 0) ||
|
||||
(within_interval && sstableKeyCompare(user_cmp, begin, smallest) > 0)) {
|
||||
min = mid + 1;
|
||||
} else if ((!within_interval &&
|
||||
user_cmp->Compare(user_end, file_start) < 0) ||
|
||||
sstableKeyCompare(user_cmp, smallest, end) > 0) ||
|
||||
(within_interval &&
|
||||
user_cmp->Compare(user_end, file_limit) < 0)) {
|
||||
sstableKeyCompare(user_cmp, largest, end) > 0)) {
|
||||
max = mid - 1;
|
||||
} else {
|
||||
foundOverlap = true;
|
||||
@ -2182,10 +2232,10 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
||||
|
||||
int start_index, end_index;
|
||||
if (within_interval) {
|
||||
ExtendFileRangeWithinInterval(level, user_begin, user_end, mid, &start_index,
|
||||
&end_index);
|
||||
ExtendFileRangeWithinInterval(level, begin, end, mid,
|
||||
&start_index, &end_index);
|
||||
} else {
|
||||
ExtendFileRangeOverlappingInterval(level, user_begin, user_end, mid,
|
||||
ExtendFileRangeOverlappingInterval(level, begin, end, mid,
|
||||
&start_index, &end_index);
|
||||
assert(end_index >= start_index);
|
||||
}
|
||||
@ -2202,21 +2252,28 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
||||
// and forward to find all overlapping files.
|
||||
// Use FileLevel in searching, make it faster
|
||||
void VersionStorageInfo::ExtendFileRangeOverlappingInterval(
|
||||
int level, const Slice& user_begin, const Slice& user_end,
|
||||
int level, const InternalKey* begin, const InternalKey* end,
|
||||
unsigned int mid_index, int* start_index, int* end_index) const {
|
||||
const Comparator* user_cmp = user_comparator_;
|
||||
auto user_cmp = user_comparator_;
|
||||
const FdWithKeyRange* files = level_files_brief_[level].files;
|
||||
#ifndef NDEBUG
|
||||
{
|
||||
// assert that the file at mid_index overlaps with the range
|
||||
assert(mid_index < level_files_brief_[level].num_files);
|
||||
const FdWithKeyRange* f = &files[mid_index];
|
||||
const Slice fstart = ExtractUserKey(f->smallest_key);
|
||||
const Slice flimit = ExtractUserKey(f->largest_key);
|
||||
if (user_cmp->Compare(fstart, user_begin) >= 0) {
|
||||
assert(user_cmp->Compare(fstart, user_end) <= 0);
|
||||
auto& smallest = f->file_metadata->smallest;
|
||||
auto& largest = f->file_metadata->largest;
|
||||
if (sstableKeyCompare(user_cmp, begin, smallest) <= 0) {
|
||||
assert(sstableKeyCompare(user_cmp, smallest, end) <= 0);
|
||||
} else {
|
||||
assert(user_cmp->Compare(flimit, user_begin) >= 0);
|
||||
// fprintf(stderr, "ExtendFileRangeOverlappingInterval\n%s - %s\n%s - %s\n%d %d\n",
|
||||
// begin ? begin->DebugString().c_str() : "(null)",
|
||||
// end ? end->DebugString().c_str() : "(null)",
|
||||
// smallest->DebugString().c_str(),
|
||||
// largest->DebugString().c_str(),
|
||||
// sstableKeyCompare(user_cmp, smallest, begin),
|
||||
// sstableKeyCompare(user_cmp, largest, begin));
|
||||
assert(sstableKeyCompare(user_cmp, begin, largest) <= 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -2228,8 +2285,8 @@ void VersionStorageInfo::ExtendFileRangeOverlappingInterval(
|
||||
// check backwards from 'mid' to lower indices
|
||||
for (int i = mid_index; i >= 0 ; i--) {
|
||||
const FdWithKeyRange* f = &files[i];
|
||||
const Slice file_limit = ExtractUserKey(f->largest_key);
|
||||
if (user_cmp->Compare(file_limit, user_begin) >= 0) {
|
||||
auto& largest = f->file_metadata->largest;
|
||||
if (sstableKeyCompare(user_cmp, begin, largest) <= 0) {
|
||||
*start_index = i;
|
||||
assert((count++, true));
|
||||
} else {
|
||||
@ -2240,8 +2297,8 @@ void VersionStorageInfo::ExtendFileRangeOverlappingInterval(
|
||||
for (unsigned int i = mid_index+1;
|
||||
i < level_files_brief_[level].num_files; i++) {
|
||||
const FdWithKeyRange* f = &files[i];
|
||||
const Slice file_start = ExtractUserKey(f->smallest_key);
|
||||
if (user_cmp->Compare(file_start, user_end) <= 0) {
|
||||
auto& smallest = f->file_metadata->smallest;
|
||||
if (sstableKeyCompare(user_cmp, smallest, end) <= 0) {
|
||||
assert((count++, true));
|
||||
*end_index = i;
|
||||
} else {
|
||||
@ -2259,39 +2316,36 @@ void VersionStorageInfo::ExtendFileRangeOverlappingInterval(
|
||||
// the clean range required.
|
||||
// Use FileLevel in searching, make it faster
|
||||
void VersionStorageInfo::ExtendFileRangeWithinInterval(
|
||||
int level, const Slice& user_begin, const Slice& user_end,
|
||||
int level, const InternalKey* begin, const InternalKey* end,
|
||||
unsigned int mid_index, int* start_index, int* end_index) const {
|
||||
assert(level != 0);
|
||||
const Comparator* user_cmp = user_comparator_;
|
||||
auto* user_cmp = user_comparator_;
|
||||
const FdWithKeyRange* files = level_files_brief_[level].files;
|
||||
#ifndef NDEBUG
|
||||
{
|
||||
// assert that the file at mid_index is within the range
|
||||
assert(mid_index < level_files_brief_[level].num_files);
|
||||
const FdWithKeyRange* f = &files[mid_index];
|
||||
const Slice fstart = ExtractUserKey(f->smallest_key);
|
||||
const Slice flimit = ExtractUserKey(f->largest_key);
|
||||
assert(user_cmp->Compare(fstart, user_begin) >= 0 &&
|
||||
user_cmp->Compare(flimit, user_end) <= 0);
|
||||
auto& smallest = f->file_metadata->smallest;
|
||||
auto& largest = f->file_metadata->largest;
|
||||
assert(sstableKeyCompare(user_cmp, begin, smallest) <= 0 &&
|
||||
sstableKeyCompare(user_cmp, largest, end) <= 0);
|
||||
}
|
||||
#endif
|
||||
ExtendFileRangeOverlappingInterval(level, user_begin, user_end, mid_index,
|
||||
ExtendFileRangeOverlappingInterval(level, begin, end, mid_index,
|
||||
start_index, end_index);
|
||||
int left = *start_index;
|
||||
int right = *end_index;
|
||||
// shrink from left to right
|
||||
while (left <= right) {
|
||||
const Slice& first_key_in_range = ExtractUserKey(files[left].smallest_key);
|
||||
if (user_cmp->Compare(first_key_in_range, user_begin) < 0) {
|
||||
auto& smallest = files[left].file_metadata->smallest;
|
||||
if (sstableKeyCompare(user_cmp, begin, smallest) > 0) {
|
||||
left++;
|
||||
continue;
|
||||
}
|
||||
if (left > 0) { // If not first file
|
||||
const Slice& last_key_before =
|
||||
ExtractUserKey(files[left - 1].largest_key);
|
||||
if (user_cmp->Equal(first_key_in_range, last_key_before)) {
|
||||
// The first user key in range overlaps with the previous file's last
|
||||
// key
|
||||
auto& largest = files[left - 1].file_metadata->largest;
|
||||
if (sstableKeyCompare(user_cmp, smallest, largest) == 0) {
|
||||
left++;
|
||||
continue;
|
||||
}
|
||||
@ -2300,16 +2354,15 @@ void VersionStorageInfo::ExtendFileRangeWithinInterval(
|
||||
}
|
||||
// shrink from right to left
|
||||
while (left <= right) {
|
||||
const Slice last_key_in_range = ExtractUserKey(files[right].largest_key);
|
||||
if (user_cmp->Compare(last_key_in_range, user_end) > 0) {
|
||||
auto& largest = files[right].file_metadata->largest;
|
||||
if (sstableKeyCompare(user_cmp, largest, end) > 0) {
|
||||
right--;
|
||||
continue;
|
||||
}
|
||||
if (right < static_cast<int>(level_files_brief_[level].num_files) -
|
||||
1) { // If not the last file
|
||||
const Slice first_key_after =
|
||||
ExtractUserKey(files[right + 1].smallest_key);
|
||||
if (user_cmp->Equal(last_key_in_range, first_key_after)) {
|
||||
auto& smallest = files[right + 1].file_metadata->smallest;
|
||||
if (sstableKeyCompare(user_cmp, smallest, largest) == 0) {
|
||||
// The last user key in range overlaps with the next file's first key
|
||||
right--;
|
||||
continue;
|
||||
@ -4030,8 +4083,8 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
|
||||
// approximate offset of "key" within the table.
|
||||
TableReader* table_reader_ptr;
|
||||
InternalIterator* iter = v->cfd_->table_cache()->NewIterator(
|
||||
ReadOptions(), v->env_options_, v->cfd_->internal_comparator(), f.fd,
|
||||
nullptr /* range_del_agg */,
|
||||
ReadOptions(), v->env_options_, v->cfd_->internal_comparator(),
|
||||
*f.file_metadata, nullptr /* range_del_agg */,
|
||||
v->GetMutableCFOptions().prefix_extractor.get(), &table_reader_ptr);
|
||||
if (table_reader_ptr != nullptr) {
|
||||
result = table_reader_ptr->ApproximateOffsetOf(key);
|
||||
@ -4111,7 +4164,7 @@ InternalIterator* VersionSet::MakeInputIterator(
|
||||
for (size_t i = 0; i < flevel->num_files; i++) {
|
||||
list[num++] = cfd->table_cache()->NewIterator(
|
||||
read_options, env_options_compactions, cfd->internal_comparator(),
|
||||
flevel->files[i].fd, range_del_agg,
|
||||
*flevel->files[i].file_metadata, range_del_agg,
|
||||
c->mutable_cf_options()->prefix_extractor.get(),
|
||||
nullptr /* table_reader_ptr */,
|
||||
nullptr /* no per level latency histogram */,
|
||||
|
@ -201,8 +201,8 @@ class VersionStorageInfo {
|
||||
|
||||
void GetOverlappingInputsRangeBinarySearch(
|
||||
int level, // level > 0
|
||||
const Slice& begin, // nullptr means before all keys
|
||||
const Slice& end, // nullptr means after all keys
|
||||
const InternalKey* begin, // nullptr means before all keys
|
||||
const InternalKey* end, // nullptr means after all keys
|
||||
std::vector<FileMetaData*>* inputs,
|
||||
int hint_index, // index of overlap file
|
||||
int* file_index, // return index of overlap file
|
||||
@ -211,20 +211,20 @@ class VersionStorageInfo {
|
||||
|
||||
void ExtendFileRangeOverlappingInterval(
|
||||
int level,
|
||||
const Slice& begin, // nullptr means before all keys
|
||||
const Slice& end, // nullptr means after all keys
|
||||
unsigned int index, // start extending from this index
|
||||
int* startIndex, // return the startIndex of input range
|
||||
int* endIndex) // return the endIndex of input range
|
||||
const InternalKey* begin, // nullptr means before all keys
|
||||
const InternalKey* end, // nullptr means after all keys
|
||||
unsigned int index, // start extending from this index
|
||||
int* startIndex, // return the startIndex of input range
|
||||
int* endIndex) // return the endIndex of input range
|
||||
const;
|
||||
|
||||
void ExtendFileRangeWithinInterval(
|
||||
int level,
|
||||
const Slice& begin, // nullptr means before all keys
|
||||
const Slice& end, // nullptr means after all keys
|
||||
unsigned int index, // start extending from this index
|
||||
int* startIndex, // return the startIndex of input range
|
||||
int* endIndex) // return the endIndex of input range
|
||||
const InternalKey* begin, // nullptr means before all keys
|
||||
const InternalKey* end, // nullptr means after all keys
|
||||
unsigned int index, // start extending from this index
|
||||
int* startIndex, // return the startIndex of input range
|
||||
int* endIndex) // return the endIndex of input range
|
||||
const;
|
||||
|
||||
// Returns true iff some file in the specified level overlaps
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "db/log_writer.h"
|
||||
#include "table/mock_table.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/string_util.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
|
||||
@ -139,6 +140,35 @@ class VersionStorageInfoTest : public testing::Test {
|
||||
f->num_deletions = 0;
|
||||
vstorage_.AddFile(level, f);
|
||||
}
|
||||
|
||||
void Add(int level, uint32_t file_number, const InternalKey& smallest,
|
||||
const InternalKey& largest, uint64_t file_size = 0) {
|
||||
assert(level < vstorage_.num_levels());
|
||||
FileMetaData* f = new FileMetaData;
|
||||
f->fd = FileDescriptor(file_number, 0, file_size);
|
||||
f->smallest = smallest;
|
||||
f->largest = largest;
|
||||
f->compensated_file_size = file_size;
|
||||
f->refs = 0;
|
||||
f->num_entries = 0;
|
||||
f->num_deletions = 0;
|
||||
vstorage_.AddFile(level, f);
|
||||
}
|
||||
|
||||
std::string GetOverlappingFiles(int level, const InternalKey& begin,
|
||||
const InternalKey& end) {
|
||||
std::vector<FileMetaData*> inputs;
|
||||
vstorage_.GetOverlappingInputs(level, &begin, &end, &inputs);
|
||||
|
||||
std::string result;
|
||||
for (size_t i = 0; i < inputs.size(); ++i) {
|
||||
if (i > 0) {
|
||||
result += ",";
|
||||
}
|
||||
AppendNumberTo(&result, inputs[i]->fd.GetNumber());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(VersionStorageInfoTest, MaxBytesForLevelStatic) {
|
||||
@ -261,6 +291,40 @@ TEST_F(VersionStorageInfoTest, EstimateLiveDataSize2) {
|
||||
ASSERT_EQ(4U, vstorage_.EstimateLiveDataSize());
|
||||
}
|
||||
|
||||
TEST_F(VersionStorageInfoTest, GetOverlappingInputs) {
|
||||
// Two files that overlap at the range deletion tombstone sentinel.
|
||||
Add(1, 1U, {"a", 0, kTypeValue}, {"b", kMaxSequenceNumber, kTypeRangeDeletion}, 1);
|
||||
Add(1, 2U, {"b", 0, kTypeValue}, {"c", 0, kTypeValue}, 1);
|
||||
// Two files that overlap at the same user key.
|
||||
Add(1, 3U, {"d", 0, kTypeValue}, {"e", kMaxSequenceNumber, kTypeValue}, 1);
|
||||
Add(1, 4U, {"e", 0, kTypeValue}, {"f", 0, kTypeValue}, 1);
|
||||
// Two files that do not overlap.
|
||||
Add(1, 5U, {"g", 0, kTypeValue}, {"h", 0, kTypeValue}, 1);
|
||||
Add(1, 6U, {"i", 0, kTypeValue}, {"j", 0, kTypeValue}, 1);
|
||||
vstorage_.UpdateNumNonEmptyLevels();
|
||||
vstorage_.GenerateLevelFilesBrief();
|
||||
|
||||
ASSERT_EQ("1,2", GetOverlappingFiles(
|
||||
1, {"a", 0, kTypeValue}, {"b", 0, kTypeValue}));
|
||||
ASSERT_EQ("1", GetOverlappingFiles(
|
||||
1, {"a", 0, kTypeValue}, {"b", kMaxSequenceNumber, kTypeRangeDeletion}));
|
||||
ASSERT_EQ("2", GetOverlappingFiles(
|
||||
1, {"b", kMaxSequenceNumber, kTypeValue}, {"c", 0, kTypeValue}));
|
||||
ASSERT_EQ("3,4", GetOverlappingFiles(
|
||||
1, {"d", 0, kTypeValue}, {"e", 0, kTypeValue}));
|
||||
ASSERT_EQ("3", GetOverlappingFiles(
|
||||
1, {"d", 0, kTypeValue}, {"e", kMaxSequenceNumber, kTypeRangeDeletion}));
|
||||
ASSERT_EQ("3,4", GetOverlappingFiles(
|
||||
1, {"e", kMaxSequenceNumber, kTypeValue}, {"f", 0, kTypeValue}));
|
||||
ASSERT_EQ("3,4", GetOverlappingFiles(
|
||||
1, {"e", 0, kTypeValue}, {"f", 0, kTypeValue}));
|
||||
ASSERT_EQ("5", GetOverlappingFiles(
|
||||
1, {"g", 0, kTypeValue}, {"h", 0, kTypeValue}));
|
||||
ASSERT_EQ("6", GetOverlappingFiles(
|
||||
1, {"i", 0, kTypeValue}, {"j", 0, kTypeValue}));
|
||||
}
|
||||
|
||||
|
||||
class FindLevelFileTest : public testing::Test {
|
||||
public:
|
||||
LevelFilesBrief file_level_;
|
||||
|
Loading…
Reference in New Issue
Block a user