Refactor the detailed consistency checks and the SST saving logic in VersionBuilder (#9099)

Summary:
The patch refactors the parts of `VersionBuilder` that deal with SST file
comparisons. Specifically, it makes the following changes:
* Turns `NewestFirstBySeqNo` and `BySmallestKey` from free-standing
functions into function objects. Note: `BySmallestKey` has a pointer to the
`InternalKeyComparator`, while `NewestFirstBySeqNo` is completely
stateless.
* Eliminates the wrapper `FileComparator`, which was essentially an
unnecessary DIY virtual function call mechanism.
* Refactors `CheckConsistencyDetails` and `SaveSSTFilesTo` using helper
function templates that take comparator/checker function objects. Using
static polymorphism eliminates the need to make runtime decisions about
which comparator to use.
* Extends some error messages returned by the consistency checks and
makes them more uniform.
* Removes some incomplete/redundant consistency checks from `VersionBuilder`
and `FilePicker`.
* Improves const correctness in several places.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9099

Test Plan: `make check`

Reviewed By: riversand963

Differential Revision: D32027503

Pulled By: ltamasi

fbshipit-source-id: 621326ae41f4f55f7ad6a91abbd6e666d5c7857c
This commit is contained in:
Levi Tamasi 2021-11-03 11:51:22 -07:00 committed by Facebook GitHub Bot
parent 2b60621f16
commit 081722780b
4 changed files with 240 additions and 206 deletions

View File

@ -34,52 +34,48 @@
namespace ROCKSDB_NAMESPACE {
bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) {
if (a->fd.largest_seqno != b->fd.largest_seqno) {
return a->fd.largest_seqno > b->fd.largest_seqno;
}
if (a->fd.smallest_seqno != b->fd.smallest_seqno) {
return a->fd.smallest_seqno > b->fd.smallest_seqno;
}
// Break ties by file number
return a->fd.GetNumber() > b->fd.GetNumber();
}
namespace {
bool BySmallestKey(FileMetaData* a, FileMetaData* b,
const InternalKeyComparator* cmp) {
int r = cmp->Compare(a->smallest, b->smallest);
if (r != 0) {
return (r < 0);
}
// Break ties by file number
return (a->fd.GetNumber() < b->fd.GetNumber());
}
} // namespace
class VersionBuilder::Rep {
private:
// Helper to sort files_ in v
// kLevel0 -- NewestFirstBySeqNo
// kLevelNon0 -- BySmallestKey
struct FileComparator {
enum SortMethod { kLevel0 = 0, kLevelNon0 = 1, } sort_method;
const InternalKeyComparator* internal_comparator;
class NewestFirstBySeqNo {
public:
bool operator()(const FileMetaData* lhs, const FileMetaData* rhs) const {
assert(lhs);
assert(rhs);
FileComparator() : internal_comparator(nullptr) {}
bool operator()(FileMetaData* f1, FileMetaData* f2) const {
switch (sort_method) {
case kLevel0:
return NewestFirstBySeqNo(f1, f2);
case kLevelNon0:
return BySmallestKey(f1, f2, internal_comparator);
if (lhs->fd.largest_seqno != rhs->fd.largest_seqno) {
return lhs->fd.largest_seqno > rhs->fd.largest_seqno;
}
assert(false);
return false;
if (lhs->fd.smallest_seqno != rhs->fd.smallest_seqno) {
return lhs->fd.smallest_seqno > rhs->fd.smallest_seqno;
}
// Break ties by file number
return lhs->fd.GetNumber() > rhs->fd.GetNumber();
}
};
class BySmallestKey {
public:
explicit BySmallestKey(const InternalKeyComparator* cmp) : cmp_(cmp) {}
bool operator()(const FileMetaData* lhs, const FileMetaData* rhs) const {
assert(lhs);
assert(rhs);
assert(cmp_);
const int r = cmp_->Compare(lhs->smallest, rhs->smallest);
if (r != 0) {
return (r < 0);
}
// Break ties by file number
return (lhs->fd.GetNumber() < rhs->fd.GetNumber());
}
private:
const InternalKeyComparator* cmp_;
};
struct LevelState {
std::unordered_set<uint64_t> deleted_files;
// Map from file number to file meta data.
@ -247,8 +243,8 @@ class VersionBuilder::Rep {
bool has_invalid_levels_;
// Current levels of table files affected by additions/deletions.
std::unordered_map<uint64_t, int> table_file_levels_;
FileComparator level_zero_cmp_;
FileComparator level_nonzero_cmp_;
NewestFirstBySeqNo level_zero_cmp_;
BySmallestKey level_nonzero_cmp_;
// Mutable metadata objects for all blob files affected by the series of
// version edits.
@ -264,14 +260,11 @@ class VersionBuilder::Rep {
base_vstorage_(base_vstorage),
version_set_(version_set),
num_levels_(base_vstorage->num_levels()),
has_invalid_levels_(false) {
has_invalid_levels_(false),
level_nonzero_cmp_(base_vstorage_->InternalComparator()) {
assert(ioptions_);
levels_ = new LevelState[num_levels_];
level_zero_cmp_.sort_method = FileComparator::kLevel0;
level_nonzero_cmp_.sort_method = FileComparator::kLevelNon0;
level_nonzero_cmp_.internal_comparator =
base_vstorage_->InternalComparator();
}
~Rep() {
@ -297,6 +290,10 @@ class VersionBuilder::Rep {
}
}
// Mapping used for checking the consistency of links between SST files and
// blob files. It is built using the forward links (table file -> blob file),
// and is subsequently compared with the inverse mapping stored in the
// BlobFileMetaData objects.
using ExpectedLinkedSsts =
std::unordered_map<uint64_t, BlobFileMetaData::LinkedSsts>;
@ -312,91 +309,155 @@ class VersionBuilder::Rep {
(*expected_linked_ssts)[blob_file_number].emplace(table_file_number);
}
Status CheckConsistencyDetails(VersionStorageInfo* vstorage) {
// Make sure the files are sorted correctly and that the links between
// table files and blob files are consistent. The latter is checked using
// the following mapping, which is built using the forward links
// (table file -> blob file), and is subsequently compared with the inverse
// mapping stored in the BlobFileMetaData objects.
template <class Checker>
Status CheckConsistencyDetailsForLevel(
const VersionStorageInfo* vstorage, int level, Checker checker,
const std::string& sync_point,
ExpectedLinkedSsts* expected_linked_ssts) const {
#ifdef NDEBUG
(void)sync_point;
#endif
assert(vstorage);
assert(level >= 0 && level < num_levels_);
assert(expected_linked_ssts);
const auto& level_files = vstorage->LevelFiles(level);
if (level_files.empty()) {
return Status::OK();
}
assert(level_files[0]);
UpdateExpectedLinkedSsts(level_files[0]->fd.GetNumber(),
level_files[0]->oldest_blob_file_number,
expected_linked_ssts);
for (size_t i = 1; i < level_files.size(); ++i) {
assert(level_files[i]);
UpdateExpectedLinkedSsts(level_files[i]->fd.GetNumber(),
level_files[i]->oldest_blob_file_number,
expected_linked_ssts);
auto lhs = level_files[i - 1];
auto rhs = level_files[i];
#ifndef NDEBUG
auto pair = std::make_pair(&lhs, &rhs);
TEST_SYNC_POINT_CALLBACK(sync_point, &pair);
#endif
const Status s = checker(lhs, rhs);
if (!s.ok()) {
return s;
}
}
return Status::OK();
}
// Make sure table files are sorted correctly and that the links between
// table files and blob files are consistent.
Status CheckConsistencyDetails(const VersionStorageInfo* vstorage) const {
assert(vstorage);
ExpectedLinkedSsts expected_linked_ssts;
for (int level = 0; level < num_levels_; level++) {
auto& level_files = vstorage->LevelFiles(level);
if (num_levels_ > 0) {
// Check L0
{
auto l0_checker = [this](const FileMetaData* lhs,
const FileMetaData* rhs) {
assert(lhs);
assert(rhs);
if (level_files.empty()) {
continue;
if (!level_zero_cmp_(lhs, rhs)) {
std::ostringstream oss;
oss << "L0 files are not sorted properly: files #"
<< lhs->fd.GetNumber() << ", #" << rhs->fd.GetNumber();
return Status::Corruption("VersionBuilder", oss.str());
}
if (rhs->fd.smallest_seqno == rhs->fd.largest_seqno) {
// This is an external file that we ingested
const SequenceNumber external_file_seqno = rhs->fd.smallest_seqno;
if (!(external_file_seqno < lhs->fd.largest_seqno ||
external_file_seqno == 0)) {
std::ostringstream oss;
oss << "L0 file #" << lhs->fd.GetNumber() << " with seqno "
<< lhs->fd.smallest_seqno << ' ' << lhs->fd.largest_seqno
<< " vs. file #" << rhs->fd.GetNumber()
<< " with global_seqno " << external_file_seqno;
return Status::Corruption("VersionBuilder", oss.str());
}
} else if (lhs->fd.smallest_seqno <= rhs->fd.smallest_seqno) {
std::ostringstream oss;
oss << "L0 file #" << lhs->fd.GetNumber() << " with seqno "
<< lhs->fd.smallest_seqno << ' ' << lhs->fd.largest_seqno
<< " vs. file #" << rhs->fd.GetNumber() << " with seqno "
<< rhs->fd.smallest_seqno << ' ' << rhs->fd.largest_seqno;
return Status::Corruption("VersionBuilder", oss.str());
}
return Status::OK();
};
const Status s = CheckConsistencyDetailsForLevel(
vstorage, /* level */ 0, l0_checker,
"VersionBuilder::CheckConsistency0", &expected_linked_ssts);
if (!s.ok()) {
return s;
}
}
assert(level_files[0]);
UpdateExpectedLinkedSsts(level_files[0]->fd.GetNumber(),
level_files[0]->oldest_blob_file_number,
&expected_linked_ssts);
for (size_t i = 1; i < level_files.size(); i++) {
assert(level_files[i]);
UpdateExpectedLinkedSsts(level_files[i]->fd.GetNumber(),
level_files[i]->oldest_blob_file_number,
&expected_linked_ssts);
// Check L1 and up
const InternalKeyComparator* const icmp = vstorage->InternalComparator();
assert(icmp);
auto f1 = level_files[i - 1];
auto f2 = level_files[i];
if (level == 0) {
#ifndef NDEBUG
auto pair = std::make_pair(&f1, &f2);
TEST_SYNC_POINT_CALLBACK("VersionBuilder::CheckConsistency0", &pair);
#endif
if (!level_zero_cmp_(f1, f2)) {
return Status::Corruption("L0 files are not sorted properly");
for (int level = 1; level < num_levels_; ++level) {
auto checker = [this, level, icmp](const FileMetaData* lhs,
const FileMetaData* rhs) {
assert(lhs);
assert(rhs);
if (!level_nonzero_cmp_(lhs, rhs)) {
std::ostringstream oss;
oss << 'L' << level << " files are not sorted properly: files #"
<< lhs->fd.GetNumber() << ", #" << rhs->fd.GetNumber();
return Status::Corruption("VersionBuilder", oss.str());
}
if (f2->fd.smallest_seqno == f2->fd.largest_seqno) {
// This is an external file that we ingested
SequenceNumber external_file_seqno = f2->fd.smallest_seqno;
if (!(external_file_seqno < f1->fd.largest_seqno ||
external_file_seqno == 0)) {
return Status::Corruption(
"L0 file with seqno " + ToString(f1->fd.smallest_seqno) +
" " + ToString(f1->fd.largest_seqno) +
" vs. file with global_seqno" +
ToString(external_file_seqno) + " with fileNumber " +
ToString(f1->fd.GetNumber()));
}
} else if (f1->fd.smallest_seqno <= f2->fd.smallest_seqno) {
return Status::Corruption("L0 files seqno " +
ToString(f1->fd.smallest_seqno) + " " +
ToString(f1->fd.largest_seqno) + " " +
ToString(f1->fd.GetNumber()) + " vs. " +
ToString(f2->fd.smallest_seqno) + " " +
ToString(f2->fd.largest_seqno) + " " +
ToString(f2->fd.GetNumber()));
}
} else {
#ifndef NDEBUG
auto pair = std::make_pair(&f1, &f2);
TEST_SYNC_POINT_CALLBACK("VersionBuilder::CheckConsistency1", &pair);
#endif
if (!level_nonzero_cmp_(f1, f2)) {
return Status::Corruption(
"L" + ToString(level) +
" files are not sorted properly: files #" +
ToString(f1->fd.GetNumber()) + ", #" +
ToString(f2->fd.GetNumber()));
// Make sure there is no overlap in level
if (icmp->Compare(lhs->largest, rhs->smallest) >= 0) {
std::ostringstream oss;
oss << 'L' << level << " has overlapping ranges: file #"
<< lhs->fd.GetNumber()
<< " largest key: " << lhs->largest.DebugString(true)
<< " vs. file #" << rhs->fd.GetNumber()
<< " smallest key: " << rhs->smallest.DebugString(true);
return Status::Corruption("VersionBuilder", oss.str());
}
// Make sure there is no overlap in levels > 0
if (vstorage->InternalComparator()->Compare(f1->largest,
f2->smallest) >= 0) {
return Status::Corruption(
"L" + ToString(level) + " have overlapping ranges: file #" +
ToString(f1->fd.GetNumber()) +
" largest key: " + (f1->largest).DebugString(true) +
" vs. file #" + ToString(f2->fd.GetNumber()) +
" smallest key: " + (f2->smallest).DebugString(true));
}
return Status::OK();
};
const Status s = CheckConsistencyDetailsForLevel(
vstorage, level, checker, "VersionBuilder::CheckConsistency1",
&expected_linked_ssts);
if (!s.ok()) {
return s;
}
}
}
// Make sure that all blob files in the version have non-garbage data.
// Make sure that all blob files in the version have non-garbage data and
// the links between them and the table files are consistent.
const auto& blob_files = vstorage->GetBlobFiles();
for (const auto& pair : blob_files) {
const uint64_t blob_file_number = pair.first;
@ -428,7 +489,9 @@ class VersionBuilder::Rep {
return ret_s;
}
Status CheckConsistency(VersionStorageInfo* vstorage) {
Status CheckConsistency(const VersionStorageInfo* vstorage) const {
assert(vstorage);
// Always run consistency checks in debug build
#ifdef NDEBUG
if (!vstorage->force_consistency_checks()) {
@ -748,7 +811,7 @@ class VersionBuilder::Rep {
}
// Apply all of the edits in *edit to the current state.
Status Apply(VersionEdit* edit) {
Status Apply(const VersionEdit* edit) {
{
const Status s = CheckConsistency(base_vstorage_);
if (!s.ok()) {
@ -910,7 +973,8 @@ class VersionBuilder::Rep {
}
}
void MaybeAddFile(VersionStorageInfo* vstorage, int level, FileMetaData* f) {
void MaybeAddFile(VersionStorageInfo* vstorage, int level,
FileMetaData* f) const {
const uint64_t file_number = f->fd.GetNumber();
const auto& level_state = levels_[level];
@ -935,52 +999,52 @@ class VersionBuilder::Rep {
}
}
void SaveSSTFilesTo(VersionStorageInfo* vstorage) {
for (int level = 0; level < num_levels_; level++) {
const auto& cmp = (level == 0) ? level_zero_cmp_ : level_nonzero_cmp_;
// Merge the set of added files with the set of pre-existing files.
// Drop any deleted files. Store the result in *v.
const auto& base_files = base_vstorage_->LevelFiles(level);
const auto& unordered_added_files = levels_[level].added_files;
vstorage->Reserve(level,
base_files.size() + unordered_added_files.size());
template <class Cmp>
void SaveSSTFilesTo(VersionStorageInfo* vstorage, int level, Cmp cmp) const {
// Merge the set of added files with the set of pre-existing files.
// Drop any deleted files. Store the result in *vstorage.
const auto& base_files = base_vstorage_->LevelFiles(level);
const auto& unordered_added_files = levels_[level].added_files;
vstorage->Reserve(level, base_files.size() + unordered_added_files.size());
// Sort added files for the level.
std::vector<FileMetaData*> added_files;
added_files.reserve(unordered_added_files.size());
for (const auto& pair : unordered_added_files) {
added_files.push_back(pair.second);
}
std::sort(added_files.begin(), added_files.end(), cmp);
// Sort added files for the level.
std::vector<FileMetaData*> added_files;
added_files.reserve(unordered_added_files.size());
for (const auto& pair : unordered_added_files) {
added_files.push_back(pair.second);
}
std::sort(added_files.begin(), added_files.end(), cmp);
#ifndef NDEBUG
FileMetaData* prev_added_file = nullptr;
for (const auto& added : added_files) {
if (level > 0 && prev_added_file != nullptr) {
assert(base_vstorage_->InternalComparator()->Compare(
prev_added_file->smallest, added->smallest) <= 0);
}
prev_added_file = added;
}
#endif
auto base_iter = base_files.begin();
auto base_end = base_files.end();
auto added_iter = added_files.begin();
auto added_end = added_files.end();
while (added_iter != added_end || base_iter != base_end) {
if (base_iter == base_end ||
(added_iter != added_end && cmp(*added_iter, *base_iter))) {
MaybeAddFile(vstorage, level, *added_iter++);
} else {
MaybeAddFile(vstorage, level, *base_iter++);
}
auto base_iter = base_files.begin();
auto base_end = base_files.end();
auto added_iter = added_files.begin();
auto added_end = added_files.end();
while (added_iter != added_end || base_iter != base_end) {
if (base_iter == base_end ||
(added_iter != added_end && cmp(*added_iter, *base_iter))) {
MaybeAddFile(vstorage, level, *added_iter++);
} else {
MaybeAddFile(vstorage, level, *base_iter++);
}
}
}
void SaveSSTFilesTo(VersionStorageInfo* vstorage) const {
assert(vstorage);
if (!num_levels_) {
return;
}
SaveSSTFilesTo(vstorage, /* level */ 0, level_zero_cmp_);
for (int level = 1; level < num_levels_; ++level) {
SaveSSTFilesTo(vstorage, level, level_nonzero_cmp_);
}
}
// Save the current state in *vstorage.
Status SaveTo(VersionStorageInfo* vstorage) {
Status SaveTo(VersionStorageInfo* vstorage) const {
Status s = CheckConsistency(base_vstorage_);
if (!s.ok()) {
return s;
@ -1117,9 +1181,11 @@ bool VersionBuilder::CheckConsistencyForNumLevels() {
return rep_->CheckConsistencyForNumLevels();
}
Status VersionBuilder::Apply(VersionEdit* edit) { return rep_->Apply(edit); }
Status VersionBuilder::Apply(const VersionEdit* edit) {
return rep_->Apply(edit);
}
Status VersionBuilder::SaveTo(VersionStorageInfo* vstorage) {
Status VersionBuilder::SaveTo(VersionStorageInfo* vstorage) const {
return rep_->SaveTo(vstorage);
}

View File

@ -37,8 +37,8 @@ class VersionBuilder {
~VersionBuilder();
bool CheckConsistencyForNumLevels();
Status Apply(VersionEdit* edit);
Status SaveTo(VersionStorageInfo* vstorage);
Status Apply(const VersionEdit* edit);
Status SaveTo(VersionStorageInfo* vstorage) const;
Status LoadTableHandlers(InternalStats* internal_stats, int max_threads,
bool prefetch_index_and_filter_in_cache,
bool is_initial_load,
@ -66,5 +66,4 @@ class BaseReferencedVersionBuilder {
Version* version_;
};
extern bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b);
} // namespace ROCKSDB_NAMESPACE

View File

@ -120,10 +120,9 @@ Status OverlapWithIterator(const Comparator* ucmp,
// are MergeInProgress).
class FilePicker {
public:
FilePicker(std::vector<FileMetaData*>* files, const Slice& user_key,
const Slice& ikey, autovector<LevelFilesBrief>* file_levels,
unsigned int num_levels, FileIndexer* file_indexer,
const Comparator* user_comparator,
FilePicker(const Slice& user_key, const Slice& ikey,
autovector<LevelFilesBrief>* file_levels, unsigned int num_levels,
FileIndexer* file_indexer, const Comparator* user_comparator,
const InternalKeyComparator* internal_comparator)
: num_levels_(num_levels),
curr_level_(static_cast<unsigned int>(-1)),
@ -131,9 +130,6 @@ class FilePicker {
hit_file_level_(static_cast<unsigned int>(-1)),
search_left_bound_(0),
search_right_bound_(FileIndexer::kLevelMaxIndex),
#ifndef NDEBUG
files_(files),
#endif
level_files_brief_(file_levels),
is_hit_file_last_in_level_(false),
curr_file_level_(nullptr),
@ -142,9 +138,6 @@ class FilePicker {
file_indexer_(file_indexer),
user_comparator_(user_comparator),
internal_comparator_(internal_comparator) {
#ifdef NDEBUG
(void)files;
#endif
// Setup member variables to search first level.
search_ended_ = !PrepareNextLevel();
if (!search_ended_) {
@ -214,23 +207,7 @@ class FilePicker {
}
}
}
#ifndef NDEBUG
// Sanity check to make sure that the files are correctly sorted
if (prev_file_) {
if (curr_level_ != 0) {
int comp_sign = internal_comparator_->Compare(
prev_file_->largest_key, f->smallest_key);
assert(comp_sign < 0);
} else {
// level == 0, the current file cannot be newer than the previous
// one. Use compressed data structure, has no attribute seqNo
assert(curr_index_in_curr_level_ > 0);
assert(!NewestFirstBySeqNo(files_[0][curr_index_in_curr_level_],
files_[0][curr_index_in_curr_level_-1]));
}
}
prev_file_ = f;
#endif
returned_file_level_ = curr_level_;
if (curr_level_ > 0 && cmp_largest < 0) {
// No more files to search in this level.
@ -262,9 +239,6 @@ class FilePicker {
unsigned int hit_file_level_;
int32_t search_left_bound_;
int32_t search_right_bound_;
#ifndef NDEBUG
std::vector<FileMetaData*>* files_;
#endif
autovector<LevelFilesBrief>* level_files_brief_;
bool search_ended_;
bool is_hit_file_last_in_level_;
@ -276,9 +250,6 @@ class FilePicker {
FileIndexer* file_indexer_;
const Comparator* user_comparator_;
const InternalKeyComparator* internal_comparator_;
#ifndef NDEBUG
FdWithKeyRange* prev_file_;
#endif
// Setup local variables to search next level.
// Returns false if there are no more levels to search.
@ -348,9 +319,7 @@ class FilePicker {
}
start_index_in_curr_level_ = start_index;
curr_index_in_curr_level_ = start_index;
#ifndef NDEBUG
prev_file_ = nullptr;
#endif
return true;
}
// curr_level_ = num_levels_. So, no more levels to search.
@ -2022,10 +1991,10 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
pinned_iters_mgr.StartPinning();
}
FilePicker fp(
storage_info_.files_, user_key, ikey, &storage_info_.level_files_brief_,
storage_info_.num_non_empty_levels_, &storage_info_.file_indexer_,
user_comparator(), internal_comparator());
FilePicker fp(user_key, ikey, &storage_info_.level_files_brief_,
storage_info_.num_non_empty_levels_,
&storage_info_.file_indexer_, user_comparator(),
internal_comparator());
FdWithKeyRange* f = fp.GetNextFile();
while (f != nullptr) {

View File

@ -491,7 +491,7 @@ class VersionStorageInfo {
next_file_to_compact_by_size_[level] = 0;
}
const InternalKeyComparator* InternalComparator() {
const InternalKeyComparator* InternalComparator() const {
return internal_comparator_;
}