0acc738810
Summary: There are two versions of FindObsoleteFiles(): * full scan, which is executed every 6 hours (and it's terribly slow) * no full scan, which is executed every time a background process finishes and iterator is deleted This diff is optimizing the second case (no full scan). Here's what we do before the diff: * Get the list of obsolete files (files with ref==0). Some files in obsolete_files set might actually be live. * Get the list of live files to avoid deleting files that are live. * Delete files that are in obsolete_files and not in live_files. After this diff: * The only files with ref==0 that are still live are files that have been part of move compaction. Don't include moved files in obsolete_files. * Get the list of obsolete files (which exclude moved files). * No need to get the list of live files, since all files in obsolete_files need to be deleted. I'll post the benchmark results, but you can get the feel of it here: https://reviews.facebook.net/D30123 This depends on D30123. P.S. We should do full scan only in failure scenarios, not every 6 hours. I'll do this in a follow-up diff. Test Plan: One new unit test. Made sure that unit test fails if we don't have a `if (!f->moved)` safeguard in ~Version. make check Big number of compactions and flushes: ./db_stress --threads=30 --ops_per_thread=20000000 --max_key=10000 --column_families=20 --clear_column_family_one_in=10000000 --verify_before_write=0 --reopen=15 --max_background_compactions=10 --max_background_flushes=10 --db=/fast-rocksdb-tmp/db_stress --prefixpercent=0 --iterpercent=0 --writepercent=75 --db_write_buffer_size=2000000 Reviewers: yhchiang, rven, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D30249
105 lines
3.2 KiB
C++
105 lines
3.2 KiB
C++
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under the BSD-style license found in the
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#pragma once
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "db/column_family.h"
|
|
|
|
namespace rocksdb {
|
|
|
|
class MemTable;
|
|
|
|
struct JobContext {
|
|
inline bool HaveSomethingToDelete() const {
|
|
return full_scan_candidate_files.size() || sst_delete_files.size() ||
|
|
log_delete_files.size() || new_superversion != nullptr ||
|
|
superversions_to_free.size() > 0 || memtables_to_free.size() > 0;
|
|
}
|
|
|
|
// Structure to store information for candidate files to delete.
|
|
struct CandidateFileInfo {
|
|
std::string file_name;
|
|
uint32_t path_id;
|
|
CandidateFileInfo(std::string name, uint32_t path)
|
|
: file_name(std::move(name)), path_id(path) {}
|
|
bool operator==(const CandidateFileInfo& other) const {
|
|
return file_name == other.file_name && path_id == other.path_id;
|
|
}
|
|
};
|
|
|
|
// a list of all files that we'll consider deleting
|
|
// (every once in a while this is filled up with all files
|
|
// in the DB directory)
|
|
// (filled only if we're doing full scan)
|
|
std::vector<CandidateFileInfo> full_scan_candidate_files;
|
|
|
|
// the list of all live sst files that cannot be deleted
|
|
// (filled only if we're doing full scan)
|
|
std::vector<FileDescriptor> full_scan_sst_live;
|
|
|
|
// a list of sst files that we need to delete
|
|
std::vector<FileMetaData*> sst_delete_files;
|
|
|
|
// a list of log files that we need to delete
|
|
std::vector<uint64_t> log_delete_files;
|
|
|
|
// a list of memtables to be free
|
|
autovector<MemTable*> memtables_to_free;
|
|
|
|
autovector<SuperVersion*> superversions_to_free;
|
|
|
|
SuperVersion* new_superversion; // if nullptr no new superversion
|
|
|
|
// the current manifest_file_number, log_number and prev_log_number
|
|
// that corresponds to the set of files in 'live'.
|
|
uint64_t manifest_file_number;
|
|
uint64_t pending_manifest_file_number;
|
|
uint64_t log_number;
|
|
uint64_t prev_log_number;
|
|
|
|
uint64_t min_pending_output = 0;
|
|
|
|
explicit JobContext(bool create_superversion = false) {
|
|
manifest_file_number = 0;
|
|
pending_manifest_file_number = 0;
|
|
log_number = 0;
|
|
prev_log_number = 0;
|
|
new_superversion = create_superversion ? new SuperVersion() : nullptr;
|
|
}
|
|
|
|
void Clean() {
|
|
// free pending memtables
|
|
for (auto m : memtables_to_free) {
|
|
delete m;
|
|
}
|
|
// free superversions
|
|
for (auto s : superversions_to_free) {
|
|
delete s;
|
|
}
|
|
// if new_superversion was not used, it will be non-nullptr and needs
|
|
// to be freed here
|
|
delete new_superversion;
|
|
|
|
memtables_to_free.clear();
|
|
superversions_to_free.clear();
|
|
new_superversion = nullptr;
|
|
}
|
|
|
|
~JobContext() {
|
|
assert(memtables_to_free.size() == 0);
|
|
assert(superversions_to_free.size() == 0);
|
|
assert(new_superversion == nullptr);
|
|
}
|
|
};
|
|
|
|
} // namespace rocksdb
|