879357fdb0
Summary: A current limitation of backups is that you don't know the exact database state of when the backup was taken. With this new feature, you can at least inspect the backup's DB state without restoring it by opening it as a read-only DB. Rather than add something like OpenAsReadOnlyDB to the BackupEngine API, which would inhibit opening stackable DB implementations read-only (if/when their APIs support it), we instead provide a DB name and Env that can be used to open as a read-only DB. Possible follow-up work: * Add a version of GetBackupInfo for a single backup. * Let CreateNewBackup return the BackupID of the newly-created backup. Implementation details: Refactored ChrootFileSystem to split off new base class RemapFileSystem, which allows more general remapping of files. We use this base class to implement BackupEngineImpl::RemapSharedFileSystem. To minimize API impact, I decided to just add these fields `name_for_open` and `env_for_open` to those set by GetBackupInfo when include_file_details=true. Creating the RemapSharedFileSystem adds a bit to the memory consumption, perhaps unnecessarily in some cases, but this has been mitigated by (a) only initialize the RemapSharedFileSystem lazily when GetBackupInfo with include_file_details=true is called, and (b) using the existing `shared_ptr<FileInfo>` objects to hold most of the mapping data. To enhance API safety, RemapSharedFileSystem is wrapped by new ReadOnlyFileSystem which rejects any attempts to write. This uncovered a couple of places in which DB::OpenForReadOnly would write to the filesystem, so I fixed these. Added a release note because this affects logging. Additional minor refactoring in backupable_db.cc to support the new functionality. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8142 Test Plan: new test (run with ASAN and UBSAN), added to stress test and ran it for a while with amplified backup_one_in Reviewed By: ajkr Differential Revision: D27535408 Pulled By: pdillinger fbshipit-source-id: 04666d310aa0261ef6b2385c43ca793ce1dfd148
173 lines
6.0 KiB
C++
173 lines
6.0 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
#include "db/db_impl/compacted_db_impl.h"
|
|
|
|
#include "db/db_impl/db_impl.h"
|
|
#include "db/version_set.h"
|
|
#include "table/get_context.h"
|
|
#include "util/cast_util.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
extern void MarkKeyMayExist(void* arg);
|
|
extern bool SaveValue(void* arg, const ParsedInternalKey& parsed_key,
|
|
const Slice& v, bool hit_and_return);
|
|
|
|
CompactedDBImpl::CompactedDBImpl(const DBOptions& options,
|
|
const std::string& dbname)
|
|
: DBImpl(options, dbname, /*seq_per_batch*/ false, +/*batch_per_txn*/ true,
|
|
/*read_only*/ true),
|
|
cfd_(nullptr),
|
|
version_(nullptr),
|
|
user_comparator_(nullptr) {}
|
|
|
|
CompactedDBImpl::~CompactedDBImpl() {
|
|
}
|
|
|
|
size_t CompactedDBImpl::FindFile(const Slice& key) {
|
|
size_t right = files_.num_files - 1;
|
|
auto cmp = [&](const FdWithKeyRange& f, const Slice& k) -> bool {
|
|
return user_comparator_->Compare(ExtractUserKey(f.largest_key), k) < 0;
|
|
};
|
|
return static_cast<size_t>(std::lower_bound(files_.files,
|
|
files_.files + right, key, cmp) - files_.files);
|
|
}
|
|
|
|
Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*,
|
|
const Slice& key, PinnableSlice* value) {
|
|
GetContext get_context(user_comparator_, nullptr, nullptr, nullptr,
|
|
GetContext::kNotFound, key, value, nullptr, nullptr,
|
|
nullptr, true, nullptr, nullptr);
|
|
LookupKey lkey(key, kMaxSequenceNumber);
|
|
Status s = files_.files[FindFile(key)].fd.table_reader->Get(
|
|
options, lkey.internal_key(), &get_context, nullptr);
|
|
if (!s.ok() && !s.IsNotFound()) {
|
|
return s;
|
|
}
|
|
if (get_context.State() == GetContext::kFound) {
|
|
return Status::OK();
|
|
}
|
|
return Status::NotFound();
|
|
}
|
|
|
|
std::vector<Status> CompactedDBImpl::MultiGet(const ReadOptions& options,
|
|
const std::vector<ColumnFamilyHandle*>&,
|
|
const std::vector<Slice>& keys, std::vector<std::string>* values) {
|
|
autovector<TableReader*, 16> reader_list;
|
|
for (const auto& key : keys) {
|
|
const FdWithKeyRange& f = files_.files[FindFile(key)];
|
|
if (user_comparator_->Compare(key, ExtractUserKey(f.smallest_key)) < 0) {
|
|
reader_list.push_back(nullptr);
|
|
} else {
|
|
LookupKey lkey(key, kMaxSequenceNumber);
|
|
f.fd.table_reader->Prepare(lkey.internal_key());
|
|
reader_list.push_back(f.fd.table_reader);
|
|
}
|
|
}
|
|
std::vector<Status> statuses(keys.size(), Status::NotFound());
|
|
values->resize(keys.size());
|
|
int idx = 0;
|
|
for (auto* r : reader_list) {
|
|
if (r != nullptr) {
|
|
PinnableSlice pinnable_val;
|
|
std::string& value = (*values)[idx];
|
|
GetContext get_context(user_comparator_, nullptr, nullptr, nullptr,
|
|
GetContext::kNotFound, keys[idx], &pinnable_val,
|
|
nullptr, nullptr, nullptr, true, nullptr, nullptr);
|
|
LookupKey lkey(keys[idx], kMaxSequenceNumber);
|
|
Status s = r->Get(options, lkey.internal_key(), &get_context, nullptr);
|
|
assert(static_cast<size_t>(idx) < statuses.size());
|
|
if (!s.ok() && !s.IsNotFound()) {
|
|
statuses[idx] = s;
|
|
} else {
|
|
value.assign(pinnable_val.data(), pinnable_val.size());
|
|
if (get_context.State() == GetContext::kFound) {
|
|
statuses[idx] = Status::OK();
|
|
}
|
|
}
|
|
}
|
|
++idx;
|
|
}
|
|
return statuses;
|
|
}
|
|
|
|
Status CompactedDBImpl::Init(const Options& options) {
|
|
SuperVersionContext sv_context(/* create_superversion */ true);
|
|
mutex_.Lock();
|
|
ColumnFamilyDescriptor cf(kDefaultColumnFamilyName,
|
|
ColumnFamilyOptions(options));
|
|
Status s = Recover({cf}, true /* read only */, false, true);
|
|
if (s.ok()) {
|
|
cfd_ = static_cast_with_check<ColumnFamilyHandleImpl>(DefaultColumnFamily())
|
|
->cfd();
|
|
cfd_->InstallSuperVersion(&sv_context, &mutex_);
|
|
}
|
|
mutex_.Unlock();
|
|
sv_context.Clean();
|
|
if (!s.ok()) {
|
|
return s;
|
|
}
|
|
NewThreadStatusCfInfo(cfd_);
|
|
version_ = cfd_->GetSuperVersion()->current;
|
|
user_comparator_ = cfd_->user_comparator();
|
|
auto* vstorage = version_->storage_info();
|
|
if (vstorage->num_non_empty_levels() == 0) {
|
|
return Status::NotSupported("no file exists");
|
|
}
|
|
const LevelFilesBrief& l0 = vstorage->LevelFilesBrief(0);
|
|
// L0 should not have files
|
|
if (l0.num_files > 1) {
|
|
return Status::NotSupported("L0 contain more than 1 file");
|
|
}
|
|
if (l0.num_files == 1) {
|
|
if (vstorage->num_non_empty_levels() > 1) {
|
|
return Status::NotSupported("Both L0 and other level contain files");
|
|
}
|
|
files_ = l0;
|
|
return Status::OK();
|
|
}
|
|
|
|
for (int i = 1; i < vstorage->num_non_empty_levels() - 1; ++i) {
|
|
if (vstorage->LevelFilesBrief(i).num_files > 0) {
|
|
return Status::NotSupported("Other levels also contain files");
|
|
}
|
|
}
|
|
|
|
int level = vstorage->num_non_empty_levels() - 1;
|
|
if (vstorage->LevelFilesBrief(level).num_files > 0) {
|
|
files_ = vstorage->LevelFilesBrief(level);
|
|
return Status::OK();
|
|
}
|
|
return Status::NotSupported("no file exists");
|
|
}
|
|
|
|
Status CompactedDBImpl::Open(const Options& options,
|
|
const std::string& dbname, DB** dbptr) {
|
|
*dbptr = nullptr;
|
|
|
|
if (options.max_open_files != -1) {
|
|
return Status::InvalidArgument("require max_open_files = -1");
|
|
}
|
|
if (options.merge_operator.get() != nullptr) {
|
|
return Status::InvalidArgument("merge operator is not supported");
|
|
}
|
|
DBOptions db_options(options);
|
|
std::unique_ptr<CompactedDBImpl> db(new CompactedDBImpl(db_options, dbname));
|
|
Status s = db->Init(options);
|
|
if (s.ok()) {
|
|
db->StartPeriodicWorkScheduler();
|
|
ROCKS_LOG_INFO(db->immutable_db_options_.info_log,
|
|
"Opened the db as fully compacted mode");
|
|
LogFlush(db->immutable_db_options_.info_log);
|
|
*dbptr = db.release();
|
|
}
|
|
return s;
|
|
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
#endif // ROCKSDB_LITE
|