rocksdb/env/env.cc
Andrew Kryczka a4fb1f8c04 Add crash-recovery correctness check to db_stress
Summary:
Previously, our `db_stress` tool held the expected state of the DB in memory, so after crash recovery there was no way to verify data correctness. This PR adds an option, `--expected_values_file`, which specifies a file holding the expected values.

In black-box testing, the `db_stress` process can be killed at any point, so updates to the `--expected_values_file` must be atomic. We achieve this by `mmap`ing the file and relying on `std::atomic<uint32_t>` for atomicity. Strictly speaking, this does not fully guarantee what we want, since `std::atomic<uint32_t>` could, in theory, be implemented as multiple stores guarded by a mutex. We can verify the assumption by checking `std::atomic::is_always_lock_free`.
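
As a minimal sketch of the mechanism (not the actual `db_stress` code; the file path, key count, and POSIX calls are illustrative, and the lock-free check requires C++17):

```cpp
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main() {
  // C++17: fails to compile if 32-bit atomics might be implemented with locks,
  // in which case a kill-anytime process could leave a torn value on disk.
  static_assert(std::atomic<uint32_t>::is_always_lock_free,
                "expected-values updates would not be crash-safe");

  const size_t kNumKeys = 1024;  // illustrative key count
  const size_t kLen = kNumKeys * sizeof(std::atomic<uint32_t>);

  int fd = open("/tmp/expected_values", O_CREAT | O_RDWR, 0644);
  if (fd < 0 || ftruncate(fd, kLen) != 0) {
    return 1;
  }
  void* base = mmap(nullptr, kLen, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (base == MAP_FAILED) {
    close(fd);
    return 1;
  }

  // Each expected value is updated with a single 32-bit atomic store, so a
  // later reader of the file never observes a half-written value even if the
  // process is killed mid-update.
  auto* values = static_cast<std::atomic<uint32_t>*>(base);
  values[42].store(7, std::memory_order_relaxed);

  munmap(base, kLen);
  close(fd);
  return 0;
}
```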

There was no existing way to expose an `mmap`'d file's contents as a raw memory buffer, so this PR adds one via the new `Env::NewMemoryMappedFileBuffer` function and the `MemoryMappedFileBuffer` class.
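
A rough usage sketch, assuming the new function follows the usual `Env::New*` convention of taking the file name plus an output `unique_ptr`, and that the buffer exposes its mapping through an accessor along the lines of `GetBase()` (the exact member names in this revision may differ):

```cpp
#include <atomic>
#include <cstdint>
#include <memory>
#include <string>

#include "rocksdb/env.h"

// Open `fname` as a writable memory mapping through the Env interface.
// Envs without an mmap implementation return Status::NotSupported().
rocksdb::Status OpenExpectedValuesFile(const std::string& fname) {
  rocksdb::Env* env = rocksdb::Env::Default();
  std::unique_ptr<rocksdb::MemoryMappedFileBuffer> buffer;
  rocksdb::Status s = env->NewMemoryMappedFileBuffer(fname, &buffer);
  if (!s.ok()) {
    return s;
  }
  // The buffer exposes the raw mapping; the accessor name used here is
  // illustrative. The sketch assumes the file is at least 4 bytes long.
  auto* values = static_cast<std::atomic<uint32_t>*>(buffer->GetBase());
  values[0].store(0, std::memory_order_relaxed);
  return rocksdb::Status::OK();
}
```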

`db_crashtest.py` is updated to use an expected values file for black-box testing. On the first iteration (when the DB is created), an empty file is provided, since `db_stress` will populate it as it runs. On subsequent iterations, the same filename is provided so `db_stress` can verify on startup that the data matches the expected values.
Closes https://github.com/facebook/rocksdb/pull/3629

Differential Revision: D7463144

Pulled By: ajkr

fbshipit-source-id: c8f3e82c93e045a90055e2468316be155633bd8b
2018-04-24 15:58:22 -07:00


// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "rocksdb/env.h"
#include <thread>
#include "options/db_options.h"
#include "port/port.h"
#include "port/sys_time.h"
#include "rocksdb/options.h"
#include "util/arena.h"
#include "util/autovector.h"
namespace rocksdb {
Env::~Env() {
}
std::string Env::PriorityToString(Env::Priority priority) {
switch (priority) {
case Env::Priority::BOTTOM:
return "Bottom";
case Env::Priority::LOW:
return "Low";
case Env::Priority::HIGH:
return "High";
case Env::Priority::TOTAL:
assert(false);
}
return "Invalid";
}
uint64_t Env::GetThreadID() const {
std::hash<std::thread::id> hasher;
return hasher(std::this_thread::get_id());
}
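// Default implementation of ReuseWritableFile(): recycle the old file by
// renaming `old_fname` to `fname`, then reopen it for writing. Env subclasses
// may override this with a cheaper implementation.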
Status Env::ReuseWritableFile(const std::string& fname,
const std::string& old_fname,
unique_ptr<WritableFile>* result,
const EnvOptions& options) {
Status s = RenameFile(old_fname, fname);
if (!s.ok()) {
return s;
}
return NewWritableFile(fname, result, options);
}
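// Default implementation of GetChildrenFileAttributes(): list the directory
// with GetChildren() and stat each entry with GetFileSize(). Entries that
// disappear between the two calls are silently skipped.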
Status Env::GetChildrenFileAttributes(const std::string& dir,
std::vector<FileAttributes>* result) {
assert(result != nullptr);
std::vector<std::string> child_fnames;
Status s = GetChildren(dir, &child_fnames);
if (!s.ok()) {
return s;
}
result->resize(child_fnames.size());
size_t result_size = 0;
for (size_t i = 0; i < child_fnames.size(); ++i) {
const std::string path = dir + "/" + child_fnames[i];
if (!(s = GetFileSize(path, &(*result)[result_size].size_bytes)).ok()) {
if (FileExists(path).IsNotFound()) {
// The file may have been deleted since we listed the directory
continue;
}
return s;
}
(*result)[result_size].name = std::move(child_fnames[i]);
result_size++;
}
result->resize(result_size);
return Status::OK();
}
SequentialFile::~SequentialFile() {
}
RandomAccessFile::~RandomAccessFile() {
}
WritableFile::~WritableFile() {
}
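// Out-of-line destructor for the MemoryMappedFileBuffer base class introduced
// by this change; platform-specific subclasses are expected to unmap the
// region in their own destructors.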
MemoryMappedFileBuffer::~MemoryMappedFileBuffer() {}
Logger::~Logger() {}
Status Logger::Close() {
if (!closed_) {
closed_ = true;
return CloseImpl();
} else {
return Status::OK();
}
}
Status Logger::CloseImpl() { return Status::NotSupported(); }
FileLock::~FileLock() {
}
void LogFlush(Logger *info_log) {
if (info_log) {
info_log->Flush();
}
}
static void Logv(Logger *info_log, const char* format, va_list ap) {
if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::INFO_LEVEL) {
info_log->Logv(InfoLogLevel::INFO_LEVEL, format, ap);
}
}
void Log(Logger* info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Logv(info_log, format, ap);
va_end(ap);
}
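// Writes a log entry at `log_level`, prefixing non-INFO entries with the level
// name. The prefixed format string is built in a fixed 500-byte buffer, so
// very long format strings are truncated before formatting.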
void Logger::Logv(const InfoLogLevel log_level, const char* format, va_list ap) {
static const char* kInfoLogLevelNames[5] = { "DEBUG", "INFO", "WARN",
"ERROR", "FATAL" };
if (log_level < log_level_) {
return;
}
if (log_level == InfoLogLevel::INFO_LEVEL) {
// Doesn't print log level if it is INFO level.
// This is to avoid unexpected performance regression after we add
// the feature of log level. All the logs before we add the feature
// are INFO level. We don't want to add extra costs to those existing
// logging.
Logv(format, ap);
} else {
char new_format[500];
snprintf(new_format, sizeof(new_format) - 1, "[%s] %s",
kInfoLogLevelNames[log_level], format);
Logv(new_format, ap);
}
}
static void Logv(const InfoLogLevel log_level, Logger *info_log, const char *format, va_list ap) {
if (info_log && info_log->GetInfoLogLevel() <= log_level) {
if (log_level == InfoLogLevel::HEADER_LEVEL) {
info_log->LogHeader(format, ap);
} else {
info_log->Logv(log_level, format, ap);
}
}
}
void Log(const InfoLogLevel log_level, Logger* info_log, const char* format,
...) {
va_list ap;
va_start(ap, format);
Logv(log_level, info_log, format, ap);
va_end(ap);
}
static void Headerv(Logger *info_log, const char *format, va_list ap) {
if (info_log) {
info_log->LogHeader(format, ap);
}
}
void Header(Logger* info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Headerv(info_log, format, ap);
va_end(ap);
}
static void Debugv(Logger* info_log, const char* format, va_list ap) {
if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::DEBUG_LEVEL) {
info_log->Logv(InfoLogLevel::DEBUG_LEVEL, format, ap);
}
}
void Debug(Logger* info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Debugv(info_log, format, ap);
va_end(ap);
}
static void Infov(Logger* info_log, const char* format, va_list ap) {
if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::INFO_LEVEL) {
info_log->Logv(InfoLogLevel::INFO_LEVEL, format, ap);
}
}
void Info(Logger* info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Infov(info_log, format, ap);
va_end(ap);
}
static void Warnv(Logger* info_log, const char* format, va_list ap) {
if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::WARN_LEVEL) {
info_log->Logv(InfoLogLevel::WARN_LEVEL, format, ap);
}
}
void Warn(Logger* info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Warnv(info_log, format, ap);
va_end(ap);
}
static void Errorv(Logger* info_log, const char* format, va_list ap) {
if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::ERROR_LEVEL) {
info_log->Logv(InfoLogLevel::ERROR_LEVEL, format, ap);
}
}
void Error(Logger* info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Errorv(info_log, format, ap);
va_end(ap);
}
static void Fatalv(Logger* info_log, const char* format, va_list ap) {
if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::FATAL_LEVEL) {
info_log->Logv(InfoLogLevel::FATAL_LEVEL, format, ap);
}
}
void Fatal(Logger* info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Fatalv(info_log, format, ap);
va_end(ap);
}
void LogFlush(const shared_ptr<Logger>& info_log) {
LogFlush(info_log.get());
}
void Log(const InfoLogLevel log_level, const shared_ptr<Logger>& info_log,
const char* format, ...) {
va_list ap;
va_start(ap, format);
Logv(log_level, info_log.get(), format, ap);
va_end(ap);
}
void Header(const shared_ptr<Logger>& info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Headerv(info_log.get(), format, ap);
va_end(ap);
}
void Debug(const shared_ptr<Logger>& info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Debugv(info_log.get(), format, ap);
va_end(ap);
}
void Info(const shared_ptr<Logger>& info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Infov(info_log.get(), format, ap);
va_end(ap);
}
void Warn(const shared_ptr<Logger>& info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Warnv(info_log.get(), format, ap);
va_end(ap);
}
void Error(const shared_ptr<Logger>& info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Errorv(info_log.get(), format, ap);
va_end(ap);
}
void Fatal(const shared_ptr<Logger>& info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Fatalv(info_log.get(), format, ap);
va_end(ap);
}
void Log(const shared_ptr<Logger>& info_log, const char* format, ...) {
va_list ap;
va_start(ap, format);
Logv(info_log.get(), format, ap);
va_end(ap);
}
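// Creates `fname`, writes `data` into it, and optionally syncs. On any
// failure the partially written file is deleted so it is not left behind.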
Status WriteStringToFile(Env* env, const Slice& data, const std::string& fname,
bool should_sync) {
unique_ptr<WritableFile> file;
EnvOptions soptions;
Status s = env->NewWritableFile(fname, &file, soptions);
if (!s.ok()) {
return s;
}
s = file->Append(data);
if (s.ok() && should_sync) {
s = file->Sync();
}
if (!s.ok()) {
env->DeleteFile(fname);
}
return s;
}
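// Reads the entire contents of `fname` into `*data` using 8 KB sequential
// reads, stopping at the first empty read (EOF) or error.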
Status ReadFileToString(Env* env, const std::string& fname, std::string* data) {
EnvOptions soptions;
data->clear();
unique_ptr<SequentialFile> file;
Status s = env->NewSequentialFile(fname, &file, soptions);
if (!s.ok()) {
return s;
}
static const int kBufferSize = 8192;
char* space = new char[kBufferSize];
while (true) {
Slice fragment;
s = file->Read(kBufferSize, &fragment, space);
if (!s.ok()) {
break;
}
data->append(fragment.data(), fragment.size());
if (fragment.empty()) {
break;
}
}
delete[] space;
return s;
}
EnvWrapper::~EnvWrapper() {
}
namespace { // anonymous namespace
void AssignEnvOptions(EnvOptions* env_options, const DBOptions& options) {
env_options->use_mmap_reads = options.allow_mmap_reads;
env_options->use_mmap_writes = options.allow_mmap_writes;
env_options->use_direct_reads = options.use_direct_reads;
env_options->set_fd_cloexec = options.is_fd_close_on_exec;
env_options->bytes_per_sync = options.bytes_per_sync;
env_options->compaction_readahead_size = options.compaction_readahead_size;
env_options->random_access_max_buffer_size =
options.random_access_max_buffer_size;
env_options->rate_limiter = options.rate_limiter.get();
env_options->writable_file_max_buffer_size =
options.writable_file_max_buffer_size;
env_options->allow_fallocate = options.allow_fallocate;
}
}  // anonymous namespace
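// The OptimizeFor*() methods below return a copy of `env_options` adjusted for
// a particular file type: direct reads are disabled for WAL and MANIFEST
// reads, while direct I/O for compaction table files follows
// use_direct_io_for_flush_and_compaction.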
EnvOptions Env::OptimizeForLogWrite(const EnvOptions& env_options,
const DBOptions& db_options) const {
EnvOptions optimized_env_options(env_options);
optimized_env_options.bytes_per_sync = db_options.wal_bytes_per_sync;
optimized_env_options.writable_file_max_buffer_size =
db_options.writable_file_max_buffer_size;
return optimized_env_options;
}
EnvOptions Env::OptimizeForManifestWrite(const EnvOptions& env_options) const {
return env_options;
}
EnvOptions Env::OptimizeForLogRead(const EnvOptions& env_options) const {
EnvOptions optimized_env_options(env_options);
optimized_env_options.use_direct_reads = false;
return optimized_env_options;
}
EnvOptions Env::OptimizeForManifestRead(const EnvOptions& env_options) const {
EnvOptions optimized_env_options(env_options);
optimized_env_options.use_direct_reads = false;
return optimized_env_options;
}
EnvOptions Env::OptimizeForCompactionTableWrite(
const EnvOptions& env_options, const ImmutableDBOptions& db_options) const {
EnvOptions optimized_env_options(env_options);
optimized_env_options.use_direct_writes =
db_options.use_direct_io_for_flush_and_compaction;
return optimized_env_options;
}
EnvOptions Env::OptimizeForCompactionTableRead(
const EnvOptions& env_options, const ImmutableDBOptions& db_options) const {
EnvOptions optimized_env_options(env_options);
optimized_env_options.use_direct_reads =
db_options.use_direct_io_for_flush_and_compaction;
return optimized_env_options;
}
EnvOptions::EnvOptions(const DBOptions& options) {
AssignEnvOptions(this, options);
}
EnvOptions::EnvOptions() {
DBOptions options;
AssignEnvOptions(this, options);
}
} // namespace rocksdb