rocksdb/file/delete_scheduler.h
mrambacher 3dff28cf9b Use SystemClock* instead of std::shared_ptr<SystemClock> in lower level routines (#8033)
Summary:
For performance purposes, the lower level routines were changed to use a SystemClock* instead of a std::shared_ptr<SystemClock>.  The shared ptr has some performance degradation on certain hardware classes.

For most of the system, there is no risk of the pointer being deleted/invalid because the shared_ptr will be stored elsewhere.  For example, the ImmutableDBOptions stores the Env which has a std::shared_ptr<SystemClock> in it.  The SystemClock* within the ImmutableDBOptions is essentially a "short cut" to gain access to this constant resource.

There were a few classes (PeriodicWorkScheduler?) where the "short cut" property did not hold.  In those cases, the shared pointer was preserved.

Using db_bench readrandom perf_level=3 on my EC2 box, this change performed as well or better than 6.17:

6.17: readrandom   :      28.046 micros/op 854902 ops/sec;   61.3 MB/s (355999 of 355999 found)
6.18: readrandom   :      32.615 micros/op 735306 ops/sec;   52.7 MB/s (290999 of 290999 found)
PR: readrandom   :      27.500 micros/op 871909 ops/sec;   62.5 MB/s (367999 of 367999 found)

(Note that the times for 6.18 are prior to revert of the SystemClock).

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8033

Reviewed By: pdillinger

Differential Revision: D27014563

Pulled By: mrambacher

fbshipit-source-id: ad0459eba03182e454391b5926bf5cdd45657b67
2021-03-15 04:34:11 -07:00

153 lines
5.1 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <map>
#include <queue>
#include <string>
#include <thread>
#include "monitoring/instrumented_mutex.h"
#include "port/port.h"
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
class Env;
class FileSystem;
class Logger;
class SstFileManagerImpl;
class SystemClock;
// DeleteScheduler allows the DB to enforce a rate limit on file deletion,
// Instead of deleteing files immediately, files are marked as trash
// and deleted in a background thread that apply sleep penalty between deletes
// if they are happening in a rate faster than rate_bytes_per_sec,
//
// Rate limiting can be turned off by setting rate_bytes_per_sec = 0, In this
// case DeleteScheduler will delete files immediately.
class DeleteScheduler {
public:
DeleteScheduler(SystemClock* clock, FileSystem* fs,
int64_t rate_bytes_per_sec, Logger* info_log,
SstFileManagerImpl* sst_file_manager,
double max_trash_db_ratio, uint64_t bytes_max_delete_chunk);
~DeleteScheduler();
// Return delete rate limit in bytes per second
int64_t GetRateBytesPerSecond() { return rate_bytes_per_sec_.load(); }
// Set delete rate limit in bytes per second
void SetRateBytesPerSecond(int64_t bytes_per_sec) {
rate_bytes_per_sec_.store(bytes_per_sec);
MaybeCreateBackgroundThread();
}
// Mark file as trash directory and schedule its deletion. If force_bg is
// set, it forces the file to always be deleted in the background thread,
// except when rate limiting is disabled
Status DeleteFile(const std::string& fname, const std::string& dir_to_sync,
const bool force_bg = false);
// Wait for all files being deleteing in the background to finish or for
// destructor to be called.
void WaitForEmptyTrash();
// Return a map containing errors that happened in BackgroundEmptyTrash
// file_path => error status
std::map<std::string, Status> GetBackgroundErrors();
uint64_t GetTotalTrashSize() { return total_trash_size_.load(); }
// Return trash/DB size ratio where new files will be deleted immediately
double GetMaxTrashDBRatio() {
return max_trash_db_ratio_.load();
}
// Update trash/DB size ratio where new files will be deleted immediately
void SetMaxTrashDBRatio(double r) {
assert(r >= 0);
max_trash_db_ratio_.store(r);
}
static const std::string kTrashExtension;
static bool IsTrashFile(const std::string& file_path);
// Check if there are any .trash files in path, and schedule their deletion
// Or delete immediately if sst_file_manager is nullptr
static Status CleanupDirectory(Env* env, SstFileManagerImpl* sfm,
const std::string& path);
void SetStatisticsPtr(const std::shared_ptr<Statistics>& stats) {
InstrumentedMutexLock l(&mu_);
stats_ = stats;
}
private:
Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash);
Status DeleteTrashFile(const std::string& path_in_trash,
const std::string& dir_to_sync,
uint64_t* deleted_bytes, bool* is_complete);
void BackgroundEmptyTrash();
void MaybeCreateBackgroundThread();
SystemClock* clock_;
FileSystem* fs_;
// total size of trash files
std::atomic<uint64_t> total_trash_size_;
// Maximum number of bytes that should be deleted per second
std::atomic<int64_t> rate_bytes_per_sec_;
// Mutex to protect queue_, pending_files_, bg_errors_, closing_, stats_
InstrumentedMutex mu_;
struct FileAndDir {
FileAndDir(const std::string& f, const std::string& d) : fname(f), dir(d) {}
std::string fname;
std::string dir; // empty will be skipped.
};
// Queue of trash files that need to be deleted
std::queue<FileAndDir> queue_;
// Number of trash files that are waiting to be deleted
int32_t pending_files_;
uint64_t bytes_max_delete_chunk_;
// Errors that happened in BackgroundEmptyTrash (file_path => error)
std::map<std::string, Status> bg_errors_;
bool num_link_error_printed_ = false;
// Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop
bool closing_;
// Condition variable signaled in these conditions
// - pending_files_ value change from 0 => 1
// - pending_files_ value change from 1 => 0
// - closing_ value is set to true
InstrumentedCondVar cv_;
// Background thread running BackgroundEmptyTrash
std::unique_ptr<port::Thread> bg_thread_;
// Mutex to protect threads from file name conflicts
InstrumentedMutex file_move_mu_;
Logger* info_log_;
SstFileManagerImpl* sst_file_manager_;
// If the trash size constitutes for more than this fraction of the total DB
// size we will start deleting new files passed to DeleteScheduler
// immediately
std::atomic<double> max_trash_db_ratio_;
static const uint64_t kMicrosInSecond = 1000 * 1000LL;
std::shared_ptr<Statistics> stats_;
};
} // namespace ROCKSDB_NAMESPACE
#endif // ROCKSDB_LITE