Add DBOptions. avoid_unnecessary_blocking_io to defer file deletions (#5043)

Summary:
Just like ReadOptions::background_purge_on_iterator_cleanup but for ColumnFamilyHandle instead of Iterator.

In our use case we sometimes call ColumnFamilyHandle's destructor from low-latency threads, and sometimes it blocks the thread for a few seconds deleting the files. To avoid that, we can either offload ColumnFamilyHandle's destruction to a background thread on our side, or add this option on rocksdb side. This PR does the latter, to be consistent with how we solve exactly the same problem for iterators using background_purge_on_iterator_cleanup option.

(EDIT: It's avoid_unnecessary_blocking_io now, and affects both CF drops and iterator destructors.)
I'm not quite comfortable with having two separate options (background_purge_on_iterator_cleanup and background_purge_on_cf_cleanup) for such a rarely used thing. Maybe we should merge them? Rename background_purge_on_cf_cleanup to something like delete_files_on_background_threads_only or avoid_blocking_io_in_unexpected_places, and make iterators use it instead of the one in ReadOptions? I can do that here if you guys think it's better.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5043

Differential Revision: D14339233

Pulled By: al13n321

fbshipit-source-id: ccf7efa11c85c9a5b91d969bb55627d0fb01e7b8
This commit is contained in:
Mike Kolupaev 2019-04-01 17:07:38 -07:00 committed by Facebook Github Bot
parent 127a850beb
commit 120bc4715b
10 changed files with 90 additions and 9 deletions

View File

@ -9,6 +9,8 @@
* Introduce two more stats levels, kExceptHistogramOrTimers and kExceptTimers.
* Added a feature to perform data-block sampling for compressibility, and report stats to user.
* Add support for trace filtering.
* Add DBOptions.avoid_unnecessary_blocking_io. If true, we avoid file deletion when destroying ColumnFamilyHandle and Iterator. Instead, a job is scheduled to delete the files in the background.
### Public API Change
* Remove bundled fbson library.
* statistics.stats_level_ becomes atomic. It is preferred to use statistics.set_stats_level() and statistics.get_stats_level() to access it.

View File

@ -68,7 +68,14 @@ ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() {
db_->FindObsoleteFiles(&job_context, false, true);
mutex_->Unlock();
if (job_context.HaveSomethingToDelete()) {
db_->PurgeObsoleteFiles(job_context);
bool defer_purge =
db_->immutable_db_options().avoid_unnecessary_blocking_io;
db_->PurgeObsoleteFiles(job_context, defer_purge);
if (defer_purge) {
mutex_->Lock();
db_->SchedulePurge();
mutex_->Unlock();
}
}
job_context.Clean();
}

View File

@ -1313,7 +1313,8 @@ InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options,
internal_iter = merge_iter_builder.Finish();
IterState* cleanup =
new IterState(this, &mutex_, super_version,
read_options.background_purge_on_iterator_cleanup);
read_options.background_purge_on_iterator_cleanup ||
immutable_db_options_.avoid_unnecessary_blocking_io);
internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr);
return internal_iter;

View File

@ -241,7 +241,7 @@ TEST_F(DeleteFileTest, PurgeObsoleteFilesTest) {
CloseDB();
}
TEST_F(DeleteFileTest, BackgroundPurgeTest) {
TEST_F(DeleteFileTest, BackgroundPurgeIteratorTest) {
std::string first("0"), last("999999");
CompactRangeOptions compact_options;
compact_options.change_level = true;
@ -279,6 +279,53 @@ TEST_F(DeleteFileTest, BackgroundPurgeTest) {
CloseDB();
}
// Verifies the new DBOptions::avoid_unnecessary_blocking_io behavior for
// column family drops: when the option is set (bg_purge == true), deleting
// the ColumnFamilyHandle must NOT delete obsolete SST files inline; instead
// a purge job is scheduled and the files disappear only once the background
// thread runs it. With the option off, deletion happens synchronously in
// the handle's destructor.
TEST_F(DeleteFileTest, BackgroundPurgeCFDropTest) {
auto do_test = [&](bool bg_purge) {
ColumnFamilyOptions co;
WriteOptions wo;
FlushOptions fo;
ColumnFamilyHandle* cfh = nullptr;
// Create a throwaway CF with one flushed key so it owns exactly one SST.
ASSERT_OK(db_->CreateColumnFamily(co, "dropme", &cfh));
ASSERT_OK(db_->Put(wo, cfh, "pika", "chu"));
ASSERT_OK(db_->Flush(fo, cfh));
// Expect 1 sst file.
CheckFileTypeCounts(dbname_, 0, 1, 1);
ASSERT_OK(db_->DropColumnFamily(cfh));
// Still 1 file, it won't be deleted while ColumnFamilyHandle is alive.
CheckFileTypeCounts(dbname_, 0, 1, 1);
// Destroying the handle triggers obsolete-file purging -- either inline
// here, or via a job scheduled for the background thread, depending on
// the option under test.
delete cfh;
// Occupy the HIGH-priority background thread with a sleeping task so a
// scheduled purge job cannot run yet (NOTE(review): assumes a single
// HIGH-priority thread in the test env -- confirm pool size).
test::SleepingBackgroundTask sleeping_task_after;
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
&sleeping_task_after, Env::Priority::HIGH);
// If background purge is enabled, the file should still be there.
CheckFileTypeCounts(dbname_, 0, bg_purge ? 1 : 0, 1);
// Execute background purges.
sleeping_task_after.WakeUp();
sleeping_task_after.WaitUntilDone();
// The file should have been deleted.
CheckFileTypeCounts(dbname_, 0, 0, 1);
};
{
SCOPED_TRACE("avoid_unnecessary_blocking_io = false");
do_test(false);
}
// Reopen the DB with the option enabled and repeat: the SST deletion
// should now be deferred to the background thread.
options_.avoid_unnecessary_blocking_io = true;
ASSERT_OK(ReopenDB(false));
{
SCOPED_TRACE("avoid_unnecessary_blocking_io = true");
do_test(true);
}
CloseDB();
}
// This test is to reproduce a bug that read invalid ReadOption in iterator
// cleanup function
TEST_F(DeleteFileTest, BackgroundPurgeCopyOptions) {

View File

@ -265,16 +265,18 @@ void ForwardIterator::SVCleanup() {
if (sv_ == nullptr) {
return;
}
bool background_purge =
read_options_.background_purge_on_iterator_cleanup ||
db_->immutable_db_options().avoid_unnecessary_blocking_io;
if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) {
// pinned_iters_mgr_ tells us to make sure that all visited key-value slices
// are alive until pinned_iters_mgr_->ReleasePinnedData() is called.
// The slices may point into some memtables owned by sv_, so we need to keep
// sv_ referenced until pinned_iters_mgr_ unpins everything.
auto p = new SVCleanupParams{
db_, sv_, read_options_.background_purge_on_iterator_cleanup};
auto p = new SVCleanupParams{db_, sv_, background_purge};
pinned_iters_mgr_->PinPtr(p, &ForwardIterator::DeferredSVCleanup);
} else {
SVCleanup(db_, sv_, read_options_.background_purge_on_iterator_cleanup);
SVCleanup(db_, sv_, background_purge);
}
}

View File

@ -1003,6 +1003,14 @@ struct DBOptions {
// Currently, any WAL-enabled writes after atomic flush may be replayed
// independently if the process crashes later and tries to recover.
bool atomic_flush = false;
// If true, ColumnFamilyHandle's and Iterator's destructors won't delete
// obsolete files directly and will instead schedule a background job
// to do it. Use it if you're destroying iterators or ColumnFamilyHandle-s
// from latency-sensitive threads.
// If set to true, takes precedence over
// ReadOptions::background_purge_on_iterator_cleanup.
bool avoid_unnecessary_blocking_io = false;
};
// Options to control the behavior of a database (passed to DB::Open)

View File

@ -86,7 +86,8 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
preserve_deletes(options.preserve_deletes),
two_write_queues(options.two_write_queues),
manual_wal_flush(options.manual_wal_flush),
atomic_flush(options.atomic_flush) {
atomic_flush(options.atomic_flush),
avoid_unnecessary_blocking_io(options.avoid_unnecessary_blocking_io) {
}
void ImmutableDBOptions::Dump(Logger* log) const {
@ -217,6 +218,10 @@ void ImmutableDBOptions::Dump(Logger* log) const {
two_write_queues);
ROCKS_LOG_HEADER(log, " Options.manual_wal_flush: %d",
manual_wal_flush);
ROCKS_LOG_HEADER(log, " Options.atomic_flush: %d", atomic_flush);
ROCKS_LOG_HEADER(log,
" Options.avoid_unnecessary_blocking_io: %d",
avoid_unnecessary_blocking_io);
}
MutableDBOptions::MutableDBOptions()

View File

@ -79,6 +79,7 @@ struct ImmutableDBOptions {
bool two_write_queues;
bool manual_wal_flush;
bool atomic_flush;
bool avoid_unnecessary_blocking_io;
};
struct MutableDBOptions {

View File

@ -132,6 +132,8 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
options.two_write_queues = immutable_db_options.two_write_queues;
options.manual_wal_flush = immutable_db_options.manual_wal_flush;
options.atomic_flush = immutable_db_options.atomic_flush;
options.avoid_unnecessary_blocking_io =
immutable_db_options.avoid_unnecessary_blocking_io;
return options;
}
@ -1615,7 +1617,12 @@ std::unordered_map<std::string, OptionTypeInfo>
{"atomic_flush",
{offsetof(struct DBOptions, atomic_flush), OptionType::kBoolean,
OptionVerificationType::kNormal, false,
offsetof(struct ImmutableDBOptions, atomic_flush)}}};
offsetof(struct ImmutableDBOptions, atomic_flush)}},
{"avoid_unnecessary_blocking_io",
{offsetof(struct DBOptions, avoid_unnecessary_blocking_io),
OptionType::kBoolean, OptionVerificationType::kNormal, false,
offsetof(struct ImmutableDBOptions, avoid_unnecessary_blocking_io)}}
};
std::unordered_map<std::string, BlockBasedTableOptions::IndexType>
OptionsHelper::block_base_table_index_type_string_map = {

View File

@ -294,7 +294,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
"two_write_queues=false;"
"manual_wal_flush=false;"
"seq_per_batch=false;"
"atomic_flush=false",
"atomic_flush=false;"
"avoid_unnecessary_blocking_io=false",
new_options));
ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),