Add event listener support on remote compactor side (#9821)

Summary:
Allow the user to set event listeners on the remote compactor
side.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9821

Test Plan: unit test added

Reviewed By: ajkr

Differential Revision: D35485388

Pulled By: jay-zhuang

fbshipit-source-id: 669d8a3aaee012b75b940470306756c03ffa09b2
This commit is contained in:
Jay Zhuang 2022-04-12 17:25:36 -07:00 committed by Facebook GitHub Bot
parent 1eee99fc8c
commit f934a0af46
5 changed files with 106 additions and 5 deletions

View File

@ -18,6 +18,7 @@
* Added an option to dynamically charge an updating estimated memory usage of block-based table reader to block cache if block cache available. To enable this feature, set `BlockBasedTableOptions::reserve_table_reader_memory = true`. * Added an option to dynamically charge an updating estimated memory usage of block-based table reader to block cache if block cache available. To enable this feature, set `BlockBasedTableOptions::reserve_table_reader_memory = true`.
* Add new stat ASYNC_READ_BYTES that calculates number of bytes read during async read call and users can check if async code path is being called by RocksDB internal automatic prefetching for sequential reads. * Add new stat ASYNC_READ_BYTES that calculates number of bytes read during async read call and users can check if async code path is being called by RocksDB internal automatic prefetching for sequential reads.
* Enable async prefetching if ReadOptions.readahead_size is set along with ReadOptions.async_io in FilePrefetchBuffer. * Enable async prefetching if ReadOptions.readahead_size is set along with ReadOptions.async_io in FilePrefetchBuffer.
* Add event listener support on the compactor side of remote compaction.
### Behavior changes ### Behavior changes
* Disallow usage of commit-time-write-batch for write-prepared/write-unprepared transactions if TransactionOptions::use_only_the_last_commit_time_batch_for_recovery is false to prevent two (or more) uncommitted versions of the same key in the database. Otherwise, bottommost compaction may violate the internal key uniqueness invariant of SSTs if the sequence numbers of both internal keys are zeroed out (#9794). * Disallow usage of commit-time-write-batch for write-prepared/write-unprepared transactions if TransactionOptions::use_only_the_last_commit_time_batch_for_recovery is false to prevent two (or more) uncommitted versions of the same key in the database. Otherwise, bottommost compaction may violate the internal key uniqueness invariant of SSTs if the sequence numbers of both internal keys are zeroed out (#9794).

View File

@ -1251,7 +1251,7 @@ void CompactionJob::NotifyOnSubcompactionBegin(
if (shutting_down_->load(std::memory_order_acquire)) { if (shutting_down_->load(std::memory_order_acquire)) {
return; return;
} }
if (c->is_manual_compaction() && if (c->is_manual_compaction() && manual_compaction_paused_ &&
manual_compaction_paused_->load(std::memory_order_acquire) > 0) { manual_compaction_paused_->load(std::memory_order_acquire) > 0) {
return; return;
} }

View File

@ -12,13 +12,16 @@ namespace ROCKSDB_NAMESPACE {
class MyTestCompactionService : public CompactionService { class MyTestCompactionService : public CompactionService {
public: public:
MyTestCompactionService(std::string db_path, Options& options, MyTestCompactionService(
std::shared_ptr<Statistics>& statistics) std::string db_path, Options& options,
std::shared_ptr<Statistics>& statistics,
std::vector<std::shared_ptr<EventListener>>& listeners)
: db_path_(std::move(db_path)), : db_path_(std::move(db_path)),
options_(options), options_(options),
statistics_(statistics), statistics_(statistics),
start_info_("na", "na", "na", 0, Env::TOTAL), start_info_("na", "na", "na", 0, Env::TOTAL),
wait_info_("na", "na", "na", 0, Env::TOTAL) {} wait_info_("na", "na", "na", 0, Env::TOTAL),
listeners_(listeners) {}
static const char* kClassName() { return "MyTestCompactionService"; } static const char* kClassName() { return "MyTestCompactionService"; }
@ -71,6 +74,9 @@ class MyTestCompactionService : public CompactionService {
options_override.table_factory = options_.table_factory; options_override.table_factory = options_.table_factory;
options_override.sst_partitioner_factory = options_.sst_partitioner_factory; options_override.sst_partitioner_factory = options_.sst_partitioner_factory;
options_override.statistics = statistics_; options_override.statistics = statistics_;
if (!listeners_.empty()) {
options_override.listeners = listeners_;
}
Status s = DB::OpenAndCompact( Status s = DB::OpenAndCompact(
db_path_, db_path_ + "/" + ROCKSDB_NAMESPACE::ToString(info.job_id), db_path_, db_path_ + "/" + ROCKSDB_NAMESPACE::ToString(info.job_id),
@ -129,6 +135,7 @@ class MyTestCompactionService : public CompactionService {
CompactionServiceJobStatus::kFailure; CompactionServiceJobStatus::kFailure;
bool is_override_wait_result_ = false; bool is_override_wait_result_ = false;
std::string override_wait_result_; std::string override_wait_result_;
std::vector<std::shared_ptr<EventListener>> listeners_;
}; };
class CompactionServiceTest : public DBTestBase { class CompactionServiceTest : public DBTestBase {
@ -144,7 +151,7 @@ class CompactionServiceTest : public DBTestBase {
compactor_statistics_ = CreateDBStatistics(); compactor_statistics_ = CreateDBStatistics();
compaction_service_ = std::make_shared<MyTestCompactionService>( compaction_service_ = std::make_shared<MyTestCompactionService>(
dbname_, *options, compactor_statistics_); dbname_, *options, compactor_statistics_, remote_listeners);
options->compaction_service = compaction_service_; options->compaction_service = compaction_service_;
DestroyAndReopen(*options); DestroyAndReopen(*options);
} }
@ -192,6 +199,8 @@ class CompactionServiceTest : public DBTestBase {
} }
} }
std::vector<std::shared_ptr<EventListener>> remote_listeners;
private: private:
std::shared_ptr<Statistics> compactor_statistics_; std::shared_ptr<Statistics> compactor_statistics_;
std::shared_ptr<Statistics> primary_statistics_; std::shared_ptr<Statistics> primary_statistics_;
@ -685,6 +694,88 @@ TEST_F(CompactionServiceTest, FallbackLocalManual) {
VerifyTestData(); VerifyTestData();
} }
// Registers a listener in `remote_listeners`, runs enough writes and flushes
// to trigger compactions, and then checks that the listener callbacks fired
// and that the stored values are the expected ones.
TEST_F(CompactionServiceTest, RemoteEventListener) {
  // Listener that counts the callbacks it receives and tracks which job ids
  // currently have a subcompaction in flight.
  class RemoteEventListenerTest : public EventListener {
   public:
    const char* Name() const override { return "RemoteEventListenerTest"; }

    void OnSubcompactionBegin(const SubcompactionJobInfo& info) override {
      auto result = on_going_compactions.emplace(info.job_id);
      ASSERT_TRUE(result.second);  // make sure there's no duplication
      compaction_num++;
      EventListener::OnSubcompactionBegin(info);
    }

    void OnSubcompactionCompleted(const SubcompactionJobInfo& info) override {
      auto num = on_going_compactions.erase(info.job_id);
      ASSERT_TRUE(num == 1);  // make sure the compaction id exists
      EventListener::OnSubcompactionCompleted(info);
    }

    void OnTableFileCreated(const TableFileCreationInfo& info) override {
      // File creation must belong to a subcompaction that has begun and not
      // yet completed.
      ASSERT_EQ(on_going_compactions.count(info.job_id), 1);
      file_created++;
      EventListener::OnTableFileCreated(info);
    }

    void OnTableFileCreationStarted(
        const TableFileCreationBriefInfo& info) override {
      ASSERT_EQ(on_going_compactions.count(info.job_id), 1);
      file_creation_started++;
      EventListener::OnTableFileCreationStarted(info);
    }

    bool ShouldBeNotifiedOnFileIO() override {
      // Only counts the query; the answer is whatever the base class returns.
      file_io_notified++;
      return EventListener::ShouldBeNotifiedOnFileIO();
    }

    std::atomic_uint64_t file_io_notified{0};
    std::atomic_uint64_t file_creation_started{0};
    std::atomic_uint64_t file_created{0};

    std::set<int> on_going_compactions;  // store the job_id
    std::atomic_uint64_t compaction_num{0};
  };

  // Create the listener through a shared_ptr from the start so it cannot leak
  // if inserting into the vector throws. The test keeps its own handle to
  // read the counters after the compactions finish.
  auto listener = std::make_shared<RemoteEventListenerTest>();
  remote_listeners.push_back(listener);

  Options options = CurrentOptions();
  ReopenWithCompactionService(&options);

  // Write keys 0..199 with "value<id>", flushing after every 10 keys.
  for (int i = 0; i < 20; i++) {
    for (int j = 0; j < 10; j++) {
      int key_id = i * 10 + j;
      ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
    }
    ASSERT_OK(Flush());
  }

  // Overwrite every even key with "value_new<id>", again flushing after
  // every 10 keys.
  for (int i = 0; i < 10; i++) {
    for (int j = 0; j < 10; j++) {
      int key_id = i * 20 + j * 2;
      ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id)));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // check the events are triggered
  ASSERT_TRUE(listener->file_io_notified > 0);
  ASSERT_TRUE(listener->file_creation_started > 0);
  ASSERT_TRUE(listener->file_created > 0);
  ASSERT_TRUE(listener->compaction_num > 0);
  // Every subcompaction that began must also have completed.
  ASSERT_TRUE(listener->on_going_compactions.empty());

  // verify result: odd keys keep the first value, even keys were overwritten
  for (int i = 0; i < 200; i++) {
    auto result = Get(Key(i));
    if (i % 2) {
      ASSERT_EQ(result, "value" + ToString(i));
    } else {
      ASSERT_EQ(result, "value_new" + ToString(i));
    }
  }
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) { int main(int argc, char** argv) {

View File

@ -825,6 +825,7 @@ Status DB::OpenAndCompact(
override_options.table_factory; override_options.table_factory;
compaction_input.column_family.options.sst_partitioner_factory = compaction_input.column_family.options.sst_partitioner_factory =
override_options.sst_partitioner_factory; override_options.sst_partitioner_factory;
compaction_input.db_options.listeners = override_options.listeners;
std::vector<ColumnFamilyDescriptor> column_families; std::vector<ColumnFamilyDescriptor> column_families;
column_families.push_back(compaction_input.column_family); column_families.push_back(compaction_input.column_family);

View File

@ -1932,6 +1932,14 @@ struct CompactionServiceOptionsOverride {
std::shared_ptr<TableFactory> table_factory; std::shared_ptr<TableFactory> table_factory;
std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory = nullptr; std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory = nullptr;
// Only a subset of events is triggered in the remote compaction worker, such
// as `OnTableFileCreated`, `OnTableFileCreationStarted`,
// `ShouldBeNotifiedOnFileIO`, `OnSubcompactionBegin`,
// `OnSubcompactionCompleted`, etc. Notably, `OnCompactionBegin` and
// `OnCompactionCompleted` are not triggered there; they are triggered on the
// primary DB side.
std::vector<std::shared_ptr<EventListener>> listeners;
// statistics is used to collect DB operation metrics, the metrics won't be // statistics is used to collect DB operation metrics, the metrics won't be
// returned to CompactionService primary host, to collect that, the user needs // returned to CompactionService primary host, to collect that, the user needs
// to set it here. // to set it here.