2016-02-09 15:12:00 -08:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 16:03:42 -07:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2015-08-31 23:11:12 -07:00
|
|
|
|
2020-03-12 10:58:27 -07:00
|
|
|
#include "db/blob/blob_index.h"
|
2019-05-31 15:21:36 -07:00
|
|
|
#include "db/db_impl/db_impl.h"
|
2015-12-14 13:36:32 -08:00
|
|
|
#include "db/db_test_util.h"
|
2015-08-31 23:11:12 -07:00
|
|
|
#include "db/dbformat.h"
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
#include "db/version_set.h"
|
|
|
|
#include "db/write_batch_internal.h"
|
2019-05-29 20:44:08 -07:00
|
|
|
#include "file/filename.h"
|
2017-04-05 19:02:00 -07:00
|
|
|
#include "monitoring/statistics.h"
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
#include "rocksdb/cache.h"
|
|
|
|
#include "rocksdb/compaction_filter.h"
|
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/env.h"
|
|
|
|
#include "rocksdb/filter_policy.h"
|
2015-08-31 23:11:12 -07:00
|
|
|
#include "rocksdb/options.h"
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
#include "rocksdb/perf_context.h"
|
|
|
|
#include "rocksdb/slice.h"
|
|
|
|
#include "rocksdb/slice_transform.h"
|
|
|
|
#include "rocksdb/table.h"
|
|
|
|
#include "rocksdb/table_properties.h"
|
2019-05-30 17:39:43 -07:00
|
|
|
#include "test_util/sync_point.h"
|
|
|
|
#include "test_util/testharness.h"
|
|
|
|
#include "test_util/testutil.h"
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
#include "util/hash.h"
|
|
|
|
#include "util/mutexlock.h"
|
|
|
|
#include "util/rate_limiter.h"
|
2015-03-19 17:29:37 -07:00
|
|
|
#include "util/string_util.h"
|
2015-08-31 23:11:12 -07:00
|
|
|
#include "utilities/merge_operators.h"
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
|
2020-02-20 12:07:53 -08:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
|
2015-12-14 13:36:32 -08:00
|
|
|
class EventListenerTest : public DBTestBase {
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
public:
|
2021-07-23 08:37:27 -07:00
|
|
|
EventListenerTest() : DBTestBase("listener_test", /*env_do_fsync=*/true) {}
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
|
2019-10-24 14:42:43 -07:00
|
|
|
static std::string BlobStr(uint64_t blob_file_number, uint64_t offset,
|
|
|
|
uint64_t size) {
|
|
|
|
std::string blob_index;
|
|
|
|
BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size,
|
|
|
|
kNoCompression);
|
|
|
|
return blob_index;
|
|
|
|
}
|
|
|
|
|
2015-08-31 23:11:12 -07:00
|
|
|
const size_t k110KB = 110 << 10;
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
};
|
|
|
|
|
2020-02-20 12:07:53 -08:00
|
|
|
struct TestPropertiesCollector
|
|
|
|
: public ROCKSDB_NAMESPACE::TablePropertiesCollector {
|
|
|
|
ROCKSDB_NAMESPACE::Status AddUserKey(
|
|
|
|
const ROCKSDB_NAMESPACE::Slice& /*key*/,
|
|
|
|
const ROCKSDB_NAMESPACE::Slice& /*value*/,
|
|
|
|
ROCKSDB_NAMESPACE::EntryType /*type*/,
|
|
|
|
ROCKSDB_NAMESPACE::SequenceNumber /*seq*/,
|
|
|
|
uint64_t /*file_size*/) override {
|
2015-09-15 09:03:08 -07:00
|
|
|
return Status::OK();
|
|
|
|
}
|
2020-02-20 12:07:53 -08:00
|
|
|
ROCKSDB_NAMESPACE::Status Finish(
|
|
|
|
ROCKSDB_NAMESPACE::UserCollectedProperties* properties) override {
|
2015-09-15 09:03:08 -07:00
|
|
|
properties->insert({"0", "1"});
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
2019-02-14 13:52:47 -08:00
|
|
|
const char* Name() const override { return "TestTablePropertiesCollector"; }
|
2015-09-15 09:03:08 -07:00
|
|
|
|
2020-02-20 12:07:53 -08:00
|
|
|
ROCKSDB_NAMESPACE::UserCollectedProperties GetReadableProperties()
|
|
|
|
const override {
|
|
|
|
ROCKSDB_NAMESPACE::UserCollectedProperties ret;
|
2015-09-15 09:03:08 -07:00
|
|
|
ret["2"] = "3";
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class TestPropertiesCollectorFactory : public TablePropertiesCollectorFactory {
|
|
|
|
public:
|
2019-02-14 13:52:47 -08:00
|
|
|
TablePropertiesCollector* CreateTablePropertiesCollector(
|
2018-03-05 13:08:17 -08:00
|
|
|
TablePropertiesCollectorFactory::Context /*context*/) override {
|
2015-09-15 09:03:08 -07:00
|
|
|
return new TestPropertiesCollector;
|
|
|
|
}
|
|
|
|
const char* Name() const override { return "TestTablePropertiesCollector"; }
|
|
|
|
};
|
|
|
|
|
2015-01-27 14:44:02 -08:00
|
|
|
class TestCompactionListener : public EventListener {
|
|
|
|
public:
|
2019-10-24 14:42:43 -07:00
|
|
|
explicit TestCompactionListener(EventListenerTest* test) : test_(test) {}
|
|
|
|
|
2015-01-27 15:01:04 -08:00
|
|
|
void OnCompactionCompleted(DB *db, const CompactionJobInfo& ci) override {
|
2015-05-12 16:10:23 -07:00
|
|
|
std::lock_guard<std::mutex> lock(mutex_);
|
2015-01-27 14:44:02 -08:00
|
|
|
compacted_dbs_.push_back(db);
|
2015-05-12 16:10:23 -07:00
|
|
|
ASSERT_GT(ci.input_files.size(), 0U);
|
2019-10-24 14:42:43 -07:00
|
|
|
ASSERT_EQ(ci.input_files.size(), ci.input_file_infos.size());
|
|
|
|
|
|
|
|
for (size_t i = 0; i < ci.input_file_infos.size(); ++i) {
|
|
|
|
ASSERT_EQ(ci.input_file_infos[i].level, ci.base_input_level);
|
|
|
|
ASSERT_EQ(ci.input_file_infos[i].file_number,
|
|
|
|
TableFileNameToNumber(ci.input_files[i]));
|
|
|
|
}
|
|
|
|
|
2015-05-12 16:10:23 -07:00
|
|
|
ASSERT_GT(ci.output_files.size(), 0U);
|
2019-10-24 14:42:43 -07:00
|
|
|
ASSERT_EQ(ci.output_files.size(), ci.output_file_infos.size());
|
|
|
|
|
|
|
|
ASSERT_TRUE(test_);
|
|
|
|
ASSERT_EQ(test_->db_, db);
|
|
|
|
|
|
|
|
std::vector<std::vector<FileMetaData>> files_by_level;
|
|
|
|
test_->dbfull()->TEST_GetFilesMetaData(test_->handles_[ci.cf_id],
|
|
|
|
&files_by_level);
|
|
|
|
ASSERT_GT(files_by_level.size(), ci.output_level);
|
|
|
|
|
|
|
|
for (size_t i = 0; i < ci.output_file_infos.size(); ++i) {
|
|
|
|
ASSERT_EQ(ci.output_file_infos[i].level, ci.output_level);
|
|
|
|
ASSERT_EQ(ci.output_file_infos[i].file_number,
|
|
|
|
TableFileNameToNumber(ci.output_files[i]));
|
|
|
|
|
|
|
|
auto it = std::find_if(
|
|
|
|
files_by_level[ci.output_level].begin(),
|
|
|
|
files_by_level[ci.output_level].end(), [&](const FileMetaData& meta) {
|
|
|
|
return meta.fd.GetNumber() == ci.output_file_infos[i].file_number;
|
|
|
|
});
|
|
|
|
ASSERT_NE(it, files_by_level[ci.output_level].end());
|
|
|
|
|
|
|
|
ASSERT_EQ(ci.output_file_infos[i].oldest_blob_file_number,
|
|
|
|
it->oldest_blob_file_number);
|
|
|
|
}
|
|
|
|
|
2015-06-11 14:18:02 -07:00
|
|
|
ASSERT_EQ(db->GetEnv()->GetThreadID(), ci.thread_id);
|
|
|
|
ASSERT_GT(ci.thread_id, 0U);
|
2015-09-15 09:03:08 -07:00
|
|
|
|
|
|
|
for (auto fl : {ci.input_files, ci.output_files}) {
|
|
|
|
for (auto fn : fl) {
|
|
|
|
auto it = ci.table_properties.find(fn);
|
|
|
|
ASSERT_NE(it, ci.table_properties.end());
|
|
|
|
auto tp = it->second;
|
|
|
|
ASSERT_TRUE(tp != nullptr);
|
|
|
|
ASSERT_EQ(tp->user_collected_properties.find("0")->second, "1");
|
|
|
|
}
|
|
|
|
}
|
2015-01-27 14:44:02 -08:00
|
|
|
}
|
|
|
|
|
2019-10-24 14:42:43 -07:00
|
|
|
EventListenerTest* test_;
|
2015-01-27 14:44:02 -08:00
|
|
|
std::vector<DB*> compacted_dbs_;
|
2015-05-12 16:10:23 -07:00
|
|
|
std::mutex mutex_;
|
2015-01-27 14:44:02 -08:00
|
|
|
};
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Writes one large value into each of seven extra column families (plus a
// blob index entry into the first), then flushes and manually compacts each
// of them. Expects the compaction listener to have been notified as many
// times as there are column families, every time for db_.
TEST_F(EventListenerTest, OnSingleDBCompactionTest) {
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
  const int kEntriesPerBuffer = 100;
  const int kNumL0Files = 4;

  Options options;
  options.env = CurrentOptions().env;
  options.create_if_missing = true;
  options.compression = kNoCompression;
  options.compaction_style = kCompactionStyleLevel;
  options.level0_file_num_compaction_trigger = kNumL0Files;
  // Size the write buffer, target file size and level base off one another.
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base = options.target_file_size_base * 2;
  options.max_bytes_for_level_multiplier = 2;
#ifdef ROCKSDB_USING_THREAD_STATUS
  options.enable_thread_tracking = true;
#endif  // ROCKSDB_USING_THREAD_STATUS
  options.table_properties_collector_factories.push_back(
      std::make_shared<TestPropertiesCollectorFactory>());

  // The raw pointer is kept for the final checks; the object itself is handed
  // to options.listeners via emplace_back.
  TestCompactionListener* compaction_listener =
      new TestCompactionListener(this);
  options.listeners.emplace_back(compaction_listener);

  std::vector<std::string> cf_names = {
      "pikachu",  "ilya",    "muromec", "dobrynia",
      "nikitich", "alyosha", "popovich"};
  CreateAndReopenWithCF(cf_names, options);
  const int num_cfs = static_cast<int>(cf_names.size());

  // Column family 1 receives a plain value followed by a blob index entry.
  ASSERT_OK(Put(1, cf_names[0], std::string(90000, cf_names[0].front())));

  WriteBatch blob_batch;
  ASSERT_OK(WriteBatchInternal::PutBlobIndex(&blob_batch, 1, "ditto",
                                             BlobStr(123, 0, 1 << 10)));
  ASSERT_OK(dbfull()->Write(WriteOptions(), &blob_batch));

  // Each remaining column family gets a key equal to its own name and a
  // 90000-byte value made of the name's first letter.
  for (int cf = 2; cf <= num_cfs; ++cf) {
    const std::string& name = cf_names[cf - 1];
    ASSERT_OK(Put(cf, name, std::string(90000, name.front())));
  }

  // Flush and fully compact one column family at a time, waiting for the
  // background work to finish before moving on.
  for (int cf = 1; cf <= num_cfs; ++cf) {
    ASSERT_OK(Flush(cf));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[cf],
                                     nullptr, nullptr));
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }

  ASSERT_EQ(compaction_listener->compacted_dbs_.size(), cf_names.size());
  for (DB* notified_db : compaction_listener->compacted_dbs_) {
    ASSERT_EQ(notified_db, db_);
  }
}
|
|
|
|
|
2015-06-02 14:12:23 -07:00
|
|
|
// This simple Listener can only handle one flush at a time.
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
class TestFlushListener : public EventListener {
|
|
|
|
public:
|
2019-10-24 14:42:43 -07:00
|
|
|
TestFlushListener(Env* env, EventListenerTest* test)
|
|
|
|
: slowdown_count(0), stop_count(0), db_closed(), env_(env), test_(test) {
|
2015-07-01 16:13:49 -07:00
|
|
|
db_closed = false;
|
|
|
|
}
|
2020-12-22 23:44:44 -08:00
|
|
|
|
|
|
|
virtual ~TestFlushListener() {
|
|
|
|
prev_fc_info_.status.PermitUncheckedError(); // Ignore the status
|
|
|
|
}
|
2015-06-02 14:12:23 -07:00
|
|
|
void OnTableFileCreated(
|
2015-06-02 14:20:27 -07:00
|
|
|
const TableFileCreationInfo& info) override {
|
2015-06-05 12:28:51 -07:00
|
|
|
// remember the info for later checking the FlushJobInfo.
|
|
|
|
prev_fc_info_ = info;
|
|
|
|
ASSERT_GT(info.db_name.size(), 0U);
|
|
|
|
ASSERT_GT(info.cf_name.size(), 0U);
|
|
|
|
ASSERT_GT(info.file_path.size(), 0U);
|
|
|
|
ASSERT_GT(info.job_id, 0);
|
2015-06-02 14:12:23 -07:00
|
|
|
ASSERT_GT(info.table_properties.data_size, 0U);
|
|
|
|
ASSERT_GT(info.table_properties.raw_key_size, 0U);
|
|
|
|
ASSERT_GT(info.table_properties.raw_value_size, 0U);
|
|
|
|
ASSERT_GT(info.table_properties.num_data_blocks, 0U);
|
|
|
|
ASSERT_GT(info.table_properties.num_entries, 0U);
|
2020-08-25 10:44:39 -07:00
|
|
|
ASSERT_EQ(info.file_checksum, kUnknownFileChecksum);
|
|
|
|
ASSERT_EQ(info.file_checksum_func_name, kUnknownFileChecksumFuncName);
|
2015-06-11 14:18:02 -07:00
|
|
|
|
2016-12-13 18:22:00 -08:00
|
|
|
#ifdef ROCKSDB_USING_THREAD_STATUS
|
2015-06-11 14:18:02 -07:00
|
|
|
// Verify the id of the current thread that created this table
|
|
|
|
// file matches the id of any active flush or compaction thread.
|
|
|
|
uint64_t thread_id = env_->GetThreadID();
|
|
|
|
std::vector<ThreadStatus> thread_list;
|
|
|
|
ASSERT_OK(env_->GetThreadList(&thread_list));
|
|
|
|
bool found_match = false;
|
|
|
|
for (auto thread_status : thread_list) {
|
|
|
|
if (thread_status.operation_type == ThreadStatus::OP_FLUSH ||
|
|
|
|
thread_status.operation_type == ThreadStatus::OP_COMPACTION) {
|
|
|
|
if (thread_id == thread_status.thread_id) {
|
|
|
|
found_match = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ASSERT_TRUE(found_match);
|
|
|
|
#endif // ROCKSDB_USING_THREAD_STATUS
|
2015-06-02 14:12:23 -07:00
|
|
|
}
|
|
|
|
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
void OnFlushCompleted(
|
2015-06-05 12:28:51 -07:00
|
|
|
DB* db, const FlushJobInfo& info) override {
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
flushed_dbs_.push_back(db);
|
2015-06-05 12:28:51 -07:00
|
|
|
flushed_column_family_names_.push_back(info.cf_name);
|
|
|
|
if (info.triggered_writes_slowdown) {
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
slowdown_count++;
|
|
|
|
}
|
2015-06-05 12:28:51 -07:00
|
|
|
if (info.triggered_writes_stop) {
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
stop_count++;
|
|
|
|
}
|
2015-06-05 12:28:51 -07:00
|
|
|
// verify whether the previously created file matches the flushed file.
|
|
|
|
ASSERT_EQ(prev_fc_info_.db_name, db->GetName());
|
|
|
|
ASSERT_EQ(prev_fc_info_.cf_name, info.cf_name);
|
|
|
|
ASSERT_EQ(prev_fc_info_.job_id, info.job_id);
|
|
|
|
ASSERT_EQ(prev_fc_info_.file_path, info.file_path);
|
2019-10-24 14:42:43 -07:00
|
|
|
ASSERT_EQ(TableFileNameToNumber(info.file_path), info.file_number);
|
|
|
|
|
|
|
|
// Note: the following chunk relies on the notification pertaining to the
|
|
|
|
// database pointed to by DBTestBase::db_, and is thus bypassed when
|
|
|
|
// that assumption does not hold (see the test case MultiDBMultiListeners
|
|
|
|
// below).
|
|
|
|
ASSERT_TRUE(test_);
|
|
|
|
if (db == test_->db_) {
|
|
|
|
std::vector<std::vector<FileMetaData>> files_by_level;
|
More asserts in listener_test for debuggability (#9320)
Summary:
We ran into a flake I could not debug so instead added assertions in
case it happens again.
Command was:
```
TEST_TMPDIR=/dev/shm/rocksdb COMPILE_WITH_UBSAN=1 USE_CLANG=1 OPT=-g SKIP_FORMAT_BUCK_CHECKS=1 make J=80 -j80 ubsan_check
```
Failure output was:
```
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from EventListenerTest
[ RUN ] EventListenerTest.DisableBGCompaction
UndefinedBehaviorSanitizer:DEADLYSIGNAL
==1558126==ERROR: UndefinedBehaviorSanitizer: SEGV on unknown address 0x000000000031 (pc 0x7fd9c04dda22 bp 0x7fd9bf8aa580 sp 0x7fd9bf8aa540 T1558147)
==1558126==The signal is caused by a READ memory access.
==1558126==Hint: address points to the zero page.
#0 0x7fd9c04dda21 in __dynamic_cast /home/engshare/third-party2/libgcc/9.x/src/gcc-9.x/x86_64-facebook-linux/libstdc++-v3/libsupc++/../../.././libstdc++-v3/libsupc++/dyncast.cc:49:3
https://github.com/facebook/rocksdb/issues/1 0x510c53 in __ubsan::checkDynamicType(void*, void*, unsigned long) (/data/sandcastle/boxes/eden-trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/listener_test+0x510c53)
https://github.com/facebook/rocksdb/issues/2 0x50fb32 in HandleDynamicTypeCacheMiss(__ubsan::DynamicTypeCacheMissData*, unsigned long, unsigned long, __ubsan::ReportOptions) (/data/sandcastle/boxes/eden-trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/listener_test+0x50fb32)
https://github.com/facebook/rocksdb/issues/3 0x510230 in __ubsan_handle_dynamic_type_cache_miss_abort (/data/sandcastle/boxes/eden-trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/listener_test+0x510230)
https://github.com/facebook/rocksdb/issues/4 0x63221a in rocksdb::ColumnFamilyHandleImpl* rocksdb::static_cast_with_check<rocksdb::ColumnFamilyHandleImpl, rocksdb::ColumnFamilyHandle>(rocksdb::ColumnFamilyHandle*) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/./util/cast_util.h:19:20
https://github.com/facebook/rocksdb/issues/5 0x71cafa in rocksdb::DBImpl::TEST_GetFilesMetaData(rocksdb::ColumnFamilyHandle*, std::vector<std::vector<rocksdb::FileMetaData, std::allocator<rocksdb::FileMetaData> >, std::allocator<std::vector<rocksdb::FileMetaData, std::allocator<rocksdb::FileMetaData> > > >*, std::vector<std::shared_ptr<rocksdb::BlobFileMetaData>, std::allocator<std::shared_ptr<rocksdb::BlobFileMetaData> > >*) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/db/db_impl/db_impl_debug.cc:63:14
https://github.com/facebook/rocksdb/issues/6 0x53f6b4 in rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/db/listener_test.cc:277:24
https://github.com/facebook/rocksdb/issues/7 0x6e2f7d in rocksdb::DBImpl::NotifyOnFlushCompleted(rocksdb::ColumnFamilyData*, rocksdb::MutableCFOptions const&, std::__cxx11::list<std::unique_ptr<rocksdb::FlushJobInfo, std::default_delete<rocksdb::FlushJobInfo> >, std::allocator<std::unique_ptr<rocksdb::FlushJobInfo, std::default_delete<rocksdb::FlushJobInfo> > > >*) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/db/db_impl/db_impl_compaction_flush.cc:863:19
https://github.com/facebook/rocksdb/issues/8 0x6e1074 in rocksdb::DBImpl::FlushMemTableToOutputFile(rocksdb::ColumnFamilyData*, rocksdb::MutableCFOptions const&, bool*, rocksdb::JobContext*, rocksdb::SuperVersionContext*, std::vector<unsigned long, std::allocator<unsigned long> >&, unsigned long, rocksdb::SnapshotChecker*, rocksdb::LogBuffer*, rocksdb::Env::Priority) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/db/db_impl/db_impl_compaction_flush.cc:314:5
https://github.com/facebook/rocksdb/issues/9 0x6e3412 in rocksdb::DBImpl::FlushMemTablesToOutputFiles(rocksdb::autovector<rocksdb::DBImpl::BGFlushArg, 8ul> const&, bool*, rocksdb::JobContext*, rocksdb::LogBuffer*, rocksdb::Env::Priority) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/db/db_impl/db_impl_compaction_flush.cc:359:14
https://github.com/facebook/rocksdb/issues/10 0x700df6 in rocksdb::DBImpl::BackgroundFlush(bool*, rocksdb::JobContext*, rocksdb::LogBuffer*, rocksdb::FlushReason*, rocksdb::Env::Priority) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/db/db_impl/db_impl_compaction_flush.cc:2703:14
https://github.com/facebook/rocksdb/issues/11 0x6fe1f0 in rocksdb::DBImpl::BackgroundCallFlush(rocksdb::Env::Priority) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/db/db_impl/db_impl_compaction_flush.cc:2742:16
https://github.com/facebook/rocksdb/issues/12 0x6fc732 in rocksdb::DBImpl::BGWorkFlush(void*) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/db/db_impl/db_impl_compaction_flush.cc:2569:44
https://github.com/facebook/rocksdb/issues/13 0xb3a820 in void std::_Bind<void (* (void*))(void*)>::operator()<void>() /mnt/gvfs/third-party2/libgcc/4959b39cfbe5965a37c861c4c327fa7c5c759b87/9.x/platform009/9202ce7/include/c++/9.x/functional:482:17
https://github.com/facebook/rocksdb/issues/14 0xb3a820 in std::_Function_handler<void (), std::_Bind<void (* (void*))(void*)> >::_M_invoke(std::_Any_data const&) /mnt/gvfs/third-party2/libgcc/4959b39cfbe5965a37c861c4c327fa7c5c759b87/9.x/platform009/9202ce7/include/c++/9.x/bits/std_function.h:300:2
https://github.com/facebook/rocksdb/issues/15 0xb347cc in rocksdb::ThreadPoolImpl::Impl::BGThread(unsigned long) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/util/threadpool_imp.cc:266:5
https://github.com/facebook/rocksdb/issues/16 0xb34a2f in rocksdb::ThreadPoolImpl::Impl::BGThreadWrapper(void*) /data/sandcastle/boxes/trunk-hg-fbcode-fbsource/fbcode/internal_repo_rocksdb/repo/util/threadpool_imp.cc:307:7
https://github.com/facebook/rocksdb/issues/17 0x7fd9c051a660 in execute_native_thread_routine /home/engshare/third-party2/libgcc/9.x/src/gcc-9.x/x86_64-facebook-linux/libstdc++-v3/src/c++11/../../../.././libstdc++-v3/src/c++11/thread.cc:80:18
https://github.com/facebook/rocksdb/issues/18 0x7fd9c041e20b in start_thread /home/engshare/third-party2/glibc/2.30/src/glibc-2.30/nptl/pthread_create.c:479:8
https://github.com/facebook/rocksdb/issues/19 0x7fd9c01dd16e in clone /home/engshare/third-party2/glibc/2.30/src/glibc-2.30/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:95
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9320
Reviewed By: jay-zhuang
Differential Revision: D33242185
Pulled By: ajkr
fbshipit-source-id: 741984b10a610e0509e0d4e54c42cdbac03f5285
2021-12-21 12:24:28 -08:00
|
|
|
ASSERT_LT(info.cf_id, test_->handles_.size());
|
|
|
|
ASSERT_GE(info.cf_id, 0u);
|
|
|
|
ASSERT_NE(test_->handles_[info.cf_id], nullptr);
|
2019-10-24 14:42:43 -07:00
|
|
|
test_->dbfull()->TEST_GetFilesMetaData(test_->handles_[info.cf_id],
|
|
|
|
&files_by_level);
|
|
|
|
|
|
|
|
ASSERT_FALSE(files_by_level.empty());
|
|
|
|
auto it = std::find_if(files_by_level[0].begin(), files_by_level[0].end(),
|
|
|
|
[&](const FileMetaData& meta) {
|
|
|
|
return meta.fd.GetNumber() == info.file_number;
|
|
|
|
});
|
|
|
|
ASSERT_NE(it, files_by_level[0].end());
|
|
|
|
ASSERT_EQ(info.oldest_blob_file_number, it->oldest_blob_file_number);
|
|
|
|
}
|
|
|
|
|
2015-06-11 14:18:02 -07:00
|
|
|
ASSERT_EQ(db->GetEnv()->GetThreadID(), info.thread_id);
|
|
|
|
ASSERT_GT(info.thread_id, 0U);
|
2015-09-15 09:03:08 -07:00
|
|
|
ASSERT_EQ(info.table_properties.user_collected_properties.find("0")->second,
|
|
|
|
"1");
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This API allows users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<std::string> flushed_column_family_names_;
|
|
|
|
std::vector<DB*> flushed_dbs_;
|
|
|
|
int slowdown_count;
|
|
|
|
int stop_count;
|
2015-06-11 14:18:02 -07:00
|
|
|
bool db_closing;
|
|
|
|
std::atomic_bool db_closed;
|
2015-06-05 12:28:51 -07:00
|
|
|
TableFileCreationInfo prev_fc_info_;
|
2015-06-11 14:18:02 -07:00
|
|
|
|
|
|
|
protected:
|
|
|
|
Env* env_;
|
2019-10-24 14:42:43 -07:00
|
|
|
EventListenerTest* test_;
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This API allows users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
};
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Checks flush notifications on a single DB: a TestFlushListener is
// registered, seven extra column families are created, one large value is
// written to each, and the column families are flushed one at a time. After
// each flush the listener must have recorded exactly one more flush, and at
// the end the recorded DB pointers and column family names must line up with
// the order in which the flushes were issued.
TEST_F(EventListenerTest, OnSingleDBFlushTest) {
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
Options options;
|
2017-06-26 16:52:06 -07:00
|
|
|
options.env = CurrentOptions().env;
|
2015-08-31 23:11:12 -07:00
|
|
|
// NOTE(review): k110KB is defined outside this chunk; presumably 110 KB, so
// that a single 90000-byte value fits in one memtable -- confirm.
options.write_buffer_size = k110KB;
|
2016-12-13 18:22:00 -08:00
|
|
|
#ifdef ROCKSDB_USING_THREAD_STATUS
|
2015-06-11 14:18:02 -07:00
|
|
|
options.enable_thread_tracking = true;
|
|
|
|
#endif // ROCKSDB_USING_THREAD_STATUS
|
2019-10-24 14:42:43 -07:00
|
|
|
// The raw pointer is kept so the test can inspect what the listener recorded.
// NOTE(review): no delete is visible in this test; presumably
// options.listeners takes ownership via emplace_back below -- confirm.
TestFlushListener* listener = new TestFlushListener(options.env, this);
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
options.listeners.emplace_back(listener);
|
|
|
|
// Names of the seven column families created for this test; the final loop
// compares them, in this order, against what the listener recorded.
std::vector<std::string> cf_names = {
|
|
|
|
"pikachu", "ilya", "muromec", "dobrynia",
|
|
|
|
"nikitich", "alyosha", "popovich"};
|
2015-09-15 09:03:08 -07:00
|
|
|
// NOTE(review): the flush callback earlier in this file asserts that the
// user-collected table property "0" equals "1"; presumably this collector
// factory is what supplies that property -- confirm.
options.table_properties_collector_factories.push_back(
|
|
|
|
std::make_shared<TestPropertiesCollectorFactory>());
|
2015-12-14 13:36:32 -08:00
|
|
|
CreateAndReopenWithCF(cf_names, options);
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
|
2014-11-24 18:28:06 -08:00
|
|
|
// Write one 90000-byte value into each of the column families addressed by
// indices 1..7.
// NOTE(review): index 0 is presumably the default column family, which this
// test never writes to or flushes -- confirm.
ASSERT_OK(Put(1, "pikachu", std::string(90000, 'p')));
|
2019-10-24 14:42:43 -07:00
|
|
|
|
|
|
|
|
|
|
// Additionally write a blob index entry under key "ditto" into column
// family 1 through the internal write-batch API.
// NOTE(review): presumably this gives the flushed file an oldest blob file
// number for the listener's flush callback to compare -- confirm.
WriteBatch batch;
|
|
|
|
|
ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 1, "ditto",
|
|
|
|
|
BlobStr(456, 0, 1 << 10)));
|
|
|
|
|
ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
|
|
|
|
|
|
2014-11-24 18:28:06 -08:00
|
|
|
|
ASSERT_OK(Put(2, "ilya", std::string(90000, 'i')));
|
|
|
|
|
ASSERT_OK(Put(3, "muromec", std::string(90000, 'm')));
|
|
|
|
|
ASSERT_OK(Put(4, "dobrynia", std::string(90000, 'd')));
|
|
|
|
|
ASSERT_OK(Put(5, "nikitich", std::string(90000, 'n')));
|
|
|
|
|
ASSERT_OK(Put(6, "alyosha", std::string(90000, 'a')));
|
|
|
|
|
ASSERT_OK(Put(7, "popovich", std::string(90000, 'p')));
|
2015-12-14 13:36:32 -08:00
|
|
|
// Flush column families 1..7 one at a time, checking the listener's
// bookkeeping after every flush.
for (int i = 1; i < 8; ++i) {
|
2015-08-31 23:11:12 -07:00
|
|
|
ASSERT_OK(Flush(i));
|
2020-12-22 23:44:44 -08:00
|
|
|
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
|
2022-02-22 12:13:39 -08:00
|
|
|
// Ensure background work is fully finished including listener callbacks
|
|
|
|
// before accessing listener state.
|
|
|
|
ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork());
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
// After the i-th flush the listener must hold exactly i recorded DBs and
// exactly i recorded column family names.
ASSERT_EQ(listener->flushed_dbs_.size(), i);
|
|
|
|
|
ASSERT_EQ(listener->flushed_column_family_names_.size(), i);
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-10 15:47:54 -07:00
|
|
|
// make sure callback functions are called in the right order
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
// The k-th recorded flush must belong to db_ and to cf_names[k].
for (size_t i = 0; i < cf_names.size(); ++i) {
|
|
|
|
|
ASSERT_EQ(listener->flushed_dbs_[i], db_);
|
|
|
|
|
ASSERT_EQ(listener->flushed_column_family_names_[i], cf_names[i]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
TEST_F(EventListenerTest, MultiCF) {
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This API allows users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
Options options;
|
2017-06-26 16:52:06 -07:00
|
|
|
options.env = CurrentOptions().env;
|
2015-08-31 23:11:12 -07:00
|
|
|
options.write_buffer_size = k110KB;
|
2016-12-13 18:22:00 -08:00
|
|
|
#ifdef ROCKSDB_USING_THREAD_STATUS
|
2015-06-11 14:18:02 -07:00
|
|
|
options.enable_thread_tracking = true;
|
|
|
|
#endif // ROCKSDB_USING_THREAD_STATUS
|
2021-08-03 13:30:05 -07:00
|
|
|
for (auto atomic_flush : {false, true}) {
|
|
|
|
options.atomic_flush = atomic_flush;
|
|
|
|
options.create_if_missing = true;
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
TestFlushListener* listener = new TestFlushListener(options.env, this);
|
|
|
|
options.listeners.emplace_back(listener);
|
|
|
|
options.table_properties_collector_factories.push_back(
|
|
|
|
std::make_shared<TestPropertiesCollectorFactory>());
|
|
|
|
std::vector<std::string> cf_names = {"pikachu", "ilya", "muromec",
|
|
|
|
"dobrynia", "nikitich", "alyosha",
|
|
|
|
"popovich"};
|
|
|
|
CreateAndReopenWithCF(cf_names, options);
|
|
|
|
|
|
|
|
ASSERT_OK(Put(1, "pikachu", std::string(90000, 'p')));
|
|
|
|
ASSERT_OK(Put(2, "ilya", std::string(90000, 'i')));
|
|
|
|
ASSERT_OK(Put(3, "muromec", std::string(90000, 'm')));
|
|
|
|
ASSERT_OK(Put(4, "dobrynia", std::string(90000, 'd')));
|
|
|
|
ASSERT_OK(Put(5, "nikitich", std::string(90000, 'n')));
|
|
|
|
ASSERT_OK(Put(6, "alyosha", std::string(90000, 'a')));
|
|
|
|
ASSERT_OK(Put(7, "popovich", std::string(90000, 'p')));
|
Fix TSAN data race in EventListenerTest.MultiCF (#9528)
Summary:
**Context:**
`EventListenerTest.MultiCF` occasionally failed on TSAN data race as below:
```
WARNING: ThreadSanitizer: data race (pid=2047633)
Read of size 8 at 0x7b6000001440 by main thread:
#0 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::size() const /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_vector.h:916:40 (listener_test+0x52337c)
https://github.com/facebook/rocksdb/issues/1 rocksdb::EventListenerTest_MultiCF_Test::TestBody() /home/circleci/project/db/listener_test.cc:384:7 (listener_test+0x52337c)
Previous write of size 8 at 0x7b6000001440 by thread T2:
#0 void std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::_M_realloc_insert<rocksdb::DB* const&>(__gnu_cxx::__normal_iterator<rocksdb::DB**, std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> > >, rocksdb::DB* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/vector.tcc:503:31 (listener_test+0x550654)
https://github.com/facebook/rocksdb/issues/1 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::push_back(rocksdb::DB* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_vector.h:1195:4 (listener_test+0x550654)
https://github.com/facebook/rocksdb/issues/2 rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) /home/circleci/project/db/listener_test.cc:255:18 (listener_test+0x550654)
```
After investigation, it is due to the following:
(1) `ASSERT_OK(Flush(i));` before the read `std::vector::size()` is supposed to be [blocked on `DB::Impl::bg_cv_` for memtable flush to finish](https://github.com/facebook/rocksdb/blob/320d9a8e8a1b6998f92934f87fc71ad8bd6d4596/db/db_impl/db_impl_compaction_flush.cc#L2319) and get signaled [at the end of background flush ](https://github.com/facebook/rocksdb/blob/320d9a8e8a1b6998f92934f87fc71ad8bd6d4596/db/db_impl/db_impl_compaction_flush.cc#L2830), which happens after the write `std::vector::push_back()` . So the sequence of execution should have been synchronized as `call flush() -> write -> return from flush() -> read` and would not cause any TSAN data race.
- The subsequent `ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());` serves a similar purpose based on [the previous attempt to deflake the test.](https://github.com/facebook/rocksdb/pull/9084)
(2) However, there are multiple places in the code that can signal this `DB::Impl::bg_cv_` and mistakenly wake up `ASSERT_OK(Flush(i));` (or `ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());`) too early (and with the lock available to them), resulting in a non-synchronized read and write and thus a TSAN data race.
- Reproduced by the following, suggested by ajkr:
```
diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc
index 4ff87c1e4..52492e9cf 100644
--- a/db/db_impl/db_impl_compaction_flush.cc
+++ b/db/db_impl/db_impl_compaction_flush.cc
@@ -22,7 +22,7 @@
#include "test_util/sync_point.h"
#include "util/cast_util.h"
#include "util/concurrent_task_limiter_impl.h"
namespace ROCKSDB_NAMESPACE {
bool DBImpl::EnoughRoomForCompaction(
@@ -855,6 +855,7 @@ void DBImpl::NotifyOnFlushCompleted(
mutable_cf_options.level0_stop_writes_trigger);
// release lock while notifying events
mutex_.Unlock();
+ bg_cv_.SignalAll();
```
**Summary:**
- Added synchronization between read and write via the `ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency()` mechanism
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9528
Test Plan:
`./listener_test --gtest_filter=EventListenerTest.MultiCF --gtest_repeat=10`
- pre-fix:
```
Repeating all tests (iteration 3)
Note: Google Test filter = EventListenerTest.MultiCF
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from EventListenerTest
[ RUN ] EventListenerTest.MultiCF
==================
WARNING: ThreadSanitizer: data race (pid=3377137)
Read of size 8 at 0x7b6000000840 by main thread:
#0 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::size()
https://github.com/facebook/rocksdb/issues/1 rocksdb::EventListenerTest_MultiCF_Test::TestBody() db/listener_test.cc:384 (listener_test+0x4bb300)
Previous write of size 8 at 0x7b6000000840 by thread T2:
#0 void std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::_M_realloc_insert<rocksdb::DB* const&>(__gnu_cxx::__normal_iterator<rocksdb::DB**, std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> > >, rocksdb::DB* const&)
https://github.com/facebook/rocksdb/issues/1 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::push_back(rocksdb::DB* const&)
https://github.com/facebook/rocksdb/issues/2 rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) db/listener_test.cc:255 (listener_test+0x4e820f)
```
- post-fix: `All passed`
Reviewed By: ajkr
Differential Revision: D34085791
Pulled By: hx235
fbshipit-source-id: f877aa687ea1d5cb6f31ef8c4772625d22868e8b
2022-02-10 10:17:53 -08:00
|
|
|
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
|
2021-08-03 13:30:05 -07:00
|
|
|
for (int i = 1; i < 8; ++i) {
|
Fix TSAN data race in EventListenerTest.MultiCF (#9528)
Summary:
**Context:**
`EventListenerTest.MultiCF` occasionally failed on TSAN data race as below:
```
WARNING: ThreadSanitizer: data race (pid=2047633)
Read of size 8 at 0x7b6000001440 by main thread:
#0 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::size() const /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_vector.h:916:40 (listener_test+0x52337c)
https://github.com/facebook/rocksdb/issues/1 rocksdb::EventListenerTest_MultiCF_Test::TestBody() /home/circleci/project/db/listener_test.cc:384:7 (listener_test+0x52337c)
Previous write of size 8 at 0x7b6000001440 by thread T2:
#0 void std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::_M_realloc_insert<rocksdb::DB* const&>(__gnu_cxx::__normal_iterator<rocksdb::DB**, std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> > >, rocksdb::DB* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/vector.tcc:503:31 (listener_test+0x550654)
https://github.com/facebook/rocksdb/issues/1 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::push_back(rocksdb::DB* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_vector.h:1195:4 (listener_test+0x550654)
https://github.com/facebook/rocksdb/issues/2 rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) /home/circleci/project/db/listener_test.cc:255:18 (listener_test+0x550654)
```
After investigation, it is due to the following:
(1) `ASSERT_OK(Flush(i));` before the read `std::vector::size()` is supposed to be [blocked on `DB::Impl::bg_cv_` for memtable flush to finish](https://github.com/facebook/rocksdb/blob/320d9a8e8a1b6998f92934f87fc71ad8bd6d4596/db/db_impl/db_impl_compaction_flush.cc#L2319) and get signaled [at the end of background flush ](https://github.com/facebook/rocksdb/blob/320d9a8e8a1b6998f92934f87fc71ad8bd6d4596/db/db_impl/db_impl_compaction_flush.cc#L2830), which happens after the write `std::vector::push_back()` . So the sequence of execution should have been synchronized as `call flush() -> write -> return from flush() -> read` and would not cause any TSAN data race.
- The subsequent `ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());` serves a similar purpose based on [the previous attempt to deflake the test.](https://github.com/facebook/rocksdb/pull/9084)
(2) However, there are multiple places in the code that can signal this `DB::Impl::bg_cv_` and mistakenly wake up `ASSERT_OK(Flush(i));` (or `ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());`) too early (and with the lock available to them), resulting in a non-synchronized read and write and thus a TSAN data race.
- Reproduced by the following, suggested by ajkr:
```
diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc
index 4ff87c1e4..52492e9cf 100644
--- a/db/db_impl/db_impl_compaction_flush.cc
+++ b/db/db_impl/db_impl_compaction_flush.cc
@@ -22,7 +22,7 @@
#include "test_util/sync_point.h"
#include "util/cast_util.h"
#include "util/concurrent_task_limiter_impl.h"
namespace ROCKSDB_NAMESPACE {
bool DBImpl::EnoughRoomForCompaction(
@@ -855,6 +855,7 @@ void DBImpl::NotifyOnFlushCompleted(
mutable_cf_options.level0_stop_writes_trigger);
// release lock while notifying events
mutex_.Unlock();
+ bg_cv_.SignalAll();
```
**Summary:**
- Added synchronization between read and write via the `ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency()` mechanism
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9528
Test Plan:
`./listener_test --gtest_filter=EventListenerTest.MultiCF --gtest_repeat=10`
- pre-fix:
```
Repeating all tests (iteration 3)
Note: Google Test filter = EventListenerTest.MultiCF
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from EventListenerTest
[ RUN ] EventListenerTest.MultiCF
==================
WARNING: ThreadSanitizer: data race (pid=3377137)
Read of size 8 at 0x7b6000000840 by main thread:
#0 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::size()
https://github.com/facebook/rocksdb/issues/1 rocksdb::EventListenerTest_MultiCF_Test::TestBody() db/listener_test.cc:384 (listener_test+0x4bb300)
Previous write of size 8 at 0x7b6000000840 by thread T2:
#0 void std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::_M_realloc_insert<rocksdb::DB* const&>(__gnu_cxx::__normal_iterator<rocksdb::DB**, std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> > >, rocksdb::DB* const&)
https://github.com/facebook/rocksdb/issues/1 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::push_back(rocksdb::DB* const&)
https://github.com/facebook/rocksdb/issues/2 rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) db/listener_test.cc:255 (listener_test+0x4e820f)
```
- post-fix: `All passed`
Reviewed By: ajkr
Differential Revision: D34085791
Pulled By: hx235
fbshipit-source-id: f877aa687ea1d5cb6f31ef8c4772625d22868e8b
2022-02-10 10:17:53 -08:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
|
|
|
|
{{"DBImpl::NotifyOnFlushCompleted::PostAllOnFlushCompleted",
|
|
|
|
"EventListenerTest.MultiCF:PreVerifyListener"}});
|
2021-08-03 13:30:05 -07:00
|
|
|
ASSERT_OK(Flush(i));
|
Fix TSAN data race in EventListenerTest.MultiCF (#9528)
Summary:
**Context:**
`EventListenerTest.MultiCF` occasionally failed on TSAN data race as below:
```
WARNING: ThreadSanitizer: data race (pid=2047633)
Read of size 8 at 0x7b6000001440 by main thread:
#0 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::size() const /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_vector.h:916:40 (listener_test+0x52337c)
https://github.com/facebook/rocksdb/issues/1 rocksdb::EventListenerTest_MultiCF_Test::TestBody() /home/circleci/project/db/listener_test.cc:384:7 (listener_test+0x52337c)
Previous write of size 8 at 0x7b6000001440 by thread T2:
#0 void std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::_M_realloc_insert<rocksdb::DB* const&>(__gnu_cxx::__normal_iterator<rocksdb::DB**, std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> > >, rocksdb::DB* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/vector.tcc:503:31 (listener_test+0x550654)
https://github.com/facebook/rocksdb/issues/1 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::push_back(rocksdb::DB* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_vector.h:1195:4 (listener_test+0x550654)
https://github.com/facebook/rocksdb/issues/2 rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) /home/circleci/project/db/listener_test.cc:255:18 (listener_test+0x550654)
```
After investigation, it is due to the following:
(1) `ASSERT_OK(Flush(i));` before the read `std::vector::size()` is supposed to be [blocked on `DB::Impl::bg_cv_` for memtable flush to finish](https://github.com/facebook/rocksdb/blob/320d9a8e8a1b6998f92934f87fc71ad8bd6d4596/db/db_impl/db_impl_compaction_flush.cc#L2319) and get signaled [at the end of background flush ](https://github.com/facebook/rocksdb/blob/320d9a8e8a1b6998f92934f87fc71ad8bd6d4596/db/db_impl/db_impl_compaction_flush.cc#L2830), which happens after the write `std::vector::push_back()` . So the sequence of execution should have been synchronized as `call flush() -> write -> return from flush() -> read` and would not cause any TSAN data race.
- The subsequent `ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());` serves a similar purpose based on [the previous attempt to deflake the test.](https://github.com/facebook/rocksdb/pull/9084)
(2) However, there are multiple places in the code that can signal this `DB::Impl::bg_cv_` and mistakenly wake up `ASSERT_OK(Flush(i));` (or `ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());`) too early (and with the lock available to them), resulting in a non-synchronized read and write and thus a TSAN data race.
- Reproduced by the following, suggested by ajkr:
```
diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc
index 4ff87c1e4..52492e9cf 100644
--- a/db/db_impl/db_impl_compaction_flush.cc
+++ b/db/db_impl/db_impl_compaction_flush.cc
@@ -22,7 +22,7 @@
#include "test_util/sync_point.h"
#include "util/cast_util.h"
#include "util/concurrent_task_limiter_impl.h"
namespace ROCKSDB_NAMESPACE {
bool DBImpl::EnoughRoomForCompaction(
@@ -855,6 +855,7 @@ void DBImpl::NotifyOnFlushCompleted(
mutable_cf_options.level0_stop_writes_trigger);
// release lock while notifying events
mutex_.Unlock();
+ bg_cv_.SignalAll();
```
**Summary:**
- Added synchronization between the read and the write via the `ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency()` mechanism
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9528
Test Plan:
`./listener_test --gtest_filter=EventListenerTest.MultiCF --gtest_repeat=10`
- pre-fix:
```
Repeating all tests (iteration 3)
Note: Google Test filter = EventListenerTest.MultiCF
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from EventListenerTest
[ RUN ] EventListenerTest.MultiCF
==================
WARNING: ThreadSanitizer: data race (pid=3377137)
Read of size 8 at 0x7b6000000840 by main thread:
#0 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::size()
https://github.com/facebook/rocksdb/issues/1 rocksdb::EventListenerTest_MultiCF_Test::TestBody() db/listener_test.cc:384 (listener_test+0x4bb300)
Previous write of size 8 at 0x7b6000000840 by thread T2:
#0 void std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::_M_realloc_insert<rocksdb::DB* const&>(__gnu_cxx::__normal_iterator<rocksdb::DB**, std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> > >, rocksdb::DB* const&)
https://github.com/facebook/rocksdb/issues/1 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::push_back(rocksdb::DB* const&)
https://github.com/facebook/rocksdb/issues/2 rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) db/listener_test.cc:255 (listener_test+0x4e820f)
```
- post-fix: `All passed`
Reviewed By: ajkr
Differential Revision: D34085791
Pulled By: hx235
fbshipit-source-id: f877aa687ea1d5cb6f31ef8c4772625d22868e8b
2022-02-10 10:17:53 -08:00
|
|
|
TEST_SYNC_POINT("EventListenerTest.MultiCF:PreVerifyListener");
|
2021-08-03 13:30:05 -07:00
|
|
|
ASSERT_EQ(listener->flushed_dbs_.size(), i);
|
|
|
|
ASSERT_EQ(listener->flushed_column_family_names_.size(), i);
|
Fix TSAN data race in EventListenerTest.MultiCF (#9528)
Summary:
**Context:**
`EventListenerTest.MultiCF` occasionally failed on TSAN data race as below:
```
WARNING: ThreadSanitizer: data race (pid=2047633)
Read of size 8 at 0x7b6000001440 by main thread:
#0 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::size() const /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_vector.h:916:40 (listener_test+0x52337c)
https://github.com/facebook/rocksdb/issues/1 rocksdb::EventListenerTest_MultiCF_Test::TestBody() /home/circleci/project/db/listener_test.cc:384:7 (listener_test+0x52337c)
Previous write of size 8 at 0x7b6000001440 by thread T2:
#0 void std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::_M_realloc_insert<rocksdb::DB* const&>(__gnu_cxx::__normal_iterator<rocksdb::DB**, std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> > >, rocksdb::DB* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/vector.tcc:503:31 (listener_test+0x550654)
https://github.com/facebook/rocksdb/issues/1 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::push_back(rocksdb::DB* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_vector.h:1195:4 (listener_test+0x550654)
https://github.com/facebook/rocksdb/issues/2 rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) /home/circleci/project/db/listener_test.cc:255:18 (listener_test+0x550654)
```
After investigation, it is due to the following:
(1) `ASSERT_OK(Flush(i));` before the read `std::vector::size()` is supposed to be [blocked on `DB::Impl::bg_cv_` for memtable flush to finish](https://github.com/facebook/rocksdb/blob/320d9a8e8a1b6998f92934f87fc71ad8bd6d4596/db/db_impl/db_impl_compaction_flush.cc#L2319) and get signaled [at the end of background flush ](https://github.com/facebook/rocksdb/blob/320d9a8e8a1b6998f92934f87fc71ad8bd6d4596/db/db_impl/db_impl_compaction_flush.cc#L2830), which happens after the write `std::vector::push_back()` . So the sequence of execution should have been synchronized as `call flush() -> write -> return from flush() -> read` and would not cause any TSAN data race.
- The subsequent `ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());` serves a similar purpose based on [the previous attempt to deflake the test.](https://github.com/facebook/rocksdb/pull/9084)
(2) However, there are multiple places in the code that can signal this `DB::Impl::bg_cv_` and mistakenly wake up `ASSERT_OK(Flush(i));` (or `ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());`) too early (and with the lock available to them), resulting in a non-synchronized read and write and thus a TSAN data race.
- Reproduced by the following, suggested by ajkr:
```
diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc
index 4ff87c1e4..52492e9cf 100644
--- a/db/db_impl/db_impl_compaction_flush.cc
+++ b/db/db_impl/db_impl_compaction_flush.cc
@@ -22,7 +22,7 @@
#include "test_util/sync_point.h"
#include "util/cast_util.h"
#include "util/concurrent_task_limiter_impl.h"
namespace ROCKSDB_NAMESPACE {
bool DBImpl::EnoughRoomForCompaction(
@@ -855,6 +855,7 @@ void DBImpl::NotifyOnFlushCompleted(
mutable_cf_options.level0_stop_writes_trigger);
// release lock while notifying events
mutex_.Unlock();
+ bg_cv_.SignalAll();
```
**Summary:**
- Added synchronization between the read and the write via the `ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency()` mechanism
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9528
Test Plan:
`./listener_test --gtest_filter=EventListenerTest.MultiCF --gtest_repeat=10`
- pre-fix:
```
Repeating all tests (iteration 3)
Note: Google Test filter = EventListenerTest.MultiCF
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from EventListenerTest
[ RUN ] EventListenerTest.MultiCF
==================
WARNING: ThreadSanitizer: data race (pid=3377137)
Read of size 8 at 0x7b6000000840 by main thread:
#0 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::size()
https://github.com/facebook/rocksdb/issues/1 rocksdb::EventListenerTest_MultiCF_Test::TestBody() db/listener_test.cc:384 (listener_test+0x4bb300)
Previous write of size 8 at 0x7b6000000840 by thread T2:
#0 void std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::_M_realloc_insert<rocksdb::DB* const&>(__gnu_cxx::__normal_iterator<rocksdb::DB**, std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> > >, rocksdb::DB* const&)
https://github.com/facebook/rocksdb/issues/1 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::push_back(rocksdb::DB* const&)
https://github.com/facebook/rocksdb/issues/2 rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) db/listener_test.cc:255 (listener_test+0x4e820f)
```
- post-fix: `All passed`
Reviewed By: ajkr
Differential Revision: D34085791
Pulled By: hx235
fbshipit-source-id: f877aa687ea1d5cb6f31ef8c4772625d22868e8b
2022-02-10 10:17:53 -08:00
|
|
|
// make sure callback functions are called in the right order
|
|
|
|
if (i == 7) {
|
|
|
|
for (size_t j = 0; j < cf_names.size(); j++) {
|
|
|
|
ASSERT_EQ(listener->flushed_dbs_[j], db_);
|
|
|
|
ASSERT_EQ(listener->flushed_column_family_names_[j], cf_names[j]);
|
|
|
|
}
|
|
|
|
}
|
2021-08-03 13:30:05 -07:00
|
|
|
}
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This API allows users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
|
Fix TSAN data race in EventListenerTest.MultiCF (#9528)
Summary:
**Context:**
`EventListenerTest.MultiCF` occasionally failed on TSAN data race as below:
```
WARNING: ThreadSanitizer: data race (pid=2047633)
Read of size 8 at 0x7b6000001440 by main thread:
#0 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::size() const /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_vector.h:916:40 (listener_test+0x52337c)
https://github.com/facebook/rocksdb/issues/1 rocksdb::EventListenerTest_MultiCF_Test::TestBody() /home/circleci/project/db/listener_test.cc:384:7 (listener_test+0x52337c)
Previous write of size 8 at 0x7b6000001440 by thread T2:
#0 void std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::_M_realloc_insert<rocksdb::DB* const&>(__gnu_cxx::__normal_iterator<rocksdb::DB**, std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> > >, rocksdb::DB* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/vector.tcc:503:31 (listener_test+0x550654)
https://github.com/facebook/rocksdb/issues/1 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::push_back(rocksdb::DB* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_vector.h:1195:4 (listener_test+0x550654)
https://github.com/facebook/rocksdb/issues/2 rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) /home/circleci/project/db/listener_test.cc:255:18 (listener_test+0x550654)
```
After investigation, it is due to the following:
(1) `ASSERT_OK(Flush(i));` before the read `std::vector::size()` is supposed to be [blocked on `DB::Impl::bg_cv_` for memtable flush to finish](https://github.com/facebook/rocksdb/blob/320d9a8e8a1b6998f92934f87fc71ad8bd6d4596/db/db_impl/db_impl_compaction_flush.cc#L2319) and get signaled [at the end of background flush ](https://github.com/facebook/rocksdb/blob/320d9a8e8a1b6998f92934f87fc71ad8bd6d4596/db/db_impl/db_impl_compaction_flush.cc#L2830), which happens after the write `std::vector::push_back()` . So the sequence of execution should have been synchronized as `call flush() -> write -> return from flush() -> read` and would not cause any TSAN data race.
- The subsequent `ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());` serves a similar purpose based on [the previous attempt to deflake the test.](https://github.com/facebook/rocksdb/pull/9084)
(2) However, there are multiple places in the code that can signal this `DB::Impl::bg_cv_` and mistakenly wake up `ASSERT_OK(Flush(i));` (or `ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());`) too early (and with the lock available to them), resulting in a non-synchronized read and write and thus a TSAN data race.
- Reproduced by the following, suggested by ajkr:
```
diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc
index 4ff87c1e4..52492e9cf 100644
--- a/db/db_impl/db_impl_compaction_flush.cc
+++ b/db/db_impl/db_impl_compaction_flush.cc
@@ -22,7 +22,7 @@
#include "test_util/sync_point.h"
#include "util/cast_util.h"
#include "util/concurrent_task_limiter_impl.h"
namespace ROCKSDB_NAMESPACE {
bool DBImpl::EnoughRoomForCompaction(
@@ -855,6 +855,7 @@ void DBImpl::NotifyOnFlushCompleted(
mutable_cf_options.level0_stop_writes_trigger);
// release lock while notifying events
mutex_.Unlock();
+ bg_cv_.SignalAll();
```
**Summary:**
- Added synchronization between the read and the write via the `ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency()` mechanism
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9528
Test Plan:
`./listener_test --gtest_filter=EventListenerTest.MultiCF --gtest_repeat=10`
- pre-fix:
```
Repeating all tests (iteration 3)
Note: Google Test filter = EventListenerTest.MultiCF
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from EventListenerTest
[ RUN ] EventListenerTest.MultiCF
==================
WARNING: ThreadSanitizer: data race (pid=3377137)
Read of size 8 at 0x7b6000000840 by main thread:
#0 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::size()
https://github.com/facebook/rocksdb/issues/1 rocksdb::EventListenerTest_MultiCF_Test::TestBody() db/listener_test.cc:384 (listener_test+0x4bb300)
Previous write of size 8 at 0x7b6000000840 by thread T2:
#0 void std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::_M_realloc_insert<rocksdb::DB* const&>(__gnu_cxx::__normal_iterator<rocksdb::DB**, std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> > >, rocksdb::DB* const&)
https://github.com/facebook/rocksdb/issues/1 std::vector<rocksdb::DB*, std::allocator<rocksdb::DB*> >::push_back(rocksdb::DB* const&)
https://github.com/facebook/rocksdb/issues/2 rocksdb::TestFlushListener::OnFlushCompleted(rocksdb::DB*, rocksdb::FlushJobInfo const&) db/listener_test.cc:255 (listener_test+0x4e820f)
```
- post-fix: `All passed`
Reviewed By: ajkr
Differential Revision: D34085791
Pulled By: hx235
fbshipit-source-id: f877aa687ea1d5cb6f31ef8c4772625d22868e8b
2022-02-10 10:17:53 -08:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
2021-08-03 13:30:05 -07:00
|
|
|
Close();
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This API allows users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
// Opens several DBs that all share the same set of flush listeners, flushes
// every column family of every DB in a fixed order, and then checks that each
// listener recorded the flush callbacks in exactly that order.
TEST_F(EventListenerTest, MultiDBMultiListeners) {
  Options options;
  options.env = CurrentOptions().env;
#ifdef ROCKSDB_USING_THREAD_STATUS
  options.enable_thread_tracking = true;
#endif  // ROCKSDB_USING_THREAD_STATUS
  options.table_properties_collector_factories.push_back(
      std::make_shared<TestPropertiesCollectorFactory>());

  const int kNumDBs = 5;
  const int kNumListeners = 10;

  // Raw pointers are kept so the test can inspect listener state later.
  // NOTE(review): presumably `options.listeners` takes ownership of these
  // objects once they are registered below -- confirm against Options.
  std::vector<TestFlushListener*> listeners;
  for (int i = 0; i < kNumListeners; ++i) {
    listeners.emplace_back(new TestFlushListener(options.env, this));
  }

  std::vector<std::string> cf_names = {"pikachu",  "ilya",    "muromec",
                                       "dobrynia", "nikitich", "alyosha",
                                       "popovich"};

  options.create_if_missing = true;
  for (int i = 0; i < kNumListeners; ++i) {
    options.listeners.emplace_back(listeners[i]);
  }
  DBOptions db_opts(options);
  ColumnFamilyOptions cf_opts(options);

  std::vector<DB*> dbs;
  std::vector<std::vector<ColumnFamilyHandle*>> vec_handles;

  // Create kNumDBs fresh databases, each with one column family per name.
  for (int d = 0; d < kNumDBs; ++d) {
    ASSERT_OK(DestroyDB(dbname_ + std::to_string(d), options));
    DB* db;
    std::vector<ColumnFamilyHandle*> handles;
    ASSERT_OK(DB::Open(options, dbname_ + std::to_string(d), &db));
    for (size_t c = 0; c < cf_names.size(); ++c) {
      ColumnFamilyHandle* handle;
      ASSERT_OK(db->CreateColumnFamily(cf_opts, cf_names[c], &handle));
      handles.push_back(handle);
    }

    vec_handles.push_back(std::move(handles));
    dbs.push_back(db);
  }

  // Write one key into every column family so that each flush has data.
  for (int d = 0; d < kNumDBs; ++d) {
    for (size_t c = 0; c < cf_names.size(); ++c) {
      ASSERT_OK(dbs[d]->Put(WriteOptions(), vec_handles[d][c], cf_names[c],
                            cf_names[c]));
    }
  }

  // Flush column-family-major, DB-minor; the verification loop below walks
  // the recorded callbacks in this same order.
  for (size_t c = 0; c < cf_names.size(); ++c) {
    for (int d = 0; d < kNumDBs; ++d) {
      ASSERT_OK(dbs[d]->Flush(FlushOptions(), vec_handles[d][c]));
      ASSERT_OK(
          static_cast_with_check<DBImpl>(dbs[d])->TEST_WaitForFlushMemTable());
    }
  }

  for (int d = 0; d < kNumDBs; ++d) {
    // Ensure background work is fully finished including listener callbacks
    // before accessing listener state.
    ASSERT_OK(
        static_cast_with_check<DBImpl>(dbs[d])->TEST_WaitForBackgroundWork());
  }

  const size_t expected_flushes = cf_names.size() * static_cast<size_t>(kNumDBs);
  for (auto* listener : listeners) {
    // Guard the indexed accesses below: a missing callback must surface as a
    // test failure instead of an out-of-bounds read.
    ASSERT_GE(listener->flushed_dbs_.size(), expected_flushes);
    ASSERT_GE(listener->flushed_column_family_names_.size(), expected_flushes);
    size_t pos = 0;
    for (size_t c = 0; c < cf_names.size(); ++c) {
      for (int d = 0; d < kNumDBs; ++d) {
        ASSERT_EQ(listener->flushed_dbs_[pos], dbs[d]);
        ASSERT_EQ(listener->flushed_column_family_names_[pos], cf_names[c]);
        pos++;
      }
    }
  }

  // Release column family handles before closing their owning DBs. Iterate by
  // reference so the inner vectors are not copied.
  for (const auto& handles : vec_handles) {
    for (auto* h : handles) {
      delete h;
    }
  }
  vec_handles.clear();

  for (auto* db : dbs) {
    delete db;
  }
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
TEST_F(EventListenerTest, DisableBGCompaction) {
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This API allows users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
Options options;
|
2017-06-26 16:52:06 -07:00
|
|
|
options.env = CurrentOptions().env;
|
2016-12-13 18:22:00 -08:00
|
|
|
#ifdef ROCKSDB_USING_THREAD_STATUS
|
2015-06-11 14:18:02 -07:00
|
|
|
options.enable_thread_tracking = true;
|
|
|
|
#endif // ROCKSDB_USING_THREAD_STATUS
|
2019-10-24 14:42:43 -07:00
|
|
|
TestFlushListener* listener = new TestFlushListener(options.env, this);
|
Deprecate WriteOptions::timeout_hint_us
Summary:
In one of our recent meetings, we discussed deprecating features that are not being actively used. One of those features, at least within Facebook, is timeout_hint. The feature is really nicely implemented, but if nobody needs it, we should remove it from our code-base (until we get a valid use-case). Some arguments:
* Less code == better icache hit rate, smaller builds, simpler code
* The motivation for adding timeout_hint_us was to work-around RocksDB's stall issue. However, we're currently addressing the stall issue itself (see @sdong's recent work on stall write_rate), so we should never see sharp lock-ups in the future.
* Nobody is using the feature within Facebook's code-base. Googling for `timeout_hint_us` also doesn't yield any users.
Test Plan: make check
Reviewers: anthony, kradhakrishnan, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D41937
2015-07-14 09:35:48 +02:00
|
|
|
const int kCompactionTrigger = 1;
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
const int kSlowdownTrigger = 5;
|
Deprecate WriteOptions::timeout_hint_us
Summary:
In one of our recent meetings, we discussed deprecating features that are not being actively used. One of those features, at least within Facebook, is timeout_hint. The feature is really nicely implemented, but if nobody needs it, we should remove it from our code-base (until we get a valid use-case). Some arguments:
* Less code == better icache hit rate, smaller builds, simpler code
* The motivation for adding timeout_hint_us was to work-around RocksDB's stall issue. However, we're currently addressing the stall issue itself (see @sdong's recent work on stall write_rate), so we should never see sharp lock-ups in the future.
* Nobody is using the feature within Facebook's code-base. Googling for `timeout_hint_us` also doesn't yield any users.
Test Plan: make check
Reviewers: anthony, kradhakrishnan, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D41937
2015-07-14 09:35:48 +02:00
|
|
|
const int kStopTrigger = 100;
|
|
|
|
options.level0_file_num_compaction_trigger = kCompactionTrigger;
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
options.level0_slowdown_writes_trigger = kSlowdownTrigger;
|
|
|
|
options.level0_stop_writes_trigger = kStopTrigger;
|
Deprecate WriteOptions::timeout_hint_us
Summary:
In one of our recent meetings, we discussed deprecating features that are not being actively used. One of those features, at least within Facebook, is timeout_hint. The feature is really nicely implemented, but if nobody needs it, we should remove it from our code-base (until we get a valid use-case). Some arguments:
* Less code == better icache hit rate, smaller builds, simpler code
* The motivation for adding timeout_hint_us was to work-around RocksDB's stall issue. However, we're currently addressing the stall issue itself (see @sdong's recent work on stall write_rate), so we should never see sharp lock-ups in the future.
* Nobody is using the feature within Facebook's code-base. Googling for `timeout_hint_us` also doesn't yield any users.
Test Plan: make check
Reviewers: anthony, kradhakrishnan, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D41937
2015-07-14 09:35:48 +02:00
|
|
|
options.max_write_buffer_number = 10;
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
options.listeners.emplace_back(listener);
|
|
|
|
// BG compaction is disabled. Number of L0 files will simply keeps
|
|
|
|
// increasing in this test.
|
|
|
|
options.compaction_style = kCompactionStyleNone;
|
|
|
|
options.compression = kNoCompression;
|
|
|
|
options.write_buffer_size = 100000; // Small write buffer
|
2015-09-15 09:03:08 -07:00
|
|
|
options.table_properties_collector_factories.push_back(
|
|
|
|
std::make_shared<TestPropertiesCollectorFactory>());
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
|
2015-12-14 13:36:32 -08:00
|
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
ColumnFamilyMetaData cf_meta;
|
|
|
|
db_->GetColumnFamilyMetaData(handles_[1], &cf_meta);
|
Deprecate WriteOptions::timeout_hint_us
Summary:
In one of our recent meetings, we discussed deprecating features that are not being actively used. One of those features, at least within Facebook, is timeout_hint. The feature is really nicely implemented, but if nobody needs it, we should remove it from our code-base (until we get a valid use-case). Some arguments:
* Less code == better icache hit rate, smaller builds, simpler code
* The motivation for adding timeout_hint_us was to work-around RocksDB's stall issue. However, we're currently addressing the stall issue itself (see @sdong's recent work on stall write_rate), so we should never see sharp lock-ups in the future.
* Nobody is using the feature within Facebook's code-base. Googling for `timeout_hint_us` also doesn't yield any users.
Test Plan: make check
Reviewers: anthony, kradhakrishnan, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D41937
2015-07-14 09:35:48 +02:00
|
|
|
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
// keep writing until writes are forced to stop.
|
Deprecate WriteOptions::timeout_hint_us
Summary:
In one of our recent meetings, we discussed deprecating features that are not being actively used. One of those features, at least within Facebook, is timeout_hint. The feature is really nicely implemented, but if nobody needs it, we should remove it from our code-base (until we get a valid use-case). Some arguments:
* Less code == better icache hit rate, smaller builds, simpler code
* The motivation for adding timeout_hint_us was to work-around RocksDB's stall issue. However, we're currently addressing the stall issue itself (see @sdong's recent work on stall write_rate), so we should never see sharp lock-ups in the future.
* Nobody is using the feature within Facebook's code-base. Googling for `timeout_hint_us` also doesn't yield any users.
Test Plan: make check
Reviewers: anthony, kradhakrishnan, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D41937
2015-07-14 09:35:48 +02:00
|
|
|
for (int i = 0; static_cast<int>(cf_meta.file_count) < kSlowdownTrigger * 10;
|
|
|
|
++i) {
|
2022-05-06 13:03:58 -07:00
|
|
|
ASSERT_OK(
|
|
|
|
Put(1, std::to_string(i), std::string(10000, 'x'), WriteOptions()));
|
2018-08-29 11:58:13 -07:00
|
|
|
FlushOptions fo;
|
|
|
|
fo.allow_write_stall = true;
|
2020-12-22 23:44:44 -08:00
|
|
|
ASSERT_OK(db_->Flush(fo, handles_[1]));
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
db_->GetColumnFamilyMetaData(handles_[1], &cf_meta);
|
|
|
|
}
|
2022-02-22 12:13:39 -08:00
|
|
|
// Ensure background work is fully finished including listener callbacks
|
|
|
|
// before accessing listener state.
|
2022-01-21 08:24:06 -08:00
|
|
|
ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork());
|
2022-02-22 12:13:39 -08:00
|
|
|
ASSERT_GE(listener->slowdown_count, kSlowdownTrigger * 9);
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
}
|
|
|
|
|
2015-12-22 11:37:19 -08:00
|
|
|
class TestCompactionReasonListener : public EventListener {
|
|
|
|
public:
|
2018-03-05 13:08:17 -08:00
|
|
|
void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override {
|
2015-12-22 11:37:19 -08:00
|
|
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
|
|
compaction_reasons_.push_back(ci.compaction_reason);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<CompactionReason> compaction_reasons_;
|
|
|
|
std::mutex mutex_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Checks the CompactionReason reported to listeners under level compaction
// in three phases:
//   1. Four L0 files with a compaction trigger of 4 -> exactly one
//      compaction, reported as kLevelL0FilesNum.
//   2. After reopening with max_bytes_for_level_base = 1 -> more than one
//      compaction, all reported as kLevelMaxLevelSize.
//   3. With auto compactions disabled, a manual CompactRange() -> at least
//      one compaction, all reported as kManualCompaction.
TEST_F(EventListenerTest, CompactionReasonLevel) {
  Options options;
  options.env = CurrentOptions().env;
  options.create_if_missing = true;
  options.memtable_factory.reset(test::NewSpecialSkipListFactory(
      DBTestBase::kNumKeysByGenerateNewRandomFile));

  // Raw pointer kept for inspection; the listener itself is handed to
  // options.listeners.
  TestCompactionReasonListener* listener = new TestCompactionReasonListener();
  options.listeners.emplace_back(listener);

  options.level0_file_num_compaction_trigger = 4;
  options.compaction_style = kCompactionStyleLevel;

  DestroyAndReopen(options);
  Random rnd(301);

  // Write 4 files in L0
  for (int i = 0; i < 4; i++) {
    GenerateNewRandomFile(&rnd);
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // size() is unsigned; compare against an unsigned literal.
  ASSERT_EQ(listener->compaction_reasons_.size(), 1U);
  ASSERT_EQ(listener->compaction_reasons_[0],
            CompactionReason::kLevelL0FilesNum);

  DestroyAndReopen(options);

  // Write 3 non-overlapping files in L0
  for (int k = 1; k <= 30; k++) {
    ASSERT_OK(Put(Key(k), Key(k)));
    if (k % 10 == 0) {
      // A failed flush would invalidate the rest of the test, so do not
      // discard the returned status.
      ASSERT_OK(Flush());
    }
  }

  // Do a trivial move from L0 -> L1
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  options.max_bytes_for_level_base = 1;
  Close();
  // Drop reasons recorded so far so only compactions after the reopen count.
  listener->compaction_reasons_.clear();
  Reopen(options);

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_GT(listener->compaction_reasons_.size(), 1U);

  for (auto compaction_reason : listener->compaction_reasons_) {
    ASSERT_EQ(compaction_reason, CompactionReason::kLevelMaxLevelSize);
  }

  options.disable_auto_compactions = true;
  Close();
  listener->compaction_reasons_.clear();
  Reopen(options);

  ASSERT_OK(Put("key", "value"));
  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  ASSERT_GT(listener->compaction_reasons_.size(), 0U);
  for (auto compaction_reason : listener->compaction_reasons_) {
    ASSERT_EQ(compaction_reason, CompactionReason::kManualCompaction);
  }
}
|
|
|
|
|
|
|
|
// Checks the CompactionReason reported to listeners under universal
// compaction in three phases; each phase expects at least one compaction and
// that every recorded reason matches:
//   1. max_size_amplification_percent = 100000 -> kUniversalSizeRatio
//   2. max_size_amplification_percent = 1      -> kUniversalSizeAmplification
//   3. auto compactions disabled + CompactRange() -> kManualCompaction
TEST_F(EventListenerTest, CompactionReasonUniversal) {
  Options options;
  options.env = CurrentOptions().env;
  options.create_if_missing = true;
  options.compaction_style = kCompactionStyleUniversal;
  options.memtable_factory.reset(test::NewSpecialSkipListFactory(
      DBTestBase::kNumKeysByGenerateNewRandomFile));

  // Raw pointer kept for inspection; the listener itself is handed to
  // options.listeners.
  TestCompactionReasonListener* listener = new TestCompactionReasonListener();
  options.listeners.emplace_back(listener);

  Random rnd(301);

  // ---- Phase 1 ----
  options.level0_file_num_compaction_trigger = 8;
  options.compaction_options_universal.size_ratio = 100000;
  options.compaction_options_universal.max_size_amplification_percent = 100000;
  DestroyAndReopen(options);
  listener->compaction_reasons_.clear();

  // Write 8 files in L0
  for (int file_idx = 0; file_idx < 8; ++file_idx) {
    GenerateNewRandomFile(&rnd);
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_GT(listener->compaction_reasons_.size(), 0);
  for (auto compaction_reason : listener->compaction_reasons_) {
    ASSERT_EQ(compaction_reason, CompactionReason::kUniversalSizeRatio);
  }

  // ---- Phase 2 ----
  // The L0 trigger (8) and size_ratio (100000) carry over unchanged from
  // phase 1; only the size amplification limit is different.
  options.compaction_options_universal.max_size_amplification_percent = 1;
  DestroyAndReopen(options);
  listener->compaction_reasons_.clear();

  // Write 8 files in L0
  for (int file_idx = 0; file_idx < 8; ++file_idx) {
    GenerateNewRandomFile(&rnd);
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_GT(listener->compaction_reasons_.size(), 0);
  for (auto compaction_reason : listener->compaction_reasons_) {
    ASSERT_EQ(compaction_reason, CompactionReason::kUniversalSizeAmplification);
  }

  // ---- Phase 3 ----
  options.disable_auto_compactions = true;
  Close();
  listener->compaction_reasons_.clear();
  Reopen(options);

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  ASSERT_GT(listener->compaction_reasons_.size(), 0);
  for (auto compaction_reason : listener->compaction_reasons_) {
    ASSERT_EQ(compaction_reason, CompactionReason::kManualCompaction);
  }
}
|
|
|
|
|
|
|
|
// Checks the CompactionReason reported to listeners under FIFO compaction:
// with max_table_files_size = 1 and four generated files, the test expects
// at least one compaction and every recorded reason to be kFIFOMaxSize.
TEST_F(EventListenerTest, CompactionReasonFIFO) {
  Options options;
  options.env = CurrentOptions().env;
  options.create_if_missing = true;
  options.memtable_factory.reset(test::NewSpecialSkipListFactory(
      DBTestBase::kNumKeysByGenerateNewRandomFile));

  // Raw pointer kept for inspection; the listener itself is handed to
  // options.listeners.
  TestCompactionReasonListener* listener = new TestCompactionReasonListener();
  options.listeners.emplace_back(listener);

  options.compaction_style = kCompactionStyleFIFO;
  options.compaction_options_fifo.max_table_files_size = 1;
  options.level0_file_num_compaction_trigger = 4;

  DestroyAndReopen(options);
  Random rnd(301);

  // Write 4 files in L0
  for (int file_idx = 0; file_idx < 4; ++file_idx) {
    GenerateNewRandomFile(&rnd);
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_GT(listener->compaction_reasons_.size(), 0);
  for (auto compaction_reason : listener->compaction_reasons_) {
    ASSERT_EQ(compaction_reason, CompactionReason::kFIFOMaxSize);
  }
}
|
Added EventListener::OnTableFileCreationStarted() callback
Summary: Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status.
Test Plan: unit test.
Reviewers: dhruba, yhchiang, ott, sdong
Reviewed By: sdong
Subscribers: sdong, kradhakrishnan, IslamAbdelRahman, andrewkr, yhchiang, leveldb, ott, dhruba
Differential Revision: https://reviews.facebook.net/D56337
2016-04-29 11:35:00 -07:00
|
|
|
|
|
|
|
class TableFileCreationListener : public EventListener {
|
|
|
|
public:
|
|
|
|
class TestEnv : public EnvWrapper {
|
|
|
|
public:
|
Fix many tests to run with MEM_ENV and ENCRYPTED_ENV; Introduce a MemoryFileSystem class (#7566)
Summary:
This PR does a few things:
1. The MockFileSystem class was split out from the MockEnv. This change would theoretically allow a MockFileSystem to be used by other Environments as well (if we created a means of constructing one). The MockFileSystem implements a FileSystem in its entirety and does not rely on any Wrapper implementation.
2. Make the RocksDB test suite work when MOCK_ENV=1 and ENCRYPTED_ENV=1 are set. To accomplish this, a few things were needed:
- The tests that tried to use the "wrong" environment (Env::Default() instead of env_) were updated
- The MockFileSystem was changed to support the features it was missing or mishandled (such as recursively deleting files in a directory or supporting renaming of a directory).
3. Updated the test framework to have a ROCKSDB_GTEST_SKIP macro. This can be used to flag tests that are skipped. Currently, this defaults to doing nothing (marks the test as SUCCESS) but will mark the tests as SKIPPED when RocksDB is upgraded to a version of gtest that supports this (gtest-1.10).
I have run a full "make check" with MEM_ENV, ENCRYPTED_ENV, both, and neither under both MacOS and RedHat. A few tests were disabled/skipped for the MEM/ENCRYPTED cases. The error_handler_fs_test fails/hangs for MEM_ENV (presumably a timing problem) and I will introduce another PR/issue to track that problem. (I will also push a change to disable those tests soon). There is one more test in DBTest2 that also fails which I need to investigate or skip before this PR is merged.
Theoretically, this PR should also allow the test suite to run against an Env loaded from the registry, though I do not have one to try it with currently.
Finally, once this is accepted, it would be nice if there was a CircleCI job to run these tests on a checkin so this effort does not become stale. I do not know how to do that, so if someone could write that job, it would be appreciated :)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7566
Reviewed By: zhichao-cao
Differential Revision: D24408980
Pulled By: jay-zhuang
fbshipit-source-id: 911b1554a4d0da06fd51feca0c090a4abdcb4a5f
2020-10-27 10:31:34 -07:00
|
|
|
explicit TestEnv(Env* t) : EnvWrapper(t) {}
|
2022-01-04 16:44:54 -08:00
|
|
|
static const char* kClassName() { return "TestEnv"; }
|
|
|
|
const char* Name() const override { return kClassName(); }
|
Added EventListener::OnTableFileCreationStarted() callback
Summary: Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status.
Test Plan: unit test.
Reviewers: dhruba, yhchiang, ott, sdong
Reviewed By: sdong
Subscribers: sdong, kradhakrishnan, IslamAbdelRahman, andrewkr, yhchiang, leveldb, ott, dhruba
Differential Revision: https://reviews.facebook.net/D56337
2016-04-29 11:35:00 -07:00
|
|
|
|
|
|
|
void SetStatus(Status s) { status_ = s; }
|
|
|
|
|
|
|
|
Status NewWritableFile(const std::string& fname,
|
|
|
|
std::unique_ptr<WritableFile>* result,
|
2019-02-14 13:52:47 -08:00
|
|
|
const EnvOptions& options) override {
|
Added EventListener::OnTableFileCreationStarted() callback
Summary: Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status.
Test Plan: unit test.
Reviewers: dhruba, yhchiang, ott, sdong
Reviewed By: sdong
Subscribers: sdong, kradhakrishnan, IslamAbdelRahman, andrewkr, yhchiang, leveldb, ott, dhruba
Differential Revision: https://reviews.facebook.net/D56337
2016-04-29 11:35:00 -07:00
|
|
|
if (fname.size() > 4 && fname.substr(fname.size() - 4) == ".sst") {
|
|
|
|
if (!status_.ok()) {
|
|
|
|
return status_;
|
|
|
|
}
|
|
|
|
}
|
Fix many tests to run with MEM_ENV and ENCRYPTED_ENV; Introduce a MemoryFileSystem class (#7566)
Summary:
This PR does a few things:
1. The MockFileSystem class was split out from the MockEnv. This change would theoretically allow a MockFileSystem to be used by other Environments as well (if we created a means of constructing one). The MockFileSystem implements a FileSystem in its entirety and does not rely on any Wrapper implementation.
2. Make the RocksDB test suite work when MOCK_ENV=1 and ENCRYPTED_ENV=1 are set. To accomplish this, a few things were needed:
- The tests that tried to use the "wrong" environment (Env::Default() instead of env_) were updated
- The MockFileSystem was changed to support the features it was missing or mishandled (such as recursively deleting files in a directory or supporting renaming of a directory).
3. Updated the test framework to have a ROCKSDB_GTEST_SKIP macro. This can be used to flag tests that are skipped. Currently, this defaults to doing nothing (marks the test as SUCCESS) but will mark the tests as SKIPPED when RocksDB is upgraded to a version of gtest that supports this (gtest-1.10).
I have run a full "make check" with MEM_ENV, ENCRYPTED_ENV, both, and neither under both MacOS and RedHat. A few tests were disabled/skipped for the MEM/ENCRYPTED cases. The error_handler_fs_test fails/hangs for MEM_ENV (presumably a timing problem) and I will introduce another PR/issue to track that problem. (I will also push a change to disable those tests soon). There is one more test in DBTest2 that also fails which I need to investigate or skip before this PR is merged.
Theoretically, this PR should also allow the test suite to run against an Env loaded from the registry, though I do not have one to try it with currently.
Finally, once this is accepted, it would be nice if there was a CircleCI job to run these tests on a checkin so this effort does not become stale. I do not know how to do that, so if someone could write that job, it would be appreciated :)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7566
Reviewed By: zhichao-cao
Differential Revision: D24408980
Pulled By: jay-zhuang
fbshipit-source-id: 911b1554a4d0da06fd51feca0c090a4abdcb4a5f
2020-10-27 10:31:34 -07:00
|
|
|
return target()->NewWritableFile(fname, result, options);
|
Added EventListener::OnTableFileCreationStarted() callback
Summary: Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status.
Test Plan: unit test.
Reviewers: dhruba, yhchiang, ott, sdong
Reviewed By: sdong
Subscribers: sdong, kradhakrishnan, IslamAbdelRahman, andrewkr, yhchiang, leveldb, ott, dhruba
Differential Revision: https://reviews.facebook.net/D56337
2016-04-29 11:35:00 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
Status status_;
|
|
|
|
};
|
|
|
|
|
|
|
|
TableFileCreationListener() {
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
started_[i] = finished_[i] = failure_[i] = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Maps a table file creation reason to its counter slot: 0 for flush, 1 for
// compaction. Any other reason yields -1, which callers treat as "not
// counted".
int Index(TableFileCreationReason reason) {
  switch (reason) {
    case TableFileCreationReason::kFlush:
      return 0;
    case TableFileCreationReason::kCompaction:
      return 1;
    default:
      return -1;
  }
}
|
|
|
|
|
|
|
|
// Verifies the started/finished/failure counters for flush and compaction
// against the expected values, then zeroes all of them so the next phase of a
// test starts from a clean slate. The reset only runs when every assertion
// passes, because a failing gtest ASSERT_* returns from this function.
void CheckAndResetCounters(int flush_started, int flush_finished,
                           int flush_failure, int compaction_started,
                           int compaction_finished, int compaction_failure) {
  // Slot 0: flush.
  ASSERT_EQ(started_[0], flush_started);
  ASSERT_EQ(finished_[0], flush_finished);
  ASSERT_EQ(failure_[0], flush_failure);
  // Slot 1: compaction.
  ASSERT_EQ(started_[1], compaction_started);
  ASSERT_EQ(finished_[1], compaction_finished);
  ASSERT_EQ(failure_[1], compaction_failure);

  for (int slot = 0; slot < 2; ++slot) {
    started_[slot] = 0;
    finished_[slot] = 0;
    failure_[slot] = 0;
  }
}
|
|
|
|
|
|
|
|
void OnTableFileCreationStarted(
|
|
|
|
const TableFileCreationBriefInfo& info) override {
|
|
|
|
int idx = Index(info.reason);
|
|
|
|
if (idx >= 0) {
|
|
|
|
started_[idx]++;
|
|
|
|
}
|
|
|
|
ASSERT_GT(info.db_name.size(), 0U);
|
|
|
|
ASSERT_GT(info.cf_name.size(), 0U);
|
|
|
|
ASSERT_GT(info.file_path.size(), 0U);
|
|
|
|
ASSERT_GT(info.job_id, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void OnTableFileCreated(const TableFileCreationInfo& info) override {
|
|
|
|
int idx = Index(info.reason);
|
|
|
|
if (idx >= 0) {
|
|
|
|
finished_[idx]++;
|
|
|
|
}
|
|
|
|
ASSERT_GT(info.db_name.size(), 0U);
|
|
|
|
ASSERT_GT(info.cf_name.size(), 0U);
|
|
|
|
ASSERT_GT(info.file_path.size(), 0U);
|
|
|
|
ASSERT_GT(info.job_id, 0);
|
2020-08-25 10:44:39 -07:00
|
|
|
ASSERT_EQ(info.file_checksum, kUnknownFileChecksum);
|
|
|
|
ASSERT_EQ(info.file_checksum_func_name, kUnknownFileChecksumFuncName);
|
Added EventListener::OnTableFileCreationStarted() callback
Summary: Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status.
Test Plan: unit test.
Reviewers: dhruba, yhchiang, ott, sdong
Reviewed By: sdong
Subscribers: sdong, kradhakrishnan, IslamAbdelRahman, andrewkr, yhchiang, leveldb, ott, dhruba
Differential Revision: https://reviews.facebook.net/D56337
2016-04-29 11:35:00 -07:00
|
|
|
if (info.status.ok()) {
|
2022-02-01 11:06:57 -08:00
|
|
|
if (info.table_properties.num_range_deletions == 0U) {
|
|
|
|
ASSERT_GT(info.table_properties.data_size, 0U);
|
|
|
|
ASSERT_GT(info.table_properties.raw_key_size, 0U);
|
|
|
|
ASSERT_GT(info.table_properties.raw_value_size, 0U);
|
|
|
|
ASSERT_GT(info.table_properties.num_data_blocks, 0U);
|
|
|
|
ASSERT_GT(info.table_properties.num_entries, 0U);
|
|
|
|
}
|
Added EventListener::OnTableFileCreationStarted() callback
Summary: Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status.
Test Plan: unit test.
Reviewers: dhruba, yhchiang, ott, sdong
Reviewed By: sdong
Subscribers: sdong, kradhakrishnan, IslamAbdelRahman, andrewkr, yhchiang, leveldb, ott, dhruba
Differential Revision: https://reviews.facebook.net/D56337
2016-04-29 11:35:00 -07:00
|
|
|
} else {
|
|
|
|
if (idx >= 0) {
|
|
|
|
failure_[idx]++;
|
2021-11-03 08:42:08 -07:00
|
|
|
last_failure_ = info.status;
|
Added EventListener::OnTableFileCreationStarted() callback
Summary: Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status.
Test Plan: unit test.
Reviewers: dhruba, yhchiang, ott, sdong
Reviewed By: sdong
Subscribers: sdong, kradhakrishnan, IslamAbdelRahman, andrewkr, yhchiang, leveldb, ott, dhruba
Differential Revision: https://reviews.facebook.net/D56337
2016-04-29 11:35:00 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Per-reason event counters; index 0 is flush and index 1 is compaction (see
// Index()). All three arrays are zeroed by the constructor and by
// CheckAndResetCounters().
// Number of OnTableFileCreationStarted() callbacks seen.
int started_[2];
|
|
|
|
// Number of OnTableFileCreated() callbacks seen (successful or not).
int finished_[2];
|
|
|
|
// Number of OnTableFileCreated() callbacks whose status was not OK.
int failure_[2];
|
2021-11-03 08:42:08 -07:00
|
|
|
// Status of the most recent failed creation that mapped to a counter slot.
Status last_failure_;
|
Added EventListener::OnTableFileCreationStarted() callback
Summary: Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status.
Test Plan: unit test.
Reviewers: dhruba, yhchiang, ott, sdong
Reviewed By: sdong
Subscribers: sdong, kradhakrishnan, IslamAbdelRahman, andrewkr, yhchiang, leveldb, ott, dhruba
Differential Revision: https://reviews.facebook.net/D56337
2016-04-29 11:35:00 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
// Drives TableFileCreationListener through successful and failing flushes and
// compactions and checks the started/finished/failure counters after each
// phase. Failures are injected via TestEnv::SetStatus() (TestEnv is defined
// with the listener; presumably it fails table file creation with the status
// that was set).
TEST_F(EventListenerTest, TableFileCreationListenersTest) {
  auto creation_listener = std::make_shared<TableFileCreationListener>();
  Options options;
  std::unique_ptr<TableFileCreationListener::TestEnv> injecting_env(
      new TableFileCreationListener::TestEnv(CurrentOptions().env));
  options.create_if_missing = true;
  options.listeners.push_back(creation_listener);
  options.env = injecting_env.get();
  DestroyAndReopen(options);

  // Phase 1: a plain flush succeeds.
  ASSERT_OK(Put("foo", "aaa"));
  ASSERT_OK(Put("bar", "bbb"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  creation_listener->CheckAndResetCounters(1, 1, 0, 0, 0, 0);

  // Phase 2: the flush fails with the injected NotSupported status.
  ASSERT_OK(Put("foo", "aaa1"));
  ASSERT_OK(Put("bar", "bbb1"));
  injecting_env->SetStatus(Status::NotSupported("not supported"));
  ASSERT_NOK(Flush());
  creation_listener->CheckAndResetCounters(1, 1, 1, 0, 0, 0);
  ASSERT_TRUE(creation_listener->last_failure_.IsNotSupported());
  injecting_env->SetStatus(Status::OK());

  // Phase 3: after a reopen, flushing works again.
  Reopen(options);
  ASSERT_OK(Put("foo", "aaa2"));
  ASSERT_OK(Put("bar", "bbb2"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  creation_listener->CheckAndResetCounters(1, 1, 0, 0, 0, 0);

  // Phase 4: a manual compaction over the whole key range succeeds.
  const Slice range_begin = "a";
  const Slice range_limit = "z";
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), &range_begin,
                                   &range_limit));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  creation_listener->CheckAndResetCounters(0, 0, 0, 1, 1, 0);

  // Phase 5: the flush succeeds, then the compaction fails with the injected
  // status.
  ASSERT_OK(Put("foo", "aaa3"));
  ASSERT_OK(Put("bar", "bbb3"));
  ASSERT_OK(Flush());
  injecting_env->SetStatus(Status::NotSupported("not supported"));
  ASSERT_NOK(dbfull()->CompactRange(CompactRangeOptions(), &range_begin,
                                    &range_limit));
  ASSERT_NOK(dbfull()->TEST_WaitForCompact());
  creation_listener->CheckAndResetCounters(1, 1, 0, 1, 1, 1);
  ASSERT_TRUE(creation_listener->last_failure_.IsNotSupported());

  // Reset
  injecting_env->SetStatus(Status::OK());
  DestroyAndReopen(options);

  // Verify that an empty table file that is immediately deleted gives Aborted
  // status to listener.
  ASSERT_OK(Put("baz", "z"));
  ASSERT_OK(SingleDelete("baz"));
  ASSERT_OK(Flush());
  creation_listener->CheckAndResetCounters(1, 1, 1, 0, 0, 0);
  ASSERT_TRUE(creation_listener->last_failure_.IsAborted());

  // Also in compaction
  ASSERT_OK(Put("baz", "z"));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             range_begin, range_limit));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  creation_listener->CheckAndResetCounters(2, 2, 0, 1, 1, 1);
  ASSERT_TRUE(creation_listener->last_failure_.IsAborted());

  Close();  // Avoid UAF on listener
}
|
2016-06-02 11:57:31 -07:00
|
|
|
|
|
|
|
class MemTableSealedListener : public EventListener {
|
|
|
|
private:
|
|
|
|
SequenceNumber latest_seq_number_;
|
|
|
|
public:
|
|
|
|
MemTableSealedListener() {}
|
|
|
|
void OnMemTableSealed(const MemTableInfo& info) override {
|
|
|
|
latest_seq_number_ = info.first_seqno;
|
|
|
|
}
|
|
|
|
|
|
|
|
void OnFlushCompleted(DB* /*db*/,
|
|
|
|
const FlushJobInfo& flush_job_info) override {
|
|
|
|
ASSERT_LE(flush_job_info.smallest_seqno, latest_seq_number_);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Runs ten write-then-flush rounds with a MemTableSealedListener installed;
// the listener's own assertion validates each completed flush.
TEST_F(EventListenerTest, MemTableSealedListenerTest) {
  auto sealed_listener = std::make_shared<MemTableSealedListener>();
  Options options;
  options.env = CurrentOptions().env;
  options.create_if_missing = true;
  options.listeners.push_back(sealed_listener);
  DestroyAndReopen(options);

  for (unsigned int round = 0; round < 10; ++round) {
    // Distinct keys per round, e.g. "foo0"/"bar0", "foo1"/"bar1", ...
    const std::string suffix = std::to_string(round);
    ASSERT_OK(Put("foo" + suffix, "aaa"));
    ASSERT_OK(Put("bar" + suffix, "bbb"));

    ASSERT_OK(Flush());
  }
}
|
|
|
|
|
2016-09-22 11:56:18 -07:00
|
|
|
class ColumnFamilyHandleDeletionStartedListener : public EventListener {
|
|
|
|
private:
|
|
|
|
std::vector<std::string> cfs_;
|
|
|
|
int counter;
|
|
|
|
|
|
|
|
public:
|
|
|
|
explicit ColumnFamilyHandleDeletionStartedListener(
|
|
|
|
const std::vector<std::string>& cfs)
|
|
|
|
: cfs_(cfs), counter(0) {
|
|
|
|
cfs_.insert(cfs_.begin(), kDefaultColumnFamilyName);
|
|
|
|
}
|
|
|
|
void OnColumnFamilyHandleDeletionStarted(
|
|
|
|
ColumnFamilyHandle* handle) override {
|
|
|
|
ASSERT_EQ(cfs_[handle->GetID()], handle->GetName());
|
|
|
|
counter++;
|
|
|
|
}
|
|
|
|
int getCounter() { return counter; }
|
|
|
|
};
|
|
|
|
|
|
|
|
// Creates three extra column families, deletes their handles and checks that
// the listener was notified exactly once per deleted handle.
TEST_F(EventListenerTest, ColumnFamilyHandleDeletionStartedListenerTest) {
  std::vector<std::string> cfs{"pikachu", "eevee", "Mewtwo"};
  auto listener =
      std::make_shared<ColumnFamilyHandleDeletionStartedListener>(cfs);
  Options options;
  options.env = CurrentOptions().env;
  options.create_if_missing = true;
  options.listeners.push_back(listener);
  CreateAndReopenWithCF(cfs, options);
  // Unsigned literal avoids a signed/unsigned comparison against size() and
  // matches the `U`-suffixed size checks used elsewhere in this file.
  ASSERT_EQ(handles_.size(), 4U);
  // Delete the last three handles; handles_[0] is kept and the vector is
  // shrunk so the deleted pointers are not touched again.
  delete handles_[3];
  delete handles_[2];
  delete handles_[1];
  handles_.resize(1);
  ASSERT_EQ(listener->getCounter(), 3);
}
|
|
|
|
|
2017-06-22 19:30:39 -07:00
|
|
|
class BackgroundErrorListener : public EventListener {
|
|
|
|
private:
|
|
|
|
SpecialEnv* env_;
|
|
|
|
int counter_;
|
|
|
|
|
|
|
|
public:
|
|
|
|
BackgroundErrorListener(SpecialEnv* env) : env_(env), counter_(0) {}
|
|
|
|
|
2018-03-05 13:08:17 -08:00
|
|
|
// Background-error hook. Only the first error reported to this listener is
// suppressed; every invocation is counted.
void OnBackgroundError(BackgroundErrorReason /*reason*/,
                       Status* bg_error) override {
  const bool is_first_error = (counter_ == 0);
  if (is_first_error) {
    // Clear the reported error and stop dropping writes, so that a retry of
    // the failed operation can succeed.
    *bg_error = Status::OK();
    env_->drop_writes_.store(false, std::memory_order_release);
    env_->SetMockSleep(false);
  }
  // Count the call last, after any suppression work has been done.
  counter_ += 1;
}
|
|
|
|
|
|
|
|
int counter() { return counter_; }
|
|
|
|
};
|
|
|
|
|
|
|
|
// Checks that a failed flush is reported to the listener exactly once and
// that, after the listener suppresses the error, a later flush succeeds and
// leaves a single file in level 0.
TEST_F(EventListenerTest, BackgroundErrorListenerFailedFlushTest) {
  auto bg_error_listener = std::make_shared<BackgroundErrorListener>(env_);

  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.paranoid_checks = true;
  options.memtable_factory.reset(test::NewSpecialSkipListFactory(1));
  options.listeners.push_back(bg_error_listener);
  DestroyAndReopen(options);

  // TEST_WaitForFlushMemTable() does not work for flushes that fail, so the
  // wait is built by hand: the sync point in this test is made to depend on
  // the background flush work reporting that it is done.
  auto* const sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->LoadDependency(
      {{"DBImpl::BGWorkFlush:done",
        "EventListenerTest:BackgroundErrorListenerFailedFlushTest:1"}});
  sync_point->EnableProcessing();

  // Make the env drop writes so that the upcoming flush fails.
  env_->drop_writes_.store(true, std::memory_order_release);
  env_->SetMockSleep();

  ASSERT_OK(Put("key0", "val"));
  ASSERT_OK(Put("key1", "val"));
  TEST_SYNC_POINT("EventListenerTest:BackgroundErrorListenerFailedFlushTest:1");
  ASSERT_EQ(1, bg_error_listener->counter());

  // The listener has re-enabled writes, so this flush is expected to succeed.
  ASSERT_OK(Put("key2", "val"));
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
}
|
|
|
|
|
|
|
|
// Checks that a failed compaction is reported to the listener exactly once
// and that, after the listener suppresses the error, later flush and
// compaction work completes without error.
TEST_F(EventListenerTest, BackgroundErrorListenerFailedCompactionTest) {
  auto bg_error_listener = std::make_shared<BackgroundErrorListener>(env_);

  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.paranoid_checks = true;
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = 2;
  options.memtable_factory.reset(test::NewSpecialSkipListFactory(2));
  options.listeners.push_back(bg_error_listener);
  DestroyAndReopen(options);

  // Three rounds of writes; the third round triggers the second memtable's
  // flush.
  for (int round = 0; round < 3; ++round) {
    ASSERT_OK(Put("key0", "val"));
    if (round != 0) {
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    }
    ASSERT_OK(Put("key1", "val"));
  }
  ASSERT_EQ(2, NumTableFilesAtLevel(0));

  // Drop writes, then turn automatic compaction back on so that the
  // compaction that gets scheduled fails and reaches the listener.
  env_->drop_writes_.store(true, std::memory_order_release);
  env_->SetMockSleep();
  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}}));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(1, bg_error_listener->counter());

  // Trigger another flush so that compaction is triggered again; this time it
  // succeeds. The earlier failed compaction may also be retried
  // automatically, so we may end up with 0 or 1 files in level 1 depending on
  // when that retry gets scheduled.
  ASSERT_OK(Put("key0", "val"));
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_LE(1, NumTableFilesAtLevel(0));
}
|
|
|
|
|
2018-10-12 18:34:03 -07:00
|
|
|
class TestFileOperationListener : public EventListener {
|
|
|
|
public:
|
|
|
|
TestFileOperationListener() {
|
|
|
|
file_reads_.store(0);
|
|
|
|
file_reads_success_.store(0);
|
|
|
|
file_writes_.store(0);
|
|
|
|
file_writes_success_.store(0);
|
2020-07-07 18:19:32 -07:00
|
|
|
file_flushes_.store(0);
|
|
|
|
file_flushes_success_.store(0);
|
|
|
|
file_closes_.store(0);
|
|
|
|
file_closes_success_.store(0);
|
|
|
|
file_syncs_.store(0);
|
|
|
|
file_syncs_success_.store(0);
|
|
|
|
file_truncates_.store(0);
|
|
|
|
file_truncates_success_.store(0);
|
2021-10-05 10:50:27 -07:00
|
|
|
file_seq_reads_.store(0);
|
2021-09-16 17:17:40 -07:00
|
|
|
blob_file_reads_.store(0);
|
|
|
|
blob_file_writes_.store(0);
|
|
|
|
blob_file_flushes_.store(0);
|
|
|
|
blob_file_closes_.store(0);
|
|
|
|
blob_file_syncs_.store(0);
|
|
|
|
blob_file_truncates_.store(0);
|
2018-10-12 18:34:03 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
void OnFileReadFinish(const FileOperationInfo& info) override {
|
|
|
|
++file_reads_;
|
|
|
|
if (info.status.ok()) {
|
|
|
|
++file_reads_success_;
|
|
|
|
}
|
2021-10-05 10:50:27 -07:00
|
|
|
if (info.path.find("MANIFEST") != std::string::npos) {
|
|
|
|
++file_seq_reads_;
|
|
|
|
}
|
2021-09-16 17:17:40 -07:00
|
|
|
if (EndsWith(info.path, ".blob")) {
|
|
|
|
++blob_file_reads_;
|
|
|
|
}
|
2019-01-16 09:48:01 -08:00
|
|
|
ReportDuration(info);
|
2018-10-12 18:34:03 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
void OnFileWriteFinish(const FileOperationInfo& info) override {
|
|
|
|
++file_writes_;
|
|
|
|
if (info.status.ok()) {
|
|
|
|
++file_writes_success_;
|
|
|
|
}
|
2021-09-16 17:17:40 -07:00
|
|
|
if (EndsWith(info.path, ".blob")) {
|
|
|
|
++blob_file_writes_;
|
|
|
|
}
|
2019-01-16 09:48:01 -08:00
|
|
|
ReportDuration(info);
|
2018-10-12 18:34:03 -07:00
|
|
|
}
|
|
|
|
|
2020-07-07 18:19:32 -07:00
|
|
|
void OnFileFlushFinish(const FileOperationInfo& info) override {
|
|
|
|
++file_flushes_;
|
|
|
|
if (info.status.ok()) {
|
|
|
|
++file_flushes_success_;
|
|
|
|
}
|
2021-09-16 17:17:40 -07:00
|
|
|
if (EndsWith(info.path, ".blob")) {
|
|
|
|
++blob_file_flushes_;
|
|
|
|
}
|
2020-07-07 18:19:32 -07:00
|
|
|
ReportDuration(info);
|
|
|
|
}
|
|
|
|
|
|
|
|
void OnFileCloseFinish(const FileOperationInfo& info) override {
|
|
|
|
++file_closes_;
|
|
|
|
if (info.status.ok()) {
|
|
|
|
++file_closes_success_;
|
|
|
|
}
|
2021-09-16 17:17:40 -07:00
|
|
|
if (EndsWith(info.path, ".blob")) {
|
|
|
|
++blob_file_closes_;
|
|
|
|
}
|
2020-07-07 18:19:32 -07:00
|
|
|
ReportDuration(info);
|
|
|
|
}
|
|
|
|
|
|
|
|
void OnFileSyncFinish(const FileOperationInfo& info) override {
|
|
|
|
++file_syncs_;
|
|
|
|
if (info.status.ok()) {
|
|
|
|
++file_syncs_success_;
|
|
|
|
}
|
2021-09-16 17:17:40 -07:00
|
|
|
if (EndsWith(info.path, ".blob")) {
|
|
|
|
++blob_file_syncs_;
|
|
|
|
}
|
2020-07-07 18:19:32 -07:00
|
|
|
ReportDuration(info);
|
|
|
|
}
|
|
|
|
|
|
|
|
void OnFileTruncateFinish(const FileOperationInfo& info) override {
|
|
|
|
++file_truncates_;
|
|
|
|
if (info.status.ok()) {
|
|
|
|
++file_truncates_success_;
|
|
|
|
}
|
2021-09-16 17:17:40 -07:00
|
|
|
if (EndsWith(info.path, ".blob")) {
|
|
|
|
++blob_file_truncates_;
|
|
|
|
}
|
2020-07-07 18:19:32 -07:00
|
|
|
ReportDuration(info);
|
|
|
|
}
|
|
|
|
|
2018-10-12 18:34:03 -07:00
|
|
|
bool ShouldBeNotifiedOnFileIO() override { return true; }
|
|
|
|
|
|
|
|
std::atomic<size_t> file_reads_;
|
|
|
|
std::atomic<size_t> file_reads_success_;
|
|
|
|
std::atomic<size_t> file_writes_;
|
|
|
|
std::atomic<size_t> file_writes_success_;
|
2020-07-07 18:19:32 -07:00
|
|
|
std::atomic<size_t> file_flushes_;
|
|
|
|
std::atomic<size_t> file_flushes_success_;
|
|
|
|
std::atomic<size_t> file_closes_;
|
|
|
|
std::atomic<size_t> file_closes_success_;
|
|
|
|
std::atomic<size_t> file_syncs_;
|
|
|
|
std::atomic<size_t> file_syncs_success_;
|
|
|
|
std::atomic<size_t> file_truncates_;
|
|
|
|
std::atomic<size_t> file_truncates_success_;
|
2021-10-05 10:50:27 -07:00
|
|
|
std::atomic<size_t> file_seq_reads_;
|
2021-09-16 17:17:40 -07:00
|
|
|
std::atomic<size_t> blob_file_reads_;
|
|
|
|
std::atomic<size_t> blob_file_writes_;
|
|
|
|
std::atomic<size_t> blob_file_flushes_;
|
|
|
|
std::atomic<size_t> blob_file_closes_;
|
|
|
|
std::atomic<size_t> blob_file_syncs_;
|
|
|
|
std::atomic<size_t> blob_file_truncates_;
|
2019-01-16 09:48:01 -08:00
|
|
|
|
|
|
|
private:
|
|
|
|
void ReportDuration(const FileOperationInfo& info) const {
|
2020-07-22 08:53:21 -07:00
|
|
|
ASSERT_GT(info.duration.count(), 0);
|
2019-01-16 09:48:01 -08:00
|
|
|
}
|
2018-10-12 18:34:03 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
// Checks that file write/flush notifications arrive while flushing a
// memtable, and that read/close/sync notifications have been seen once the DB
// has been closed and reopened. For every operation type the success count
// must never exceed the total count.
TEST_F(EventListenerTest, OnFileOperationTest) {
  Options options;
  options.env = CurrentOptions().env;
  options.create_if_missing = true;

  // Owned by shared_ptr from the start; options.listeners shares ownership.
  auto listener = std::make_shared<TestFileOperationListener>();
  options.listeners.emplace_back(listener);

  // NOTE(review): direct I/O is requested as `false` here, so the
  // InvalidArgument fallback and the truncate checks at the end of this test
  // look unreachable. Confirm whether this was meant to probe with `true`.
  options.use_direct_io_for_flush_and_compaction = false;
  Status s = TryReopen(options);
  if (s.IsInvalidArgument()) {
    options.use_direct_io_for_flush_and_compaction = false;
  } else {
    ASSERT_OK(s);
  }
  DestroyAndReopen(options);
  ASSERT_OK(Put("foo", "aaa"));
  ASSERT_OK(dbfull()->Flush(FlushOptions()));
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  // The counters are size_t, so compare against unsigned literals.
  ASSERT_GE(listener->file_writes_.load(),
            listener->file_writes_success_.load());
  ASSERT_GT(listener->file_writes_.load(), 0U);
  ASSERT_GE(listener->file_flushes_.load(),
            listener->file_flushes_success_.load());
  ASSERT_GT(listener->file_flushes_.load(), 0U);
  Close();

  Reopen(options);
  ASSERT_GE(listener->file_reads_.load(), listener->file_reads_success_.load());
  ASSERT_GT(listener->file_reads_.load(), 0U);
  ASSERT_GE(listener->file_closes_.load(),
            listener->file_closes_success_.load());
  ASSERT_GT(listener->file_closes_.load(), 0U);
  ASSERT_GE(listener->file_syncs_.load(), listener->file_syncs_success_.load());
  ASSERT_GT(listener->file_syncs_.load(), 0U);
  if (options.use_direct_io_for_flush_and_compaction) {
    ASSERT_GE(listener->file_truncates_.load(),
              listener->file_truncates_success_.load());
    ASSERT_GT(listener->file_truncates_.load(), 0U);
  }
}
|
|
|
|
|
2021-09-16 17:17:40 -07:00
|
|
|
// Checks that file I/O notifications are delivered for ".blob" files: writes
// and flushes after flushing memtables with blob files enabled, closes and
// syncs once the DB has been closed and reopened.
TEST_F(EventListenerTest, OnBlobFileOperationTest) {
  auto blob_listener = std::make_shared<TestFileOperationListener>();

  Options options;
  options.env = CurrentOptions().env;
  options.create_if_missing = true;
  options.listeners.emplace_back(blob_listener);
  options.disable_auto_compactions = true;
  // With min_blob_size of 0 and blob files enabled, the test expects blob
  // file writes for the values written below.
  options.enable_blob_files = true;
  options.min_blob_size = 0;
  options.enable_blob_garbage_collection = true;
  options.blob_garbage_collection_age_cutoff = 0.5;

  DestroyAndReopen(options);

  // First batch of four keys.
  ASSERT_OK(Put("Key1", "blob_value1"));
  ASSERT_OK(Put("Key2", "blob_value2"));
  ASSERT_OK(Put("Key3", "blob_value3"));
  ASSERT_OK(Put("Key4", "blob_value4"));
  ASSERT_OK(Flush());

  // Second batch overwrites two of the keys from the first batch.
  ASSERT_OK(Put("Key3", "new_blob_value3"));
  ASSERT_OK(Put("Key4", "new_blob_value4"));
  ASSERT_OK(Flush());

  // Third batch adds two new keys.
  ASSERT_OK(Put("Key5", "blob_value5"));
  ASSERT_OK(Put("Key6", "blob_value6"));
  ASSERT_OK(Flush());

  ASSERT_GT(blob_listener->blob_file_writes_.load(), 0U);
  ASSERT_GT(blob_listener->blob_file_flushes_.load(), 0U);
  Close();

  Reopen(options);
  ASSERT_GT(blob_listener->blob_file_closes_.load(), 0U);
  ASSERT_GT(blob_listener->blob_file_syncs_.load(), 0U);
  // Truncate notifications are only checked when direct I/O is in use.
  if (options.use_direct_io_for_flush_and_compaction) {
    ASSERT_GT(blob_listener->blob_file_truncates_.load(), 0U);
  }
}
|
|
|
|
|
2021-10-05 10:50:27 -07:00
|
|
|
// Checks that reopening the DB produces additional read notifications for
// paths containing "MANIFEST" (tracked by the listener's file_seq_reads_).
TEST_F(EventListenerTest, ReadManifestAndWALOnRecovery) {
  auto io_listener = std::make_shared<TestFileOperationListener>();

  Options options;
  options.env = CurrentOptions().env;
  options.create_if_missing = true;
  options.listeners.emplace_back(io_listener);
  options.use_direct_io_for_flush_and_compaction = false;

  // Probe that the options are accepted; an InvalidArgument status makes the
  // test fall back to running without direct I/O.
  const Status open_status = TryReopen(options);
  if (!open_status.IsInvalidArgument()) {
    ASSERT_OK(open_status);
  } else {
    options.use_direct_io_for_flush_and_compaction = false;
  }

  DestroyAndReopen(options);
  ASSERT_OK(Put("foo", "aaa"));
  Close();

  // Snapshot the counter while the DB is closed; the reopen must add to it.
  const size_t seq_reads_before_reopen = io_listener->file_seq_reads_.load();
  Reopen(options);
  ASSERT_GT(io_listener->file_seq_reads_.load(), seq_reads_before_reopen);
}
|
|
|
|
|
2021-09-16 17:17:40 -07:00
|
|
|
class BlobDBJobLevelEventListenerTest : public EventListener {
|
|
|
|
public:
|
|
|
|
explicit BlobDBJobLevelEventListenerTest(EventListenerTest* test)
|
|
|
|
: test_(test), call_count_(0) {}
|
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 12:35:39 -08:00
|
|
|
// Returns the VersionStorageInfo of the current Version of the default column
// family of the DB under test.
//
// Each intermediate pointer is checked with a non-fatal EXPECT_NE. Because
// EXPECT_* does not stop execution, the function bails out with nullptr as
// soon as one of them is null instead of dereferencing it. Callers should
// therefore be prepared to receive nullptr (the failure has already been
// recorded by then).
const VersionStorageInfo* GetVersionStorageInfo() const {
  VersionSet* const versions = test_->dbfull()->GetVersionSet();
  assert(versions);

  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
  EXPECT_NE(cfd, nullptr);
  if (cfd == nullptr) {
    return nullptr;
  }

  Version* const current = cfd->current();
  EXPECT_NE(current, nullptr);
  if (current == nullptr) {
    return nullptr;
  }

  const VersionStorageInfo* const storage_info = current->storage_info();
  EXPECT_NE(storage_info, nullptr);

  return storage_info;
}
|
|
|
|
|
|
|
|
void CheckBlobFileAdditions(
|
|
|
|
const std::vector<BlobFileAdditionInfo>& blob_file_addition_infos) const {
|
|
|
|
const auto* vstorage = GetVersionStorageInfo();
|
|
|
|
|
|
|
|
EXPECT_FALSE(blob_file_addition_infos.empty());
|
|
|
|
|
|
|
|
for (const auto& blob_file_addition_info : blob_file_addition_infos) {
|
|
|
|
const auto meta = vstorage->GetBlobFileMetaData(
|
|
|
|
blob_file_addition_info.blob_file_number);
|
|
|
|
|
|
|
|
EXPECT_NE(meta, nullptr);
|
|
|
|
EXPECT_EQ(meta->GetBlobFileNumber(),
|
|
|
|
blob_file_addition_info.blob_file_number);
|
|
|
|
EXPECT_EQ(meta->GetTotalBlobBytes(),
|
|
|
|
blob_file_addition_info.total_blob_bytes);
|
|
|
|
EXPECT_EQ(meta->GetTotalBlobCount(),
|
|
|
|
blob_file_addition_info.total_blob_count);
|
|
|
|
EXPECT_FALSE(blob_file_addition_info.blob_file_path.empty());
|
|
|
|
}
|
2021-09-16 17:17:40 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<std::string> GetFlushedFiles() {
|
|
|
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
|
|
std::vector<std::string> result;
|
|
|
|
for (const auto& fname : flushed_files_) {
|
|
|
|
result.push_back(fname);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override {
|
|
|
|
call_count_++;
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 12:35:39 -08:00
|
|
|
|
2021-09-16 17:17:40 -07:00
|
|
|
{
|
|
|
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
|
|
flushed_files_.push_back(info.file_path);
|
|
|
|
}
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 12:35:39 -08:00
|
|
|
|
2021-09-16 17:17:40 -07:00
|
|
|
EXPECT_EQ(info.blob_compression_type, kNoCompression);
|
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 12:35:39 -08:00
|
|
|
CheckBlobFileAdditions(info.blob_file_addition_infos);
|
2021-09-16 17:17:40 -07:00
|
|
|
}
|
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 12:35:39 -08:00
|
|
|
void OnCompactionCompleted(DB* /*db*/,
|
|
|
|
const CompactionJobInfo& info) override {
|
2021-09-16 17:17:40 -07:00
|
|
|
call_count_++;
|
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 12:35:39 -08:00
|
|
|
EXPECT_EQ(info.blob_compression_type, kNoCompression);
|
|
|
|
|
|
|
|
CheckBlobFileAdditions(info.blob_file_addition_infos);
|
|
|
|
|
|
|
|
EXPECT_FALSE(info.blob_file_garbage_infos.empty());
|
2021-09-16 17:17:40 -07:00
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 12:35:39 -08:00
|
|
|
for (const auto& blob_file_garbage_info : info.blob_file_garbage_infos) {
|
2021-09-16 17:17:40 -07:00
|
|
|
EXPECT_GT(blob_file_garbage_info.blob_file_number, 0U);
|
|
|
|
EXPECT_GT(blob_file_garbage_info.garbage_blob_count, 0U);
|
|
|
|
EXPECT_GT(blob_file_garbage_info.garbage_blob_bytes, 0U);
|
|
|
|
EXPECT_FALSE(blob_file_garbage_info.blob_file_path.empty());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fixture through which the listener reaches the DB under test
// (test_->dbfull()).
EventListenerTest* test_;
|
|
|
|
// Number of OnFlushCompleted / OnCompactionCompleted callbacks received.
// Tests read and reset this member directly.
// NOTE(review): plain (non-atomic) counter; confirm the callbacks cannot race
// with the test thread's accesses.
uint32_t call_count_;
|
|
|
|
|
|
|
|
private:
|
|
|
|
// File paths reported through OnFlushCompleted; accessed under mutex_.
std::vector<std::string> flushed_files_;
|
|
|
|
// Guards flushed_files_.
std::mutex mutex_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Test OnFlushCompleted EventListener called for blob files
|
|
|
|
TEST_F(EventListenerTest, BlobDBOnFlushCompleted) {
|
|
|
|
Options options;
|
|
|
|
options.env = CurrentOptions().env;
|
|
|
|
options.enable_blob_files = true;
|
|
|
|
options.create_if_missing = true;
|
|
|
|
options.disable_auto_compactions = true;
|
|
|
|
|
|
|
|
options.min_blob_size = 0;
|
|
|
|
BlobDBJobLevelEventListenerTest* blob_event_listener =
|
|
|
|
new BlobDBJobLevelEventListenerTest(this);
|
|
|
|
options.listeners.emplace_back(blob_event_listener);
|
|
|
|
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
|
|
|
|
ASSERT_OK(Put("Key1", "blob_value1"));
|
|
|
|
ASSERT_OK(Put("Key2", "blob_value2"));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
ASSERT_OK(Put("Key3", "blob_value3"));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
ASSERT_EQ(Get("Key1"), "blob_value1");
|
|
|
|
ASSERT_EQ(Get("Key2"), "blob_value2");
|
|
|
|
ASSERT_EQ(Get("Key3"), "blob_value3");
|
|
|
|
|
|
|
|
ASSERT_GT(blob_event_listener->call_count_, 0U);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test OnCompactionCompleted EventListener called for blob files
|
|
|
|
TEST_F(EventListenerTest, BlobDBOnCompactionCompleted) {
|
|
|
|
Options options;
|
|
|
|
options.env = CurrentOptions().env;
|
|
|
|
options.enable_blob_files = true;
|
|
|
|
options.create_if_missing = true;
|
|
|
|
options.disable_auto_compactions = true;
|
|
|
|
options.min_blob_size = 0;
|
|
|
|
BlobDBJobLevelEventListenerTest* blob_event_listener =
|
|
|
|
new BlobDBJobLevelEventListenerTest(this);
|
|
|
|
options.listeners.emplace_back(blob_event_listener);
|
|
|
|
|
|
|
|
options.enable_blob_garbage_collection = true;
|
|
|
|
options.blob_garbage_collection_age_cutoff = 0.5;
|
|
|
|
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
|
|
|
|
ASSERT_OK(Put("Key1", "blob_value1"));
|
|
|
|
ASSERT_OK(Put("Key2", "blob_value2"));
|
|
|
|
ASSERT_OK(Put("Key3", "blob_value3"));
|
|
|
|
ASSERT_OK(Put("Key4", "blob_value4"));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
ASSERT_OK(Put("Key3", "new_blob_value3"));
|
|
|
|
ASSERT_OK(Put("Key4", "new_blob_value4"));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
ASSERT_OK(Put("Key5", "blob_value5"));
|
|
|
|
ASSERT_OK(Put("Key6", "blob_value6"));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
blob_event_listener->call_count_ = 0;
|
|
|
|
constexpr Slice* begin = nullptr;
|
|
|
|
constexpr Slice* end = nullptr;
|
|
|
|
|
|
|
|
// On compaction, because of blob_garbage_collection_age_cutoff, it will
|
|
|
|
// delete the oldest blob file and create new blob file during compaction.
|
|
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
|
|
|
|
|
|
|
|
// Make sure, OnCompactionCompleted is called.
|
|
|
|
ASSERT_GT(blob_event_listener->call_count_, 0U);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test CompactFiles calls OnCompactionCompleted EventListener for blob files
|
|
|
|
// and populate the blob files info.
|
|
|
|
TEST_F(EventListenerTest, BlobDBCompactFiles) {
|
|
|
|
Options options;
|
|
|
|
options.env = CurrentOptions().env;
|
|
|
|
options.enable_blob_files = true;
|
|
|
|
options.create_if_missing = true;
|
|
|
|
options.disable_auto_compactions = true;
|
|
|
|
options.min_blob_size = 0;
|
|
|
|
options.enable_blob_garbage_collection = true;
|
|
|
|
options.blob_garbage_collection_age_cutoff = 0.5;
|
|
|
|
|
|
|
|
BlobDBJobLevelEventListenerTest* blob_event_listener =
|
|
|
|
new BlobDBJobLevelEventListenerTest(this);
|
|
|
|
options.listeners.emplace_back(blob_event_listener);
|
|
|
|
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
|
|
|
|
ASSERT_OK(Put("Key1", "blob_value1"));
|
|
|
|
ASSERT_OK(Put("Key2", "blob_value2"));
|
|
|
|
ASSERT_OK(Put("Key3", "blob_value3"));
|
|
|
|
ASSERT_OK(Put("Key4", "blob_value4"));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
ASSERT_OK(Put("Key3", "new_blob_value3"));
|
|
|
|
ASSERT_OK(Put("Key4", "new_blob_value4"));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
ASSERT_OK(Put("Key5", "blob_value5"));
|
|
|
|
ASSERT_OK(Put("Key6", "blob_value6"));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
std::vector<std::string> output_file_names;
|
|
|
|
CompactionJobInfo compaction_job_info;
|
|
|
|
|
|
|
|
// On compaction, because of blob_garbage_collection_age_cutoff, it will
|
|
|
|
// delete the oldest blob file and create new blob file during compaction
|
|
|
|
// which will be populated in output_files_names.
|
|
|
|
ASSERT_OK(dbfull()->CompactFiles(
|
|
|
|
CompactionOptions(), blob_event_listener->GetFlushedFiles(), 1, -1,
|
|
|
|
&output_file_names, &compaction_job_info));
|
|
|
|
|
|
|
|
bool is_blob_in_output = false;
|
|
|
|
for (const auto& file : output_file_names) {
|
|
|
|
if (EndsWith(file, ".blob")) {
|
|
|
|
is_blob_in_output = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ASSERT_TRUE(is_blob_in_output);
|
|
|
|
|
|
|
|
for (const auto& blob_file_addition_info :
|
|
|
|
compaction_job_info.blob_file_addition_infos) {
|
|
|
|
EXPECT_GT(blob_file_addition_info.blob_file_number, 0U);
|
|
|
|
EXPECT_GT(blob_file_addition_info.total_blob_bytes, 0U);
|
|
|
|
EXPECT_GT(blob_file_addition_info.total_blob_count, 0U);
|
|
|
|
EXPECT_FALSE(blob_file_addition_info.blob_file_path.empty());
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const auto& blob_file_garbage_info :
|
|
|
|
compaction_job_info.blob_file_garbage_infos) {
|
|
|
|
EXPECT_GT(blob_file_garbage_info.blob_file_number, 0U);
|
|
|
|
EXPECT_GT(blob_file_garbage_info.garbage_blob_count, 0U);
|
|
|
|
EXPECT_GT(blob_file_garbage_info.garbage_blob_bytes, 0U);
|
|
|
|
EXPECT_FALSE(blob_file_garbage_info.blob_file_path.empty());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
class BlobDBFileLevelEventListener : public EventListener {
|
|
|
|
public:
|
|
|
|
void OnBlobFileCreationStarted(
|
|
|
|
const BlobFileCreationBriefInfo& info) override {
|
|
|
|
files_started_++;
|
|
|
|
EXPECT_FALSE(info.db_name.empty());
|
|
|
|
EXPECT_FALSE(info.cf_name.empty());
|
|
|
|
EXPECT_FALSE(info.file_path.empty());
|
|
|
|
EXPECT_GT(info.job_id, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void OnBlobFileCreated(const BlobFileCreationInfo& info) override {
|
|
|
|
files_created_++;
|
|
|
|
EXPECT_FALSE(info.db_name.empty());
|
|
|
|
EXPECT_FALSE(info.cf_name.empty());
|
|
|
|
EXPECT_FALSE(info.file_path.empty());
|
|
|
|
EXPECT_GT(info.job_id, 0);
|
|
|
|
EXPECT_GT(info.total_blob_count, 0U);
|
|
|
|
EXPECT_GT(info.total_blob_bytes, 0U);
|
|
|
|
EXPECT_EQ(info.file_checksum, kUnknownFileChecksum);
|
|
|
|
EXPECT_EQ(info.file_checksum_func_name, kUnknownFileChecksumFuncName);
|
|
|
|
EXPECT_TRUE(info.status.ok());
|
|
|
|
}
|
|
|
|
|
|
|
|
void OnBlobFileDeleted(const BlobFileDeletionInfo& info) override {
|
|
|
|
files_deleted_++;
|
|
|
|
EXPECT_FALSE(info.db_name.empty());
|
|
|
|
EXPECT_FALSE(info.file_path.empty());
|
|
|
|
EXPECT_GT(info.job_id, 0);
|
|
|
|
EXPECT_TRUE(info.status.ok());
|
|
|
|
}
|
|
|
|
|
|
|
|
void CheckCounters() {
|
|
|
|
EXPECT_EQ(files_started_, files_created_);
|
|
|
|
EXPECT_GT(files_started_, 0U);
|
|
|
|
EXPECT_GT(files_deleted_, 0U);
|
|
|
|
EXPECT_LT(files_deleted_, files_created_);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2021-09-27 11:42:10 -07:00
|
|
|
std::atomic<uint32_t> files_started_{};
|
|
|
|
std::atomic<uint32_t> files_created_{};
|
|
|
|
std::atomic<uint32_t> files_deleted_{};
|
2021-09-16 17:17:40 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
// Exercises the blob file listener callbacks end to end: three flushes write
// values with blob files enabled, then a full-range compaction with blob
// garbage collection enabled runs, and finally the listener's tallies are
// checked.
TEST_F(EventListenerTest, BlobDBFileTest) {
  Options options;
  options.env = CurrentOptions().env;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;

  // Blob-specific settings: min_blob_size is 0 and garbage collection is on
  // with an age cutoff of 0.5.
  options.enable_blob_files = true;
  options.min_blob_size = 0;
  options.enable_blob_garbage_collection = true;
  options.blob_garbage_collection_age_cutoff = 0.5;

  // Keep a raw pointer so the counters can be inspected after the listener
  // has been registered with the options.
  BlobDBFileLevelEventListener* blob_event_listener =
      new BlobDBFileLevelEventListener();
  options.listeners.emplace_back(blob_event_listener);

  DestroyAndReopen(options);

  // Each table below is one {key, value} batch that is written and flushed.
  const char* const initial_entries[][2] = {
      {"Key1", "blob_value1"},
      {"Key2", "blob_value2"},
      {"Key3", "blob_value3"},
      {"Key4", "blob_value4"},
  };
  for (const auto& entry : initial_entries) {
    ASSERT_OK(Put(entry[0], entry[1]));
  }
  ASSERT_OK(Flush());

  // Overwrite Key3 and Key4 with new values.
  const char* const overwritten_entries[][2] = {
      {"Key3", "new_blob_value3"},
      {"Key4", "new_blob_value4"},
  };
  for (const auto& entry : overwritten_entries) {
    ASSERT_OK(Put(entry[0], entry[1]));
  }
  ASSERT_OK(Flush());

  const char* const trailing_entries[][2] = {
      {"Key5", "blob_value5"},
      {"Key6", "blob_value6"},
  };
  for (const auto& entry : trailing_entries) {
    ASSERT_OK(Put(entry[0], entry[1]));
  }
  ASSERT_OK(Flush());

  // Null range bounds are passed to CompactRange below.
  constexpr Slice* range_begin = nullptr;
  constexpr Slice* range_end = nullptr;

  // On compaction, because of blob_garbage_collection_age_cutoff, it will
  // delete the oldest blob file and create new blob file during compaction.
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), range_begin, range_end));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  blob_event_listener->CheckCounters();
}
|
|
|
|
|
2020-02-20 12:07:53 -08:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2015-03-17 14:08:00 -07:00
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 14:45:18 -08:00
|
|
|
}
|