No need to limit to 20 files in UpdateAccumulatedStats() if options.max_open_files=-1
Summary: There is a hardcoded constraint in our statistics collection that prevents reading properties from more than 20 SST files. This means our statistics will be very inaccurate for databases with more than 20 files, since the additional files are simply ignored. The purpose of constraining the number of files used is to bound the I/O performed during statistics collection, since these statistics need to be recomputed every time the database is reopened. However, this constraint doesn't account for the case where the option "max_open_files" is -1: in that case, all the file metadata has already been read, so MaybeInitializeFileMetaData() won't incur any I/O cost. This diff therefore removes the 20-file constraint when max_open_files == -1.

Test Plan: Added the unit test "ValidateSampleNumber" to db/db_properties_test.cc. It generates 20 files with 2 rows each and 10 files with 1 row each. If max_open_files != -1, `rocksdb.estimate-num-keys` should be (10*1 + 10*2)/20 * 30 = 45. Otherwise, it should be the ground truth, 50. {F1089153}

Reviewers: andrewkr

Reviewed By: andrewkr

Subscribers: andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D56253
parent 8a1a603fdb
commit cc87075d63
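For reference, below is a small standalone sketch of the arithmetic behind the 45-vs-50 numbers in the test plan. It is illustrative only and not part of the commit or of RocksDB; the file and row counts simply mirror the test setup, where 20 two-row files are written first and 10 one-row files last, so the "20 newest" files consist of the 10 one-row files plus 10 of the two-row files.

// Standalone sketch (not RocksDB code) of the key-count estimation above.
#include <cstdio>

int main() {
  const int total_files = 30;
  const int sample_limit = 20;               // hardcoded file limit being relaxed
  const int sampled_keys = 10 * 1 + 10 * 2;  // keys in the 20 newest files = 30
  const int all_keys = 20 * 2 + 10 * 1;      // ground truth = 50

  // With sampling: average keys per sampled file, scaled to all files -> 45.
  const int estimate = sampled_keys * total_files / sample_limit;
  // With max_open_files == -1 the sampling limit is skipped, so the property
  // reflects all_keys instead of the estimate.
  std::printf("sampled estimate = %d, ground truth = %d\n", estimate, all_keys);
  return 0;
}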
db/db_properties_test.cc
@@ -253,6 +253,35 @@ TEST_F(DBPropertiesTest, ValidatePropertyInfo) {
   }
 }
 
+TEST_F(DBPropertiesTest, ValidateSampleNumber) {
+  // When "max_open_files" is -1, we read all the files for
+  // "rocksdb.estimate-num-keys" computation, which is the ground truth.
+  // Otherwise, we sample the 20 newest files to make an estimation.
+  // Formula: latest_20_files_active_key_ratio * total_files
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.level0_stop_writes_trigger = 1000;
+  DestroyAndReopen(options);
+  int key = 0;
+  for (int files = 20; files >= 10; files -= 10) {
+    for (int i = 0; i < files; i++) {
+      int rows = files / 10;
+      for (int j = 0; j < rows; j++) {
+        db_->Put(WriteOptions(), std::to_string(++key), "foo");
+      }
+      db_->Flush(FlushOptions());
+    }
+  }
+  std::string num;
+  Reopen(options);
+  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num));
+  ASSERT_EQ("45", num);
+  options.max_open_files = -1;
+  Reopen(options);
+  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num));
+  ASSERT_EQ("50", num);
+}
+
 TEST_F(DBPropertiesTest, AggregatedTableProperties) {
   for (int kTableCount = 40; kTableCount <= 100; kTableCount += 30) {
     const int kKeysPerTable = 100;
db/version_set.cc
@@ -1079,6 +1079,12 @@ void Version::UpdateAccumulatedStats(bool update_stats) {
       if (MaybeInitializeFileMetaData(file_meta)) {
         // each FileMeta will be initialized only once.
         storage_info_.UpdateAccumulatedStats(file_meta);
+        // when option "max_open_files" is -1, all the file metadata has
+        // already been read, so MaybeInitializeFileMetaData() won't incur
+        // any I/O cost.
+        if (vset_->db_options_->max_open_files == -1) {
+          continue;
+        }
         if (++init_count >= kMaxInitCount) {
           break;
         }