2014-01-16 01:22:34 +01:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "db/compaction.h"
|
2014-05-14 21:13:50 +02:00
|
|
|
|
2014-09-05 08:14:37 +02:00
|
|
|
#ifndef __STDC_FORMAT_MACROS
|
2014-05-14 21:13:50 +02:00
|
|
|
#define __STDC_FORMAT_MACROS
|
2014-09-05 08:14:37 +02:00
|
|
|
#endif
|
|
|
|
|
2014-05-14 21:13:50 +02:00
|
|
|
#include <inttypes.h>
|
|
|
|
#include <vector>
|
|
|
|
|
2014-02-01 01:45:20 +01:00
|
|
|
#include "db/column_family.h"
|
2014-05-14 21:13:50 +02:00
|
|
|
#include "util/logging.h"
|
2015-04-02 20:06:30 +02:00
|
|
|
#include "util/sync_point.h"
|
2014-01-16 01:22:34 +01:00
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
2014-08-07 19:05:04 +02:00
|
|
|
// Returns the sum of fd.GetFileSize() over the entries of `files`.
//
// Accumulation stops at the first null entry: any files that appear after a
// null pointer in the vector are not counted. An empty vector yields 0.
uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
  uint64_t total_bytes = 0;
  for (FileMetaData* meta : files) {
    if (!meta) {
      // A null entry terminates the scan rather than being skipped.
      break;
    }
    total_bytes += meta->fd.GetFileSize();
  }
  return total_bytes;
}
|
|
|
|
|
2014-11-06 20:14:28 +01:00
|
|
|
void Compaction::SetInputVersion(Version* _input_version) {
|
|
|
|
input_version_ = _input_version;
|
2014-10-27 23:49:46 +01:00
|
|
|
cfd_ = input_version_->cfd();
|
|
|
|
|
|
|
|
cfd_->Ref();
|
|
|
|
input_version_->Ref();
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
edit_.SetColumnFamily(cfd_->GetID());
|
2014-10-27 23:49:46 +01:00
|
|
|
}
|
|
|
|
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
// helper function to determine if compaction is creating files at the
|
|
|
|
// bottommost level
|
|
|
|
bool Compaction::IsBottommostLevel(
|
|
|
|
int output_level, VersionStorageInfo* vstorage,
|
|
|
|
const std::vector<CompactionInputFiles>& inputs) {
|
|
|
|
if (inputs[0].level == 0 &&
|
|
|
|
inputs[0].files.back() != vstorage->LevelFiles(0).back()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// checks whether there are files living beyond the output_level.
|
|
|
|
for (int i = output_level + 1; i < vstorage->num_levels(); i++) {
|
|
|
|
if (vstorage->NumLevelFiles(i) > 0) {
|
|
|
|
return false;
|
|
|
|
}
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Compaction::IsFullCompaction(
|
|
|
|
VersionStorageInfo* vstorage,
|
|
|
|
const std::vector<CompactionInputFiles>& inputs) {
|
|
|
|
int num_files_in_compaction = 0;
|
|
|
|
int total_num_files = 0;
|
|
|
|
for (int l = 0; l < vstorage->num_levels(); l++) {
|
|
|
|
total_num_files += vstorage->NumLevelFiles(l);
|
2014-07-17 03:12:17 +02:00
|
|
|
}
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
for (size_t i = 0; i < inputs.size(); i++) {
|
|
|
|
num_files_in_compaction += inputs[i].size();
|
|
|
|
}
|
|
|
|
return num_files_in_compaction == total_num_files;
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 23:45:18 +01:00
|
|
|
Compaction::Compaction(VersionStorageInfo* vstorage,
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
const MutableCFOptions& _mutable_cf_options,
|
|
|
|
std::vector<CompactionInputFiles> _inputs,
|
|
|
|
int _output_level, uint64_t _target_file_size,
|
|
|
|
uint64_t _max_grandparent_overlap_bytes,
|
|
|
|
uint32_t _output_path_id, CompressionType _compression,
|
|
|
|
std::vector<FileMetaData*> _grandparents,
|
|
|
|
bool _manual_compaction, double _score,
|
|
|
|
bool _deletion_compaction)
|
|
|
|
: start_level_(_inputs[0].level),
|
2014-11-08 00:22:10 +01:00
|
|
|
output_level_(_output_level),
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
max_output_file_size_(_target_file_size),
|
2014-11-08 00:22:10 +01:00
|
|
|
max_grandparent_overlap_bytes_(_max_grandparent_overlap_bytes),
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
mutable_cf_options_(_mutable_cf_options),
|
2014-11-08 00:22:10 +01:00
|
|
|
input_version_(nullptr),
|
2014-11-08 00:08:12 +01:00
|
|
|
number_levels_(vstorage->num_levels()),
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 23:45:18 +01:00
|
|
|
cfd_(nullptr),
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
output_path_id_(_output_path_id),
|
|
|
|
output_compression_(_compression),
|
2014-11-08 00:22:10 +01:00
|
|
|
deletion_compaction_(_deletion_compaction),
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
inputs_(std::move(_inputs)),
|
|
|
|
grandparents_(std::move(_grandparents)),
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 23:45:18 +01:00
|
|
|
grandparent_index_(0),
|
|
|
|
seen_key_(false),
|
|
|
|
overlapped_bytes_(0),
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are a couple of things demonstrating that the Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
score_(_score),
|
|
|
|
bottommost_level_(IsBottommostLevel(output_level_, vstorage, inputs_)),
|
|
|
|
is_full_compaction_(IsFullCompaction(vstorage, inputs_)),
|
|
|
|
is_manual_compaction_(_manual_compaction),
|
|
|
|
level_ptrs_(std::vector<size_t>(number_levels_, 0)) {
|
|
|
|
MarkFilesBeingCompacted(true);
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
for (size_t i = 1; i < inputs_.size(); ++i) {
|
|
|
|
assert(inputs_[i].level > inputs_[i - 1].level);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// setup input_levels_
|
|
|
|
{
|
|
|
|
input_levels_.resize(num_input_levels());
|
|
|
|
for (size_t which = 0; which < num_input_levels(); which++) {
|
|
|
|
DoGenerateLevelFilesBrief(&input_levels_[which], inputs_[which].files,
|
|
|
|
&arena_);
|
|
|
|
}
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* These APIs allow users to obtain the current state of a RocksDB instance for one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happen.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 23:45:18 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
// Releases the references this compaction holds on its input version and on
// its column family (the counterparts of the Ref() calls in
// SetInputVersion()). The column family object is deleted when Unref()
// reports that this was the last reference.
Compaction::~Compaction() {
  if (input_version_ != nullptr) {
    input_version_->Unref();
  }

  if (cfd_ == nullptr) {
    return;
  }
  const bool was_last_reference = cfd_->Unref();
  if (was_last_reference) {
    delete cfd_;
  }
}
|
|
|
|
|
2015-04-02 20:06:30 +02:00
|
|
|
bool Compaction::InputCompressionMatchesOutput() const {
|
|
|
|
int base_level = input_version_->storage_info()->base_level();
|
|
|
|
bool matches = (GetCompressionType(*cfd_->ioptions(), start_level_,
|
|
|
|
base_level) == output_compression_);
|
|
|
|
if (matches) {
|
|
|
|
TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:Matches");
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:DidntMatch");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
bool Compaction::IsTrivialMove() const {
|
|
|
|
// Avoid a move if there is lots of overlapping grandparent data.
|
|
|
|
// Otherwise, the move could create a parent file that will require
|
|
|
|
// a very expensive merge later on.
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
// If start_level_== output_level_, the purpose is to force compaction
|
|
|
|
// filter to be applied to that level, and thus cannot be a trivia move.
|
|
|
|
return (start_level_ != output_level_ && num_input_levels() == 1 &&
|
|
|
|
num_input_files(0) == 1 &&
|
|
|
|
input(0, 0)->fd.GetPathId() == GetOutputPathId() &&
|
2015-04-02 20:06:30 +02:00
|
|
|
InputCompressionMatchesOutput() &&
|
2014-01-22 19:55:16 +01:00
|
|
|
TotalFileSize(grandparents_) <= max_grandparent_overlap_bytes_);
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
|
2014-11-06 20:14:28 +01:00
|
|
|
// Records, in out_edit, the deletion of every input file of this compaction.
// Each file is reported together with the level of the input group it
// belongs to.
void Compaction::AddInputDeletions(VersionEdit* out_edit) {
  for (size_t which = 0; which < num_input_levels(); ++which) {
    auto& level_inputs = inputs_[which];
    size_t file_idx = 0;
    while (file_idx < level_inputs.size()) {
      out_edit->DeleteFile(level(which),
                           level_inputs[file_idx]->fd.GetNumber());
      ++file_idx;
    }
  }
}
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
bool Compaction::KeyNotExistsBeyondOutputLevel(const Slice& user_key) {
|
2014-10-27 23:49:46 +01:00
|
|
|
assert(input_version_ != nullptr);
|
2014-09-09 00:04:34 +02:00
|
|
|
assert(cfd_->ioptions()->compaction_style != kCompactionStyleFIFO);
|
|
|
|
if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
|
2014-01-16 01:22:34 +01:00
|
|
|
return bottommost_level_;
|
|
|
|
}
|
|
|
|
// Maybe use binary search to find right entry instead of linear search?
|
2014-02-03 21:08:33 +01:00
|
|
|
const Comparator* user_cmp = cfd_->user_comparator();
|
2014-07-17 23:36:41 +02:00
|
|
|
for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
|
2014-10-27 23:49:46 +01:00
|
|
|
const std::vector<FileMetaData*>& files =
|
2014-10-31 16:48:19 +01:00
|
|
|
input_version_->storage_info()->LevelFiles(lvl);
|
2014-01-16 01:22:34 +01:00
|
|
|
for (; level_ptrs_[lvl] < files.size(); ) {
|
|
|
|
FileMetaData* f = files[level_ptrs_[lvl]];
|
|
|
|
if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
|
|
|
|
// We've advanced far enough
|
|
|
|
if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
|
2014-07-17 23:36:41 +02:00
|
|
|
// Key falls in this file's range, so definitely
|
|
|
|
// exists beyond output level
|
2014-01-16 01:22:34 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
level_ptrs_[lvl]++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Compaction::ShouldStopBefore(const Slice& internal_key) {
|
|
|
|
// Scan to find earliest grandparent file that contains key.
|
2014-02-03 21:08:33 +01:00
|
|
|
const InternalKeyComparator* icmp = &cfd_->internal_comparator();
|
2014-01-16 01:22:34 +01:00
|
|
|
while (grandparent_index_ < grandparents_.size() &&
|
|
|
|
icmp->Compare(internal_key,
|
|
|
|
grandparents_[grandparent_index_]->largest.Encode()) > 0) {
|
|
|
|
if (seen_key_) {
|
2014-06-14 00:54:19 +02:00
|
|
|
overlapped_bytes_ += grandparents_[grandparent_index_]->fd.GetFileSize();
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
assert(grandparent_index_ + 1 >= grandparents_.size() ||
|
|
|
|
icmp->Compare(grandparents_[grandparent_index_]->largest.Encode(),
|
|
|
|
grandparents_[grandparent_index_+1]->smallest.Encode())
|
|
|
|
< 0);
|
|
|
|
grandparent_index_++;
|
|
|
|
}
|
|
|
|
seen_key_ = true;
|
|
|
|
|
2014-01-22 19:55:16 +01:00
|
|
|
if (overlapped_bytes_ > max_grandparent_overlap_bytes_) {
|
2014-01-16 01:22:34 +01:00
|
|
|
// Too much overlap for current output; start new output
|
|
|
|
overlapped_bytes_ = 0;
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Mark (or clear) each file that is being compacted
|
2014-07-17 23:36:41 +02:00
|
|
|
void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted) {
|
2014-11-11 22:47:22 +01:00
|
|
|
for (size_t i = 0; i < num_input_levels(); i++) {
|
2014-01-16 01:22:34 +01:00
|
|
|
for (unsigned int j = 0; j < inputs_[i].size(); j++) {
|
2014-07-17 23:36:41 +02:00
|
|
|
assert(mark_as_compacted ? !inputs_[i][j]->being_compacted :
|
|
|
|
inputs_[i][j]->being_compacted);
|
|
|
|
inputs_[i][j]->being_compacted = mark_as_compacted;
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter options.level_compaction_dynamic_level_bytes. When it is turned on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: Added new unit tests; all test suites pass, including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
// Sample output:
|
|
|
|
// If compacting 3 L0 files, 2 L3 files and 1 L4 file, and outputting to L5,
|
|
|
|
// print: "3@0 + 2@3 + 1@4 files to L5"
|
|
|
|
const char* Compaction::InputLevelSummary(
|
|
|
|
InputLevelSummaryBuffer* scratch) const {
|
|
|
|
int len = 0;
|
|
|
|
bool is_first = true;
|
|
|
|
for (auto& input_level : inputs_) {
|
|
|
|
if (input_level.empty()) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!is_first) {
|
|
|
|
len +=
|
|
|
|
snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, " + ");
|
|
|
|
} else {
|
|
|
|
is_first = false;
|
|
|
|
}
|
|
|
|
len += snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len,
|
|
|
|
"%zu@%d", input_level.size(), input_level.level);
|
|
|
|
}
|
|
|
|
snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len,
|
|
|
|
" files to L%d", output_level());
|
|
|
|
|
|
|
|
return scratch->buffer;
|
|
|
|
}
|
|
|
|
|
Include a bunch more events in EventLogger
Summary:
Added these events:
* Recovery start, finish and also when recovery creates a file
* Trivial move
* Compaction start, finish and when compaction creates a file
* Flush start, finish
Also includes small fix to EventLogger
Also added option ROCKSDB_PRINT_EVENTS_TO_STDOUT which is useful when we debug things. I've spent far too much time chasing LOG files.
Still didn't get sst table properties in JSON. They are written very deeply into the stack. I'll address in separate diff.
TODO:
* Write specification. Let's first use this for a while and figure out what's good data to put here, too. After that we'll write spec
* Write tools that parse and analyze LOGs. This can be in python or go. Good intern task.
Test Plan: Ran db_bench with ROCKSDB_PRINT_EVENTS_TO_STDOUT. Here's the output: https://phabricator.fb.com/P19811976
Reviewers: sdong, yhchiang, rven, MarkCallaghan, kradhakrishnan, anthony
Reviewed By: anthony
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D37521
2015-04-28 00:20:02 +02:00
|
|
|
uint64_t Compaction::CalculateTotalInputSize() const {
|
|
|
|
uint64_t size = 0;
|
|
|
|
for (auto& input_level : inputs_) {
|
|
|
|
for (auto f : input_level.files) {
|
|
|
|
size += f->fd.GetFileSize();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
2014-02-01 01:45:20 +01:00
|
|
|
void Compaction::ReleaseCompactionFiles(Status status) {
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
MarkFilesBeingCompacted(false);
|
2014-02-01 01:45:20 +01:00
|
|
|
cfd_->compaction_picker()->ReleaseCompactionFiles(this, status);
|
|
|
|
}
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
void Compaction::ResetNextCompactionIndex() {
|
2014-10-27 23:49:46 +01:00
|
|
|
assert(input_version_ != nullptr);
|
2014-10-31 16:48:19 +01:00
|
|
|
input_version_->storage_info()->ResetNextCompactionIndex(start_level_);
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
|
2014-05-14 21:13:50 +02:00
|
|
|
namespace {
|
|
|
|
int InputSummary(const std::vector<FileMetaData*>& files, char* output,
|
|
|
|
int len) {
|
2014-03-20 00:01:25 +01:00
|
|
|
*output = '\0';
|
2014-01-16 01:22:34 +01:00
|
|
|
int write = 0;
|
|
|
|
for (unsigned int i = 0; i < files.size(); i++) {
|
|
|
|
int sz = len - write;
|
2013-12-23 17:54:50 +01:00
|
|
|
int ret;
|
|
|
|
char sztxt[16];
|
2014-06-14 00:54:19 +02:00
|
|
|
AppendHumanBytes(files.at(i)->fd.GetFileSize(), sztxt, 16);
|
|
|
|
ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ",
|
|
|
|
files.at(i)->fd.GetNumber(), sztxt);
|
2014-05-14 21:13:50 +02:00
|
|
|
if (ret < 0 || ret >= sz) break;
|
2014-01-16 01:22:34 +01:00
|
|
|
write += ret;
|
|
|
|
}
|
2014-05-14 21:13:50 +02:00
|
|
|
// if files.size() is non-zero, overwrite the last space
|
|
|
|
return write - !!files.size();
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
2014-05-14 21:13:50 +02:00
|
|
|
} // namespace
|
2014-01-16 01:22:34 +01:00
|
|
|
|
|
|
|
void Compaction::Summary(char* output, int len) {
|
2014-05-14 21:13:50 +02:00
|
|
|
int write =
|
|
|
|
snprintf(output, len, "Base version %" PRIu64
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
" Base level %d, inputs: [",
|
2014-07-17 23:36:41 +02:00
|
|
|
input_version_->GetVersionNumber(),
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
start_level_);
|
2014-01-25 23:12:24 +01:00
|
|
|
if (write < 0 || write >= len) {
|
2014-01-16 01:22:34 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-11-11 22:47:22 +01:00
|
|
|
for (size_t level_iter = 0; level_iter < num_input_levels(); ++level_iter) {
|
2014-11-06 20:14:28 +01:00
|
|
|
if (level_iter > 0) {
|
2014-07-17 23:36:41 +02:00
|
|
|
write += snprintf(output + write, len - write, "], [");
|
|
|
|
if (write < 0 || write >= len) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
2014-11-06 20:14:28 +01:00
|
|
|
write +=
|
|
|
|
InputSummary(inputs_[level_iter].files, output + write, len - write);
|
2014-07-17 23:36:41 +02:00
|
|
|
if (write < 0 || write >= len) {
|
|
|
|
return;
|
|
|
|
}
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
|
2014-05-14 21:13:50 +02:00
|
|
|
snprintf(output + write, len - write, "]");
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
|
2014-10-02 01:19:16 +02:00
|
|
|
uint64_t Compaction::OutputFilePreallocationSize(
|
|
|
|
const MutableCFOptions& mutable_options) {
|
2014-06-05 22:19:35 +02:00
|
|
|
uint64_t preallocation_size = 0;
|
|
|
|
|
2014-09-09 00:04:34 +02:00
|
|
|
if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel) {
|
2014-10-02 01:19:16 +02:00
|
|
|
preallocation_size = mutable_options.MaxFileSizeForLevel(output_level());
|
2014-06-05 22:19:35 +02:00
|
|
|
} else {
|
2014-11-11 22:47:22 +01:00
|
|
|
for (size_t level_iter = 0; level_iter < num_input_levels(); ++level_iter) {
|
2014-11-06 20:14:28 +01:00
|
|
|
for (const auto& f : inputs_[level_iter].files) {
|
2014-07-17 23:36:41 +02:00
|
|
|
preallocation_size += f->fd.GetFileSize();
|
|
|
|
}
|
2014-06-05 22:19:35 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// Over-estimate slightly so we don't end up just barely crossing
|
|
|
|
// the threshold
|
|
|
|
return preallocation_size * 1.1;
|
|
|
|
}
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
} // namespace rocksdb
|