Use flush time for the props.creation_time for FIFO compaction (#6612)
Summary: For FIFO compaction, we use flush time instead of oldest key time as the creation time. This is to prevent FIFO compaction dropping files whose oldest key time is older than TTL but which has newer keys than TTL. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6612 Test Plan: make check Reviewed By: siying Differential Revision: D20748217 Pulled By: riversand963 fbshipit-source-id: 3f7b00a847020760537cdddd12f6fe039e5bc663
This commit is contained in:
parent
bba5e7bc21
commit
00894a289f
@ -1,7 +1,4 @@
|
||||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
### Bug Fixes
|
||||
* Fix a data race that might cause crash when calling DB::GetCreationTimeOfOldestFile() by a small chance. The bug was introduced in 6.6 Release.
|
||||
|
||||
## 6.8.0 (02/24/2020)
|
||||
### Java API Changes
|
||||
@ -30,6 +27,9 @@
|
||||
* `db_bench` now supports `value_size_distribution_type`, `value_size_min`, `value_size_max` options for generating random variable sized value. Added `blob_db_compression_type` option for BlobDB to enable blob compression.
|
||||
* Replace RocksDB namespace "rocksdb" with flag "ROCKSDB_NAMESPACE" which if is not defined, defined as "rocksdb" in header file rocksdb_namespace.h.
|
||||
|
||||
### Behavior changes
|
||||
* Since RocksDB 6.8, ttl-based FIFO compaction can drop a file whose oldest key becomes older than options.ttl while others have not. This fix reverts this and makes ttl-based FIFO compaction use the file's flush time as the criterion. This fix also requires that max_open_files = -1 and compaction_options_fifo.allow_compaction = false to function properly.
|
||||
|
||||
## 6.7.0 (01/21/2020)
|
||||
### Public API Change
|
||||
* Added a rocksdb::FileSystem class in include/rocksdb/file_system.h to encapsulate file creation/read/write operations, and an option DBOptions::file_system to allow a user to pass in an instance of rocksdb::FileSystem. If its a non-null value, this will take precendence over DBOptions::env for file operations. A new API rocksdb::FileSystem::Default() returns a platform default object. The DBOptions::env option and Env::Default() API will continue to be used for threading and other OS related functions, and where DBOptions::file_system is not specified, for file operations. For storage developers who are accustomed to rocksdb::Env, the interface in rocksdb::FileSystem is new and will probably undergo some changes as more storage systems are ported to it from rocksdb::Env. As of now, no env other than Posix has been ported to the new interface.
|
||||
|
@ -70,10 +70,9 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction(
|
||||
// avoid underflow
|
||||
if (current_time > mutable_cf_options.ttl) {
|
||||
for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) {
|
||||
auto f = *ritr;
|
||||
if (f->fd.table_reader != nullptr &&
|
||||
f->fd.table_reader->GetTableProperties() != nullptr) {
|
||||
auto creation_time =
|
||||
FileMetaData* f = *ritr;
|
||||
if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) {
|
||||
uint64_t creation_time =
|
||||
f->fd.table_reader->GetTableProperties()->creation_time;
|
||||
if (creation_time == 0 ||
|
||||
creation_time >= (current_time - mutable_cf_options.ttl)) {
|
||||
@ -96,11 +95,14 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction(
|
||||
}
|
||||
|
||||
for (const auto& f : inputs[0].files) {
|
||||
uint64_t creation_time = 0;
|
||||
if (f && f->fd.table_reader && f->fd.table_reader->GetTableProperties()) {
|
||||
creation_time = f->fd.table_reader->GetTableProperties()->creation_time;
|
||||
}
|
||||
ROCKS_LOG_BUFFER(log_buffer,
|
||||
"[%s] FIFO compaction: picking file %" PRIu64
|
||||
" with creation time %" PRIu64 " for deletion",
|
||||
cf_name.c_str(), f->fd.GetNumber(),
|
||||
f->fd.table_reader->GetTableProperties()->creation_time);
|
||||
cf_name.c_str(), f->fd.GetNumber(), creation_time);
|
||||
}
|
||||
|
||||
Compaction* c = new Compaction(
|
||||
|
@ -371,6 +371,11 @@ Status FlushJob::WriteLevel0Table() {
|
||||
meta_.oldest_ancester_time = std::min(current_time, oldest_key_time);
|
||||
meta_.file_creation_time = current_time;
|
||||
|
||||
uint64_t creation_time = (cfd_->ioptions()->compaction_style ==
|
||||
CompactionStyle::kCompactionStyleFIFO)
|
||||
? current_time
|
||||
: meta_.oldest_ancester_time;
|
||||
|
||||
s = BuildTable(
|
||||
dbname_, db_options_.env, db_options_.fs.get(), *cfd_->ioptions(),
|
||||
mutable_cf_options_, file_options_, cfd_->table_cache(), iter.get(),
|
||||
@ -383,8 +388,7 @@ Status FlushJob::WriteLevel0Table() {
|
||||
mutable_cf_options_.paranoid_file_checks, cfd_->internal_stats(),
|
||||
TableFileCreationReason::kFlush, event_logger_, job_context_->job_id,
|
||||
Env::IO_HIGH, &table_properties_, 0 /* level */,
|
||||
meta_.oldest_ancester_time, oldest_key_time, write_hint,
|
||||
current_time);
|
||||
creation_time, oldest_key_time, write_hint, current_time);
|
||||
LogFlush(db_options_.info_log);
|
||||
}
|
||||
ROCKS_LOG_INFO(db_options_.info_log,
|
||||
|
Loading…
Reference in New Issue
Block a user