diff --git a/HISTORY.md b/HISTORY.md index c92367368..d2298059c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -18,7 +18,7 @@ * Fixed a bug affecting the batched `MultiGet` API when used with keys spanning multiple column families and `sorted_input == false`. * Fixed a potential incorrect result in opt mode and assertion failures caused by releasing snapshot(s) during compaction. * Fixed passing of BlobFileCompletionCallback to Compaction job and Atomic flush job which was default paramter (nullptr). BlobFileCompletitionCallback is internal callback that manages addition of blob files to SSTFileManager. -* Fixed MultiGet not updating the block_read_count and block_read_byte PerfContext counters +* Fixed MultiGet not updating the block_read_count and block_read_byte PerfContext counters. ### New Features * Made the EventListener extend the Customizable class. @@ -29,8 +29,8 @@ * Add a comment to suggest btrfs user to disable file preallocation by setting `options.allow_fallocate=false`. * Fast forward option in Trace replay changed to double type to allow replaying at a lower speed, by settings the value between 0 and 1. This option can be set via `ReplayOptions` in `Replayer::Replay()`, or via `--trace_replay_fast_forward` in db_bench. * Add property `LiveSstFilesSizeAtTemperature` to retrieve sst file size at different temperature. -* Added a stat rocksdb.secondary.cache.hits -* Added a PerfContext counter secondary_cache_hit_count +* Added a stat rocksdb.secondary.cache.hits. +* Added a PerfContext counter secondary_cache_hit_count. * The integrated BlobDB implementation now supports the tickers `BLOB_DB_BLOB_FILE_BYTES_READ`, `BLOB_DB_GC_NUM_KEYS_RELOCATED`, and `BLOB_DB_GC_BYTES_RELOCATED`, as well as the histograms `BLOB_DB_COMPRESSION_MICROS` and `BLOB_DB_DECOMPRESSION_MICROS`. * Added hybrid configuration of Ribbon filter and Bloom filter where some LSM levels use Ribbon for memory space efficiency and some use Bloom for speed. See NewRibbonFilterPolicy. This also changes the default behavior of NewRibbonFilterPolicy to use Bloom for flushes under Leveled and Universal compaction and Ribbon otherwise. The C API function `rocksdb_filterpolicy_create_ribbon` is unchanged but adds new `rocksdb_filterpolicy_create_ribbon_hybrid`. diff --git a/include/rocksdb/trace_reader_writer.h b/include/rocksdb/trace_reader_writer.h index 50c72f7bb..335e091dc 100644 --- a/include/rocksdb/trace_reader_writer.h +++ b/include/rocksdb/trace_reader_writer.h @@ -19,8 +19,7 @@ namespace ROCKSDB_NAMESPACE { // a time. class TraceWriter { public: - TraceWriter() {} - virtual ~TraceWriter() {} + virtual ~TraceWriter() = default; virtual Status Write(const Slice& data) = 0; virtual Status Close() = 0; @@ -31,19 +30,18 @@ class TraceWriter { // a time. A RocksDB Replayer could depend on this to replay operations. class TraceReader { public: - TraceReader() {} - virtual ~TraceReader() {} + virtual ~TraceReader() = default; virtual Status Read(std::string* data) = 0; virtual Status Close() = 0; - // Seek back to the trace header. Replayer can call this method for - // repeatedly replaying. Note this method may fail if the reader is already - // closed. + // Seek back to the trace header. Replayer can call this method to restart + // replaying. Note this method may fail if the reader is already closed. virtual Status Reset() = 0; }; -// Factory methods to read/write traces from/to a file. +// Factory methods to write/read traces to/from a file. +// The implementations may not be thread-safe. Status NewFileTraceWriter(Env* env, const EnvOptions& env_options, const std::string& trace_filename, std::unique_ptr* trace_writer); diff --git a/utilities/trace/replayer_impl.cc b/utilities/trace/replayer_impl.cc index 09d94441e..31023f1a2 100644 --- a/utilities/trace/replayer_impl.cc +++ b/utilities/trace/replayer_impl.cc @@ -157,8 +157,21 @@ Status ReplayerImpl::Replay( // Background decoding and execution status. Status bg_s = Status::OK(); uint64_t last_err_ts = static_cast(-1); - // Callback function used in background work to update bg_s at the first - // execution error (with the smallest Trace timestamp). + // Callback function used in background work to update bg_s for the ealiest + // TraceRecord which has execution error. This is different from the + // timestamp of the first execution error (either start or end timestamp). + // + // Suppose TraceRecord R1, R2, with timestamps T1 < T2. Their execution + // timestamps are T1_start, T1_end, T2_start, T2_end. + // Single-thread: there must be T1_start < T1_end < T2_start < T2_end. + // Multi-thread: T1_start < T2_start may not be enforced. Orders of them are + // totally unknown. + // In order to report the same `first` error in both single-thread and + // multi-thread replay, we can only rely on the TraceRecords' timestamps, + // rather than their executin timestamps. Although in single-thread replay, + // the first error is also the last error, while in multi-thread replay, the + // first error may not be the first error in execution, and it may not be + // the last error in exeution as well. auto error_cb = [&mtx, &bg_s, &last_err_ts](Status err, uint64_t err_ts) { std::lock_guard gd(mtx); // Only record the first error. @@ -188,7 +201,7 @@ Status ReplayerImpl::Replay( break; } - // In multi-threaded replay, sleep first thatn start decoding and + // In multi-threaded replay, sleep first then start decoding and // execution in a thread. std::chrono::system_clock::time_point sleep_to = replay_epoch + @@ -253,20 +266,6 @@ Status ReplayerImpl::ReadHeader(Trace* header) { return TracerHelper::DecodeHeader(encoded_trace, header); } -Status ReplayerImpl::ReadFooter(Trace* footer) { - assert(footer != nullptr); - Status s = ReadTrace(footer); - if (!s.ok()) { - return s; - } - if (footer->type != kTraceEnd) { - return Status::Corruption("Corrupted trace file. Incorrect footer."); - } - - // TODO: Add more validations later - return s; -} - Status ReplayerImpl::ReadTrace(Trace* trace) { assert(trace != nullptr); std::string encoded_trace; diff --git a/utilities/trace/replayer_impl.h b/utilities/trace/replayer_impl.h index 3a61b2ecb..367b0b51e 100644 --- a/utilities/trace/replayer_impl.h +++ b/utilities/trace/replayer_impl.h @@ -50,7 +50,6 @@ class ReplayerImpl : public Replayer { private: Status ReadHeader(Trace* header); - Status ReadFooter(Trace* footer); Status ReadTrace(Trace* trace); // Generic function to execute a Trace in a thread pool. @@ -69,14 +68,14 @@ class ReplayerImpl : public Replayer { int trace_file_version_; }; -// The passin arg of MultiThreadRepkay for each trace record. +// Arguments passed to BackgroundWork() for replaying in a thread pool. struct ReplayerWorkerArg { Trace trace_entry; int trace_file_version; // Handler to execute TraceRecord. TraceRecord::Handler* handler; // Callback function to report the error status and the timestamp of the - // TraceRecord. + // TraceRecord (not the start/end timestamp of executing the TraceRecord). std::function error_cb; // Callback function to report the trace execution status and operation // execution status/result(s).