rocksdb/db/blob/blob_log_sequential_reader.cc
mrambacher 3dff28cf9b Use SystemClock* instead of std::shared_ptr<SystemClock> in lower level routines (#8033)
Summary:
For performance purposes, the lower level routines were changed to use a SystemClock* instead of a std::shared_ptr<SystemClock>.  The shared ptr has some performance degradation on certain hardware classes.

For most of the system, there is no risk of the pointer being deleted/invalid because the shared_ptr will be stored elsewhere.  For example, the ImmutableDBOptions stores the Env which has a std::shared_ptr<SystemClock> in it.  The SystemClock* within the ImmutableDBOptions is essentially a "short cut" to gain access to this constant resource.

There were a few classes (PeriodicWorkScheduler?) where the "short cut" property did not hold.  In those cases, the shared pointer was preserved.

Using db_bench readrandom perf_level=3 on my EC2 box, this change performed as well or better than 6.17:

6.17: readrandom   :      28.046 micros/op 854902 ops/sec;   61.3 MB/s (355999 of 355999 found)
6.18: readrandom   :      32.615 micros/op 735306 ops/sec;   52.7 MB/s (290999 of 290999 found)
PR: readrandom   :      27.500 micros/op 871909 ops/sec;   62.5 MB/s (367999 of 367999 found)

(Note that the times for 6.18 are prior to revert of the SystemClock).

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8033

Reviewed By: pdillinger

Differential Revision: D27014563

Pulled By: mrambacher

fbshipit-source-id: ad0459eba03182e454391b5926bf5cdd45657b67
2021-03-15 04:34:11 -07:00

133 lines
3.9 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#include "db/blob/blob_log_sequential_reader.h"
#include "file/random_access_file_reader.h"
#include "monitoring/statistics.h"
#include "util/stop_watch.h"
namespace ROCKSDB_NAMESPACE {
BlobLogSequentialReader::BlobLogSequentialReader(
std::unique_ptr<RandomAccessFileReader>&& file_reader, SystemClock* clock,
Statistics* statistics)
: file_(std::move(file_reader)),
clock_(clock),
statistics_(statistics),
next_byte_(0) {}
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably kept here so that the header does not need the
// complete RandomAccessFileReader type for the owning `file_` member -- confirm
// against the header.
BlobLogSequentialReader::~BlobLogSequentialReader() = default;
// Reads `size` bytes starting at the current cursor (`next_byte_`) into `buf`
// and exposes the result through `*slice`.
//
// The cursor is advanced by `size` regardless of whether the read succeeded.
// On a successful read the number of bytes actually returned is added to the
// BLOB_DB_BLOB_FILE_BYTES_READ ticker; a short read is then reported as
// Status::Corruption. A failed read returns the file reader's status as is.
Status BlobLogSequentialReader::ReadSlice(uint64_t size, Slice* slice,
                                          char* buf) {
  assert(slice);
  assert(file_);

  // Lives until the function returns, so it spans the read and the
  // bookkeeping below.
  StopWatch read_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);

  Status status = file_->Read(IOOptions(), next_byte_,
                              static_cast<size_t>(size), slice, buf, nullptr);

  // Move the cursor before inspecting the outcome of the read.
  next_byte_ += size;

  if (status.ok()) {
    RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, slice->size());

    // Fewer bytes than requested means the file ended prematurely.
    if (slice->size() != size) {
      return Status::Corruption("EOF reached while reading record");
    }
  }

  return status;
}
// Reads the blob log file header from the very beginning of the file and
// decodes it into `*header`. Must be the first read on this reader (asserts
// that the cursor is still at offset 0).
//
// Returns the ReadSlice() error if the read fails, Status::Corruption if the
// buffer does not hold exactly BlobLogHeader::kSize bytes, and otherwise the
// result of BlobLogHeader::DecodeFrom().
Status BlobLogSequentialReader::ReadHeader(BlobLogHeader* header) {
  assert(header);
  assert(next_byte_ == 0);

  static_assert(BlobLogHeader::kSize <= sizeof(header_buf_),
                "Buffer is smaller than BlobLogHeader::kSize");

  Status status = ReadSlice(BlobLogHeader::kSize, &buffer_, header_buf_);
  if (status.ok()) {
    if (buffer_.size() != BlobLogHeader::kSize) {
      return Status::Corruption("EOF reached before file header");
    }
    return header->DecodeFrom(buffer_);
  }

  return status;
}
// Reads the next record at the current cursor into `*record`.
//
// The fixed-size record header is always read and decoded. `level` selects how
// much of the payload that follows is materialized:
//   kReadHeader        - nothing more is read; the cursor skips key and value.
//   kReadHeaderKey     - the key is read into record->key_buf / record->key;
//                        the cursor then skips the value.
//   kReadHeaderKeyBlob - key and value are both read, then the record's
//                        CheckBlobCRC() is run.
//
// If `blob_offset` is non-null it receives the file offset at which the value
// starts (the position right after the record header plus the key size).
//
// Returns the first non-OK status encountered, or OK.
Status BlobLogSequentialReader::ReadRecord(BlobLogRecord* record,
                                           ReadLevel level,
                                           uint64_t* blob_offset) {
  assert(record);

  static_assert(BlobLogRecord::kHeaderSize <= sizeof(header_buf_),
                "Buffer is smaller than BlobLogRecord::kHeaderSize");

  Status status =
      ReadSlice(BlobLogRecord::kHeaderSize, &buffer_, header_buf_);
  if (!status.ok()) {
    return status;
  }

  if (buffer_.size() != BlobLogRecord::kHeaderSize) {
    return Status::Corruption("EOF reached before record header");
  }

  status = record->DecodeHeaderFrom(buffer_);
  if (!status.ok()) {
    return status;
  }

  // Combined length of the key and value that follow the record header.
  const uint64_t payload_size = record->key_size + record->value_size;

  // The cursor now sits on the first key byte, so the value begins key_size
  // bytes further on.
  if (blob_offset != nullptr) {
    *blob_offset = next_byte_ + record->key_size;
  }

  if (level == kReadHeader) {
    // Skip over the whole payload without reading it.
    next_byte_ += payload_size;
  } else if (level == kReadHeaderKey) {
    record->key_buf.reset(new char[record->key_size]);
    status = ReadSlice(record->key_size, &record->key, record->key_buf.get());
    // Skip the value; this happens whether or not the key read succeeded.
    next_byte_ += record->value_size;
  } else if (level == kReadHeaderKeyBlob) {
    record->key_buf.reset(new char[record->key_size]);
    status = ReadSlice(record->key_size, &record->key, record->key_buf.get());
    if (status.ok()) {
      record->value_buf.reset(new char[record->value_size]);
      status = ReadSlice(record->value_size, &record->value,
                         record->value_buf.get());
      if (status.ok()) {
        status = record->CheckBlobCRC();
      }
    }
  }

  return status;
}
// Reads BlobLogFooter::kSize bytes at the current cursor and decodes them into
// `*footer`.
//
// Returns the ReadSlice() error if the read fails, Status::Corruption if the
// buffer does not hold exactly BlobLogFooter::kSize bytes, and otherwise the
// result of BlobLogFooter::DecodeFrom().
Status BlobLogSequentialReader::ReadFooter(BlobLogFooter* footer) {
  assert(footer);

  static_assert(BlobLogFooter::kSize <= sizeof(header_buf_),
                "Buffer is smaller than BlobLogFooter::kSize");

  Status status = ReadSlice(BlobLogFooter::kSize, &buffer_, header_buf_);
  if (status.ok()) {
    if (buffer_.size() != BlobLogFooter::kSize) {
      return Status::Corruption("EOF reached before file footer");
    }
    return footer->DecodeFrom(buffer_);
  }

  return status;
}
} // namespace ROCKSDB_NAMESPACE