Blob DB: avoid having a separate read of checksum
Summary: Previously, on a blob db read, we made one read for the blob value and then a second read to get the CRC checksum. This change combines the two reads into one. Benchmark: readrandom db_bench with a 1G database, base db size of 13M, value size 1k: `./db_bench --db=/home/yiwu/tmp/db_bench --use_blob_db --value_size=1024 --num=1000000 --benchmarks=readrandom --use_existing_db --cache_size=32000000` master: throughput 234MB/s, get micros p50 5.984 p95 9.998 p99 20.817 p100 787; this PR: throughput 261MB/s, get micros p50 5.157 p95 9.928 p99 20.724 p100 190. Closes https://github.com/facebook/rocksdb/pull/3301 Differential Revision: D6615950 Pulled By: yiwu-arbug fbshipit-source-id: 052410c6d8539ec0cc305d53793bbc8f3616baa3
This commit is contained in:
parent
af9746fd22
commit
6d4ef9e839
@ -1055,58 +1055,58 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
|
|||||||
std::shared_ptr<RandomAccessFileReader> reader =
|
std::shared_ptr<RandomAccessFileReader> reader =
|
||||||
GetOrOpenRandomAccessReader(bfile, env_, env_options_);
|
GetOrOpenRandomAccessReader(bfile, env_, env_options_);
|
||||||
|
|
||||||
std::string* valueptr = value->GetSelf();
|
assert(blob_index.offset() > key.size() + sizeof(uint32_t));
|
||||||
std::string value_c;
|
uint64_t record_offset = blob_index.offset() - key.size() - sizeof(uint32_t);
|
||||||
if (bdb_options_.compression != kNoCompression) {
|
uint64_t record_size = sizeof(uint32_t) + key.size() + blob_index.size();
|
||||||
valueptr = &value_c;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate the buffer. This is safe in C++11
|
// Allocate the buffer. This is safe in C++11
|
||||||
// Note that std::string::reserved() does not work, since previous value
|
std::string buffer_str(record_size, static_cast<char>(0));
|
||||||
// of the buffer can be larger than blob_index.size().
|
char* buffer = &buffer_str[0];
|
||||||
valueptr->resize(blob_index.size());
|
|
||||||
char* buffer = &(*valueptr)[0];
|
|
||||||
|
|
||||||
Slice blob_value;
|
// A partial blob record contains the checksum, key and value.
|
||||||
|
Slice blob_record;
|
||||||
{
|
{
|
||||||
StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
|
StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
|
||||||
s = reader->Read(blob_index.offset(), blob_index.size(), &blob_value,
|
s = reader->Read(record_offset, record_size, &blob_record, buffer);
|
||||||
buffer);
|
RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, blob_record.size());
|
||||||
RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, blob_value.size());
|
|
||||||
}
|
}
|
||||||
if (!s.ok() || blob_value.size() != blob_index.size()) {
|
if (!s.ok()) {
|
||||||
if (debug_level_ >= 2) {
|
ROCKS_LOG_DEBUG(db_options_.info_log,
|
||||||
ROCKS_LOG_ERROR(db_options_.info_log,
|
"Failed to read blob from blob file %" PRIu64
|
||||||
"Failed to read blob from file: %s blob_offset: %" PRIu64
|
", blob_offset: %" PRIu64 ", blob_size: %" PRIu64
|
||||||
" blob_size: %" PRIu64 " read: %d key: %s status: '%s'",
|
", key_size: " PRIu64 ", read " PRIu64
|
||||||
bfile->PathName().c_str(), blob_index.offset(),
|
"bytes, status: '%s'",
|
||||||
blob_index.size(), static_cast<int>(blob_value.size()),
|
bfile->BlobFileNumber(), blob_index.offset(),
|
||||||
key.data(), s.ToString().c_str());
|
blob_index.size(), key.size(), s.ToString().c_str());
|
||||||
}
|
return s;
|
||||||
return Status::NotFound("Blob Not Found as couldnt retrieve Blob");
|
|
||||||
}
|
}
|
||||||
|
if (blob_record.size() != record_size) {
|
||||||
|
ROCKS_LOG_DEBUG(db_options_.info_log,
|
||||||
|
"Failed to read blob from blob file %" PRIu64
|
||||||
|
", blob_offset: %" PRIu64 ", blob_size: %" PRIu64
|
||||||
|
", key_size: " PRIu64 ", read " PRIu64
|
||||||
|
"bytes, status: '%s'",
|
||||||
|
bfile->BlobFileNumber(), blob_index.offset(),
|
||||||
|
blob_index.size(), key.size(), s.ToString().c_str());
|
||||||
|
|
||||||
// TODO(yiwu): Add an option to skip crc checking.
|
return Status::Corruption("Failed to retrieve blob from blob index.");
|
||||||
Slice crc_slice;
|
}
|
||||||
|
Slice crc_slice(blob_record.data(), sizeof(uint32_t));
|
||||||
|
Slice blob_value(blob_record.data() + sizeof(uint32_t) + key.size(),
|
||||||
|
blob_index.size());
|
||||||
uint32_t crc_exp;
|
uint32_t crc_exp;
|
||||||
std::string crc_str;
|
if (!GetFixed32(&crc_slice, &crc_exp)) {
|
||||||
crc_str.resize(sizeof(uint32_t));
|
ROCKS_LOG_DEBUG(db_options_.info_log,
|
||||||
char* crc_buffer = &(crc_str[0]);
|
"Unable to decode CRC from blob file %" PRIu64
|
||||||
s = reader->Read(blob_index.offset() - (key.size() + sizeof(uint32_t)),
|
", blob_offset: %" PRIu64 ", blob_size: %" PRIu64
|
||||||
sizeof(uint32_t), &crc_slice, crc_buffer);
|
", key size: %" PRIu64 ", status: '%s'",
|
||||||
if (!s.ok() || !GetFixed32(&crc_slice, &crc_exp)) {
|
bfile->BlobFileNumber(), blob_index.offset(),
|
||||||
if (debug_level_ >= 2) {
|
blob_index.size(), key.size(), s.ToString().c_str());
|
||||||
ROCKS_LOG_ERROR(db_options_.info_log,
|
return Status::Corruption("Unable to decode checksum.");
|
||||||
"Failed to fetch blob crc file: %s blob_offset: %" PRIu64
|
|
||||||
" blob_size: %" PRIu64 " key: %s status: '%s'",
|
|
||||||
bfile->PathName().c_str(), blob_index.offset(),
|
|
||||||
blob_index.size(), key.data(), s.ToString().c_str());
|
|
||||||
}
|
|
||||||
return Status::NotFound("Blob Not Found as couldnt retrieve CRC");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t crc = crc32c::Value(key.data(), key.size());
|
uint32_t crc = crc32c::Value(blob_record.data() + sizeof(uint32_t),
|
||||||
crc = crc32c::Extend(crc, blob_value.data(), blob_value.size());
|
blob_record.size() - sizeof(uint32_t));
|
||||||
crc = crc32c::Mask(crc); // Adjust for storage
|
crc = crc32c::Mask(crc); // Adjust for storage
|
||||||
if (crc != crc_exp) {
|
if (crc != crc_exp) {
|
||||||
if (debug_level_ >= 2) {
|
if (debug_level_ >= 2) {
|
||||||
@ -1119,7 +1119,9 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
|
|||||||
return Status::Corruption("Corruption. Blob CRC mismatch");
|
return Status::Corruption("Corruption. Blob CRC mismatch");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bfile->compression() != kNoCompression) {
|
if (bfile->compression() == kNoCompression) {
|
||||||
|
value->PinSelf(blob_value);
|
||||||
|
} else {
|
||||||
BlockContents contents;
|
BlockContents contents;
|
||||||
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily());
|
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily());
|
||||||
{
|
{
|
||||||
@ -1130,11 +1132,9 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
|
|||||||
kBlockBasedTableVersionFormat, Slice(), bfile->compression(),
|
kBlockBasedTableVersionFormat, Slice(), bfile->compression(),
|
||||||
*(cfh->cfd()->ioptions()));
|
*(cfh->cfd()->ioptions()));
|
||||||
}
|
}
|
||||||
*(value->GetSelf()) = contents.data.ToString();
|
value->PinSelf(contents.data);
|
||||||
}
|
}
|
||||||
|
|
||||||
value->PinSelf();
|
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user