2017-05-10 23:54:35 +02:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-16 01:03:42 +02:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2017-05-10 23:54:35 +02:00
|
|
|
//
|
|
|
|
// Log format information shared by reader and writer.
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2018-11-13 21:46:35 +01:00
|
|
|
#include <memory>
|
2017-05-10 23:54:35 +02:00
|
|
|
#include <utility>
|
2018-11-13 21:46:35 +01:00
|
|
|
|
2017-05-10 23:54:35 +02:00
|
|
|
#include "rocksdb/options.h"
|
2017-10-27 22:14:34 +02:00
|
|
|
#include "rocksdb/slice.h"
|
2017-05-10 23:54:35 +02:00
|
|
|
#include "rocksdb/status.h"
|
|
|
|
#include "rocksdb/types.h"
|
|
|
|
|
2020-02-20 21:07:53 +01:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2017-05-10 23:54:35 +02:00
|
|
|
|
2017-10-27 22:14:34 +02:00
|
|
|
// Magic number identifying a blob log file (2395959 in decimal).
constexpr uint32_t kMagicNumber = 0x00248f37;

// The blob log format version used by default for new headers.
constexpr uint32_t kVersion1 = 1;

// A pair of 64-bit expiration values describing a range (first, second).
using ExpirationRange = std::pair<uint64_t, uint64_t>;
|
2017-05-10 23:54:35 +02:00
|
|
|
|
2017-10-27 22:14:34 +02:00
|
|
|
// Format of blob log file header (30 bytes):
|
|
|
|
//
|
|
|
|
// +--------------+---------+---------+-------+-------------+-------------------+
|
|
|
|
// | magic number | version | cf id | flags | compression | expiration range |
|
|
|
|
// +--------------+---------+---------+-------+-------------+-------------------+
|
|
|
|
// | Fixed32 | Fixed32 | Fixed32 | char | char | Fixed64 Fixed64 |
|
|
|
|
// +--------------+---------+---------+-------+-------------+-------------------+
|
|
|
|
//
|
|
|
|
// List of flags:
|
|
|
|
// has_ttl: Whether the file contains TTL data.
|
|
|
|
//
|
|
|
|
// Expiration range in the header is a rough range based on
|
|
|
|
// blob_db_options.ttl_range_secs.
|
|
|
|
struct BlobLogHeader {
|
|
|
|
static constexpr size_t kSize = 30;
|
2017-05-23 19:30:04 +02:00
|
|
|
|
2019-11-26 22:16:39 +01:00
|
|
|
BlobLogHeader() = default;
|
|
|
|
BlobLogHeader(uint32_t _column_family_id, CompressionType _compression,
|
|
|
|
bool _has_ttl, const ExpirationRange& _expiration_range)
|
|
|
|
: column_family_id(_column_family_id),
|
|
|
|
compression(_compression),
|
|
|
|
has_ttl(_has_ttl),
|
|
|
|
expiration_range(_expiration_range) {}
|
|
|
|
|
2017-10-27 22:14:34 +02:00
|
|
|
uint32_t version = kVersion1;
|
2017-11-01 00:27:22 +01:00
|
|
|
uint32_t column_family_id = 0;
|
|
|
|
CompressionType compression = kNoCompression;
|
|
|
|
bool has_ttl = false;
|
2019-11-26 22:16:39 +01:00
|
|
|
ExpirationRange expiration_range;
|
2017-05-10 23:54:35 +02:00
|
|
|
|
2017-10-27 22:14:34 +02:00
|
|
|
void EncodeTo(std::string* dst);
|
2017-05-10 23:54:35 +02:00
|
|
|
|
2017-10-27 22:14:34 +02:00
|
|
|
Status DecodeFrom(Slice slice);
|
2017-05-10 23:54:35 +02:00
|
|
|
};
|
|
|
|
|
2017-12-15 22:18:32 +01:00
|
|
|
// Format of blob log file footer (32 bytes):
|
2017-10-27 22:14:34 +02:00
|
|
|
//
|
2017-12-15 22:18:32 +01:00
|
|
|
// +--------------+------------+-------------------+------------+
|
|
|
|
// | magic number | blob count | expiration range | footer CRC |
|
|
|
|
// +--------------+------------+-------------------+------------+
|
|
|
|
// | Fixed32 | Fixed64 | Fixed64 + Fixed64 | Fixed32 |
|
|
|
|
// +--------------+------------+-------------------+------------+
|
2017-10-27 22:14:34 +02:00
|
|
|
//
|
|
|
|
// The footer is present only when the blob file has been properly closed.
|
|
|
|
//
|
|
|
|
// Unlike the same field in file header, expiration range in the footer is the
|
|
|
|
// range of smallest and largest expiration of the data in this file.
|
|
|
|
struct BlobLogFooter {
|
2017-12-15 22:18:32 +01:00
|
|
|
static constexpr size_t kSize = 32;
|
2017-05-10 23:54:35 +02:00
|
|
|
|
2017-11-01 00:27:22 +01:00
|
|
|
uint64_t blob_count = 0;
|
|
|
|
ExpirationRange expiration_range = std::make_pair(0, 0);
|
|
|
|
uint32_t crc = 0;
|
2017-05-10 23:54:35 +02:00
|
|
|
|
2017-10-27 22:14:34 +02:00
|
|
|
void EncodeTo(std::string* dst);
|
2017-05-10 23:54:35 +02:00
|
|
|
|
2017-10-27 22:14:34 +02:00
|
|
|
Status DecodeFrom(Slice slice);
|
2017-05-10 23:54:35 +02:00
|
|
|
};
|
|
|
|
|
2017-10-27 22:14:34 +02:00
|
|
|
// Blob record format (32 bytes header + key + value):
|
|
|
|
//
|
|
|
|
// +------------+--------------+------------+------------+----------+---------+-----------+
|
|
|
|
// | key length | value length | expiration | header CRC | blob CRC | key | value |
|
|
|
|
// +------------+--------------+------------+------------+----------+---------+-----------+
|
|
|
|
// | Fixed64 | Fixed64 | Fixed64 | Fixed32 | Fixed32 | key len | value len |
|
|
|
|
// +------------+--------------+------------+------------+----------+---------+-----------+
|
|
|
|
//
|
|
|
|
// If file has has_ttl = false, expiration field is always 0, and the blob
|
|
|
|
// doesn't have an expiration.
|
|
|
|
//
|
|
|
|
// Also note that if compression is used, value is compressed value and value
|
|
|
|
// length is compressed value length.
|
|
|
|
//
|
|
|
|
// Header CRC is the checksum of (key_len + val_len + expiration), while
|
|
|
|
// blob CRC is the checksum of (key + value).
|
|
|
|
//
|
|
|
|
// We could use variable length encoding (Varint64) to save more space, but it
|
|
|
|
// would make the reader more complicated.
|
|
|
|
struct BlobLogRecord {
|
|
|
|
// header include fields up to blob CRC
|
|
|
|
static constexpr size_t kHeaderSize = 32;
|
|
|
|
|
Introduce a blob file reader class (#7461)
Summary:
The patch adds a class called `BlobFileReader` that can be used to retrieve blobs
using the information available in blob references (e.g. blob file number, offset, and
size). This will come in handy when implementing blob support for `Get`, `MultiGet`,
and iterators, and also for compaction/garbage collection.
When a `BlobFileReader` object is created (using the factory method `Create`),
it first checks whether the specified file is potentially valid by comparing the file
size against the combined size of the blob file header and footer (files smaller than
the threshold are considered malformed). Then, it opens the file, and reads and verifies
the header and footer. The verification involves magic number/CRC checks
as well as checking for unexpected header/footer fields, e.g. incorrect column family ID
or TTL blob files.
Blobs can be retrieved using `GetBlob`. `GetBlob` validates the offset and compression
type passed by the caller (because of the presence of the header and footer, the
specified offset cannot be too close to the start/end of the file; also, the compression type
has to match the one in the blob file header), and retrieves and potentially verifies and
uncompresses the blob. In particular, when `ReadOptions::verify_checksums` is set,
`BlobFileReader` reads the blob record header as well (as opposed to just the blob itself)
and verifies the key/value size, the key itself, as well as the CRC of the blob record header
and the key/value pair.
In addition, the patch exposes the compression type from `BlobIndex` (both using an
accessor and via `DebugString`), and adds a blob file read latency histogram to
`InternalStats` that can be used with `BlobFileReader`.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7461
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D23999219
Pulled By: ltamasi
fbshipit-source-id: deb6b1160d251258b308d5156e2ec063c3e12e5e
2020-10-08 00:43:23 +02:00
|
|
|
// Note that the offset field of BlobIndex actually points to the blob value
|
|
|
|
// as opposed to the start of the blob record. The following method can
|
|
|
|
// be used to calculate the adjustment needed to read the blob record header.
|
|
|
|
static uint64_t CalculateAdjustmentForRecordHeader(uint64_t key_size);
|
|
|
|
|
2017-11-01 00:27:22 +01:00
|
|
|
uint64_t key_size = 0;
|
|
|
|
uint64_t value_size = 0;
|
|
|
|
uint64_t expiration = 0;
|
|
|
|
uint32_t header_crc = 0;
|
|
|
|
uint32_t blob_crc = 0;
|
2017-10-27 22:14:34 +02:00
|
|
|
Slice key;
|
|
|
|
Slice value;
|
2018-11-13 21:46:35 +01:00
|
|
|
std::unique_ptr<char[]> key_buf;
|
|
|
|
std::unique_ptr<char[]> value_buf;
|
2017-10-27 22:14:34 +02:00
|
|
|
|
2017-11-28 20:42:28 +01:00
|
|
|
uint64_t record_size() const { return kHeaderSize + key_size + value_size; }
|
|
|
|
|
2017-10-27 22:14:34 +02:00
|
|
|
void EncodeHeaderTo(std::string* dst);
|
|
|
|
|
|
|
|
Status DecodeHeaderFrom(Slice src);
|
|
|
|
|
|
|
|
Status CheckBlobCRC() const;
|
2017-05-10 23:54:35 +02:00
|
|
|
};
|
|
|
|
|
Introduce a blob file reader class (#7461)
Summary:
The patch adds a class called `BlobFileReader` that can be used to retrieve blobs
using the information available in blob references (e.g. blob file number, offset, and
size). This will come in handy when implementing blob support for `Get`, `MultiGet`,
and iterators, and also for compaction/garbage collection.
When a `BlobFileReader` object is created (using the factory method `Create`),
it first checks whether the specified file is potentially valid by comparing the file
size against the combined size of the blob file header and footer (files smaller than
the threshold are considered malformed). Then, it opens the file, and reads and verifies
the header and footer. The verification involves magic number/CRC checks
as well as checking for unexpected header/footer fields, e.g. incorrect column family ID
or TTL blob files.
Blobs can be retrieved using `GetBlob`. `GetBlob` validates the offset and compression
type passed by the caller (because of the presence of the header and footer, the
specified offset cannot be too close to the start/end of the file; also, the compression type
has to match the one in the blob file header), and retrieves and potentially verifies and
uncompresses the blob. In particular, when `ReadOptions::verify_checksums` is set,
`BlobFileReader` reads the blob record header as well (as opposed to just the blob itself)
and verifies the key/value size, the key itself, as well as the CRC of the blob record header
and the key/value pair.
In addition, the patch exposes the compression type from `BlobIndex` (both using an
accessor and via `DebugString`), and adds a blob file read latency histogram to
`InternalStats` that can be used with `BlobFileReader`.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7461
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D23999219
Pulled By: ltamasi
fbshipit-source-id: deb6b1160d251258b308d5156e2ec063c3e12e5e
2020-10-08 00:43:23 +02:00
|
|
|
// Checks whether a blob offset is potentially valid or not.
|
|
|
|
inline bool IsValidBlobOffset(uint64_t value_offset, uint64_t key_size,
|
|
|
|
uint64_t value_size, uint64_t file_size) {
|
|
|
|
if (value_offset <
|
|
|
|
BlobLogHeader::kSize + BlobLogRecord::kHeaderSize + key_size) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (value_offset + value_size + BlobLogFooter::kSize > file_size) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-02-20 21:07:53 +01:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|