More refactoring ahead of footer & meta changes (#9240)
Summary: I'm working on a new format_version=6 to support context checksum (https://github.com/facebook/rocksdb/issues/9058) and this includes much of the refactoring and test updates to support that change. Test coverage data and manual inspection agree on dead code in block_based_table_reader.cc (removed). Pull Request resolved: https://github.com/facebook/rocksdb/pull/9240 Test Plan: tests enhanced to cover more cases etc. Extreme case performance testing indicates small % regression in fillseq (w/ compaction), though CPU profile etc. doesn't suggest any explanation. There is enhanced correctness checking in Footer::DecodeFrom, but this should be negligible. TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=fillseq -memtablerep=vector -allow_concurrent_memtable_write=false -num=30000000 -checksum_type=1 --disable_wal={false,true} (Each is ops/s averaged over 50 runs, run simultaneously with competing configuration for load fairness) Before w/ wal: 454512 After w/ wal: 444820 (-2.1%) Before w/o wal: 1004560 After w/o wal: 998897 (-0.6%) Since this doesn't modify WAL code, one would expect real effects to be larger in w/o wal case. This regression will be corrected in a follow-up PR. Reviewed By: ajkr Differential Revision: D32813769 Pulled By: pdillinger fbshipit-source-id: 444a244eabf3825cd329b7d1b150cddce320862f
This commit is contained in:
parent
f57745814f
commit
653c392e47
@ -554,7 +554,7 @@ class ColumnFamilyTest
|
|||||||
INSTANTIATE_TEST_CASE_P(FormatDef, ColumnFamilyTest,
|
INSTANTIATE_TEST_CASE_P(FormatDef, ColumnFamilyTest,
|
||||||
testing::Values(test::kDefaultFormatVersion));
|
testing::Values(test::kDefaultFormatVersion));
|
||||||
INSTANTIATE_TEST_CASE_P(FormatLatest, ColumnFamilyTest,
|
INSTANTIATE_TEST_CASE_P(FormatLatest, ColumnFamilyTest,
|
||||||
testing::Values(test::kLatestFormatVersion));
|
testing::Values(kLatestFormatVersion));
|
||||||
|
|
||||||
TEST_P(ColumnFamilyTest, DontReuseColumnFamilyID) {
|
TEST_P(ColumnFamilyTest, DontReuseColumnFamilyID) {
|
||||||
for (int iter = 0; iter < 3; ++iter) {
|
for (int iter = 0; iter < 3; ++iter) {
|
||||||
@ -746,8 +746,8 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
std::make_tuple(test::kDefaultFormatVersion, false)));
|
std::make_tuple(test::kDefaultFormatVersion, false)));
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
FormatLatest, FlushEmptyCFTestWithParam,
|
FormatLatest, FlushEmptyCFTestWithParam,
|
||||||
testing::Values(std::make_tuple(test::kLatestFormatVersion, true),
|
testing::Values(std::make_tuple(kLatestFormatVersion, true),
|
||||||
std::make_tuple(test::kLatestFormatVersion, false)));
|
std::make_tuple(kLatestFormatVersion, false)));
|
||||||
|
|
||||||
TEST_P(ColumnFamilyTest, AddDrop) {
|
TEST_P(ColumnFamilyTest, AddDrop) {
|
||||||
Open();
|
Open();
|
||||||
|
@ -317,7 +317,7 @@ class ComparatorDBTest
|
|||||||
INSTANTIATE_TEST_CASE_P(FormatDef, ComparatorDBTest,
|
INSTANTIATE_TEST_CASE_P(FormatDef, ComparatorDBTest,
|
||||||
testing::Values(test::kDefaultFormatVersion));
|
testing::Values(test::kDefaultFormatVersion));
|
||||||
INSTANTIATE_TEST_CASE_P(FormatLatest, ComparatorDBTest,
|
INSTANTIATE_TEST_CASE_P(FormatLatest, ComparatorDBTest,
|
||||||
testing::Values(test::kLatestFormatVersion));
|
testing::Values(kLatestFormatVersion));
|
||||||
|
|
||||||
TEST_P(ComparatorDBTest, Bytewise) {
|
TEST_P(ComparatorDBTest, Bytewise) {
|
||||||
for (int rand_seed = 301; rand_seed < 306; rand_seed++) {
|
for (int rand_seed = 301; rand_seed < 306; rand_seed++) {
|
||||||
|
@ -546,7 +546,7 @@ TEST_F(CorruptionTest, RangeDeletionCorrupted) {
|
|||||||
BlockHandle range_del_handle;
|
BlockHandle range_del_handle;
|
||||||
ASSERT_OK(FindMetaBlockInFile(
|
ASSERT_OK(FindMetaBlockInFile(
|
||||||
file_reader.get(), file_size, kBlockBasedTableMagicNumber,
|
file_reader.get(), file_size, kBlockBasedTableMagicNumber,
|
||||||
ImmutableOptions(options_), kRangeDelBlock, &range_del_handle));
|
ImmutableOptions(options_), kRangeDelBlockName, &range_del_handle));
|
||||||
|
|
||||||
ASSERT_OK(TryReopen());
|
ASSERT_OK(TryReopen());
|
||||||
ASSERT_OK(test::CorruptFile(env_, filename,
|
ASSERT_OK(test::CorruptFile(env_, filename,
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "rocksdb/flush_block_policy.h"
|
#include "rocksdb/flush_block_policy.h"
|
||||||
#include "rocksdb/merge_operator.h"
|
#include "rocksdb/merge_operator.h"
|
||||||
#include "rocksdb/perf_context.h"
|
#include "rocksdb/perf_context.h"
|
||||||
|
#include "rocksdb/table.h"
|
||||||
#include "rocksdb/utilities/debug.h"
|
#include "rocksdb/utilities/debug.h"
|
||||||
#include "table/block_based/block_based_table_reader.h"
|
#include "table/block_based/block_based_table_reader.h"
|
||||||
#include "table/block_based/block_builder.h"
|
#include "table/block_based/block_builder.h"
|
||||||
@ -972,8 +973,15 @@ TEST_F(DBBasicTest, MultiGetEmpty) {
|
|||||||
} while (ChangeCompactOptions());
|
} while (ChangeCompactOptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DBBasicTest, ChecksumTest) {
|
class DBBlockChecksumTest : public DBBasicTest,
|
||||||
|
public testing::WithParamInterface<uint32_t> {};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(FormatVersions, DBBlockChecksumTest,
|
||||||
|
testing::ValuesIn(test::kFooterFormatVersionsToTest));
|
||||||
|
|
||||||
|
TEST_P(DBBlockChecksumTest, BlockChecksumTest) {
|
||||||
BlockBasedTableOptions table_options;
|
BlockBasedTableOptions table_options;
|
||||||
|
table_options.format_version = GetParam();
|
||||||
Options options = CurrentOptions();
|
Options options = CurrentOptions();
|
||||||
const int kNumPerFile = 2;
|
const int kNumPerFile = 2;
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "db/column_family.h"
|
#include "db/column_family.h"
|
||||||
#include "db/db_test_util.h"
|
#include "db/db_test_util.h"
|
||||||
#include "port/stack_trace.h"
|
#include "port/stack_trace.h"
|
||||||
|
#include "rocksdb/persistent_cache.h"
|
||||||
#include "rocksdb/statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
#include "rocksdb/table.h"
|
#include "rocksdb/table.h"
|
||||||
#include "util/compression.h"
|
#include "util/compression.h"
|
||||||
|
@ -551,10 +551,9 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
FormatLatest, DBBloomFilterTestWithParam,
|
FormatLatest, DBBloomFilterTestWithParam,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
std::make_tuple(BFP::kDeprecatedBlock, false,
|
std::make_tuple(BFP::kDeprecatedBlock, false, kLatestFormatVersion),
|
||||||
test::kLatestFormatVersion),
|
std::make_tuple(BFP::kAutoBloom, true, kLatestFormatVersion),
|
||||||
std::make_tuple(BFP::kAutoBloom, true, test::kLatestFormatVersion),
|
std::make_tuple(BFP::kAutoBloom, false, kLatestFormatVersion)));
|
||||||
std::make_tuple(BFP::kAutoBloom, false, test::kLatestFormatVersion)));
|
|
||||||
#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
|
#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
|
||||||
|
|
||||||
TEST_F(DBBloomFilterTest, BloomFilterRate) {
|
TEST_F(DBBloomFilterTest, BloomFilterRate) {
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "rocksdb/env_encryption.h"
|
#include "rocksdb/env_encryption.h"
|
||||||
#include "rocksdb/unique_id.h"
|
#include "rocksdb/unique_id.h"
|
||||||
#include "rocksdb/utilities/object_registry.h"
|
#include "rocksdb/utilities/object_registry.h"
|
||||||
|
#include "table/format.h"
|
||||||
#include "util/random.h"
|
#include "util/random.h"
|
||||||
|
|
||||||
namespace ROCKSDB_NAMESPACE {
|
namespace ROCKSDB_NAMESPACE {
|
||||||
@ -516,6 +517,11 @@ Options DBTestBase::GetOptions(
|
|||||||
table_options.index_block_restart_interval = 8;
|
table_options.index_block_restart_interval = 8;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case kBlockBasedTableWithLatestFormat: {
|
||||||
|
// In case different from default
|
||||||
|
table_options.format_version = kLatestFormatVersion;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case kOptimizeFiltersForHits: {
|
case kOptimizeFiltersForHits: {
|
||||||
options.optimize_filters_for_hits = true;
|
options.optimize_filters_for_hits = true;
|
||||||
set_block_based_table_factory = true;
|
set_block_based_table_factory = true;
|
||||||
|
@ -867,6 +867,7 @@ class DBTestBase : public testing::Test {
|
|||||||
kBlockBasedTableWithIndexRestartInterval,
|
kBlockBasedTableWithIndexRestartInterval,
|
||||||
kBlockBasedTableWithPartitionedIndex,
|
kBlockBasedTableWithPartitionedIndex,
|
||||||
kBlockBasedTableWithPartitionedIndexFormat4,
|
kBlockBasedTableWithPartitionedIndexFormat4,
|
||||||
|
kBlockBasedTableWithLatestFormat,
|
||||||
kPartitionedFilterWithNewTableReaderForCompactions,
|
kPartitionedFilterWithNewTableReaderForCompactions,
|
||||||
kUniversalSubcompactions,
|
kUniversalSubcompactions,
|
||||||
kUnorderedWrite,
|
kUnorderedWrite,
|
||||||
|
@ -41,16 +41,33 @@ class ExternalSSTTestEnv : public EnvWrapper {
|
|||||||
bool fail_link_;
|
bool fail_link_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class ExternalSSTFileTestBase : public DBTestBase {
|
||||||
|
public:
|
||||||
|
ExternalSSTFileTestBase()
|
||||||
|
: DBTestBase("external_sst_file_test", /*env_do_fsync=*/true) {
|
||||||
|
sst_files_dir_ = dbname_ + "/sst_files/";
|
||||||
|
DestroyAndRecreateExternalSSTFilesDir();
|
||||||
|
}
|
||||||
|
|
||||||
|
void DestroyAndRecreateExternalSSTFilesDir() {
|
||||||
|
ASSERT_OK(DestroyDir(env_, sst_files_dir_));
|
||||||
|
ASSERT_OK(env_->CreateDir(sst_files_dir_));
|
||||||
|
}
|
||||||
|
|
||||||
|
~ExternalSSTFileTestBase() override {
|
||||||
|
DestroyDir(env_, sst_files_dir_).PermitUncheckedError();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
std::string sst_files_dir_;
|
||||||
|
};
|
||||||
|
|
||||||
class ExternSSTFileLinkFailFallbackTest
|
class ExternSSTFileLinkFailFallbackTest
|
||||||
: public DBTestBase,
|
: public ExternalSSTFileTestBase,
|
||||||
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
|
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
|
||||||
public:
|
public:
|
||||||
ExternSSTFileLinkFailFallbackTest()
|
ExternSSTFileLinkFailFallbackTest()
|
||||||
: DBTestBase("external_sst_file_test", /*env_do_fsync=*/true),
|
: test_env_(new ExternalSSTTestEnv(env_, true)) {
|
||||||
test_env_(new ExternalSSTTestEnv(env_, true)) {
|
|
||||||
sst_files_dir_ = dbname_ + "/sst_files/";
|
|
||||||
EXPECT_EQ(DestroyDir(env_, sst_files_dir_), Status::OK());
|
|
||||||
EXPECT_EQ(env_->CreateDir(sst_files_dir_), Status::OK());
|
|
||||||
options_ = CurrentOptions();
|
options_ = CurrentOptions();
|
||||||
options_.disable_auto_compactions = true;
|
options_.disable_auto_compactions = true;
|
||||||
options_.env = test_env_;
|
options_.env = test_env_;
|
||||||
@ -65,25 +82,15 @@ class ExternSSTFileLinkFailFallbackTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::string sst_files_dir_;
|
|
||||||
Options options_;
|
Options options_;
|
||||||
ExternalSSTTestEnv* test_env_;
|
ExternalSSTTestEnv* test_env_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ExternalSSTFileTest
|
class ExternalSSTFileTest
|
||||||
: public DBTestBase,
|
: public ExternalSSTFileTestBase,
|
||||||
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
|
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
|
||||||
public:
|
public:
|
||||||
ExternalSSTFileTest()
|
ExternalSSTFileTest() {}
|
||||||
: DBTestBase("external_sst_file_test", /*env_do_fsync=*/true) {
|
|
||||||
sst_files_dir_ = dbname_ + "/sst_files/";
|
|
||||||
DestroyAndRecreateExternalSSTFilesDir();
|
|
||||||
}
|
|
||||||
|
|
||||||
void DestroyAndRecreateExternalSSTFilesDir() {
|
|
||||||
ASSERT_OK(DestroyDir(env_, sst_files_dir_));
|
|
||||||
ASSERT_OK(env_->CreateDir(sst_files_dir_));
|
|
||||||
}
|
|
||||||
|
|
||||||
Status GenerateOneExternalFile(
|
Status GenerateOneExternalFile(
|
||||||
const Options& options, ColumnFamilyHandle* cfh,
|
const Options& options, ColumnFamilyHandle* cfh,
|
||||||
@ -282,13 +289,8 @@ class ExternalSSTFileTest
|
|||||||
return db_->IngestExternalFile(files, opts);
|
return db_->IngestExternalFile(files, opts);
|
||||||
}
|
}
|
||||||
|
|
||||||
~ExternalSSTFileTest() override {
|
|
||||||
DestroyDir(env_, sst_files_dir_).PermitUncheckedError();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
int last_file_id_ = 0;
|
int last_file_id_ = 0;
|
||||||
std::string sst_files_dir_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(ExternalSSTFileTest, Basic) {
|
TEST_F(ExternalSSTFileTest, Basic) {
|
||||||
@ -2382,10 +2384,18 @@ TEST_F(ExternalSSTFileTest, IngestFileWrittenWithCompressionDictionary) {
|
|||||||
ASSERT_EQ(1, num_compression_dicts);
|
ASSERT_EQ(1, num_compression_dicts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class ExternalSSTBlockChecksumTest
|
||||||
|
: public ExternalSSTFileTestBase,
|
||||||
|
public testing::WithParamInterface<uint32_t> {};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(FormatVersions, ExternalSSTBlockChecksumTest,
|
||||||
|
testing::ValuesIn(test::kFooterFormatVersionsToTest));
|
||||||
|
|
||||||
// Very slow, not worth the cost to run regularly
|
// Very slow, not worth the cost to run regularly
|
||||||
TEST_F(ExternalSSTFileTest, DISABLED_HugeBlockChecksum) {
|
TEST_P(ExternalSSTBlockChecksumTest, DISABLED_HugeBlockChecksum) {
|
||||||
for (auto t : GetSupportedChecksums()) {
|
|
||||||
BlockBasedTableOptions table_options;
|
BlockBasedTableOptions table_options;
|
||||||
|
table_options.format_version = GetParam();
|
||||||
|
for (auto t : GetSupportedChecksums()) {
|
||||||
table_options.checksum = t;
|
table_options.checksum = t;
|
||||||
Options options = CurrentOptions();
|
Options options = CurrentOptions();
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
@ -1271,8 +1271,8 @@ Status Version::GetTableProperties(std::shared_ptr<const TableProperties>* tp,
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
// By setting the magic number to kInvalidTableMagicNumber, we can by
|
// By setting the magic number to kNullTableMagicNumber, we can bypass
|
||||||
// pass the magic number check in the footer.
|
// the magic number check in the footer.
|
||||||
std::unique_ptr<RandomAccessFileReader> file_reader(
|
std::unique_ptr<RandomAccessFileReader> file_reader(
|
||||||
new RandomAccessFileReader(
|
new RandomAccessFileReader(
|
||||||
std::move(file), file_name, nullptr /* env */, io_tracer_,
|
std::move(file), file_name, nullptr /* env */, io_tracer_,
|
||||||
@ -1281,7 +1281,7 @@ Status Version::GetTableProperties(std::shared_ptr<const TableProperties>* tp,
|
|||||||
std::unique_ptr<TableProperties> props;
|
std::unique_ptr<TableProperties> props;
|
||||||
s = ReadTableProperties(
|
s = ReadTableProperties(
|
||||||
file_reader.get(), file_meta->fd.GetFileSize(),
|
file_reader.get(), file_meta->fd.GetFileSize(),
|
||||||
Footer::kInvalidTableMagicNumber /* table's magic number */, *ioptions,
|
Footer::kNullTableMagicNumber /* table's magic number */, *ioptions,
|
||||||
&props);
|
&props);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return s;
|
return s;
|
||||||
|
@ -53,6 +53,7 @@ class StatsHistoryIterator {
|
|||||||
// REQUIRES: Valid()
|
// REQUIRES: Valid()
|
||||||
virtual uint64_t GetStatsTime() const = 0;
|
virtual uint64_t GetStatsTime() const = 0;
|
||||||
|
|
||||||
|
// DEPRECATED (was never used)
|
||||||
virtual int GetFormatVersion() const { return -1; }
|
virtual int GetFormatVersion() const { return -1; }
|
||||||
|
|
||||||
// Return the current stats history as an std::map which specifies the
|
// Return the current stats history as an std::map which specifies the
|
||||||
|
@ -44,6 +44,9 @@ class WritableFileWriter;
|
|||||||
struct ConfigOptions;
|
struct ConfigOptions;
|
||||||
struct EnvOptions;
|
struct EnvOptions;
|
||||||
|
|
||||||
|
// Types of checksums to use for checking integrity of logical blocks within
|
||||||
|
// files. All checksums currently use 32 bits of checking power (1 in 4B
|
||||||
|
// chance of failing to detect random corruption).
|
||||||
enum ChecksumType : char {
|
enum ChecksumType : char {
|
||||||
kNoChecksum = 0x0,
|
kNoChecksum = 0x0,
|
||||||
kCRC32c = 0x1,
|
kCRC32c = 0x1,
|
||||||
@ -390,10 +393,9 @@ struct BlockBasedTableOptions {
|
|||||||
// Default: 0 (disabled)
|
// Default: 0 (disabled)
|
||||||
uint32_t read_amp_bytes_per_bit = 0;
|
uint32_t read_amp_bytes_per_bit = 0;
|
||||||
|
|
||||||
// We currently have five versions:
|
// We currently have these versions:
|
||||||
// 0 -- This version is currently written out by all RocksDB's versions by
|
// 0 -- This version can be read by really old RocksDB's. Doesn't support
|
||||||
// default. Can be read by really old RocksDB's. Doesn't support changing
|
// changing checksum type (default is CRC32).
|
||||||
// checksum (default is CRC32).
|
|
||||||
// 1 -- Can be read by RocksDB's versions since 3.0. Supports non-default
|
// 1 -- Can be read by RocksDB's versions since 3.0. Supports non-default
|
||||||
// checksum, like xxHash. It is written by RocksDB when
|
// checksum, like xxHash. It is written by RocksDB when
|
||||||
// BlockBasedTableOptions::checksum is something other than kCRC32c. (version
|
// BlockBasedTableOptions::checksum is something other than kCRC32c. (version
|
||||||
|
@ -1744,7 +1744,7 @@ void BlockBasedTableBuilder::WritePropertiesBlock(
|
|||||||
}
|
}
|
||||||
#endif // !NDEBUG
|
#endif // !NDEBUG
|
||||||
|
|
||||||
const std::string* properties_block_meta = &kPropertiesBlock;
|
const std::string* properties_block_meta = &kPropertiesBlockName;
|
||||||
TEST_SYNC_POINT_CALLBACK(
|
TEST_SYNC_POINT_CALLBACK(
|
||||||
"BlockBasedTableBuilder::WritePropertiesBlock:Meta",
|
"BlockBasedTableBuilder::WritePropertiesBlock:Meta",
|
||||||
&properties_block_meta);
|
&properties_block_meta);
|
||||||
@ -1769,7 +1769,7 @@ void BlockBasedTableBuilder::WriteCompressionDictBlock(
|
|||||||
#endif // NDEBUG
|
#endif // NDEBUG
|
||||||
}
|
}
|
||||||
if (ok()) {
|
if (ok()) {
|
||||||
meta_index_builder->Add(kCompressionDictBlock,
|
meta_index_builder->Add(kCompressionDictBlockName,
|
||||||
compression_dict_block_handle);
|
compression_dict_block_handle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1781,7 +1781,7 @@ void BlockBasedTableBuilder::WriteRangeDelBlock(
|
|||||||
BlockHandle range_del_block_handle;
|
BlockHandle range_del_block_handle;
|
||||||
WriteRawBlock(rep_->range_del_block.Finish(), kNoCompression,
|
WriteRawBlock(rep_->range_del_block.Finish(), kNoCompression,
|
||||||
&range_del_block_handle, BlockType::kRangeDeletion);
|
&range_del_block_handle, BlockType::kRangeDeletion);
|
||||||
meta_index_builder->Add(kRangeDelBlock, range_del_block_handle);
|
meta_index_builder->Add(kRangeDelBlockName, range_del_block_handle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1799,14 +1799,16 @@ void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle,
|
|||||||
// this is guaranteed by BlockBasedTableBuilder's constructor
|
// this is guaranteed by BlockBasedTableBuilder's constructor
|
||||||
assert(r->table_options.checksum == kCRC32c ||
|
assert(r->table_options.checksum == kCRC32c ||
|
||||||
r->table_options.format_version != 0);
|
r->table_options.format_version != 0);
|
||||||
Footer footer(
|
Footer footer;
|
||||||
legacy ? kLegacyBlockBasedTableMagicNumber : kBlockBasedTableMagicNumber,
|
footer
|
||||||
r->table_options.format_version);
|
.set_table_magic_number(legacy ? kLegacyBlockBasedTableMagicNumber
|
||||||
footer.set_metaindex_handle(metaindex_block_handle);
|
: kBlockBasedTableMagicNumber)
|
||||||
footer.set_index_handle(index_block_handle);
|
.set_format_version(r->table_options.format_version)
|
||||||
footer.set_checksum(r->table_options.checksum);
|
.set_metaindex_handle(metaindex_block_handle)
|
||||||
|
.set_index_handle(index_block_handle)
|
||||||
|
.set_checksum_type(r->table_options.checksum);
|
||||||
std::string footer_encoding;
|
std::string footer_encoding;
|
||||||
footer.EncodeTo(&footer_encoding);
|
footer.EncodeTo(&footer_encoding, r->get_offset());
|
||||||
assert(ok());
|
assert(ok());
|
||||||
IOStatus ios = r->file->Append(footer_encoding);
|
IOStatus ios = r->file->Append(footer_encoding);
|
||||||
if (ios.ok()) {
|
if (ios.ok()) {
|
||||||
|
@ -650,7 +650,7 @@ Status BlockBasedTableFactory::ValidateOptions(
|
|||||||
"Enable pin_l0_filter_and_index_blocks_in_cache, "
|
"Enable pin_l0_filter_and_index_blocks_in_cache, "
|
||||||
", but block cache is disabled");
|
", but block cache is disabled");
|
||||||
}
|
}
|
||||||
if (!BlockBasedTableSupportedVersion(table_options_.format_version)) {
|
if (!IsSupportedFormatVersion(table_options_.format_version)) {
|
||||||
return Status::InvalidArgument(
|
return Status::InvalidArgument(
|
||||||
"Unsupported BlockBasedTable format_version. Please check "
|
"Unsupported BlockBasedTable format_version. Please check "
|
||||||
"include/rocksdb/table.h for more info");
|
"include/rocksdb/table.h for more info");
|
||||||
|
@ -600,7 +600,7 @@ Status BlockBasedTable::Open(
|
|||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
if (!BlockBasedTableSupportedVersion(footer.version())) {
|
if (!IsSupportedFormatVersion(footer.format_version())) {
|
||||||
return Status::Corruption(
|
return Status::Corruption(
|
||||||
"Unknown Footer version. Maybe this file was created with newer "
|
"Unknown Footer version. Maybe this file was created with newer "
|
||||||
"version of RocksDB?");
|
"version of RocksDB?");
|
||||||
@ -757,7 +757,7 @@ Status BlockBasedTable::ReadPropertiesBlock(
|
|||||||
InternalIterator* meta_iter, const SequenceNumber largest_seqno) {
|
InternalIterator* meta_iter, const SequenceNumber largest_seqno) {
|
||||||
Status s;
|
Status s;
|
||||||
BlockHandle handle;
|
BlockHandle handle;
|
||||||
s = FindOptionalMetaBlock(meta_iter, kPropertiesBlock, &handle);
|
s = FindOptionalMetaBlock(meta_iter, kPropertiesBlockName, &handle);
|
||||||
|
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
ROCKS_LOG_WARN(rep_->ioptions.logger,
|
ROCKS_LOG_WARN(rep_->ioptions.logger,
|
||||||
@ -856,7 +856,7 @@ Status BlockBasedTable::ReadRangeDelBlock(
|
|||||||
BlockCacheLookupContext* lookup_context) {
|
BlockCacheLookupContext* lookup_context) {
|
||||||
Status s;
|
Status s;
|
||||||
BlockHandle range_del_handle;
|
BlockHandle range_del_handle;
|
||||||
s = FindOptionalMetaBlock(meta_iter, kRangeDelBlock, &range_del_handle);
|
s = FindOptionalMetaBlock(meta_iter, kRangeDelBlockName, &range_del_handle);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
ROCKS_LOG_WARN(
|
ROCKS_LOG_WARN(
|
||||||
rep_->ioptions.logger,
|
rep_->ioptions.logger,
|
||||||
@ -925,7 +925,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
|
|||||||
rep_->index_type == BlockBasedTableOptions::kTwoLevelIndexSearch);
|
rep_->index_type == BlockBasedTableOptions::kTwoLevelIndexSearch);
|
||||||
|
|
||||||
// Find compression dictionary handle
|
// Find compression dictionary handle
|
||||||
s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlock,
|
s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlockName,
|
||||||
&rep_->compression_dict_handle);
|
&rep_->compression_dict_handle);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return s;
|
return s;
|
||||||
@ -1808,7 +1808,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|||||||
// begin address of each read request, we need to add the offset
|
// begin address of each read request, we need to add the offset
|
||||||
// in each read request. Checksum is stored in the block trailer,
|
// in each read request. Checksum is stored in the block trailer,
|
||||||
// beyond the payload size.
|
// beyond the payload size.
|
||||||
s = VerifyBlockChecksum(footer.checksum(), data + req_offset,
|
s = VerifyBlockChecksum(footer.checksum_type(), data + req_offset,
|
||||||
handle.size(), rep_->file->file_name(),
|
handle.size(), rep_->file->file_name(),
|
||||||
handle.offset());
|
handle.offset());
|
||||||
TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s);
|
TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s);
|
||||||
@ -1875,9 +1875,9 @@ void BlockBasedTable::RetrieveMultipleBlocks(
|
|||||||
if (compression_type != kNoCompression) {
|
if (compression_type != kNoCompression) {
|
||||||
UncompressionContext context(compression_type);
|
UncompressionContext context(compression_type);
|
||||||
UncompressionInfo info(context, uncompression_dict, compression_type);
|
UncompressionInfo info(context, uncompression_dict, compression_type);
|
||||||
s = UncompressBlockContents(info, req.result.data() + req_offset,
|
s = UncompressBlockContents(
|
||||||
handle.size(), &contents, footer.version(),
|
info, req.result.data() + req_offset, handle.size(), &contents,
|
||||||
rep_->ioptions, memory_allocator);
|
footer.format_version(), rep_->ioptions, memory_allocator);
|
||||||
} else {
|
} else {
|
||||||
// There are two cases here:
|
// There are two cases here:
|
||||||
// 1) caller uses the shared buffer (scratch or direct io buffer);
|
// 1) caller uses the shared buffer (scratch or direct io buffer);
|
||||||
@ -3008,15 +3008,15 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName(
|
|||||||
return BlockType::kFilter;
|
return BlockType::kFilter;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (meta_block_name == kPropertiesBlock) {
|
if (meta_block_name == kPropertiesBlockName) {
|
||||||
return BlockType::kProperties;
|
return BlockType::kProperties;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (meta_block_name == kCompressionDictBlock) {
|
if (meta_block_name == kCompressionDictBlockName) {
|
||||||
return BlockType::kCompressionDictionary;
|
return BlockType::kCompressionDictionary;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (meta_block_name == kRangeDelBlock) {
|
if (meta_block_name == kRangeDelBlockName) {
|
||||||
return BlockType::kRangeDeletion;
|
return BlockType::kRangeDeletion;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3045,7 +3045,7 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks(
|
|||||||
s = handle.DecodeFrom(&input);
|
s = handle.DecodeFrom(&input);
|
||||||
BlockContents contents;
|
BlockContents contents;
|
||||||
const Slice meta_block_name = index_iter->key();
|
const Slice meta_block_name = index_iter->key();
|
||||||
if (meta_block_name == kPropertiesBlock) {
|
if (meta_block_name == kPropertiesBlockName) {
|
||||||
// Unfortunate special handling for properties block checksum w/
|
// Unfortunate special handling for properties block checksum w/
|
||||||
// global seqno
|
// global seqno
|
||||||
std::unique_ptr<TableProperties> table_properties;
|
std::unique_ptr<TableProperties> table_properties;
|
||||||
@ -3111,8 +3111,8 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
|
|||||||
// 5. index_type
|
// 5. index_type
|
||||||
Status BlockBasedTable::CreateIndexReader(
|
Status BlockBasedTable::CreateIndexReader(
|
||||||
const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
|
const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
|
||||||
InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch,
|
InternalIterator* meta_iter, bool use_cache, bool prefetch, bool pin,
|
||||||
bool pin, BlockCacheLookupContext* lookup_context,
|
BlockCacheLookupContext* lookup_context,
|
||||||
std::unique_ptr<IndexReader>* index_reader) {
|
std::unique_ptr<IndexReader>* index_reader) {
|
||||||
// kHashSearch requires non-empty prefix_extractor but bypass checking
|
// kHashSearch requires non-empty prefix_extractor but bypass checking
|
||||||
// prefix_extractor here since we have no access to MutableCFOptions.
|
// prefix_extractor here since we have no access to MutableCFOptions.
|
||||||
@ -3136,25 +3136,12 @@ Status BlockBasedTable::CreateIndexReader(
|
|||||||
case BlockBasedTableOptions::kHashSearch: {
|
case BlockBasedTableOptions::kHashSearch: {
|
||||||
std::unique_ptr<Block> metaindex_guard;
|
std::unique_ptr<Block> metaindex_guard;
|
||||||
std::unique_ptr<InternalIterator> metaindex_iter_guard;
|
std::unique_ptr<InternalIterator> metaindex_iter_guard;
|
||||||
auto meta_index_iter = preloaded_meta_index_iter;
|
|
||||||
bool should_fallback = false;
|
bool should_fallback = false;
|
||||||
if (rep_->internal_prefix_transform.get() == nullptr) {
|
if (rep_->internal_prefix_transform.get() == nullptr) {
|
||||||
ROCKS_LOG_WARN(rep_->ioptions.logger,
|
ROCKS_LOG_WARN(rep_->ioptions.logger,
|
||||||
"No prefix extractor passed in. Fall back to binary"
|
"No prefix extractor passed in. Fall back to binary"
|
||||||
" search index.");
|
" search index.");
|
||||||
should_fallback = true;
|
should_fallback = true;
|
||||||
} else if (meta_index_iter == nullptr) {
|
|
||||||
auto s = ReadMetaIndexBlock(ro, prefetch_buffer, &metaindex_guard,
|
|
||||||
&metaindex_iter_guard);
|
|
||||||
if (!s.ok()) {
|
|
||||||
// we simply fall back to binary search in case there is any
|
|
||||||
// problem with prefix hash index loading.
|
|
||||||
ROCKS_LOG_WARN(rep_->ioptions.logger,
|
|
||||||
"Unable to read the metaindex block."
|
|
||||||
" Fall back to binary search index.");
|
|
||||||
should_fallback = true;
|
|
||||||
}
|
|
||||||
meta_index_iter = metaindex_iter_guard.get();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (should_fallback) {
|
if (should_fallback) {
|
||||||
@ -3162,9 +3149,9 @@ Status BlockBasedTable::CreateIndexReader(
|
|||||||
use_cache, prefetch, pin,
|
use_cache, prefetch, pin,
|
||||||
lookup_context, index_reader);
|
lookup_context, index_reader);
|
||||||
} else {
|
} else {
|
||||||
return HashIndexReader::Create(this, ro, prefetch_buffer,
|
return HashIndexReader::Create(this, ro, prefetch_buffer, meta_iter,
|
||||||
meta_index_iter, use_cache, prefetch,
|
use_cache, prefetch, pin, lookup_context,
|
||||||
pin, lookup_context, index_reader);
|
index_reader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
@ -3357,17 +3344,17 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
|
|||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
if (metaindex_iter->key() == kPropertiesBlock) {
|
if (metaindex_iter->key() == kPropertiesBlockName) {
|
||||||
out_stream << " Properties block handle: "
|
out_stream << " Properties block handle: "
|
||||||
<< metaindex_iter->value().ToString(true) << "\n";
|
<< metaindex_iter->value().ToString(true) << "\n";
|
||||||
} else if (metaindex_iter->key() == kCompressionDictBlock) {
|
} else if (metaindex_iter->key() == kCompressionDictBlockName) {
|
||||||
out_stream << " Compression dictionary block handle: "
|
out_stream << " Compression dictionary block handle: "
|
||||||
<< metaindex_iter->value().ToString(true) << "\n";
|
<< metaindex_iter->value().ToString(true) << "\n";
|
||||||
} else if (strstr(metaindex_iter->key().ToString().c_str(),
|
} else if (strstr(metaindex_iter->key().ToString().c_str(),
|
||||||
"filter.rocksdb.") != nullptr) {
|
"filter.rocksdb.") != nullptr) {
|
||||||
out_stream << " Filter block handle: "
|
out_stream << " Filter block handle: "
|
||||||
<< metaindex_iter->value().ToString(true) << "\n";
|
<< metaindex_iter->value().ToString(true) << "\n";
|
||||||
} else if (metaindex_iter->key() == kRangeDelBlock) {
|
} else if (metaindex_iter->key() == kRangeDelBlockName) {
|
||||||
out_stream << " Range deletion block handle: "
|
out_stream << " Range deletion block handle: "
|
||||||
<< metaindex_iter->value().ToString(true) << "\n";
|
<< metaindex_iter->value().ToString(true) << "\n";
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
#include "table/block_based/filter_block.h"
|
#include "table/block_based/filter_block.h"
|
||||||
#include "table/block_based/uncompression_dict_reader.h"
|
#include "table/block_based/uncompression_dict_reader.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
|
#include "table/persistent_cache_options.h"
|
||||||
#include "table/table_properties_internal.h"
|
#include "table/table_properties_internal.h"
|
||||||
#include "table/table_reader.h"
|
#include "table/table_reader.h"
|
||||||
#include "table/two_level_iterator.h"
|
#include "table/two_level_iterator.h"
|
||||||
|
@ -3,15 +3,15 @@
|
|||||||
// COPYING file in the root directory) and Apache 2.0 License
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
// (found in the LICENSE.Apache file in the root directory).
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#include "table/block_based/partitioned_filter_block.h"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
#include "rocksdb/filter_policy.h"
|
|
||||||
|
|
||||||
#include "table/block_based/block_based_table_reader.h"
|
|
||||||
#include "table/block_based/partitioned_filter_block.h"
|
|
||||||
#include "table/block_based/filter_policy_internal.h"
|
|
||||||
|
|
||||||
#include "index_builder.h"
|
#include "index_builder.h"
|
||||||
|
#include "rocksdb/filter_policy.h"
|
||||||
|
#include "table/block_based/block_based_table_reader.h"
|
||||||
|
#include "table/block_based/filter_policy_internal.h"
|
||||||
|
#include "table/format.h"
|
||||||
#include "test_util/testharness.h"
|
#include "test_util/testharness.h"
|
||||||
#include "test_util/testutil.h"
|
#include "test_util/testutil.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
@ -292,10 +292,11 @@ class PartitionedFilterBlockTest
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest,
|
// Format versions potentially interesting to partitioning
|
||||||
testing::Values(test::kDefaultFormatVersion));
|
INSTANTIATE_TEST_CASE_P(FormatVersions, PartitionedFilterBlockTest,
|
||||||
INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest,
|
testing::ValuesIn(std::set<uint32_t>{
|
||||||
testing::Values(test::kLatestFormatVersion));
|
2, 3, 4, test::kDefaultFormatVersion,
|
||||||
|
kLatestFormatVersion}));
|
||||||
|
|
||||||
TEST_P(PartitionedFilterBlockTest, EmptyBuilder) {
|
TEST_P(PartitionedFilterBlockTest, EmptyBuilder) {
|
||||||
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||||
|
@ -32,8 +32,8 @@ inline void BlockFetcher::ProcessTrailerIfPresent() {
|
|||||||
if (footer_.GetBlockTrailerSize() > 0) {
|
if (footer_.GetBlockTrailerSize() > 0) {
|
||||||
assert(footer_.GetBlockTrailerSize() == BlockBasedTable::kBlockTrailerSize);
|
assert(footer_.GetBlockTrailerSize() == BlockBasedTable::kBlockTrailerSize);
|
||||||
if (read_options_.verify_checksums) {
|
if (read_options_.verify_checksums) {
|
||||||
io_status_ = status_to_io_status(
|
io_status_ = status_to_io_status(VerifyBlockChecksum(
|
||||||
VerifyBlockChecksum(footer_.checksum(), slice_.data(), block_size_,
|
footer_.checksum_type(), slice_.data(), block_size_,
|
||||||
file_->file_name(), handle_.offset()));
|
file_->file_name(), handle_.offset()));
|
||||||
}
|
}
|
||||||
compression_type_ =
|
compression_type_ =
|
||||||
@ -315,7 +315,7 @@ IOStatus BlockFetcher::ReadBlockContents() {
|
|||||||
UncompressionContext context(compression_type_);
|
UncompressionContext context(compression_type_);
|
||||||
UncompressionInfo info(context, uncompression_dict_, compression_type_);
|
UncompressionInfo info(context, uncompression_dict_, compression_type_);
|
||||||
io_status_ = status_to_io_status(UncompressBlockContents(
|
io_status_ = status_to_io_status(UncompressBlockContents(
|
||||||
info, slice_.data(), block_size_, contents_, footer_.version(),
|
info, slice_.data(), block_size_, contents_, footer_.format_version(),
|
||||||
ioptions_, memory_allocator_));
|
ioptions_, memory_allocator_));
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
num_heap_buf_memcpy_++;
|
num_heap_buf_memcpy_++;
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#include "table/block_based/block.h"
|
#include "table/block_based/block.h"
|
||||||
#include "table/block_based/block_type.h"
|
#include "table/block_based/block_type.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
|
#include "table/persistent_cache_options.h"
|
||||||
|
|
||||||
namespace ROCKSDB_NAMESPACE {
|
namespace ROCKSDB_NAMESPACE {
|
||||||
|
|
||||||
|
@ -381,7 +381,7 @@ Status CuckooTableBuilder::Finish() {
|
|||||||
return status_;
|
return status_;
|
||||||
}
|
}
|
||||||
|
|
||||||
meta_index_builder.Add(kPropertiesBlock, property_block_handle);
|
meta_index_builder.Add(kPropertiesBlockName, property_block_handle);
|
||||||
Slice meta_index_block = meta_index_builder.Finish();
|
Slice meta_index_block = meta_index_builder.Finish();
|
||||||
|
|
||||||
BlockHandle meta_index_block_handle;
|
BlockHandle meta_index_block_handle;
|
||||||
@ -393,11 +393,14 @@ Status CuckooTableBuilder::Finish() {
|
|||||||
return status_;
|
return status_;
|
||||||
}
|
}
|
||||||
|
|
||||||
Footer footer(kCuckooTableMagicNumber, 1);
|
Footer footer;
|
||||||
footer.set_metaindex_handle(meta_index_block_handle);
|
footer.set_table_magic_number(kCuckooTableMagicNumber)
|
||||||
footer.set_index_handle(BlockHandle::NullBlockHandle());
|
.set_format_version(1)
|
||||||
|
.set_metaindex_handle(meta_index_block_handle)
|
||||||
|
.set_index_handle(BlockHandle::NullBlockHandle())
|
||||||
|
.set_checksum_type(kNoChecksum);
|
||||||
std::string footer_encoding;
|
std::string footer_encoding;
|
||||||
footer.EncodeTo(&footer_encoding);
|
footer.EncodeTo(&footer_encoding, offset);
|
||||||
io_status_ = file_->Append(footer_encoding);
|
io_status_ = file_->Append(footer_encoding);
|
||||||
status_ = io_status_;
|
status_ = io_status_;
|
||||||
return status_;
|
return status_;
|
||||||
|
183
table/format.cc
183
table/format.cc
@ -20,9 +20,11 @@
|
|||||||
#include "options/options_helper.h"
|
#include "options/options_helper.h"
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
#include "rocksdb/options.h"
|
#include "rocksdb/options.h"
|
||||||
|
#include "rocksdb/table.h"
|
||||||
#include "table/block_based/block.h"
|
#include "table/block_based/block.h"
|
||||||
#include "table/block_based/block_based_table_reader.h"
|
#include "table/block_based/block_based_table_reader.h"
|
||||||
#include "table/persistent_cache_helper.h"
|
#include "table/persistent_cache_helper.h"
|
||||||
|
#include "util/cast_util.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/compression.h"
|
#include "util/compression.h"
|
||||||
#include "util/crc32c.h"
|
#include "util/crc32c.h"
|
||||||
@ -58,6 +60,15 @@ void BlockHandle::EncodeTo(std::string* dst) const {
|
|||||||
PutVarint64Varint64(dst, offset_, size_);
|
PutVarint64Varint64(dst, offset_, size_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char* BlockHandle::EncodeTo(char* dst) const {
|
||||||
|
// Sanity check that all fields have been set
|
||||||
|
assert(offset_ != ~uint64_t{0});
|
||||||
|
assert(size_ != ~uint64_t{0});
|
||||||
|
char* cur = EncodeVarint64(dst, offset_);
|
||||||
|
cur = EncodeVarint64(cur, size_);
|
||||||
|
return cur;
|
||||||
|
}
|
||||||
|
|
||||||
Status BlockHandle::DecodeFrom(Slice* input) {
|
Status BlockHandle::DecodeFrom(Slice* input) {
|
||||||
if (GetVarint64(input, &offset_) && GetVarint64(input, &size_)) {
|
if (GetVarint64(input, &offset_) && GetVarint64(input, &size_)) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
@ -166,8 +177,8 @@ inline uint64_t UpconvertLegacyFooterFormat(uint64_t magic_number) {
|
|||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void Footer::set_table_magic_number(uint64_t magic_number) {
|
Footer& Footer::set_table_magic_number(uint64_t magic_number) {
|
||||||
assert(!HasInitializedTableMagicNumber());
|
assert(table_magic_number_ == kNullTableMagicNumber);
|
||||||
table_magic_number_ = magic_number;
|
table_magic_number_ = magic_number;
|
||||||
if (magic_number == kBlockBasedTableMagicNumber ||
|
if (magic_number == kBlockBasedTableMagicNumber ||
|
||||||
magic_number == kLegacyBlockBasedTableMagicNumber) {
|
magic_number == kLegacyBlockBasedTableMagicNumber) {
|
||||||
@ -176,64 +187,80 @@ void Footer::set_table_magic_number(uint64_t magic_number) {
|
|||||||
} else {
|
} else {
|
||||||
block_trailer_size_ = 0;
|
block_trailer_size_ = 0;
|
||||||
}
|
}
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
// legacy footer format:
|
// Footer format, in three parts:
|
||||||
// metaindex handle (varint64 offset, varint64 size)
|
// * Part1
|
||||||
// index handle (varint64 offset, varint64 size)
|
// -> format_version == 0 (inferred from legacy magic number)
|
||||||
// <padding> to make the total size 2 * BlockHandle::kMaxEncodedLength
|
// <empty> (0 bytes)
|
||||||
// table_magic_number (8 bytes)
|
// -> format_version >= 1
|
||||||
// new footer format:
|
|
||||||
// checksum type (char, 1 byte)
|
// checksum type (char, 1 byte)
|
||||||
|
// * Part2
|
||||||
// metaindex handle (varint64 offset, varint64 size)
|
// metaindex handle (varint64 offset, varint64 size)
|
||||||
// index handle (varint64 offset, varint64 size)
|
// index handle (varint64 offset, varint64 size)
|
||||||
// <padding> to make the total size 2 * BlockHandle::kMaxEncodedLength + 1
|
// <zero padding> for part2 size = 2 * BlockHandle::kMaxEncodedLength = 40
|
||||||
// footer version (4 bytes)
|
// * Part3
|
||||||
// table_magic_number (8 bytes)
|
// -> format_version == 0 (inferred from legacy magic number)
|
||||||
void Footer::EncodeTo(std::string* dst) const {
|
// legacy magic number (8 bytes)
|
||||||
assert(HasInitializedTableMagicNumber());
|
// -> format_version >= 1 (inferred from NOT legacy magic number)
|
||||||
if (IsLegacyFooterFormat(table_magic_number())) {
|
// format_version (uint32LE, 4 bytes), also called "footer version"
|
||||||
// has to be default checksum with legacy footer
|
// newer magic number (8 bytes)
|
||||||
assert(checksum_ == kCRC32c);
|
void Footer::EncodeTo(std::string* dst, uint64_t footer_offset) const {
|
||||||
|
(void)footer_offset; // Future use
|
||||||
|
|
||||||
|
// Sanitize magic numbers & format versions
|
||||||
|
assert(table_magic_number_ != kNullTableMagicNumber);
|
||||||
|
uint64_t magic = table_magic_number_;
|
||||||
|
uint32_t fv = format_version_;
|
||||||
|
assert(fv != kInvalidFormatVersion);
|
||||||
|
assert(IsLegacyFooterFormat(magic) == (fv == 0));
|
||||||
|
|
||||||
|
ChecksumType ct = checksum_type();
|
||||||
|
|
||||||
|
// Allocate destination data and generate parts 1 and 3
|
||||||
const size_t original_size = dst->size();
|
const size_t original_size = dst->size();
|
||||||
metaindex_handle_.EncodeTo(dst);
|
char* part2;
|
||||||
index_handle_.EncodeTo(dst);
|
if (fv > 0) {
|
||||||
dst->resize(original_size + 2 * BlockHandle::kMaxEncodedLength); // Padding
|
dst->resize(original_size + kNewVersionsEncodedLength);
|
||||||
PutFixed32(dst, static_cast<uint32_t>(table_magic_number() & 0xffffffffu));
|
char* part1 = &(*dst)[original_size];
|
||||||
PutFixed32(dst, static_cast<uint32_t>(table_magic_number() >> 32));
|
part2 = part1 + 1;
|
||||||
assert(dst->size() == original_size + kVersion0EncodedLength);
|
char* part3 = part2 + 2 * BlockHandle::kMaxEncodedLength;
|
||||||
|
assert(&(*dst)[dst->size() - 1] + 1 - part3 == /* part 3 size */ 12);
|
||||||
|
// Generate parts 1 and 3
|
||||||
|
part1[0] = ct;
|
||||||
|
EncodeFixed32(part3, fv);
|
||||||
|
EncodeFixed64(part3 + 4, magic);
|
||||||
} else {
|
} else {
|
||||||
const size_t original_size = dst->size();
|
dst->resize(original_size + kVersion0EncodedLength);
|
||||||
dst->push_back(static_cast<char>(checksum_));
|
part2 = &(*dst)[original_size];
|
||||||
metaindex_handle_.EncodeTo(dst);
|
char* part3 = part2 + 2 * BlockHandle::kMaxEncodedLength;
|
||||||
index_handle_.EncodeTo(dst);
|
assert(&(*dst)[dst->size() - 1] + 1 - part3 == /* part 3 size */ 8);
|
||||||
dst->resize(original_size + kNewVersionsEncodedLength - 12); // Padding
|
// Legacy SST files use kCRC32c checksum but it's not stored in footer.
|
||||||
PutFixed32(dst, version());
|
assert(ct == kNoChecksum || ct == kCRC32c);
|
||||||
PutFixed32(dst, static_cast<uint32_t>(table_magic_number() & 0xffffffffu));
|
// Generate part 3 (part 1 empty)
|
||||||
PutFixed32(dst, static_cast<uint32_t>(table_magic_number() >> 32));
|
EncodeFixed64(part3, magic);
|
||||||
assert(dst->size() == original_size + kNewVersionsEncodedLength);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generate Part2
|
||||||
|
// Variable size encode handles (sigh)
|
||||||
|
part2 = metaindex_handle_.EncodeTo(part2);
|
||||||
|
/*part2 = */ index_handle_.EncodeTo(part2);
|
||||||
|
|
||||||
|
// remainder of part2 is already zero padded
|
||||||
}
|
}
|
||||||
|
|
||||||
Footer::Footer(uint64_t _table_magic_number, uint32_t _version)
|
Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) {
|
||||||
: version_(_version),
|
(void)input_offset; // Future use
|
||||||
checksum_(kCRC32c),
|
|
||||||
table_magic_number_(_table_magic_number) {
|
|
||||||
// This should be guaranteed by constructor callers
|
|
||||||
assert(!IsLegacyFooterFormat(_table_magic_number) || version_ == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
Status Footer::DecodeFrom(Slice* input) {
|
// Only decode to unused Footer
|
||||||
assert(!HasInitializedTableMagicNumber());
|
assert(table_magic_number_ == kNullTableMagicNumber);
|
||||||
assert(input != nullptr);
|
assert(input != nullptr);
|
||||||
assert(input->size() >= kMinEncodedLength);
|
assert(input->size() >= kMinEncodedLength);
|
||||||
|
|
||||||
const char* magic_ptr =
|
const char* magic_ptr =
|
||||||
input->data() + input->size() - kMagicNumberLengthByte;
|
input->data() + input->size() - kMagicNumberLengthByte;
|
||||||
const uint32_t magic_lo = DecodeFixed32(magic_ptr);
|
uint64_t magic = DecodeFixed64(magic_ptr);
|
||||||
const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4);
|
|
||||||
uint64_t magic = ((static_cast<uint64_t>(magic_hi) << 32) |
|
|
||||||
(static_cast<uint64_t>(magic_lo)));
|
|
||||||
|
|
||||||
// We check for legacy formats here and silently upconvert them
|
// We check for legacy formats here and silently upconvert them
|
||||||
bool legacy = IsLegacyFooterFormat(magic);
|
bool legacy = IsLegacyFooterFormat(magic);
|
||||||
@ -242,44 +269,51 @@ Status Footer::DecodeFrom(Slice* input) {
|
|||||||
}
|
}
|
||||||
set_table_magic_number(magic);
|
set_table_magic_number(magic);
|
||||||
|
|
||||||
|
// Parse Part3
|
||||||
if (legacy) {
|
if (legacy) {
|
||||||
// The size is already asserted to be at least kMinEncodedLength
|
// The size is already asserted to be at least kMinEncodedLength
|
||||||
// at the beginning of the function
|
// at the beginning of the function
|
||||||
input->remove_prefix(input->size() - kVersion0EncodedLength);
|
input->remove_prefix(input->size() - kVersion0EncodedLength);
|
||||||
version_ = 0 /* legacy */;
|
format_version_ = 0 /* legacy */;
|
||||||
checksum_ = kCRC32c;
|
checksum_type_ = kCRC32c;
|
||||||
} else {
|
} else {
|
||||||
version_ = DecodeFixed32(magic_ptr - 4);
|
const char* part3_ptr = magic_ptr - 4;
|
||||||
// Footer version 1 and higher will always occupy exactly this many bytes.
|
format_version_ = DecodeFixed32(part3_ptr);
|
||||||
// It consists of the checksum type, two block handles, padding,
|
if (!IsSupportedFormatVersion(format_version_)) {
|
||||||
// a version number, and a magic number
|
return Status::Corruption("Corrupt or unsupported format_version: " +
|
||||||
|
ROCKSDB_NAMESPACE::ToString(format_version_));
|
||||||
|
}
|
||||||
|
// All known format versions >= 1 occupy exactly this many bytes.
|
||||||
if (input->size() < kNewVersionsEncodedLength) {
|
if (input->size() < kNewVersionsEncodedLength) {
|
||||||
return Status::Corruption("input is too short to be an sstable");
|
return Status::Corruption("Input is too short to be an SST file");
|
||||||
} else {
|
|
||||||
input->remove_prefix(input->size() - kNewVersionsEncodedLength);
|
|
||||||
}
|
}
|
||||||
uint32_t chksum;
|
uint64_t adjustment = input->size() - kNewVersionsEncodedLength;
|
||||||
if (!GetVarint32(input, &chksum)) {
|
input->remove_prefix(adjustment);
|
||||||
return Status::Corruption("bad checksum type");
|
|
||||||
}
|
// Parse Part1
|
||||||
checksum_ = static_cast<ChecksumType>(chksum);
|
char chksum = input->data()[0];
|
||||||
if (chksum != static_cast<uint32_t>(checksum_) ||
|
checksum_type_ = lossless_cast<ChecksumType>(chksum);
|
||||||
!IsSupportedChecksumType(checksum_)) {
|
if (!IsSupportedChecksumType(checksum_type())) {
|
||||||
return Status::Corruption("unknown checksum type " +
|
return Status::Corruption(
|
||||||
ROCKSDB_NAMESPACE::ToString(chksum));
|
"Corrupt or unsupported checksum type: " +
|
||||||
|
ROCKSDB_NAMESPACE::ToString(lossless_cast<uint8_t>(chksum)));
|
||||||
}
|
}
|
||||||
|
// Consume checksum type field
|
||||||
|
input->remove_prefix(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse Part2
|
||||||
Status result = metaindex_handle_.DecodeFrom(input);
|
Status result = metaindex_handle_.DecodeFrom(input);
|
||||||
if (result.ok()) {
|
if (result.ok()) {
|
||||||
result = index_handle_.DecodeFrom(input);
|
result = index_handle_.DecodeFrom(input);
|
||||||
}
|
}
|
||||||
if (result.ok()) {
|
if (!result.ok()) {
|
||||||
// We skip over any leftover data (just padding for now) in "input"
|
|
||||||
const char* end = magic_ptr + kMagicNumberLengthByte;
|
|
||||||
*input = Slice(end, input->data() + input->size() - end);
|
|
||||||
}
|
|
||||||
return result;
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark all input consumed (skip padding & part3)
|
||||||
|
*input = Slice(input->data() + input->size(), 0U);
|
||||||
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Footer::ToString() const {
|
std::string Footer::ToString() const {
|
||||||
@ -293,14 +327,12 @@ std::string Footer::ToString() const {
|
|||||||
result.append("table_magic_number: " +
|
result.append("table_magic_number: " +
|
||||||
ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n ");
|
ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n ");
|
||||||
} else {
|
} else {
|
||||||
result.append("checksum: " + ROCKSDB_NAMESPACE::ToString(checksum_) +
|
|
||||||
"\n ");
|
|
||||||
result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n ");
|
result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n ");
|
||||||
result.append("index handle: " + index_handle_.ToString() + "\n ");
|
result.append("index handle: " + index_handle_.ToString() + "\n ");
|
||||||
result.append("footer version: " + ROCKSDB_NAMESPACE::ToString(version_) +
|
|
||||||
"\n ");
|
|
||||||
result.append("table_magic_number: " +
|
result.append("table_magic_number: " +
|
||||||
ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n ");
|
ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n ");
|
||||||
|
result.append("format version: " +
|
||||||
|
ROCKSDB_NAMESPACE::ToString(format_version_) + "\n ");
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -319,9 +351,8 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file,
|
|||||||
std::string footer_buf;
|
std::string footer_buf;
|
||||||
AlignedBuf internal_buf;
|
AlignedBuf internal_buf;
|
||||||
Slice footer_input;
|
Slice footer_input;
|
||||||
size_t read_offset =
|
uint64_t read_offset = (file_size > Footer::kMaxEncodedLength)
|
||||||
(file_size > Footer::kMaxEncodedLength)
|
? file_size - Footer::kMaxEncodedLength
|
||||||
? static_cast<size_t>(file_size - Footer::kMaxEncodedLength)
|
|
||||||
: 0;
|
: 0;
|
||||||
Status s;
|
Status s;
|
||||||
// TODO: Need to pass appropriate deadline to TryReadFromCache(). Right now,
|
// TODO: Need to pass appropriate deadline to TryReadFromCache(). Right now,
|
||||||
@ -353,7 +384,7 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file,
|
|||||||
file->file_name());
|
file->file_name());
|
||||||
}
|
}
|
||||||
|
|
||||||
s = footer->DecodeFrom(&footer_input);
|
s = footer->DecodeFrom(&footer_input, read_offset);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
@ -376,7 +407,7 @@ inline uint32_t ModifyChecksumForLastByte(uint32_t checksum, char last_byte) {
|
|||||||
// more byte, except we don't need to re-mix the input checksum as long as
|
// more byte, except we don't need to re-mix the input checksum as long as
|
||||||
// we do this step only once (per checksum).
|
// we do this step only once (per checksum).
|
||||||
const uint32_t kRandomPrime = 0x6b9083d9;
|
const uint32_t kRandomPrime = 0x6b9083d9;
|
||||||
return checksum ^ static_cast<uint8_t>(last_byte) * kRandomPrime;
|
return checksum ^ lossless_cast<uint8_t>(last_byte) * kRandomPrime;
|
||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
165
table/format.h
165
table/format.h
@ -8,21 +8,20 @@
|
|||||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <stdint.h>
|
|
||||||
|
#include <cstdint>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "file/file_prefetch_buffer.h"
|
#include "file/file_prefetch_buffer.h"
|
||||||
#include "file/random_access_file_reader.h"
|
#include "file/random_access_file_reader.h"
|
||||||
|
|
||||||
#include "rocksdb/options.h"
|
|
||||||
#include "rocksdb/slice.h"
|
|
||||||
#include "rocksdb/status.h"
|
|
||||||
#include "rocksdb/table.h"
|
|
||||||
|
|
||||||
#include "memory/memory_allocator.h"
|
#include "memory/memory_allocator.h"
|
||||||
#include "options/cf_options.h"
|
#include "options/cf_options.h"
|
||||||
#include "port/malloc.h"
|
#include "port/malloc.h"
|
||||||
#include "port/port.h" // noexcept
|
#include "port/port.h" // noexcept
|
||||||
#include "table/persistent_cache_options.h"
|
#include "rocksdb/slice.h"
|
||||||
|
#include "rocksdb/status.h"
|
||||||
|
#include "rocksdb/table.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
|
||||||
namespace ROCKSDB_NAMESPACE {
|
namespace ROCKSDB_NAMESPACE {
|
||||||
|
|
||||||
@ -32,7 +31,7 @@ struct ReadOptions;
|
|||||||
extern bool ShouldReportDetailedTime(Env* env, Statistics* stats);
|
extern bool ShouldReportDetailedTime(Env* env, Statistics* stats);
|
||||||
|
|
||||||
// the length of the magic number in bytes.
|
// the length of the magic number in bytes.
|
||||||
const int kMagicNumberLengthByte = 8;
|
constexpr uint32_t kMagicNumberLengthByte = 8;
|
||||||
|
|
||||||
// BlockHandle is a pointer to the extent of a file that stores a data
|
// BlockHandle is a pointer to the extent of a file that stores a data
|
||||||
// block or a meta block.
|
// block or a meta block.
|
||||||
@ -52,6 +51,7 @@ class BlockHandle {
|
|||||||
void set_size(uint64_t _size) { size_ = _size; }
|
void set_size(uint64_t _size) { size_ = _size; }
|
||||||
|
|
||||||
void EncodeTo(std::string* dst) const;
|
void EncodeTo(std::string* dst) const;
|
||||||
|
char* EncodeTo(char* dst) const;
|
||||||
Status DecodeFrom(Slice* input);
|
Status DecodeFrom(Slice* input);
|
||||||
Status DecodeSizeFrom(uint64_t offset, Slice* input);
|
Status DecodeSizeFrom(uint64_t offset, Slice* input);
|
||||||
|
|
||||||
@ -65,7 +65,7 @@ class BlockHandle {
|
|||||||
static const BlockHandle& NullBlockHandle() { return kNullBlockHandle; }
|
static const BlockHandle& NullBlockHandle() { return kNullBlockHandle; }
|
||||||
|
|
||||||
// Maximum encoding length of a BlockHandle
|
// Maximum encoding length of a BlockHandle
|
||||||
enum { kMaxEncodedLength = 10 + 10 };
|
static constexpr uint32_t kMaxEncodedLength = 2 * kMaxVarint64Length;
|
||||||
|
|
||||||
inline bool operator==(const BlockHandle& rhs) const {
|
inline bool operator==(const BlockHandle& rhs) const {
|
||||||
return offset_ == rhs.offset_ && size_ == rhs.size_;
|
return offset_ == rhs.offset_ && size_ == rhs.size_;
|
||||||
@ -117,94 +117,107 @@ inline uint32_t GetCompressFormatForVersion(uint32_t format_version) {
|
|||||||
return format_version >= 2 ? 2 : 1;
|
return format_version >= 2 ? 2 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool BlockBasedTableSupportedVersion(uint32_t version) {
|
constexpr uint32_t kLatestFormatVersion = 5;
|
||||||
return version <= 5;
|
|
||||||
|
inline bool IsSupportedFormatVersion(uint32_t version) {
|
||||||
|
return version <= kLatestFormatVersion;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Footer encapsulates the fixed information stored at the tail
|
// Footer encapsulates the fixed information stored at the tail end of every
|
||||||
// end of every table file.
|
// SST file. In general, it should only include things that cannot go
|
||||||
|
// elsewhere under the metaindex block. For example, checksum_type is
|
||||||
|
// required for verifying metaindex block checksum (when applicable), but
|
||||||
|
// index block handle can easily go in metaindex block (possible future).
|
||||||
class Footer {
|
class Footer {
|
||||||
public:
|
public:
|
||||||
// Constructs a footer without specifying its table magic number.
|
Footer() {}
|
||||||
// In such case, the table magic number of such footer should be
|
|
||||||
// initialized via @ReadFooterFromFile().
|
|
||||||
// Use this when you plan to load Footer with DecodeFrom(). Never use this
|
|
||||||
// when you plan to EncodeTo.
|
|
||||||
Footer() : Footer(kInvalidTableMagicNumber, 0) {}
|
|
||||||
|
|
||||||
// Use this constructor when you plan to write out the footer using
|
// Uses builder pattern rather than distinctive ctors
|
||||||
// EncodeTo(). Never use this constructor with DecodeFrom().
|
|
||||||
// `version` is same as `format_version` for block-based table.
|
|
||||||
Footer(uint64_t table_magic_number, uint32_t version);
|
|
||||||
|
|
||||||
// The version of the footer in this file
|
|
||||||
uint32_t version() const { return version_; }
|
|
||||||
|
|
||||||
// The checksum type used in this file
|
|
||||||
ChecksumType checksum() const { return checksum_; }
|
|
||||||
void set_checksum(const ChecksumType c) { checksum_ = c; }
|
|
||||||
|
|
||||||
// The block handle for the metaindex block of the table
|
|
||||||
const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
|
|
||||||
void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; }
|
|
||||||
|
|
||||||
// The block handle for the index block of the table
|
|
||||||
const BlockHandle& index_handle() const { return index_handle_; }
|
|
||||||
|
|
||||||
void set_index_handle(const BlockHandle& h) { index_handle_ = h; }
|
|
||||||
|
|
||||||
|
// Table magic number identifies file as RocksDB SST file and which kind of
|
||||||
|
// SST format is in use.
|
||||||
|
Footer& set_table_magic_number(uint64_t tmn);
|
||||||
uint64_t table_magic_number() const { return table_magic_number_; }
|
uint64_t table_magic_number() const { return table_magic_number_; }
|
||||||
|
|
||||||
void EncodeTo(std::string* dst) const;
|
// A version (footer and more) within a kind of SST. (It would add more
|
||||||
|
// unnecessary complexity to separate footer versions and
|
||||||
|
// BBTO::format_version.)
|
||||||
|
Footer& set_format_version(uint32_t fv) {
|
||||||
|
format_version_ = fv;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
uint32_t format_version() const { return format_version_; }
|
||||||
|
|
||||||
// Set the current footer based on the input slice.
|
// Block handle for metaindex block.
|
||||||
//
|
Footer& set_metaindex_handle(const BlockHandle& h) {
|
||||||
// REQUIRES: table_magic_number_ is not set (i.e.,
|
metaindex_handle_ = h;
|
||||||
// HasInitializedTableMagicNumber() is false). The function will initialize the
|
return *this;
|
||||||
// magic number
|
}
|
||||||
Status DecodeFrom(Slice* input);
|
const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
|
||||||
|
|
||||||
// Encoded length of a Footer. Note that the serialization of a Footer will
|
// Block handle for (top-level) index block.
|
||||||
// always occupy at least kMinEncodedLength bytes. If fields are changed
|
Footer& set_index_handle(const BlockHandle& h) {
|
||||||
// the version number should be incremented and kMaxEncodedLength should be
|
index_handle_ = h;
|
||||||
// increased accordingly.
|
return *this;
|
||||||
enum {
|
}
|
||||||
// Footer version 0 (legacy) will always occupy exactly this many bytes.
|
const BlockHandle& index_handle() const { return index_handle_; }
|
||||||
// It consists of two block handles, padding, and a magic number.
|
|
||||||
kVersion0EncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8,
|
|
||||||
// Footer of versions 1 and higher will always occupy exactly this many
|
|
||||||
// bytes. It consists of the checksum type, two block handles, padding,
|
|
||||||
// a version number (bigger than 1), and a magic number
|
|
||||||
kNewVersionsEncodedLength = 1 + 2 * BlockHandle::kMaxEncodedLength + 4 + 8,
|
|
||||||
kMinEncodedLength = kVersion0EncodedLength,
|
|
||||||
kMaxEncodedLength = kNewVersionsEncodedLength,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const uint64_t kInvalidTableMagicNumber = 0;
|
// Checksum type used in the file.
|
||||||
|
Footer& set_checksum_type(ChecksumType ct) {
|
||||||
|
checksum_type_ = ct;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
ChecksumType checksum_type() const {
|
||||||
|
return static_cast<ChecksumType>(checksum_type_);
|
||||||
|
}
|
||||||
|
|
||||||
// convert this object to a human readable form
|
// Appends serialized footer to `dst`. The starting offset of the footer
|
||||||
|
// within the file is required for future work.
|
||||||
|
void EncodeTo(std::string* dst, uint64_t footer_offset) const;
|
||||||
|
|
||||||
|
// Deserialize a footer (populate fields) from `input` and check for various
|
||||||
|
// corruptions. On success (and some error cases) `input` is advanced past
|
||||||
|
// the footer. Like EncodeTo, the offset within the file will be needed for
|
||||||
|
// future work
|
||||||
|
Status DecodeFrom(Slice* input, uint64_t input_offset);
|
||||||
|
|
||||||
|
// Convert this object to a human readable form
|
||||||
std::string ToString() const;
|
std::string ToString() const;
|
||||||
|
|
||||||
// Block trailer size used by file with this footer (e.g. 5 for block-based
|
// Block trailer size used by file with this footer (e.g. 5 for block-based
|
||||||
// table and 0 for plain table)
|
// table and 0 for plain table)
|
||||||
inline size_t GetBlockTrailerSize() const { return block_trailer_size_; }
|
inline size_t GetBlockTrailerSize() const { return block_trailer_size_; }
|
||||||
|
|
||||||
|
// Encoded lengths of Footers. Bytes for serialized Footer will always be
|
||||||
|
// >= kMinEncodedLength and <= kMaxEncodedLength.
|
||||||
|
//
|
||||||
|
// Footer version 0 (legacy) will always occupy exactly this many bytes.
|
||||||
|
// It consists of two block handles, padding, and a magic number.
|
||||||
|
static constexpr uint32_t kVersion0EncodedLength =
|
||||||
|
2 * BlockHandle::kMaxEncodedLength + kMagicNumberLengthByte;
|
||||||
|
static constexpr uint32_t kMinEncodedLength = kVersion0EncodedLength;
|
||||||
|
|
||||||
|
// Footer of versions 1 and higher will always occupy exactly this many
|
||||||
|
// bytes. It originally consisted of the checksum type, two block handles,
|
||||||
|
// padding (to maximum handle encoding size), a format version number, and a
|
||||||
|
// magic number.
|
||||||
|
static constexpr uint32_t kNewVersionsEncodedLength =
|
||||||
|
1 + 2 * BlockHandle::kMaxEncodedLength + 4 + kMagicNumberLengthByte;
|
||||||
|
static constexpr uint32_t kMaxEncodedLength = kNewVersionsEncodedLength;
|
||||||
|
|
||||||
|
static constexpr uint64_t kNullTableMagicNumber = 0;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// REQUIRES: magic number wasn't initialized.
|
static constexpr uint32_t kInvalidFormatVersion = 0xffffffffU;
|
||||||
void set_table_magic_number(uint64_t magic_number);
|
static constexpr int kInvalidChecksumType =
|
||||||
|
(1 << (sizeof(ChecksumType) * 8)) | kNoChecksum;
|
||||||
|
|
||||||
// return true if @table_magic_number_ is set to a value different
|
uint64_t table_magic_number_ = kNullTableMagicNumber;
|
||||||
// from @kInvalidTableMagicNumber.
|
uint32_t format_version_ = kInvalidFormatVersion;
|
||||||
bool HasInitializedTableMagicNumber() const {
|
|
||||||
return (table_magic_number_ != kInvalidTableMagicNumber);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t version_;
|
|
||||||
ChecksumType checksum_;
|
|
||||||
uint8_t block_trailer_size_ = 0; // set based on magic number
|
|
||||||
BlockHandle metaindex_handle_;
|
BlockHandle metaindex_handle_;
|
||||||
BlockHandle index_handle_;
|
BlockHandle index_handle_;
|
||||||
uint64_t table_magic_number_ = 0;
|
int checksum_type_ = kInvalidChecksumType;
|
||||||
|
uint8_t block_trailer_size_ = 0; // set based on magic number
|
||||||
};
|
};
|
||||||
|
|
||||||
// Read the footer from file
|
// Read the footer from file
|
||||||
|
@ -26,11 +26,11 @@
|
|||||||
|
|
||||||
namespace ROCKSDB_NAMESPACE {
|
namespace ROCKSDB_NAMESPACE {
|
||||||
|
|
||||||
const std::string kPropertiesBlock = "rocksdb.properties";
|
const std::string kPropertiesBlockName = "rocksdb.properties";
|
||||||
// Old property block name for backward compatibility
|
// Old property block name for backward compatibility
|
||||||
const std::string kPropertiesBlockOldName = "rocksdb.stats";
|
const std::string kPropertiesBlockOldName = "rocksdb.stats";
|
||||||
const std::string kCompressionDictBlock = "rocksdb.compression_dict";
|
const std::string kCompressionDictBlockName = "rocksdb.compression_dict";
|
||||||
const std::string kRangeDelBlock = "rocksdb.range_del";
|
const std::string kRangeDelBlockName = "rocksdb.range_del";
|
||||||
|
|
||||||
MetaIndexBuilder::MetaIndexBuilder()
|
MetaIndexBuilder::MetaIndexBuilder()
|
||||||
: meta_index_block_(new BlockBuilder(1 /* restart interval */)) {}
|
: meta_index_block_(new BlockBuilder(1 /* restart interval */)) {}
|
||||||
@ -381,7 +381,7 @@ Status ReadTablePropertiesHelper(
|
|||||||
// Modified version of BlockFetcher checksum verification
|
// Modified version of BlockFetcher checksum verification
|
||||||
// (See write_global_seqno comment above)
|
// (See write_global_seqno comment above)
|
||||||
if (s.ok() && footer.GetBlockTrailerSize() > 0) {
|
if (s.ok() && footer.GetBlockTrailerSize() > 0) {
|
||||||
s = VerifyBlockChecksum(footer.checksum(), properties_block.data(),
|
s = VerifyBlockChecksum(footer.checksum_type(), properties_block.data(),
|
||||||
block_size, file->file_name(), handle.offset());
|
block_size, file->file_name(), handle.offset());
|
||||||
if (s.IsCorruption()) {
|
if (s.IsCorruption()) {
|
||||||
if (new_table_properties->external_sst_file_global_seqno_offset != 0) {
|
if (new_table_properties->external_sst_file_global_seqno_offset != 0) {
|
||||||
@ -391,8 +391,8 @@ Status ReadTablePropertiesHelper(
|
|||||||
new_table_properties->external_sst_file_global_seqno_offset -
|
new_table_properties->external_sst_file_global_seqno_offset -
|
||||||
handle.offset();
|
handle.offset();
|
||||||
EncodeFixed64(&tmp_buf[static_cast<size_t>(global_seqno_offset)], 0);
|
EncodeFixed64(&tmp_buf[static_cast<size_t>(global_seqno_offset)], 0);
|
||||||
s = VerifyBlockChecksum(footer.checksum(), tmp_buf.data(), block_size,
|
s = VerifyBlockChecksum(footer.checksum_type(), tmp_buf.data(),
|
||||||
file->file_name(), handle.offset());
|
block_size, file->file_name(), handle.offset());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -413,7 +413,7 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
|
|||||||
BlockHandle block_handle;
|
BlockHandle block_handle;
|
||||||
Footer footer;
|
Footer footer;
|
||||||
Status s = FindMetaBlockInFile(file, file_size, table_magic_number, ioptions,
|
Status s = FindMetaBlockInFile(file, file_size, table_magic_number, ioptions,
|
||||||
kPropertiesBlock, &block_handle,
|
kPropertiesBlockName, &block_handle,
|
||||||
memory_allocator, prefetch_buffer, &footer);
|
memory_allocator, prefetch_buffer, &footer);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return s;
|
return s;
|
||||||
@ -438,7 +438,7 @@ Status FindOptionalMetaBlock(InternalIterator* meta_index_iter,
|
|||||||
if (meta_index_iter->Valid() && meta_index_iter->key() == meta_block_name) {
|
if (meta_index_iter->Valid() && meta_index_iter->key() == meta_block_name) {
|
||||||
Slice v = meta_index_iter->value();
|
Slice v = meta_index_iter->value();
|
||||||
return block_handle->DecodeFrom(&v);
|
return block_handle->DecodeFrom(&v);
|
||||||
} else if (meta_block_name == kPropertiesBlock) {
|
} else if (meta_block_name == kPropertiesBlockName) {
|
||||||
// Have to try old name for compatibility
|
// Have to try old name for compatibility
|
||||||
meta_index_iter->Seek(kPropertiesBlockOldName);
|
meta_index_iter->Seek(kPropertiesBlockOldName);
|
||||||
if (meta_index_iter->status().ok() && meta_index_iter->Valid() &&
|
if (meta_index_iter->status().ok() && meta_index_iter->Valid() &&
|
||||||
|
@ -31,10 +31,10 @@ class RandomAccessFile;
|
|||||||
struct TableProperties;
|
struct TableProperties;
|
||||||
|
|
||||||
// Meta block names for metaindex
|
// Meta block names for metaindex
|
||||||
extern const std::string kPropertiesBlock;
|
extern const std::string kPropertiesBlockName;
|
||||||
extern const std::string kPropertiesBlockOldName;
|
extern const std::string kPropertiesBlockOldName;
|
||||||
extern const std::string kCompressionDictBlock;
|
extern const std::string kCompressionDictBlockName;
|
||||||
extern const std::string kRangeDelBlock;
|
extern const std::string kRangeDelBlockName;
|
||||||
|
|
||||||
class MetaIndexBuilder {
|
class MetaIndexBuilder {
|
||||||
public:
|
public:
|
||||||
|
@ -279,7 +279,7 @@ Status PlainTableBuilder::Finish() {
|
|||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return std::move(s);
|
return std::move(s);
|
||||||
}
|
}
|
||||||
meta_index_builer.Add(kPropertiesBlock, property_block_handle);
|
meta_index_builer.Add(kPropertiesBlockName, property_block_handle);
|
||||||
|
|
||||||
// -- write metaindex block
|
// -- write metaindex block
|
||||||
BlockHandle metaindex_block_handle;
|
BlockHandle metaindex_block_handle;
|
||||||
@ -292,11 +292,13 @@ Status PlainTableBuilder::Finish() {
|
|||||||
|
|
||||||
// Write Footer
|
// Write Footer
|
||||||
// no need to write out new footer if we're using default checksum
|
// no need to write out new footer if we're using default checksum
|
||||||
Footer footer(kLegacyPlainTableMagicNumber, 0);
|
Footer footer;
|
||||||
footer.set_metaindex_handle(metaindex_block_handle);
|
footer.set_table_magic_number(kLegacyPlainTableMagicNumber)
|
||||||
footer.set_index_handle(BlockHandle::NullBlockHandle());
|
.set_format_version(0)
|
||||||
|
.set_metaindex_handle(metaindex_block_handle)
|
||||||
|
.set_index_handle(BlockHandle::NullBlockHandle());
|
||||||
std::string footer_encoding;
|
std::string footer_encoding;
|
||||||
footer.EncodeTo(&footer_encoding);
|
footer.EncodeTo(&footer_encoding, offset_);
|
||||||
io_status_ = file_->Append(footer_encoding);
|
io_status_ = file_->Append(footer_encoding);
|
||||||
if (io_status_.ok()) {
|
if (io_status_.ok()) {
|
||||||
offset_ += footer_encoding.size();
|
offset_ += footer_encoding.size();
|
||||||
|
@ -74,7 +74,7 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) {
|
|||||||
// Warning about 'magic_number' being uninitialized shows up only in UBsan
|
// Warning about 'magic_number' being uninitialized shows up only in UBsan
|
||||||
// builds. Though access is guarded by 's.ok()' checks, fix the issue to
|
// builds. Though access is guarded by 's.ok()' checks, fix the issue to
|
||||||
// avoid any warnings.
|
// avoid any warnings.
|
||||||
uint64_t magic_number = Footer::kInvalidTableMagicNumber;
|
uint64_t magic_number = Footer::kNullTableMagicNumber;
|
||||||
|
|
||||||
// read table magic number
|
// read table magic number
|
||||||
Footer footer;
|
Footer footer;
|
||||||
|
@ -21,16 +21,15 @@
|
|||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "block_fetcher.h"
|
|
||||||
#include "cache/lru_cache.h"
|
#include "cache/lru_cache.h"
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "db/memtable.h"
|
#include "db/memtable.h"
|
||||||
#include "db/write_batch_internal.h"
|
#include "db/write_batch_internal.h"
|
||||||
#include "memtable/stl_wrappers.h"
|
#include "memtable/stl_wrappers.h"
|
||||||
#include "meta_blocks.h"
|
|
||||||
#include "monitoring/statistics.h"
|
#include "monitoring/statistics.h"
|
||||||
#include "options/options_helper.h"
|
#include "options/options_helper.h"
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
|
#include "port/stack_trace.h"
|
||||||
#include "rocksdb/cache.h"
|
#include "rocksdb/cache.h"
|
||||||
#include "rocksdb/compression_type.h"
|
#include "rocksdb/compression_type.h"
|
||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
@ -53,9 +52,11 @@
|
|||||||
#include "table/block_based/block_based_table_reader.h"
|
#include "table/block_based/block_based_table_reader.h"
|
||||||
#include "table/block_based/block_builder.h"
|
#include "table/block_based/block_builder.h"
|
||||||
#include "table/block_based/flush_block_policy.h"
|
#include "table/block_based/flush_block_policy.h"
|
||||||
|
#include "table/block_fetcher.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
#include "table/get_context.h"
|
#include "table/get_context.h"
|
||||||
#include "table/internal_iterator.h"
|
#include "table/internal_iterator.h"
|
||||||
|
#include "table/meta_blocks.h"
|
||||||
#include "table/plain/plain_table_factory.h"
|
#include "table/plain/plain_table_factory.h"
|
||||||
#include "table/scoped_arena_iterator.h"
|
#include "table/scoped_arena_iterator.h"
|
||||||
#include "table/sst_file_writer_collectors.h"
|
#include "table/sst_file_writer_collectors.h"
|
||||||
@ -1356,10 +1357,8 @@ class FileChecksumTestHelper {
|
|||||||
|
|
||||||
uint64_t FileChecksumTestHelper::checksum_uniq_id_ = 1;
|
uint64_t FileChecksumTestHelper::checksum_uniq_id_ = 1;
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest,
|
INSTANTIATE_TEST_CASE_P(FormatVersions, BlockBasedTableTest,
|
||||||
testing::Values(test::kDefaultFormatVersion));
|
testing::ValuesIn(test::kFooterFormatVersionsToTest));
|
||||||
INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest,
|
|
||||||
testing::Values(test::kLatestFormatVersion));
|
|
||||||
|
|
||||||
// This test serves as the living tutorial for the prefix scan of user collected
|
// This test serves as the living tutorial for the prefix scan of user collected
|
||||||
// properties.
|
// properties.
|
||||||
@ -2228,7 +2227,8 @@ TEST_P(BlockBasedTableTest, BadChecksumType) {
|
|||||||
const MutableCFOptions new_moptions(options);
|
const MutableCFOptions new_moptions(options);
|
||||||
Status s = c.Reopen(new_ioptions, new_moptions);
|
Status s = c.Reopen(new_ioptions, new_moptions);
|
||||||
ASSERT_NOK(s);
|
ASSERT_NOK(s);
|
||||||
ASSERT_MATCHES_REGEX(s.ToString(), "Corruption: unknown checksum type 123.*");
|
ASSERT_EQ(s.ToString(),
|
||||||
|
"Corruption: Corrupt or unsupported checksum type: 123");
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
@ -4166,106 +4166,107 @@ TEST_P(ParameterizedHarnessTest, SimpleSpecialKey) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(TableTest, FooterTests) {
|
TEST(TableTest, FooterTests) {
|
||||||
|
Random* r = Random::GetTLSInstance();
|
||||||
|
uint64_t data_size = (uint64_t{1} << r->Uniform(40)) + r->Uniform(100);
|
||||||
|
uint64_t index_size = r->Uniform(1000000000);
|
||||||
|
uint64_t metaindex_size = r->Uniform(1000000);
|
||||||
|
// 5 == block trailer size
|
||||||
|
BlockHandle index(data_size + 5, index_size);
|
||||||
|
BlockHandle meta_index(data_size + index_size + 2 * 5, metaindex_size);
|
||||||
|
uint64_t footer_offset = data_size + metaindex_size + index_size + 3 * 5;
|
||||||
{
|
{
|
||||||
// upconvert legacy block based
|
// upconvert legacy block based
|
||||||
std::string encoded;
|
std::string encoded;
|
||||||
Footer footer(kLegacyBlockBasedTableMagicNumber, 0);
|
Footer footer;
|
||||||
BlockHandle meta_index(10, 5), index(20, 15);
|
footer.set_table_magic_number(kLegacyBlockBasedTableMagicNumber)
|
||||||
footer.set_metaindex_handle(meta_index);
|
.set_format_version(0)
|
||||||
footer.set_index_handle(index);
|
.set_metaindex_handle(meta_index)
|
||||||
footer.EncodeTo(&encoded);
|
.set_index_handle(index);
|
||||||
|
footer.EncodeTo(&encoded, footer_offset);
|
||||||
Footer decoded_footer;
|
Footer decoded_footer;
|
||||||
Slice encoded_slice(encoded);
|
Slice encoded_slice(encoded);
|
||||||
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
|
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
|
||||||
ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
|
ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
|
||||||
ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
|
ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c);
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
||||||
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
||||||
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
||||||
ASSERT_EQ(decoded_footer.version(), 0U);
|
ASSERT_EQ(decoded_footer.format_version(), 0U);
|
||||||
|
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U);
|
||||||
}
|
}
|
||||||
|
// block based, various checksums, various versions
|
||||||
for (auto t : GetSupportedChecksums()) {
|
for (auto t : GetSupportedChecksums()) {
|
||||||
// block based, various checksums
|
for (uint32_t fv = 1; IsSupportedFormatVersion(fv); ++fv) {
|
||||||
std::string encoded;
|
std::string encoded;
|
||||||
Footer footer(kBlockBasedTableMagicNumber, 1);
|
Footer footer;
|
||||||
BlockHandle meta_index(10, 5), index(20, 15);
|
footer.set_table_magic_number(kBlockBasedTableMagicNumber)
|
||||||
footer.set_metaindex_handle(meta_index);
|
.set_format_version(fv)
|
||||||
footer.set_index_handle(index);
|
.set_metaindex_handle(meta_index)
|
||||||
footer.set_checksum(t);
|
.set_index_handle(index)
|
||||||
footer.EncodeTo(&encoded);
|
.set_checksum_type(t);
|
||||||
|
footer.EncodeTo(&encoded, footer_offset);
|
||||||
Footer decoded_footer;
|
Footer decoded_footer;
|
||||||
Slice encoded_slice(encoded);
|
Slice encoded_slice(encoded);
|
||||||
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
|
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
|
||||||
ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
|
ASSERT_EQ(decoded_footer.table_magic_number(),
|
||||||
ASSERT_EQ(decoded_footer.checksum(), t);
|
kBlockBasedTableMagicNumber);
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
ASSERT_EQ(decoded_footer.checksum_type(), t);
|
||||||
|
ASSERT_EQ(decoded_footer.metaindex_handle().offset(),
|
||||||
|
meta_index.offset());
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
||||||
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
||||||
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
||||||
ASSERT_EQ(decoded_footer.version(), 1U);
|
ASSERT_EQ(decoded_footer.format_version(), fv);
|
||||||
|
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Plain table is not supported in ROCKSDB_LITE
|
// Plain table is not supported in ROCKSDB_LITE
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
{
|
{
|
||||||
// upconvert legacy plain table
|
// upconvert legacy plain table
|
||||||
std::string encoded;
|
std::string encoded;
|
||||||
Footer footer(kLegacyPlainTableMagicNumber, 0);
|
Footer footer;
|
||||||
BlockHandle meta_index(10, 5), index(20, 15);
|
footer.set_table_magic_number(kLegacyPlainTableMagicNumber)
|
||||||
footer.set_metaindex_handle(meta_index);
|
.set_format_version(0)
|
||||||
footer.set_index_handle(index);
|
.set_metaindex_handle(meta_index)
|
||||||
footer.EncodeTo(&encoded);
|
.set_index_handle(index);
|
||||||
|
footer.EncodeTo(&encoded, footer_offset);
|
||||||
Footer decoded_footer;
|
Footer decoded_footer;
|
||||||
Slice encoded_slice(encoded);
|
Slice encoded_slice(encoded);
|
||||||
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
|
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
|
||||||
ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
|
ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
|
||||||
ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
|
ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c);
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
||||||
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
||||||
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
||||||
ASSERT_EQ(decoded_footer.version(), 0U);
|
ASSERT_EQ(decoded_footer.format_version(), 0U);
|
||||||
|
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
// xxhash plain table (not currently used)
|
// xxhash plain table (not currently used)
|
||||||
std::string encoded;
|
std::string encoded;
|
||||||
Footer footer(kPlainTableMagicNumber, 1);
|
Footer footer;
|
||||||
BlockHandle meta_index(10, 5), index(20, 15);
|
footer.set_table_magic_number(kPlainTableMagicNumber)
|
||||||
footer.set_metaindex_handle(meta_index);
|
.set_format_version(1)
|
||||||
footer.set_index_handle(index);
|
.set_metaindex_handle(meta_index)
|
||||||
footer.set_checksum(kxxHash);
|
.set_index_handle(index)
|
||||||
footer.EncodeTo(&encoded);
|
.set_checksum_type(kxxHash);
|
||||||
|
footer.EncodeTo(&encoded, footer_offset);
|
||||||
Footer decoded_footer;
|
Footer decoded_footer;
|
||||||
Slice encoded_slice(encoded);
|
Slice encoded_slice(encoded);
|
||||||
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
|
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
|
||||||
ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
|
ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
|
||||||
ASSERT_EQ(decoded_footer.checksum(), kxxHash);
|
ASSERT_EQ(decoded_footer.checksum_type(), kxxHash);
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
||||||
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
||||||
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
||||||
ASSERT_EQ(decoded_footer.version(), 1U);
|
ASSERT_EQ(decoded_footer.format_version(), 1U);
|
||||||
|
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
|
||||||
}
|
}
|
||||||
#endif // !ROCKSDB_LITE
|
#endif // !ROCKSDB_LITE
|
||||||
{
|
|
||||||
// version == 2
|
|
||||||
std::string encoded;
|
|
||||||
Footer footer(kBlockBasedTableMagicNumber, 2);
|
|
||||||
BlockHandle meta_index(10, 5), index(20, 15);
|
|
||||||
footer.set_metaindex_handle(meta_index);
|
|
||||||
footer.set_index_handle(index);
|
|
||||||
footer.EncodeTo(&encoded);
|
|
||||||
Footer decoded_footer;
|
|
||||||
Slice encoded_slice(encoded);
|
|
||||||
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
|
|
||||||
ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
|
|
||||||
ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
|
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
|
|
||||||
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
|
|
||||||
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
|
|
||||||
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
|
|
||||||
ASSERT_EQ(decoded_footer.version(), 2U);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class IndexBlockRestartIntervalTest
|
class IndexBlockRestartIntervalTest
|
||||||
@ -4786,7 +4787,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
|
|||||||
|
|
||||||
// -- Read properties block
|
// -- Read properties block
|
||||||
BlockHandle properties_handle;
|
BlockHandle properties_handle;
|
||||||
ASSERT_OK(FindOptionalMetaBlock(meta_iter.get(), kPropertiesBlock,
|
ASSERT_OK(FindOptionalMetaBlock(meta_iter.get(), kPropertiesBlockName,
|
||||||
&properties_handle));
|
&properties_handle));
|
||||||
ASSERT_FALSE(properties_handle.IsNull());
|
ASSERT_FALSE(properties_handle.IsNull());
|
||||||
BlockContents properties_contents;
|
BlockContents properties_contents;
|
||||||
@ -4873,7 +4874,7 @@ TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
|
|||||||
key_at_max_offset = metaindex_iter->key().ToString();
|
key_at_max_offset = metaindex_iter->key().ToString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ASSERT_EQ(kPropertiesBlock, key_at_max_offset);
|
ASSERT_EQ(kPropertiesBlockName, key_at_max_offset);
|
||||||
// index handle is stored in footer rather than metaindex block, so need
|
// index handle is stored in footer rather than metaindex block, so need
|
||||||
// separate logic to verify it comes before properties block.
|
// separate logic to verify it comes before properties block.
|
||||||
ASSERT_GT(max_offset, footer.index_handle().offset());
|
ASSERT_GT(max_offset, footer.index_handle().offset());
|
||||||
@ -5369,6 +5370,7 @@ TEST_P(
|
|||||||
} // namespace ROCKSDB_NAMESPACE
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
||||||
::testing::InitGoogleTest(&argc, argv);
|
::testing::InitGoogleTest(&argc, argv);
|
||||||
return RUN_ALL_TESTS();
|
return RUN_ALL_TESTS();
|
||||||
}
|
}
|
||||||
|
@ -38,7 +38,12 @@ namespace ROCKSDB_NAMESPACE {
|
|||||||
namespace test {
|
namespace test {
|
||||||
|
|
||||||
const uint32_t kDefaultFormatVersion = BlockBasedTableOptions().format_version;
|
const uint32_t kDefaultFormatVersion = BlockBasedTableOptions().format_version;
|
||||||
const uint32_t kLatestFormatVersion = 5u;
|
const std::set<uint32_t> kFooterFormatVersionsToTest{
|
||||||
|
5U,
|
||||||
|
// In case of any interesting future changes
|
||||||
|
kDefaultFormatVersion,
|
||||||
|
kLatestFormatVersion,
|
||||||
|
};
|
||||||
|
|
||||||
std::string RandomKey(Random* rnd, int len, RandomKeyType type) {
|
std::string RandomKey(Random* rnd, int len, RandomKeyType type) {
|
||||||
// Make sure to generate a wide variety of characters so we
|
// Make sure to generate a wide variety of characters so we
|
||||||
|
@ -44,7 +44,7 @@ class SequentialFileReader;
|
|||||||
namespace test {
|
namespace test {
|
||||||
|
|
||||||
extern const uint32_t kDefaultFormatVersion;
|
extern const uint32_t kDefaultFormatVersion;
|
||||||
extern const uint32_t kLatestFormatVersion;
|
extern const std::set<uint32_t> kFooterFormatVersionsToTest;
|
||||||
|
|
||||||
// Return a random key with the specified length that may contain interesting
|
// Return a random key with the specified length that may contain interesting
|
||||||
// characters (e.g. \x00, \xff, etc.).
|
// characters (e.g. \x00, \xff, etc.).
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
#include "rocksdb/rocksdb_namespace.h"
|
#include "rocksdb/rocksdb_namespace.h"
|
||||||
|
|
||||||
namespace ROCKSDB_NAMESPACE {
|
namespace ROCKSDB_NAMESPACE {
|
||||||
@ -20,4 +22,21 @@ inline DestClass* static_cast_with_check(SrcClass* x) {
|
|||||||
#endif
|
#endif
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A wrapper around static_cast for lossless conversion between integral
|
||||||
|
// types, including enum types. For example, this can be used for converting
|
||||||
|
// between signed/unsigned or enum type and underlying type without fear of
|
||||||
|
// stripping away data, now or in the future.
|
||||||
|
template <typename To, typename From>
|
||||||
|
inline To lossless_cast(From x) {
|
||||||
|
using FromValue = typename std::remove_reference<From>::type;
|
||||||
|
static_assert(
|
||||||
|
std::is_integral<FromValue>::value || std::is_enum<FromValue>::value,
|
||||||
|
"Only works on integral types");
|
||||||
|
static_assert(std::is_integral<To>::value || std::is_enum<To>::value,
|
||||||
|
"Only works on integral types");
|
||||||
|
static_assert(sizeof(To) >= sizeof(FromValue), "Must be lossless");
|
||||||
|
return static_cast<To>(x);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ROCKSDB_NAMESPACE
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
@ -31,7 +31,7 @@
|
|||||||
namespace ROCKSDB_NAMESPACE {
|
namespace ROCKSDB_NAMESPACE {
|
||||||
|
|
||||||
// The maximum length of a varint in bytes for 64-bit.
|
// The maximum length of a varint in bytes for 64-bit.
|
||||||
const unsigned int kMaxVarint64Length = 10;
|
const uint32_t kMaxVarint64Length = 10;
|
||||||
|
|
||||||
// Standard Put... routines append to a string
|
// Standard Put... routines append to a string
|
||||||
extern void PutFixed16(std::string* dst, uint16_t value);
|
extern void PutFixed16(std::string* dst, uint16_t value);
|
||||||
|
Loading…
Reference in New Issue
Block a user